From 212987c6c27d5f321464d6a3b00ea5e6637e9f78 Mon Sep 17 00:00:00 2001 From: darrenr Date: Mon, 25 Apr 2005 18:43:14 +0000 Subject: Merge the changes from 3.4.35 to 4.1.8 into the kernel source tree --- sys/contrib/ipfilter/netinet/fil.c | 7746 +++++++++++++++++++------- sys/contrib/ipfilter/netinet/ip_auth.c | 546 +- sys/contrib/ipfilter/netinet/ip_auth.h | 28 +- sys/contrib/ipfilter/netinet/ip_compat.h | 2540 ++++++--- sys/contrib/ipfilter/netinet/ip_fil.c | 2358 -------- sys/contrib/ipfilter/netinet/ip_fil.h | 1308 ++++- sys/contrib/ipfilter/netinet/ip_frag.c | 788 ++- sys/contrib/ipfilter/netinet/ip_frag.h | 64 +- sys/contrib/ipfilter/netinet/ip_ftp_pxy.c | 1092 ++-- sys/contrib/ipfilter/netinet/ip_log.c | 595 +- sys/contrib/ipfilter/netinet/ip_nat.c | 4777 +++++++++++----- sys/contrib/ipfilter/netinet/ip_nat.h | 405 +- sys/contrib/ipfilter/netinet/ip_proxy.c | 560 +- sys/contrib/ipfilter/netinet/ip_proxy.h | 309 +- sys/contrib/ipfilter/netinet/ip_raudio_pxy.c | 152 +- sys/contrib/ipfilter/netinet/ip_rcmd_pxy.c | 165 +- sys/contrib/ipfilter/netinet/ip_state.c | 3860 +++++++++---- sys/contrib/ipfilter/netinet/ip_state.h | 199 +- sys/contrib/ipfilter/netinet/ipl.h | 9 +- sys/contrib/ipfilter/netinet/mlfk_ipl.c | 326 +- 20 files changed, 17920 insertions(+), 9907 deletions(-) delete mode 100644 sys/contrib/ipfilter/netinet/ip_fil.c diff --git a/sys/contrib/ipfilter/netinet/fil.c b/sys/contrib/ipfilter/netinet/fil.c index 76fe9d9..69442c0 100644 --- a/sys/contrib/ipfilter/netinet/fil.c +++ b/sys/contrib/ipfilter/netinet/fil.c @@ -1,24 +1,29 @@ +/* $FreeBSD$ */ + /* - * Copyright (C) 1993-2001 by Darren Reed. + * Copyright (C) 1993-2003 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. 
*/ -#if defined(__sgi) && (IRIX > 602) -# include +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 #endif #include #include #include #include -#include -#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ - defined(_KERNEL) -# include "opt_ipfilter_log.h" +#if defined(__NetBSD__) +# if (NetBSD >= 199905) && !defined(IPFILTER_LKM) && defined(_KERNEL) +# include "opt_ipfilter_log.h" +# endif #endif -#if (defined(KERNEL) || defined(_KERNEL)) && defined(__FreeBSD_version) && \ +#if defined(_KERNEL) && defined(__FreeBSD_version) && \ (__FreeBSD_version >= 220000) # if (__FreeBSD_version >= 400000) -# ifndef KLD_MODULE +# if !defined(IPFILTER_LKM) # include "opt_inet6.h" # endif # if (__FreeBSD_version == 400019) @@ -26,42 +31,56 @@ # endif # endif # include -# include #else # include #endif -#if (defined(_KERNEL) || defined(KERNEL)) && !defined(linux) +#include +#if defined(_KERNEL) # include +# include #else # include # include # include -#endif -#if !defined(__SVR4) && !defined(__svr4__) -# ifndef linux -# include +# include +# include +# define _KERNEL +# ifdef __OpenBSD__ +struct file; # endif +# include +# undef _KERNEL +#endif +#if !defined(__SVR4) && !defined(__svr4__) && !defined(__hpux) && \ + !defined(linux) +# include #else -# include -# include -# if SOLARIS2 < 5 +# if !defined(linux) +# include +# endif +# if (SOLARIS2 < 5) && defined(sun) # include # endif -# include #endif -#ifndef linux +#ifdef __hpux +# define _NET_ROUTE_INCLUDED +#endif +#if !defined(linux) # include -# include #endif +#include #include #ifdef sun # include #endif +#if !defined(_KERNEL) && defined(__FreeBSD__) +# include "radix_ipf.h" +#endif #include #include #include #include -#ifndef linux +#if !defined(linux) # include #endif #if defined(__sgi) && defined(IFF_DRVRLOCK) /* IRIX 6 */ @@ -69,12 +88,17 @@ # include #endif #include -#include -#include +#if !defined(__sgi) || defined(_KERNEL) +# 
include +# include +#endif +#ifdef __hpux +# undef _NET_ROUTE_INCLUDED +#endif #include "netinet/ip_compat.h" #ifdef USE_INET6 # include -# if !SOLARIS && defined(_KERNEL) +# if !SOLARIS && defined(_KERNEL) && !defined(__osf__) && !defined(__hpux) # include # endif #endif @@ -85,78 +109,145 @@ #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" #include "netinet/ip_auth.h" -# if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) -# include -# if defined(_KERNEL) && !defined(IPFILTER_LKM) -# include "opt_ipfilter.h" -# endif +#ifdef IPFILTER_SCAN +# include "netinet/ip_scan.h" +#endif +#ifdef IPFILTER_SYNC +# include "netinet/ip_sync.h" +#endif +#include "netinet/ip_pool.h" +#include "netinet/ip_htable.h" +#ifdef IPFILTER_COMPILED +# include "netinet/ip_rules.h" +#endif +#if defined(IPFILTER_BPF) && defined(_KERNEL) +# include +#endif +#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +# include +# if defined(_KERNEL) && !defined(IPFILTER_LKM) +# include "opt_ipfilter.h" # endif -#ifndef MIN -# define MIN(a,b) (((a)<(b))?(a):(b)) #endif #include "netinet/ipl.h" +/* END OF INCLUDES */ #include #if !defined(lint) static const char sccsid[] = "@(#)fil.c 1.36 6/5/96 (C) 1993-2000 Darren Reed"; static const char rcsid[] = "@(#)$FreeBSD$"; +static const char rcsid[] = "@(#)Id: fil.c,v 2.243.2.57 2005/03/28 10:47:50 darrenr Exp"; #endif #ifndef _KERNEL # include "ipf.h" # include "ipt.h" +# include "bpf-ipf.h" extern int opts; # define FR_VERBOSE(verb_pr) verbose verb_pr # define FR_DEBUG(verb_pr) debug verb_pr -# define IPLLOG(a, c, d, e) ipflog(a, c, d, e) #else /* #ifndef _KERNEL */ # define FR_VERBOSE(verb_pr) # define FR_DEBUG(verb_pr) -# define IPLLOG(a, c, d, e) ipflog(a, c, d, e) -# ifdef USE_MUTEX -extern KRWLOCK_T ipf_mutex, ipf_auth, ipf_nat; -extern kmutex_t ipf_rw; -# endif /* USE_MUTEX */ #endif /* _KERNEL */ -struct filterstats frstats[2] = {{0,0,0,0,0},{0,0,0,0,0}}; +fr_info_t frcache[2][8]; +struct filterstats frstats[2] = { 
{ 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }; struct frentry *ipfilter[2][2] = { { NULL, NULL }, { NULL, NULL } }, -#ifdef USE_INET6 *ipfilter6[2][2] = { { NULL, NULL }, { NULL, NULL } }, *ipacct6[2][2] = { { NULL, NULL }, { NULL, NULL } }, -#endif - *ipacct[2][2] = { { NULL, NULL }, { NULL, NULL } }; -struct frgroup *ipfgroups[3][2]; + *ipacct[2][2] = { { NULL, NULL }, { NULL, NULL } }, + *ipnatrules[2][2] = { { NULL, NULL }, { NULL, NULL } }; +struct frgroup *ipfgroups[IPL_LOGSIZE][2]; +char ipfilter_version[] = IPL_VERSION; +int fr_refcnt = 0; +/* + * For fr_running: + * 0 == loading, 1 = running, -1 = disabled, -2 = unloading + */ +int fr_running = 0; int fr_flags = IPF_LOGGING; int fr_active = 0; -int fr_chksrc = 0; -int fr_minttl = 3; -int fr_minttllog = 1; +int fr_control_forwarding = 0; +int fr_update_ipid = 0; +u_short fr_ip_id = 0; +int fr_chksrc = 0; /* causes a system crash if enabled */ +int fr_minttl = 4; +u_long fr_frouteok[2] = {0, 0}; +u_long fr_userifqs = 0; +u_long fr_badcoalesces[2] = {0, 0}; +u_char ipf_iss_secret[32]; #if defined(IPFILTER_DEFAULT_BLOCK) -int fr_pass = FR_NOMATCH|FR_BLOCK; +int fr_pass = FR_BLOCK|FR_NOMATCH; #else -int fr_pass = (IPF_DEFAULT_PASS|FR_NOMATCH); +int fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH; #endif -char ipfilter_version[] = IPL_VERSION; - -fr_info_t frcache[2]; +int fr_features = 0 +#ifdef IPFILTER_LKM + | IPF_FEAT_LKM +#endif +#ifdef IPFILTER_LOG + | IPF_FEAT_LOG +#endif +#ifdef IPFILTER_LOOKUP + | IPF_FEAT_LOOKUP +#endif +#ifdef IPFILTER_BPF + | IPF_FEAT_BPF +#endif +#ifdef IPFILTER_COMPILED + | IPF_FEAT_COMPILED +#endif +#ifdef IPFILTER_CKSUM + | IPF_FEAT_CKSUM +#endif +#ifdef IPFILTER_SYNC + | IPF_FEAT_SYNC +#endif +#ifdef IPFILTER_SCAN + | IPF_FEAT_SCAN +#endif +#ifdef USE_INET6 + | IPF_FEAT_IPV6 +#endif + ; -static int frflushlist __P((int, minor_t, int *, frentry_t **)); -#ifdef _KERNEL -static void frsynclist __P((frentry_t *)); -# ifndef __sgi -static void *ipf_pullup __P((mb_t *, fr_info_t *, int, void *)); -# 
endif +static INLINE int fr_ipfcheck __P((fr_info_t *, frentry_t *, int)); +static int fr_portcheck __P((frpcmp_t *, u_short *)); +static int frflushlist __P((int, minor_t, int *, frentry_t **)); +static ipfunc_t fr_findfunc __P((ipfunc_t)); +static frentry_t *fr_firewall __P((fr_info_t *, u_32_t *)); +static int fr_funcinit __P((frentry_t *fr)); +static INLINE void frpr_esp __P((fr_info_t *)); +static INLINE void frpr_gre __P((fr_info_t *)); +static INLINE void frpr_udp __P((fr_info_t *)); +static INLINE void frpr_tcp __P((fr_info_t *)); +static INLINE void frpr_icmp __P((fr_info_t *)); +static INLINE void frpr_ipv4hdr __P((fr_info_t *)); +static INLINE int frpr_pullup __P((fr_info_t *, int)); +static INLINE void frpr_short __P((fr_info_t *, int)); +static INLINE void frpr_tcpcommon __P((fr_info_t *)); +static INLINE void frpr_udpcommon __P((fr_info_t *)); +static INLINE int fr_updateipid __P((fr_info_t *)); +#ifdef IPFILTER_LOOKUP +static int fr_grpmapinit __P((frentry_t *fr)); +static INLINE void *fr_resolvelookup __P((u_int, u_int, lookupfunc_t *)); #endif +static void frsynclist __P((frentry_t *, void *)); +static ipftuneable_t *fr_findtunebyname __P((char *)); +static ipftuneable_t *fr_findtunebycookie __P((void *, void **)); /* * bit values for identifying presence of individual IP options + * All of these tables should be ordered by increasing key value on the left + * hand side to allow for binary searching of the array and include a trailer + * with a 0 for the bitmask for linear searches to easily find the end with. 
*/ -struct optlist ipopts[20] = { +const struct optlist ipopts[20] = { { IPOPT_NOP, 0x000001 }, { IPOPT_RR, 0x000002 }, { IPOPT_ZSU, 0x000004 }, @@ -179,10 +270,34 @@ struct optlist ipopts[20] = { { 0, 0x000000 } }; +#ifdef USE_INET6 +struct optlist ip6exthdr[] = { + { IPPROTO_HOPOPTS, 0x000001 }, + { IPPROTO_IPV6, 0x000002 }, + { IPPROTO_ROUTING, 0x000004 }, + { IPPROTO_FRAGMENT, 0x000008 }, + { IPPROTO_ESP, 0x000010 }, + { IPPROTO_AH, 0x000020 }, + { IPPROTO_NONE, 0x000040 }, + { IPPROTO_DSTOPTS, 0x000080 }, + { 0, 0 } +}; +#endif + +struct optlist tcpopts[] = { + { TCPOPT_NOP, 0x000001 }, + { TCPOPT_MAXSEG, 0x000002 }, + { TCPOPT_WINDOW, 0x000004 }, + { TCPOPT_SACK_PERMITTED, 0x000008 }, + { TCPOPT_SACK, 0x000010 }, + { TCPOPT_TIMESTAMP, 0x000020 }, + { 0, 0x000000 } +}; + /* * bit values for identifying presence of individual IP security options */ -struct optlist secopt[8] = { +const struct optlist secopt[8] = { { IPSO_CLASS_RES4, 0x01 }, { IPSO_CLASS_TOPS, 0x02 }, { IPSO_CLASS_SECR, 0x04 }, @@ -195,2224 +310,5903 @@ struct optlist secopt[8] = { /* - * compact the IP header into a structure which contains just the info. - * which is useful for comparing IP headers with. + * Table of functions available for use with call rules. */ -int fr_makefrip(hlen, ip, fin) -int hlen; -ip_t *ip; +static ipfunc_resolve_t fr_availfuncs[] = { +#ifdef IPFILTER_LOOKUP + { "fr_srcgrpmap", fr_srcgrpmap, fr_grpmapinit }, + { "fr_dstgrpmap", fr_dstgrpmap, fr_grpmapinit }, +#endif + { "", NULL } +}; + + +/* + * The next section of code is a a collection of small routines that set + * fields in the fr_info_t structure passed based on properties of the + * current packet. There are different routines for the same protocol + * for each of IPv4 and IPv6. 
Adding a new protocol, for which there + * will "special" inspection for setup, is now more easily done by adding + * a new routine and expanding the frpr_ipinit*() function rather than by + * adding more code to a growing switch statement. + */ +#ifdef USE_INET6 +static INLINE void frpr_udp6 __P((fr_info_t *)); +static INLINE void frpr_tcp6 __P((fr_info_t *)); +static INLINE void frpr_icmp6 __P((fr_info_t *)); +static INLINE void frpr_ipv6hdr __P((fr_info_t *)); +static INLINE void frpr_short6 __P((fr_info_t *, int)); +static INLINE int frpr_hopopts6 __P((fr_info_t *)); +static INLINE int frpr_routing6 __P((fr_info_t *)); +static INLINE int frpr_dstopts6 __P((fr_info_t *)); +static INLINE int frpr_fragment6 __P((fr_info_t *)); + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_short6 */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* This is function enforces the 'is a packet too short to be legit' rule */ +/* for IPv6 and marks the packet with FI_SHORT if so. See function comment */ +/* for frpr_short() for more details. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_short6(fin, min) fr_info_t *fin; +int min; { - u_short optmsk = 0, secmsk = 0, auth = 0; - int i, mv, ol, off, p, plen, v; -#if defined(_KERNEL) -# if SOLARIS - mb_t *m = fin->fin_qfm; -# else - mb_t *m = fin->fin_mp ? 
*fin->fin_mp : NULL; -# endif -#endif fr_ip_t *fi = &fin->fin_fi; - struct optlist *op; - u_char *s, opt; - tcphdr_t *tcp; - - fin->fin_rev = 0; - fin->fin_dp = NULL; - fin->fin_fr = NULL; - fin->fin_tcpf = 0; - fin->fin_data[0] = 0; - fin->fin_data[1] = 0; - fin->fin_rule = -1; - fin->fin_group = -1; - fin->fin_icode = ipl_unreach; - v = fin->fin_v; - fi->fi_v = v; - fin->fin_hlen = hlen; - if (v == 4) { - fin->fin_id = ip->ip_id; - fi->fi_tos = ip->ip_tos; -#if (OpenBSD >= 200311) && defined(_KERNEL) - ip->ip_off = ntohs(ip->ip_off); -#endif - off = (ip->ip_off & IP_OFFMASK); - (*(((u_short *)fi) + 1)) = (*(((u_short *)ip) + 4)); - fi->fi_src.i6[1] = 0; - fi->fi_src.i6[2] = 0; - fi->fi_src.i6[3] = 0; - fi->fi_dst.i6[1] = 0; - fi->fi_dst.i6[2] = 0; - fi->fi_dst.i6[3] = 0; - fi->fi_saddr = ip->ip_src.s_addr; - fi->fi_daddr = ip->ip_dst.s_addr; - p = ip->ip_p; - fi->fi_fl = (hlen > sizeof(ip_t)) ? FI_OPTIONS : 0; - if (ip->ip_off & (IP_MF|IP_OFFMASK)) - fi->fi_fl |= FI_FRAG; -#if (OpenBSD >= 200311) && defined(_KERNEL) - ip->ip_len = ntohs(ip->ip_len); -#endif - plen = ip->ip_len; - fin->fin_dlen = plen - hlen; - } -#ifdef USE_INET6 - else if (v == 6) { - ip6_t *ip6 = (ip6_t *)ip; + int off; - off = 0; - p = ip6->ip6_nxt; - fi->fi_p = p; - fi->fi_ttl = ip6->ip6_hlim; - fi->fi_src.in6 = ip6->ip6_src; - fi->fi_dst.in6 = ip6->ip6_dst; - fin->fin_id = (u_short)(ip6->ip6_flow & 0xffff); - fi->fi_tos = 0; - fi->fi_fl = 0; - plen = ntohs(ip6->ip6_plen); - fin->fin_dlen = plen; - plen += sizeof(*ip6); + off = fin->fin_off; + if (off == 0) { + if (fin->fin_plen < fin->fin_hlen + min) + fi->fi_flx |= FI_SHORT; + } else if (off < min) { + fi->fi_flx |= FI_SHORT; } -#endif - else - return -1; - - fin->fin_off = off; - fin->fin_plen = plen; - tcp = (tcphdr_t *)((char *)ip + hlen); - fin->fin_misc = 0; - off <<= 3; +} - /* - * For both ICMPV6 & ICMP, we attempt to pullup the entire packet into - * a single buffer for recognised error return packets. Why? 
Because - * the entire data section of the ICMP payload is considered to be of - * significance and maybe required in NAT/state processing, so rather - * than be careful later, attempt to get it all in one buffeer first. - * For TCP we just make sure the _entire_ TCP header is in the first - * buffer for convienience. - */ - switch (p) - { -#ifdef USE_INET6 - case IPPROTO_ICMPV6 : - { - int minicmpsz = sizeof(struct icmp6_hdr); - struct icmp6_hdr *icmp6; - if (!(fin->fin_fl & FI_SHORT) && (fin->fin_dlen > 1)) { - fin->fin_data[0] = *(u_short *)tcp; +/* ------------------------------------------------------------------------ */ +/* Function: frpr_ipv6hdr */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* Copy values from the IPv6 header into the fr_info_t struct and call the */ +/* per-protocol analyzer if it exists. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_ipv6hdr(fin) +fr_info_t *fin; +{ + int p, go = 1, i, hdrcount, coalesced; + ip6_t *ip6 = (ip6_t *)fin->fin_ip; + fr_ip_t *fi = &fin->fin_fi; - icmp6 = (struct icmp6_hdr *)tcp; + fin->fin_off = 0; - switch (icmp6->icmp6_type) - { - case ICMP6_ECHO_REPLY : - case ICMP6_ECHO_REQUEST : - minicmpsz = ICMP6_MINLEN; - break; - case ICMP6_DST_UNREACH : - case ICMP6_PACKET_TOO_BIG : - case ICMP6_TIME_EXCEEDED : - case ICMP6_PARAM_PROB : -# if defined(KERNEL) && !defined(__sgi) - if ((m != NULL) && (M_BLEN(m) < plen)) { - ip = ipf_pullup(m, fin, plen, ip); - if (ip == NULL) - return -1; - tcp = (tcphdr_t *)((char *)ip + hlen); - } -# endif /* KERNEL && !__sgi */ - minicmpsz = ICMP6ERR_IPICMPHLEN; - break; - default : - break; - } - } + fi->fi_tos = 0; + fi->fi_optmsk = 0; + fi->fi_secmsk = 0; + fi->fi_auth = 0; - if (!(fin->fin_dlen >= minicmpsz)) - fi->fi_fl |= FI_SHORT; + coalesced = (fin->fin_flx & FI_COALESCE) ? 
1 : 0; + p = ip6->ip6_nxt; + fi->fi_ttl = ip6->ip6_hlim; + fi->fi_src.in6 = ip6->ip6_src; + fi->fi_dst.in6 = ip6->ip6_dst; + fin->fin_id = (u_short)(ip6->ip6_flow & 0xffff); - break; - } -#endif /* USE_INET6 */ + hdrcount = 0; + while (go && !(fin->fin_flx & (FI_BAD|FI_SHORT))) { + switch (p) + { + case IPPROTO_UDP : + frpr_udp6(fin); + go = 0; + break; - case IPPROTO_ICMP : - { - int minicmpsz = sizeof(struct icmp); - icmphdr_t *icmp; + case IPPROTO_TCP : + frpr_tcp6(fin); + go = 0; + break; - if (!off && (fin->fin_dlen > 1) && !(fin->fin_fl & FI_SHORT)) { - fin->fin_data[0] = *(u_short *)tcp; + case IPPROTO_ICMPV6 : + frpr_icmp6(fin); + go = 0; + break; - icmp = (icmphdr_t *)tcp; + case IPPROTO_GRE : + frpr_gre(fin); + go = 0; + break; + case IPPROTO_HOPOPTS : /* - * Minimum ICMP packet is type(1) code(1) cksum(2) - * plus 4 bytes following, totalling 8 bytes. - */ - switch (icmp->icmp_type) - { - case ICMP_ECHOREPLY : - case ICMP_ECHO : - /* Router discovery messages - RFC 1256 */ - case ICMP_ROUTERADVERT : - case ICMP_ROUTERSOLICIT : - minicmpsz = ICMP_MINLEN; - break; - /* - * type(1) + code(1) + cksum(2) + id(2) seq(2) + - * 3*timestamp(3*4) - */ - case ICMP_TSTAMP : - case ICMP_TSTAMPREPLY : - minicmpsz = ICMP_MINLEN + 12; - break; - /* - * type(1) + code(1) + cksum(2) + id(2) seq(2) + - * mask(4) - */ - case ICMP_MASKREQ : - case ICMP_MASKREPLY : - minicmpsz = ICMP_MINLEN + 4; - break; - /* - * type(1) + code(1) + cksum(2) + arg(4) ip(20+) + * Actually, hop by hop header is only allowed right + * after IPv6 header! 
*/ - case ICMP_UNREACH : - case ICMP_SOURCEQUENCH : - case ICMP_REDIRECT : - case ICMP_TIMXCEED : - case ICMP_PARAMPROB : -#if defined(KERNEL) && !defined(__sgi) - if ((m != NULL) && (M_BLEN(m) < plen)) { - ip = ipf_pullup(m, fin, plen, ip); - if (ip == NULL) - return -1; - tcp = (tcphdr_t *)((char *)ip + hlen); - } -#endif /* KERNEL && !__sgi */ - minicmpsz = ICMPERR_MINPKTLEN - sizeof(ip_t); - break; - default : - minicmpsz = ICMP_MINLEN; - break; + if (hdrcount != 0) + fin->fin_flx |= FI_BAD; + + if (coalesced == 0) { + coalesced = fr_coalesce(fin); + if (coalesced != 1) + return; } - } + p = frpr_hopopts6(fin); + break; - if ((!(plen >= hlen + minicmpsz) && !off) || - (off && off < sizeof(struct icmp))) - fi->fi_fl |= FI_SHORT; - break; - } + case IPPROTO_DSTOPTS : + if (coalesced == 0) { + coalesced = fr_coalesce(fin); + if (coalesced != 1) + return; + } + p = frpr_dstopts6(fin); + break; - case IPPROTO_TCP : - fi->fi_fl |= FI_TCPUDP; -#ifdef USE_INET6 - if (v == 6) { - if (plen < sizeof(struct tcphdr)) - fi->fi_fl |= FI_SHORT; - } else -#endif - if (v == 4) { - if ((!IPMINLEN(ip, tcphdr) && !off) || - (off && off < sizeof(struct tcphdr))) - fi->fi_fl |= FI_SHORT; - } + case IPPROTO_ROUTING : + if (coalesced == 0) { + coalesced = fr_coalesce(fin); + if (coalesced != 1) + return; + } + p = frpr_routing6(fin); + break; -#if defined(KERNEL) && !defined(__sgi) - if (!off && !(fi->fi_fl & FI_SHORT)) { - int tlen = hlen + (tcp->th_off << 2); + case IPPROTO_ESP : + frpr_esp(fin); + /*FALLTHROUGH*/ + case IPPROTO_AH : + case IPPROTO_IPV6 : + for (i = 0; ip6exthdr[i].ol_bit != 0; i++) + if (ip6exthdr[i].ol_val == p) { + fin->fin_flx |= ip6exthdr[i].ol_bit; + break; + } + go = 0; + break; + + case IPPROTO_NONE : + go = 0; + break; - if ((m != NULL) && (M_BLEN(m) < tlen)) { - ip = ipf_pullup(m, fin, tlen, ip); - if (ip == NULL) - return -1; - tcp = (tcphdr_t *)((char *)ip + hlen); + case IPPROTO_FRAGMENT : + if (coalesced == 0) { + coalesced = fr_coalesce(fin); + if 
(coalesced != 1) + return; } - } -#endif /* _KERNEL && !_sgi */ + p = frpr_fragment6(fin); + break; - if (!(fi->fi_fl & FI_SHORT) && !off) - fin->fin_tcpf = tcp->th_flags; - goto getports; - case IPPROTO_UDP : - fi->fi_fl |= FI_TCPUDP; -#ifdef USE_INET6 - if (v == 6) { - if (plen < sizeof(struct udphdr)) - fi->fi_fl |= FI_SHORT; - } else -#endif - if (v == 4) { - if ((!IPMINLEN(ip, udphdr) && !off) || - (off && off < sizeof(struct udphdr))) - fi->fi_fl |= FI_SHORT; - } -getports: - if (!off && (fin->fin_dlen > 3)) { - fin->fin_data[0] = ntohs(tcp->th_sport); - fin->fin_data[1] = ntohs(tcp->th_dport); + default : + go = 0; + break; } - break; - case IPPROTO_ESP : -#ifdef USE_INET6 - if (v == 6) { - if (plen < 8) - fi->fi_fl |= FI_SHORT; - } else -#endif - if (v == 4) { - if (((ip->ip_len < hlen + 8) && !off) || - (off && off < 8)) - fi->fi_fl |= FI_SHORT; + hdrcount++; + + /* + * It is important to note that at this point, for the + * extension headers (go != 0), the entire header may not have + * been pulled up when the code gets to this point. This is + * only done for "go != 0" because the other header handlers + * will all pullup their complete header and the other + * indicator of an incomplete header is that this eas just an + * extension header. 
+ */ + if ((go != 0) && (p != IPPROTO_NONE) && + (frpr_pullup(fin, 0) == -1)) { + p = IPPROTO_NONE; + go = 0; } - break; - default : - break; } + fi->fi_p = p; +} - fin->fin_dp = (char *)tcp; -#ifdef USE_INET6 - if (v == 6) { - fi->fi_optmsk = 0; - fi->fi_secmsk = 0; - fi->fi_auth = 0; - return 0; +/* ------------------------------------------------------------------------ */ +/* Function: frpr_hopopts6 */ +/* Returns: int - value of the next header or IPPROTO_NONE if error */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* This is function checks pending hop by hop options extension header */ +/* ------------------------------------------------------------------------ */ +static INLINE int frpr_hopopts6(fin) +fr_info_t *fin; +{ + struct ip6_ext *hdr; + u_short shift; + int i; + + fin->fin_flx |= FI_V6EXTHDR; + + /* 8 is default length of extension hdr */ + if ((fin->fin_dlen - 8) < 0) { + fin->fin_flx |= FI_SHORT; + return IPPROTO_NONE; } -#endif - for (s = (u_char *)(ip + 1), hlen -= (int)sizeof(*ip); hlen > 0; ) { - opt = *s; - if (opt == '\0') + if (frpr_pullup(fin, 8) == -1) + return IPPROTO_NONE; + + hdr = fin->fin_dp; + shift = 8 + (hdr->ip6e_len << 3); + if (shift > fin->fin_dlen) { /* Nasty extension header length? 
*/ + fin->fin_flx |= FI_BAD; + return IPPROTO_NONE; + } + + for (i = 0; ip6exthdr[i].ol_bit != 0; i++) + if (ip6exthdr[i].ol_val == IPPROTO_HOPOPTS) { + fin->fin_optmsk |= ip6exthdr[i].ol_bit; break; - else if (opt == IPOPT_NOP) - ol = 1; - else { - if (hlen < 2) - break; - ol = (int)*(s + 1); - if (ol < 2 || ol > hlen) - break; } - for (i = 9, mv = 4; mv >= 0; ) { - op = ipopts + i; - if (opt == (u_char)op->ol_val) { - optmsk |= op->ol_bit; - if (opt == IPOPT_SECURITY) { - struct optlist *sp; - u_char sec; - int j, m; - sec = *(s + 2); /* classification */ - for (j = 3, m = 2; m >= 0; ) { - sp = secopt + j; - if (sec == sp->ol_val) { - secmsk |= sp->ol_bit; - auth = *(s + 3); - auth *= 256; - auth += *(s + 4); - break; - } - if (sec < sp->ol_val) - j -= m--; - else - j += m--; - } - } - break; - } - if (opt < op->ol_val) - i -= mv--; - else - i += mv--; - } - hlen -= ol; - s += ol; - } - if (auth && !(auth & 0x0100)) - auth &= 0xff00; - fi->fi_optmsk = optmsk; - fi->fi_secmsk = secmsk; - fi->fi_auth = auth; - return 0; + fin->fin_dp = (char *)fin->fin_dp + shift; + fin->fin_dlen -= shift; + + return hdr->ip6e_nxt; } -/* - * check an IP packet for TCP/UDP characteristics such as ports and flags. - */ -int fr_tcpudpchk(ft, fin) -frtuc_t *ft; +/* ------------------------------------------------------------------------ */ +/* Function: frpr_routing6 */ +/* Returns: int - value of the next header or IPPROTO_NONE if error */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* This is function checks pending routing extension header */ +/* ------------------------------------------------------------------------ */ +static INLINE int frpr_routing6(fin) fr_info_t *fin; { - register u_short po, tup; - register char i; - register int err = 1; + struct ip6_ext *hdr; + u_short shift; + int i; - /* - * Both ports should *always* be in the first fragment. - * So far, I cannot find any cases where they can not be. 
- * - * compare destination ports - */ - if ((i = (int)ft->ftu_dcmp)) { - po = ft->ftu_dport; - tup = fin->fin_data[1]; - /* - * Do opposite test to that required and - * continue if that succeeds. - */ - if (!--i && tup != po) /* EQUAL */ - err = 0; - else if (!--i && tup == po) /* NOTEQUAL */ - err = 0; - else if (!--i && tup >= po) /* LESSTHAN */ - err = 0; - else if (!--i && tup <= po) /* GREATERTHAN */ - err = 0; - else if (!--i && tup > po) /* LT or EQ */ - err = 0; - else if (!--i && tup < po) /* GT or EQ */ - err = 0; - else if (!--i && /* Out of range */ - (tup >= po && tup <= ft->ftu_dtop)) - err = 0; - else if (!--i && /* In range */ - (tup <= po || tup >= ft->ftu_dtop)) - err = 0; + fin->fin_flx |= FI_V6EXTHDR; + + /* 8 is default length of extension hdr */ + if ((fin->fin_dlen - 8) < 0) { + fin->fin_flx |= FI_SHORT; + return IPPROTO_NONE; } + + if (frpr_pullup(fin, 8) == -1) + return IPPROTO_NONE; + hdr = fin->fin_dp; + + shift = 8 + (hdr->ip6e_len << 3); /* - * compare source ports + * Nasty extension header length? */ - if (err && (i = (int)ft->ftu_scmp)) { - po = ft->ftu_sport; - tup = fin->fin_data[0]; - if (!--i && tup != po) - err = 0; - else if (!--i && tup == po) - err = 0; - else if (!--i && tup >= po) - err = 0; - else if (!--i && tup <= po) - err = 0; - else if (!--i && tup > po) - err = 0; - else if (!--i && tup < po) - err = 0; - else if (!--i && /* Out of range */ - (tup >= po && tup <= ft->ftu_stop)) - err = 0; - else if (!--i && /* In range */ - (tup <= po || tup >= ft->ftu_stop)) - err = 0; + if ((shift > fin->fin_dlen) || (shift < sizeof(struct ip6_hdr)) || + ((shift - sizeof(struct ip6_hdr)) & 15)) { + fin->fin_flx |= FI_BAD; + return IPPROTO_NONE; } - /* - * If we don't have all the TCP/UDP header, then how can we - * expect to do any sort of match on it ? If we were looking for - * TCP flags, then NO match. 
If not, then match (which should + for (i = 0; ip6exthdr[i].ol_bit != 0; i++) + if (ip6exthdr[i].ol_val == IPPROTO_ROUTING) { + fin->fin_optmsk |= ip6exthdr[i].ol_bit; + break; + } + + fin->fin_dp = (char *)fin->fin_dp + shift; + fin->fin_dlen -= shift; + + return hdr->ip6e_nxt; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_fragment6 */ +/* Returns: int - value of the next header or IPPROTO_NONE if error */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* Examine the IPv6 fragment header and extract fragment offset information.*/ +/* ------------------------------------------------------------------------ */ +static INLINE int frpr_fragment6(fin) +fr_info_t *fin; +{ + struct ip6_frag *frag; + struct ip6_ext *hdr; + int i; + + fin->fin_flx |= (FI_FRAG|FI_V6EXTHDR); + + /* 8 is default length of extension hdr */ + if ((fin->fin_dlen - 8) < 0) { + fin->fin_flx |= FI_SHORT; + return IPPROTO_NONE; + } + + /* + * Only one frgament header is allowed per IPv6 packet but it need + * not be the first nor last (not possible in some cases.) + */ + for (i = 0; ip6exthdr[i].ol_bit != 0; i++) + if (ip6exthdr[i].ol_val == IPPROTO_FRAGMENT) + break; + + if (fin->fin_optmsk & ip6exthdr[i].ol_bit) { + fin->fin_flx |= FI_BAD; + return IPPROTO_NONE; + } + + fin->fin_optmsk |= ip6exthdr[i].ol_bit; + + if (frpr_pullup(fin, sizeof(*frag)) == -1) + return IPPROTO_NONE; + hdr = fin->fin_dp; + + /* + * Length must be zero, i.e. it has no length. 
+ */ + if (hdr->ip6e_len != 0) { + fin->fin_flx |= FI_BAD; + return IPPROTO_NONE; + } + + if ((int)(fin->fin_dlen - sizeof(*frag)) < 0) { + fin->fin_flx |= FI_SHORT; + return IPPROTO_NONE; + } + + frag = fin->fin_dp; + fin->fin_off = frag->ip6f_offlg & IP6F_OFF_MASK; + fin->fin_off <<= 3; + if (fin->fin_off != 0) + fin->fin_flx |= FI_FRAGBODY; + + fin->fin_dp = (char *)fin->fin_dp + sizeof(*frag); + fin->fin_dlen -= sizeof(*frag); + + return frag->ip6f_nxt; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_dstopts6 */ +/* Returns: int - value of the next header or IPPROTO_NONE if error */ +/* Parameters: fin(I) - pointer to packet information */ +/* nextheader(I) - stores next header value */ +/* */ +/* IPv6 Only */ +/* This is function checks pending destination options extension header */ +/* ------------------------------------------------------------------------ */ +static INLINE int frpr_dstopts6(fin) +fr_info_t *fin; +{ + struct ip6_ext *hdr; + u_short shift; + int i; + + /* 8 is default length of extension hdr */ + if ((fin->fin_dlen - 8) < 0) { + fin->fin_flx |= FI_SHORT; + return IPPROTO_NONE; + } + + if (frpr_pullup(fin, 8) == -1) + return IPPROTO_NONE; + hdr = fin->fin_dp; + + shift = 8 + (hdr->ip6e_len << 3); + if (shift > fin->fin_dlen) { /* Nasty extension header length? */ + fin->fin_flx |= FI_BAD; + return IPPROTO_NONE; + } + + for (i = 0; ip6exthdr[i].ol_bit != 0; i++) + if (ip6exthdr[i].ol_val == IPPROTO_DSTOPTS) + break; + fin->fin_optmsk |= ip6exthdr[i].ol_bit; + fin->fin_dp = (char *)fin->fin_dp + shift; + fin->fin_dlen -= shift; + + return hdr->ip6e_nxt; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_icmp6 */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* This routine is mainly concerned with determining the minimum valid size */ +/* for an ICMPv6 packet. 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_icmp6(fin) +fr_info_t *fin; +{ + int minicmpsz = sizeof(struct icmp6_hdr); + struct icmp6_hdr *icmp6; + + if (frpr_pullup(fin, ICMP6ERR_MINPKTLEN + 8 - sizeof(ip6_t)) == -1) + return; + + if (fin->fin_dlen > 1) { + icmp6 = fin->fin_dp; + + fin->fin_data[0] = *(u_short *)icmp6; + + switch (icmp6->icmp6_type) + { + case ICMP6_ECHO_REPLY : + case ICMP6_ECHO_REQUEST : + minicmpsz = ICMP6ERR_MINPKTLEN - sizeof(ip6_t); + break; + case ICMP6_DST_UNREACH : + case ICMP6_PACKET_TOO_BIG : + case ICMP6_TIME_EXCEEDED : + case ICMP6_PARAM_PROB : + if ((fin->fin_m != NULL) && + (M_LEN(fin->fin_m) < fin->fin_plen)) { + if (fr_coalesce(fin) != 1) + return; + } + fin->fin_flx |= FI_ICMPERR; + minicmpsz = ICMP6ERR_IPICMPHLEN - sizeof(ip6_t); + break; + default : + break; + } + } + + frpr_short(fin, minicmpsz); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_udp6 */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* Analyse the packet for IPv6/UDP properties. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_udp6(fin) +fr_info_t *fin; +{ + + fr_checkv6sum(fin); + + frpr_short(fin, sizeof(struct udphdr)); + + frpr_udpcommon(fin); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_tcp6 */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv6 Only */ +/* Analyse the packet for IPv6/TCP properties. 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_tcp6(fin) +fr_info_t *fin; +{ + + fr_checkv6sum(fin); + + frpr_short(fin, sizeof(struct tcphdr)); + + frpr_tcpcommon(fin); +} +#endif /* USE_INET6 */ + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_pullup */ +/* Returns: int - 0 == pullup succeeded, -1 == failure */ +/* Parameters: fin(I) - pointer to packet information */ +/* plen(I) - length (excluding L3 header) to pullup */ +/* */ +/* Short inline function to cut down on code duplication to perform a call */ +/* to fr_pullup to ensure there is the required amount of data, */ +/* consecutively in the packet buffer. */ +/* ------------------------------------------------------------------------ */ +static INLINE int frpr_pullup(fin, plen) +fr_info_t *fin; +int plen; +{ +#if defined(_KERNEL) + if (fin->fin_m != NULL) { + if (fin->fin_dp != NULL) + plen += (char *)fin->fin_dp - + ((char *)fin->fin_ip + fin->fin_hlen); + plen += fin->fin_hlen; + if (M_LEN(fin->fin_m) < plen) { + if (fr_pullup(fin->fin_m, fin, plen) == NULL) + return -1; + } + } +#endif + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_short */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* min(I) - minimum header size */ +/* */ +/* Check if a packet is "short" as defined by min. The rule we are */ +/* applying here is that the packet must not be fragmented within the layer */ +/* 4 header. That is, it must not be a fragment that has its offset set to */ +/* start within the layer 4 header (hdrmin) or if it is at offset 0, the */ +/* entire layer 4 header must be present (min). 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_short(fin, min) +fr_info_t *fin; +int min; +{ + fr_ip_t *fi = &fin->fin_fi; + int off; + + off = fin->fin_off; + if (off == 0) { + if (fin->fin_plen < fin->fin_hlen + min) + fi->fi_flx |= FI_SHORT; + } else if (off < min) { + fi->fi_flx |= FI_SHORT; + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_icmp */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv4 Only */ +/* Do a sanity check on the packet for ICMP (v4). In nearly all cases, */ +/* except extrememly bad packets, both type and code will be present. */ +/* The expected minimum size of an ICMP packet is very much dependant on */ +/* the type of it. */ +/* */ +/* XXX - other ICMP sanity checks? */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_icmp(fin) +fr_info_t *fin; +{ + int minicmpsz = sizeof(struct icmp); + icmphdr_t *icmp; + + if (frpr_pullup(fin, ICMPERR_ICMPHLEN) == -1) + return; + + fr_checkv4sum(fin); + + if (!fin->fin_off && (fin->fin_dlen > 1)) { + icmp = fin->fin_dp; + + fin->fin_data[0] = *(u_short *)icmp; + + switch (icmp->icmp_type) + { + case ICMP_ECHOREPLY : + case ICMP_ECHO : + /* Router discovery messaes - RFC 1256 */ + case ICMP_ROUTERADVERT : + case ICMP_ROUTERSOLICIT : + minicmpsz = ICMP_MINLEN; + break; + /* + * type(1) + code(1) + cksum(2) + id(2) seq(2) + + * 3 * timestamp(3 * 4) + */ + case ICMP_TSTAMP : + case ICMP_TSTAMPREPLY : + minicmpsz = 20; + break; + /* + * type(1) + code(1) + cksum(2) + id(2) seq(2) + + * mask(4) + */ + case ICMP_MASKREQ : + case ICMP_MASKREPLY : + minicmpsz = 12; + break; + /* + * type(1) + code(1) + cksum(2) + id(2) seq(2) + ip(20+) + */ + case ICMP_UNREACH : + case ICMP_SOURCEQUENCH : + case ICMP_REDIRECT : + case ICMP_TIMXCEED : + case ICMP_PARAMPROB : + if (fr_coalesce(fin) != 1) + 
return; + fin->fin_flx |= FI_ICMPERR; + break; + default : + break; + } + + if (fin->fin_dlen >= 6) /* ID field */ + fin->fin_data[1] = icmp->icmp_id; + } + + frpr_short(fin, minicmpsz); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_tcpcommon */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* TCP header sanity checking. Look for bad combinations of TCP flags, */ +/* and make some checks with how they interact with other fields. */ +/* If compiled with IPFILTER_CKSUM, check to see if the TCP checksum is */ +/* valid and mark the packet as bad if not. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_tcpcommon(fin) +fr_info_t *fin; +{ + int flags, tlen; + tcphdr_t *tcp; + fr_ip_t *fi; + + fi = &fin->fin_fi; + fi->fi_flx |= FI_TCPUDP; + if (fin->fin_off != 0) + return; + + if (frpr_pullup(fin, sizeof(*tcp)) == -1) + return; + tcp = fin->fin_dp; + + if (fin->fin_dlen > 3) { + fin->fin_sport = ntohs(tcp->th_sport); + fin->fin_dport = ntohs(tcp->th_dport); + } + + if ((fi->fi_flx & FI_SHORT) != 0) + return; + + /* + * Use of the TCP data offset *must* result in a value that is at + * least the same size as the TCP header. + */ + tlen = TCP_OFF(tcp) << 2; + if (tlen < sizeof(tcphdr_t)) { + fin->fin_flx |= FI_BAD; + return; + } + + flags = tcp->th_flags; + fin->fin_tcpf = tcp->th_flags; + + /* + * If the urgent flag is set, then the urgent pointer must + * also be set and vice versa. Good TCP packets do not have + * just one of these set. + */ + if ((flags & TH_URG) != 0 && (tcp->th_urp == 0)) { + fin->fin_flx |= FI_BAD; + } else if ((flags & TH_URG) == 0 && (tcp->th_urp != 0)) { + /* Ignore this case, it shows up in "real" traffic with */ + /* bogus values in the urgent pointer field. 
*/ + ; + } else if (((flags & (TH_SYN|TH_FIN)) != 0) && + ((flags & (TH_RST|TH_ACK)) == TH_RST)) { + /* TH_FIN|TH_RST|TH_ACK seems to appear "naturally" */ + fin->fin_flx |= FI_BAD; + } else if (!(flags & TH_ACK)) { + /* + * If the ack bit isn't set, then either the SYN or + * RST bit must be set. If the SYN bit is set, then + * we expect the ACK field to be 0. If the ACK is + * not set and if URG, PSH or FIN are set, consdier + * that to indicate a bad TCP packet. + */ + if ((flags == TH_SYN) && (tcp->th_ack != 0)) { + /* + * Cisco PIX sets the ACK field to a random value. + * In light of this, do not set FI_BAD until a patch + * is available from Cisco to ensure that + * interoperability between existing systems is + * achieved. + */ + /*fin->fin_flx |= FI_BAD*/; + } else if (!(flags & (TH_RST|TH_SYN))) { + fin->fin_flx |= FI_BAD; + } else if ((flags & (TH_URG|TH_PUSH|TH_FIN)) != 0) { + fin->fin_flx |= FI_BAD; + } + } + + /* + * At this point, it's not exactly clear what is to be gained by + * marking up which TCP options are and are not present. The one we + * are most interested in is the TCP window scale. This is only in + * a SYN packet [RFC1323] so we don't need this here...? + * Now if we were to analyse the header for passive fingerprinting, + * then that might add some weight to adding this... 
+ */ + if (tlen == sizeof(tcphdr_t)) + return; + + if (frpr_pullup(fin, tlen) == -1) + return; + +#if 0 + ip = fin->fin_ip; + s = (u_char *)(tcp + 1); + off = IP_HL(ip) << 2; +# ifdef _KERNEL + if (fin->fin_mp != NULL) { + mb_t *m = *fin->fin_mp; + + if (off + tlen > M_LEN(m)) + return; + } +# endif + for (tlen -= (int)sizeof(*tcp); tlen > 0; ) { + opt = *s; + if (opt == '\0') + break; + else if (opt == TCPOPT_NOP) + ol = 1; + else { + if (tlen < 2) + break; + ol = (int)*(s + 1); + if (ol < 2 || ol > tlen) + break; + } + + for (i = 9, mv = 4; mv >= 0; ) { + op = ipopts + i; + if (opt == (u_char)op->ol_val) { + optmsk |= op->ol_bit; + break; + } + } + tlen -= ol; + s += ol; + } +#endif /* 0 */ +} + + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_udpcommon */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Extract the UDP source and destination ports, if present. If compiled */ +/* with IPFILTER_CKSUM, check to see if the UDP checksum is valid. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_udpcommon(fin) +fr_info_t *fin; +{ + udphdr_t *udp; + fr_ip_t *fi; + + fi = &fin->fin_fi; + fi->fi_flx |= FI_TCPUDP; + + if (!fin->fin_off && (fin->fin_dlen > 3)) { + if (frpr_pullup(fin, sizeof(*udp)) == -1) { + fi->fi_flx |= FI_SHORT; + return; + } + + udp = fin->fin_dp; + + fin->fin_sport = ntohs(udp->uh_sport); + fin->fin_dport = ntohs(udp->uh_dport); + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_tcp */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv4 Only */ +/* Analyse the packet for IPv4/TCP properties. 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_tcp(fin) +fr_info_t *fin; +{ + + fr_checkv4sum(fin); + + frpr_short(fin, sizeof(tcphdr_t)); + + frpr_tcpcommon(fin); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_udp */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv4 Only */ +/* Analyse the packet for IPv4/UDP properties. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_udp(fin) +fr_info_t *fin; +{ + + fr_checkv4sum(fin); + + frpr_short(fin, sizeof(udphdr_t)); + + frpr_udpcommon(fin); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_esp */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Analyse the packet for ESP properties. */ +/* The minimum length is taken to be the SPI (32bits) plus a tail (32bits) */ +/* even though the newer ESP packets must also have a sequence number that */ +/* is 32bits as well, it is not possible(?) to determine the version from a */ +/* simple packet header. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_esp(fin) +fr_info_t *fin; +{ + if (frpr_pullup(fin, 8) == -1) + return; + + if (fin->fin_v == 4) + frpr_short(fin, 8); +#ifdef USE_INET6 + else if (fin->fin_v == 6) + frpr_short6(fin, sizeof(grehdr_t)); +#endif +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_gre */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Analyse the packet for GRE properties. 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_gre(fin) +fr_info_t *fin; +{ + grehdr_t *gre; + + if (frpr_pullup(fin, sizeof(grehdr_t)) == -1) + return; + + if (fin->fin_v == 4) + frpr_short(fin, sizeof(grehdr_t)); +#ifdef USE_INET6 + else if (fin->fin_v == 6) + frpr_short6(fin, sizeof(grehdr_t)); +#endif + gre = fin->fin_dp; + if (GRE_REV(gre->gr_flags) == 1) + fin->fin_data[0] = gre->gr_call; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frpr_ipv4hdr */ +/* Returns: void */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* IPv4 Only */ +/* Analyze the IPv4 header and set fields in the fr_info_t structure. */ +/* Check all options present and flag their presence if any exist. */ +/* ------------------------------------------------------------------------ */ +static INLINE void frpr_ipv4hdr(fin) +fr_info_t *fin; +{ + u_short optmsk = 0, secmsk = 0, auth = 0; + int hlen, ol, mv, p, i; + const struct optlist *op; + u_char *s, opt; + u_short off; + fr_ip_t *fi; + ip_t *ip; + + fi = &fin->fin_fi; + hlen = fin->fin_hlen; + + ip = fin->fin_ip; + p = ip->ip_p; + fi->fi_p = p; + fi->fi_tos = ip->ip_tos; + fin->fin_id = ip->ip_id; + off = ip->ip_off; + + /* Get both TTL and protocol */ + fi->fi_p = ip->ip_p; + fi->fi_ttl = ip->ip_ttl; +#if 0 + (*(((u_short *)fi) + 1)) = (*(((u_short *)ip) + 4)); +#endif + + /* Zero out bits not used in IPv6 address */ + fi->fi_src.i6[1] = 0; + fi->fi_src.i6[2] = 0; + fi->fi_src.i6[3] = 0; + fi->fi_dst.i6[1] = 0; + fi->fi_dst.i6[2] = 0; + fi->fi_dst.i6[3] = 0; + + fi->fi_saddr = ip->ip_src.s_addr; + fi->fi_daddr = ip->ip_dst.s_addr; + + /* + * set packet attribute flags based on the offset and + * calculate the byte offset that it represents. 
+ */ + if ((off & IP_MF) != 0) { + fi->fi_flx |= FI_FRAG; + if (fin->fin_dlen == 0) + fi->fi_flx |= FI_BAD; + } + + off &= IP_MF|IP_OFFMASK; + if (off != 0) { + fi->fi_flx |= FI_FRAG; + off &= IP_OFFMASK; + if (off != 0) { + fin->fin_flx |= FI_FRAGBODY; + off <<= 3; + if (off + fin->fin_dlen > 0xffff) { + fi->fi_flx |= FI_BAD; + } + } + } + fin->fin_off = off; + + /* + * Call per-protocol setup and checking + */ + switch (p) + { + case IPPROTO_UDP : + frpr_udp(fin); + break; + case IPPROTO_TCP : + frpr_tcp(fin); + break; + case IPPROTO_ICMP : + frpr_icmp(fin); + break; + case IPPROTO_ESP : + frpr_esp(fin); + break; + case IPPROTO_GRE : + frpr_gre(fin); + break; + } + + ip = fin->fin_ip; + if (ip == NULL) + return; + + /* + * If it is a standard IP header (no options), set the flag fields + * which relate to options to 0. + */ + if (hlen == sizeof(*ip)) { + fi->fi_optmsk = 0; + fi->fi_secmsk = 0; + fi->fi_auth = 0; + return; + } + + /* + * So the IP header has some IP options attached. Walk the entire + * list of options present with this packet and set flags to indicate + * which ones are here and which ones are not. For the somewhat out + * of date and obscure security classification options, set a flag to + * represent which classification is present. 
+ */ + fi->fi_flx |= FI_OPTIONS; + + for (s = (u_char *)(ip + 1), hlen -= (int)sizeof(*ip); hlen > 0; ) { + opt = *s; + if (opt == '\0') + break; + else if (opt == IPOPT_NOP) + ol = 1; + else { + if (hlen < 2) + break; + ol = (int)*(s + 1); + if (ol < 2 || ol > hlen) + break; + } + for (i = 9, mv = 4; mv >= 0; ) { + op = ipopts + i; + if ((opt == (u_char)op->ol_val) && (ol > 4)) { + optmsk |= op->ol_bit; + if (opt == IPOPT_SECURITY) { + const struct optlist *sp; + u_char sec; + int j, m; + + sec = *(s + 2); /* classification */ + for (j = 3, m = 2; m >= 0; ) { + sp = secopt + j; + if (sec == sp->ol_val) { + secmsk |= sp->ol_bit; + auth = *(s + 3); + auth *= 256; + auth += *(s + 4); + break; + } + if (sec < sp->ol_val) + j -= m; + else + j += m; + m--; + } + } + break; + } + if (opt < op->ol_val) + i -= mv; + else + i += mv; + mv--; + } + hlen -= ol; + s += ol; + } + + /* + * + */ + if (auth && !(auth & 0x0100)) + auth &= 0xff00; + fi->fi_optmsk = optmsk; + fi->fi_secmsk = secmsk; + fi->fi_auth = auth; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_makefrip */ +/* Returns: void */ +/* Parameters: hlen(I) - length of IP packet header */ +/* ip(I) - pointer to the IP header */ +/* fin(IO) - pointer to packet information */ +/* */ +/* Compact the IP header into a structure which contains just the info. */ +/* which is useful for comparing IP headers with and store this information */ +/* in the fr_info_t structure pointer to by fin. At present, it is assumed */ +/* this function will be called with either an IPv4 or IPv6 packet. 
*/
+/* ------------------------------------------------------------------------ */
+int fr_makefrip(hlen, ip, fin)
+int hlen;
+ip_t *ip;
+fr_info_t *fin;
+{
+	int v;
+
+	/* Reset the per-packet results before the header is analysed. */
+	fin->fin_nat = NULL;
+	fin->fin_state = NULL;
+	fin->fin_depth = 0;
+	fin->fin_hlen = (u_short)hlen;
+	fin->fin_ip = ip;
+	fin->fin_rule = 0xffffffff;
+	fin->fin_group[0] = -1;
+	fin->fin_group[1] = '\0';
+	/* Data length and data pointer both exclude the IP header. */
+	fin->fin_dlen = fin->fin_plen - hlen;
+	fin->fin_dp = (char *)ip + hlen;
+
+	v = fin->fin_v;
+	if (v == 4)
+		frpr_ipv4hdr(fin);
+#ifdef	USE_INET6
+	else if (v == 6)
+		frpr_ipv6hdr(fin);
+#endif
+	/* A NULL fin_ip after the analysis is reported as failure. */
+	if (fin->fin_ip == NULL)
+		return -1;
+	return 0;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_portcheck                                                */
+/* Returns:     int - 1 == port matched, 0 == port match failed             */
+/* Parameters:  frp(I) - pointer to port check `expression'                 */
+/*              pop(I) - pointer to port number to evaluate                 */
+/*                                                                          */
+/* Perform a comparison of a port number against some other(s), using a     */
+/* structure with compare information stored in it.                         */
+/* ------------------------------------------------------------------------ */
+static INLINE int fr_portcheck(frp, pop)
+frpcmp_t *frp;
+u_short *pop;
+{
+	u_short tup, po;
+	int err = 1;
+
+	tup = *pop;
+	po = frp->frp_port;
+
+	/*
+	 * Do opposite test to that required and continue if that succeeds.
+	 */
+	switch (frp->frp_cmp)
+	{
+	case FR_EQUAL :
+		if (tup != po) /* EQUAL */
+			err = 0;
+		break;
+	case FR_NEQUAL :
+		if (tup == po) /* NOTEQUAL */
+			err = 0;
+		break;
+	case FR_LESST :
+		if (tup >= po) /* LESSTHAN */
+			err = 0;
+		break;
+	case FR_GREATERT :
+		if (tup <= po) /* GREATERTHAN */
+			err = 0;
+		break;
+	case FR_LESSTE :
+		if (tup > po) /* LT or EQ */
+			err = 0;
+		break;
+	case FR_GREATERTE :
+		if (tup < po) /* GT or EQ */
+			err = 0;
+		break;
+	case FR_OUTRANGE :
+		/* Fails for any port within [frp_port, frp_top]. */
+		if (tup >= po && tup <= frp->frp_top) /* Out of range */
+			err = 0;
+		break;
+	case FR_INRANGE :
+		/* Both end points themselves fail this test. */
+		if (tup <= po || tup >= frp->frp_top) /* In range */
+			err = 0;
+		break;
+	case FR_INCRANGE :
+		if (tup < po || tup > frp->frp_top) /* Inclusive range */
+			err = 0;
+		break;
+	default :
+		/* An unrecognised comparison leaves the result as "match". */
+		break;
+	}
+	return err;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_tcpudpchk                                                */
+/* Returns:     int - 1 == protocol matched, 0 == check failed              */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*              ft(I)  - pointer to structure with comparison data          */
+/*                                                                          */
+/* Compares the current packet (assuming it is TCP/UDP) information with a  */
+/* structure containing information that we want to match against.          */
+/* ------------------------------------------------------------------------ */
+int fr_tcpudpchk(fin, ft)
+fr_info_t *fin;
+frtuc_t *ft;
+{
+	int err = 1;
+
+	/*
+	 * Both ports should *always* be in the first fragment.
+	 * So far, I cannot find any cases where they can not be.
+	 *
+	 * compare destination ports
+	 */
+	if (ft->ftu_dcmp)
+		err = fr_portcheck(&ft->ftu_dst, &fin->fin_dport);
+
+	/*
+	 * compare source ports
+	 */
+	if (err && ft->ftu_scmp)
+		err = fr_portcheck(&ft->ftu_src, &fin->fin_sport);
+
+	/*
+	 * If we don't have all the TCP/UDP header, then how can we
+	 * expect to do any sort of match on it ?  If we were looking for
+	 * TCP flags, then NO match.  If not, then match (which should
 	 * satisfy the "short" class too).
 	 */
-	if (err && (fin->fin_fi.fi_p == IPPROTO_TCP)) {
-		if (fin->fin_fl & FI_SHORT)
-			return !(ft->ftu_tcpf | ft->ftu_tcpfm);
+	if (err && (fin->fin_p == IPPROTO_TCP)) {
+		if (fin->fin_flx & FI_SHORT)
+			return !(ft->ftu_tcpf | ft->ftu_tcpfm);
+		/*
+		 * Match the flags ?  If not, abort this match.
+		 */
+		if (ft->ftu_tcpfm &&
+		    ft->ftu_tcpf != (fin->fin_tcpf & ft->ftu_tcpfm)) {
+			FR_DEBUG(("f. %#x & %#x != %#x\n", fin->fin_tcpf,
+				 ft->ftu_tcpfm, ft->ftu_tcpf));
+			err = 0;
+		}
+	}
+	return err;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_ipfcheck                                                 */
+/* Returns:     int - 0 == match, 1 == no match                             */
+/* Parameters:  fin(I)     - pointer to packet information                  */
+/*              fr(I)      - pointer to filter rule                         */
+/*              portcmp(I) - flag indicating whether to attempt matching on */
+/*                           TCP/UDP port data.                             */
+/*                                                                          */
+/* Check to see if a packet matches an IPFilter rule.  Checks of addresses, */
+/* port numbers, etc, for "standard" IPFilter rules are all orchestrated in */
+/* this function.                                                           */
+/* ------------------------------------------------------------------------ */
+static INLINE int fr_ipfcheck(fin, fr, portcmp)
+fr_info_t *fin;
+frentry_t *fr;
+int portcmp;
+{
+	u_32_t *ld, *lm, *lip;
+	fripf_t *fri;
+	fr_ip_t *fi;
+	int i;
+
+	/*
+	 * The packet summary (lip), the rule's mask (lm) and the rule's
+	 * values (ld) are walked in step, one 32 bit word at a time; each
+	 * test below is "(packet & mask) != value".
+	 */
+	fi = &fin->fin_fi;
+	fri = fr->fr_ipf;
+	lip = (u_32_t *)fi;
+	lm = (u_32_t *)&fri->fri_mip;
+	ld = (u_32_t *)&fri->fri_ip;
+
+	/*
+	 * The first 32 bits to check cover:
+	 * IP version, TOS, TTL, protocol
+	 */
+	i = ((*lip & *lm) != *ld);
+	FR_DEBUG(("0. %#08x & %#08x != %#08x\n",
+		   *lip, *lm, *ld));
+	if (i)
+		return 1;
+
+	/*
+	 * Next 32 bits is a constructed bitmask indicating which IP options
+	 * are present (if any) in this packet.
+	 */
+	lip++, lm++, ld++;
+	i |= ((*lip & *lm) != *ld);
+	FR_DEBUG(("1. %#08x & %#08x != %#08x\n",
+		   *lip, *lm, *ld));
+	if (i)
+		return 1;
+
+	lip++, lm++, ld++;
+	/*
+	 * Unrolled loops (4 each, for 32 bits) for address checks.
+	 */
+	/*
+	 * Check the source address.
+	 */
+#ifdef	IPFILTER_LOOKUP
+	if (fr->fr_satype == FRI_LOOKUP) {
+		/* The rule's lookup function decides; -1 is "no match". */
+		i = (*fr->fr_srcfunc)(fr->fr_srcptr, fi->fi_v, lip);
+		if (i == -1)
+			return 1;
+		lip += 3;
+		lm += 3;
+		ld += 3;
+	} else {
+#endif
+		i = ((*lip & *lm) != *ld);
+		FR_DEBUG(("2a. %#08x & %#08x != %#08x\n",
+			   *lip, *lm, *ld));
+		if (fi->fi_v == 6) {
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("2b. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("2c. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("2d. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+		} else {
+			/* IPv4: step over the three unused address words. */
+			lip += 3;
+			lm += 3;
+			ld += 3;
+		}
+#ifdef	IPFILTER_LOOKUP
+	}
+#endif
+	/*
+	 * NOTE(review): presumably the shift brings the FR_NOTSRCIP bit
+	 * down to bit 0 so that a negated rule inverts the result - confirm
+	 * against the definition of FR_NOTSRCIP.
+	 */
+	i ^= (fr->fr_flags & FR_NOTSRCIP) >> 6;
+	if (i)
+		return 1;
+
+	/*
+	 * Check the destination address.
+	 */
+	lip++, lm++, ld++;
+#ifdef	IPFILTER_LOOKUP
+	if (fr->fr_datype == FRI_LOOKUP) {
+		i = (*fr->fr_dstfunc)(fr->fr_dstptr, fi->fi_v, lip);
+		if (i == -1)
+			return 1;
+		lip += 3;
+		lm += 3;
+		ld += 3;
+	} else {
+#endif
+		i = ((*lip & *lm) != *ld);
+		FR_DEBUG(("3a. %#08x & %#08x != %#08x\n",
+			   *lip, *lm, *ld));
+		if (fi->fi_v == 6) {
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("3b. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("3c. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+			lip++, lm++, ld++;
+			i |= ((*lip & *lm) != *ld);
+			FR_DEBUG(("3d. %#08x & %#08x != %#08x\n",
+				   *lip, *lm, *ld));
+		} else {
+			lip += 3;
+			lm += 3;
+			ld += 3;
+		}
+#ifdef	IPFILTER_LOOKUP
+	}
+#endif
+	/* NOTE(review): as for FR_NOTSRCIP above - confirm the shift. */
+	i ^= (fr->fr_flags & FR_NOTDSTIP) >> 7;
+	if (i)
+		return 1;
+	/*
+	 * IP addresses matched.  The next 32bits contains:
+	 * mask of old IP header security & authentication bits.
+	 */
+	lip++, lm++, ld++;
+	i |= ((*lip & *lm) != *ld);
+	FR_DEBUG(("4. %#08x & %#08x != %#08x\n",
+		   *lip, *lm, *ld));
+
+	/*
+	 * Next we have 32 bits of packet flags.
+	 */
+	lip++, lm++, ld++;
+	i |= ((*lip & *lm) != *ld);
+	FR_DEBUG(("5. %#08x & %#08x != %#08x\n",
+		   *lip, *lm, *ld));
+
+	if (i == 0) {
+		/*
+		 * If a fragment, then only the first has what we're
+		 * looking for here...
+		 */
+		if (portcmp) {
+			if (!fr_tcpudpchk(fin, &fr->fr_tuc))
+				i = 1;
+		} else {
+			/*
+			 * No port data to compare with: a rule that asks
+			 * for ports or TCP flags cannot match.
+			 */
+			if (fr->fr_dcmp || fr->fr_scmp ||
+			    fr->fr_tcpf || fr->fr_tcpfm)
+				i = 1;
+			if (fr->fr_icmpm || fr->fr_icmp) {
+				if (((fi->fi_p != IPPROTO_ICMP) &&
+				     (fi->fi_p != IPPROTO_ICMPV6)) ||
+				    fin->fin_off || (fin->fin_dlen < 2))
+					i = 1;
+				else if ((fin->fin_data[0] & fr->fr_icmpm) !=
+					 fr->fr_icmp) {
+					FR_DEBUG(("i. %#x & %#x != %#x\n",
+						 fin->fin_data[0],
+						 fr->fr_icmpm, fr->fr_icmp));
+					i = 1;
+				}
+			}
+		}
+	}
+	return i;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function:    fr_scanlist                                                 */
+/* Returns:     int - result flags of scanning filter list                  */
+/* Parameters:  fin(I) - pointer to packet information                      */
+/*              pass(I) - default result to return for filtering            */
+/*                                                                          */
+/* Check the input/output list of rules for a match to the current packet.  */
+/* If a match is found, the value of fr_flags from the rule becomes the     */
+/* return value and fin->fin_fr points to the matched rule.                 */
+/*                                                                          */
+/* This function may be called recursively up to 16 times (limit inbuilt.)  */
+/* When unwinding, it should finish up with fin_depth as 0.                 */
+/*                                                                          */
+/* Could be per interface, but this gets real nasty when you don't have,    */
+/* or can't easily change, the kernel source code to .                      */
+/* ------------------------------------------------------------------------ */
+int fr_scanlist(fin, pass)
+fr_info_t *fin;
+u_32_t pass;
+{
+	int rulen, portcmp, off, logged, skip;
+	struct frentry *fr, *fnext;
+	u_32_t passt;
+
+	/*
+	 * Do not allow nesting deeper than 16 levels.
+	 */
+	if (fin->fin_depth >= 16)
+		return pass;
+
+	/* The caller supplies the head of the list to scan in fin_fr. */
+	fr = fin->fin_fr;
+
+	/*
+	 * If there are no rules in this list, return now.
+	 */
+	if (fr == NULL)
+		return pass;
+
+	skip = 0;
+	logged = 0;
+	portcmp = 0;
+	fin->fin_depth++;
+	fin->fin_fr = NULL;
+	off = fin->fin_off;
+
+	/* Ports are compared only for a first fragment holding >= 4 bytes. */
+	if ((fin->fin_flx & FI_TCPUDP) && (fin->fin_dlen > 3) && !off)
+		portcmp = 1;
+
+	for (rulen = 0; fr; fr = fnext, rulen++) {
+		/* Saved now because fr may be replaced further down. */
+		fnext = fr->fr_next;
+		if (skip != 0) {
+			FR_VERBOSE(("%d (%#x)\n", skip, fr->fr_flags));
+			skip--;
+			continue;
+		}
+
+		/*
+		 * In all checks below, a null (zero) value in the
+		 * filter structure is taken to mean a wildcard.
+		 *
+		 * check that we are working for the right interface
+		 */
+#ifdef	_KERNEL
+		if (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp)
+			continue;
+#else
+		if (opts & (OPT_VERBOSE|OPT_DEBUG))
+			printf("\n");
+		FR_VERBOSE(("%c", FR_ISSKIP(pass) ? 's' :
+				  FR_ISPASS(pass) ? 'p' :
+				  FR_ISACCOUNT(pass) ? 'A' :
+				  FR_ISAUTH(pass) ? 'a' :
+				  (pass & FR_NOMATCH) ? 'n' :'b'));
+		if (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp)
+			continue;
+		FR_VERBOSE((":i"));
+#endif
+
+		switch (fr->fr_type)
+		{
+		case FR_T_IPF :
+		case FR_T_IPF|FR_T_BUILTIN :
+			if (fr_ipfcheck(fin, fr, portcmp))
+				continue;
+			break;
+#if defined(IPFILTER_BPF)
+		case FR_T_BPFOPC :
+		case FR_T_BPFOPC|FR_T_BUILTIN :
+		    {
+			u_char *mc;
+			int wlen;
+
+			if (*fin->fin_mp == NULL)
+				continue;
+			if (fin->fin_v != fr->fr_v)
+				continue;
+			mc = (u_char *)fin->fin_m;
+			wlen = fin->fin_dlen + fin->fin_hlen;
+			if (!bpf_filter(fr->fr_data, mc, wlen, 0))
+				continue;
+			break;
+		    }
+#endif
+		case FR_T_CALLFUNC|FR_T_BUILTIN :
+		    {
+			frentry_t *f;
+
+			/* The rule's function returns the rule to use. */
+			f = (*fr->fr_func)(fin, &pass);
+			if (f != NULL)
+				fr = f;
+			else
+				continue;
+			break;
+		    }
+		default :
+			break;
+		}
+
+		/* Inbound only: a rule carrying a NAT tag needs a match. */
+		if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) {
+			if (fin->fin_nattag == NULL)
+				continue;
+			if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) == 0)
+				continue;
+		}
+		FR_VERBOSE(("=%s.%d *", fr->fr_group, rulen));
+
+		passt = fr->fr_flags;
+
+		/*
+		 * Allowing a rule with the "keep state" flag set to match
+		 * packets that have been tagged "out of window" by the TCP
+		 * state tracking is foolish as the attempt to add a new
+		 * state entry to the table will fail.
+		 */
+		if ((passt & FR_KEEPSTATE) && (fin->fin_flx & FI_OOW))
+			continue;
+
+		/*
+		 * If the rule is a "call now" rule, then call the function
+		 * in the rule, if it exists and use the results from that.
+		 * If the function pointer is bad, just make like we ignore
+		 * it, except for increasing the hit counter.
+		 */
+		if ((passt & FR_CALLNOW) != 0) {
+			ATOMIC_INC64(fr->fr_hits);
+			if ((fr->fr_func != NULL) &&
+			    (fr->fr_func != (ipfunc_t)-1)) {
+				frentry_t *frs;
+
+				/* Put fin_fr back if no rule is returned. */
+				frs = fin->fin_fr;
+				fin->fin_fr = fr;
+				fr = (*fr->fr_func)(fin, &passt);
+				if (fr == NULL) {
+					fin->fin_fr = frs;
+					continue;
+				}
+				passt = fr->fr_flags;
+				fin->fin_fr = fr;
+			}
+		} else {
+			fin->fin_fr = fr;
+		}
+
+#ifdef  IPFILTER_LOG
+		/*
+		 * Just log this packet...
+		 */
+		if ((passt & FR_LOGMASK) == FR_LOG) {
+			if (ipflog(fin, passt) == -1) {
+				/* Logging failed: block if the rule says so. */
+				if (passt & FR_LOGORBLOCK) {
+					passt &= ~FR_CMDMASK;
+					passt |= FR_BLOCK|FR_QUICK;
+				}
+				ATOMIC_INCL(frstats[fin->fin_out].fr_skip);
+			}
+			ATOMIC_INCL(frstats[fin->fin_out].fr_pkl);
+			logged = 1;
+		}
+#endif /* IPFILTER_LOG */
+		fr->fr_bytes += (U_QUAD_T)fin->fin_plen;
+		/* A skip rule sets the skip count; a log rule keeps pass. */
+		if (FR_ISSKIP(passt))
+			skip = fr->fr_arg;
+		else if ((passt & FR_LOGMASK) != FR_LOG)
+			pass = passt;
+		if (passt & (FR_RETICMP|FR_FAKEICMP))
+			fin->fin_icode = fr->fr_icode;
+		FR_DEBUG(("pass %#x\n", pass));
+		ATOMIC_INC64(fr->fr_hits);
+		fin->fin_rule = rulen;
+		(void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN);
+		if (fr->fr_grp != NULL) {
+			/* Scan the rule's group; restore if nothing matched. */
+			fin->fin_fr = *fr->fr_grp;
+			pass = fr_scanlist(fin, pass);
+			if (fin->fin_fr == NULL) {
+				fin->fin_rule = rulen;
+				(void) strncpy(fin->fin_group, fr->fr_group,
+					       FR_GROUPLEN);
+				fin->fin_fr = fr;
+			}
+			if (fin->fin_flx & FI_DONTCACHE)
+				logged = 1;
+		}
+		if (pass & FR_QUICK)
+			break;
+	}
+	/* Mark the packet FI_DONTCACHE if it was logged. */
+	if (logged)
+		fin->fin_flx |= FI_DONTCACHE;
+	fin->fin_depth--;
+	return pass;
+}
+
+
+/* ------------------------------------------------------------------------ */
+/* Function: fr_acctpkt */ +/* Returns: frentry_t* - always returns NULL */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(IO) - pointer to current/new filter decision (unused) */ +/* */ +/* Checks a packet against accounting rules, if there are any for the given */ +/* IP protocol version. */ +/* */ +/* N.B.: this function returns NULL to match the prototype used by other */ +/* functions called from the IPFilter "mainline" in fr_check(). */ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_acctpkt(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + char group[FR_GROUPLEN]; + frentry_t *fr, *frsave; + u_32_t pass, rulen; + + passp = passp; +#ifdef USE_INET6 + if (fin->fin_v == 6) + fr = ipacct6[fin->fin_out][fr_active]; + else +#endif + fr = ipacct[fin->fin_out][fr_active]; + + if (fr != NULL) { + frsave = fin->fin_fr; + bcopy(fin->fin_group, group, FR_GROUPLEN); + rulen = fin->fin_rule; + fin->fin_fr = fr; + pass = fr_scanlist(fin, FR_NOMATCH); + if (FR_ISACCOUNT(pass)) { + ATOMIC_INCL(frstats[0].fr_acct); + } + fin->fin_fr = frsave; + bcopy(group, fin->fin_group, FR_GROUPLEN); + fin->fin_rule = rulen; + } + return NULL; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_firewall */ +/* Returns: frentry_t* - returns pointer to matched rule, if no matches */ +/* were found, returns NULL. */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(IO) - pointer to current/new filter decision (unused) */ +/* */ +/* Applies an appropriate set of firewall rules to the packet, to see if */ +/* there are any matches. The first check is to see if a match can be seen */ +/* in the cache. If not, then search an appropriate list of rules. Once a */ +/* matching rule is found, take any appropriate actions as defined by the */ +/* rule - except logging. 
*/ +/* ------------------------------------------------------------------------ */ +static frentry_t *fr_firewall(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + frentry_t *fr; + fr_info_t *fc; + u_32_t pass; + int out; + + out = fin->fin_out; + pass = *passp; + + /* + * If a packet is found in the auth table, then skip checking + * the access lists for permission but we do need to consider + * the result as if it were from the ACL's. + */ + fc = &frcache[out][CACHE_HASH(fin)]; + if (!bcmp((char *)fin, (char *)fc, FI_CSIZE)) { + /* + * copy cached data so we can unlock the mutex + * earlier. + */ + bcopy((char *)fc, (char *)fin, FI_COPYSIZE); + ATOMIC_INCL(frstats[out].fr_chit); + if ((fr = fin->fin_fr) != NULL) { + ATOMIC_INC64(fr->fr_hits); + pass = fr->fr_flags; + } + } else { +#ifdef USE_INET6 + if (fin->fin_v == 6) + fin->fin_fr = ipfilter6[out][fr_active]; + else +#endif + fin->fin_fr = ipfilter[out][fr_active]; + if (fin->fin_fr != NULL) + pass = fr_scanlist(fin, fr_pass); + if (((pass & FR_KEEPSTATE) == 0) && + ((fin->fin_flx & FI_DONTCACHE) == 0)) + bcopy((char *)fin, (char *)fc, FI_COPYSIZE); + if ((pass & FR_NOMATCH)) { + ATOMIC_INCL(frstats[out].fr_nom); + } + fr = fin->fin_fr; + } + + /* + * Apply packets per second rate-limiting to a rule as required. + */ + if ((fr != NULL) && (fr->fr_pps != 0) && + !ppsratecheck(&fr->fr_lastpkt, &fr->fr_curpps, fr->fr_pps)) { + pass &= ~(FR_CMDMASK|FR_DUP|FR_RETICMP|FR_RETRST); + pass |= FR_BLOCK; + ATOMIC_INCL(frstats[out].fr_ppshit); + } + + /* + * If we fail to add a packet to the authorization queue, then we + * drop the packet later. However, if it was added then pretend + * we've dropped it already. 
+ */ + if (FR_ISAUTH(pass)) { + if (fr_newauth(fin->fin_m, fin) != 0) { +#ifdef _KERNEL + fin->fin_m = *fin->fin_mp = NULL; +#else + ; +#endif + fin->fin_error = 0; + } else + fin->fin_error = ENOSPC; + } + + if ((fr != NULL) && (fr->fr_func != NULL) && + (fr->fr_func != (ipfunc_t)-1) && !(pass & FR_CALLNOW)) + (void) (*fr->fr_func)(fin, &pass); + + /* + * If a rule is a pre-auth rule, check again in the list of rules + * loaded for authenticated use. It does not particulary matter + * if this search fails because a "preauth" result, from a rule, + * is treated as "not a pass", hence the packet is blocked. + */ + if (FR_ISPREAUTH(pass)) { + if ((fin->fin_fr = ipauth) != NULL) + pass = fr_scanlist(fin, fr_pass); + } + + /* + * If the rule has "keep frag" and the packet is actually a fragment, + * then create a fragment state entry. + */ + if ((pass & (FR_KEEPFRAG|FR_KEEPSTATE)) == FR_KEEPFRAG) { + if (fin->fin_flx & FI_FRAG) { + if (fr_newfrag(fin, pass) == -1) { + ATOMIC_INCL(frstats[out].fr_bnfr); + } else { + ATOMIC_INCL(frstats[out].fr_nfr); + } + } else { + ATOMIC_INCL(frstats[out].fr_cfr); + } + } + + /* + * Finally, if we've asked to track state for this packet, set it up. 
+ */ + if ((pass & FR_KEEPSTATE) && !(fin->fin_flx & FI_STATE)) { + if (fr_addstate(fin, NULL, 0) != NULL) { + ATOMIC_INCL(frstats[out].fr_ads); + } else { + ATOMIC_INCL(frstats[out].fr_bads); + if (FR_ISPASS(pass)) { + pass &= ~FR_CMDMASK; + pass |= FR_BLOCK; + } + } + } + + fr = fin->fin_fr; + + if (passp != NULL) + *passp = pass; + + return fr; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_check */ +/* Returns: int - 0 == packet allowed through, */ +/* User space: */ +/* -1 == packet blocked */ +/* 1 == packet not matched */ +/* -2 == requires authantication */ +/* Kernel: */ +/* > 0 == filter error # for packet */ +/* Parameters: ip(I) - pointer to start of IPv4/6 packet */ +/* hlen(I) - length of header */ +/* ifp(I) - pointer to interface this packet is on */ +/* out(I) - 0 == packet going in, 1 == packet going out */ +/* mp(IO) - pointer to caller's buffer pointer that holds this */ +/* IP packet. */ +/* Solaris & HP-UX ONLY : */ +/* qpi(I) - pointer to STREAMS queue information for this */ +/* interface & direction. */ +/* */ +/* fr_check() is the master function for all IPFilter packet processing. */ +/* It orchestrates: Network Address Translation (NAT), checking for packet */ +/* authorisation (or pre-authorisation), presence of related state info., */ +/* generating log entries, IP packet accounting, routing of packets as */ +/* directed by firewall rules and of course whether or not to allow the */ +/* packet to be further processed by the kernel. */ +/* */ +/* For packets blocked, the contents of "mp" will be NULL'd and the buffer */ +/* freed. Packets passed may be returned with the pointer pointed to by */ +/* by "mp" changed to a new buffer. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_check(ip, hlen, ifp, out +#if defined(_KERNEL) && defined(MENTAT) +, qif, mp) +void *qif; +#else +, mp) +#endif +mb_t **mp; +ip_t *ip; +int hlen; +void *ifp; +int out; +{ + /* + * The above really sucks, but short of writing a diff + */ + fr_info_t frinfo; + fr_info_t *fin = &frinfo; + u_32_t pass = fr_pass; + frentry_t *fr = NULL; + int v = IP_V(ip); + mb_t *mc = NULL; + mb_t *m; +#ifdef USE_INET6 + ip6_t *ip6; +#endif + + /* + * The first part of fr_check() deals with making sure that what goes + * into the filtering engine makes some sense. Information about the + * packet is distilled, collected into a fr_info_t structure and + * then an attempt is made to ensure the buffer the packet is in is + * big enough to hold all the required packet headers. + */ +#ifdef _KERNEL +# ifdef MENTAT + qpktinfo_t *qpi = qif; + + if ((u_int)ip & 0x3) + return 2; +# endif + + READ_ENTER(&ipf_global); + + if (fr_running <= 0) { + RWLOCK_EXIT(&ipf_global); + return 0; + } + + bzero((char *)fin, sizeof(*fin)); + +# ifdef MENTAT + if (qpi->qpi_flags & QF_GROUP) + fin->fin_flx |= FI_MBCAST; + m = qpi->qpi_m; + fin->fin_qfm = m; + fin->fin_qpi = qpi; +# else /* MENTAT */ + + m = *mp; + +# if defined(M_MCAST) + if ((m->m_flags & M_MCAST) != 0) + fin->fin_flx |= FI_MBCAST|FI_MULTICAST; +# endif +# if defined(M_BCAST) + if ((m->m_flags & M_BCAST) != 0) + fin->fin_flx |= FI_MBCAST|FI_BROADCAST; +# endif +# ifdef M_CANFASTFWD + /* + * XXX For now, IP Filter and fast-forwarding of cached flows + * XXX are mutually exclusive. Eventually, IP Filter should + * XXX get a "can-fast-forward" filter rule. + */ + m->m_flags &= ~M_CANFASTFWD; +# endif /* M_CANFASTFWD */ +# ifdef CSUM_DELAY_DATA + /* + * disable delayed checksums. 
+ */ + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } +# endif /* CSUM_DELAY_DATA */ +# endif /* MENTAT */ +#else + READ_ENTER(&ipf_global); + + bzero((char *)fin, sizeof(*fin)); + m = *mp; +#endif /* _KERNEL */ + + fin->fin_v = v; + fin->fin_m = m; + fin->fin_ip = ip; + fin->fin_mp = mp; + fin->fin_out = out; + fin->fin_ifp = ifp; + fin->fin_error = ENETUNREACH; + fin->fin_hlen = (u_short )hlen; + fin->fin_dp = (char *)ip + hlen; + + fin->fin_ipoff = (char *)ip - MTOD(m, char *); + +#ifdef USE_INET6 + if (v == 6) { + ATOMIC_INCL(frstats[out].fr_ipv6); + /* + * Jumbo grams are quite likely too big for internal buffer + * structures to handle comfortably, for now, so just drop + * them. + */ + ip6 = (ip6_t *)ip; + fin->fin_plen = ntohs(ip6->ip6_plen); + if (fin->fin_plen == 0) { + /* + * The "filtered" exit path releases ipf_mutex, so it + * must be held before jumping there. + */ + READ_ENTER(&ipf_mutex); + pass = FR_BLOCK|FR_NOMATCH; + goto filtered; + } + fin->fin_plen += sizeof(ip6_t); + } else +#endif + { +#if (OpenBSD >= 200311) && defined(_KERNEL) + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); +#endif + fin->fin_plen = ip->ip_len; + } + + if (fr_makefrip(hlen, ip, fin) == -1) + goto finished; + + /* + * For at least IPv6 packets, if a m_pullup() fails then this pointer + * becomes NULL and so we have no packet to free. + */ + if (*fin->fin_mp == NULL) + goto finished; + + if (!out) { + if (v == 4) { +#ifdef _KERNEL + if (fr_chksrc && !fr_verifysrc(fin)) { + ATOMIC_INCL(frstats[0].fr_badsrc); + fin->fin_flx |= FI_BADSRC; + } +#endif + if (fin->fin_ip->ip_ttl < fr_minttl) { + ATOMIC_INCL(frstats[0].fr_badttl); + fin->fin_flx |= FI_LOWTTL; + } + } +#ifdef USE_INET6 + else if (v == 6) { + ip6 = (ip6_t *)ip; + if (ip6->ip6_hlim < fr_minttl) { + ATOMIC_INCL(frstats[0].fr_badttl); + fin->fin_flx |= FI_LOWTTL; + } + } +#endif + } + + if (fin->fin_flx & FI_SHORT) { + ATOMIC_INCL(frstats[out].fr_short); + } + + READ_ENTER(&ipf_mutex); + + /* + * Check auth now. 
This, combined with the check below to see if apass + * is 0 is to ensure that we don't count the packet twice, which can + * otherwise occur when we reprocess it. As it is, we only count it + * after it has no auth. table matchup. This also stops NAT from + * occurring until after the packet has been auth'd. + */ + fr = fr_checkauth(fin, &pass); + if (!out) { + if (fr_checknatin(fin, &pass) == -1) { + RWLOCK_EXIT(&ipf_mutex); + goto finished; + } + } + if (!out) + (void) fr_acctpkt(fin, NULL); + + if (fr == NULL) + if ((fin->fin_flx & (FI_FRAG|FI_BAD)) == FI_FRAG) + fr = fr_knownfrag(fin, &pass); + if (fr == NULL) + fr = fr_checkstate(fin, &pass); + + if ((pass & FR_NOMATCH) || (fr == NULL)) + fr = fr_firewall(fin, &pass); + + fin->fin_fr = fr; + + /* + * Only count/translate packets which will be passed on, out the + * interface. + */ + if (out && FR_ISPASS(pass)) { + (void) fr_acctpkt(fin, NULL); + + if (fr_checknatout(fin, &pass) == -1) { + RWLOCK_EXIT(&ipf_mutex); + goto finished; + } else if ((fr_update_ipid != 0) && (v == 4)) { + if (fr_updateipid(fin) == -1) { + ATOMIC_INCL(frstats[1].fr_ipud); + pass &= ~FR_CMDMASK; + pass |= FR_BLOCK; + } else { + ATOMIC_INCL(frstats[0].fr_ipud); + } + } + } + +#ifdef IPFILTER_LOG + if ((fr_flags & FF_LOGGING) || (pass & FR_LOGMASK)) { + (void) fr_dolog(fin, &pass); + } +#endif + + if (fin->fin_state != NULL) + fr_statederef(fin, (ipstate_t **)&fin->fin_state); + + if (fin->fin_nat != NULL) + fr_natderef((nat_t **)&fin->fin_nat); + + /* + * Only allow FR_DUP to work if a rule matched - it makes no sense to + * set FR_DUP as a "default" as there are no instructions about where + * to send the packet. Use fin_m here because it may have changed + * (without an update of 'm') in prior processing. + */ + if ((fr != NULL) && (pass & FR_DUP)) { + mc = M_DUPLICATE(fin->fin_m); + } + + if (pass & (FR_RETRST|FR_RETICMP)) { + /* + * Should we return an ICMP packet to indicate error + * status passing through the packet filter ? 
+ * WARNING: ICMP error packets AND TCP RST packets should + * ONLY be sent in response to incoming packets. Sending them + * in response to outbound packets can result in a panic on + * some operating systems. + */ + if (!out) { + if (pass & FR_RETICMP) { + int dst; + + if ((pass & FR_RETMASK) == FR_FAKEICMP) + dst = 1; + else + dst = 0; + (void) fr_send_icmp_err(ICMP_UNREACH, fin, dst); + ATOMIC_INCL(frstats[0].fr_ret); + } else if (((pass & FR_RETMASK) == FR_RETRST) && + !(fin->fin_flx & FI_SHORT)) { + if (fr_send_reset(fin) == 0) { + ATOMIC_INCL(frstats[1].fr_ret); + } + } + } else { + if (pass & FR_RETRST) + fin->fin_error = ECONNRESET; + } + } + + /* + * If we didn't drop off the bottom of the list of rules (and thus + * the 'current' rule fr is not NULL), then we may have some extra + * instructions about what to do with a packet. + * Once we're finished return to our caller, freeing the packet if + * we are dropping it (* BSD ONLY *). + * Reassign m from fin_m as we may have a new buffer, now. + * NOTE: the "filtered" label must only be reached with ipf_mutex + * held, because the lock is released further down this path. + */ +#if defined(USE_INET6) || (defined(__sgi) && defined(_KERNEL)) +filtered: +#endif + m = fin->fin_m; + + if (fr != NULL) { + frdest_t *fdp; + + fdp = &fr->fr_tifs[fin->fin_rev]; + + if (!out && (pass & FR_FASTROUTE)) { + /* + * For fastroute rule, no destination interface defined + * so pass NULL as the frdest_t parameter + */ + (void) fr_fastroute(m, mp, fin, NULL); + m = *mp = NULL; + } else if ((fdp->fd_ifp != NULL) && + (fdp->fd_ifp != (struct ifnet *)-1)) { + /* this is for to rules: */ + (void) fr_fastroute(m, mp, fin, fdp); + m = *mp = NULL; + } + + /* + * Generate a duplicated packet. + */ + if (mc != NULL) + (void) fr_fastroute(mc, &mc, fin, &fr->fr_dif); + } + + /* + * This late because the likes of fr_fastroute() use fin_fr. 
+ */ + RWLOCK_EXIT(&ipf_mutex); + +finished: + if (!FR_ISPASS(pass)) { + ATOMIC_INCL(frstats[out].fr_block); + if (*mp != NULL) { + FREE_MB_T(*mp); + m = *mp = NULL; + } + } else { + ATOMIC_INCL(frstats[out].fr_pass); +#if defined(_KERNEL) && defined(__sgi) + if ((fin->fin_hbuf != NULL) && + (mtod(fin->fin_m, struct ip *) != fin->fin_ip)) { + COPYBACK(m, 0, fin->fin_plen, fin->fin_hbuf); + } +#endif + } + + RWLOCK_EXIT(&ipf_global); +#ifdef _KERNEL +# if OpenBSD >= 200311 + if (FR_ISPASS(pass) && (v == 4)) { + ip = fin->fin_ip; + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); + } +# endif + return (FR_ISPASS(pass)) ? 0 : fin->fin_error; +#else /* _KERNEL */ + FR_VERBOSE(("fin_flx %#x pass %#x ", fin->fin_flx, pass)); + if ((pass & FR_NOMATCH) != 0) + return 1; + + if ((pass & FR_RETMASK) != 0) + switch (pass & FR_RETMASK) + { + case FR_RETRST : + return 3; + case FR_RETICMP : + return 4; + case FR_FAKEICMP : + return 5; + } + + switch (pass & FR_CMDMASK) + { + case FR_PASS : + return 0; + case FR_BLOCK : + return -1; + case FR_AUTH : + return -2; + case FR_ACCOUNT : + return -3; + case FR_PREAUTH : + return -4; + } + return 2; +#endif /* _KERNEL */ +} + + +#ifdef IPFILTER_LOG +/* ------------------------------------------------------------------------ */ +/* Function: fr_dolog */ +/* Returns: frentry_t* - returns contents of fin_fr (no change made) */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(IO) - pointer to current/new filter decision (unused) */ +/* */ +/* Checks flags set to see how a packet should be logged, if it is to be */ +/* logged. Adjust statistics based on its success or not. 
*/ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_dolog(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + u_32_t pass; + int out; + + out = fin->fin_out; + pass = *passp; + + if ((fr_flags & FF_LOGNOMATCH) && (pass & FR_NOMATCH)) { + pass |= FF_LOGNOMATCH; + ATOMIC_INCL(frstats[out].fr_npkl); + goto logit; + } else if (((pass & FR_LOGMASK) == FR_LOGP) || + (FR_ISPASS(pass) && (fr_flags & FF_LOGPASS))) { + if ((pass & FR_LOGMASK) != FR_LOGP) + pass |= FF_LOGPASS; + ATOMIC_INCL(frstats[out].fr_ppkl); + goto logit; + } else if (((pass & FR_LOGMASK) == FR_LOGB) || + (FR_ISBLOCK(pass) && (fr_flags & FF_LOGBLOCK))) { + if ((pass & FR_LOGMASK) != FR_LOGB) + pass |= FF_LOGBLOCK; + ATOMIC_INCL(frstats[out].fr_bpkl); + /* The no-match and pass cases above jump here to share the log call. */ +logit: + if (ipflog(fin, pass) == -1) { + ATOMIC_INCL(frstats[out].fr_skip); + + /* + * If the "or-block" option has been used then + * block the packet if we failed to log it. + */ + if ((pass & FR_LOGORBLOCK) && + FR_ISPASS(pass)) { + pass &= ~FR_CMDMASK; + pass |= FR_BLOCK; + } + } + *passp = pass; + } + + return fin->fin_fr; +} +#endif /* IPFILTER_LOG */ + + +/* ------------------------------------------------------------------------ */ +/* Function: ipf_cksum */ +/* Returns: u_short - IP header checksum */ +/* Parameters: addr(I) - pointer to start of buffer to checksum */ +/* len(I) - length of buffer in bytes */ +/* */ +/* Calculate the one's complement 16 bit checksum of the buffer passed. */ +/* */ +/* N.B.: addr should be 16bit aligned. 
*/ +/* ------------------------------------------------------------------------ */ +u_short ipf_cksum(addr, len) +u_short *addr; +int len; +{ + u_32_t sum = 0; + + /* sum the buffer as 16 bit words, exactly as they sit in memory */ + for (; len > 1; len -= 2) + sum += *addr++; + + /* + * Mop up an odd trailing byte, if necessary. It is the first byte + * of a zero-padded 16 bit word in network order, so place it the + * same way fr_cksum() does to stay correct on big-endian hosts. + */ + if (len == 1) + sum += ntohs(*(u_char *)addr << 8); + + /* + * add back carry outs from top 16 bits to low 16 bits + */ + sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */ + sum += (sum >> 16); /* add carry */ + return (u_short)(~sum); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_cksum */ +/* Returns: u_short - layer 4 checksum */ +/* Parameters: m(I ) - pointer to buffer holding packet */ +/* ip(I) - pointer to IP header */ +/* l4proto(I) - protocol to calculate checksum for */ +/* l4hdr(I) - pointer to layer 4 header */ +/* */ +/* Calculates the TCP checksum for the packet held in "m", using the data */ +/* in the IP header "ip" to seed it. */ +/* */ +/* NB: This function assumes we've pullup'd enough for all of the IP header */ +/* and the TCP header. We also assume that data blocks aren't allocated in */ +/* odd sizes. */ +/* */ +/* Expects ip_len to be in host byte order when called. 
*/ +/* ------------------------------------------------------------------------ */ +u_short fr_cksum(m, ip, l4proto, l4hdr) +mb_t *m; +ip_t *ip; +int l4proto; +void *l4hdr; +{ + u_short *sp, slen, sumsave, l4hlen, *csump; + u_int sum, sum2; + int hlen; +#ifdef USE_INET6 + ip6_t *ip6; +#endif + + csump = NULL; + sumsave = 0; + l4hlen = 0; + sp = NULL; + slen = 0; + hlen = 0; + sum = 0; + + /* + * Add up IP Header portion + */ +#ifdef USE_INET6 + if (IP_V(ip) == 4) { +#endif + hlen = IP_HL(ip) << 2; + slen = ip->ip_len - hlen; + sum = htons((u_short)l4proto); + sum += htons(slen); + sp = (u_short *)&ip->ip_src; + sum += *sp++; /* ip_src */ + sum += *sp++; + sum += *sp++; /* ip_dst */ + sum += *sp++; +#ifdef USE_INET6 + } else if (IP_V(ip) == 6) { + ip6 = (ip6_t *)ip; + hlen = sizeof(*ip6); + slen = ntohs(ip6->ip6_plen); + sum = htons((u_short)l4proto); + sum += htons(slen); + sp = (u_short *)&ip6->ip6_src; + sum += *sp++; /* ip6_src */ + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; /* ip6_dst */ + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + sum += *sp++; + } +#endif + + switch (l4proto) + { + case IPPROTO_UDP : + csump = &((udphdr_t *)l4hdr)->uh_sum; + l4hlen = sizeof(udphdr_t); + break; + + case IPPROTO_TCP : + csump = &((tcphdr_t *)l4hdr)->th_sum; + l4hlen = sizeof(tcphdr_t); + break; + case IPPROTO_ICMP : + csump = &((icmphdr_t *)l4hdr)->icmp_cksum; + l4hlen = 4; + sum = 0; + break; + default : + break; + } + + if (csump != NULL) { + sumsave = *csump; + *csump = 0; + } + + l4hlen = l4hlen; /* LINT */ + +#ifdef _KERNEL +# ifdef MENTAT + { + void *rp = m->b_rptr; + + if ((unsigned char *)ip > m->b_rptr && (unsigned char *)ip < m->b_wptr) + m->b_rptr = (u_char *)ip; + sum2 = ip_cksum(m, hlen, sum); /* hlen == offset */ + m->b_rptr = rp; + sum2 = (u_short)(~sum2 & 0xffff); + } +# else /* MENTAT */ +# if defined(BSD) || defined(sun) +# if 
BSD >= 199103 + m->m_data += hlen; +# else + m->m_off += hlen; +# endif + m->m_len -= hlen; + sum2 = in_cksum(m, slen); + m->m_len += hlen; +# if BSD >= 199103 + m->m_data -= hlen; +# else + m->m_off -= hlen; +# endif + /* + * Both sum and sum2 are partial sums, so combine them together. + */ + sum += ~sum2 & 0xffff; + while (sum > 0xffff) + sum = (sum & 0xffff) + (sum >> 16); + sum2 = ~sum & 0xffff; +# else /* defined(BSD) || defined(sun) */ +{ + union { + u_char c[2]; + u_short s; + } bytes; + /* + * Signed, so that the "len <= 0" test below really catches the + * case where the headers are longer than ip_len claims. + */ + int len = ip->ip_len; +# if defined(__sgi) + int add; +# endif + + /* + * Add up the layer 4 header portion + */ + if (sp != (u_short *)l4hdr) + sp = (u_short *)l4hdr; + + switch (l4proto) + { + case IPPROTO_UDP : + sum += *sp++; /* sport */ + sum += *sp++; /* dport */ + sum += *sp++; /* udp length */ + sum += *sp++; /* checksum */ + break; + + case IPPROTO_TCP : + sum += *sp++; /* sport */ + sum += *sp++; /* dport */ + sum += *sp++; /* seq */ + sum += *sp++; + sum += *sp++; /* ack */ + sum += *sp++; + sum += *sp++; /* off */ + sum += *sp++; /* win */ + sum += *sp++; /* checksum */ + sum += *sp++; /* urp */ + break; + case IPPROTO_ICMP : + sum = *sp++; /* type/code */ + sum += *sp++; /* checksum */ + break; + } + +# ifdef __sgi + /* + * In case we had to copy the IP & TCP header out of mbufs, + * skip over the mbuf bits which are the header + */ + if ((caddr_t)ip != mtod(m, caddr_t)) { + hlen = (caddr_t)sp - (caddr_t)ip; + while (hlen) { + add = MIN(hlen, m->m_len); + sp = (u_short *)(mtod(m, caddr_t) + add); + hlen -= add; + if (add == m->m_len) { + m = m->m_next; + if (!hlen) { + if (!m) + break; + sp = mtod(m, u_short *); + } + PANIC((!m),("fr_cksum(1): not enough data")); + } + } + } +# endif + + len -= (l4hlen + hlen); + if (len <= 0) + goto nodata; + + while (len > 1) { + if (((caddr_t)sp - mtod(m, caddr_t)) >= m->m_len) { + m = m->m_next; + PANIC((!m),("fr_cksum(2): not enough data")); + sp = mtod(m, u_short *); + } + if (((caddr_t)(sp + 1) - mtod(m, caddr_t)) > m->m_len) 
{ + bytes.c[0] = *(u_char *)sp; + m = m->m_next; + PANIC((!m),("fr_cksum(3): not enough data")); + sp = mtod(m, u_short *); + bytes.c[1] = *(u_char *)sp; + sum += bytes.s; + sp = (u_short *)((u_char *)sp + 1); + } + if ((u_long)sp & 1) { + bcopy((char *)sp++, (char *)&bytes.s, sizeof(bytes.s)); + sum += bytes.s; + } else + sum += *sp++; + len -= 2; + } + + if (len != 0) + sum += ntohs(*(u_char *)sp << 8); +nodata: + while (sum > 0xffff) + sum = (sum & 0xffff) + (sum >> 16); + sum2 = (u_short)(~sum & 0xffff); +} +# endif /* defined(BSD) || defined(sun) */ +# endif /* MENTAT */ +#else /* _KERNEL */ + for (; slen > 1; slen -= 2) + sum += *sp++; + if (slen) + sum += ntohs(*(u_char *)sp << 8); + while (sum > 0xffff) + sum = (sum & 0xffff) + (sum >> 16); + sum2 = (u_short)(~sum & 0xffff); +#endif /* _KERNEL */ + if (csump != NULL) + *csump = sumsave; + return sum2; +} + + +#if defined(_KERNEL) && ( ((BSD < 199103) && !defined(MENTAT)) || \ + defined(__sgi) ) && !defined(linux) +/* + * Copyright (c) 1982, 1986, 1988, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 + * Id: fil.c,v 2.243.2.57 2005/03/28 10:47:50 darrenr Exp + */ +/* + * Copy data from an mbuf chain starting "off" bytes from the beginning, + * continuing for "len" bytes, into the indicated buffer. + */ +void +m_copydata(m, off, len, cp) + mb_t *m; + int off; + int len; + caddr_t cp; +{ + unsigned count; + + if (off < 0 || len < 0) + panic("m_copydata"); + while (off > 0) { + if (m == 0) + panic("m_copydata"); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + while (len > 0) { + if (m == 0) + panic("m_copydata"); + count = MIN(m->m_len - off, len); + bcopy(mtod(m, caddr_t) + off, cp, count); + len -= count; + cp += count; + off = 0; + m = m->m_next; + } +} + + +/* + * Copy data from a buffer back into the indicated mbuf chain, + * starting "off" bytes from the beginning, extending the mbuf + * chain if necessary. 
+ */ +void +m_copyback(m0, off, len, cp) + struct mbuf *m0; + int off; + int len; + caddr_t cp; +{ + int mlen; + struct mbuf *m = m0, *n; + int totlen = 0; + + if (m0 == 0) + return; + while (off > (mlen = m->m_len)) { + off -= mlen; + totlen += mlen; + if (m->m_next == 0) { + n = m_getclr(M_DONTWAIT, m->m_type); + if (n == 0) + goto out; + n->m_len = min(MLEN, len + off); + m->m_next = n; + } + m = m->m_next; + } + while (len > 0) { + mlen = min (m->m_len - off, len); + bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen); + cp += mlen; + len -= mlen; + mlen += off; + off = 0; + totlen += mlen; + if (len == 0) + break; + if (m->m_next == 0) { + n = m_get(M_DONTWAIT, m->m_type); + if (n == 0) + break; + n->m_len = min(MLEN, len); + m->m_next = n; + } + m = m->m_next; + } +out: +#if 0 + if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) + m->m_pkthdr.len = totlen; +#endif + return; +} +#endif /* (_KERNEL) && ( ((BSD < 199103) && !MENTAT) || __sgi) */ + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_findgroup */ +/* Returns: frgroup_t * - NULL = group not found, else pointer to group */ +/* Parameters: group(I) - group name to search for */ +/* unit(I) - device to which this group belongs */ +/* set(I) - which set of rules (active/inactive) this is */ +/* fgpp(O) - pointer to place to store pointer to the pointer */ +/* to where to add the next (last) group or where */ +/* to delete group from. */ +/* */ +/* Search amongst the defined groups for a particular group name. */ +/* ------------------------------------------------------------------------ */ +frgroup_t *fr_findgroup(group, unit, set, fgpp) +char *group; +minor_t unit; +int set; +frgroup_t ***fgpp; +{ + frgroup_t *fg, **fgp; + + /* + * Which list of groups to search in is dependant on which list of + * rules are being operated on. 
+ */ + fgp = &ipfgroups[unit][set]; + + while ((fg = *fgp) != NULL) { + if (strncmp(group, fg->fg_name, FR_GROUPLEN) == 0) + break; + else + fgp = &fg->fg_next; + } + if (fgpp != NULL) + *fgpp = fgp; + return fg; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_addgroup */ +/* Returns: frgroup_t * - NULL == did not create group, */ +/* != NULL == pointer to the group */ +/* Parameters: group(I) - group name to add */ +/* head(I) - rule pointer that is using this as the head */ +/* flags(I) - rule flags which describe the type of rule it is */ +/* unit(I) - device to which this group will belong */ +/* set(I) - which set of rules (active/inactive) this is */ +/* Write Locks: ipf_mutex */ +/* */ +/* Add a new group head, or if it already exists, increase the reference */ +/* count to it. */ +/* ------------------------------------------------------------------------ */ +frgroup_t *fr_addgroup(group, head, flags, unit, set) +char *group; +void *head; +u_32_t flags; +minor_t unit; +int set; +{ + frgroup_t *fg, **fgp; + u_32_t gflags; + + if (group == NULL) + return NULL; + + if (unit == IPL_LOGIPF && *group == '\0') + return NULL; + + fgp = NULL; + gflags = flags & FR_INOUT; + + fg = fr_findgroup(group, unit, set, &fgp); + if (fg != NULL) { + /* an existing group may only be shared by rules of the same direction */ + if (fg->fg_flags == 0) + fg->fg_flags = gflags; + else if (gflags != fg->fg_flags) + return NULL; + fg->fg_ref++; + return fg; + } + KMALLOC(fg, frgroup_t *); + if (fg != NULL) { + fg->fg_head = head; + fg->fg_start = NULL; + fg->fg_next = *fgp; + bcopy(group, fg->fg_name, FR_GROUPLEN); + fg->fg_flags = gflags; + fg->fg_ref = 1; + *fgp = fg; + } + return fg; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_delgroup */ +/* Returns: Nil */ +/* Parameters: group(I) - group name to delete */ +/* unit(I) - device to which this group belongs */ +/* set(I) - which set of rules (active/inactive) this is */ +/* Write Locks: 
ipf_mutex */ +/* */ +/* Attempt to delete a group head. */ +/* Only do this when its reference count reaches 0. */ +/* ------------------------------------------------------------------------ */ +void fr_delgroup(group, unit, set) +char *group; +minor_t unit; +int set; +{ + frgroup_t *fg, **fgp; + + fg = fr_findgroup(group, unit, set, &fgp); + if (fg == NULL) + return; + + fg->fg_ref--; + if (fg->fg_ref == 0) { + *fgp = fg->fg_next; + KFREE(fg); + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_getrulen */ +/* Returns: frentry_t * - NULL == not found, else pointer to rule n */ +/* Parameters: unit(I) - device for which to count the rule's number */ +/* group(I) - group name */ +/* n(I) - rule number to find */ +/* */ +/* Find rule # n in the named group and return a pointer to it. Return NULL */ +/* if the group doesn't exist or there are fewer than n rules in the group. */ +/* Only the active rule set (fr_active) is searched. */ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_getrulen(unit, group, n) +int unit; +char *group; +u_32_t n; +{ + frentry_t *fr; + frgroup_t *fg; + + fg = fr_findgroup(group, unit, fr_active, NULL); + if (fg == NULL) + return NULL; + for (fr = fg->fg_head; fr && n; fr = fr->fr_next, n--) + ; + if (n != 0) + return NULL; + return fr; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_rulen */ +/* Returns: int - >= 0 - rule number, -1 == search failed */ +/* Parameters: unit(I) - device for which to count the rule's number */ +/* fr(I) - pointer to rule to match */ +/* */ +/* Return the number for a rule on a specific filtering device. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_rulen(unit, fr) +int unit; +frentry_t *fr; +{ + frentry_t *fh; + frgroup_t *fg; + u_32_t n = 0; + + if (fr == NULL) + return -1; + fg = fr_findgroup(fr->fr_group, unit, fr_active, NULL); + if (fg == NULL) + return -1; + for (fh = fg->fg_head; fh; n++, fh = fh->fr_next) + if (fh == fr) + break; + if (fh == NULL) + return -1; + return n; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frflushlist */ +/* Returns: int - >= 0 - number of flushed rules */ +/* Parameters: set(I) - which set of rules (inactive/inactive) this is */ +/* unit(I) - device for which to flush rules */ +/* flags(I) - which set of rules to flush */ +/* nfreedp(O) - pointer to int where flush count is stored */ +/* listp(I) - pointer to list to flush pointer */ +/* Write Locks: ipf_mutex */ +/* */ +/* Recursively flush rules from the list, descending groups as they are */ +/* encountered. if a rule is the head of a group and it has lost all its */ +/* group members, then also delete the group reference. nfreedp is needed */ +/* to store the accumulating count of rules removed, whereas the returned */ +/* value is just the number removed from the current list. The latter is */ +/* needed to correctly adjust reference counts on rules that define groups. */ +/* */ +/* NOTE: Rules not loaded from user space cannot be flushed. 
*/ +/* ------------------------------------------------------------------------ */ +static int frflushlist(set, unit, nfreedp, listp) +int set; +minor_t unit; +int *nfreedp; +frentry_t **listp; +{ + int freed = 0, i; + frentry_t *fp; + + while ((fp = *listp) != NULL) { + if ((fp->fr_type & FR_T_BUILTIN) || + !(fp->fr_flags & FR_COPIED)) { + listp = &fp->fr_next; + continue; + } + *listp = fp->fr_next; + if (fp->fr_grp != NULL) { + i = frflushlist(set, unit, nfreedp, fp->fr_grp); + fp->fr_ref -= i; + } + + if (fp->fr_grhead != NULL) { + fr_delgroup(fp->fr_grhead, unit, set); + *fp->fr_grhead = '\0'; + } + + ASSERT(fp->fr_ref > 0); + fp->fr_next = NULL; + if (fr_derefrule(&fp) == 0) + freed++; + } + *nfreedp += freed; + return freed; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: frflush */ +/* Returns: int - >= 0 - number of flushed rules */ +/* Parameters: unit(I) - device for which to flush rules */ +/* flags(I) - which set of rules to flush */ +/* */ +/* Calls flushlist() for all filter rules (accounting, firewall - both IPv4 */ +/* and IPv6) as defined by the value of flags. 
*/ +/* ------------------------------------------------------------------------ */ +int frflush(unit, proto, flags) +minor_t unit; +int proto, flags; +{ + int flushed = 0, set; + + WRITE_ENTER(&ipf_mutex); + bzero((char *)frcache, sizeof(frcache)); + + set = fr_active; + if ((flags & FR_INACTIVE) == FR_INACTIVE) + set = 1 - set; + + if (flags & FR_OUTQUE) { + if (proto == 0 || proto == 6) { + (void) frflushlist(set, unit, + &flushed, &ipfilter6[1][set]); + (void) frflushlist(set, unit, + &flushed, &ipacct6[1][set]); + } + if (proto == 0 || proto == 4) { + (void) frflushlist(set, unit, + &flushed, &ipfilter[1][set]); + (void) frflushlist(set, unit, + &flushed, &ipacct[1][set]); + } + } + if (flags & FR_INQUE) { + if (proto == 0 || proto == 6) { + (void) frflushlist(set, unit, + &flushed, &ipfilter6[0][set]); + (void) frflushlist(set, unit, + &flushed, &ipacct6[0][set]); + } + if (proto == 0 || proto == 4) { + (void) frflushlist(set, unit, + &flushed, &ipfilter[0][set]); + (void) frflushlist(set, unit, + &flushed, &ipacct[0][set]); + } + } + RWLOCK_EXIT(&ipf_mutex); + + if (unit == IPL_LOGIPF) { + int tmp; + + tmp = frflush(IPL_LOGCOUNT, proto, flags); + if (tmp >= 0) + flushed += tmp; + } + return flushed; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: memstr */ +/* Returns: char * - NULL if failed, != NULL pointer to matching bytes */ +/* Parameters: src(I) - pointer to byte sequence to match */ +/* dst(I) - pointer to byte sequence to search */ +/* slen(I) - match length */ +/* dlen(I) - length available to search in */ +/* */ +/* Search dst for a sequence of bytes matching those at src and extend for */ +/* slen bytes. 
*/ +/* ------------------------------------------------------------------------ */ +char *memstr(src, dst, slen, dlen) +char *src, *dst; +int slen, dlen; +{ + char *s = NULL; + + while (dlen >= slen) { + if (bcmp(src, dst, slen) == 0) { + s = dst; + break; + } + dst++; + dlen--; + } + return s; +} +/* ------------------------------------------------------------------------ */ +/* Function: fr_fixskip */ +/* Returns: Nil */ +/* Parameters: listp(IO) - pointer to start of list with skip rule */ +/* rp(I) - rule added/removed with skip in it. */ +/* addremove(I) - adjustment (-1/+1) to make to skip count, */ +/* depending on whether a rule was just added */ +/* or removed. */ +/* */ +/* Adjust all the rules in a list which would have skip'd past the position */ +/* where we are inserting to skip to the right place given the change. */ +/* ------------------------------------------------------------------------ */ +void fr_fixskip(listp, rp, addremove) +frentry_t **listp, *rp; +int addremove; +{ + int rules, rn; + frentry_t *fp; + + rules = 0; + for (fp = *listp; (fp != NULL) && (fp != rp); fp = fp->fr_next) + rules++; + + if (!fp) + return; + + for (rn = 0, fp = *listp; fp && (fp != rp); fp = fp->fr_next, rn++) + if (FR_ISSKIP(fp->fr_flags) && (rn + fp->fr_arg >= rules)) + fp->fr_arg += addremove; +} + + +#ifdef _KERNEL +/* ------------------------------------------------------------------------ */ +/* Function: count4bits */ +/* Returns: int - >= 0 - number of consecutive bits in input */ +/* Parameters: ip(I) - 32bit IP address */ +/* */ +/* IPv4 ONLY */ +/* count consecutive 1's in bit mask. If the mask generated by counting */ +/* consecutive 1's is different to that passed, return -1, else return # */ +/* of bits. 
*/ +/* ------------------------------------------------------------------------ */ +int count4bits(ip) +u_32_t ip; +{ + u_32_t ipn; + int cnt = 0, i, j; + + ip = ipn = ntohl(ip); + for (i = 32; i; i--, ipn *= 2) + if (ipn & 0x80000000) + cnt++; + else + break; + ipn = 0; + for (i = 32, j = cnt; i; i--, j--) { + ipn *= 2; + if (j > 0) + ipn++; + } + if (ipn == ip) + return cnt; + return -1; +} + + +# if 0 +/* ------------------------------------------------------------------------ */ +/* Function: count6bits */ +/* Returns: int - >= 0 - number of consecutive bits in input */ +/* Parameters: msk(I) - pointer to start of IPv6 bitmask */ +/* */ +/* IPv6 ONLY */ +/* count consecutive 1's in bit mask. */ +/* ------------------------------------------------------------------------ */ +int count6bits(msk) +u_32_t *msk; +{ + int i = 0, k; + u_32_t j; + + for (k = 3; k >= 0; k--) + if (msk[k] == 0xffffffff) + i += 32; + else { + for (j = msk[k]; j; j <<= 1) + if (j & 0x80000000) + i++; + } + return i; +} +# endif +#endif /* _KERNEL */ + + +/* ------------------------------------------------------------------------ */ +/* Function: frsynclist */ +/* Returns: void */ +/* Parameters: fr(I) - start of filter list to sync interface names for */ +/* ifp(I) - interface pointer for limiting sync lookups */ +/* Write Locks: ipf_mutex */ +/* */ +/* Walk through a list of filter rules and resolve any interface names into */ +/* pointers. Where dynamic addresses are used, also update the IP address */ +/* used in the rule. The interface pointer is used to limit the lookups to */ +/* a specific set of matching names if it is non-NULL. */ +/* ------------------------------------------------------------------------ */ +static void frsynclist(fr, ifp) +frentry_t *fr; +void *ifp; +{ + frdest_t *fdp; + int v, i; + + for (; fr; fr = fr->fr_next) { + v = fr->fr_v; + /* - * Match the flags ? If not, abort this match. + * Lookup all the interface names that are part of the rule. 
*/ - if (ft->ftu_tcpfm && - ft->ftu_tcpf != (fin->fin_tcpf & ft->ftu_tcpfm)) { - FR_DEBUG(("f. %#x & %#x != %#x\n", fin->fin_tcpf, - ft->ftu_tcpfm, ft->ftu_tcpf)); - err = 0; + for (i = 0; i < 4; i++) { + if ((ifp != NULL) && (fr->fr_ifas[i] != ifp)) + continue; + fr->fr_ifas[i] = fr_resolvenic(fr->fr_ifnames[i], v); + } + + if (fr->fr_type == FR_T_IPF) { + if (fr->fr_satype != FRI_NORMAL && + fr->fr_satype != FRI_LOOKUP) { + (void)fr_ifpaddr(v, fr->fr_satype, + fr->fr_ifas[fr->fr_sifpidx], + &fr->fr_src, &fr->fr_smsk); + } + if (fr->fr_datype != FRI_NORMAL && + fr->fr_datype != FRI_LOOKUP) { + (void)fr_ifpaddr(v, fr->fr_datype, + fr->fr_ifas[fr->fr_difpidx], + &fr->fr_dst, &fr->fr_dmsk); + } + } + + fdp = &fr->fr_tifs[0]; + if ((ifp == NULL) || (fdp->fd_ifp == ifp)) + fr_resolvedest(fdp, v); + + fdp = &fr->fr_tifs[1]; + if ((ifp == NULL) || (fdp->fd_ifp == ifp)) + fr_resolvedest(fdp, v); + + fdp = &fr->fr_dif; + if ((ifp == NULL) || (fdp->fd_ifp == ifp)) { + fr_resolvedest(fdp, v); + + fr->fr_flags &= ~FR_DUP; + if ((fdp->fd_ifp != (void *)-1) && + (fdp->fd_ifp != NULL)) + fr->fr_flags |= FR_DUP; + } + +#ifdef IPFILTER_LOOKUP + if (fr->fr_type == FR_T_IPF && fr->fr_satype == FRI_LOOKUP && + fr->fr_srcptr == NULL) { + fr->fr_srcptr = fr_resolvelookup(fr->fr_srctype, + fr->fr_srcnum, + &fr->fr_srcfunc); + } + if (fr->fr_type == FR_T_IPF && fr->fr_datype == FRI_LOOKUP && + fr->fr_dstptr == NULL) { + fr->fr_dstptr = fr_resolvelookup(fr->fr_dsttype, + fr->fr_dstnum, + &fr->fr_dstfunc); } +#endif + } +} + + +#ifdef _KERNEL +/* ------------------------------------------------------------------------ */ +/* Function: frsync */ +/* Returns: void */ +/* Parameters: Nil */ +/* */ +/* frsync() is called when we suspect that the interface list or */ +/* information about interfaces (like IP#) has changed. Go through all */ +/* filter rules, NAT entries and the state table and check if anything */ +/* needs to be changed/updated. 
*/ +/* ------------------------------------------------------------------------ */ +void frsync(ifp) +void *ifp; +{ + int i; + +# if !SOLARIS + fr_natsync(ifp); + fr_statesync(ifp); +# endif + + WRITE_ENTER(&ipf_mutex); + frsynclist(ipacct[0][fr_active], ifp); + frsynclist(ipacct[1][fr_active], ifp); + frsynclist(ipfilter[0][fr_active], ifp); + frsynclist(ipfilter[1][fr_active], ifp); + frsynclist(ipacct6[0][fr_active], ifp); + frsynclist(ipacct6[1][fr_active], ifp); + frsynclist(ipfilter6[0][fr_active], ifp); + frsynclist(ipfilter6[1][fr_active], ifp); + + for (i = 0; i < IPL_LOGSIZE; i++) { + frgroup_t *g; + + for (g = ipfgroups[i][0]; g != NULL; g = g->fg_next) + frsynclist(g->fg_start, ifp); + for (g = ipfgroups[i][1]; g != NULL; g = g->fg_next) + frsynclist(g->fg_start, ifp); } - return err; + RWLOCK_EXIT(&ipf_mutex); } + /* - * Check the input/output list of rules for a match and result. - * Could be per interface, but this gets real nasty when you don't have - * kernel sauce. + * In the functions below, bcopy() is called because the pointer being + * copied _from_ in this instance is a pointer to a char buf (which could + * end up being unaligned) and on the kernel's local stack. */ -int fr_scanlist(passin, ip, fin, m) -u_32_t passin; -ip_t *ip; -register fr_info_t *fin; -void *m; +/* ------------------------------------------------------------------------ */ +/* Function: copyinptr */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: src(I) - pointer to the source address */ +/* dst(I) - destination address */ +/* size(I) - number of bytes to copy */ +/* */ +/* Copy a block of data in from user space, given a pointer to the pointer */ +/* to start copying from (src) and a pointer to where to store it (dst). 
*/ +/* NB: src - pointer to user space pointer, dst - kernel space pointer */ +/* ------------------------------------------------------------------------ */ +int copyinptr(src, dst, size) +void *src, *dst; +size_t size; { - register struct frentry *fr; - register fr_ip_t *fi = &fin->fin_fi; - int rulen, portcmp = 0, off, skip = 0, logged = 0; - u_32_t pass, passt, passl; - frentry_t *frl; + caddr_t ca; + int err; - frl = NULL; - pass = passin; - fr = fin->fin_fr; - fin->fin_fr = NULL; - off = fin->fin_off; +# if SOLARIS + err = COPYIN(src, (caddr_t)&ca, sizeof(ca)); + if (err != 0) + return err; +# else + bcopy(src, (caddr_t)&ca, sizeof(ca)); +# endif + err = COPYIN(ca, dst, size); + return err; +} - if ((fi->fi_fl & FI_TCPUDP) && (fin->fin_dlen > 3) && !off) - portcmp = 1; - for (rulen = 0; fr; fr = fr->fr_next, rulen++) { - if (skip) { - FR_VERBOSE(("%d (%#x)\n", skip, fr->fr_flags)); - skip--; - continue; - } - /* - * In all checks below, a null (zero) value in the - * filter struture is taken to mean a wildcard. - * - * check that we are working for the right interface - */ -#ifdef _KERNEL -# if (BSD >= 199306) - if (fin->fin_out != 0) { - if ((fr->fr_oifa && - (fr->fr_oifa != ((mb_t *)m)->m_pkthdr.rcvif))) - continue; - } +/* ------------------------------------------------------------------------ */ +/* Function: copyoutptr */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: src(I) - pointer to the source address */ +/* dst(I) - destination address */ +/* size(I) - number of bytes to copy */ +/* */ +/* Copy a block of data out to user space, given a pointer to the pointer */ +/* to start copying from (src) and a pointer to where to store it (dst). */ +/* NB: src - kernel space pointer, dst - pointer to user space pointer. 
*/ +/* ------------------------------------------------------------------------ */ +int copyoutptr(src, dst, size) +void *src, *dst; +size_t size; +{ + caddr_t ca; + int err; + +# if SOLARIS + err = COPYIN(dst, (caddr_t)&ca, sizeof(ca)); + if (err != 0) + return err; +# else + bcopy(dst, (caddr_t)&ca, sizeof(ca)); # endif -#else - if (opts & (OPT_VERBOSE|OPT_DEBUG)) - printf("\n"); + err = COPYOUT(src, ca, size); + return err; +} #endif - FR_VERBOSE(("%c", fr->fr_skip ? 's' : - (pass & FR_PASS) ? 'p' : - (pass & FR_AUTH) ? 'a' : - (pass & FR_ACCOUNT) ? 'A' : - (pass & FR_NOMATCH) ? 'n' : 'b')); - if (fr->fr_ifa && fr->fr_ifa != fin->fin_ifp) - continue; +/* ------------------------------------------------------------------------ */ +/* Function: fr_lock */ +/* Returns: (void) */ +/* Parameters: data(I) - pointer to lock value to set */ +/* lockp(O) - pointer to location to store old lock value */ +/* */ +/* Get the new value for the lock integer, set it and return the old value */ +/* in *lockp. */ +/* ------------------------------------------------------------------------ */ +void fr_lock(data, lockp) +caddr_t data; +int *lockp; +{ + int arg; - FR_VERBOSE((":i")); - { - register u_32_t *ld, *lm, *lip; - register int i; - - lip = (u_32_t *)fi; - lm = (u_32_t *)&fr->fr_mip; - ld = (u_32_t *)&fr->fr_ip; - i = ((*lip & *lm) != *ld); - FR_DEBUG(("0. %#08x & %#08x != %#08x\n", - *lip, *lm, *ld)); - if (i) - continue; - /* - * We now know whether the packet version and the - * rule version match, along with protocol, ttl and - * tos. - */ - lip++, lm++, ld++; - /* - * Unrolled loops (4 each, for 32 bits). - */ - FR_DEBUG(("1a. %#08x & %#08x != %#08x\n", - *lip, *lm, *ld)); - i |= ((*lip++ & *lm++) != *ld++) << 5; - if (fi->fi_v == 6) { - FR_DEBUG(("1b. %#08x & %#08x != %#08x\n", - *lip, *lm, *ld)); - i |= ((*lip++ & *lm++) != *ld++) << 5; - FR_DEBUG(("1c. %#08x & %#08x != %#08x\n", - *lip, *lm, *ld)); - i |= ((*lip++ & *lm++) != *ld++) << 5; - FR_DEBUG(("1d. 
%#08x & %#08x != %#08x\n", - *lip, *lm, *ld)); - i |= ((*lip++ & *lm++) != *ld++) << 5; - } else { - lip += 3; - lm += 3; - ld += 3; - } - i ^= (fr->fr_flags & FR_NOTSRCIP); - if (i) - continue; - FR_DEBUG(("2a. %#08x & %#08x != %#08x\n", - *lip, *lm, *ld)); - i |= ((*lip++ & *lm++) != *ld++) << 6; - if (fi->fi_v == 6) { - FR_DEBUG(("2b. %#08x & %#08x != %#08x\n", - *lip, *lm, *ld)); - i |= ((*lip++ & *lm++) != *ld++) << 6; - FR_DEBUG(("2c. %#08x & %#08x != %#08x\n", - *lip, *lm, *ld)); - i |= ((*lip++ & *lm++) != *ld++) << 6; - FR_DEBUG(("2d. %#08x & %#08x != %#08x\n", - *lip, *lm, *ld)); - i |= ((*lip++ & *lm++) != *ld++) << 6; - } else { - lip += 3; - lm += 3; - ld += 3; - } - i ^= (fr->fr_flags & FR_NOTDSTIP); - if (i) - continue; - FR_DEBUG(("3. %#08x & %#08x != %#08x\n", - *lip, *lm, *ld)); - i |= ((*lip++ & *lm++) != *ld++); - FR_DEBUG(("4. %#08x & %#08x != %#08x\n", - *lip, *lm, *ld)); - i |= ((*lip & *lm) != *ld); - if (i) - continue; - } + BCOPYIN(data, (caddr_t)&arg, sizeof(arg)); + BCOPYOUT((caddr_t)lockp, data, sizeof(*lockp)); + *lockp = arg; +} - /* - * If a fragment, then only the first has what we're looking - * for here... - */ - if (!portcmp && (fr->fr_dcmp || fr->fr_scmp || fr->fr_tcpf || - fr->fr_tcpfm)) - continue; - if (fi->fi_fl & FI_TCPUDP) { - if (!fr_tcpudpchk(&fr->fr_tuc, fin)) - continue; - } else if (fr->fr_icmpm || fr->fr_icmp) { - if (((fi->fi_p != IPPROTO_ICMP) && - (fi->fi_p != IPPROTO_ICMPV6)) || off || - (fin->fin_dlen < 2)) - continue; - if ((fin->fin_data[0] & fr->fr_icmpm) != fr->fr_icmp) { - FR_DEBUG(("i. 
%#x & %#x != %#x\n", - fin->fin_data[0], fr->fr_icmpm, - fr->fr_icmp)); - continue; - } - } - FR_VERBOSE(("*")); - if (fr->fr_flags & FR_NOMATCH) { - passt = passl; - passl = passin; - fin->fin_fr = frl; - frl = NULL; - if (fr->fr_flags & FR_QUICK) - break; - continue; +/* ------------------------------------------------------------------------ */ +/* Function: fr_getstat */ +/* Returns: Nil */ +/* Parameters: fiop(I) - pointer to ipfilter stats structure */ +/* */ +/* Stores a copy of current pointers, counters, etc, in the friostat */ +/* structure. */ +/* ------------------------------------------------------------------------ */ +void fr_getstat(fiop) +friostat_t *fiop; +{ + int i, j; + + bcopy((char *)frstats, (char *)fiop->f_st, sizeof(filterstats_t) * 2); + fiop->f_locks[IPL_LOGSTATE] = fr_state_lock; + fiop->f_locks[IPL_LOGNAT] = fr_nat_lock; + fiop->f_locks[IPL_LOGIPF] = fr_frag_lock; + fiop->f_locks[IPL_LOGAUTH] = fr_auth_lock; + + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) { + fiop->f_ipf[i][j] = ipfilter[i][j]; + fiop->f_acct[i][j] = ipacct[i][j]; + fiop->f_ipf6[i][j] = ipfilter6[i][j]; + fiop->f_acct6[i][j] = ipacct6[i][j]; } - passl = passt; - passt = fr->fr_flags; - frl = fin->fin_fr; - fin->fin_fr = fr; -#if (BSD >= 199306) && (defined(_KERNEL) || defined(KERNEL)) - if (securelevel <= 0) -#endif - if ((passt & FR_CALLNOW) && fr->fr_func) - passt = (*fr->fr_func)(passt, ip, fin); + fiop->f_ticks = fr_ticks; + fiop->f_active = fr_active; + fiop->f_froute[0] = fr_frouteok[0]; + fiop->f_froute[1] = fr_frouteok[1]; + + fiop->f_running = fr_running; + for (i = 0; i < IPL_LOGSIZE; i++) { + fiop->f_groups[i][0] = ipfgroups[i][0]; + fiop->f_groups[i][1] = ipfgroups[i][1]; + } #ifdef IPFILTER_LOG + fiop->f_logging = 1; +#else + fiop->f_logging = 0; +#endif + fiop->f_defpass = fr_pass; + fiop->f_features = fr_features; + (void) strncpy(fiop->f_version, ipfilter_version, + sizeof(fiop->f_version)); +} + + +#ifdef USE_INET6 +int 
icmptoicmp6types[ICMP_MAXTYPE+1] = { + ICMP6_ECHO_REPLY, /* 0: ICMP_ECHOREPLY */ + -1, /* 1: UNUSED */ + -1, /* 2: UNUSED */ + ICMP6_DST_UNREACH, /* 3: ICMP_UNREACH */ + -1, /* 4: ICMP_SOURCEQUENCH */ + ND_REDIRECT, /* 5: ICMP_REDIRECT */ + -1, /* 6: UNUSED */ + -1, /* 7: UNUSED */ + ICMP6_ECHO_REQUEST, /* 8: ICMP_ECHO */ + -1, /* 9: UNUSED */ + -1, /* 10: UNUSED */ + ICMP6_TIME_EXCEEDED, /* 11: ICMP_TIMXCEED */ + ICMP6_PARAM_PROB, /* 12: ICMP_PARAMPROB */ + -1, /* 13: ICMP_TSTAMP */ + -1, /* 14: ICMP_TSTAMPREPLY */ + -1, /* 15: ICMP_IREQ */ + -1, /* 16: ICMP_IREQREPLY */ + -1, /* 17: ICMP_MASKREQ */ + -1, /* 18: ICMP_MASKREPLY */ +}; + + +int icmptoicmp6unreach[ICMP_MAX_UNREACH] = { + ICMP6_DST_UNREACH_ADDR, /* 0: ICMP_UNREACH_NET */ + ICMP6_DST_UNREACH_ADDR, /* 1: ICMP_UNREACH_HOST */ + -1, /* 2: ICMP_UNREACH_PROTOCOL */ + ICMP6_DST_UNREACH_NOPORT, /* 3: ICMP_UNREACH_PORT */ + -1, /* 4: ICMP_UNREACH_NEEDFRAG */ + ICMP6_DST_UNREACH_NOTNEIGHBOR, /* 5: ICMP_UNREACH_SRCFAIL */ + ICMP6_DST_UNREACH_ADDR, /* 6: ICMP_UNREACH_NET_UNKNOWN */ + ICMP6_DST_UNREACH_ADDR, /* 7: ICMP_UNREACH_HOST_UNKNOWN */ + -1, /* 8: ICMP_UNREACH_ISOLATED */ + ICMP6_DST_UNREACH_ADMIN, /* 9: ICMP_UNREACH_NET_PROHIB */ + ICMP6_DST_UNREACH_ADMIN, /* 10: ICMP_UNREACH_HOST_PROHIB */ + -1, /* 11: ICMP_UNREACH_TOSNET */ + -1, /* 12: ICMP_UNREACH_TOSHOST */ + ICMP6_DST_UNREACH_ADMIN, /* 13: ICMP_UNREACH_ADMIN_PROHIBIT */ +}; +int icmpreplytype6[ICMP6_MAXTYPE + 1]; +#endif + +int icmpreplytype4[ICMP_MAXTYPE + 1]; + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_matchicmpqueryreply */ +/* Returns: int - 1 if "icmp" is a valid reply to "ic" else 0. 
*/ +/* Parameters: v(I) - IP protocol version (4 or 6) */ +/* ic(I) - ICMP information */ +/* icmp(I) - ICMP packet header */ +/* rev(I) - direction (0 = forward/1 = reverse) of packet */ +/* */ +/* Check if the ICMP packet defined by the header pointed to by icmp is a */ +/* reply to one as described by what's in ic. If it is a match, return 1, */ +/* else return 0 for no match. */ +/* ------------------------------------------------------------------------ */ +int fr_matchicmpqueryreply(v, ic, icmp, rev) +int v; +icmpinfo_t *ic; +icmphdr_t *icmp; +int rev; +{ + int ictype; + + ictype = ic->ici_type; + + if (v == 4) { /* - * Just log this packet... + * If we matched its type on the way in, then when going out + * it will still be the same type. */ - if ((passt & FR_LOGMASK) == FR_LOG) { - if (!IPLLOG(passt, ip, fin, m)) { - if (passt & FR_LOGORBLOCK) - passt |= FR_BLOCK|FR_QUICK; - ATOMIC_INCL(frstats[fin->fin_out].fr_skip); - } - ATOMIC_INCL(frstats[fin->fin_out].fr_pkl); - logged = 1; + if ((!rev && (icmp->icmp_type == ictype)) || + (rev && (icmpreplytype4[ictype] == icmp->icmp_type))) { + if (icmp->icmp_type != ICMP_ECHOREPLY) + return 1; + if (icmp->icmp_id == ic->ici_id) + return 1; } -#endif /* IPFILTER_LOG */ - ATOMIC_INCL(fr->fr_hits); - if (passt & FR_ACCOUNT) - fr->fr_bytes += (U_QUAD_T)fin->fin_plen; - else - fin->fin_icode = fr->fr_icode; - fin->fin_rule = rulen; - fin->fin_group = fr->fr_group; - if (fr->fr_grp != NULL) { - fin->fin_fr = fr->fr_grp; - passt = fr_scanlist(passt, ip, fin, m); - if (fin->fin_fr == NULL) { - fin->fin_rule = rulen; - fin->fin_group = fr->fr_group; - fin->fin_fr = fr; - } - if (passt & FR_DONTCACHE) - logged = 1; + } +#ifdef USE_INET6 + else if (v == 6) { + if ((!rev && (icmp->icmp_type == ictype)) || + (rev && (icmpreplytype6[ictype] == icmp->icmp_type))) { + if (icmp->icmp_type != ICMP6_ECHO_REPLY) + return 1; + if (icmp->icmp_id == ic->ici_id) + return 1; } - if (!(skip = fr->fr_skip) && (passt & FR_LOGMASK) != FR_LOG) - 
pass = passt; - FR_DEBUG(("pass %#x\n", pass)); - if (passt & FR_QUICK) - break; } - if (logged) - pass |= FR_DONTCACHE; - pass |= (fi->fi_fl << 24); - return pass; +#endif + return 0; } -/* - * frcheck - filter check - * check using source and destination addresses/ports in a packet whether - * or not to pass it on or not. - */ -int fr_check(ip, hlen, ifp, out -#if defined(_KERNEL) && SOLARIS -, qif, mp) -qif_t *qif; +#ifdef IPFILTER_LOOKUP +/* ------------------------------------------------------------------------ */ +/* Function: fr_resolvelookup */ +/* Returns: void * - NULL = failure, else success. */ +/* Parameters: type(I) - type of lookup these parameters are for. */ +/* number(I) - table number to use when searching */ +/* funcptr(IO) - pointer to pointer for storing IP address */ +/* searching function. */ +/* */ +/* Search for the "table" number passed in amongst those configured for */ +/* that particular type. If the type is recognised then the function to */ +/* call to do the IP address search will be change, regardless of whether */ +/* or not the "table" number exists. 
*/ +/* ------------------------------------------------------------------------ */ +static void *fr_resolvelookup(type, number, funcptr) +u_int type, number; +lookupfunc_t *funcptr; +{ + char name[FR_GROUPLEN]; + iphtable_t *iph; + ip_pool_t *ipo; + void *ptr; + +#if defined(SNPRINTF) && defined(_KERNEL) + SNPRINTF(name, sizeof(name), "%u", number); #else -, mp) + (void) sprintf(name, "%u", number); #endif -mb_t **mp; -ip_t *ip; -int hlen; -void *ifp; -int out; + + READ_ENTER(&ip_poolrw); + + switch (type) + { + case IPLT_POOL : +# if (defined(__osf__) && defined(_KERNEL)) + ptr = NULL; + *funcptr = NULL; +# else + ipo = ip_pool_find(IPL_LOGIPF, name); + ptr = ipo; + if (ipo != NULL) { + ATOMIC_INC32(ipo->ipo_ref); + } + *funcptr = ip_pool_search; +# endif + break; + case IPLT_HASH : + iph = fr_findhtable(IPL_LOGIPF, name); + ptr = iph; + if (iph != NULL) { + ATOMIC_INC32(iph->iph_ref); + } + *funcptr = fr_iphmfindip; + break; + default: + ptr = NULL; + *funcptr = NULL; + break; + } + RWLOCK_EXIT(&ip_poolrw); + + return ptr; +} +#endif + + +/* ------------------------------------------------------------------------ */ +/* Function: frrequest */ +/* Returns: int - 0 == success, > 0 == errno value */ +/* Parameters: unit(I) - device for which this is for */ +/* req(I) - ioctl command (SIOC*) */ +/* data(I) - pointr to ioctl data */ +/* set(I) - 1 or 0 (filter set) */ +/* makecopy(I) - flag indicating whether data points to a rule */ +/* in kernel space & hence doesn't need copying. */ +/* */ +/* This function handles all the requests which operate on the list of */ +/* filter rules. This includes adding, deleting, insertion. It is also */ +/* responsible for creating groups when a "head" rule is loaded. Interface */ +/* names are resolved here and other sanity checks are made on the content */ +/* of the rule structure being loaded. If a rule has user defined timeouts */ +/* then make sure they are created and initialised before exiting. 
*/ +/* ------------------------------------------------------------------------ */ +int frrequest(unit, req, data, set, makecopy) +int unit; +ioctlcmd_t req; +int set, makecopy; +caddr_t data; { + frentry_t frd, *fp, *f, **fprev, **ftail; + int error = 0, in, v; + void *ptr, *uptr; + u_int *p, *pp; + frgroup_t *fg; + char *group; + + fg = NULL; + fp = &frd; + if (makecopy != 0) { + error = fr_inobj(data, fp, IPFOBJ_FRENTRY); + if (error) + return EFAULT; + if ((fp->fr_flags & FR_T_BUILTIN) != 0) + return EINVAL; + fp->fr_ref = 0; + fp->fr_flags |= FR_COPIED; + } else { + fp = (frentry_t *)data; + if ((fp->fr_type & FR_T_BUILTIN) == 0) + return EINVAL; + fp->fr_flags &= ~FR_COPIED; + } + + if (((fp->fr_dsize == 0) && (fp->fr_data != NULL)) || + ((fp->fr_dsize != 0) && (fp->fr_data == NULL))) + return EINVAL; + + v = fp->fr_v; + uptr = fp->fr_data; + /* - * The above really sucks, but short of writing a diff + * Only filter rules for IPv4 or IPv6 are accepted. + */ + if (v == 4) + /*EMPTY*/; +#ifdef USE_INET6 + else if (v == 6) + /*EMPTY*/; +#endif + else { + return EINVAL; + } + + /* + * If the rule is being loaded from user space, i.e. we had to copy it + * into kernel space, then do not trust the function pointer in the + * rule. + */ + if ((makecopy == 1) && (fp->fr_func != NULL)) { + if (fr_findfunc(fp->fr_func) == NULL) + return ESRCH; + error = fr_funcinit(fp); + if (error != 0) + return error; + } + + ptr = NULL; + /* + * Check that the group number does exist and that its use (in/out) + * matches what the rule is. 
+ */ + if (!strncmp(fp->fr_grhead, "0", FR_GROUPLEN)) + *fp->fr_grhead = '\0'; + group = fp->fr_group; + if (!strncmp(group, "0", FR_GROUPLEN)) + *group = '\0'; + + if (FR_ISACCOUNT(fp->fr_flags)) + unit = IPL_LOGCOUNT; + + if ((req != (int)SIOCZRLST) && (*group != '\0')) { + fg = fr_findgroup(group, unit, set, NULL); + if (fg == NULL) + return ESRCH; + if (fg->fg_flags == 0) + fg->fg_flags = fp->fr_flags & FR_INOUT; + else if (fg->fg_flags != (fp->fr_flags & FR_INOUT)) + return ESRCH; + } + + in = (fp->fr_flags & FR_INQUE) ? 0 : 1; + + /* + * Work out which rule list this change is being applied to. + */ + ftail = NULL; + fprev = NULL; + if (unit == IPL_LOGAUTH) + fprev = &ipauth; + else if (v == 4) { + if (FR_ISACCOUNT(fp->fr_flags)) + fprev = &ipacct[in][set]; + else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) != 0) + fprev = &ipfilter[in][set]; + } else if (v == 6) { + if (FR_ISACCOUNT(fp->fr_flags)) + fprev = &ipacct6[in][set]; + else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) != 0) + fprev = &ipfilter6[in][set]; + } + if (fprev == NULL) + return ESRCH; + + if (*group != '\0') { + if (!fg && !(fg = fr_findgroup(group, unit, set, NULL))) + return ESRCH; + fprev = &fg->fg_start; + } + + for (f = *fprev; (f = *fprev) != NULL; fprev = &f->fr_next) + if (fp->fr_collect <= f->fr_collect) + break; + ftail = fprev; + + /* + * Copy in extra data for the rule. + */ + if (fp->fr_dsize != 0) { + if (makecopy != 0) { + KMALLOCS(ptr, void *, fp->fr_dsize); + if (!ptr) + return ENOMEM; + error = COPYIN(uptr, ptr, fp->fr_dsize); + } else { + ptr = uptr; + error = 0; + } + if (error != 0) { + KFREES(ptr, fp->fr_dsize); + return ENOMEM; + } + fp->fr_data = ptr; + } else + fp->fr_data = NULL; + + /* + * Perform per-rule type sanity checks of their members. 
*/ - fr_info_t frinfo, *fc; - register fr_info_t *fin = &frinfo; - int changed, error = EHOSTUNREACH, v = ip->ip_v; - frentry_t *fr = NULL, *list; - u_32_t pass, apass; -#if !SOLARIS || !defined(_KERNEL) - register mb_t *m = *mp; + switch (fp->fr_type & ~FR_T_BUILTIN) + { +#if defined(IPFILTER_BPF) + case FR_T_BPFOPC : + if (fp->fr_dsize == 0) + return EINVAL; + if (!bpf_validate(ptr, fp->fr_dsize/sizeof(struct bpf_insn))) { + if (makecopy && fp->fr_data != NULL) { + KFREES(fp->fr_data, fp->fr_dsize); + } + return EINVAL; + } + break; +#endif + case FR_T_IPF : + if (fp->fr_dsize != sizeof(fripf_t)) + return EINVAL; + + /* + * Allowing a rule with both "keep state" and "with oow" is + * pointless because adding a state entry to the table will + * fail with the out of window (oow) flag set. + */ + if ((fp->fr_flags & FR_KEEPSTATE) && (fp->fr_flx & FI_OOW)) + return EINVAL; + + switch (fp->fr_satype) + { + case FRI_BROADCAST : + case FRI_DYNAMIC : + case FRI_NETWORK : + case FRI_NETMASKED : + case FRI_PEERADDR : + if (fp->fr_sifpidx < 0 || fp->fr_sifpidx > 3) { + if (makecopy && fp->fr_data != NULL) { + KFREES(fp->fr_data, fp->fr_dsize); + } + return EINVAL; + } + break; +#ifdef IPFILTER_LOOKUP + case FRI_LOOKUP : + fp->fr_srcptr = fr_resolvelookup(fp->fr_srctype, + fp->fr_srcnum, + &fp->fr_srcfunc); + break; #endif + default : + break; + } -#ifdef _KERNEL - int p, len, drop = 0, logit = 0; - mb_t *mc = NULL; -# if !defined(__SVR4) && !defined(__svr4__) - /* - * We don't do this section for Solaris because fr_precheck() does a - * pullupmsg() instead, effectively achieving the same result as here - * so no need to duplicate it. 
- */ -# ifdef __sgi - char hbuf[128]; -# endif - int up; - -# if !defined(NETBSD_PF) && \ - ((defined(__FreeBSD__) && (__FreeBSD_version < 500011)) || \ - defined(__OpenBSD__) || defined(_BSDI_VERSION)) - if (fr_checkp != fr_check && fr_running > 0) { - static int counter = 0; - - if (counter == 0) { - printf("WARNING: fr_checkp corrupt: value %lx\n", - (u_long)fr_checkp); - printf("WARNING: fr_checkp should be %lx\n", - (u_long)fr_check); - printf("WARNING: fixing fr_checkp\n"); - } - fr_checkp = fr_check; - counter++; - if (counter == 10000) - counter = 0; + switch (fp->fr_datype) + { + case FRI_BROADCAST : + case FRI_DYNAMIC : + case FRI_NETWORK : + case FRI_NETMASKED : + case FRI_PEERADDR : + if (fp->fr_difpidx < 0 || fp->fr_difpidx > 3) { + if (makecopy && fp->fr_data != NULL) { + KFREES(fp->fr_data, fp->fr_dsize); + } + return EINVAL; + } + break; +#ifdef IPFILTER_LOOKUP + case FRI_LOOKUP : + fp->fr_dstptr = fr_resolvelookup(fp->fr_dsttype, + fp->fr_dstnum, + &fp->fr_dstfunc); + break; +#endif + default : + + break; + } + break; + case FR_T_NONE : + break; + case FR_T_CALLFUNC : + break; + case FR_T_COMPIPF : + break; + default : + if (makecopy && fp->fr_data != NULL) { + KFREES(fp->fr_data, fp->fr_dsize); + } + return EINVAL; } -# endif -# ifdef M_CANFASTFWD /* - * XXX For now, IP Filter and fast-forwarding of cached flows - * XXX are mutually exclusive. Eventually, IP Filter should - * XXX get a "can-fast-forward" filter rule. + * Lookup all the interface names that are part of the rule. */ - m->m_flags &= ~M_CANFASTFWD; -# endif /* M_CANFASTFWD */ -# ifdef CSUM_DELAY_DATA + frsynclist(fp, NULL); + fp->fr_statecnt = 0; + /* - * disable delayed checksums. + * Look for an existing matching filter rule, but don't include the + * next or interface pointer in the comparison (fr_next, fr_ifa). + * This elminates rules which are indentical being loaded. 
Checksum + * the constant part of the filter rule to make comparisons quicker + * (this meaning no pointers are included). */ - if ((out != 0) && (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) { - in_delayed_cksum(m); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } -# endif /* CSUM_DELAY_DATA */ - -# ifdef USE_INET6 - if (v == 6) { - len = ntohs(((ip6_t*)ip)->ip6_plen); - if (!len) - return -1; /* potential jumbo gram */ - len += sizeof(ip6_t); - p = ((ip6_t *)ip)->ip6_nxt; - } else -# endif - { - p = ip->ip_p; - len = ip->ip_len; - } + for (fp->fr_cksum = 0, p = (u_int *)&fp->fr_func, pp = &fp->fr_cksum; + p < pp; p++) + fp->fr_cksum += *p; + pp = (u_int *)(fp->fr_caddr + fp->fr_dsize); + for (p = (u_int *)fp->fr_data; p < pp; p++) + fp->fr_cksum += *p; - fin->fin_mp = mp; - fin->fin_out = out; + WRITE_ENTER(&ipf_mutex); + bzero((char *)frcache, sizeof(frcache)); + + for (; (f = *ftail) != NULL; ftail = &f->fr_next) + if ((fp->fr_cksum == f->fr_cksum) && + (f->fr_dsize == fp->fr_dsize) && + !bcmp((char *)&f->fr_func, + (char *)&fp->fr_func, FR_CMPSIZ) && + (!ptr || !f->fr_data || + !bcmp((char *)ptr, (char *)f->fr_data, f->fr_dsize))) + break; - if ((p == IPPROTO_TCP || p == IPPROTO_UDP || - (v == 4 && p == IPPROTO_ICMP) -# ifdef USE_INET6 - || (v == 6 && p == IPPROTO_ICMPV6) -# endif - )) { - int plen = 0; - - if ((v == 6) || (ip->ip_off & IP_OFFMASK) == 0) - switch(p) - { - case IPPROTO_TCP: - plen = sizeof(tcphdr_t); - break; - case IPPROTO_UDP: - plen = sizeof(udphdr_t); - break; - /* 96 - enough for complete ICMP error IP header */ - case IPPROTO_ICMP: - plen = ICMPERR_MAXPKTLEN - sizeof(ip_t); - break; - case IPPROTO_ESP: - plen = 8; - break; -# ifdef USE_INET6 - case IPPROTO_ICMPV6 : - /* - * XXX does not take intermediate header - * into account - */ - plen = ICMP6ERR_MINPKTLEN + 8 - sizeof(ip6_t); - break; -# endif - } - if ((plen > 0) && (len < hlen + plen)) - fin->fin_fl |= FI_SHORT; - up = MIN(hlen + plen, len); - - if (up > m->m_len) { -# ifdef __sgi - 
/* Under IRIX, avoid m_pullup as it makes ping panic */ - if ((up > sizeof(hbuf)) || (m_length(m) < up)) { - ATOMIC_INCL(frstats[out].fr_pull[1]); - return -1; - } - m_copydata(m, 0, up, hbuf); - ATOMIC_INCL(frstats[out].fr_pull[0]); - ip = (ip_t *)hbuf; -# else /* __ sgi */ -# ifndef linux + /* + * If zero'ing statistics, copy current to caller and zero. + */ + if (req == (ioctlcmd_t)SIOCZRLST) { + if (f == NULL) + error = ESRCH; + else { /* - * Having determined that we need to pullup some data, - * try to bring as much of the packet up into a single - * buffer with the first pullup. This hopefully means - * less need for doing futher pullups. Not needed for - * Solaris because fr_precheck() does it anyway. - * - * The main potential for trouble here is if MLEN/MHLEN - * become quite small, lets say < 64 bytes...but if - * that did happen, BSD networking as a whole would be - * slow/inefficient. + * Copy and reduce lock because of impending copyout. + * Well we should, but if we do then the atomicity of + * this call and the correctness of fr_hits and + * fr_bytes cannot be guaranteed. As it is, this code + * only resets them to 0 if they are successfully + * copied out into user space. */ -# ifdef MHLEN + bcopy((char *)f, (char *)fp, sizeof(*f)); + /* MUTEX_DOWNGRADE(&ipf_mutex); */ + /* - * Assume that M_PKTHDR is set and just work with what - * is left rather than check.. Should not make any - * real difference, anyway. + * When we copy this rule back out, set the data + * pointer to be what it was in user space. 
*/ - if ((MHLEN > up) && (len > up)) - up = MIN(len, MHLEN); -# else - if ((MLEN > up) && (len > up)) - up = MIN(len, MLEN); -# endif - ip = ipf_pullup(m, fin, up, ip); - if (ip == NULL) - return -1; - m = *mp; -# endif /* !linux */ -# endif /* __sgi */ - } else - up = 0; - } else - up = 0; -# endif /* !defined(__SVR4) && !defined(__svr4__) */ -# if SOLARIS - mb_t *m = qif->qf_m; - - if ((u_int)ip & 0x3) - return 2; - fin->fin_mp = mp; - fin->fin_out = out; - fin->fin_qfm = m; - fin->fin_qif = qif; -# endif -#else - fin->fin_mp = mp; - fin->fin_out = out; -#endif /* _KERNEL */ - - changed = 0; - fin->fin_v = v; - fin->fin_ifp = ifp; - if (fr_makefrip(hlen, ip, fin) == -1) - return -1; - -#ifdef _KERNEL -# ifdef USE_INET6 - if (v == 6) { - ATOMIC_INCL(frstats[0].fr_ipv6[out]); - if (((ip6_t *)ip)->ip6_hlim < fr_minttl) { - ATOMIC_INCL(frstats[0].fr_badttl); - if (fr_minttllog & 1) - logit = -3; - if (fr_minttllog & 2) - drop = 1; + fp->fr_data = uptr; + error = fr_outobj(data, fp, IPFOBJ_FRENTRY); + + if (error == 0) { + if ((f->fr_dsize != 0) && (uptr != NULL)) + error = COPYOUT(f->fr_data, uptr, + f->fr_dsize); + if (error == 0) { + f->fr_hits = 0; + f->fr_bytes = 0; + } + } } - } else -# endif - if (!out) { - if (fr_chksrc && !fr_verifysrc(ip->ip_src, ifp)) { - ATOMIC_INCL(frstats[0].fr_badsrc); - if (fr_chksrc & 1) - drop = 1; - if (fr_chksrc & 2) - logit = -2; - } else if (ip->ip_ttl < fr_minttl) { - ATOMIC_INCL(frstats[0].fr_badttl); - if (fr_minttllog & 1) - logit = -3; - if (fr_minttllog & 2) - drop = 1; - } - } - if (drop) { -# ifdef IPFILTER_LOG - if (logit) { - fin->fin_group = logit; - pass = FR_INQUE|FR_NOMATCH|FR_LOGB; - (void) IPLLOG(pass, ip, fin, m); + + if ((ptr != NULL) && (makecopy != 0)) { + KFREES(ptr, fp->fr_dsize); } -# endif -# if !SOLARIS - m_freem(m); -# endif + RWLOCK_EXIT(&ipf_mutex); return error; } -#endif - pass = fr_pass; - if (fin->fin_fl & FI_SHORT) { - ATOMIC_INCL(frstats[out].fr_short); - } - READ_ENTER(&ipf_mutex); + if (!f) { + 
if (req == (ioctlcmd_t)SIOCINAFR || + req == (ioctlcmd_t)SIOCINIFR) { + ftail = fprev; + if (fp->fr_hits != 0) { + while (--fp->fr_hits && (f = *ftail)) + ftail = &f->fr_next; + } + f = NULL; + ptr = NULL; + error = 0; + } + } /* - * Check auth now. This, combined with the check below to see if apass - * is 0 is to ensure that we don't count the packet twice, which can - * otherwise occur when we reprocess it. As it is, we only count it - * after it has no auth. table matchup. This also stops NAT from - * occuring until after the packet has been auth'd. + * Request to remove a rule. */ - apass = fr_checkauth(ip, fin); + if (req == (ioctlcmd_t)SIOCRMAFR || req == (ioctlcmd_t)SIOCRMIFR) { + if (!f) + error = ESRCH; + else { + /* + * Do not allow activity from user space to interfere + * with rules not loaded that way. + */ + if ((makecopy == 1) && !(f->fr_flags & FR_COPIED)) { + error = EPERM; + goto done; + } - if (!out) { -#ifdef USE_INET6 - if (v == 6) - list = ipacct6[0][fr_active]; - else + /* + * Return EBUSY if the rule is being reference by + * something else (eg state information. 
+ */ + if (f->fr_ref > 1) { + error = EBUSY; + goto done; + } +#ifdef IPFILTER_SCAN + if (f->fr_isctag[0] != '\0' && + (f->fr_isc != (struct ipscan *)-1)) + ipsc_detachfr(f); #endif - list = ipacct[0][fr_active]; - changed = ip_natin(ip, fin); - if (!apass && (fin->fin_fr = list) && - (fr_scanlist(FR_NOMATCH, ip, fin, m) & FR_ACCOUNT)) { - ATOMIC_INCL(frstats[0].fr_acct); + if ((fg != NULL) && (fg->fg_head != NULL)) + fg->fg_head->fr_ref--; + if (unit == IPL_LOGAUTH) { + error = fr_preauthcmd(req, f, ftail); + goto done; + } + if (*f->fr_grhead != '\0') + fr_delgroup(f->fr_grhead, unit, set); + fr_fixskip(fprev, f, -1); + *ftail = f->fr_next; + f->fr_next = NULL; + (void)fr_derefrule(&f); } - } - - if (!apass) { - if ((fin->fin_fl & FI_FRAG) == FI_FRAG) - fr = ipfr_knownfrag(ip, fin); - if (!fr && !(fin->fin_fl & FI_SHORT)) - fr = fr_checkstate(ip, fin); - if (fr != NULL) - pass = fr->fr_flags; - if (fr && (pass & FR_LOGFIRST)) - pass &= ~(FR_LOGFIRST|FR_LOG); - } - - if (apass || !fr) { + } else { /* - * If a packet is found in the auth table, then skip checking - * the access lists for permission but we do need to consider - * the result as if it were from the ACL's. + * Not removing, so we must be adding/inserting a rule. */ - if (!apass) { - fc = frcache + out; - if (!bcmp((char *)fin, (char *)fc, FI_CSIZE)) { - /* - * copy cached data so we can unlock the mutex - * earlier. 
- */ - bcopy((char *)fc, (char *)fin, FI_COPYSIZE); - ATOMIC_INCL(frstats[out].fr_chit); - if ((fr = fin->fin_fr)) { - ATOMIC_INCL(fr->fr_hits); - pass = fr->fr_flags; - } - } else { -#ifdef USE_INET6 - if (v == 6) - list = ipfilter6[out][fr_active]; - else + if (f) + error = EEXIST; + else { + if (unit == IPL_LOGAUTH) { + error = fr_preauthcmd(req, fp, ftail); + goto done; + } + if (makecopy) { + KMALLOC(f, frentry_t *); + } else + f = fp; + if (f != NULL) { + if (fg != NULL && fg->fg_head!= NULL ) + fg->fg_head->fr_ref++; + if (fp != f) + bcopy((char *)fp, (char *)f, + sizeof(*f)); + MUTEX_NUKE(&f->fr_lock); + MUTEX_INIT(&f->fr_lock, "filter rule lock"); +#ifdef IPFILTER_SCAN + if (f->fr_isctag[0] != '\0' && + ipsc_attachfr(f)) + f->fr_isc = (struct ipscan *)-1; #endif - list = ipfilter[out][fr_active]; - if ((fin->fin_fr = list)) - pass = fr_scanlist(fr_pass, ip, fin, m); - if (!(pass & (FR_KEEPSTATE|FR_DONTCACHE))) - bcopy((char *)fin, (char *)fc, - FI_COPYSIZE); - if (pass & FR_NOMATCH) { - ATOMIC_INCL(frstats[out].fr_nom); - fin->fin_fr = NULL; + f->fr_hits = 0; + if (makecopy != 0) + f->fr_ref = 1; + f->fr_next = *ftail; + *ftail = f; + if (req == (ioctlcmd_t)SIOCINIFR || + req == (ioctlcmd_t)SIOCINAFR) + fr_fixskip(fprev, f, 1); + f->fr_grp = NULL; + group = f->fr_grhead; + if (*group != '\0') { + fg = fr_addgroup(group, f, f->fr_flags, + unit, set); + if (fg != NULL) + f->fr_grp = &fg->fg_start; } - } - } else - pass = apass; - fr = fin->fin_fr; - - /* - * If we fail to add a packet to the authorization queue, - * then we drop the packet later. However, if it was added - * then pretend we've dropped it already. 
- */ - if ((pass & FR_AUTH)) { - if (fr_newauth((mb_t *)m, fin, ip) != 0) { - m = *mp = NULL; - error = 0; } else - error = ENOSPC; + error = ENOMEM; } + } +done: + RWLOCK_EXIT(&ipf_mutex); + if ((ptr != NULL) && (error != 0) && (makecopy != 0)) { + KFREES(ptr, fp->fr_dsize); + } + return (error); +} - if (pass & FR_PREAUTH) { - READ_ENTER(&ipf_auth); - if ((fin->fin_fr = ipauth) && - (pass = fr_scanlist(0, ip, fin, m))) { - ATOMIC_INCL(fr_authstats.fas_hits); - } else { - ATOMIC_INCL(fr_authstats.fas_miss); - } - RWLOCK_EXIT(&ipf_auth); + +/* ------------------------------------------------------------------------ */ +/* Function: fr_funcinit */ +/* Returns: int - 0 == success, else ESRCH: cannot resolve rule details */ +/* Parameters: fr(I) - pointer to filter rule */ +/* */ +/* If a rule is a call rule, then check if the function it points to needs */ +/* an init function to be called now the rule has been loaded. */ +/* ------------------------------------------------------------------------ */ +static int fr_funcinit(fr) +frentry_t *fr; +{ + ipfunc_resolve_t *ft; + int err; + + err = ESRCH; + + for (ft = fr_availfuncs; ft->ipfu_addr != NULL; ft++) + if (ft->ipfu_addr == fr->fr_func) { + err = 0; + if (ft->ipfu_init != NULL) + err = (*ft->ipfu_init)(fr); + break; } + return err; +} - fin->fin_fr = fr; - if ((pass & (FR_KEEPFRAG|FR_KEEPSTATE)) == FR_KEEPFRAG) { - if (fin->fin_fl & FI_FRAG) { - if (ipfr_newfrag(ip, fin) == -1) { - ATOMIC_INCL(frstats[out].fr_bnfr); - } else { - ATOMIC_INCL(frstats[out].fr_nfr); - } - } else { - ATOMIC_INCL(frstats[out].fr_cfr); + +/* ------------------------------------------------------------------------ */ +/* Function: fr_findfunc */ +/* Returns: ipfunc_t - pointer to function if found, else NULL */ +/* Parameters: funcptr(I) - function pointer to lookup */ +/* */ +/* Look for a function in the table of known functions. 
*/ +/* ------------------------------------------------------------------------ */ +static ipfunc_t fr_findfunc(funcptr) +ipfunc_t funcptr; +{ + ipfunc_resolve_t *ft; + + for (ft = fr_availfuncs; ft->ipfu_addr != NULL; ft++) + if (ft->ipfu_addr == funcptr) + return funcptr; + return NULL; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_resolvefunc */ +/* Returns: int - 0 == success, else error */ +/* Parameters: data(IO) - ioctl data pointer to ipfunc_resolve_t struct */ +/* */ +/* Copy in a ipfunc_resolve_t structure and then fill in the missing field. */ +/* This will either be the function name (if the pointer is set) or the */ +/* function pointer if the name is set. When found, fill in the other one */ +/* so that the entire, complete, structure can be copied back to user space.*/ +/* ------------------------------------------------------------------------ */ +int fr_resolvefunc(data) +void *data; +{ + ipfunc_resolve_t res, *ft; + + BCOPYIN(data, &res, sizeof(res)); + + if (res.ipfu_addr == NULL && res.ipfu_name[0] != '\0') { + for (ft = fr_availfuncs; ft->ipfu_addr != NULL; ft++) + if (strncmp(res.ipfu_name, ft->ipfu_name, + sizeof(res.ipfu_name)) == 0) { + res.ipfu_addr = ft->ipfu_addr; + res.ipfu_init = ft->ipfu_init; + if (COPYOUT(&res, data, sizeof(res)) != 0) + return EFAULT; + return 0; } - } - if (pass & FR_KEEPSTATE) { - if (fr_addstate(ip, fin, NULL, 0) == NULL) { - ATOMIC_INCL(frstats[out].fr_bads); - if (pass & FR_PASS) { - pass &= ~FR_PASS; - pass |= FR_BLOCK; - } - } else { - ATOMIC_INCL(frstats[out].fr_ads); + } + if (res.ipfu_addr != NULL && res.ipfu_name[0] == '\0') { + for (ft = fr_availfuncs; ft->ipfu_addr != NULL; ft++) + if (ft->ipfu_addr == res.ipfu_addr) { + (void) strncpy(res.ipfu_name, ft->ipfu_name, + sizeof(res.ipfu_name)); + res.ipfu_init = ft->ipfu_init; + if (COPYOUT(&res, data, sizeof(res)) != 0) + return EFAULT; + return 0; } - } - } else if (fr != NULL) { - pass = 
fr->fr_flags; - if (pass & FR_LOGFIRST) - pass &= ~(FR_LOGFIRST|FR_LOG); } + return ESRCH; +} -#if (BSD >= 199306) && (defined(_KERNEL) || defined(KERNEL)) - if (securelevel <= 0) -#endif - if (fr && fr->fr_func && !(pass & FR_CALLNOW)) - pass = (*fr->fr_func)(pass, ip, fin); + +#if !defined(_KERNEL) || (!defined(__NetBSD__) && !defined(__OpenBSD__) && !defined(__FreeBSD__)) || \ + (defined(__FreeBSD__) && (__FreeBSD_version < 490000)) || \ + (defined(__NetBSD__) && (__NetBSD_Version__ < 105000000)) || \ + (defined(__OpenBSD__) && (OpenBSD < 200006)) +/* + * From: NetBSD + * ppsratecheck(): packets (or events) per second limitation. + */ +int +ppsratecheck(lasttime, curpps, maxpps) + struct timeval *lasttime; + int *curpps; + int maxpps; /* maximum pps allowed */ +{ + struct timeval tv, delta; + int rv; + + GETKTIME(&tv); + + delta.tv_sec = tv.tv_sec - lasttime->tv_sec; + delta.tv_usec = tv.tv_usec - lasttime->tv_usec; + if (delta.tv_usec < 0) { + delta.tv_sec--; + delta.tv_usec += 1000000; + } /* - * Only count/translate packets which will be passed on, out the - * interface. + * check for 0,0 is so that the message will be seen at least once. + * if more than one second have passed since the last update of + * lasttime, reset the counter. + * + * we do increment *curpps even in *curpps < maxpps case, as some may + * try to use *curpps for stat purposes as well. 
*/ - if (out && (pass & FR_PASS)) { -#ifdef USE_INET6 - if (v == 6) - list = ipacct6[1][fr_active]; - else + if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) || + delta.tv_sec >= 1) { + *lasttime = tv; + *curpps = 0; + rv = 1; + } else if (maxpps < 0) + rv = 1; + else if (*curpps < maxpps) + rv = 1; + else + rv = 0; + *curpps = *curpps + 1; + + return (rv); +} #endif - list = ipacct[1][fr_active]; - if (list != NULL) { - u_32_t sg, sr; - fin->fin_fr = list; - sg = fin->fin_group; - sr = fin->fin_rule; - if (fr_scanlist(FR_NOMATCH, ip, fin, m) & FR_ACCOUNT) { - ATOMIC_INCL(frstats[1].fr_acct); - } - fin->fin_group = sg; - fin->fin_rule = sr; - fin->fin_fr = fr; - } - changed = ip_natout(ip, fin); - } else - fin->fin_fr = fr; - RWLOCK_EXIT(&ipf_mutex); -#ifdef IPFILTER_LOG - if ((fr_flags & FF_LOGGING) || (pass & FR_LOGMASK)) { - if ((fr_flags & FF_LOGNOMATCH) && (pass & FR_NOMATCH)) { - pass |= FF_LOGNOMATCH; - ATOMIC_INCL(frstats[out].fr_npkl); - goto logit; - } else if (((pass & FR_LOGMASK) == FR_LOGP) || - ((pass & FR_PASS) && (fr_flags & FF_LOGPASS))) { - if ((pass & FR_LOGMASK) != FR_LOGP) - pass |= FF_LOGPASS; - ATOMIC_INCL(frstats[out].fr_ppkl); - goto logit; - } else if (((pass & FR_LOGMASK) == FR_LOGB) || - ((pass & FR_BLOCK) && (fr_flags & FF_LOGBLOCK))) { - if ((pass & FR_LOGMASK) != FR_LOGB) - pass |= FF_LOGBLOCK; - ATOMIC_INCL(frstats[out].fr_bpkl); -logit: - if (!IPLLOG(pass, ip, fin, m)) { - ATOMIC_INCL(frstats[out].fr_skip); - if ((pass & (FR_PASS|FR_LOGORBLOCK)) == - (FR_PASS|FR_LOGORBLOCK)) - pass ^= FR_PASS|FR_BLOCK; - } +/* ------------------------------------------------------------------------ */ +/* Function: fr_derefrule */ +/* Returns: int - 0 == rule freed up, else rule not freed */ +/* Parameters: fr(I) - pointer to filter rule */ +/* */ +/* Decrement the reference counter to a rule by one. If it reaches zero, */ +/* free it and any associated storage space being used by it. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_derefrule(frp) +frentry_t **frp; +{ + frentry_t *fr; + + fr = *frp; + + MUTEX_ENTER(&fr->fr_lock); + fr->fr_ref--; + if (fr->fr_ref == 0) { + MUTEX_EXIT(&fr->fr_lock); + MUTEX_DESTROY(&fr->fr_lock); + +#ifdef IPFILTER_LOOKUP + if (fr->fr_type == FR_T_IPF && fr->fr_satype == FRI_LOOKUP) + ip_lookup_deref(fr->fr_srctype, fr->fr_srcptr); + if (fr->fr_type == FR_T_IPF && fr->fr_datype == FRI_LOOKUP) + ip_lookup_deref(fr->fr_dsttype, fr->fr_dstptr); +#endif + + if (fr->fr_dsize) { + KFREES(fr->fr_data, fr->fr_dsize); + } + if ((fr->fr_flags & FR_COPIED) != 0) { + KFREE(fr); + return 0; } + return 1; + } else { + MUTEX_EXIT(&fr->fr_lock); } -#endif /* IPFILTER_LOG */ + *frp = NULL; + return -1; +} -#ifdef _KERNEL - /* - * Only allow FR_DUP to work if a rule matched - it makes no sense to - * set FR_DUP as a "default" as there are no instructions about where - * to send the packet. - */ - if (fr && (pass & FR_DUP)) -# if SOLARIS - mc = dupmsg(m); -# else -# if defined(__OpenBSD__) && (OpenBSD >= 199905) - mc = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); -# else - mc = m_copy(m, 0, M_COPYALL); -# endif -# endif + +#ifdef IPFILTER_LOOKUP +/* ------------------------------------------------------------------------ */ +/* Function: fr_grpmapinit */ +/* Returns: int - 0 == success, else ESRCH because table entry not found*/ +/* Parameters: fr(I) - pointer to rule to find hash table for */ +/* */ +/* Looks for group hash table fr_arg and stores a pointer to it in fr_ptr. */ +/* fr_ptr is later used by fr_srcgrpmap and fr_dstgrpmap. 
*/ +/* ------------------------------------------------------------------------ */ +static int fr_grpmapinit(fr) +frentry_t *fr; +{ + char name[FR_GROUPLEN]; + iphtable_t *iph; + +#if defined(SNPRINTF) && defined(_KERNEL) + SNPRINTF(name, sizeof(name), "%d", fr->fr_arg); +#else + (void) sprintf(name, "%d", fr->fr_arg); #endif - if (pass & FR_PASS) { - ATOMIC_INCL(frstats[out].fr_pass); - } else if (pass & FR_BLOCK) { - ATOMIC_INCL(frstats[out].fr_block); - /* - * Should we return an ICMP packet to indicate error - * status passing through the packet filter ? - * WARNING: ICMP error packets AND TCP RST packets should - * ONLY be sent in repsonse to incoming packets. Sending them - * in response to outbound packets can result in a panic on - * some operating systems. - */ - if (!out) { - if (changed == -1) - /* - * If a packet results in a NAT error, do not - * send a reset or ICMP error as it may disrupt - * an existing flow. This is the proxy saying - * the content is bad so just drop the packet - * silently. - */ - ; - else if (pass & FR_RETICMP) { - int dst; + iph = fr_findhtable(IPL_LOGIPF, name); + if (iph == NULL) + return ESRCH; + if ((iph->iph_flags & FR_INOUT) != (fr->fr_flags & FR_INOUT)) + return ESRCH; + fr->fr_ptr = iph; + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_srcgrpmap */ +/* Returns: frentry_t * - pointer to "new last matching" rule or NULL */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(IO) - pointer to current/new filter decision (unused) */ +/* */ +/* Look for a rule group head in a hash table, using the source address as */ +/* the key, and descend into that group and continue matching rules against */ +/* the packet. 
*/ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_srcgrpmap(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + frgroup_t *fg; + void *rval; + + rval = fr_iphmfindgroup(fin->fin_fr->fr_ptr, &fin->fin_src); + if (rval == NULL) + return NULL; + + fg = rval; + fin->fin_fr = fg->fg_start; + (void) fr_scanlist(fin, *passp); + return fin->fin_fr; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_dstgrpmap */ +/* Returns: frentry_t * - pointer to "new last matching" rule or NULL */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(IO) - pointer to current/new filter decision (unused) */ +/* */ +/* Look for a rule group head in a hash table, using the destination */ +/* address as the key, and descend into that group and continue matching */ +/* rules against the packet. */ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_dstgrpmap(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + frgroup_t *fg; + void *rval; + + rval = fr_iphmfindgroup(fin->fin_fr->fr_ptr, &fin->fin_dst); + if (rval == NULL) + return NULL; + + fg = rval; + fin->fin_fr = fg->fg_start; + (void) fr_scanlist(fin, *passp); + return fin->fin_fr; +} +#endif /* IPFILTER_LOOKUP */ + +/* + * Queue functions + * =============== + * These functions manage objects on queues for efficient timeouts. There are + * a number of system defined queues as well as user defined timeouts. It is + * expected that a lock is held in the domain in which the queue belongs + * (i.e. either state or NAT) when calling any of these functions that prevents + * fr_freetimeoutqueue() from being called at the same time as any other. + */ + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_addtimeoutqueue */ +/* Returns: struct ifqtq * - NULL if malloc fails, else pointer to */ +/* timeout queue with given interval. 
*/ +/* Parameters: parent(I) - pointer to pointer to parent node of this list */ +/* of interface queues. */ +/* seconds(I) - timeout value in seconds for this queue. */ +/* */ +/* This routine first looks for a timeout queue that matches the interval */ +/* being requested. If it finds one, increments the reference counter and */ +/* returns a pointer to it. If none are found, it allocates a new one and */ +/* inserts it at the top of the list. */ +/* */ +/* Locking. */ +/* It is assumed that the caller of this function has an appropriate lock */ +/* held (exclusively) in the domain that encompasses 'parent'. */ +/* ------------------------------------------------------------------------ */ +ipftq_t *fr_addtimeoutqueue(parent, seconds) +ipftq_t **parent; +u_int seconds; +{ + ipftq_t *ifq; + u_int period; - if ((pass & FR_RETMASK) == FR_FAKEICMP) - dst = 1; - else - dst = 0; - send_icmp_err(ip, ICMP_UNREACH, fin, dst); - ATOMIC_INCL(frstats[0].fr_ret); - } else if (((pass & FR_RETMASK) == FR_RETRST) && - !(fin->fin_fl & FI_SHORT)) { - if (send_reset(ip, fin) == 0) { - ATOMIC_INCL(frstats[1].fr_ret); - } - } - } else { - if (pass & FR_RETRST) - error = ECONNRESET; + period = seconds * IPF_HZ_DIVIDE; + + MUTEX_ENTER(&ipf_timeoutlock); + for (ifq = *parent; ifq != NULL; ifq = ifq->ifq_next) { + if (ifq->ifq_ttl == period) { + /* + * Reset the delete flag, if set, so the structure + * gets reused rather than freed and reallocated. + */ + MUTEX_ENTER(&ifq->ifq_lock); + ifq->ifq_flags &= ~IFQF_DELETE; + ifq->ifq_ref++; + MUTEX_EXIT(&ifq->ifq_lock); + MUTEX_EXIT(&ipf_timeoutlock); + + return ifq; } } - /* - * If we didn't drop off the bottom of the list of rules (and thus - * the 'current' rule fr is not NULL), then we may have some extra - * instructions about what to do with a packet. - * Once we're finished return to our caller, freeing the packet if - * we are dropping it (* BSD ONLY *).
- */ - if ((changed == -1) && (pass & FR_PASS)) { - pass &= ~FR_PASS; - pass |= FR_BLOCK; + KMALLOC(ifq, ipftq_t *); + if (ifq != NULL) { + ifq->ifq_ttl = period; + ifq->ifq_head = NULL; + ifq->ifq_tail = &ifq->ifq_head; + ifq->ifq_next = *parent; + ifq->ifq_pnext = parent; + ifq->ifq_ref = 1; + ifq->ifq_flags = IFQF_USER; + *parent = ifq; + fr_userifqs++; + MUTEX_NUKE(&ifq->ifq_lock); + MUTEX_INIT(&ifq->ifq_lock, "ipftq mutex"); } -#if defined(_KERNEL) -# if !SOLARIS -# if !defined(linux) - if (fr) { - frdest_t *fdp = &fr->fr_tif; + MUTEX_EXIT(&ipf_timeoutlock); + return ifq; +} - if (((pass & FR_FASTROUTE) && !out) || - (fdp->fd_ifp && fdp->fd_ifp != (struct ifnet *)-1)) { - (void) ipfr_fastroute(m, mp, fin, fdp); - m = *mp; - } - if (mc != NULL) - (void) ipfr_fastroute(mc, &mc, fin, &fr->fr_dif); +/* ------------------------------------------------------------------------ */ +/* Function: fr_deletetimeoutqueue */ +/* Returns: int - new reference count value of the timeout queue */ +/* Parameters: ifq(I) - timeout queue which is losing a reference. */ +/* Locks: ifq->ifq_lock */ +/* */ +/* This routine must be called when we're discarding a pointer to a timeout */ +/* queue object, taking care of the reference counter. */ +/* */ +/* Now that this just sets a DELETE flag, it requires the expire code to */ +/* check the list of user defined timeout queues and call the free function */ +/* below (currently commented out) to stop memory leaking. It is done this */ +/* way because the locking may not be sufficient to safely do a free when */ +/* this function is called. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_deletetimeoutqueue(ifq) +ipftq_t *ifq; +{ + + ifq->ifq_ref--; + if ((ifq->ifq_ref == 0) && ((ifq->ifq_flags & IFQF_USER) != 0)) { + ifq->ifq_flags |= IFQF_DELETE; } - if (!(pass & FR_PASS) && m) { - m_freem(m); - m = *mp = NULL; + return ifq->ifq_ref; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_freetimeoutqueue */ +/* Parameters: ifq(I) - timeout queue which is losing a reference. */ +/* Returns: Nil */ +/* */ +/* Locking: */ +/* It is assumed that the caller of this function has an appropriate lock */ +/* held (exclusively) in the domain that encompasses the caller's "domain". */ +/* The ifq_lock for this structure should not be held. */ +/* */ +/* Remove a user defined timeout queue from the list of queues it is in and */ +/* tidy up after this is done. */ +/* ------------------------------------------------------------------------ */ +void fr_freetimeoutqueue(ifq) +ipftq_t *ifq; +{ + + + if (((ifq->ifq_flags & IFQF_DELETE) == 0) || (ifq->ifq_ref != 0) || + ((ifq->ifq_flags & IFQF_USER) == 0)) { + printf("fr_freetimeoutqueue(%lx) flags 0x%x ttl %d ref %d\n", + (u_long)ifq, ifq->ifq_flags, ifq->ifq_ttl, + ifq->ifq_ref); + return; } -# ifdef __sgi - else if (changed && up && m) - m_copyback(m, 0, up, hbuf); -# endif -# endif /* !linux */ -# else /* !SOLARIS */ - if (fr) { - frdest_t *fdp = &fr->fr_tif; - if (((pass & FR_FASTROUTE) && !out) || - (fdp->fd_ifp && fdp->fd_ifp != (struct ifnet *)-1)) - (void) ipfr_fastroute(ip, m, mp, fin, fdp); + /* + * Remove from its position in the list.
+ */ + *ifq->ifq_pnext = ifq->ifq_next; + if (ifq->ifq_next != NULL) + ifq->ifq_next->ifq_pnext = ifq->ifq_pnext; + + MUTEX_DESTROY(&ifq->ifq_lock); + fr_userifqs--; + KFREE(ifq); +} - if (mc != NULL) - (void) ipfr_fastroute(ip, mc, &mc, fin, &fr->fr_dif); + +/* ------------------------------------------------------------------------ */ +/* Function: fr_deletequeueentry */ +/* Returns: Nil */ +/* Parameters: tqe(I) - timeout queue entry to delete */ +/* ifq(I) - timeout queue to remove entry from */ +/* */ +/* Remove a tail queue entry from its queue and make it an orphan. */ +/* fr_deletetimeoutqueue is called to make sure the reference count on the */ +/* queue is correct. We can't, however, call fr_freetimeoutqueue because */ +/* the correct lock(s) may not be held that would make it safe to do so. */ +/* ------------------------------------------------------------------------ */ +void fr_deletequeueentry(tqe) +ipftqent_t *tqe; +{ + ipftq_t *ifq; + + ifq = tqe->tqe_ifq; + if (ifq == NULL) + return; + + MUTEX_ENTER(&ifq->ifq_lock); + + if (tqe->tqe_pnext != NULL) { + *tqe->tqe_pnext = tqe->tqe_next; + if (tqe->tqe_next != NULL) + tqe->tqe_next->tqe_pnext = tqe->tqe_pnext; + else /* we must be the tail anyway */ + ifq->ifq_tail = tqe->tqe_pnext; + + tqe->tqe_pnext = NULL; + tqe->tqe_ifq = NULL; } -# endif /* !SOLARIS */ -#if (OpenBSD >= 200311) && defined(_KERNEL) - if (pass & FR_PASS) { - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); + + (void) fr_deletetimeoutqueue(ifq); + + MUTEX_EXIT(&ifq->ifq_lock); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_queuefront */ +/* Returns: Nil */ +/* Parameters: tqe(I) - pointer to timeout queue entry */ +/* */ +/* Move a queue entry to the front of the queue, if it isn't already there. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_queuefront(tqe) +ipftqent_t *tqe; +{ + ipftq_t *ifq; + + ifq = tqe->tqe_ifq; + if (ifq == NULL) + return; + + MUTEX_ENTER(&ifq->ifq_lock); + if (ifq->ifq_head != tqe) { + *tqe->tqe_pnext = tqe->tqe_next; + if (tqe->tqe_next) + tqe->tqe_next->tqe_pnext = tqe->tqe_pnext; + else + ifq->ifq_tail = tqe->tqe_pnext; + + tqe->tqe_next = ifq->ifq_head; + ifq->ifq_head->tqe_pnext = &tqe->tqe_next; + ifq->ifq_head = tqe; + tqe->tqe_pnext = &ifq->ifq_head; } -#endif - return (pass & FR_PASS) ? 0 : error; -#else /* _KERNEL */ - if (pass & FR_NOMATCH) - return 1; - if (pass & FR_PASS) - return 0; - if (pass & FR_AUTH) - return -2; - if ((pass & FR_RETMASK) == FR_RETRST) - return -3; - if ((pass & FR_RETMASK) == FR_RETICMP) - return -4; - if ((pass & FR_RETMASK) == FR_FAKEICMP) - return -5; - return -1; -#endif /* _KERNEL */ + MUTEX_EXIT(&ifq->ifq_lock); } -/* - * ipf_cksum - * addr should be 16bit aligned and len is in bytes. - * length is in bytes - */ -u_short ipf_cksum(addr, len) -register u_short *addr; -register int len; +/* ------------------------------------------------------------------------ */ +/* Function: fr_queueback */ +/* Returns: Nil */ +/* Parameters: tqe(I) - pointer to timeout queue entry */ +/* */ +/* Move a queue entry to the back of the queue, if it isn't already there. */ +/* ------------------------------------------------------------------------ */ +void fr_queueback(tqe) +ipftqent_t *tqe; { - register u_32_t sum = 0; + ipftq_t *ifq; - for (sum = 0; len > 1; len -= 2) - sum += *addr++; + ifq = tqe->tqe_ifq; + if (ifq == NULL) + return; + tqe->tqe_die = fr_ticks + ifq->ifq_ttl; - /* mop up an odd byte, if necessary */ - if (len == 1) - sum += *(u_char *)addr; + MUTEX_ENTER(&ifq->ifq_lock); + if (tqe->tqe_next == NULL) { /* at the end already ? 
*/ + MUTEX_EXIT(&ifq->ifq_lock); + return; + } /* - * add back carry outs from top 16 bits to low 16 bits + * Remove from list */ - sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */ - sum += (sum >> 16); /* add carry */ - return (u_short)(~sum); + *tqe->tqe_pnext = tqe->tqe_next; + tqe->tqe_next->tqe_pnext = tqe->tqe_pnext; + + /* + * Make it the last entry. + */ + tqe->tqe_next = NULL; + tqe->tqe_pnext = ifq->ifq_tail; + *ifq->ifq_tail = tqe; + ifq->ifq_tail = &tqe->tqe_next; + MUTEX_EXIT(&ifq->ifq_lock); } -/* - * NB: This function assumes we've pullup'd enough for all of the IP header - * and the TCP header. We also assume that data blocks aren't allocated in - * odd sizes. - */ -u_short fr_tcpsum(m, ip, tcp) -mb_t *m; -ip_t *ip; -tcphdr_t *tcp; +/* ------------------------------------------------------------------------ */ +/* Function: fr_queueappend */ +/* Returns: Nil */ +/* Parameters: tqe(I) - pointer to timeout queue entry */ +/* ifq(I) - pointer to timeout queue */ +/* parent(I) - owning object pointer */ +/* */ +/* Add a new item to this queue and put it on the very end.
*/ +/* ------------------------------------------------------------------------ */ +void fr_queueappend(tqe, ifq, parent) +ipftqent_t *tqe; +ipftq_t *ifq; +void *parent; { - u_short *sp, slen, ts; - u_int sum, sum2; - int hlen; + MUTEX_ENTER(&ifq->ifq_lock); + tqe->tqe_parent = parent; + tqe->tqe_pnext = ifq->ifq_tail; + *ifq->ifq_tail = tqe; + ifq->ifq_tail = &tqe->tqe_next; + tqe->tqe_next = NULL; + tqe->tqe_ifq = ifq; + tqe->tqe_die = fr_ticks + ifq->ifq_ttl; + ifq->ifq_ref++; + MUTEX_EXIT(&ifq->ifq_lock); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_movequeue */ +/* Returns: Nil */ +/* Parameters: tqe(I) - pointer to timeout queue entry to move */ +/* oifq(I) - old timeout queue the entry was on */ +/* nifq(I) - new timeout queue to put entry on */ +/* */ +/* Move a queue entry from one timeout queue to another timeout queue. */ +/* If it notices that the current entry is already last and does not need */ +/* to move queue, then return. */ +/* ------------------------------------------------------------------------ */ +void fr_movequeue(tqe, oifq, nifq) +ipftqent_t *tqe; +ipftq_t *oifq, *nifq; +{ /* - * Add up IP Header portion + * Is the operation here going to be a no-op ?
*/ - hlen = ip->ip_hl << 2; - slen = ip->ip_len - hlen; - sum = htons((u_short)ip->ip_p); - sum += htons(slen); - sp = (u_short *)&ip->ip_src; - sum += *sp++; /* ip_src */ - sum += *sp++; - sum += *sp++; /* ip_dst */ - sum += *sp++; - ts = tcp->th_sum; - tcp->th_sum = 0; -#ifdef KERNEL -# if SOLARIS - sum2 = ip_cksum(m, hlen, sum); /* hlen == offset */ - sum2 = (sum2 & 0xffff) + (sum2 >> 16); - sum2 = ~sum2 & 0xffff; -# else /* SOLARIS */ -# if defined(BSD) || defined(sun) -# if BSD >= 199306 - m->m_data += hlen; -# else - m->m_off += hlen; -# endif - m->m_len -= hlen; - sum2 = in_cksum(m, slen); - m->m_len += hlen; -# if BSD >= 199306 - m->m_data -= hlen; -# else - m->m_off -= hlen; -# endif + MUTEX_ENTER(&oifq->ifq_lock); + if (oifq == nifq && *oifq->ifq_tail == tqe) { + MUTEX_EXIT(&oifq->ifq_lock); + return; + } + /* - * Both sum and sum2 are partial sums, so combine them together. + * Remove from the old queue */ - sum += ~sum2 & 0xffff; - while (sum > 0xffff) - sum = (sum & 0xffff) + (sum >> 16); - sum2 = ~sum & 0xffff; -# else /* defined(BSD) || defined(sun) */ -{ - union { - u_char c[2]; - u_short s; - } bytes; - u_short len = ip->ip_len; -# if defined(__sgi) - int add; -# endif + *tqe->tqe_pnext = tqe->tqe_next; + if (tqe->tqe_next) + tqe->tqe_next->tqe_pnext = tqe->tqe_pnext; + else + oifq->ifq_tail = tqe->tqe_pnext; + tqe->tqe_next = NULL; /* - * Add up IP Header portion + * If we're moving from one queue to another, release the lock on the + * old queue and get a lock on the new queue. For user defined queues, + * if we're moving off it, call delete in case it can now be freed. 
*/ - sp = (u_short *)&ip->ip_src; - len -= (ip->ip_hl << 2); - sum = ntohs(IPPROTO_TCP); - sum += htons(len); - sum += *sp++; /* ip_src */ - sum += *sp++; - sum += *sp++; /* ip_dst */ - sum += *sp++; - if (sp != (u_short *)tcp) - sp = (u_short *)tcp; - sum += *sp++; /* sport */ - sum += *sp++; /* dport */ - sum += *sp++; /* seq */ - sum += *sp++; - sum += *sp++; /* ack */ - sum += *sp++; - sum += *sp++; /* off */ - sum += *sp++; /* win */ - sum += *sp++; /* Skip over checksum */ - sum += *sp++; /* urp */ - -# ifdef __sgi + if (oifq != nifq) { + tqe->tqe_ifq = NULL; + + (void) fr_deletetimeoutqueue(oifq); + + MUTEX_EXIT(&oifq->ifq_lock); + + MUTEX_ENTER(&nifq->ifq_lock); + + tqe->tqe_ifq = nifq; + nifq->ifq_ref++; + } + /* - * In case we had to copy the IP & TCP header out of mbufs, - * skip over the mbuf bits which are the header + * Add to the bottom of the new queue */ - if ((caddr_t)ip != mtod(m, caddr_t)) { - hlen = (caddr_t)sp - (caddr_t)ip; - while (hlen) { - add = MIN(hlen, m->m_len); - sp = (u_short *)(mtod(m, caddr_t) + add); - hlen -= add; - if (add == m->m_len) { - m = m->m_next; - if (!hlen) { - if (!m) - break; - sp = mtod(m, u_short *); - } - PANIC((!m),("fr_tcpsum(1): not enough data")); - } - } + tqe->tqe_die = fr_ticks + nifq->ifq_ttl; + tqe->tqe_pnext = nifq->ifq_tail; + *nifq->ifq_tail = tqe; + nifq->ifq_tail = &tqe->tqe_next; + MUTEX_EXIT(&nifq->ifq_lock); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_updateipid */ +/* Returns: int - 0 == success, -1 == error (packet should be dropped) */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* When we are doing NAT, change the IP id of every packet to represent a */ +/* single sequence of packets coming from the host, hiding any host */ +/* specific sequencing that might otherwise be revealed.
If the packet is */ +/* a fragment, then store the 'new' IPid in the fragment cache and look up */ +/* the fragment cache for non-leading fragments. If a non-leading fragment */ +/* has no match in the cache, return an error. */ +/* ------------------------------------------------------------------------ */ +static INLINE int fr_updateipid(fin) +fr_info_t *fin; +{ + u_short id, ido, sums; + u_32_t sumd, sum; + ip_t *ip; + + if (fin->fin_off != 0) { + sum = fr_ipid_knownfrag(fin); + if (sum == 0xffffffff) + return -1; + sum &= 0xffff; + id = (u_short)sum; + } else { + id = fr_nextipid(fin); + if (fin->fin_off == 0 && (fin->fin_flx & FI_FRAG) != 0) + (void) fr_ipid_newfrag(fin, (u_32_t)id); } + + ip = fin->fin_ip; + ido = ntohs(ip->ip_id); + if (id == ido) + return 0; + ip->ip_id = htons(id); + CALC_SUMD(ido, id, sumd); /* DESTRUCTIVE MACRO! id,ido change */ + sum = (~ntohs(ip->ip_sum)) & 0xffff; + sum += sumd; + sum = (sum >> 16) + (sum & 0xffff); + sum = (sum >> 16) + (sum & 0xffff); + sums = ~(u_short)sum; + ip->ip_sum = htons(sums); + return 0; +} + + +#ifdef NEED_FRGETIFNAME +/* ------------------------------------------------------------------------ */ +/* Function: fr_getifname */ +/* Returns: char * - pointer to interface name */ +/* Parameters: ifp(I) - pointer to network interface */ +/* buffer(O) - pointer to where to store interface name */ +/* */ +/* Constructs an interface name in the buffer passed. The buffer passed is */ +/* expected to be at least LIFNAMSIZ in bytes big. If buffer is passed in */ +/* as a NULL pointer then return a pointer to a static array. 
*/ +/* ------------------------------------------------------------------------ */ +char *fr_getifname(ifp, buffer) +struct ifnet *ifp; +char *buffer; +{ + static char namebuf[LIFNAMSIZ]; +# if defined(MENTAT) || defined(__FreeBSD__) || defined(__osf__) || \ + defined(__sgi) || defined(linux) || \ + (defined(sun) && !defined(__SVR4) && !defined(__svr4__)) + int unit, space; + char temp[20]; + char *s; # endif - if (!(len -= sizeof(*tcp))) - goto nodata; - while (len > 1) { - if (((caddr_t)sp - mtod(m, caddr_t)) >= m->m_len) { - m = m->m_next; - PANIC((!m),("fr_tcpsum(2): not enough data")); - sp = mtod(m, u_short *); - } - if (((caddr_t)(sp + 1) - mtod(m, caddr_t)) > m->m_len) { - bytes.c[0] = *(u_char *)sp; - m = m->m_next; - PANIC((!m),("fr_tcpsum(3): not enough data")); - sp = mtod(m, u_short *); - bytes.c[1] = *(u_char *)sp; - sum += bytes.s; - sp = (u_short *)((u_char *)sp + 1); - } - if ((u_long)sp & 1) { - bcopy((char *)sp++, (char *)&bytes.s, sizeof(bytes.s)); - sum += bytes.s; - } else - sum += *sp++; - len -= 2; + if (buffer == NULL) + buffer = namebuf; + (void) strncpy(buffer, ifp->if_name, LIFNAMSIZ); + buffer[LIFNAMSIZ - 1] = '\0'; +# if defined(MENTAT) || defined(__FreeBSD__) || defined(__osf__) || \ + defined(__sgi) || \ + (defined(sun) && !defined(__SVR4) && !defined(__svr4__)) + for (s = buffer; *s; s++) + ; + unit = ifp->if_unit; + space = LIFNAMSIZ - (s - buffer); + if (space > 0) { +# if defined(SNPRINTF) && defined(_KERNEL) + SNPRINTF(temp, sizeof(temp), "%d", unit); +# else + (void) sprintf(temp, "%d", unit); +# endif + (void) strncpy(s, temp, space); } - if (len) - sum += ntohs(*(u_char *)sp << 8); -nodata: - while (sum > 0xffff) - sum = (sum & 0xffff) + (sum >> 16); - sum2 = (u_short)(~sum & 0xffff); -} -# endif /* defined(BSD) || defined(sun) */ -# endif /* SOLARIS */ -#else /* KERNEL */ - for (; slen > 1; slen -= 2) - sum += *sp++; - if (slen) - sum += ntohs(*(u_char *)sp << 8); - while (sum > 0xffff) - sum = (sum & 0xffff) + (sum >> 16); 
- sum2 = (u_short)(~sum & 0xffff); -#endif /* KERNEL */ - tcp->th_sum = ts; - return sum2; +# endif + return buffer; +} +#endif + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_ioctlswitch */ +/* Returns: int - -1 continue processing, else ioctl return value */ +/* Parameters: unit(I) - device unit opened */ +/* data(I) - pointer to ioctl data */ +/* cmd(I) - ioctl command */ +/* mode(I) - mode value */ +/* */ +/* Based on the value of unit, call the appropriate ioctl handler or return */ +/* EIO if ipfilter is not running. Also checks if write perms are req'd */ +/* for the device in order to execute the ioctl. */ +/* ------------------------------------------------------------------------ */ +int fr_ioctlswitch(unit, data, cmd, mode) +int unit, mode; +ioctlcmd_t cmd; +void *data; +{ + int error = 0; + + switch (unit) + { + case IPL_LOGIPF : + error = -1; + break; + case IPL_LOGNAT : + if (fr_running > 0) + error = fr_nat_ioctl(data, cmd, mode); + else + error = EIO; + break; + case IPL_LOGSTATE : + if (fr_running > 0) + error = fr_state_ioctl(data, cmd, mode); + else + error = EIO; + break; + case IPL_LOGAUTH : + if (fr_running > 0) { + if ((cmd == (ioctlcmd_t)SIOCADAFR) || + (cmd == (ioctlcmd_t)SIOCRMAFR)) { + if (!(mode & FWRITE)) { + error = EPERM; + } else { + error = frrequest(unit, cmd, data, + fr_active, 1); + } + } else { + error = fr_auth_ioctl(data, cmd, mode); + } + } else + error = EIO; + break; + case IPL_LOGSYNC : +#ifdef IPFILTER_SYNC + if (fr_running > 0) + error = fr_sync_ioctl(data, cmd, mode); + else +#endif + error = EIO; + break; + case IPL_LOGSCAN : +#ifdef IPFILTER_SCAN + if (fr_running > 0) + error = fr_scan_ioctl(data, cmd, mode); + else +#endif + error = EIO; + break; + case IPL_LOGLOOKUP : +#ifdef IPFILTER_LOOKUP + if (fr_running > 0) + error = ip_lookup_ioctl(data, cmd, mode); + else +#endif + error = EIO; + break; + default : + error = EIO; + break; + } + + return error; } -#if 
defined(_KERNEL) && ( ((BSD < 199306) && !SOLARIS) || defined(__sgi) ) -/* - * Copyright (c) 1982, 1986, 1988, 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 - * $Id: fil.c,v 2.35.2.82 2004/06/20 10:27:47 darrenr Exp $ - */ /* - * Copy data from an mbuf chain starting "off" bytes from the beginning, - * continuing for "len" bytes, into the indicated buffer. 
+ * This array defines the expected size of objects coming into the kernel + * for the various recognised object types. */ -void -m_copydata(m, off, len, cp) - register mb_t *m; - register int off; - register int len; - caddr_t cp; +#define NUM_OBJ_TYPES 14 + +static int fr_objbytes[NUM_OBJ_TYPES][2] = { + { 1, sizeof(struct frentry) }, /* frentry */ + { 0, sizeof(struct friostat) }, + { 0, sizeof(struct fr_info) }, + { 0, sizeof(struct fr_authstat) }, + { 0, sizeof(struct ipfrstat) }, + { 0, sizeof(struct ipnat) }, + { 0, sizeof(struct natstat) }, + { 0, sizeof(struct ipstate_save) }, + { 1, sizeof(struct nat_save) }, /* nat_save */ + { 0, sizeof(struct natlookup) }, + { 1, sizeof(struct ipstate) }, /* ipstate */ + { 0, sizeof(struct ips_stat) }, + { 0, sizeof(struct frauth) }, + { 0, sizeof(struct ipftune) } +}; + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_inobj */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: data(I) - pointer to ioctl data */ +/* ptr(I) - pointer to store real data in */ +/* type(I) - type of structure being moved */ +/* */ +/* Copy in the contents of what the ipfobj_t points to. In future, we */ +/* add things to check for version numbers, sizes, etc, to make it backward */ +/* compatible at the ABI for user land. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_inobj(data, ptr, type) +void *data; +void *ptr; +int type; { - register unsigned count; + ipfobj_t obj; + int error = 0; - if (off < 0 || len < 0) - panic("m_copydata"); - while (off > 0) { - if (m == 0) - panic("m_copydata"); - if (off < m->m_len) - break; - off -= m->m_len; - m = m->m_next; - } - while (len > 0) { - if (m == 0) - panic("m_copydata"); - count = MIN(m->m_len - off, len); - bcopy(mtod(m, caddr_t) + off, cp, count); - len -= count; - cp += count; - off = 0; - m = m->m_next; + if ((type < 0) || (type > NUM_OBJ_TYPES-1)) + return EINVAL; + + BCOPYIN((caddr_t)data, (caddr_t)&obj, sizeof(obj)); + + if (obj.ipfo_type != type) + return EINVAL; + +#ifndef IPFILTER_COMPAT + if ((fr_objbytes[type][0] & 1) != 0) { + if (obj.ipfo_size < fr_objbytes[type][1]) + return EINVAL; + } else if (obj.ipfo_size != fr_objbytes[type][1]) + return EINVAL; +#else + if (obj.ipfo_rev != IPFILTER_VERSION) + /* XXX compatibility hook here */ + ; + if ((fr_objbytes[type][0] & 1) != 0) { + if (obj.ipfo_size < fr_objbytes[type][1]) + /* XXX compatibility hook here */ + return EINVAL; + } else if (obj.ipfo_size != fr_objbytes[type][1]) + /* XXX compatibility hook here */ + return EINVAL; +#endif + + if ((fr_objbytes[type][0] & 1) != 0) { + error = COPYIN((caddr_t)obj.ipfo_ptr, (caddr_t)ptr, + fr_objbytes[type][1]); + } else { + error = COPYIN((caddr_t)obj.ipfo_ptr, (caddr_t)ptr, + obj.ipfo_size); } + return error; } -# ifndef linux -/* - * Copy data from a buffer back into the indicated mbuf chain, - * starting "off" bytes from the beginning, extending the mbuf - * chain if necessary. 
- */ -void -m_copyback(m0, off, len, cp) - struct mbuf *m0; - register int off; - register int len; - caddr_t cp; +/* ------------------------------------------------------------------------ */ +/* Function: fr_inobjsz */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: data(I) - pointer to ioctl data */ +/* ptr(I) - pointer to store real data in */ +/* type(I) - type of structure being moved */ +/* sz(I) - size of data to copy */ +/* */ +/* As per fr_inobj, except the size of the object to copy in is passed in */ +/* but it must not be smaller than the size defined for the type and the */ +/* type must allow for varied sized objects. The extra requirement here is */ +/* that sz must match the size of the object being passed in - this is not */ +/* not possible nor required in fr_inobj(). */ +/* ------------------------------------------------------------------------ */ +int fr_inobjsz(data, ptr, type, sz) +void *data; +void *ptr; +int type, sz; { - register int mlen; - register struct mbuf *m = m0, *n; - int totlen = 0; + ipfobj_t obj; + int error; - if (m0 == 0) - return; - while (off > (mlen = m->m_len)) { - off -= mlen; - totlen += mlen; - if (m->m_next == 0) { - n = m_getclr(M_DONTWAIT, m->m_type); - if (n == 0) - goto out; - n->m_len = min(MLEN, len + off); - m->m_next = n; - } - m = m->m_next; - } - while (len > 0) { - mlen = min (m->m_len - off, len); - bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen); - cp += mlen; - len -= mlen; - mlen += off; - off = 0; - totlen += mlen; - if (len == 0) - break; - if (m->m_next == 0) { - n = m_get(M_DONTWAIT, m->m_type); - if (n == 0) - break; - n->m_len = min(MLEN, len); - m->m_next = n; - } - m = m->m_next; - } -out: -#if 0 - if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) - m->m_pkthdr.len = totlen; + if ((type < 0) || (type > NUM_OBJ_TYPES-1)) + return EINVAL; + if (((fr_objbytes[type][0] & 1) == 0) || (sz < fr_objbytes[type][1])) + return EINVAL; + + BCOPYIN((caddr_t)data, 
(caddr_t)&obj, sizeof(obj)); + + if (obj.ipfo_type != type) + return EINVAL; + +#ifndef IPFILTER_COMPAT + if (obj.ipfo_size != sz) + return EINVAL; +#else + if (obj.ipfo_rev != IPFILTER_VERSION) + /* XXX compatibility hook here */ + ; + if (obj.ipfo_size != sz) + /* XXX compatibility hook here */ + return EINVAL; #endif - return; + + error = COPYIN((caddr_t)obj.ipfo_ptr, (caddr_t)ptr, sz); + return error; } -# endif /* linux */ -#endif /* (_KERNEL) && ( ((BSD < 199306) && !SOLARIS) || __sgi) */ -frgroup_t *fr_findgroup(num, flags, which, set, fgpp) -u_32_t num, flags; -minor_t which; -int set; -frgroup_t ***fgpp; +/* ------------------------------------------------------------------------ */ +/* Function: fr_outobjsz */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: data(I) - pointer to ioctl data */ +/* ptr(I) - pointer to store real data in */ +/* type(I) - type of structure being moved */ +/* sz(I) - size of data to copy */ +/* */ +/* As per fr_outobj, except the size of the object to copy out is passed in */ +/* but it must not be smaller than the size defined for the type and the */ +/* type must allow for varied sized objects. The extra requirement here is */ +/* that sz must match the size of the object being passed in - this is not */ +/* not possible nor required in fr_outobj(). 
*/ +/* ------------------------------------------------------------------------ */ +int fr_outobjsz(data, ptr, type, sz) +void *data; +void *ptr; +int type, sz; { - frgroup_t *fg, **fgp; + ipfobj_t obj; + int error; - if (which == IPL_LOGAUTH) - fgp = &ipfgroups[2][set]; - else if (flags & FR_ACCOUNT) - fgp = &ipfgroups[1][set]; - else if (flags & (FR_OUTQUE|FR_INQUE)) - fgp = &ipfgroups[0][set]; - else - return NULL; + if ((type < 0) || (type > NUM_OBJ_TYPES-1) || + ((fr_objbytes[type][0] & 1) == 0) || + (sz < fr_objbytes[type][1])) + return EINVAL; - while ((fg = *fgp)) - if (fg->fg_num == num) - break; - else - fgp = &fg->fg_next; - if (fgpp) - *fgpp = fgp; - return fg; + BCOPYIN((caddr_t)data, (caddr_t)&obj, sizeof(obj)); + + if (obj.ipfo_type != type) + return EINVAL; + +#ifndef IPFILTER_COMPAT + if (obj.ipfo_size != sz) + return EINVAL; +#else + if (obj.ipfo_rev != IPFILTER_VERSION) + /* XXX compatibility hook here */ + ; + if (obj.ipfo_size != sz) + /* XXX compatibility hook here */ + return EINVAL; +#endif + + error = COPYOUT((caddr_t)ptr, (caddr_t)obj.ipfo_ptr, sz); + return error; } -frgroup_t *fr_addgroup(num, fp, which, set) -u_32_t num; -frentry_t *fp; -minor_t which; -int set; +/* ------------------------------------------------------------------------ */ +/* Function: fr_outobj */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: data(I) - pointer to ioctl data */ +/* ptr(I) - pointer to store real data in */ +/* type(I) - type of structure being moved */ +/* */ +/* Copy out the contents of what ptr is to where ipfobj points to. In */ +/* future, we add things to check for version numbers, sizes, etc, to make */ +/* it backward compatible at the ABI for user land. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_outobj(data, ptr, type) +void *data; +void *ptr; +int type; { - frgroup_t *fg, **fgp; + ipfobj_t obj; + int error; - if ((fg = fr_findgroup(num, fp->fr_flags, which, set, &fgp))) - return fg; + if ((type < 0) || (type > NUM_OBJ_TYPES-1)) + return EINVAL; - KMALLOC(fg, frgroup_t *); - if (fg) { - fg->fg_num = num; - fg->fg_next = *fgp; - fg->fg_head = fp; - fg->fg_start = &fp->fr_grp; - *fgp = fg; - } - return fg; + BCOPYIN((caddr_t)data, (caddr_t)&obj, sizeof(obj)); + + if (obj.ipfo_type != type) + return EINVAL; + +#ifndef IPFILTER_COMPAT + if ((fr_objbytes[type][0] & 1) != 0) { + if (obj.ipfo_size < fr_objbytes[type][1]) + return EINVAL; + } else if (obj.ipfo_size != fr_objbytes[type][1]) + return EINVAL; +#else + if (obj.ipfo_rev != IPFILTER_VERSION) + /* XXX compatibility hook here */ + ; + if ((fr_objbytes[type][0] & 1) != 0) { + if (obj.ipfo_size < fr_objbytes[type][1]) + /* XXX compatibility hook here */ + return EINVAL; + } else if (obj.ipfo_size != fr_objbytes[type][1]) + /* XXX compatibility hook here */ + return EINVAL; +#endif + + error = COPYOUT((caddr_t)ptr, (caddr_t)obj.ipfo_ptr, obj.ipfo_size); + return error; } -void fr_delgroup(num, flags, which, set) -u_32_t num, flags; -minor_t which; -int set; +/* ------------------------------------------------------------------------ */ +/* Function: fr_checkl4sum */ +/* Returns: int - 0 = good, -1 = bad, 1 = cannot check */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* If possible, calculate the layer 4 checksum for the packet. If this is */ +/* not possible, return without indicating a failure or success but in a */ +/* way that is ditinguishable. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_checkl4sum(fin) +fr_info_t *fin; { - frgroup_t *fg, **fgp; - - if (!(fg = fr_findgroup(num, flags, which, set, &fgp))) - return; - - *fgp = fg->fg_next; - KFREE(fg); -} + u_short sum, hdrsum, *csump; + udphdr_t *udp; + int dosum; + if ((fin->fin_flx & FI_NOCKSUM) != 0) + return 0; + /* + * If the TCP packet isn't a fragment, isn't too short and otherwise + * isn't already considered "bad", then validate the checksum. If + * this check fails then considered the packet to be "bad". + */ + if ((fin->fin_flx & (FI_FRAG|FI_SHORT|FI_BAD)) != 0) + return 1; -/* - * recursively flush rules from the list, descending groups as they are - * encountered. if a rule is the head of a group and it has lost all its - * group members, then also delete the group reference. - */ -static int frflushlist(set, unit, nfreedp, listp) -int set; -minor_t unit; -int *nfreedp; -frentry_t **listp; -{ - register int freed = 0, i; - register frentry_t *fp; + csump = NULL; + hdrsum = 0; + dosum = 0; + sum = 0; - while ((fp = *listp)) { - *listp = fp->fr_next; - if (fp->fr_grp) { - i = frflushlist(set, unit, nfreedp, &fp->fr_grp); - MUTEX_ENTER(&ipf_rw); - fp->fr_ref -= i; - MUTEX_EXIT(&ipf_rw); - } +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_VALID) + if (dohwcksum && ((*fin->fin_mp)->b_ick_flag == ICK_VALID)) { + hdrsum = 0; + sum = 0; + } else { +#endif + switch (fin->fin_p) + { + case IPPROTO_TCP : + csump = &((tcphdr_t *)fin->fin_dp)->th_sum; + dosum = 1; + break; - ATOMIC_DEC32(fp->fr_ref); - if (fp->fr_grhead) { - fr_delgroup(fp->fr_grhead, fp->fr_flags, - unit, set); - fp->fr_grhead = 0; + case IPPROTO_UDP : + udp = fin->fin_dp; + if (udp->uh_sum != 0) { + csump = &udp->uh_sum; + dosum = 1; + } + break; + + case IPPROTO_ICMP : + csump = &((struct icmp *)fin->fin_dp)->icmp_cksum; + dosum = 1; + break; + + default : + return 1; + /*NOTREACHED*/ } - if (fp->fr_ref == 0) { - 
KFREE(fp); - freed++; - } else - fp->fr_next = NULL; + + if (csump != NULL) + hdrsum = *csump; + + if (dosum) + sum = fr_cksum(fin->fin_m, fin->fin_ip, + fin->fin_p, fin->fin_dp); +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_VALID) } - *nfreedp += freed; - return freed; +#endif +#if !defined(_KERNEL) + if (sum == hdrsum) { + FR_DEBUG(("checkl4sum: %hx == %hx\n", sum, hdrsum)); + } else { + FR_DEBUG(("checkl4sum: %hx != %hx\n", sum, hdrsum)); + } +#endif + if (hdrsum == sum) + return 0; + return -1; } -int frflush(unit, proto, flags) -minor_t unit; -int proto, flags; +/* ------------------------------------------------------------------------ */ +/* Function: fr_ifpfillv4addr */ +/* Returns: int - 0 = address update, -1 = address not updated */ +/* Parameters: atype(I) - type of network address update to perform */ +/* sin(I) - pointer to source of address information */ +/* mask(I) - pointer to source of netmask information */ +/* inp(I) - pointer to destination address store */ +/* inpmask(I) - pointer to destination netmask store */ +/* */ +/* Given a type of network address update (atype) to perform, copy */ +/* information from sin/mask into inp/inpmask. If ipnmask is NULL then no */ +/* netmask update is performed unless FRI_NETMASKED is passed as atype, in */ +/* which case the operation fails. For all values of atype other than */ +/* FRI_NETMASKED, if inpmask is non-NULL then the mask is set to an all 1s */ +/* value. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_ifpfillv4addr(atype, sin, mask, inp, inpmask) +int atype; +struct sockaddr_in *sin, *mask; +struct in_addr *inp, *inpmask; { - int flushed = 0, set; + if (inpmask != NULL && atype != FRI_NETMASKED) + inpmask->s_addr = 0xffffffff; - if (unit != IPL_LOGIPF) - return 0; - WRITE_ENTER(&ipf_mutex); - bzero((char *)frcache, sizeof(frcache[0]) * 2); + if (atype == FRI_NETWORK || atype == FRI_NETMASKED) { + if (atype == FRI_NETMASKED) { + if (inpmask == NULL) + return -1; + inpmask->s_addr = mask->sin_addr.s_addr; + } + inp->s_addr = sin->sin_addr.s_addr & mask->sin_addr.s_addr; + } else { + inp->s_addr = sin->sin_addr.s_addr; + } + return 0; +} - set = fr_active; - if (flags & FR_INACTIVE) - set = 1 - set; - if (flags & FR_OUTQUE) { #ifdef USE_INET6 - if (proto == 0 || proto == 6) { - (void) frflushlist(set, unit, - &flushed, &ipfilter6[1][set]); - (void) frflushlist(set, unit, - &flushed, &ipacct6[1][set]); +/* ------------------------------------------------------------------------ */ +/* Function: fr_ifpfillv6addr */ +/* Returns: int - 0 = address update, -1 = address not updated */ +/* Parameters: atype(I) - type of network address update to perform */ +/* sin(I) - pointer to source of address information */ +/* mask(I) - pointer to source of netmask information */ +/* inp(I) - pointer to destination address store */ +/* inpmask(I) - pointer to destination netmask store */ +/* */ +/* Given a type of network address update (atype) to perform, copy */ +/* information from sin/mask into inp/inpmask. If ipnmask is NULL then no */ +/* netmask update is performed unless FRI_NETMASKED is passed as atype, in */ +/* which case the operation fails. For all values of atype other than */ +/* FRI_NETMASKED, if inpmask is non-NULL then the mask is set to an all 1s */ +/* value. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_ifpfillv6addr(atype, sin, mask, inp, inpmask) +int atype; +struct sockaddr_in6 *sin, *mask; +struct in_addr *inp, *inpmask; +{ + i6addr_t *src, *dst, *and, *dmask; + + src = (i6addr_t *)&sin->sin6_addr; + and = (i6addr_t *)&mask->sin6_addr; + dst = (i6addr_t *)inp; + dmask = (i6addr_t *)inpmask; + + if (inpmask != NULL && atype != FRI_NETMASKED) { + dmask->i6[0] = 0xffffffff; + dmask->i6[1] = 0xffffffff; + dmask->i6[2] = 0xffffffff; + dmask->i6[3] = 0xffffffff; + } + + if (atype == FRI_NETWORK || atype == FRI_NETMASKED) { + if (atype == FRI_NETMASKED) { + if (inpmask == NULL) + return -1; + dmask->i6[0] = and->i6[0]; + dmask->i6[1] = and->i6[1]; + dmask->i6[2] = and->i6[2]; + dmask->i6[3] = and->i6[3]; } + + dst->i6[0] = src->i6[0] & and->i6[0]; + dst->i6[1] = src->i6[1] & and->i6[1]; + dst->i6[2] = src->i6[2] & and->i6[2]; + dst->i6[3] = src->i6[3] & and->i6[3]; + } else { + dst->i6[0] = src->i6[0]; + dst->i6[1] = src->i6[1]; + dst->i6[2] = src->i6[2]; + dst->i6[3] = src->i6[3]; + } + return 0; +} #endif - if (proto == 0 || proto == 4) { - (void) frflushlist(set, unit, - &flushed, &ipfilter[1][set]); - (void) frflushlist(set, unit, - &flushed, &ipacct[1][set]); - } + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_matchtag */ +/* Returns: 0 == mismatch, 1 == match. */ +/* Parameters: tag1(I) - pointer to first tag to compare */ +/* tag2(I) - pointer to second tag to compare */ +/* */ +/* Returns true (non-zero) or false(0) if the two tag structures can be */ +/* considered to be a match or not match, respectively. The tag is 16 */ +/* bytes long (16 characters) but that is overlayed with 4 32bit ints so */ +/* compare the ints instead, for speed. tag1 is the master of the */ +/* comparison. This function should only be called with both tag1 and tag2 */ +/* as non-NULL pointers. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_matchtag(tag1, tag2) +ipftag_t *tag1, *tag2; +{ + if (tag1 == tag2) + return 1; + + if ((tag1->ipt_num[0] == 0) && (tag2->ipt_num[0] == 0)) + return 1; + + if ((tag1->ipt_num[0] == tag2->ipt_num[0]) && + (tag1->ipt_num[1] == tag2->ipt_num[1]) && + (tag1->ipt_num[2] == tag2->ipt_num[2]) && + (tag1->ipt_num[3] == tag2->ipt_num[3])) + return 1; + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_coalesce */ +/* Returns: 1 == success, -1 == failure, 0 == no change */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Attempt to get all of the packet data into a single, contiguous buffer. */ +/* If this call returns a failure then the buffers have also been freed. */ +/* ------------------------------------------------------------------------ */ +int fr_coalesce(fin) +fr_info_t *fin; +{ + if ((fin->fin_flx & FI_COALESCE) != 0) + return 1; + + /* + * If the mbuf pointers indicate that there is no mbuf to work with, + * return but do not indicate success or failure. 
+ */ + if (fin->fin_m == NULL || fin->fin_mp == NULL) + return 0; + +#if defined(_KERNEL) + if (fr_pullup(fin->fin_m, fin, fin->fin_plen) == NULL) { + ATOMIC_INCL(fr_badcoalesces[fin->fin_out]); +# ifdef MENTAT + FREE_MB_T(*fin->fin_mp); +# endif + *fin->fin_mp = NULL; + fin->fin_m = NULL; + return -1; } - if (flags & FR_INQUE) { -#ifdef USE_INET6 - if (proto == 0 || proto == 6) { - (void) frflushlist(set, unit, - &flushed, &ipfilter6[0][set]); - (void) frflushlist(set, unit, - &flushed, &ipacct6[0][set]); - } +#else + fin = fin; /* LINT */ #endif - if (proto == 0 || proto == 4) { - (void) frflushlist(set, unit, - &flushed, &ipfilter[0][set]); - (void) frflushlist(set, unit, - &flushed, &ipacct[0][set]); + return 1; +} + + +/* + * The following table lists all of the tunable variables that can be + * accessed via SIOCIPFGET/SIOCIPFSET/SIOCIPFGETNEXt. The format of each row + * in the table below is as follows: + * + * pointer to value, name of value, minimum, maximum, size of the value's + * container, value attribute flags + * + * For convienience, IPFT_RDONLY means the value is read-only, IPFT_WRDISABLED + * means the value can only be written to when IPFilter is loaded but disabled. + * The obvious implication is if neither of these are set then the value can be + * changed at any time without harm. 
+ */ +ipftuneable_t ipf_tuneables[] = { + /* filtering */ + { { &fr_flags }, "fr_flags", 0, 0xffffffff, + sizeof(fr_flags), 0 }, + { { &fr_active }, "fr_active", 0, 0, + sizeof(fr_active), IPFT_RDONLY }, + { { &fr_control_forwarding }, "fr_control_forwarding", 0, 1, + sizeof(fr_control_forwarding), 0 }, + { { &fr_update_ipid }, "fr_update_ipid", 0, 1, + sizeof(fr_update_ipid), 0 }, + { { &fr_chksrc }, "fr_chksrc", 0, 1, + sizeof(fr_chksrc), 0 }, + { { &fr_pass }, "fr_pass", 0, 0xffffffff, + sizeof(fr_pass), 0 }, + /* state */ + { { &fr_tcpidletimeout }, "fr_tcpidletimeout", 1, 0x7fffffff, + sizeof(fr_tcpidletimeout), IPFT_WRDISABLED }, + { { &fr_tcpclosewait }, "fr_tcpclosewait", 1, 0x7fffffff, + sizeof(fr_tcpclosewait), IPFT_WRDISABLED }, + { { &fr_tcplastack }, "fr_tcplastack", 1, 0x7fffffff, + sizeof(fr_tcplastack), IPFT_WRDISABLED }, + { { &fr_tcptimeout }, "fr_tcptimeout", 1, 0x7fffffff, + sizeof(fr_tcptimeout), IPFT_WRDISABLED }, + { { &fr_tcpclosed }, "fr_tcpclosed", 1, 0x7fffffff, + sizeof(fr_tcpclosed), IPFT_WRDISABLED }, + { { &fr_tcphalfclosed }, "fr_tcphalfclosed", 1, 0x7fffffff, + sizeof(fr_tcphalfclosed), IPFT_WRDISABLED }, + { { &fr_udptimeout }, "fr_udptimeout", 1, 0x7fffffff, + sizeof(fr_udptimeout), IPFT_WRDISABLED }, + { { &fr_udpacktimeout }, "fr_udpacktimeout", 1, 0x7fffffff, + sizeof(fr_udpacktimeout), IPFT_WRDISABLED }, + { { &fr_icmptimeout }, "fr_icmptimeout", 1, 0x7fffffff, + sizeof(fr_icmptimeout), IPFT_WRDISABLED }, + { { &fr_icmpacktimeout }, "fr_icmpacktimeout", 1, 0x7fffffff, + sizeof(fr_icmpacktimeout), IPFT_WRDISABLED }, + { { &fr_iptimeout }, "fr_iptimeout", 1, 0x7fffffff, + sizeof(fr_iptimeout), IPFT_WRDISABLED }, + { { &fr_statemax }, "fr_statemax", 1, 0x7fffffff, + sizeof(fr_statemax), 0 }, + { { &fr_statesize }, "fr_statesize", 1, 0x7fffffff, + sizeof(fr_statesize), IPFT_WRDISABLED }, + { { &fr_state_lock }, "fr_state_lock", 0, 1, + sizeof(fr_state_lock), IPFT_RDONLY }, + { { &fr_state_maxbucket }, "fr_state_maxbucket", 1, 
0x7fffffff, + sizeof(fr_state_maxbucket), IPFT_WRDISABLED }, + { { &fr_state_maxbucket_reset }, "fr_state_maxbucket_reset", 0, 1, + sizeof(fr_state_maxbucket_reset), IPFT_WRDISABLED }, + { { &ipstate_logging }, "ipstate_logging", 0, 1, + sizeof(ipstate_logging), 0 }, + /* nat */ + { { &fr_nat_lock }, "fr_nat_lock", 0, 1, + sizeof(fr_nat_lock), IPFT_RDONLY }, + { { &ipf_nattable_sz }, "ipf_nattable_sz", 1, 0x7fffffff, + sizeof(ipf_nattable_sz), IPFT_WRDISABLED }, + { { &ipf_nattable_max }, "ipf_nattable_max", 1, 0x7fffffff, + sizeof(ipf_nattable_max), 0 }, + { { &ipf_natrules_sz }, "ipf_natrules_sz", 1, 0x7fffffff, + sizeof(ipf_natrules_sz), IPFT_WRDISABLED }, + { { &ipf_rdrrules_sz }, "ipf_rdrrules_sz", 1, 0x7fffffff, + sizeof(ipf_rdrrules_sz), IPFT_WRDISABLED }, + { { &ipf_hostmap_sz }, "ipf_hostmap_sz", 1, 0x7fffffff, + sizeof(ipf_hostmap_sz), IPFT_WRDISABLED }, + { { &fr_nat_maxbucket }, "fr_nat_maxbucket", 1, 0x7fffffff, + sizeof(fr_nat_maxbucket), IPFT_WRDISABLED }, + { { &fr_nat_maxbucket_reset }, "fr_nat_maxbucket_reset", 0, 1, + sizeof(fr_nat_maxbucket_reset), IPFT_WRDISABLED }, + { { &nat_logging }, "nat_logging", 0, 1, + sizeof(nat_logging), 0 }, + { { &fr_defnatage }, "fr_defnatage", 1, 0x7fffffff, + sizeof(fr_defnatage), IPFT_WRDISABLED }, + { { &fr_defnatipage }, "fr_defnatipage", 1, 0x7fffffff, + sizeof(fr_defnatipage), IPFT_WRDISABLED }, + { { &fr_defnaticmpage }, "fr_defnaticmpage", 1, 0x7fffffff, + sizeof(fr_defnaticmpage), IPFT_WRDISABLED }, + /* frag */ + { { &ipfr_size }, "ipfr_size", 1, 0x7fffffff, + sizeof(ipfr_size), IPFT_WRDISABLED }, + { { &fr_ipfrttl }, "fr_ipfrttl", 1, 0x7fffffff, + sizeof(fr_ipfrttl), IPFT_WRDISABLED }, +#ifdef IPFILTER_LOG + /* log */ + { { &ipl_suppress }, "ipl_suppress", 0, 1, + sizeof(ipl_suppress), 0 }, + { { &ipl_buffer_sz }, "ipl_buffer_sz", 0, 0, + sizeof(ipl_buffer_sz), IPFT_RDONLY }, + { { &ipl_logmax }, "ipl_logmax", 0, 0x7fffffff, + sizeof(ipl_logmax), IPFT_WRDISABLED }, + { { &ipl_logall }, "ipl_logall", 0, 
1, + sizeof(ipl_logall), 0 }, + { { &ipl_logsize }, "ipl_logsize", 0, 0x80000, + sizeof(ipl_logsize), 0 }, +#endif + { { NULL }, NULL, 0, 0 } +}; + +static ipftuneable_t *ipf_tunelist = NULL; + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_findtunebycookie */ +/* Returns: NULL = search failed, else pointer to tune struct */ +/* Parameters: cookie(I) - cookie value to search for amongst tuneables */ +/* next(O) - pointer to place to store the cookie for the */ +/* "next" tuneable, if it is desired. */ +/* */ +/* This function is used to walk through all of the existing tunables with */ +/* successive calls. It searches the known tunables for the one which has */ +/* a matching value for "cookie" - ie its address. When returning a match, */ +/* the next one to be found may be returned inside next. */ +/* ------------------------------------------------------------------------ */ +static ipftuneable_t *fr_findtunebycookie(cookie, next) +void *cookie, **next; +{ + ipftuneable_t *ta, **tap; + + for (ta = ipf_tuneables; ta->ipft_name != NULL; ta++) + if (ta == cookie) { + if (next != NULL) { + /* + * If the next entry in the array has a name + * present, then return a pointer to it for + * where to go next, else return a pointer to + * the dynaminc list as a key to search there + * next. This facilitates a weak linking of + * the two "lists" together. 
+ */ + if ((ta + 1)->ipft_name != NULL) + *next = ta + 1; + else + *next = &ipf_tunelist; + } + return ta; + } + + for (tap = &ipf_tunelist; (ta = *tap) != NULL; tap = &ta->ipft_next) + if (tap == cookie) { + if (next != NULL) + *next = &ta->ipft_next; + return ta; } - } - RWLOCK_EXIT(&ipf_mutex); - return flushed; + + if (next != NULL) + *next = NULL; + return NULL; } -char *memstr(src, dst, slen, dlen) -char *src, *dst; -int slen, dlen; +/* ------------------------------------------------------------------------ */ +/* Function: fr_findtunebyname */ +/* Returns: NULL = search failed, else pointer to tune struct */ +/* Parameters: name(I) - name of the tuneable entry to find. */ +/* */ +/* Search the static array of tuneables and the list of dynamic tuneables */ +/* for an entry with a matching name. If we can find one, return a pointer */ +/* to the matching structure. */ +/* ------------------------------------------------------------------------ */ +static ipftuneable_t *fr_findtunebyname(name) +char *name; { - char *s = NULL; + ipftuneable_t *ta; - while (dlen >= slen) { - if (bcmp(src, dst, slen) == 0) { - s = dst; - break; + for (ta = ipf_tuneables; ta->ipft_name != NULL; ta++) + if (!strcmp(ta->ipft_name, name)) { + return ta; } - dst++; - dlen--; - } - return s; + + for (ta = ipf_tunelist; ta != NULL; ta = ta->ipft_next) + if (!strcmp(ta->ipft_name, name)) { + return ta; + } + + return NULL; } -void fixskip(listp, rp, addremove) -frentry_t **listp, *rp; -int addremove; +/* ------------------------------------------------------------------------ */ +/* Function: fr_addipftune */ +/* Returns: int - 0 == success, else failure */ +/* Parameters: newtune - pointer to new tune struct to add to tuneables */ +/* */ +/* Appends the tune structure pointer to by "newtune" to the end of the */ +/* current list of "dynamic" tuneable parameters. Once added, the owner */ +/* of the object is not expected to ever change "ipft_next". 
*/ +/* ------------------------------------------------------------------------ */ +int fr_addipftune(newtune) +ipftuneable_t *newtune; { - frentry_t *fp; - int rules = 0, rn = 0; + ipftuneable_t *ta, **tap; - for (fp = *listp; fp && (fp != rp); fp = fp->fr_next, rules++) - ; + ta = fr_findtunebyname(newtune->ipft_name); + if (ta != NULL) + return EEXIST; - if (!fp) - return; + for (tap = &ipf_tunelist; *tap != NULL; tap = &(*tap)->ipft_next) + ; - for (fp = *listp; fp && (fp != rp); fp = fp->fr_next, rn++) - if (fp->fr_skip && (rn + fp->fr_skip >= rules)) - fp->fr_skip += addremove; + newtune->ipft_next = NULL; + *tap = newtune; + return 0; } -#ifdef _KERNEL -/* - * count consecutive 1's in bit mask. If the mask generated by counting - * consecutive 1's is different to that passed, return -1, else return # - * of bits. - */ -int countbits(ip) -u_32_t ip; +/* ------------------------------------------------------------------------ */ +/* Function: fr_delipftune */ +/* Returns: int - 0 == success, else failure */ +/* Parameters: oldtune - pointer to tune struct to remove from the list of */ +/* current dynamic tuneables */ +/* */ +/* Search for the tune structure, by pointer, in the list of those that are */ +/* dynamically added at run time. If found, adjust the list so that this */ +/* structure is no longer part of it. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_delipftune(oldtune) +ipftuneable_t *oldtune; { - u_32_t ipn; - int cnt = 0, i, j; + ipftuneable_t *ta, **tap; - ip = ipn = ntohl(ip); - for (i = 32; i; i--, ipn *= 2) - if (ipn & 0x80000000) - cnt++; - else - break; - ipn = 0; - for (i = 32, j = cnt; i; i--, j--) { - ipn *= 2; - if (j > 0) - ipn++; - } - if (ipn == ip) - return cnt; - return -1; + for (tap = &ipf_tunelist; (ta = *tap) != NULL; tap = &ta->ipft_next) + if (ta == oldtune) { + *tap = oldtune->ipft_next; + oldtune->ipft_next = NULL; + return 0; + } + + return ESRCH; } -/* - * return the first IP Address associated with an interface - */ -int fr_ifpaddr(v, ifptr, inp) -int v; -void *ifptr; -struct in_addr *inp; +/* ------------------------------------------------------------------------ */ +/* Function: fr_ipftune */ +/* Returns: int - 0 == success, else failure */ +/* Parameters: cmd(I) - ioctl command number */ +/* data(I) - pointer to ioctl data structure */ +/* */ +/* Implement handling of SIOCIPFGETNEXT, SIOCIPFGET and SIOCIPFSET. These */ +/* three ioctls provide the means to access and control global variables */ +/* within IPFilter, allowing (for example) timeouts and table sizes to be */ +/* changed without rebooting, reloading or recompiling. The initialisation */ +/* and 'destruction' routines of the various components of ipfilter are all */ +/* each responsible for handling their own values being too big. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_ipftune(cmd, data) +ioctlcmd_t cmd; +void *data; { -# ifdef USE_INET6 - struct in6_addr *inp6 = NULL; -# endif -# if SOLARIS - ill_t *ill = ifptr; -# else - struct ifnet *ifp = ifptr; -# endif - struct in_addr in; + ipftuneable_t *ta; + ipftune_t tu; + void *cookie; + int error; -# if SOLARIS -# ifdef USE_INET6 - if (v == 6) { - struct in6_addr in6; + error = fr_inobj(data, &tu, IPFOBJ_TUNEABLE); + if (error != 0) + return error; + tu.ipft_name[sizeof(tu.ipft_name) - 1] = '\0'; + cookie = tu.ipft_cookie; + ta = NULL; + + switch (cmd) + { + case SIOCIPFGETNEXT : /* - * First is always link local. + * If cookie is non-NULL, assume it to be a pointer to the last + * entry we looked at, so find it (if possible) and return a + * pointer to the next one after it. The last entry in the + * table is a NULL entry, so when we get to it, set cookie + * to NULL and return that, indicating end of list, whereas + * if we come in with cookie set to NULL, we are starting anew + * at the front of the list. 
*/ - if (ill->ill_ipif->ipif_next) - in6 = ill->ill_ipif->ipif_next->ipif_v6lcl_addr; - else - bzero((char *)&in6, sizeof(in6)); - bcopy((char *)&in6, (char *)inp, sizeof(in6)); - } else -# endif - { - in.s_addr = ill->ill_ipif->ipif_local_addr; - *inp = in; - } -# else /* SOLARIS */ -# if linux - ; -# else /* linux */ - struct sockaddr_in *sin; - struct ifaddr *ifa; - -# if (__FreeBSD_version >= 300000) - ifa = TAILQ_FIRST(&ifp->if_addrhead); -# else -# if defined(__NetBSD__) || defined(__OpenBSD__) - ifa = ifp->if_addrlist.tqh_first; -# else -# if defined(__sgi) && defined(IFF_DRVRLOCK) /* IRIX 6 */ - ifa = &((struct in_ifaddr *)ifp->in_ifaddr)->ia_ifa; -# else - ifa = ifp->if_addrlist; -# endif -# endif /* __NetBSD__ || __OpenBSD__ */ -# endif /* __FreeBSD_version >= 300000 */ -# if (BSD < 199306) && !(/*IRIX6*/defined(__sgi) && defined(IFF_DRVRLOCK)) - sin = (struct sockaddr_in *)&ifa->ifa_addr; -# else - sin = (struct sockaddr_in *)ifa->ifa_addr; - while (sin && ifa) { - if ((v == 4) && (sin->sin_family == AF_INET)) - break; -# ifdef USE_INET6 - if ((v == 6) && (sin->sin_family == AF_INET6)) { - inp6 = &((struct sockaddr_in6 *)sin)->sin6_addr; - if (!IN6_IS_ADDR_LINKLOCAL(inp6) && - !IN6_IS_ADDR_LOOPBACK(inp6)) - break; + if (cookie != NULL) { + ta = fr_findtunebycookie(cookie, &tu.ipft_cookie); + } else { + ta = ipf_tuneables; + tu.ipft_cookie = ta + 1; } -# endif -# if (__FreeBSD_version >= 300000) - ifa = TAILQ_NEXT(ifa, ifa_link); -# else -# if defined(__NetBSD__) || defined(__OpenBSD__) - ifa = ifa->ifa_list.tqe_next; -# else - ifa = ifa->ifa_next; -# endif -# endif /* __FreeBSD_version >= 300000 */ - if (ifa) - sin = (struct sockaddr_in *)ifa->ifa_addr; - } - if (ifa == NULL) - sin = NULL; - if (sin == NULL) - return -1; -# endif /* (BSD < 199306) && (!__sgi && IFF_DRVLOCK) */ -# ifdef USE_INET6 - if (v == 6) - bcopy((char *)inp6, (char *)inp, sizeof(*inp6)); - else -# endif - { - in = sin->sin_addr; - *inp = in; - } -# endif /* linux */ -# endif /* 
SOLARIS */ - return 0; -} + if (ta != NULL) { + /* + * Entry found, but does the data pointed to by that + * row fit in what we can return? + */ + if (ta->ipft_sz > sizeof(tu.ipft_un)) + return EINVAL; + + tu.ipft_vlong = 0; + if (ta->ipft_sz == sizeof(u_long)) + tu.ipft_vlong = *ta->ipft_plong; + else if (ta->ipft_sz == sizeof(u_int)) + tu.ipft_vint = *ta->ipft_pint; + else if (ta->ipft_sz == sizeof(u_short)) + tu.ipft_vshort = *ta->ipft_pshort; + else if (ta->ipft_sz == sizeof(u_char)) + tu.ipft_vchar = *ta->ipft_pchar; + + tu.ipft_sz = ta->ipft_sz; + tu.ipft_min = ta->ipft_min; + tu.ipft_max = ta->ipft_max; + tu.ipft_flags = ta->ipft_flags; + bcopy(ta->ipft_name, tu.ipft_name, + MIN(sizeof(tu.ipft_name), + strlen(ta->ipft_name) + 1)); + } + error = fr_outobj(data, &tu, IPFOBJ_TUNEABLE); + break; + case SIOCIPFGET : + case SIOCIPFSET : + /* + * Search by name or by cookie value for a particular entry + * in the tuning parameter table. + */ + error = ESRCH; + if (cookie != NULL) { + ta = fr_findtunebycookie(cookie, NULL); + if (ta != NULL) + error = 0; + } else if (tu.ipft_name[0] != '\0') { + ta = fr_findtunebyname(tu.ipft_name); + if (ta != NULL) + error = 0; + } + if (error != 0) + break; -static void frsynclist(fr) -register frentry_t *fr; -{ - frdest_t *fdp; - int i; + if (cmd == (ioctlcmd_t)SIOCIPFGET) { + /* + * Fetch the tuning parameters for a particular value + */ + tu.ipft_vlong = 0; + if (ta->ipft_sz == sizeof(u_long)) + tu.ipft_vlong = *ta->ipft_plong; + else if (ta->ipft_sz == sizeof(u_int)) + tu.ipft_vint = *ta->ipft_pint; + else if (ta->ipft_sz == sizeof(u_short)) + tu.ipft_vshort = *ta->ipft_pshort; + else if (ta->ipft_sz == sizeof(u_char)) + tu.ipft_vchar = *ta->ipft_pchar; + tu.ipft_sz = ta->ipft_sz; + tu.ipft_min = ta->ipft_min; + tu.ipft_max = ta->ipft_max; + tu.ipft_flags = ta->ipft_flags; + error = fr_outobj(data, &tu, IPFOBJ_TUNEABLE); + + } else if (cmd == (ioctlcmd_t)SIOCIPFSET) { + /* + * Set an internal parameter. 
The hard part here is + * getting the new value safely and correctly out of + * the kernel (given we only know its size, not type.) + */ + u_long in; - for (; fr; fr = fr->fr_next) { - for (i = 0; i < 4; i++) { - if ((fr->fr_ifnames[i][1] == '\0') && - ((fr->fr_ifnames[i][0] == '-') || - (fr->fr_ifnames[i][0] == '*'))) { - fr->fr_ifas[i] = NULL; - } else if (*fr->fr_ifnames[i]) { - fr->fr_ifas[i] = GETUNIT(fr->fr_ifnames[i], - fr->fr_v); - if (!fr->fr_ifas[i]) - fr->fr_ifas[i] = (void *)-1; + if (((ta->ipft_flags & IPFT_WRDISABLED) != 0) && + (fr_running > 0)) { + error = EBUSY; + break; } - } - fdp = &fr->fr_dif; - fr->fr_flags &= ~FR_DUP; - if (*fdp->fd_ifname) { - fdp->fd_ifp = GETUNIT(fdp->fd_ifname, fr->fr_v); - if (!fdp->fd_ifp) - fdp->fd_ifp = (struct ifnet *)-1; - else - fr->fr_flags |= FR_DUP; - } + in = tu.ipft_vlong; + if (in < ta->ipft_min || in > ta->ipft_max) { + error = EINVAL; + break; + } - fdp = &fr->fr_tif; - if (*fdp->fd_ifname) { - fdp->fd_ifp = GETUNIT(fdp->fd_ifname, fr->fr_v); - if (!fdp->fd_ifp) - fdp->fd_ifp = (struct ifnet *)-1; + if (ta->ipft_sz == sizeof(u_long)) { + tu.ipft_vlong = *ta->ipft_plong; + *ta->ipft_plong = in; + } else if (ta->ipft_sz == sizeof(u_int)) { + tu.ipft_vint = *ta->ipft_pint; + *ta->ipft_pint = (u_int)(in & 0xffffffff); + } else if (ta->ipft_sz == sizeof(u_short)) { + tu.ipft_vshort = *ta->ipft_pshort; + *ta->ipft_pshort = (u_short)(in & 0xffff); + } else if (ta->ipft_sz == sizeof(u_char)) { + tu.ipft_vchar = *ta->ipft_pchar; + *ta->ipft_pchar = (u_char)(in & 0xff); + } + error = fr_outobj(data, &tu, IPFOBJ_TUNEABLE); } + break; - if (fr->fr_grp) - frsynclist(fr->fr_grp); + default : + error = EINVAL; + break; } + + return error; } -void frsync() +/* ------------------------------------------------------------------------ */ +/* Function: fr_initialise */ +/* Returns: int - 0 == success, < 0 == failure */ +/* Parameters: None. 
*/ +/* */ +/* Call all of the initialise functions for all the various subsystems inside */ +/* of IPFilter. If any of them should fail, immediately return a failure */ +/* BUT do not try to recover from the error here. */ +/* ------------------------------------------------------------------------ */ +int fr_initialise() { - ip_natsync(NULL); - ip_statesync(NULL); + int i; - WRITE_ENTER(&ipf_mutex); - frsynclist(ipacct[0][fr_active]); - frsynclist(ipacct[1][fr_active]); - frsynclist(ipfilter[0][fr_active]); - frsynclist(ipfilter[1][fr_active]); -#ifdef USE_INET6 - frsynclist(ipacct6[0][fr_active]); - frsynclist(ipacct6[1][fr_active]); - frsynclist(ipfilter6[0][fr_active]); - frsynclist(ipfilter6[1][fr_active]); +#ifdef IPFILTER_LOG + i = fr_loginit(); + if (i < 0) + return -10 + i; #endif - RWLOCK_EXIT(&ipf_mutex); + i = fr_natinit(); + if (i < 0) + return -20 + i; + + i = fr_stateinit(); + if (i < 0) + return -30 + i; + + i = fr_authinit(); + if (i < 0) + return -40 + i; + + i = fr_fraginit(); + if (i < 0) + return -50 + i; + + i = appr_init(); + if (i < 0) + return -60 + i; + +#ifdef IPFILTER_SYNC + i = ipfsync_init(); + if (i < 0) + return -70 + i; +#endif +#ifdef IPFILTER_SCAN + i = ipsc_init(); + if (i < 0) + return -80 + i; +#endif +#ifdef IPFILTER_LOOKUP + i = ip_lookup_init(); + if (i < 0) + return -90 + i; +#endif +#ifdef IPFILTER_COMPILED + ipfrule_add(); +#endif + return 0; } -/* - * In the functions below, bcopy() is called because the pointer being - * copied _from_ in this instance is a pointer to a char buf (which could - * end up being unaligned) and on the kernel's local stack. - */ -int ircopyptr(a, b, c) -void *a, *b; -size_t c; +/* ------------------------------------------------------------------------ */ +/* Function: fr_deinitialise */ +/* Returns: None. */ +/* Parameters: None. */ +/* */ +/* Call all the various subsystem cleanup routines to deallocate memory or */ +/* destroy locks or whatever they've done that they need to now undo. 
*/ +/* The order here IS important as there are some cross references of */ +/* internal data structures. */ +/* ------------------------------------------------------------------------ */ +void fr_deinitialise() { - caddr_t ca; - int err; + fr_fragunload(); + fr_authunload(); + fr_natunload(); + fr_stateunload(); +#ifdef IPFILTER_SCAN + fr_scanunload(); +#endif + appr_unload(); -#if SOLARIS - if (copyin(a, (char *)&ca, sizeof(ca))) - return EFAULT; -#else - bcopy(a, &ca, sizeof(ca)); +#ifdef IPFILTER_COMPILED + ipfrule_remove(); #endif - err = copyin(ca, b, c); - if (err) - err = EFAULT; - return err; -} + (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE); + (void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE); + (void) frflush(IPL_LOGCOUNT, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE); + (void) frflush(IPL_LOGCOUNT, 0, FR_INQUE|FR_OUTQUE); -int iwcopyptr(a, b, c) -void *a, *b; -size_t c; -{ - caddr_t ca; - int err; +#ifdef IPFILTER_LOOKUP + ip_lookup_unload(); +#endif -#if SOLARIS - if (copyin(b, (char *)&ca, sizeof(ca))) - return EFAULT; -#else - bcopy(b, &ca, sizeof(ca)); +#ifdef IPFILTER_LOG + fr_logunload(); #endif - err = copyout(a, ca, c); - if (err) - err = EFAULT; - return err; } -#else /* _KERNEL */ - -/* - * return the first IP Address associated with an interface - */ -int fr_ifpaddr(v, ifptr, inp) -int v; -void *ifptr; -struct in_addr *inp; +/* ------------------------------------------------------------------------ */ +/* Function: fr_zerostats */ +/* Returns: int - 0 = success, else failure */ +/* Parameters: data(O) - pointer to pointer for copying data back to */ +/* */ +/* Copies the current statistics out to userspace and then zero's the */ +/* current ones in the kernel. The lock is only held across the bzero() as */ +/* the copyout may result in paging (ie network activity.) 
*/ +/* ------------------------------------------------------------------------ */ +int fr_zerostats(data) +caddr_t data; { - return 0; -} + friostat_t fio; + int error; + fr_getstat(&fio); + error = copyoutptr(&fio, data, sizeof(fio)); + if (error) + return EFAULT; -int ircopyptr(a, b, c) -void *a, *b; -size_t c; -{ - caddr_t ca; + WRITE_ENTER(&ipf_mutex); + bzero((char *)frstats, sizeof(*frstats) * 2); + RWLOCK_EXIT(&ipf_mutex); - bcopy(a, &ca, sizeof(ca)); - bcopy(ca, b, c); return 0; } -int iwcopyptr(a, b, c) -void *a, *b; -size_t c; +/* ------------------------------------------------------------------------ */ +/* Function: fr_resolvedest */ +/* Returns: Nil */ +/* Parameters: fdp(IO) - pointer to destination information to resolve */ +/* v(I) - IP protocol version to match */ +/* */ +/* Looks up an interface name in the frdest structure pointed to by fdp and */ +/* if a matching name can be found for the particular IP protocol version */ +/* then store the interface pointer in the frdest struct. If no match is */ +/* found, then set the interface pointer to be -1 as NULL is considered to */ +/* indicate there is no information at all in the structure. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_resolvedest(fdp, v) +frdest_t *fdp; +int v; { - caddr_t ca; - - bcopy(b, &ca, sizeof(ca)); - bcopy(a, ca, c); - return 0; -} - - -#endif - + void *ifp; -int fr_lock(data, lockp) -caddr_t data; -int *lockp; -{ - int arg, error; + ifp = NULL; + v = v; /* LINT */ - error = IRCOPY(data, (caddr_t)&arg, sizeof(arg)); - if (!error) { - error = IWCOPY((caddr_t)lockp, data, sizeof(*lockp)); - if (!error) - *lockp = arg; + if (*fdp->fd_ifname != '\0') { + ifp = GETIFP(fdp->fd_ifname, v); + if (ifp == NULL) + ifp = (void *)-1; } - return error; + fdp->fd_ifp = ifp; } -void fr_getstat(fiop) -friostat_t *fiop; +/* ------------------------------------------------------------------------ */ +/* Function: fr_icmp4errortype */ +/* Returns: int - 1 == success, 0 == failure */ +/* Parameters: icmptype(I) - ICMP type number */ +/* */ +/* Tests to see if the ICMP type number passed is an error type or not. */ +/* ------------------------------------------------------------------------ */ +int fr_icmp4errortype(icmptype) +int icmptype; { - bcopy((char *)frstats, (char *)fiop->f_st, sizeof(filterstats_t) * 2); - fiop->f_locks[0] = fr_state_lock; - fiop->f_locks[1] = fr_nat_lock; - fiop->f_locks[2] = fr_frag_lock; - fiop->f_locks[3] = fr_auth_lock; - fiop->f_fin[0] = ipfilter[0][0]; - fiop->f_fin[1] = ipfilter[0][1]; - fiop->f_fout[0] = ipfilter[1][0]; - fiop->f_fout[1] = ipfilter[1][1]; - fiop->f_acctin[0] = ipacct[0][0]; - fiop->f_acctin[1] = ipacct[0][1]; - fiop->f_acctout[0] = ipacct[1][0]; - fiop->f_acctout[1] = ipacct[1][1]; -#ifdef USE_INET6 - fiop->f_fin6[0] = ipfilter6[0][0]; - fiop->f_fin6[1] = ipfilter6[0][1]; - fiop->f_fout6[0] = ipfilter6[1][0]; - fiop->f_fout6[1] = ipfilter6[1][1]; - fiop->f_acctin6[0] = ipacct6[0][0]; - fiop->f_acctin6[1] = ipacct6[0][1]; - fiop->f_acctout6[0] = ipacct6[1][0]; - fiop->f_acctout6[1] = ipacct6[1][1]; -#else - fiop->f_fin6[0] = NULL; - fiop->f_fin6[1] = 
NULL; - fiop->f_fout6[0] = NULL; - fiop->f_fout6[1] = NULL; - fiop->f_acctin6[0] = NULL; - fiop->f_acctin6[1] = NULL; - fiop->f_acctout6[0] = NULL; - fiop->f_acctout6[1] = NULL; -#endif - fiop->f_active = fr_active; - fiop->f_froute[0] = ipl_frouteok[0]; - fiop->f_froute[1] = ipl_frouteok[1]; - fiop->f_running = fr_running; - fiop->f_groups[0][0] = ipfgroups[0][0]; - fiop->f_groups[0][1] = ipfgroups[0][1]; - fiop->f_groups[1][0] = ipfgroups[1][0]; - fiop->f_groups[1][1] = ipfgroups[1][1]; - fiop->f_groups[2][0] = ipfgroups[2][0]; - fiop->f_groups[2][1] = ipfgroups[2][1]; -#ifdef IPFILTER_LOG - fiop->f_logging = 1; -#else - fiop->f_logging = 0; -#endif - fiop->f_defpass = fr_pass; - strncpy(fiop->f_version, ipfilter_version, sizeof(fiop->f_version)); + switch (icmptype) + { + case ICMP_SOURCEQUENCH : + case ICMP_PARAMPROB : + case ICMP_REDIRECT : + case ICMP_TIMXCEED : + case ICMP_UNREACH : + return 1; + default: + return 0; + } } -#ifdef USE_INET6 -int icmptoicmp6types[ICMP_MAXTYPE+1] = { - ICMP6_ECHO_REPLY, /* 0: ICMP_ECHOREPLY */ - -1, /* 1: UNUSED */ - -1, /* 2: UNUSED */ - ICMP6_DST_UNREACH, /* 3: ICMP_UNREACH */ - -1, /* 4: ICMP_SOURCEQUENCH */ - ND_REDIRECT, /* 5: ICMP_REDIRECT */ - -1, /* 6: UNUSED */ - -1, /* 7: UNUSED */ - ICMP6_ECHO_REQUEST, /* 8: ICMP_ECHO */ - -1, /* 9: UNUSED */ - -1, /* 10: UNUSED */ - ICMP6_TIME_EXCEEDED, /* 11: ICMP_TIMXCEED */ - ICMP6_PARAM_PROB, /* 12: ICMP_PARAMPROB */ - -1, /* 13: ICMP_TSTAMP */ - -1, /* 14: ICMP_TSTAMPREPLY */ - -1, /* 15: ICMP_IREQ */ - -1, /* 16: ICMP_IREQREPLY */ - -1, /* 17: ICMP_MASKREQ */ - -1, /* 18: ICMP_MASKREPLY */ -}; - - -int icmptoicmp6unreach[ICMP_MAX_UNREACH] = { - ICMP6_DST_UNREACH_ADDR, /* 0: ICMP_UNREACH_NET */ - ICMP6_DST_UNREACH_ADDR, /* 1: ICMP_UNREACH_HOST */ - -1, /* 2: ICMP_UNREACH_PROTOCOL */ - ICMP6_DST_UNREACH_NOPORT, /* 3: ICMP_UNREACH_PORT */ - -1, /* 4: ICMP_UNREACH_NEEDFRAG */ - ICMP6_DST_UNREACH_NOTNEIGHBOR, /* 5: ICMP_UNREACH_SRCFAIL */ - ICMP6_DST_UNREACH_ADDR, /* 6: 
ICMP_UNREACH_NET_UNKNOWN */ - ICMP6_DST_UNREACH_ADDR, /* 7: ICMP_UNREACH_HOST_UNKNOWN */ - -1, /* 8: ICMP_UNREACH_ISOLATED */ - ICMP6_DST_UNREACH_ADMIN, /* 9: ICMP_UNREACH_NET_PROHIB */ - ICMP6_DST_UNREACH_ADMIN, /* 10: ICMP_UNREACH_HOST_PROHIB */ - -1, /* 11: ICMP_UNREACH_TOSNET */ - -1, /* 12: ICMP_UNREACH_TOSHOST */ - ICMP6_DST_UNREACH_ADMIN, /* 13: ICMP_UNREACH_ADMIN_PROHIBIT */ -}; -#endif - - -#ifndef _KERNEL -int mbuflen(buf) -mb_t *buf; +/* ------------------------------------------------------------------------ */ +/* Function: fr_resolvenic */ +/* Returns: void* - NULL = wildcard name, -1 = failed to find NIC, else */ +/* pointer to interface structure for NIC */ +/* Parameters: name(I) - complete interface name */ +/* v(I) - IP protocol version */ +/* */ +/* Look for a network interface structure that firstly has a matching name */ +/* to that passed in and that is also being used for that IP protocol */ +/* version (necessary on some platforms where there are separate listings */ +/* for both IPv4 and IPv6 on the same physical NIC. */ +/* */ +/* One might wonder why name gets terminated with a \0 byte in here. The */ +/* reason is an interface name could get into the kernel structures of ipf */ +/* in any number of ways and so long as they all use the same sized array */ +/* to put the name in, it makes sense to ensure it gets null terminated */ +/* before it is used for its intended purpose - finding its match in the */ +/* kernel's list of configured interfaces. */ +/* */ +/* NOTE: This SHOULD ONLY be used with IPFilter structures that have an */ +/* array for the name that is LIFNAMSIZ bytes (at least) in length. 
*/ +/* ------------------------------------------------------------------------ */ +void *fr_resolvenic(name, v) +char *name; +int v; { - ip_t *ip; - - ip = (ip_t *)buf; - return ip->ip_len; -} -#endif - + void *nic; -#if defined(_KERNEL) && !defined(__sgi) -void *ipf_pullup(m, fin, len, ipin) -mb_t *m; -fr_info_t *fin; -int len; -void *ipin; -{ -# if SOLARIS - qif_t *qf = fin->fin_qif; -# endif - int out = fin->fin_out, dpoff, ipoff; - char *ip; + if (name[0] == '\0') + return NULL; - if (m == NULL) + if ((name[1] == '\0') && ((name[0] == '-') || (name[0] == '*'))) { return NULL; + } - ipoff = (char *)ipin - MTOD(m, char *); - if (fin->fin_dp != NULL) - dpoff = (char *)fin->fin_dp - (char *)ipin; - else - dpoff = 0; + name[LIFNAMSIZ - 1] = '\0'; - if (M_BLEN(m) < len) { -# if SOLARIS - qif_t *qf = fin->fin_qif; - int inc = 0; - - if (ipoff > 0) { - if ((ipoff & 3) != 0) { - inc = 4 - (ipoff & 3); - if (m->b_rptr - inc >= m->b_datap->db_base) - m->b_rptr -= inc; - else - inc = 0; - } - } - if (!pullupmsg(m, len + ipoff + inc)) { - ATOMIC_INCL(frstats[out].fr_pull[1]); - return NULL; - } - m->b_rptr += inc; - ATOMIC_INCL(frstats[out].fr_pull[0]); - qf->qf_data = MTOD(m, char *) + ipoff; -# else -# if (__FreeBSD_version >= 490000) - if ((len > MHLEN) && ((m->m_flags & M_PKTHDR) != 0)) - m = m_defrag(m, M_DONTWAIT); - else -# endif - m = m_pullup(m, len); - *fin->fin_mp = m; - if (m == NULL) { - ATOMIC_INCL(frstats[out].fr_pull[1]); - return NULL; - } - ATOMIC_INCL(frstats[out].fr_pull[0]); -# endif /* SOLARIS */ - } - ip = MTOD(m, char *) + ipoff; - if (fin->fin_dp != NULL) - fin->fin_dp = (char *)ip + dpoff; - return ip; + nic = GETIFP(name, v); + if (nic == NULL) + nic = (void *)-1; + return nic; } -#endif /* _KERNEL */ diff --git a/sys/contrib/ipfilter/netinet/ip_auth.c b/sys/contrib/ipfilter/netinet/ip_auth.c index 1f2da07..bef9977 100644 --- a/sys/contrib/ipfilter/netinet/ip_auth.c +++ b/sys/contrib/ipfilter/netinet/ip_auth.c @@ -1,39 +1,49 @@ +/* $FreeBSD$ */ + 
/* - * Copyright (C) 1998-2001 by Darren Reed & Guido van Rooij. + * Copyright (C) 1998-2003 by Darren Reed & Guido van Rooij. * * See the IPFILTER.LICENCE file for details on licencing. */ -#if defined(__sgi) && (IRIX > 602) -# include +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 #endif #include #include #include #include #include -#if !defined(_KERNEL) && !defined(KERNEL) +#if !defined(_KERNEL) # include # include # include +# define _KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include +# undef _KERNEL #endif -#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && (__FreeBSD_version >= 220000) # include # include #else # include #endif -#ifndef linux +#if !defined(linux) # include #endif #include -#if (defined(_KERNEL) || defined(KERNEL)) && !defined(linux) +#if defined(_KERNEL) # include -#endif -#if !defined(__SVR4) && !defined(__svr4__) -# ifndef linux +# if !defined(__SVR4) && !defined(__svr4__) && !defined(linux) # include # endif -#else +#endif +#if defined(__SVR4) || defined(__svr4__) # include # include # ifdef _KERNEL @@ -48,6 +58,9 @@ #if defined(__NetBSD__) || defined(__OpenBSD__) || defined(bsdi) # include #endif +#if defined(_KERNEL) && defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000) +# include +#endif #include #ifdef sun # include @@ -56,28 +69,29 @@ #include #include #include -#ifndef KERNEL +#if !defined(_KERNEL) && !defined(__osf__) && !defined(__sgi) # define KERNEL +# define _KERNEL # define NOT_KERNEL #endif -#ifndef linux +#if !defined(linux) # include #endif #ifdef NOT_KERNEL +# undef _KERNEL # undef KERNEL #endif -#ifdef __sgi -# ifdef IFF_DRVRLOCK /* IRIX6 */ -# include -# endif -#endif #include -#if defined(__sgi) && !defined(IFF_DRVRLOCK) /* IRIX < 6 */ +#if defined(IRIX) && (IRIX < 60516) /* IRIX < 6 */ extern struct ifqueue ipintrq; /* ip packet input queue */ #else -# ifndef linux +# if !defined(__hpux) 
&& !defined(linux) # if __FreeBSD_version >= 300000 # include +# if __FreeBSD_version >= 500042 +# define IF_QFULL _IF_QFULL +# define IF_DROP _IF_DROP +# endif /* __FreeBSD_version >= 500042 */ # endif # include # include @@ -89,7 +103,7 @@ extern struct ifqueue ipintrq; /* ip packet input queue */ #include #include "netinet/ip_fil.h" #include "netinet/ip_auth.h" -#if !SOLARIS && !defined(linux) +#if !defined(MENTAT) && !defined(linux) # include # ifdef __FreeBSD__ # include @@ -97,59 +111,90 @@ extern struct ifqueue ipintrq; /* ip packet input queue */ #endif #if (__FreeBSD_version >= 300000) # include -# if (defined(_KERNEL) || defined(KERNEL)) && !defined(IPFILTER_LKM) +# if defined(_KERNEL) && !defined(IPFILTER_LKM) # include # include # endif #endif +/* END OF INCLUDES */ #if !defined(lint) -/* static const char rcsid[] = "@(#)$Id: ip_auth.c,v 2.11.2.12 2001/07/18 14:57:08 darrenr Exp $"; */ static const char rcsid[] = "@(#)$FreeBSD$"; +static const char rcsid[] = "@(#)Id: ip_auth.c,v 2.73.2.3 2004/08/26 11:25:21 darrenr Exp"; #endif -#ifdef USE_MUTEX -extern KRWLOCK_T ipf_auth, ipf_mutex; -extern kmutex_t ipf_authmx; -# if SOLARIS +#if SOLARIS extern kcondvar_t ipfauthwait; -# endif -#endif -#ifdef linux -static struct wait_queue *ipfauthwait = NULL; +#endif /* SOLARIS */ +#if defined(linux) && defined(_KERNEL) +wait_queue_head_t fr_authnext_linux; #endif int fr_authsize = FR_NUMAUTH; int fr_authused = 0; int fr_defaultauthage = 600; int fr_auth_lock = 0; +int fr_auth_init = 0; fr_authstat_t fr_authstats; -static frauth_t fr_auth[FR_NUMAUTH]; -mb_t *fr_authpkts[FR_NUMAUTH]; -static int fr_authstart = 0, fr_authend = 0, fr_authnext = 0; -static frauthent_t *fae_list = NULL; +static frauth_t *fr_auth = NULL; +mb_t **fr_authpkts = NULL; +int fr_authstart = 0, fr_authend = 0, fr_authnext = 0; +frauthent_t *fae_list = NULL; frentry_t *ipauth = NULL, *fr_authlist = NULL; +int fr_authinit() +{ + KMALLOCS(fr_auth, frauth_t *, fr_authsize * sizeof(*fr_auth)); + if 
(fr_auth != NULL) + bzero((char *)fr_auth, fr_authsize * sizeof(*fr_auth)); + else + return -1; + + KMALLOCS(fr_authpkts, mb_t **, fr_authsize * sizeof(*fr_authpkts)); + if (fr_authpkts != NULL) + bzero((char *)fr_authpkts, fr_authsize * sizeof(*fr_authpkts)); + else + return -2; + + MUTEX_INIT(&ipf_authmx, "ipf auth log mutex"); + RWLOCK_INIT(&ipf_auth, "ipf IP User-Auth rwlock"); +#if SOLARIS && defined(_KERNEL) + cv_init(&ipfauthwait, "ipf auth condvar", CV_DRIVER, NULL); +#endif +#if defined(linux) && defined(_KERNEL) + init_waitqueue_head(&fr_authnext_linux); +#endif + + fr_auth_init = 1; + + return 0; +} + + /* * Check if a packet has authorization. If the packet is found to match an * authorization result and that would result in a feedback loop (i.e. it * will end up returning FR_AUTH) then return FR_BLOCK instead. */ -u_32_t fr_checkauth(ip, fin) -ip_t *ip; +frentry_t *fr_checkauth(fin, passp) fr_info_t *fin; +u_32_t *passp; { - u_short id = ip->ip_id; frentry_t *fr; frauth_t *fra; u_32_t pass; + u_short id; + ip_t *ip; int i; if (fr_auth_lock || !fr_authused) - return 0; + return NULL; + + ip = fin->fin_ip; + id = ip->ip_id; READ_ENTER(&ipf_auth); for (i = fr_authstart; i != fr_authend; ) { @@ -164,7 +209,7 @@ fr_info_t *fin; /* * Avoid feedback loop. */ - if (!(pass = fra->fra_pass) || (pass & FR_AUTH)) + if (!(pass = fra->fra_pass) || (FR_ISAUTH(pass))) pass = FR_BLOCK; /* * Create a dummy rule for the stateful checking to @@ -172,26 +217,26 @@ fr_info_t *fin; * trust from userland! 
*/ if ((pass & FR_KEEPSTATE) || ((pass & FR_KEEPFRAG) && - (fin->fin_fi.fi_fl & FI_FRAG))) { + (fin->fin_flx & FI_FRAG))) { KMALLOC(fr, frentry_t *); if (fr) { bcopy((char *)fra->fra_info.fin_fr, - fr, sizeof(*fr)); + (char *)fr, sizeof(*fr)); fr->fr_grp = NULL; fr->fr_ifa = fin->fin_ifp; fr->fr_func = NULL; fr->fr_ref = 1; fr->fr_flags = pass; -#if BSD >= 199306 - fr->fr_oifa = NULL; -#endif + fr->fr_ifas[1] = NULL; + fr->fr_ifas[2] = NULL; + fr->fr_ifas[3] = NULL; } } else fr = fra->fra_info.fin_fr; fin->fin_fr = fr; RWLOCK_EXIT(&ipf_auth); WRITE_ENTER(&ipf_auth); - if (fr && fr != fra->fra_info.fin_fr) { + if ((fr != NULL) && (fr != fra->fra_info.fin_fr)) { fr->fr_next = fr_authlist; fr_authlist = fr; } @@ -202,7 +247,7 @@ fr_info_t *fin; while (fra->fra_index == -1) { i++; fra++; - if (i == FR_NUMAUTH) { + if (i == fr_authsize) { i = 0; fra = fr_auth; } @@ -216,15 +261,19 @@ fr_info_t *fin; } } RWLOCK_EXIT(&ipf_auth); - return pass; + if (passp != NULL) + *passp = pass; + ATOMIC_INC64(fr_authstats.fas_hits); + return fr; } i++; - if (i == FR_NUMAUTH) + if (i == fr_authsize) i = 0; } fr_authstats.fas_miss++; RWLOCK_EXIT(&ipf_auth); - return 0; + ATOMIC_INC64(fr_authstats.fas_miss); + return NULL; } @@ -233,15 +282,17 @@ fr_info_t *fin; * If we do, store it and wake up any user programs which are waiting to * hear about these events. 
*/ -int fr_newauth(m, fin, ip) +int fr_newauth(m, fin) mb_t *m; fr_info_t *fin; -ip_t *ip; { -#if defined(_KERNEL) && SOLARIS - qif_t *qif = fin->fin_qif; +#if defined(_KERNEL) && defined(MENTAT) + qpktinfo_t *qpi = fin->fin_qpi; #endif frauth_t *fra; +#if !defined(sparc) && !defined(m68k) + ip_t *ip; +#endif int i; if (fr_auth_lock) @@ -253,7 +304,7 @@ ip_t *ip; RWLOCK_EXIT(&ipf_auth); return 0; } else { - if (fr_authused == FR_NUMAUTH) { + if (fr_authused == fr_authsize) { fr_authstats.fas_nospace++; RWLOCK_EXIT(&ipf_auth); return 0; @@ -263,21 +314,24 @@ ip_t *ip; fr_authstats.fas_added++; fr_authused++; i = fr_authend++; - if (fr_authend == FR_NUMAUTH) + if (fr_authend == fr_authsize) fr_authend = 0; RWLOCK_EXIT(&ipf_auth); + fra = fr_auth + i; fra->fra_index = i; fra->fra_pass = 0; fra->fra_age = fr_defaultauthage; bcopy((char *)fin, (char *)&fra->fra_info, sizeof(*fin)); -#if SOLARIS && defined(_KERNEL) -# if !defined(sparc) +#if !defined(sparc) && !defined(m68k) /* * No need to copyback here as we want to undo the changes, not keep * them. */ - if ((ip == (ip_t *)m->b_rptr) && (ip->ip_v == 4)) + ip = fin->fin_ip; +# if defined(MENTAT) && defined(_KERNEL) + if ((ip == (ip_t *)m->b_rptr) && (fin->fin_v == 4)) +# endif { register u_short bo; @@ -286,43 +340,42 @@ ip_t *ip; bo = ip->ip_off; ip->ip_off = htons(bo); } -# endif - m->b_rptr -= qif->qf_off; +#endif +#if SOLARIS && defined(_KERNEL) + m->b_rptr -= qpi->qpi_off; fr_authpkts[i] = *(mblk_t **)fin->fin_mp; - fra->fra_q = qif->qf_q; + fra->fra_q = qpi->qpi_q; /* The queue can disappear! 
*/ cv_signal(&ipfauthwait); #else # if defined(BSD) && !defined(sparc) && (BSD >= 199306) - if (fin->fin_out == 0) { + if (!fin->fin_out) { ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); } # endif fr_authpkts[i] = m; - WAKEUP(&fr_authnext); + WAKEUP(&fr_authnext,0); #endif return 1; } -int fr_auth_ioctl(data, mode, cmd) +int fr_auth_ioctl(data, cmd, mode) caddr_t data; +ioctlcmd_t cmd; int mode; -#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003) -u_long cmd; -#else -int cmd; -#endif { mb_t *m; -#if defined(_KERNEL) && !SOLARIS -# if !defined(__FreeBSD_version) || (__FreeBSD_version < 501104) +#if defined(_KERNEL) && !defined(MENTAT) && !defined(linux) && \ + (!defined(__FreeBSD_version) || (__FreeBSD_version < 501000)) struct ifqueue *ifq; -# endif +# ifdef USE_SPL int s; +# endif /* USE_SPL */ #endif frauth_t auth, *au = &auth, *fra; - int i, error = 0; + int i, error = 0, len; + char *t; switch (cmd) { @@ -331,81 +384,119 @@ int cmd; error = EPERM; break; } - error = fr_lock(data, &fr_auth_lock); - break; - case SIOCINIFR : - case SIOCRMIFR : - case SIOCADIFR : - error = EINVAL; - break; - case SIOCINAFR : - error = EINVAL; - break; - case SIOCRMAFR : - case SIOCADAFR : - /* These commands go via request to fr_preauthcmd */ - error = EINVAL; + fr_lock(data, &fr_auth_lock); break; + case SIOCATHST: fr_authstats.fas_faelist = fae_list; - error = IWCOPYPTR((char *)&fr_authstats, data, - sizeof(fr_authstats)); + error = fr_outobj(data, &fr_authstats, IPFOBJ_AUTHSTAT); break; + + case SIOCIPFFL: + SPL_NET(s); + WRITE_ENTER(&ipf_auth); + i = fr_authflush(); + RWLOCK_EXIT(&ipf_auth); + SPL_X(s); + error = copyoutptr((char *)&i, data, sizeof(i)); + break; + case SIOCAUTHW: - if (!(mode & FWRITE)) { - error = EPERM; - break; - } fr_authioctlloop: + error = fr_inobj(data, au, IPFOBJ_FRAUTH); READ_ENTER(&ipf_auth); if ((fr_authnext != fr_authend) && fr_authpkts[fr_authnext]) { - error = IWCOPYPTR((char *)&fr_auth[fr_authnext], 
data, - sizeof(frauth_t)); + error = fr_outobj(data, &fr_auth[fr_authnext], + IPFOBJ_FRAUTH); + if (auth.fra_len != 0 && auth.fra_buf != NULL) { + /* + * Copy packet contents out to user space if + * requested. Bail on an error. + */ + m = fr_authpkts[fr_authnext]; + len = MSGDSIZE(m); + if (len > auth.fra_len) + len = auth.fra_len; + auth.fra_len = len; + for (t = auth.fra_buf; m && (len > 0); ) { + i = MIN(M_LEN(m), len); + error = copyoutptr(MTOD(m, char *), + t, i); + len -= i; + t += i; + if (error != 0) + break; + } + } RWLOCK_EXIT(&ipf_auth); - if (error) + if (error != 0) break; - WRITE_ENTER(&ipf_auth); SPL_NET(s); + WRITE_ENTER(&ipf_auth); fr_authnext++; - if (fr_authnext == FR_NUMAUTH) + if (fr_authnext == fr_authsize) fr_authnext = 0; - SPL_X(s); RWLOCK_EXIT(&ipf_auth); + SPL_X(s); return 0; } RWLOCK_EXIT(&ipf_auth); + /* + * We exit ipf_global here because a program that enters in + * here will have a lock on it and goto sleep having this lock. + * If someone were to do an 'ipf -D' the system would then + * deadlock. The catch with releasing it here is that the + * caller of this function expects it to be held when we + * return so we have to reacquire it in here. 
+ */ + RWLOCK_EXIT(&ipf_global); + + MUTEX_ENTER(&ipf_authmx); #ifdef _KERNEL # if SOLARIS - mutex_enter(&ipf_authmx); - if (!cv_wait_sig(&ipfauthwait, &ipf_authmx)) { - mutex_exit(&ipf_authmx); - return EINTR; + error = 0; + if (!cv_wait_sig(&ipfauthwait, &ipf_authmx.ipf_lk)) + error = EINTR; +# else /* SOLARIS */ +# ifdef __hpux + { + lock_t *l; + + l = get_sleep_lock(&fr_authnext); + error = sleep(&fr_authnext, PZERO+1); + spinunlock(l); } - mutex_exit(&ipf_authmx); -# else +# else +# ifdef __osf__ + error = mpsleep(&fr_authnext, PSUSP|PCATCH, "fr_authnext", 0, + &ipf_authmx, MS_LOCK_SIMPLE); +# else error = SLEEP(&fr_authnext, "fr_authnext"); -# endif +# endif /* __osf__ */ +# endif /* __hpux */ +# endif /* SOLARIS */ #endif - if (!error) + MUTEX_EXIT(&ipf_authmx); + READ_ENTER(&ipf_global); + if (error == 0) { + READ_ENTER(&ipf_auth); goto fr_authioctlloop; + } break; + case SIOCAUTHR: - if (!(mode & FWRITE)) { - error = EPERM; - break; - } - error = IRCOPYPTR(data, (caddr_t)&auth, sizeof(auth)); - if (error) + error = fr_inobj(data, &auth, IPFOBJ_FRAUTH); + if (error != 0) return error; - WRITE_ENTER(&ipf_auth); SPL_NET(s); + WRITE_ENTER(&ipf_auth); i = au->fra_index; fra = fr_auth + i; - if ((i < 0) || (i > FR_NUMAUTH) || + if ((i < 0) || (i >= fr_authsize) || (fra->fra_info.fin_id != au->fra_info.fin_id)) { - SPL_X(s); RWLOCK_EXIT(&ipf_auth); - return EINVAL; + SPL_X(s); + return ESRCH; } m = fr_authpkts[i]; fra->fra_index = -2; @@ -413,50 +504,67 @@ fr_authioctlloop: fr_authpkts[i] = NULL; RWLOCK_EXIT(&ipf_auth); #ifdef _KERNEL - if (m && au->fra_info.fin_out) { -# if SOLARIS - error = (fr_qout(fra->fra_q, m) == 0) ? 
EINVAL : 0; -# else /* SOLARIS */ - struct route ro; - - bzero((char *)&ro, sizeof(ro)); -# if ((_BSDI_VERSION >= 199802) && (_BSDI_VERSION < 200005)) || \ - defined(__OpenBSD__) || (defined(IRIX) && (IRIX >= 605)) || \ - (__FreeBSD_version >= 470102) - error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, - NULL); + if ((m != NULL) && (au->fra_info.fin_out != 0)) { +# ifdef MENTAT + error = !putq(fra->fra_q, m); +# else /* MENTAT */ +# ifdef linux # else - error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL); -# endif - if (ro.ro_rt) { - RTFREE(ro.ro_rt); - } -# endif /* SOLARIS */ - if (error) +# if (_BSDI_VERSION >= 199802) || defined(__OpenBSD__) || \ + (defined(__sgi) && (IRIX >= 60500) || \ + (defined(__FreeBSD__) && (__FreeBSD_version >= 470102))) + error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, + NULL); +# else + error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL); +# endif +# endif /* Linux */ +# endif /* MENTAT */ + if (error != 0) fr_authstats.fas_sendfail++; else fr_authstats.fas_sendok++; } else if (m) { -# if SOLARIS - error = (fr_qin(fra->fra_q, m) == 0) ? EINVAL : 0; -# else /* SOLARIS */ - if (! 
netisr_queue(NETISR_IP, m)) +# ifdef MENTAT + error = !putq(fra->fra_q, m); +# else /* MENTAT */ +# ifdef linux +# else +# if __FreeBSD_version >= 501000 + netisr_dispatch(NETISR_IP, m); +# else +# if IRIX >= 60516 + ifq = &((struct ifnet *)fra->fra_info.fin_ifp)->if_snd; +# else + ifq = &ipintrq; +# endif + if (IF_QFULL(ifq)) { + IF_DROP(ifq); + FREE_MB_T(m); error = ENOBUFS; -# endif /* SOLARIS */ - if (error) + } else { + IF_ENQUEUE(ifq, m); +# if IRIX < 60500 + schednetisr(NETISR_IP); +# endif + } +# endif +# endif /* Linux */ +# endif /* MENTAT */ + if (error != 0) fr_authstats.fas_quefail++; else fr_authstats.fas_queok++; } else error = EINVAL; -# if SOLARIS - if (error) +# ifdef MENTAT + if (error != 0) error = EINVAL; -# else +# else /* MENTAT */ /* * If we experience an error which will result in the packet * not being processed, make sure we advance to the next one. - */ + */ if (error == ENOBUFS) { fr_authused--; fra->fra_index = -1; @@ -464,7 +572,7 @@ fr_authioctlloop: if (i == fr_authstart) { while (fra->fra_index == -1) { i++; - if (i == FR_NUMAUTH) + if (i == fr_authsize) i = 0; fr_authstart = i; if (i == fr_authend) @@ -476,10 +584,11 @@ fr_authioctlloop: } } } -# endif +# endif /* MENTAT */ #endif /* _KERNEL */ SPL_X(s); break; + default : error = EINVAL; break; @@ -498,41 +607,48 @@ void fr_authunload() frentry_t *fr, **frp; mb_t *m; - WRITE_ENTER(&ipf_auth); - for (i = 0; i < FR_NUMAUTH; i++) { - if ((m = fr_authpkts[i])) { - FREE_MB_T(m); - fr_authpkts[i] = NULL; - fr_auth[i].fra_index = -1; - } + if (fr_auth != NULL) { + KFREES(fr_auth, fr_authsize * sizeof(*fr_auth)); + fr_auth = NULL; } + if (fr_authpkts != NULL) { + for (i = 0; i < fr_authsize; i++) { + m = fr_authpkts[i]; + if (m != NULL) { + FREE_MB_T(m); + fr_authpkts[i] = NULL; + } + } + KFREES(fr_authpkts, fr_authsize * sizeof(*fr_authpkts)); + fr_authpkts = NULL; + } - for (faep = &fae_list; (fae = *faep); ) { + faep = &fae_list; + while ((fae = *faep) != NULL) { *faep = 
fae->fae_next; KFREE(fae); } ipauth = NULL; - RWLOCK_EXIT(&ipf_auth); - if (fr_authlist) { - /* - * We *MuST* reget ipf_auth because otherwise we won't get the - * locks in the right order and risk deadlock. - * We need ipf_mutex here to prevent a rule from using it - * inside fr_check(). - */ - WRITE_ENTER(&ipf_mutex); - WRITE_ENTER(&ipf_auth); - for (frp = &fr_authlist; (fr = *frp); ) { + if (fr_authlist != NULL) { + for (frp = &fr_authlist; ((fr = *frp) != NULL); ) { if (fr->fr_ref == 1) { *frp = fr->fr_next; KFREE(fr); } else frp = &fr->fr_next; } - RWLOCK_EXIT(&ipf_auth); - RWLOCK_EXIT(&ipf_mutex); + } + + if (fr_auth_init == 1) { +# if SOLARIS && defined(_KERNEL) + cv_destroy(&ipfauthwait); +# endif + MUTEX_DESTROY(&ipf_authmx); + RW_DESTROY(&ipf_auth); + + fr_auth_init = 0; } } @@ -548,17 +664,18 @@ void fr_authexpire() register frauthent_t *fae, **faep; register frentry_t *fr, **frp; mb_t *m; -#if !SOLARIS && defined(_KERNEL) +# if !defined(MENAT) && defined(_KERNEL) && defined(USE_SPL) int s; -#endif +# endif if (fr_auth_lock) return; SPL_NET(s); WRITE_ENTER(&ipf_auth); - for (i = 0, fra = fr_auth; i < FR_NUMAUTH; i++, fra++) { - if ((!--fra->fra_age) && (m = fr_authpkts[i])) { + for (i = 0, fra = fr_auth; i < fr_authsize; i++, fra++) { + fra->fra_age--; + if ((fra->fra_age == 0) && (m = fr_authpkts[i])) { FREE_MB_T(m); fr_authpkts[i] = NULL; fr_auth[i].fra_index = -1; @@ -567,8 +684,9 @@ void fr_authexpire() } } - for (faep = &fae_list; (fae = *faep); ) { - if (!--fae->fae_age) { + for (faep = &fae_list; ((fae = *faep) != NULL); ) { + fae->fae_age--; + if (fae->fae_age == 0) { *faep = fae->fae_next; KFREE(fae); fr_authstats.fas_expire++; @@ -580,7 +698,7 @@ void fr_authexpire() else ipauth = NULL; - for (frp = &fr_authlist; (fr = *frp); ) { + for (frp = &fr_authlist; ((fr = *frp) != NULL); ) { if (fr->fr_ref == 1) { *frp = fr->fr_next; KFREE(fr); @@ -592,52 +710,48 @@ void fr_authexpire() } int fr_preauthcmd(cmd, fr, frptr) -#if defined(__NetBSD__) || 
defined(__OpenBSD__) || \ - (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) -u_long cmd; -#else -int cmd; -#endif +ioctlcmd_t cmd; frentry_t *fr, **frptr; { frauthent_t *fae, **faep; int error = 0; -#if defined(KERNEL) && !SOLARIS +# if !defined(MENAT) && defined(_KERNEL) && defined(USE_SPL) int s; #endif - if ((cmd != SIOCADAFR) && (cmd != SIOCRMAFR)) { - /* Should not happen */ - printf("fr_preauthcmd called with bad cmd 0x%lx", (u_long)cmd); + if ((cmd != SIOCADAFR) && (cmd != SIOCRMAFR)) return EIO; - } - for (faep = &fae_list; (fae = *faep); ) + for (faep = &fae_list; ((fae = *faep) != NULL); ) { if (&fae->fae_fr == fr) break; else faep = &fae->fae_next; - if (cmd == SIOCRMAFR) { - if (!fr || !frptr) + } + + if (cmd == (ioctlcmd_t)SIOCRMAFR) { + if (fr == NULL || frptr == NULL) error = EINVAL; - else if (!fae) + else if (fae == NULL) error = ESRCH; else { - WRITE_ENTER(&ipf_auth); SPL_NET(s); + WRITE_ENTER(&ipf_auth); *faep = fae->fae_next; - *frptr = fr->fr_next; - SPL_X(s); + if (ipauth == &fae->fae_fr) + ipauth = fae_list ? &fae_list->fae_fr : NULL; RWLOCK_EXIT(&ipf_auth); + SPL_X(s); + KFREE(fae); } - } else if (fr && frptr) { + } else if (fr != NULL && frptr != NULL) { KMALLOC(fae, frauthent_t *); if (fae != NULL) { bcopy((char *)fr, (char *)&fae->fae_fr, sizeof(*fr)); - WRITE_ENTER(&ipf_auth); SPL_NET(s); + WRITE_ENTER(&ipf_auth); fae->fae_age = fr_defaultauthage; fae->fae_fr.fr_hits = 0; fae->fae_fr.fr_next = *frptr; @@ -645,11 +759,47 @@ frentry_t *fr, **frptr; fae->fae_next = *faep; *faep = fae; ipauth = &fae_list->fae_fr; - SPL_X(s); RWLOCK_EXIT(&ipf_auth); + SPL_X(s); } else error = ENOMEM; } else error = EINVAL; return error; } + + +/* + * Flush held packets. + * Must already be properly SPL'ed and Locked on &ipf_auth. 
+ * + */ +int fr_authflush() +{ + register int i, num_flushed; + mb_t *m; + + if (fr_auth_lock) + return -1; + + num_flushed = 0; + + for (i = 0 ; i < fr_authsize; i++) { + m = fr_authpkts[i]; + if (m != NULL) { + FREE_MB_T(m); + fr_authpkts[i] = NULL; + fr_auth[i].fra_index = -1; + /* perhaps add & use a flush counter inst.*/ + fr_authstats.fas_expire++; + fr_authused--; + num_flushed++; + } + } + + fr_authstart = 0; + fr_authend = 0; + fr_authnext = 0; + + return num_flushed; +} diff --git a/sys/contrib/ipfilter/netinet/ip_auth.h b/sys/contrib/ipfilter/netinet/ip_auth.h index 64fc2d7..5c93610 100644 --- a/sys/contrib/ipfilter/netinet/ip_auth.h +++ b/sys/contrib/ipfilter/netinet/ip_auth.h @@ -1,9 +1,12 @@ +/* $FreeBSD$ */ + /* * Copyright (C) 1997-2001 by Darren Reed & Guido Van Rooij. * * See the IPFILTER.LICENCE file for details on licencing. * * $FreeBSD$ + * Id: ip_auth.h,v 2.16 2003/07/25 12:29:56 darrenr Exp * */ #ifndef __IP_AUTH_H__ @@ -13,10 +16,12 @@ typedef struct frauth { int fra_age; + int fra_len; int fra_index; u_32_t fra_pass; fr_info_t fra_info; -#if SOLARIS + char *fra_buf; +#ifdef MENTAT queue_t *fra_q; #endif } frauth_t; @@ -44,20 +49,19 @@ typedef struct fr_authstat { extern frentry_t *ipauth; extern struct fr_authstat fr_authstats; extern int fr_defaultauthage; +extern int fr_authstart; +extern int fr_authend; extern int fr_authsize; extern int fr_authused; extern int fr_auth_lock; -extern u_32_t fr_checkauth __P((ip_t *, fr_info_t *)); +extern frentry_t *fr_checkauth __P((fr_info_t *, u_32_t *)); extern void fr_authexpire __P((void)); +extern int fr_authinit __P((void)); extern void fr_authunload __P((void)); -extern mb_t *fr_authpkts[]; -extern int fr_newauth __P((mb_t *, fr_info_t *, ip_t *)); -#if defined(__NetBSD__) || defined(__OpenBSD__) || \ - (__FreeBSD_version >= 300003) -extern int fr_preauthcmd __P((u_long, frentry_t *, frentry_t **)); -extern int fr_auth_ioctl __P((caddr_t, int, u_long)); -#else -extern int fr_preauthcmd 
__P((int, frentry_t *, frentry_t **)); -extern int fr_auth_ioctl __P((caddr_t, int, int)); -#endif +extern int fr_authflush __P((void)); +extern mb_t **fr_authpkts; +extern int fr_newauth __P((mb_t *, fr_info_t *)); +extern int fr_preauthcmd __P((ioctlcmd_t, frentry_t *, frentry_t **)); +extern int fr_auth_ioctl __P((caddr_t, ioctlcmd_t, int)); + #endif /* __IP_AUTH_H__ */ diff --git a/sys/contrib/ipfilter/netinet/ip_compat.h b/sys/contrib/ipfilter/netinet/ip_compat.h index d762684..7ae4b8f 100644 --- a/sys/contrib/ipfilter/netinet/ip_compat.h +++ b/sys/contrib/ipfilter/netinet/ip_compat.h @@ -1,11 +1,13 @@ +/* $FreeBSD$ */ + /* - * Copyright (C) 1993-2001 by Darren Reed. + * Copyright (C) 1993-2001, 2003 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. * * @(#)ip_compat.h 1.8 1/14/96 - * $Id: ip_compat.h,v 2.26.2.9 2001/01/14 14:58:01 darrenr Exp $ * $FreeBSD$ + * Id: ip_compat.h,v 2.142.2.25 2005/03/28 09:33:36 darrenr Exp */ #ifndef __IP_COMPAT_H__ @@ -23,25 +25,6 @@ # define const #endif -#ifndef SOLARIS -#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) -#endif -#if SOLARIS -# if !defined(SOLARIS2) -# define SOLARIS2 3 /* Pick an old version */ -# endif -# if SOLARIS2 >= 8 -# ifndef USE_INET6 -# define USE_INET6 -# endif -# else -# undef USE_INET6 -# endif -#endif -#if defined(sun) && !(defined(__svr4__) || defined(__SVR4)) -# undef USE_INET6 -#endif - #if defined(_KERNEL) || defined(KERNEL) || defined(__KERNEL__) # undef KERNEL # undef _KERNEL @@ -51,27 +34,53 @@ # define __KERNEL__ #endif -#if defined(__SVR4) || defined(__svr4__) || defined(__sgi) -#define index strchr -# if !defined(KERNEL) -# define bzero(a,b) memset(a,0,b) -# define bcmp memcmp -# define bcopy(a,b,c) memmove(b,a,c) +#ifndef SOLARIS +#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) +#endif +#if SOLARIS2 >= 8 +# ifndef USE_INET6 +# define USE_INET6 # endif #endif +#if defined(__FreeBSD_version) && (__FreeBSD_version 
>= 400000) && \ + !defined(_KERNEL) && !defined(USE_INET6) && !defined(NOINET6) +# define USE_INET6 +#endif +#if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 105000000) && \ + !defined(_KERNEL) && !defined(USE_INET6) +# define USE_INET6 +# define IPFILTER_M_IPFILTER +#endif +#if defined(OpenBSD) && (OpenBSD >= 200206) && \ + !defined(_KERNEL) && !defined(USE_INET6) +# define USE_INET6 +#endif +#if defined(__osf__) +# define USE_INET6 +#endif +#if defined(linux) && (!defined(_KERNEL) || defined(CONFIG_IPV6)) +# define USE_INET6 +#endif +#if defined(HPUXREV) && (HPUXREV >= 1111) +# define USE_INET6 +#endif -#ifndef offsetof -#define offsetof(t,m) (int)((&((t *)0L)->m)) +#if defined(BSD) && (BSD < 199103) && defined(__osf__) +# undef BSD +# define BSD 199103 #endif -#if defined(__sgi) || defined(bsdi) -struct ether_addr { - u_char ether_addr_octet[6]; -}; +#if defined(__SVR4) || defined(__svr4__) || defined(__sgi) +# define index strchr +# if !defined(_KERNEL) +# define bzero(a,b) memset(a,0,b) +# define bcmp memcmp +# define bcopy(a,b,c) memmove(b,a,c) +# endif #endif -#ifndef LIFNAMSIZ -# ifdef IF_NAMESIZE +#ifndef LIFNAMSIZ +# ifdef IF_NAMESIZE # define LIFNAMSIZ IF_NAMESIZE # else # ifdef IFNAMSIZ @@ -82,6 +91,12 @@ struct ether_addr { # endif #endif +#if defined(__sgi) || defined(bsdi) || defined(__hpux) || defined(hpux) +struct ether_addr { + u_char ether_addr_octet[6]; +}; +#endif + #if defined(__sgi) && !defined(IPFILTER_LKM) # ifdef __STDC__ # define IPL_EXTERN(ep) ipfilter##ep @@ -96,354 +111,108 @@ struct ether_addr { # endif #endif -#ifdef __sgi -# include -#endif - -#ifdef linux -# include -#endif - /* * This is a workaround for troubles on FreeBSD and OpenBSD. 
*/ -#ifndef _KERNEL -# define ADD_KERNEL -# define _KERNEL -# define KERNEL -#endif -#ifdef __OpenBSD__ +#ifndef linux +# ifndef _KERNEL +# define ADD_KERNEL +# define _KERNEL +# define KERNEL +# endif +# ifdef __OpenBSD__ struct file; -#endif -#include -#ifdef ADD_KERNEL -# undef _KERNEL -# undef KERNEL +# endif +# include +# ifdef ADD_KERNEL +# undef _KERNEL +# undef KERNEL +# endif #endif -#if SOLARIS -# define MTYPE(m) ((m)->b_datap->db_type) -# if SOLARIS2 >= 4 -# include -# endif + +/* ----------------------------------------------------------------------- */ +/* S O L A R I S */ +/* ----------------------------------------------------------------------- */ +#if SOLARIS +# define MENTAT 1 +# include +# include +# include # include # include # include +# if SOLARIS2 >= 10 +# include +# include +# include +# include +# endif /* * because Solaris 2 defines these in two places :-/ */ +# ifndef KERNEL +# define _KERNEL +# undef RES_INIT +# endif /* _KERNEL */ + +# if SOLARIS2 >= 8 +# include +# include +# endif + +# include +/* These 5 are defined in and */ # undef IPOPT_EOL # undef IPOPT_NOP # undef IPOPT_LSRR # undef IPOPT_RR # undef IPOPT_SSRR +# ifdef i386 +# define _SYS_PROMIF_H +# endif +# include +# undef COPYOUT +# include # ifndef KERNEL -# define _KERNEL -# undef RES_INIT -# if SOLARIS2 >= 8 -# include -# endif -# include -# include -# include # undef _KERNEL -# else /* _KERNEL */ -# if SOLARIS2 >= 8 -# include -# endif -# include -# include -# include -# endif /* _KERNEL */ +# endif # if SOLARIS2 >= 8 +# define SNPRINTF snprintf + # include -# include # define ipif_local_addr ipif_lcl_addr /* Only defined in private include file */ # ifndef V4_PART_OF_V6 # define V4_PART_OF_V6(v6) v6.s6_addr32[3] # endif -# endif -# define M_BLEN(m) ((m)->b_wptr - (m)->b_rptr) - -typedef struct qif { - struct qif *qf_next; - ill_t *qf_ill; - kmutex_t qf_lock; - void *qf_iptr; - void *qf_optr; - queue_t *qf_in; - queue_t *qf_out; - void *qf_data; /* layer 3 header 
pointer */ - struct qinit *qf_wqinfo; - struct qinit *qf_rqinfo; - struct qinit qf_wqinit; - struct qinit qf_rqinit; - mblk_t *qf_m; /* These three fields are for passing data up from */ - queue_t *qf_q; /* fr_qin and fr_qout to the packet processing. */ - size_t qf_off; - size_t qf_len; /* this field is used for in ipfr_fastroute */ - char qf_name[LIFNAMSIZ]; - /* - * in case the ILL has disappeared... - */ - size_t qf_hl; /* header length */ - int qf_sap; -# if SOLARIS2 >= 8 - int qf_tunoff; /* tunnel offset */ -#endif - size_t qf_incnt; - size_t qf_outcnt; -} qif_t; -#else /* SOLARIS */ -# if !defined(__sgi) -typedef int minor_t; -# endif -#endif /* SOLARIS */ -#define IPMINLEN(i, h) ((i)->ip_len >= ((i)->ip_hl * 4 + sizeof(struct h))) - -#ifndef IP_OFFMASK -#define IP_OFFMASK 0x1fff -#endif - -#if BSD > 199306 -# define USE_QUAD_T -# define U_QUAD_T u_quad_t -# define QUAD_T quad_t -#else /* BSD > 199306 */ -# define U_QUAD_T u_long -# define QUAD_T long -#endif /* BSD > 199306 */ - - -#if defined(__FreeBSD__) && (defined(KERNEL) || defined(_KERNEL)) -# include -# ifndef __FreeBSD_version -# ifdef IPFILTER_LKM -# include -# else -# include -# endif -# endif -# ifdef IPFILTER_LKM -# define ACTUALLY_LKM_NOT_KERNEL -# endif -# if defined(__FreeBSD_version) && (__FreeBSD_version < 300000) -# include -# else -# if (__FreeBSD_version >= 300000) && (__FreeBSD_version < 400000) -# if defined(IPFILTER_LKM) && !defined(ACTUALLY_LKM_NOT_KERNEL) -# define ACTUALLY_LKM_NOT_KERNEL -# endif -# endif -# endif -#endif /* __FreeBSD__ && KERNEL */ - -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000) && \ - defined(_KERNEL) -# include -#endif +struct ip6_ext { + u_char ip6e_nxt; + u_char ip6e_len; +}; +# endif /* SOLARIS2 >= 8 */ -/* - * These operating systems already take care of the problem for us. 
- */ -#if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \ - defined(__sgi) -typedef u_int32_t u_32_t; -# if defined(_KERNEL) && !defined(IPFILTER_LKM) -# if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 104110000) -# include "opt_inet.h" -# endif -# if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) && \ - !defined(KLD_MODULE) -# include "opt_inet6.h" -# endif -# ifdef INET6 -# define USE_INET6 -# endif -# endif -# if !defined(_KERNEL) && !defined(IPFILTER_LKM) && !defined(USE_INET6) -# if (defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) && \ - !defined(NOINET6)) || \ - (defined(OpenBSD) && (OpenBSD >= 200111)) || \ - (defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 105000000)) -# define USE_INET6 -# endif -# endif -#else -/* - * Really, any arch where sizeof(long) != sizeof(int). - */ -# if defined(__alpha__) || defined(__alpha) || defined(_LP64) -typedef unsigned int u_32_t; -# else -# if SOLARIS2 >= 6 +# if SOLARIS2 >= 6 +# include typedef uint32_t u_32_t; -# else +# else typedef unsigned int u_32_t; -# endif -# endif -#endif /* __NetBSD__ || __OpenBSD__ || __FreeBSD__ || __sgi */ - -#ifdef USE_INET6 -# if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) -# include -# ifdef _KERNEL -# include -# endif -typedef struct ip6_hdr ip6_t; -# endif -# include -union i6addr { - u_32_t i6[4]; - struct in_addr in4; - struct in6_addr in6; -}; -#else -union i6addr { - u_32_t i6[4]; - struct in_addr in4; -}; -#endif - -#define IP6CMP(a,b) bcmp((char *)&(a), (char *)&(b), sizeof(a)) -#define IP6EQ(a,b) (bcmp((char *)&(a), (char *)&(b), sizeof(a)) == 0) -#define IP6NEQ(a,b) (bcmp((char *)&(a), (char *)&(b), sizeof(a)) != 0) -#define IP6_ISZERO(a) ((((union i6addr *)(a))->i6[0] | \ - ((union i6addr *)(a))->i6[1] | \ - ((union i6addr *)(a))->i6[2] | \ - ((union i6addr *)(a))->i6[3]) == 0) -#define IP6_NOTZERO(a) ((((union i6addr *)(a))->i6[0] | \ - ((union i6addr *)(a))->i6[1] | \ - ((union i6addr 
*)(a))->i6[2] | \ - ((union i6addr *)(a))->i6[3]) != 0) - -#ifndef MAX -#define MAX(a,b) (((a) > (b)) ? (a) : (b)) -#endif - -/* - * Security Options for Intenet Protocol (IPSO) as defined in RFC 1108. - * - * Basic Option - * - * 00000001 - (Reserved 4) - * 00111101 - Top Secret - * 01011010 - Secret - * 10010110 - Confidential - * 01100110 - (Reserved 3) - * 11001100 - (Reserved 2) - * 10101011 - Unclassified - * 11110001 - (Reserved 1) - */ -#define IPSO_CLASS_RES4 0x01 -#define IPSO_CLASS_TOPS 0x3d -#define IPSO_CLASS_SECR 0x5a -#define IPSO_CLASS_CONF 0x96 -#define IPSO_CLASS_RES3 0x66 -#define IPSO_CLASS_RES2 0xcc -#define IPSO_CLASS_UNCL 0xab -#define IPSO_CLASS_RES1 0xf1 - -#define IPSO_AUTH_GENSER 0x80 -#define IPSO_AUTH_ESI 0x40 -#define IPSO_AUTH_SCI 0x20 -#define IPSO_AUTH_NSA 0x10 -#define IPSO_AUTH_DOE 0x08 -#define IPSO_AUTH_UN 0x06 -#define IPSO_AUTH_FTE 0x01 - -/* - * IP option #defines - */ -/*#define IPOPT_RR 7 */ -#define IPOPT_ZSU 10 /* ZSU */ -#define IPOPT_MTUP 11 /* MTUP */ -#define IPOPT_MTUR 12 /* MTUR */ -#define IPOPT_ENCODE 15 /* ENCODE */ -/*#define IPOPT_TS 68 */ -#define IPOPT_TR 82 /* TR */ -/*#define IPOPT_SECURITY 130 */ -/*#define IPOPT_LSRR 131 */ -#define IPOPT_E_SEC 133 /* E-SEC */ -#define IPOPT_CIPSO 134 /* CIPSO */ -/*#define IPOPT_SATID 136 */ -#ifndef IPOPT_SID -# define IPOPT_SID IPOPT_SATID -#endif -/*#define IPOPT_SSRR 137 */ -#define IPOPT_ADDEXT 147 /* ADDEXT */ -#define IPOPT_VISA 142 /* VISA */ -#define IPOPT_IMITD 144 /* IMITD */ -#define IPOPT_EIP 145 /* EIP */ -#define IPOPT_FINN 205 /* FINN */ - -#ifndef TCPOPT_WSCALE -# define TCPOPT_WSCALE 3 -#endif - -/* - * Build some macros and #defines to enable the same code to compile anywhere - * Well, that's the idea, anyway :-) - */ -#if SOLARIS -typedef mblk_t mb_t; -# if SOLARIS2 >= 7 -# ifdef lint -# define ALIGN32(ptr) (ptr ? 0L : 0L) -# define ALIGN16(ptr) (ptr ? 
0L : 0L) -# else -# define ALIGN32(ptr) (ptr) -# define ALIGN16(ptr) (ptr) -# endif # endif -#else -typedef struct mbuf mb_t; -#endif /* SOLARIS */ +# define U_32_T 1 -#if !SOLARIS || (SOLARIS2 < 6) || !defined(KERNEL) -# define ATOMIC_INCL ATOMIC_INC -# define ATOMIC_INC64 ATOMIC_INC -# define ATOMIC_INC32 ATOMIC_INC -# define ATOMIC_INC16 ATOMIC_INC -# define ATOMIC_DECL ATOMIC_DEC -# define ATOMIC_DEC64 ATOMIC_DEC -# define ATOMIC_DEC32 ATOMIC_DEC -# define ATOMIC_DEC16 ATOMIC_DEC -#endif -#ifdef __sgi -# define hz HZ -# include -# define IPF_LOCK_PL plhi -# include -#undef kmutex_t -typedef struct { - lock_t *l; - int pl; -} kmutex_t; -# undef MUTEX_INIT -# undef MUTEX_DESTROY -#endif -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 500043) -# include -# include -# include -# define USE_MUTEX 1 -# define kmutex_t struct mtx -# define KRWLOCK_T struct sx -# define NETBSD_PF -#endif -#ifdef KERNEL -# if SOLARIS +# ifdef _KERNEL +# define KRWLOCK_T krwlock_t +# define KMUTEX_T kmutex_t +# include "qif.h" +# include "pfil.h" # if SOLARIS2 >= 6 -# include # if SOLARIS2 == 6 # define ATOMIC_INCL(x) atomic_add_long((uint32_t*)&(x), 1) # define ATOMIC_DECL(x) atomic_add_long((uint32_t*)&(x), -1) # else # define ATOMIC_INCL(x) atomic_add_long(&(x), 1) # define ATOMIC_DECL(x) atomic_add_long(&(x), -1) -# endif +# endif /* SOLARIS2 == 6 */ # define ATOMIC_INC64(x) atomic_add_64((uint64_t*)&(x), 1) # define ATOMIC_INC32(x) atomic_add_32((uint32_t*)&(x), 1) # define ATOMIC_INC16(x) atomic_add_16((uint16_t*)&(x), 1) @@ -451,44 +220,32 @@ typedef struct { # define ATOMIC_DEC32(x) atomic_add_32((uint32_t*)&(x), -1) # define ATOMIC_DEC16(x) atomic_add_16((uint16_t*)&(x), -1) # else -# define IRE_CACHE IRE_ROUTE # define ATOMIC_INC(x) { mutex_enter(&ipf_rw); (x)++; \ mutex_exit(&ipf_rw); } # define ATOMIC_DEC(x) { mutex_enter(&ipf_rw); (x)--; \ mutex_exit(&ipf_rw); } -# endif -# define MUTEX_ENTER(x) mutex_enter(x) -# define USE_MUTEX 1 -# if 1 -# define KRWLOCK_T 
krwlock_t -# define READ_ENTER(x) rw_enter(x, RW_READER) -# define WRITE_ENTER(x) rw_enter(x, RW_WRITER) -# define RW_UPGRADE(x) { if (rw_tryupgrade(x) == 0) { \ - rw_exit(x); \ - rw_enter(x, RW_WRITER); } \ - } -# define MUTEX_DOWNGRADE(x) rw_downgrade(x) -# define RWLOCK_INIT(x, y, z) rw_init((x), (y), RW_DRIVER, (z)) -# define RWLOCK_EXIT(x) rw_exit(x) -# define RW_DESTROY(x) rw_destroy(x) -# else -# define KRWLOCK_T kmutex_t -# define READ_ENTER(x) mutex_enter(x) -# define WRITE_ENTER(x) mutex_enter(x) -# define MUTEX_DOWNGRADE(x) ; -# define RWLOCK_INIT(x, y, z) mutex_init((x), (y), MUTEX_DRIVER, (z)) -# define RWLOCK_EXIT(x) mutex_exit(x) -# define RW_DESTROY(x) mutex_destroy(x) -# endif -# define MUTEX_INIT(x, y, z) mutex_init((x), (y), MUTEX_DRIVER, (z)) -# define MUTEX_DESTROY(x) mutex_destroy(x) -# define MUTEX_EXIT(x) mutex_exit(x) -# define MTOD(m,t) (t)((m)->b_rptr) -# define IRCOPY(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) -# define IWCOPY(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) -# define IRCOPYPTR ircopyptr -# define IWCOPYPTR iwcopyptr -# define FREE_MB_T(m) freemsg(m) +# endif /* SOLARIS2 >= 6 */ +# define USE_MUTEXES +# define MUTEX_ENTER(x) mutex_enter(&(x)->ipf_lk) +# define READ_ENTER(x) rw_enter(&(x)->ipf_lk, RW_READER) +# define WRITE_ENTER(x) rw_enter(&(x)->ipf_lk, RW_WRITER) +# define MUTEX_DOWNGRADE(x) rw_downgrade(&(x)->ipf_lk) +# define RWLOCK_INIT(x, y) rw_init(&(x)->ipf_lk, (y), \ + RW_DRIVER, NULL) +# define RWLOCK_EXIT(x) rw_exit(&(x)->ipf_lk) +# define RW_DESTROY(x) rw_destroy(&(x)->ipf_lk) +# define MUTEX_INIT(x, y) mutex_init(&(x)->ipf_lk, (y), \ + MUTEX_DRIVER, NULL) +# define MUTEX_DESTROY(x) mutex_destroy(&(x)->ipf_lk) +# define MUTEX_NUKE(x) bzero((x), sizeof(*(x))) +# define MUTEX_EXIT(x) mutex_exit(&(x)->ipf_lk) +# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYIN(a,b,c) (void) copyin((caddr_t)(a), (caddr_t)(b), (c)) 
+# define BCOPYOUT(a,b,c) (void) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d) +# define KFREE(x) kmem_free((char *)(x), sizeof(*(x))) +# define KFREES(x,s) kmem_free((char *)(x), (s)) # define SPL_NET(x) ; # define SPL_IMP(x) ; # undef SPL_X @@ -502,126 +259,1171 @@ typedef struct { # define KMALLOC(a,b) (a) = (b)kmem_alloc(sizeof(*(a)), KM_NOSLEEP) # define KMALLOCS(a,b,c) (a) = (b)kmem_alloc((c), KM_NOSLEEP) # define GET_MINOR(x) getminor(x) -extern ill_t *get_unit __P((char *, int)); -# define GETUNIT(n, v) get_unit(n, v) -# define IFNAME(x) ((ill_t *)x)->ill_name -# else /* SOLARIS */ -# if defined(__sgi) -# define USE_MUTEX 1 -# define ATOMIC_INC(x) { MUTEX_ENTER(&ipf_rw); \ - (x)++; MUTEX_EXIT(&ipf_rw); } -# define ATOMIC_DEC(x) { MUTEX_ENTER(&ipf_rw); \ - (x)--; MUTEX_EXIT(&ipf_rw); } -# define MUTEX_ENTER(x) (x)->pl = LOCK((x)->l, IPF_LOCK_PL); -# define KRWLOCK_T kmutex_t -# define READ_ENTER(x) MUTEX_ENTER(x) -# define WRITE_ENTER(x) MUTEX_ENTER(x) -# define RW_UPGRADE(x) ; -# define MUTEX_DOWNGRADE(x) ; -# define RWLOCK_EXIT(x) MUTEX_EXIT(x) -# define MUTEX_EXIT(x) UNLOCK((x)->l, (x)->pl); -# define MUTEX_INIT(x,y,z) (x)->l = LOCK_ALLOC((uchar_t)-1, IPF_LOCK_PL, (lkinfo_t *)-1, KM_NOSLEEP) -# define MUTEX_DESTROY(x) LOCK_DEALLOC((x)->l) -# else /* __sgi */ -# if defined(__FreeBSD_version) && (__FreeBSD_version >= 500043) -# include -# include -# include -# define USE_MUTEX 1 -# define kmutex_t struct mtx -# define KRWLOCK_T struct sx -# define ATOMIC_INC(x) { MUTEX_ENTER(&ipf_rw); \ - (x)++; MUTEX_EXIT(&ipf_rw); } -# define ATOMIC_DEC(x) { MUTEX_ENTER(&ipf_rw); \ - (x)--; MUTEX_EXIT(&ipf_rw); } -# define MUTEX_ENTER(x) mtx_lock(x) -# define READ_ENTER(x) sx_slock(x) -# define WRITE_ENTER(x) sx_xlock(x) -# define RW_UPGRADE(x) ; -# define MUTEX_DOWNGRADE(x) sx_downgrade(x) -# define RWLOCK_INIT(x, y, z) sx_init((x), (y)) -# define RWLOCK_EXIT(x) do { \ - if ((x)->sx_cnt < 0) \ - sx_xunlock(x); \ - else \ - 
sx_sunlock(x); \ - } while (0) -# define MUTEX_EXIT(x) mtx_unlock(x) -# define MUTEX_INIT(x,y,z) mtx_init((x), (y), NULL, MTX_DEF) -# define MUTEX_DESTROY(x) mtx_destroy(x) -# else -# define ATOMIC_INC(x) (x)++ -# define ATOMIC_DEC(x) (x)-- -# define MUTEX_ENTER(x) ; -# define READ_ENTER(x) ; -# define WRITE_ENTER(x) ; -# define RW_UPGRADE(x) ; -# define MUTEX_DOWNGRADE(x) ; -# define RWLOCK_EXIT(x) ; -# define MUTEX_EXIT(x) ; -# define MUTEX_INIT(x,y,z) ; -# define MUTEX_DESTROY(x) ; -# endif -# endif /* __sgi */ -# ifndef linux -# define FREE_MB_T(m) m_freem(m) -# define MTOD(m,t) mtod(m,t) -# define M_BLEN(m) (m)->m_len -# define IRCOPY(a,b,c) (bcopy((a), (b), (c)), 0) -# define IWCOPY(a,b,c) (bcopy((a), (b), (c)), 0) -# define IRCOPYPTR ircopyptr -# define IWCOPYPTR iwcopyptr -# endif /* !linux */ -# endif /* SOLARIS */ - -# ifdef sun -# if !SOLARIS -# include -# include -# define GETUNIT(n, v) ifunit(n, IFNAMSIZ) -# define IFNAME(x) ((struct ifnet *)x)->if_name -# endif -# else -# ifndef linux -# define GETUNIT(n, v) ifunit(n) -# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) -# define IFNAME(x) ((struct ifnet *)x)->if_xname +extern void *get_unit __P((char *, int)); +# define GETIFP(n, v) get_unit(n, v) +# define IFNAME(x) ((qif_t *)x)->qf_name +# define COPYIFNAME(x, b) \ + (void) strncpy(b, ((qif_t *)x)->qf_name, \ + LIFNAMSIZ) +# define GETKTIME(x) uniqtime((struct timeval *)x) +# define MSGDSIZE(x) msgdsize(x) +# define M_LEN(x) ((x)->b_wptr - (x)->b_rptr) +# define M_DUPLICATE(x) dupmsg((x)) +# define MTOD(m,t) ((t)((m)->b_rptr)) +# define MTYPE(m) ((m)->b_datap->db_type) +# define FREE_MB_T(m) freemsg(m) +# define m_next b_cont +# define CACHE_HASH(x) (((qpktinfo_t *)(x)->fin_qpi)->qpi_num & 7) +# define IPF_PANIC(x,y) if (x) { printf y; cmn_err(CE_PANIC, "ipf_panic"); } +typedef mblk_t mb_t; +# endif /* _KERNEL */ + +# if 
(SOLARIS2 >= 7) +# ifdef lint +# define ALIGN32(ptr) (ptr ? 0L : 0L) +# define ALIGN16(ptr) (ptr ? 0L : 0L) +# else +# define ALIGN32(ptr) (ptr) +# define ALIGN16(ptr) (ptr) +# endif +# endif + +# if SOLARIS2 < 6 +typedef struct uio uio_t; +# endif +typedef int ioctlcmd_t; + +# define OS_RECOGNISED 1 + +#endif /* SOLARIS */ + +/* ----------------------------------------------------------------------- */ +/* H P U X */ +/* ----------------------------------------------------------------------- */ +#ifdef __hpux +# define MENTAT 1 +# include +# include +# include +# include +# ifdef USE_INET6 +# include +# include +# include +typedef struct ip6_hdr ip6_t; +# endif + +# ifdef _KERNEL +# define SNPRINTF sprintf +# if (HPUXREV >= 1111) +# define IPL_SELECT +# ifdef IPL_SELECT +# include +# include +# define READ_COLLISION 0x01 + +typedef struct iplog_select_s { + kthread_t *read_waiter; + int state; +} iplog_select_t; +# endif +# endif + +# define GETKTIME(x) uniqtime((struct timeval *)x) + +# if HPUXREV == 1111 +# include "kern_svcs.h" +# else +# include +# endif +# undef ti_flags +# undef TCP_NODELAY +# undef TCP_MAXSEG +# include +# include "../netinet/ip_info.h" +/* + * According to /usr/include/sys/spinlock.h on HP-UX 11.00, these functions + * are available. Attempting to use them actually results in unresolved + * symbols when it comes time to load the module. + * This has been fixed! Yipee! 
+ */ +# if 1 +# ifdef __LP64__ +# define ATOMIC_INCL(x) lock_and_incr_int64(&ipf_rw.ipf_lk, &(x), 1) +# define ATOMIC_DECL(x) lock_and_incr_int64(&ipf_rw.ipf_lk, &(x), -1) # else -# define USE_GETIFNAME 1 -# define IFNAME(x) get_ifname((struct ifnet *)x) -extern char *get_ifname __P((struct ifnet *)); +# define ATOMIC_INCL(x) lock_and_incr_int32(&ipf_rw.ipf_lk, &(x), 1) +# define ATOMIC_DECL(x) lock_and_incr_int32(&ipf_rw.ipf_lk, &(x), -1) # endif +# define ATOMIC_INC64(x) lock_and_incr_int64(&ipf_rw.ipf_lk, &(x), 1) +# define ATOMIC_INC32(x) lock_and_incr_int32(&ipf_rw.ipf_lk, &(x), 1) +# define ATOMIC_INC16(x) lock_and_incr_int16(&ipf_rw.ipf_lk, &(x), 1) +# define ATOMIC_DEC64(x) lock_and_incr_int64(&ipf_rw.ipf_lk, &(x), -1) +# define ATOMIC_DEC32(x) lock_and_incr_int32(&ipf_rw.ipf_lk, &(x), -1) +# define ATOMIC_DEC16(x) lock_and_incr_int16(&ipf_rw.ipf_lk, &(x), -1) +# else /* 0 */ +# define ATOMIC_INC64(x) { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DEC64(x) { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_INC32(x) { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DEC32(x) { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_INCL(x) { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DECL(x) { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_INC(x) { MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DEC(x) { MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw); } # endif -# endif /* sun */ +# define ip_cksum ip_csuma +# define memcpy(a,b,c) bcopy((caddr_t)b, (caddr_t)a, c) +# define USE_MUTEXES +# define MUTEX_INIT(x, y) initlock(&(x)->ipf_lk, 0, 0, (y)) +# define MUTEX_ENTER(x) spinlock(&(x)->ipf_lk) +# define MUTEX_EXIT(x) spinunlock(&(x)->ipf_lk); +# define MUTEX_DESTROY(x) +# define MUTEX_NUKE(x) bzero((char *)(x), sizeof(*(x))) +# define KMUTEX_T lock_t +# define kmutex_t lock_t /* for 
pfil.h */ +# define krwlock_t lock_t /* for pfil.h */ +/* + * The read-write lock implementation in HP-UX 11.0 is crippled - it can + * only be used by threads working in a user context! + * This has been fixed! Yipee! (Or at least it does in 11.00, not 11.11..) + */ +# if HPUXREV < 1111 +# define MUTEX_DOWNGRADE(x) lock_write_to_read(x) +# define KRWLOCK_T struct rw_lock +# define READ_ENTER(x) lock_read(&(x)->ipf_lk) +# define WRITE_ENTER(x) lock_write(&(x)->ipf_lk) +# if HPUXREV >= 1111 +# define RWLOCK_INIT(x, y) rwlock_init4(&(x)->ipf_lk, 0, RWLCK_CANSLEEP, 0, y) +# else +# define RWLOCK_INIT(x, y) lock_init3(&(x)->ipf_lk, 0, 1, 0, 0, y) +# endif +# define RWLOCK_EXIT(x) lock_done(&(x)->ipf_lk) +# else +# define KRWLOCK_T lock_t +# define KMUTEX_T lock_t +# define READ_ENTER(x) MUTEX_ENTER(x) +# define WRITE_ENTER(x) MUTEX_ENTER(x) +# define MUTEX_DOWNGRADE(x) +# define RWLOCK_INIT(x, y) initlock(&(x)->ipf_lk, 0, 0, y) +# define RWLOCK_EXIT(x) MUTEX_EXIT(x) +# endif +# define RW_DESTROY(x) +# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# if HPUXREV >= 1111 +# define BCOPYIN(a,b,c) 0; bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) 0; bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# else +# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# endif +# define SPL_NET(x) ; +# define SPL_IMP(x) ; +# undef SPL_X +# define SPL_X(x) ; +extern void *get_unit __P((char *, int)); +# define GETIFP(n, v) get_unit(n, v) +# define IFNAME(x, b) ((ill_t *)x)->ill_name +# define COPYIFNAME(x, b) \ + (void) strncpy(b, ((qif_t *)x)->qf_name, \ + LIFNAMSIZ) +# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d) +# define SLEEP(id, n) { lock_t *_l = get_sleep_lock((caddr_t)id); \ + sleep(id, PZERO+1); \ + spinunlock(_l); \ + } +# define WAKEUP(id,x) { lock_t *_l = get_sleep_lock((caddr_t)id); \ + wakeup(id + x); \ + 
spinunlock(_l); \ + } +# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), M_IOSYS, M_NOWAIT) +# define KMALLOCS(a, b, c) MALLOC((a), b, (c), M_IOSYS, M_NOWAIT) +# define KFREE(x) kmem_free((char *)(x), sizeof(*(x))) +# define KFREES(x,s) kmem_free((char *)(x), (s)) +# define MSGDSIZE(x) msgdsize(x) +# define M_LEN(x) ((x)->b_wptr - (x)->b_rptr) +# define M_DUPLICATE(x) dupmsg((x)) +# define MTOD(m,t) ((t)((m)->b_rptr)) +# define MTYPE(m) ((m)->b_datap->db_type) +# define FREE_MB_T(m) freemsg(m) +# define m_next b_cont +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +typedef mblk_t mb_t; + +# define CACHE_HASH(x) (((qpktinfo_t *)(x)->fin_qpi)->qpi_num & 7) + +# include "qif.h" +# include "pfil.h" + +# else /* _KERNEL */ + +typedef unsigned char uchar_t; + +# ifndef _SYS_STREAM_INCLUDED +typedef char * mblk_t; +typedef void * queue_t; +typedef u_long ulong; +# endif +# include + +# endif /* _KERNEL */ + +# ifdef lint +# define ALIGN32(ptr) (ptr ? 0L : 0L) +# define ALIGN16(ptr) (ptr ? 
0L : 0L) +# else +# define ALIGN32(ptr) (ptr) +# define ALIGN16(ptr) (ptr) +# endif + +typedef struct uio uio_t; +typedef int ioctlcmd_t; +typedef int minor_t; +typedef unsigned int u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 + +#endif /* __hpux */ + +/* ----------------------------------------------------------------------- */ +/* I R I X */ +/* ----------------------------------------------------------------------- */ +#ifdef __sgi +# undef MENTAT +# if IRIX < 60500 +typedef struct uio uio_t; +# endif +typedef int ioctlcmd_t; +typedef u_int32_t u_32_t; +# define U_32_T 1 + +# ifdef INET6 +# define USE_INET6 +# endif + +# define hz HZ +# include +# define IPF_LOCK_PL plhi +# include +# undef kmutex_t +typedef struct { + lock_t *l; + int pl; +} kmutex_t; + +# ifdef MUTEX_INIT +# define KMUTEX_T mutex_t +# else +# define KMUTEX_T kmutex_t +# define KRWLOCK_T kmutex_t +# endif -# if defined(sun) && !defined(linux) || defined(__sgi) +# ifdef _KERNEL +# define ATOMIC_INC(x) { MUTEX_ENTER(&ipf_rw); \ + (x)++; MUTEX_EXIT(&ipf_rw); } +# define ATOMIC_DEC(x) { MUTEX_ENTER(&ipf_rw); \ + (x)--; MUTEX_EXIT(&ipf_rw); } +# define USE_MUTEXES +# ifdef MUTEX_INIT +# include +# define ATOMIC_INCL(x) atomicAddUlong(&(x), 1) +# define ATOMIC_INC64(x) atomicAddUint64(&(x), 1) +# define ATOMIC_INC32(x) atomicAddUint(&(x), 1) +# define ATOMIC_INC16 ATOMIC_INC +# define ATOMIC_DECL(x) atomicAddUlong(&(x), -1) +# define ATOMIC_DEC64(x) atomicAddUint64(&(x), -1) +# define ATOMIC_DEC32(x) atomicAddUint(&(x), -1) +# define ATOMIC_DEC16 ATOMIC_DEC +# undef MUTEX_INIT +# define MUTEX_INIT(x, y) mutex_init(&(x)->ipf_lk, \ + MUTEX_DEFAULT, y) +# undef MUTEX_ENTER +# define MUTEX_ENTER(x) mutex_lock(&(x)->ipf_lk, 0) +# undef MUTEX_EXIT +# define MUTEX_EXIT(x) mutex_unlock(&(x)->ipf_lk) +# undef MUTEX_DESTROY +# define MUTEX_DESTROY(x) mutex_destroy(&(x)->ipf_lk) +# define MUTEX_DOWNGRADE(x) mrdemote(&(x)->ipf_lk) +# define KRWLOCK_T mrlock_t +# define RWLOCK_INIT(x, y) 
mrinit(&(x)->ipf_lk, y) +# undef RW_DESTROY +# define RW_DESTROY(x) mrfree(&(x)->ipf_lk) +# define READ_ENTER(x) RW_RDLOCK(&(x)->ipf_lk) +# define WRITE_ENTER(x) RW_WRLOCK(&(x)->ipf_lk) +# define RWLOCK_EXIT(x) RW_UNLOCK(&(x)->ipf_lk) +# else +# define READ_ENTER(x) MUTEX_ENTER(&(x)->ipf_lk) +# define WRITE_ENTER(x) MUTEX_ENTER(&(x)->ipf_lk) +# define MUTEX_DOWNGRADE(x) ; +# define RWLOCK_EXIT(x) MUTEX_EXIT(&(x)->ipf_lk) +# define MUTEX_EXIT(x) UNLOCK((x)->ipf_lk.l, (x)->ipf_lk.pl); +# define MUTEX_INIT(x,y) (x)->ipf_lk.l = LOCK_ALLOC((uchar_t)-1, IPF_LOCK_PL, (lkinfo_t *)-1, KM_NOSLEEP) +# define MUTEX_DESTROY(x) LOCK_DEALLOC((x)->ipf_lk.l) +# define MUTEX_ENTER(x) (x)->ipf_lk.pl = LOCK((x)->ipf_lk.l, \ + IPF_LOCK_PL); +# endif +# define MUTEX_NUKE(x) bzero((x), sizeof(*(x))) +# define FREE_MB_T(m) m_freem(m) +# define MTOD(m,t) mtod(m,t) +# define COPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# define COPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# define BCOPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# define BCOPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) # define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d) # define SLEEP(id, n) sleep((id), PZERO+1) -# define WAKEUP(id) wakeup(id) +# define WAKEUP(id,x) wakeup(id+x) # define KFREE(x) kmem_free((char *)(x), sizeof(*(x))) # define KFREES(x,s) kmem_free((char *)(x), (s)) -# if !SOLARIS +# define GETIFP(n,v) ifunit(n) +# include +# include +# define KMALLOC(a,b) (a) = (b)kmem_alloc(sizeof(*(a)), KM_NOSLEEP) +# define KMALLOCS(a,b,c) (a) = (b)kmem_alloc((c), KM_NOSLEEP) +# define GET_MINOR(x) getminor(x) +# define USE_SPL 1 +# define SPL_IMP(x) (x) = splimp() +# define SPL_NET(x) (x) = splnet() +# define SPL_X(x) (void) splx(x) extern void m_copydata __P((struct mbuf *, int, int, caddr_t)); extern void m_copyback __P((struct mbuf *, int, int, caddr_t)); +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, 
M_COPYALL) +# define GETKTIME(x) microtime((struct timeval *)x) +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +typedef struct mbuf mb_t; +# else +# undef RW_DESTROY +# undef MUTEX_INIT +# undef MUTEX_DESTROY +# endif /* _KERNEL */ + +# define OS_RECOGNISED 1 + +#endif /* __sgi */ + +/* ----------------------------------------------------------------------- */ +/* T R U 6 4 */ +/* ----------------------------------------------------------------------- */ +#ifdef __osf__ +# undef MENTAT + +# include +# include + +# ifdef _KERNEL +# define KMUTEX_T simple_lock_data_t +# define KRWLOCK_T lock_data_t +# include +# define USE_MUTEXES +# define READ_ENTER(x) lock_read(&(x)->ipf_lk) +# define WRITE_ENTER(x) lock_write(&(x)->ipf_lk) +# define MUTEX_DOWNGRADE(x) lock_write_to_read(&(x)->ipf_lk) +# define RWLOCK_INIT(x, y) lock_init(&(x)->ipf_lk, TRUE) +# define RWLOCK_EXIT(x) lock_done(&(x)->ipf_lk) +# define RW_DESTROY(x) lock_terminate(&(x)->ipf_lk) +# define MUTEX_ENTER(x) simple_lock(&(x)->ipf_lk) +# define MUTEX_INIT(x, y) simple_lock_init(&(x)->ipf_lk) +# define MUTEX_DESTROY(x) simple_lock_terminate(&(x)->ipf_lk) +# define MUTEX_EXIT(x) simple_unlock(&(x)->ipf_lk) +# define MUTEX_NUKE(x) bzero(x, sizeof(*(x))) +# define ATOMIC_INC64(x) atomic_incq((uint64_t*)&(x)) +# define ATOMIC_DEC64(x) atomic_decq((uint64_t*)&(x)) +# define ATOMIC_INC32(x) atomic_incl((uint32_t*)&(x)) +# define ATOMIC_DEC32(x) atomic_decl((uint32_t*)&(x)) +# define ATOMIC_INC16(x) { simple_lock(&ipf_rw); (x)++; \ + simple_unlock(&ipf_rw); } +# define ATOMIC_DEC16(x) { simple_lock(&ipf_rw); (x)--; \ + simple_unlock(&ipf_rw); } +# define ATOMIC_INCL(x) atomic_incl((uint32_t*)&(x)) +# define ATOMIC_DECL(x) atomic_decl((uint32_t*)&(x)) +# define ATOMIC_INC(x) { simple_lock(&ipf_rw); (x)++; \ + simple_unlock(&ipf_rw); } +# define ATOMIC_DEC(x) { simple_lock(&ipf_rw); (x)--; \ + 
simple_unlock(&ipf_rw); } +# define SPL_NET(x) ; +# define SPL_IMP(x) ; +# undef SPL_X +# define SPL_X(x) ; +# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a, b, d) +# define FREE_MB_T(m) m_freem(m) +# define MTOD(m,t) mtod(m,t) +# define GETIFP(n, v) ifunit(n) +# define GET_MINOR getminor +# define WAKEUP(id,x) wakeup(id + x) +# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), M_PFILT, M_NOWAIT) +# define KMALLOCS(a, b, c) MALLOC((a), b, (c), M_PFILT, \ + ((c) > 4096) ? M_WAITOK : M_NOWAIT) +# define KFREE(x) FREE((x), M_PFILT) +# define KFREES(x,s) FREE((x), M_PFILT) +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define GETKTIME(x) microtime((struct timeval *)x) +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +typedef struct mbuf mb_t; +# endif /* _KERNEL */ + +# if (defined(_KERNEL) || defined(_NO_BITFIELDS) || (__STDC__ == 1)) +# define IP_V(x) ((x)->ip_vhl >> 4) +# define IP_HL(x) ((x)->ip_vhl & 0xf) +# define IP_V_A(x,y) (x)->ip_vhl |= (((y) << 4) & 0xf0) +# define IP_HL_A(x,y) (x)->ip_vhl |= ((y) & 0xf) +# define TCP_X2(x) ((x)->th_xoff & 0xf) +# define TCP_X2_A(x,y) (x)->th_xoff |= ((y) & 0xf) +# define TCP_OFF(x) ((x)->th_xoff >> 4) +# define TCP_OFF_A(x,y) (x)->th_xoff |= (((y) << 4) & 0xf0) +# endif + +/* + * These are from's Solaris' #defines for little endian. + */ +#define IP6F_MORE_FRAG 0x0100 +#define IP6F_RESERVED_MASK 0x0600 +#define IP6F_OFF_MASK 0xf8ff + +struct ip6_ext { + u_char ip6e_nxt; + u_char ip6e_len; +}; + +typedef int ioctlcmd_t; +/* + * Really, any arch where sizeof(long) != sizeof(int). 
+ */ +typedef unsigned int u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 +#endif /* __osf__ */ + +/* ----------------------------------------------------------------------- */ +/* N E T B S D */ +/* ----------------------------------------------------------------------- */ +#ifdef __NetBSD__ +# if defined(_KERNEL) && !defined(IPFILTER_LKM) +# include "bpfilter.h" +# if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 104110000) +# include "opt_inet.h" +# endif +# ifdef INET6 +# define USE_INET6 +# endif +# if (__NetBSD_Version__ >= 105000000) +# define HAVE_M_PULLDOWN 1 # endif -# ifdef __sgi -# include -# include -# define KMALLOC(a,b) (a) = (b)kmem_alloc(sizeof(*(a)), KM_NOSLEEP) -# define KMALLOCS(a,b,c) (a) = (b)kmem_alloc((c), KM_NOSLEEP) -# define GET_MINOR(x) getminor(x) +# endif + +# ifdef _KERNEL +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define GETKTIME(x) microtime((struct timeval *)x) +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +typedef struct mbuf mb_t; +# endif /* _KERNEL */ +# if (NetBSD <= 1991011) && (NetBSD >= 199606) +# define IFNAME(x) ((struct ifnet *)x)->if_xname +# define COPYIFNAME(x, b) \ + (void) strncpy(b, \ + ((struct ifnet *)x)->if_xname, \ + LIFNAMSIZ) +# define CACHE_HASH(x) ((((struct ifnet *)fin->fin_ifp)->if_index)&7) +# else +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# endif + +typedef struct uio uio_t; +typedef u_long ioctlcmd_t; +typedef int minor_t; +typedef u_int32_t u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 +#endif /* __NetBSD__ */ + + +/* 
----------------------------------------------------------------------- */ +/* F R E E B S D */ +/* ----------------------------------------------------------------------- */ +#ifdef __FreeBSD__ +# if defined(_KERNEL) && !defined(IPFILTER_LKM) && !defined(KLD_MODULE) +# if (__FreeBSD_version >= 500000) +# include "opt_bpf.h" # else -# if !SOLARIS -# define KMALLOC(a,b) (a) = (b)new_kmem_alloc(sizeof(*(a)), \ - KMEM_NOSLEEP) -# define KMALLOCS(a,b,c) (a) = (b)new_kmem_alloc((c), KMEM_NOSLEEP) -# endif /* SOLARIS */ -# endif /* __sgi */ -# endif /* sun && !linux */ -# ifndef GET_MINOR -# define GET_MINOR(x) minor(x) +# include "bpf.h" +# endif +# if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) +# include "opt_inet6.h" +# endif +# if defined(INET6) && !defined(USE_INET6) +# define USE_INET6 +# endif +# endif + +# if defined(_KERNEL) +# if (__FreeBSD_version >= 400000) +/* + * When #define'd, the 5.2.1 kernel panics when used with the ftp proxy. + * There may be other, safe, kernels but this is not extensively tested yet. + */ +# define HAVE_M_PULLDOWN +# endif +# if !defined(IPFILTER_LKM) && (__FreeBSD_version >= 300000) +# include "opt_ipfilter.h" +# endif +# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) + +# if (__FreeBSD_version >= 500043) +# define NETBSD_PF +# endif +# endif /* _KERNEL */ + +# if (__FreeBSD_version >= 500043) +# include +# include +/* + * Whilst the sx(9) locks on FreeBSD have the right semantics and interface + * for what we want to use them for, despite testing showing they work - + * with a WITNESS kernel, it generates LOR messages. 
+ */ +# define KMUTEX_T struct mtx +# if 1 +# define KRWLOCK_T struct mtx +# else +# define KRWLOCK_T struct sx +# endif +# endif + +# if (__FreeBSD_version >= 501113) +# include +# define IFNAME(x) ((struct ifnet *)x)->if_xname +# define COPYIFNAME(x, b) \ + (void) strncpy(b, \ + ((struct ifnet *)x)->if_xname, \ + LIFNAMSIZ) +# endif +# if (__FreeBSD_version >= 500043) +# define CACHE_HASH(x) ((((struct ifnet *)fin->fin_ifp)->if_index) & 7) +# else +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# endif + +# ifdef _KERNEL +# define GETKTIME(x) microtime((struct timeval *)x) + +# if (__FreeBSD_version >= 500002) +# include +# include +# include +# endif + +# if (__FreeBSD_version >= 500043) +# define USE_MUTEXES +# define MUTEX_ENTER(x) mtx_lock(&(x)->ipf_lk) +# define MUTEX_EXIT(x) mtx_unlock(&(x)->ipf_lk) +# define MUTEX_INIT(x,y) mtx_init(&(x)->ipf_lk, (y), NULL,\ + MTX_DEF) +# define MUTEX_DESTROY(x) mtx_destroy(&(x)->ipf_lk) +# define MUTEX_NUKE(x) bzero((x), sizeof(*(x))) +/* + * Whilst the sx(9) locks on FreeBSD have the right semantics and interface + * for what we want to use them for, despite testing showing they work - + * with a WITNESS kernel, it generates LOR messages. 
+ */ +# if 1 +# define READ_ENTER(x) mtx_lock(&(x)->ipf_lk) +# define WRITE_ENTER(x) mtx_lock(&(x)->ipf_lk) +# define RWLOCK_EXIT(x) mtx_unlock(&(x)->ipf_lk) +# define MUTEX_DOWNGRADE(x) ; +# define RWLOCK_INIT(x,y) mtx_init(&(x)->ipf_lk, (y), NULL,\ + MTX_DEF) +# define RW_DESTROY(x) mtx_destroy(&(x)->ipf_lk) +# else +# define READ_ENTER(x) sx_slock(&(x)->ipf_lk) +# define WRITE_ENTER(x) sx_xlock(&(x)->ipf_lk) +# define MUTEX_DOWNGRADE(x) sx_downgrade(&(x)->ipf_lk) +# define RWLOCK_INIT(x, y) sx_init(&(x)->ipf_lk, (y)) +# define RW_DESTROY(x) sx_destroy(&(x)->ipf_lk) +# ifdef sx_unlock +# define RWLOCK_EXIT(x) sx_unlock(x) +# else +# define RWLOCK_EXIT(x) do { \ + if ((x)->ipf_lk.sx_cnt < 0) \ + sx_xunlock(&(x)->ipf_lk); \ + else \ + sx_sunlock(&(x)->ipf_lk); \ + } while (0) +# endif +# endif +# include +# define ATOMIC_INC(x) { mtx_lock(&ipf_rw.ipf_lk); (x)++; \ + mtx_unlock(&ipf_rw.ipf_lk); } +# define ATOMIC_DEC(x) { mtx_lock(&ipf_rw.ipf_lk); (x)--; \ + mtx_unlock(&ipf_rw.ipf_lk); } +# define ATOMIC_INCL(x) atomic_add_long(&(x), 1) +# define ATOMIC_INC64(x) ATOMIC_INC(x) +# define ATOMIC_INC32(x) atomic_add_32(&(x), 1) +# define ATOMIC_INC16(x) atomic_add_16(&(x), 1) +# define ATOMIC_DECL(x) atomic_add_long(&(x), -1) +# define ATOMIC_DEC64(x) ATOMIC_DEC(x) +# define ATOMIC_DEC32(x) atomic_add_32(&(x), -1) +# define ATOMIC_DEC16(x) atomic_add_16(&(x), -1) +# define SPL_X(x) ; +# define SPL_NET(x) ; +# define SPL_IMP(x) ; +extern int in_cksum __P((struct mbuf *, int)); +# endif /* __FreeBSD_version >= 500043 */ +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +typedef struct mbuf mb_t; +# endif /* _KERNEL */ + +# if __FreeBSD__ < 3 +# include +# else +# if __FreeBSD__ == 3 +# if defined(IPFILTER_LKM) && !defined(ACTUALLY_LKM_NOT_KERNEL) +# define ACTUALLY_LKM_NOT_KERNEL +# endif +# endif +# endif + +# if (__FreeBSD_version >= 
300000) +typedef u_long ioctlcmd_t; +# else +typedef int ioctlcmd_t; +# endif +typedef struct uio uio_t; +typedef int minor_t; +typedef u_int32_t u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 +#endif /* __FreeBSD__ */ + + +/* ----------------------------------------------------------------------- */ +/* O P E N B S D */ +/* ----------------------------------------------------------------------- */ +#ifdef __OpenBSD__ +# ifdef INET6 +# define USE_INET6 +# endif + +# ifdef _KERNEL +# if !defined(IPFILTER_LKM) +# include "bpfilter.h" +# endif +# if (OpenBSD >= 200311) +# define SNPRINTF snprintf +# if defined(USE_INET6) +# include "netinet6/in6_var.h" +# include "netinet6/nd6.h" +# endif +# endif +# if (OpenBSD >= 200012) +# define HAVE_M_PULLDOWN 1 +# endif +# define COPYIN(a,b,c) copyin((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYOUT(a,b,c) copyout((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define GETKTIME(x) microtime((struct timeval *)x) +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +typedef struct mbuf mb_t; +# endif /* _KERNEL */ +# if (OpenBSD >= 199603) +# define IFNAME(x, b) ((struct ifnet *)x)->if_xname +# define COPYIFNAME(x, b) \ + (void) strncpy(b, \ + ((struct ifnet *)x)->if_xname, \ + LIFNAMSIZ) +# define CACHE_HASH(x) ((((struct ifnet *)fin->fin_ifp)->if_index)&7) +# else +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# endif + +typedef struct uio uio_t; +typedef u_long ioctlcmd_t; +typedef int minor_t; +typedef u_int32_t u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 +#endif /* __OpenBSD__ */ + + +/* ----------------------------------------------------------------------- */ +/* B S D O S */ +/* 
----------------------------------------------------------------------- */ +#ifdef _BSDI_VERSION +# ifdef INET6 +# define USE_INET6 +# endif + +# ifdef _KERNEL +# define GETKTIME(x) microtime((struct timeval *)x) +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +typedef struct mbuf mb_t; +# endif /* _KERNEL */ + +# if (_BSDI_VERSION >= 199701) +typedef u_long ioctlcmd_t; +# else +typedef int ioctlcmd_t; +# endif +typedef u_int32_t u_32_t; +# define U_32_T 1 + +#endif /* _BSDI_VERSION */ + + +/* ----------------------------------------------------------------------- */ +/* S U N O S 4 */ +/* ----------------------------------------------------------------------- */ +#if defined(sun) && !defined(OS_RECOGNISED) /* SunOS4 */ +# ifdef _KERNEL +# include +# define GETKTIME(x) uniqtime((struct timeval *)x) +# define MSGDSIZE(x) mbufchainlen(x) +# define M_LEN(x) (x)->m_len +# define M_DUPLICATE(x) m_copy((x), 0, M_COPYALL) +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# define GETIFP(n, v) ifunit(n, IFNAMSIZ) +# define KFREE(x) kmem_free((char *)(x), sizeof(*(x))) +# define KFREES(x,s) kmem_free((char *)(x), (s)) +# define SLEEP(id, n) sleep((id), PZERO+1) +# define WAKEUP(id,x) wakeup(id + x) +# define UIOMOVE(a,b,c,d) uiomove((caddr_t)a,b,c,d) +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } + +extern void m_copydata __P((struct mbuf *, int, int, caddr_t)); +extern void m_copyback __P((struct mbuf *, int, int, caddr_t)); + +typedef struct mbuf mb_t; +# endif + +typedef struct uio uio_t; +typedef int ioctlcmd_t; +typedef int minor_t; +typedef unsigned int u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 + +#endif /* SunOS 4 */ + +/* ----------------------------------------------------------------------- */ +/* L I N U X */ 
+/* ----------------------------------------------------------------------- */ +#if defined(linux) && !defined(OS_RECOGNISED) +#include +#include +# if LINUX >= 20600 +# define HDR_T_PRIVATE 1 +# endif +# undef USE_INET6 +# ifdef USE_INET6 +struct ip6_ext { + u_char ip6e_nxt; + u_char ip6e_len; +}; +# endif + +# ifdef _KERNEL +# define IPF_PANIC(x,y) if (x) { printf y; panic("ipf_panic"); } +# define BCOPYIN(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define BCOPYOUT(a,b,c) bcopy((caddr_t)(a), (caddr_t)(b), (c)) +# define COPYIN(a,b,c) copy_from_user((caddr_t)(b), (caddr_t)(a), (c)) +# define COPYOUT(a,b,c) copy_to_user((caddr_t)(b), (caddr_t)(a), (c)) +# define FREE_MB_T(m) kfree_skb(m) +# define GETKTIME(x) do_gettimeofday((struct timeval *)x) +# define SLEEP(x,s) 0, interruptible_sleep_on(x##_linux) +# define WAKEUP(x,y) wake_up(x##_linux + y) +# define UIOMOVE(a,b,c,d) uiomove(a,b,c,d) +# define USE_MUTEXES +# define KRWLOCK_T rwlock_t +# define KMUTEX_T spinlock_t +# define MUTEX_INIT(x,y) spin_lock_init(&(x)->ipf_lk) +# define MUTEX_ENTER(x) spin_lock(&(x)->ipf_lk) +# define MUTEX_EXIT(x) spin_unlock(&(x)->ipf_lk) +# define MUTEX_DESTROY(x) do { } while (0) +# define MUTEX_NUKE(x) bzero(&(x)->ipf_lk, sizeof((x)->ipf_lk)) +# define READ_ENTER(x) ipf_read_enter(x) +# define WRITE_ENTER(x) ipf_write_enter(x) +# define RWLOCK_INIT(x,y) rwlock_init(&(x)->ipf_lk) +# define RW_DESTROY(x) do { } while (0) +# define RWLOCK_EXIT(x) ipf_rw_exit(x) +# define MUTEX_DOWNGRADE(x) ipf_rw_downgrade(x) +# define ATOMIC_INCL(x) MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw) +# define ATOMIC_DECL(x) MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw) +# define ATOMIC_INC64(x) MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw) +# define ATOMIC_INC32(x) MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw) +# define ATOMIC_INC16(x) MUTEX_ENTER(&ipf_rw); (x)++; \ + MUTEX_EXIT(&ipf_rw) +# define ATOMIC_DEC64(x) MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw) +# 
define ATOMIC_DEC32(x) MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw) +# define ATOMIC_DEC16(x) MUTEX_ENTER(&ipf_rw); (x)--; \ + MUTEX_EXIT(&ipf_rw) +# define SPL_IMP(x) do { } while (0) +# define SPL_NET(x) do { } while (0) +# define SPL_X(x) do { } while (0) +# define IFNAME(x) ((struct net_device*)x)->name +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct net_device *)fin->fin_ifp)->ifindex) & 7) +typedef struct sk_buff mb_t; +extern void m_copydata __P((mb_t *, int, int, caddr_t)); +extern void m_copyback __P((mb_t *, int, int, caddr_t)); +extern void m_adj __P((mb_t *, int)); +extern mb_t *m_pullup __P((mb_t *, int)); +# define mbuf sk_buff + +# define mtod(m, t) ((t)(m)->data) +# define m_len len +# define m_next next +# define M_DUPLICATE(m) skb_clone((m), in_interrupt() ? GFP_ATOMIC : \ + GFP_KERNEL) +# define MSGDSIZE(m) (m)->len +# define M_LEN(m) (m)->len + +# define splnet(x) ; +# define printf printk +# define bcopy(s,d,z) memmove(d, s, z) +# define bzero(s,z) memset(s, 0, z) +# define bcmp(a,b,z) memcmp(a, b, z) + +# define ifnet net_device +# define if_xname name +# define if_unit ifindex + +# define KMALLOC(x,t) (x) = (t)kmalloc(sizeof(*(x)), \ + in_interrupt() ? GFP_ATOMIC : GFP_KERNEL) +# define KFREE(x) kfree(x) +# define KMALLOCS(x,t,s) (x) = (t)kmalloc((s), \ + in_interrupt() ? 
GFP_ATOMIC : GFP_KERNEL) +# define KFREES(x,s) kfree(x) + +# define GETIFP(n,v) dev_get_by_name(n) + +# else +# include + +struct mbuf { +}; + +# ifndef _NET_ROUTE_H +struct rtentry { +}; +# endif + +struct ifnet { + char if_xname[IFNAMSIZ]; + int if_unit; + int (* if_output) __P((struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *)); + struct ifaddr *if_addrlist; +}; +# define IFNAME(x) ((struct ifnet *)x)->if_xname + +# endif /* _KERNEL */ + +# define COPYIFNAME(x, b) \ + (void) strncpy(b, \ + ((struct ifnet *)x)->if_xname, \ + LIFNAMSIZ) + +# include +# define FWRITE FMODE_WRITE +# define FREAD FMODE_READ + +# define __USE_MISC 1 +# define __FAVOR_BSD 1 + +typedef struct uio { + struct iovec *uio_iov; + void *uio_file; + char *uio_buf; + int uio_iovcnt; + int uio_offset; + size_t uio_resid; + int uio_rw; +} uio_t; + +extern int uiomove __P((caddr_t, size_t, int, struct uio *)); + +# define UIO_READ 1 +# define UIO_WRITE 2 + +typedef u_long ioctlcmd_t; +typedef int minor_t; +typedef u_int32_t u_32_t; +# define U_32_T 1 + +# define OS_RECOGNISED 1 + +#endif + + +#ifndef OS_RECOGNISED +#error ip_compat.h does not recognise this platform/OS. +#endif + + +/* ----------------------------------------------------------------------- */ +/* G E N E R I C */ +/* ----------------------------------------------------------------------- */ +#ifndef OS_RECOGNISED +#endif + +/* + * For BSD kernels, if bpf is in the kernel, enable ipfilter to use bpf in + * filter rules. 
+ */ +#if !defined(IPFILTER_BPF) && ((NBPF > 0) || (NBPFILTER > 0)) +# define IPFILTER_BPF +#endif + +/* + * Userland locking primitives + */ +typedef struct { + char *eMm_owner; + char *eMm_heldin; + u_int eMm_magic; + int eMm_held; + int eMm_heldat; +#ifdef __hpux + char eMm_fill[8]; +#endif +} eMmutex_t; + +typedef struct { + char *eMrw_owner; + char *eMrw_heldin; + u_int eMrw_magic; + short eMrw_read; + short eMrw_write; + int eMrw_heldat; +#ifdef __hpux + char eMm_fill[24]; +#endif +} eMrwlock_t; + +typedef union { +#ifdef KMUTEX_T + struct { + KMUTEX_T ipf_slk; + char *ipf_lname; + } ipf_lkun_s; +#endif + eMmutex_t ipf_emu; +} ipfmutex_t; + +typedef union { +#ifdef KRWLOCK_T + struct { + KRWLOCK_T ipf_slk; + char *ipf_lname; + int ipf_sr; + int ipf_sw; + u_int ipf_magic; + } ipf_lkun_s; +#endif + eMrwlock_t ipf_emu; +} ipfrwlock_t; + +#define ipf_lk ipf_lkun_s.ipf_slk +#define ipf_lname ipf_lkun_s.ipf_lname +#define ipf_isr ipf_lkun_s.ipf_sr +#define ipf_isw ipf_lkun_s.ipf_sw +#define ipf_magic ipf_lkun_s.ipf_magic + +#if !defined(__GNUC__) || \ + (defined(__FreeBSD_version) && (__FreeBSD_version >= 503000)) +# ifndef INLINE +# define INLINE +# endif +#else +# define INLINE __inline__ +#endif + +#if defined(linux) && defined(_KERNEL) +extern INLINE void ipf_read_enter __P((ipfrwlock_t *)); +extern INLINE void ipf_write_enter __P((ipfrwlock_t *)); +extern INLINE void ipf_rw_exit __P((ipfrwlock_t *)); +extern INLINE void ipf_rw_downgrade __P((ipfrwlock_t *)); +#endif + +/* + * In a non-kernel environment, there are a lot of macros that need to be + * filled in to be null-ops or to point to some compatibility function, + * somewhere in userland. 
+ */ +#ifndef _KERNEL +typedef struct mb_s { + struct mb_s *mb_next; + int mb_len; + u_long mb_buf[2048]; +} mb_t; +# undef m_next +# define m_next mb_next +# define MSGDSIZE(x) (x)->mb_len /* XXX - from ipt.c */ +# define M_LEN(x) (x)->mb_len +# define M_DUPLICATE(x) (x) +# define GETKTIME(x) gettimeofday((struct timeval *)(x), NULL) +# define MTOD(m, t) ((t)(m)->mb_buf) +# define FREE_MB_T(x) +# define SLEEP(x,y) 1; +# define WAKEUP(x,y) ; +# define IPF_PANIC(x,y) ; +# define PANIC(x,y) ; +# define SPL_NET(x) ; +# define SPL_IMP(x) ; +# define SPL_X(x) ; +# define KMALLOC(a,b) (a) = (b)malloc(sizeof(*a)) +# define KMALLOCS(a,b,c) (a) = (b)malloc(c) +# define KFREE(x) free(x) +# define KFREES(x,s) free(x) +# define GETIFP(x, v) get_unit(x,v) +# define COPYIN(a,b,c) (bcopy((a), (b), (c)), 0) +# define COPYOUT(a,b,c) (bcopy((a), (b), (c)), 0) +# define BCOPYIN(a,b,c) (bcopy((a), (b), (c)), 0) +# define BCOPYOUT(a,b,c) (bcopy((a), (b), (c)), 0) +# define COPYDATA(m, o, l, b) bcopy(MTOD((mb_t *)m, char *) + (o), \ + (b), (l)) +# define COPYBACK(m, o, l, b) bcopy((b), \ + MTOD((mb_t *)m, char *) + (o), \ + (l)) +# define UIOMOVE(a,b,c,d) ipfuiomove(a,b,c,d) +extern void m_copydata __P((mb_t *, int, int, caddr_t)); +extern int ipfuiomove __P((caddr_t, int, int, struct uio *)); +# ifndef CACHE_HASH +# define CACHE_HASH(x) ((IFNAME(fin->fin_ifp)[0] + \ + ((struct ifnet *)fin->fin_ifp)->if_unit) & 7) +# endif + +# define MUTEX_DESTROY(x) eMmutex_destroy(&(x)->ipf_emu) +# define MUTEX_ENTER(x) eMmutex_enter(&(x)->ipf_emu, \ + __FILE__, __LINE__) +# define MUTEX_EXIT(x) eMmutex_exit(&(x)->ipf_emu) +# define MUTEX_INIT(x,y) eMmutex_init(&(x)->ipf_emu, y) +# define MUTEX_NUKE(x) bzero((x), sizeof(*(x))) + +# define MUTEX_DOWNGRADE(x) eMrwlock_downgrade(&(x)->ipf_emu, \ + __FILE__, __LINE__) +# define READ_ENTER(x) eMrwlock_read_enter(&(x)->ipf_emu, \ + __FILE__, __LINE__) +# define RWLOCK_INIT(x, y) eMrwlock_init(&(x)->ipf_emu, y) +# define RWLOCK_EXIT(x) 
eMrwlock_exit(&(x)->ipf_emu) +# define RW_DESTROY(x) eMrwlock_destroy(&(x)->ipf_emu) +# define WRITE_ENTER(x) eMrwlock_write_enter(&(x)->ipf_emu, \ + __FILE__, \ + __LINE__) + +# define USE_MUTEXES 1 + +extern void eMmutex_destroy __P((eMmutex_t *)); +extern void eMmutex_enter __P((eMmutex_t *, char *, int)); +extern void eMmutex_exit __P((eMmutex_t *)); +extern void eMmutex_init __P((eMmutex_t *, char *)); +extern void eMrwlock_destroy __P((eMrwlock_t *)); +extern void eMrwlock_exit __P((eMrwlock_t *)); +extern void eMrwlock_init __P((eMrwlock_t *, char *)); +extern void eMrwlock_read_enter __P((eMrwlock_t *, char *, int)); +extern void eMrwlock_write_enter __P((eMrwlock_t *, char *, int)); +extern void eMrwlock_downgrade __P((eMrwlock_t *, char *, int)); + +#endif + +#define MAX_IPV4HDR ((0xf << 2) + sizeof(struct icmp) + sizeof(ip_t) + 8) + +#ifndef IP_OFFMASK +# define IP_OFFMASK 0x1fff +#endif + + +/* + * On BSD's use quad_t as a guarantee for getting at least a 64bit sized + * object. + */ +#if BSD > 199306 +# define USE_QUAD_T +# define U_QUAD_T u_quad_t +# define QUAD_T quad_t +#else /* BSD > 199306 */ +# define U_QUAD_T u_long +# define QUAD_T long +#endif /* BSD > 199306 */ + + +#ifdef USE_INET6 +# if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \ + defined(__osf__) || defined(linux) +# include +# include +# if !defined(linux) +# if defined(_KERNEL) && !defined(__osf__) +# include +# endif +# endif +typedef struct ip6_hdr ip6_t; +# endif +#endif + +#ifndef MAX +# define MAX(a,b) (((a) > (b)) ? 
(a) : (b)) +#endif + +#if defined(_KERNEL) +# ifdef MENTAT +# define COPYDATA mb_copydata +# define COPYBACK mb_copyback +# else +# define COPYDATA m_copydata +# define COPYBACK m_copyback # endif # if (BSD >= 199306) || defined(__FreeBSD__) # if (defined(__NetBSD_Version__) && (__NetBSD_Version__ < 105180000)) || \ @@ -636,80 +1438,330 @@ extern void m_copyback __P((struct mbuf *, int, int, caddr_t)); # include # else # include -extern vm_map_t kmem_map; +extern vm_map_t kmem_map; # endif # include # else /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */ # include # endif /* !__FreeBSD__ || (__FreeBSD__ && __FreeBSD_version >= 300000) */ -# ifdef M_PFIL -# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), M_PFIL, M_NOWAIT) -# define KMALLOCS(a, b, c) MALLOC((a), b, (c), M_PFIL, M_NOWAIT) -# define KFREE(x) FREE((x), M_PFIL) -# define KFREES(x,s) FREE((x), M_PFIL) -# else -# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), M_TEMP, M_NOWAIT) -# define KMALLOCS(a, b, c) MALLOC((a), b, (c), M_TEMP, M_NOWAIT) -# define KFREE(x) FREE((x), M_TEMP) -# define KFREES(x,s) FREE((x), M_TEMP) -# endif /* M_PFIL */ + +# ifdef IPFILTER_M_IPFILTER +# include +MALLOC_DECLARE(M_IPFILTER); +# define _M_IPF M_IPFILTER +# else /* IPFILTER_M_IPFILTER */ +# ifdef M_PFIL +# define _M_IPF M_PFIL +# else +# ifdef M_IPFILTER +# define _M_IPF M_IPFILTER +# else +# define _M_IPF M_TEMP +# endif /* M_IPFILTER */ +# endif /* M_PFIL */ +# endif /* IPFILTER_M_IPFILTER */ +# define KMALLOC(a, b) MALLOC((a), b, sizeof(*(a)), _M_IPF, M_NOWAIT) +# define KMALLOCS(a, b, c) MALLOC((a), b, (c), _M_IPF, M_NOWAIT) +# define KFREE(x) FREE((x), _M_IPF) +# define KFREES(x,s) FREE((x), _M_IPF) # define UIOMOVE(a,b,c,d) uiomove(a,b,d) # define SLEEP(id, n) tsleep((id), PPAUSE|PCATCH, n, 0) -# define WAKEUP(id) wakeup(id) -# endif /* BSD */ -# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199407)) || \ - (defined(OpenBSD) && (OpenBSD >= 200006)) -# define SPL_NET(x) x = splsoftnet() 
-# define SPL_X(x) (void) splx(x) -# else -# if !SOLARIS && !defined(linux) +# define WAKEUP(id,x) wakeup(id+x) +# define GETIFP(n, v) ifunit(n) +# endif /* (Free)BSD */ + +# if !defined(USE_MUTEXES) && !defined(SPL_NET) +# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199407)) || \ + (defined(OpenBSD) && (OpenBSD >= 200006)) +# define SPL_NET(x) x = splsoftnet() +# else # define SPL_IMP(x) x = splimp() # define SPL_NET(x) x = splnet() -# define SPL_X(x) (void) splx(x) -# endif -# endif /* NetBSD && (NetBSD <= 1991011) && (NetBSD >= 199407) */ +# endif /* NetBSD && (NetBSD <= 1991011) && (NetBSD >= 199407) */ +# define SPL_X(x) (void) splx(x) +# endif /* !USE_MUTEXES */ + +# ifndef FREE_MB_T +# define FREE_MB_T(m) m_freem(m) +# endif + +# ifndef MTOD +# define MTOD(m,t) mtod(m,t) +# endif + +# ifndef COPYIN +# define COPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# define COPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# define BCOPYIN(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# define BCOPYOUT(a,b,c) (bcopy((caddr_t)(a), (caddr_t)(b), (c)), 0) +# endif + +# ifndef KMALLOC +# define KMALLOC(a,b) (a) = (b)new_kmem_alloc(sizeof(*(a)), \ + KMEM_NOSLEEP) +# define KMALLOCS(a,b,c) (a) = (b)new_kmem_alloc((c), KMEM_NOSLEEP) +# endif + +# ifndef GET_MINOR +# define GET_MINOR(x) minor(x) +# endif # define PANIC(x,y) if (x) panic y -#else /* KERNEL */ -# define SLEEP(x,y) 1 -# define WAKEUP(x) ; -# define PANIC(x,y) ; -# define ATOMIC_INC(x) (x)++ -# define ATOMIC_DEC(x) (x)-- -# define MUTEX_ENTER(x) ; -# define READ_ENTER(x) ; -# define MUTEX_INIT(x,y,z) ; -# define MUTEX_DESTROY(x) ; -# define WRITE_ENTER(x) ; -# define RW_UPGRADE(x) ; +#endif /* _KERNEL */ + +#ifndef IFNAME +# define IFNAME(x) ((struct ifnet *)x)->if_name +#endif +#ifndef COPYIFNAME +# define NEED_FRGETIFNAME +extern char *fr_getifname __P((struct ifnet *, char *)); +# define COPYIFNAME(x, b) \ + fr_getifname((struct ifnet *)x, b) +#endif + +#ifndef ASSERT +# 
define ASSERT(x) +#endif + +/* + * Because the ctype(3) posix definition, if used "safely" in code everywhere, + * would mean all normal code that walks through strings needed casts. Yuck. + */ +#define ISALNUM(x) isalnum((u_char)(x)) +#define ISALPHA(x) isalpha((u_char)(x)) +#define ISASCII(x) isascii((u_char)(x)) +#define ISDIGIT(x) isdigit((u_char)(x)) +#define ISPRINT(x) isprint((u_char)(x)) +#define ISSPACE(x) isspace((u_char)(x)) +#define ISUPPER(x) isupper((u_char)(x)) +#define ISXDIGIT(x) isxdigit((u_char)(x)) +#define ISLOWER(x) islower((u_char)(x)) +#define TOUPPER(x) toupper((u_char)(x)) +#define TOLOWER(x) tolower((u_char)(x)) + +/* + * If mutexes aren't being used, turn all the mutex functions into null-ops. + */ +#if !defined(USE_MUTEXES) +# define USE_SPL 1 +# undef RW_DESTROY +# undef MUTEX_INIT +# undef MUTEX_NUKE +# undef MUTEX_DESTROY +# define MUTEX_ENTER(x) ; +# define READ_ENTER(x) ; +# define WRITE_ENTER(x) ; # define MUTEX_DOWNGRADE(x) ; -# define RWLOCK_EXIT(x) ; -# define MUTEX_EXIT(x) ; -# define SPL_NET(x) ; -# define SPL_IMP(x) ; -# undef SPL_X -# define SPL_X(x) ; -# define KMALLOC(a,b) (a) = (b)malloc(sizeof(*a)) -# define KMALLOCS(a,b,c) (a) = (b)malloc(c) -# define KFREE(x) free(x) -# define KFREES(x,s) free(x) -# define FREE_MB_T(x) ; -# define GETUNIT(x, v) get_unit(x,v) -# define IRCOPY(a,b,c) (bcopy((a), (b), (c)), 0) -# define IWCOPY(a,b,c) (bcopy((a), (b), (c)), 0) -# define IRCOPYPTR ircopyptr -# define IWCOPYPTR iwcopyptr -# define IFNAME(x) get_ifname((struct ifnet *)x) -# define UIOMOVE(a,b,c,d) ipfuiomove(a,b,c,d) -# include -extern void m_copydata __P((mb_t *, int, int, caddr_t)); -extern int ipfuiomove __P((caddr_t, int, int, struct uio *)); -#endif /* KERNEL */ +# define RWLOCK_INIT(x, y) ; +# define RWLOCK_EXIT(x) ; +# define RW_DESTROY(x) ; +# define MUTEX_EXIT(x) ; +# define MUTEX_INIT(x,y) ; +# define MUTEX_DESTROY(x) ; +# define MUTEX_NUKE(x) ; +#endif /* !USE_MUTEXES */ +#ifndef ATOMIC_INC +# define ATOMIC_INC(x) 
(x)++ +# define ATOMIC_DEC(x) (x)-- +#endif + +/* + * If there are no atomic operations for bit sizes defined, define them to all + * use a generic one that works for all sizes. + */ +#ifndef ATOMIC_INCL +# define ATOMIC_INCL ATOMIC_INC +# define ATOMIC_INC64 ATOMIC_INC +# define ATOMIC_INC32 ATOMIC_INC +# define ATOMIC_INC16 ATOMIC_INC +# define ATOMIC_DECL ATOMIC_DEC +# define ATOMIC_DEC64 ATOMIC_DEC +# define ATOMIC_DEC32 ATOMIC_DEC +# define ATOMIC_DEC16 ATOMIC_DEC +#endif + +#ifndef HDR_T_PRIVATE +typedef struct tcphdr tcphdr_t; +typedef struct udphdr udphdr_t; +#endif +typedef struct icmp icmphdr_t; +typedef struct ip ip_t; +typedef struct ether_header ether_header_t; +typedef struct tcpiphdr tcpiphdr_t; + +#ifndef FR_GROUPLEN +# define FR_GROUPLEN 16 +#endif + +#ifdef offsetof +# undef offsetof +#endif +#ifndef offsetof +# define offsetof(t,m) (int)((&((t *)0L)->m)) +#endif + +/* + * This set of macros has been brought about because on Tru64 it is not + * possible to easily assign or examine values in a structure that are + * bit fields. + */ +#ifndef IP_V +# define IP_V(x) (x)->ip_v +#endif +#ifndef IP_V_A +# define IP_V_A(x,y) (x)->ip_v = (y) +#endif +#ifndef IP_HL +# define IP_HL(x) (x)->ip_hl +#endif +#ifndef IP_HL_A +# define IP_HL_A(x,y) (x)->ip_hl = (y) +#endif +#ifndef TCP_X2 +# define TCP_X2(x) (x)->th_x2 +#endif +#ifndef TCP_X2_A +# define TCP_X2_A(x,y) (x)->th_x2 = (y) +#endif +#ifndef TCP_OFF +# define TCP_OFF(x) (x)->th_off +#endif +#ifndef TCP_OFF_A +# define TCP_OFF_A(x,y) (x)->th_off = (y) +#endif +#define IPMINLEN(i, h) ((i)->ip_len >= (IP_HL(i) * 4 + sizeof(struct h))) + + +/* + * XXX - This is one of those *awful* hacks which nobody likes + */ +#ifdef ultrix +#define A_A +#else +#define A_A & +#endif + +#define TCPF_ALL (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|\ + TH_ECN|TH_CWR) + +#if (BSD >= 199306) && !defined(m_act) +# define m_act m_nextpkt +#endif + +/* + * Security Options for Internet Protocol (IPSO) as defined in RFC 1108. 
+ * + * Basic Option + * + * 00000001 - (Reserved 4) + * 00111101 - Top Secret + * 01011010 - Secret + * 10010110 - Confidential + * 01100110 - (Reserved 3) + * 11001100 - (Reserved 2) + * 10101011 - Unclassified + * 11110001 - (Reserved 1) + */ +#define IPSO_CLASS_RES4 0x01 +#define IPSO_CLASS_TOPS 0x3d +#define IPSO_CLASS_SECR 0x5a +#define IPSO_CLASS_CONF 0x96 +#define IPSO_CLASS_RES3 0x66 +#define IPSO_CLASS_RES2 0xcc +#define IPSO_CLASS_UNCL 0xab +#define IPSO_CLASS_RES1 0xf1 + +#define IPSO_AUTH_GENSER 0x80 +#define IPSO_AUTH_ESI 0x40 +#define IPSO_AUTH_SCI 0x20 +#define IPSO_AUTH_NSA 0x10 +#define IPSO_AUTH_DOE 0x08 +#define IPSO_AUTH_UN 0x06 +#define IPSO_AUTH_FTE 0x01 /* - * These #ifdef's are here mainly for linux, but who knows, they may - * not be in other places or maybe one day linux will grow up and some - * of these will turn up there too. + * IP option #defines */ +#undef IPOPT_RR +#define IPOPT_RR 7 +#undef IPOPT_ZSU +#define IPOPT_ZSU 10 /* ZSU */ +#undef IPOPT_MTUP +#define IPOPT_MTUP 11 /* MTUP */ +#undef IPOPT_MTUR +#define IPOPT_MTUR 12 /* MTUR */ +#undef IPOPT_ENCODE +#define IPOPT_ENCODE 15 /* ENCODE */ +#undef IPOPT_TS +#define IPOPT_TS 68 +#undef IPOPT_TR +#define IPOPT_TR 82 /* TR */ +#undef IPOPT_SECURITY +#define IPOPT_SECURITY 130 +#undef IPOPT_LSRR +#define IPOPT_LSRR 131 +#undef IPOPT_E_SEC +#define IPOPT_E_SEC 133 /* E-SEC */ +#undef IPOPT_CIPSO +#define IPOPT_CIPSO 134 /* CIPSO */ +#undef IPOPT_SATID +#define IPOPT_SATID 136 +#ifndef IPOPT_SID +# define IPOPT_SID IPOPT_SATID +#endif +#undef IPOPT_SSRR +#define IPOPT_SSRR 137 +#undef IPOPT_ADDEXT +#define IPOPT_ADDEXT 147 /* ADDEXT */ +#undef IPOPT_VISA +#define IPOPT_VISA 142 /* VISA */ +#undef IPOPT_IMITD +#define IPOPT_IMITD 144 /* IMITD */ +#undef IPOPT_EIP +#define IPOPT_EIP 145 /* EIP */ +#undef IPOPT_RTRALRT +#define IPOPT_RTRALRT 148 /* RTRALRT */ +#undef IPOPT_SDB +#define IPOPT_SDB 149 +#undef IPOPT_NSAPA +#define IPOPT_NSAPA 150 +#undef IPOPT_DPS +#define IPOPT_DPS 151 
+#undef IPOPT_UMP +#define IPOPT_UMP 152 +#undef IPOPT_FINN +#define IPOPT_FINN 205 /* FINN */ + +#ifndef TCPOPT_EOL +# define TCPOPT_EOL 0 +#endif +#ifndef TCPOPT_NOP +# define TCPOPT_NOP 1 +#endif +#ifndef TCPOPT_MAXSEG +# define TCPOPT_MAXSEG 2 +#endif +#ifndef TCPOLEN_MAXSEG +# define TCPOLEN_MAXSEG 4 +#endif +#ifndef TCPOPT_WINDOW +# define TCPOPT_WINDOW 3 +#endif +#ifndef TCPOLEN_WINDOW +# define TCPOLEN_WINDOW 3 +#endif +#ifndef TCPOPT_SACK_PERMITTED +# define TCPOPT_SACK_PERMITTED 4 +#endif +#ifndef TCPOLEN_SACK_PERMITTED +# define TCPOLEN_SACK_PERMITTED 2 +#endif +#ifndef TCPOPT_SACK +# define TCPOPT_SACK 5 +#endif +#ifndef TCPOPT_TIMESTAMP +# define TCPOPT_TIMESTAMP 8 +#endif + #ifndef ICMP_MINLEN # define ICMP_MINLEN 8 #endif @@ -761,6 +1813,9 @@ extern int ipfuiomove __P((caddr_t, int, int, struct uio *)); #ifndef ICMP_UNREACH_ADMIN_PROHIBIT # define ICMP_UNREACH_ADMIN_PROHIBIT 13 #endif +#ifndef ICMP_UNREACH_FILTER +# define ICMP_UNREACH_FILTER 13 +#endif #ifndef ICMP_UNREACH_HOST_PRECEDENCE # define ICMP_UNREACH_HOST_PRECEDENCE 14 #endif @@ -899,6 +1954,9 @@ extern int ipfuiomove __P((caddr_t, int, int, struct uio *)); #ifndef TH_URG # define TH_URG 0x20 #endif +#undef TH_ACKMASK +#define TH_ACKMASK (TH_FIN|TH_SYN|TH_RST|TH_ACK) + #ifndef IPOPT_EOL # define IPOPT_EOL 0 #endif @@ -947,313 +2005,48 @@ extern int ipfuiomove __P((caddr_t, int, int, struct uio *)); #ifndef IPOPT_OLEN # define IPOPT_OLEN 1 #endif +#ifndef IPPROTO_HOPOPTS +# define IPPROTO_HOPOPTS 0 +#endif +#ifndef IPPROTO_ENCAP +# define IPPROTO_ENCAP 4 +#endif +#ifndef IPPROTO_IPV6 +# define IPPROTO_IPV6 41 +#endif +#ifndef IPPROTO_ROUTING +# define IPPROTO_ROUTING 43 +#endif +#ifndef IPPROTO_FRAGMENT +# define IPPROTO_FRAGMENT 44 +#endif #ifndef IPPROTO_GRE # define IPPROTO_GRE 47 /* GRE encaps RFC 1701 */ #endif #ifndef IPPROTO_ESP # define IPPROTO_ESP 50 #endif +#ifndef IPPROTO_AH +# define IPPROTO_AH 51 +#endif #ifndef IPPROTO_ICMPV6 # define IPPROTO_ICMPV6 58 #endif - -#ifdef linux 
-#include -/* - * TCP States - */ -#define TCPS_CLOSED 0 /* closed */ -#define TCPS_LISTEN 1 /* listening for connection */ -#define TCPS_SYN_SENT 2 /* active, have sent syn */ -#define TCPS_SYN_RECEIVED 3 /* have send and received syn */ -/* states < TCPS_ESTABLISHED are those where connections not established */ -#define TCPS_ESTABLISHED 4 /* established */ -#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ -/* states > TCPS_CLOSE_WAIT are those where user has closed */ -#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ -#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ -#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ -/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ -#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ -#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ - -/* - * file flags. - */ -#ifdef WRITE -#define FWRITE WRITE -#define FREAD READ -#else -#define FWRITE _IOC_WRITE -#define FREAD _IOC_READ -#endif -/* - * mbuf related problems. 
- */ -#define mtod(m,t) (t)((m)->data) -#define m_len len -#define m_next next - -#ifdef IP_DF -#undef IP_DF -#endif -#define IP_DF 0x4000 - -typedef struct { - __u16 th_sport; - __u16 th_dport; - __u32 th_seq; - __u32 th_ack; -# if defined(__i386__) || defined(__MIPSEL__) || defined(__alpha__) ||\ - defined(__vax__) - __u8 th_res:4; - __u8 th_off:4; -#else - __u8 th_off:4; - __u8 th_res:4; -#endif - __u8 th_flags; - __u16 th_win; - __u16 th_sum; - __u16 th_urp; -} tcphdr_t; - -typedef struct { - __u16 uh_sport; - __u16 uh_dport; - __u16 uh_ulen; - __u16 uh_sum; -} udphdr_t; - -typedef struct { -# if defined(__i386__) || defined(__MIPSEL__) || defined(__alpha__) ||\ - defined(__vax__) - __u8 ip_hl:4; - __u8 ip_v:4; -# else - __u8 ip_v:4; - __u8 ip_hl:4; -# endif - __u8 ip_tos; - __u16 ip_len; - __u16 ip_id; - __u16 ip_off; - __u8 ip_ttl; - __u8 ip_p; - __u16 ip_sum; - struct in_addr ip_src; - struct in_addr ip_dst; -} ip_t; - -/* - * Structure of an icmp header. - */ -typedef struct icmp { - __u8 icmp_type; /* type of message, see below */ - __u8 icmp_code; /* type sub code */ - __u16 icmp_cksum; /* ones complement cksum of struct */ - union { - __u8 ih_pptr; /* ICMP_PARAMPROB */ - struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ - struct ih_idseq { - __u16 icd_id; - __u16 icd_seq; - } ih_idseq; - int ih_void; - } icmp_hun; -# define icmp_pptr icmp_hun.ih_pptr -# define icmp_gwaddr icmp_hun.ih_gwaddr -# define icmp_id icmp_hun.ih_idseq.icd_id -# define icmp_seq icmp_hun.ih_idseq.icd_seq -# define icmp_void icmp_hun.ih_void - union { - struct id_ts { - n_time its_otime; - n_time its_rtime; - n_time its_ttime; - } id_ts; - struct id_ip { - ip_t idi_ip; - /* options and then 64 bits of data */ - } id_ip; - u_long id_mask; - char id_data[1]; - } icmp_dun; -# define icmp_otime icmp_dun.id_ts.its_otime -# define icmp_rtime icmp_dun.id_ts.its_rtime -# define icmp_ttime icmp_dun.id_ts.its_ttime -# define icmp_ip icmp_dun.id_ip.idi_ip -# define icmp_mask icmp_dun.id_mask -# 
define icmp_data icmp_dun.id_data -} icmphdr_t; - -# ifndef LINUX_IPOVLY -# define LINUX_IPOVLY -struct ipovly { - caddr_t ih_next, ih_prev; /* for protocol sequence q's */ - u_char ih_x1; /* (unused) */ - u_char ih_pr; /* protocol */ - short ih_len; /* protocol length */ - struct in_addr ih_src; /* source internet address */ - struct in_addr ih_dst; /* destination internet address */ -}; -# endif - -typedef struct { - __u8 ether_dhost[6]; - __u8 ether_shost[6]; - __u16 ether_type; -} ether_header_t; - -typedef struct uio { - int uio_resid; - int uio_rw; - caddr_t uio_buf; -} uio_t; - -# define UIO_READ 0 -# define UIO_WRITE 1 -# define UIOMOVE(a, b, c, d) uiomove(a,b,c,d) - -/* - * For masking struct ifnet onto struct device - */ -# define if_name name - -# ifdef KERNEL -# define GETUNIT(x, v) dev_get(x) -# define FREE_MB_T(m) kfree_skb(m, FREE_WRITE) -# define uniqtime do_gettimeofday -# undef INT_MAX -# undef UINT_MAX -# undef LONG_MAX -# undef ULONG_MAX -# include -# define SPL_X(x) -# define SPL_NET(x) -# define SPL_IMP(x) - -# define bcmp(a,b,c) memcmp(a,b,c) -# define bcopy(a,b,c) memcpy(b,a,c) -# define bzero(a,c) memset(a,0,c) - -# define UNITNAME(n) dev_get((n)) - -# define KMALLOC(a,b) (a) = (b)kmalloc(sizeof(*(a)), GFP_ATOMIC) -# define KMALLOCS(a,b,c) (a) = (b)kmalloc((c), GFP_ATOMIC) -# define KFREE(x) kfree_s((x), sizeof(*(x))) -# define KFREES(x,s) kfree_s((x), (s)) -#define IRCOPY(const void *a, void *b, size_t c) { \ - int error; \ - - error = verify_area(VERIFY_READ, a ,c); \ - if (!error) \ - memcpy_fromfs(b, a, c); \ - return error; \ -} -static inline int IWCOPY(const void *a, void *b, size_t c) -{ - int error; - - error = verify_area(VERIFY_WRITE, b, c); - if (!error) - memcpy_tofs(b, a, c); - return error; -} -static inline int IRCOPYPTR(const void *a, void *b, size_t c) { - caddr_t ca; - int error; - - error = verify_area(VERIFY_READ, a ,sizeof(ca)); - if (!error) { - memcpy_fromfs(ca, a, sizeof(ca)); - error = verify_area(VERIFY_READ, ca , 
c); - if (!error) - memcpy_fromfs(b, ca, c); - } - return error; -} -static inline int IWCOPYPTR(const void *a, void *b, size_t c) { - caddr_t ca; - int error; - - - error = verify_area(VERIFY_READ, b ,sizeof(ca)); - if (!error) { - memcpy_fromfs(ca, b, sizeof(ca)); - error = verify_area(VERIFY_WRITE, ca, c); - if (!error) - memcpy_tofs(ca, a, c); - } - return error; -} -# else -# define __KERNEL__ -# undef INT_MAX -# undef UINT_MAX -# undef LONG_MAX -# undef ULONG_MAX -# define s8 __s8 -# define u8 __u8 -# define s16 __s16 -# define u16 __u16 -# define s32 __s32 -# define u32 __u32 -# include -# undef __KERNEL__ -# endif -# define ifnet device -#else -typedef struct tcphdr tcphdr_t; -typedef struct udphdr udphdr_t; -typedef struct icmp icmphdr_t; -typedef struct ip ip_t; -typedef struct ether_header ether_header_t; -#endif /* linux */ -typedef struct tcpiphdr tcpiphdr_t; - -#if defined(hpux) || defined(linux) -struct ether_addr { - char ether_addr_octet[6]; -}; +#ifndef IPPROTO_NONE +# define IPPROTO_NONE 59 #endif - -/* - * XXX - This is one of those *awful* hacks which nobody likes - */ -#ifdef ultrix -#define A_A -#else -#define A_A & +#ifndef IPPROTO_DSTOPTS +# define IPPROTO_DSTOPTS 60 #endif - -#if (BSD >= 199306) && !defined(m_act) -# define m_act m_nextpkt +#ifndef IPPROTO_FRAGMENT +# define IPPROTO_FRAGMENT 44 #endif - #ifndef ICMP_ROUTERADVERT # define ICMP_ROUTERADVERT 9 #endif #ifndef ICMP_ROUTERSOLICIT # define ICMP_ROUTERSOLICIT 10 #endif -#undef ICMP_MAX_UNREACH -#define ICMP_MAX_UNREACH 14 -#undef ICMP_MAXTYPE -#define ICMP_MAXTYPE 18 -/* - * ICMP error replies have an IP header (20 bytes), 8 bytes of ICMP data, - * another IP header and then 64 bits of data, totalling 56. Of course, - * the last 64 bits is dependant on that being available. 
- */ -#define ICMPERR_ICMPHLEN 8 -#define ICMPERR_IPICMPHLEN (20 + 8) -#define ICMPERR_MINPKTLEN (20 + 8 + 20) -#define ICMPERR_MAXPKTLEN (20 + 8 + 20 + 8) -#define ICMP6_MINLEN 8 -#define ICMP6ERR_IPICMPHLEN (40 + 8) -#define ICMP6ERR_MINPKTLEN (40 + 8 + 40) - #ifndef ICMP6_DST_UNREACH # define ICMP6_DST_UNREACH 1 #endif @@ -1425,6 +2218,79 @@ struct ether_addr { #endif #define TH_ECNALL (TH_ECN|TH_CWR) -#define TCPF_ALL (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECN|TH_CWR) +/* + * TCP States + */ +#define IPF_TCPS_CLOSED 0 /* closed */ +#define IPF_TCPS_LISTEN 1 /* listening for connection */ +#define IPF_TCPS_SYN_SENT 2 /* active, have sent syn */ +#define IPF_TCPS_SYN_RECEIVED 3 /* have send and received syn */ +#define IPF_TCPS_HALF_ESTAB 4 /* for connections not fully "up" */ +/* states < IPF_TCPS_ESTABLISHED are those where connections not established */ +#define IPF_TCPS_ESTABLISHED 5 /* established */ +#define IPF_TCPS_CLOSE_WAIT 6 /* rcvd fin, waiting for close */ +/* states > IPF_TCPS_CLOSE_WAIT are those where user has closed */ +#define IPF_TCPS_FIN_WAIT_1 7 /* have closed, sent fin */ +#define IPF_TCPS_CLOSING 8 /* closed xchd FIN; await FIN ACK */ +#define IPF_TCPS_LAST_ACK 9 /* had fin and close; await FIN ACK */ +/* states > IPF_TCPS_CLOSE_WAIT && < IPF_TCPS_FIN_WAIT_2 await ACK of FIN */ +#define IPF_TCPS_FIN_WAIT_2 10 /* have closed, fin is acked */ +#define IPF_TCPS_TIME_WAIT 11 /* in 2*msl quiet wait after close */ +#define IPF_TCP_NSTATES 12 + +#define TCP_MSL 120 + +#undef ICMP_MAX_UNREACH +#define ICMP_MAX_UNREACH 14 +#undef ICMP_MAXTYPE +#define ICMP_MAXTYPE 18 + +#ifndef IFNAMSIZ +#define IFNAMSIZ 16 +#endif + +#ifndef LOG_FTP +# define LOG_FTP (11<<3) +#endif +#ifndef LOG_AUTHPRIV +# define LOG_AUTHPRIV (10<<3) +#endif +#ifndef LOG_AUDIT +# define LOG_AUDIT (13<<3) +#endif +#ifndef LOG_NTP +# define LOG_NTP (12<<3) +#endif +#ifndef LOG_SECURITY +# define LOG_SECURITY (13<<3) +#endif +#ifndef LOG_LFMT +# define LOG_LFMT (14<<3) 
+#endif +#ifndef LOG_CONSOLE +# define LOG_CONSOLE (14<<3) +#endif + +/* + * ICMP error replies have an IP header (20 bytes), 8 bytes of ICMP data, + * another IP header and then 64 bits of data, totalling 56. Of course, + * the last 64 bits is dependant on that being available. + */ +#define ICMPERR_ICMPHLEN 8 +#define ICMPERR_IPICMPHLEN (20 + 8) +#define ICMPERR_MINPKTLEN (20 + 8 + 20) +#define ICMPERR_MAXPKTLEN (20 + 8 + 20 + 8) +#define ICMP6ERR_MINPKTLEN (40 + 8) +#define ICMP6ERR_IPICMPHLEN (40 + 8 + 40) + +#ifndef MIN +# define MIN(a,b) (((a)<(b))?(a):(b)) +#endif + +#ifdef IPF_DEBUG +# define DPRINT(x) printf x +#else +# define DPRINT(x) +#endif #endif /* __IP_COMPAT_H__ */ diff --git a/sys/contrib/ipfilter/netinet/ip_fil.c b/sys/contrib/ipfilter/netinet/ip_fil.c deleted file mode 100644 index f974400..0000000 --- a/sys/contrib/ipfilter/netinet/ip_fil.c +++ /dev/null @@ -1,2358 +0,0 @@ -/* - * Copyright (C) 1993-2001 by Darren Reed. - * - * See the IPFILTER.LICENCE file for details on licencing. 
- */ - -#ifndef SOLARIS -#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) -#endif - -#if defined(KERNEL) && !defined(_KERNEL) -# define _KERNEL -#endif -#if defined(_KERNEL) && defined(__FreeBSD_version) && \ - (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) -#include "opt_inet6.h" -#endif -#include -#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ - defined(_KERNEL) && !defined(_LKM) -# include "opt_ipfilter_log.h" -#endif -#if defined(__FreeBSD__) && !defined(__FreeBSD_version) -# if !defined(_KERNEL) || defined(IPFILTER_LKM) -# include -# endif -#endif -#if defined(__sgi) && (IRIX > 602) -# define _KMEMUSER -# include -#endif -#ifndef _KERNEL -# include -# include -# include -# include -# include -#endif -#include -#include -#include -#if __FreeBSD_version >= 220000 && defined(_KERNEL) -# include -# include -#else -# include -#endif -#include -#ifdef _KERNEL -# include -#endif -#if !SOLARIS -# if (NetBSD > 199609) || (OpenBSD > 199603) || (__FreeBSD_version >= 300000) -# include -# else -# include -# endif -# include -#else -# include -#endif -#include -#include - -#include -#ifdef sun -# include -#endif -#if __FreeBSD_version >= 300000 -# include -# if defined(_KERNEL) && !defined(IPFILTER_LKM) -# include "opt_ipfilter.h" -# endif -# if defined(_KERNEL) && (__FreeBSD_version >= 501108) && \ - (__FreeBSD_version < 600001) && !defined(KLD_MODULE) -# include "opt_pfil_hooks.h" -# endif -#endif -#ifdef __sgi -#include -# ifdef IFF_DRVRLOCK /* IRIX6 */ -#include -# endif -#endif -#include -#include -#if !(defined(__sgi) && !defined(IFF_DRVRLOCK)) /* IRIX < 6 */ -# include -#endif -#include -#include -#include -#include -#include -#include -#include -#ifndef _KERNEL -# include -# include -#endif -#include "netinet/ip_compat.h" -#ifdef USE_INET6 -# include -# if !SOLARIS -# include -# include -# endif -#endif -#include "netinet/ip_fil.h" -#include "netinet/ip_nat.h" -#include "netinet/ip_frag.h" -#include 
"netinet/ip_state.h" -#include "netinet/ip_proxy.h" -#include "netinet/ip_auth.h" -#if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) -# include -#endif -#ifndef MIN -# define MIN(a,b) (((a)<(b))?(a):(b)) -#endif -#if !SOLARIS && defined(_KERNEL) && !defined(__sgi) -# include -extern int ip_optcopy __P((struct ip *, struct ip *)); -#endif -#if defined(OpenBSD) && (OpenBSD >= 200211) && defined(_KERNEL) -extern int ip6_getpmtu(struct route_in6 *, struct route_in6 *, - struct ifnet *, struct in6_addr *, u_long *); -#endif - -#include - -#if !defined(lint) -static const char sccsid[] = "@(#)ip_fil.c 2.41 6/5/96 (C) 1993-2000 Darren Reed"; -/* static const char rcsid[] = "@(#)$Id: ip_fil.c,v 2.42.2.34 2001/07/23 13:49:57 darrenr Exp $"; */ -static const char rcsid[] = "@(#)$FreeBSD$"; -#endif - -extern struct protosw inetsw[]; - -#ifndef _KERNEL -# include "ipt.h" -static struct ifnet **ifneta = NULL; -static int nifs = 0; -#else -# if (BSD < 199306) || defined(__sgi) -extern int tcp_ttl; -# endif -#endif - -#ifdef ICMP_UNREACH_FILTER_PROHIB -int ipl_unreach = ICMP_UNREACH_FILTER_PROHIB; -#else -int ipl_unreach = ICMP_UNREACH_FILTER; -#endif -u_long ipl_frouteok[2] = {0, 0}; - -static int frzerostats __P((caddr_t)); -#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003) -static int frrequest __P((int, u_long, caddr_t, int)); -#else -static int frrequest __P((int, int, caddr_t, int)); -#endif -#ifdef _KERNEL -static int (*fr_savep) __P((ip_t *, int, void *, int, struct mbuf **)); -static int send_ip __P((ip_t *, fr_info_t *, struct mbuf **)); -# ifdef USE_INET6 -static int ipfr_fastroute6 __P((struct mbuf *, struct mbuf **, - fr_info_t *, frdest_t *)); -# endif -# ifdef __sgi -extern int tcp_mtudisc; -# endif -# ifdef USE_MUTEX -extern kmutex_t ipf_rw; -extern KRWLOCK_T ipf_mutex; -# endif -#else -void init_ifp __P((void)); -# if defined(__sgi) && (IRIX < 605) -static int no_output __P((struct ifnet *, struct mbuf *, - struct 
sockaddr *)); -static int write_output __P((struct ifnet *, struct mbuf *, - struct sockaddr *)); -# else -static int no_output __P((struct ifnet *, struct mbuf *, - struct sockaddr *, struct rtentry *)); -static int write_output __P((struct ifnet *, struct mbuf *, - struct sockaddr *, struct rtentry *)); -# endif -#endif -int fr_running = 0; - -#if (__FreeBSD_version >= 300000) && defined(_KERNEL) -struct callout_handle ipfr_slowtimer_ch; -#endif -#if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000) -# include -struct callout ipfr_slowtimer_ch; -#endif -#if defined(__OpenBSD__) -# include -struct timeout ipfr_slowtimer_ch; -#endif -#if defined(__sgi) && defined(_KERNEL) -toid_t ipfr_slowtimer_ch; -#endif - -#if defined(__NetBSD__) && (__NetBSD_Version__ >= 106080000) && \ - defined(_KERNEL) -# include -const struct cdevsw ipl_cdevsw = { - iplopen, iplclose, iplread, nowrite, iplioctl, - nostop, notty, nopoll, nommap, -}; -#endif - -#if (_BSDI_VERSION >= 199510) && defined(_KERNEL) -# include -# include - -struct cfdriver iplcd = { - NULL, "ipl", NULL, NULL, DV_DULL, 0 -}; - -struct devsw iplsw = { - &iplcd, - iplopen, iplclose, iplread, nowrite, iplioctl, noselect, nommap, - nostrat, nodump, nopsize, 0, - nostop -}; -#endif /* _BSDI_VERSION >= 199510 && _KERNEL */ - -#if defined(__NetBSD__) || defined(__OpenBSD__) || \ - (_BSDI_VERSION >= 199701) || \ - ((__FreeBSD_version >= 500011) && defined(_KERNEL)) -# include -# if defined(NETBSD_PF) -# include -/* - * We provide the fr_checkp name just to minimize changes later. 
- */ -int (*fr_checkp) __P((ip_t *ip, int hlen, void *ifp, int out, mb_t **mp)); -# endif /* NETBSD_PF */ -#endif /* __NetBSD__ */ - - -#if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 105110000) && \ - defined(_KERNEL) -# include - -static int fr_check_wrapper(void *, struct mbuf **, struct ifnet *, int ); - -static int fr_check_wrapper(arg, mp, ifp, dir) -void *arg; -struct mbuf **mp; -struct ifnet *ifp; -int dir; -{ - struct ip *ip = mtod(*mp, struct ip *); - int rv, hlen = ip->ip_hl << 2; - -#if defined(M_CSUM_TCPv4) - /* - * If the packet is out-bound, we can't delay checksums - * here. For in-bound, the checksum has already been - * validated. - */ - if (dir == PFIL_OUT) { - if ((*mp)->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { - in_delayed_cksum(*mp); - (*mp)->m_pkthdr.csum_flags &= - ~(M_CSUM_TCPv4|M_CSUM_UDPv4); - } - } -#endif /* M_CSUM_TCPv4 */ - - /* - * We get the packet with all fields in network byte - * order. We expect ip_len and ip_off to be in host - * order. We frob them, call the filter, then frob - * them back. - * - * Note, we don't need to update the checksum, because - * it has already been verified. 
- */ - NTOHS(ip->ip_len); - NTOHS(ip->ip_off); - - rv = fr_check(ip, hlen, ifp, (dir == PFIL_OUT), mp); - - if (rv == 0 && *mp != NULL) { - ip = mtod(*mp, struct ip *); - HTONS(ip->ip_len); - HTONS(ip->ip_off); - } - - return (rv); -} - -# ifdef USE_INET6 -# include - -static int fr_check_wrapper6(void *, struct mbuf **, struct ifnet *, int ); - -static int fr_check_wrapper6(arg, mp, ifp, dir) -void *arg; -struct mbuf **mp; -struct ifnet *ifp; -int dir; -{ - - return (fr_check(mtod(*mp, struct ip *), sizeof(struct ip6_hdr), - ifp, (dir == PFIL_OUT), mp)); -} -# endif -#endif /* __NetBSD_Version >= 105110000 && _KERNEL */ -#if (__FreeBSD_version >= 501108) && (__FreeBSD_version < 600004) && \ - defined(_KERNEL) - -static int -fr_check_wrapper(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) -{ - struct ip *ip = mtod(*mp, struct ip *); - return fr_check(ip, ip->ip_hl << 2, ifp, (dir == PFIL_OUT), mp); -} - -# ifdef USE_INET6 -# include - -static int -fr_check_wrapper6(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) -{ - return (fr_check(mtod(*mp, struct ip *), sizeof(struct ip6_hdr), - ifp, (dir == PFIL_OUT), mp)); -} -# endif - -#elif (__FreeBSD_version >= 600004) && defined(_KERNEL) - -static int -fr_check_wrapper(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir, - struct inpcb *inp) -{ - struct ip *ip = mtod(*mp, struct ip *); - return fr_check(ip, ip->ip_hl << 2, ifp, (dir == PFIL_OUT), mp); -} - -# ifdef USE_INET6 -# include - -static int -fr_check_wrapper6(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir, - struct inpcb *inp) -{ - return (fr_check(mtod(*mp, struct ip *), sizeof(struct ip6_hdr), - ifp, (dir == PFIL_OUT), mp)); -} -# endif -#endif /* __FreeBSD_version >= 600004 && _KERNEL */ -#ifdef _KERNEL -# if defined(IPFILTER_LKM) && !defined(__sgi) -int iplidentify(s) -char *s; -{ - if (strcmp(s, "ipl") == 0) - return 1; - return 0; -} -# endif /* IPFILTER_LKM */ - - -/* - * Try to detect the case when compiling for NetBSD with 
pseudo-device - */ -# if defined(__NetBSD__) && defined(PFIL_HOOKS) -void -ipfilterattach(count) -int count; -{ - - /* - * Do nothing here, really. The filter will be enabled - * by the SIOCFRENB ioctl. - */ -} -# endif - - -# if defined(__NetBSD__) || defined(__OpenBSD__) -int ipl_enable() -# else -int iplattach() -# endif -{ - char *defpass; - int s; -# if defined(__sgi) || (defined(NETBSD_PF) && \ - ((__NetBSD_Version__ >= 104200000) || (__FreeBSD_version >= 500011))) - int error = 0; -# endif -#if (defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 105110000)) || \ - (__FreeBSD_version >= 501108) - struct pfil_head *ph_inet; -# ifdef USE_INET6 - struct pfil_head *ph_inet6; -# endif -#endif - - SPL_NET(s); - if (fr_running || (fr_checkp == fr_check)) { - printf("IP Filter: already initialized\n"); - SPL_X(s); - return EBUSY; - } - -# ifdef IPFILTER_LOG - ipflog_init(); -# endif - if (nat_init() == -1) { - SPL_X(s); - return EIO; - } - if (fr_stateinit() == -1) { - SPL_X(s); - return EIO; - } - if (appr_init() == -1) { - SPL_X(s); - return EIO; - } - -# ifdef NETBSD_PF -# if (__NetBSD_Version__ >= 104200000) || (__FreeBSD_version >= 500011) -# if (__NetBSD_Version__ >= 105110000) || (__FreeBSD_version >= 501108) - ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); -# ifdef USE_INET6 - ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); -# endif - if (ph_inet == NULL -# ifdef USE_INET6 - && ph_inet6 == NULL -# endif - ) - return ENODEV; - - if (ph_inet != NULL) - error = pfil_add_hook((void *)fr_check_wrapper, NULL, - PFIL_IN|PFIL_OUT, ph_inet); - else - error = 0; -# else - error = pfil_add_hook((void *)fr_check, PFIL_IN|PFIL_OUT, - &inetsw[ip_protox[IPPROTO_IP]].pr_pfh); -# endif - if (error) { -# ifdef USE_INET6 - goto pfil_error; -# else - SPL_X(s); - appr_unload(); - ip_natunload(); - fr_stateunload(); - return error; -# endif - } -# else - pfil_add_hook((void *)fr_check, PFIL_IN|PFIL_OUT); -# endif -# ifdef USE_INET6 -# if (__NetBSD_Version__ >= 105110000) || 
(__FreeBSD_version >= 501108) - if (ph_inet6 != NULL) - error = pfil_add_hook((void *)fr_check_wrapper6, NULL, - PFIL_IN|PFIL_OUT, ph_inet6); - else - error = 0; - if (error) { - pfil_remove_hook((void *)fr_check_wrapper6, NULL, - PFIL_IN|PFIL_OUT, ph_inet6); -# else - error = pfil_add_hook((void *)fr_check, PFIL_IN|PFIL_OUT, - &inet6sw[ip6_protox[IPPROTO_IPV6]].pr_pfh); - if (error) { - pfil_remove_hook((void *)fr_check, PFIL_IN|PFIL_OUT, - &inetsw[ip_protox[IPPROTO_IP]].pr_pfh); -# endif -pfil_error: - SPL_X(s); - appr_unload(); - ip_natunload(); - fr_stateunload(); - return error; - } -# endif -# endif - -# ifdef __sgi - error = ipfilter_sgi_attach(); - if (error) { - SPL_X(s); - appr_unload(); - ip_natunload(); - fr_stateunload(); - return error; - } -# endif - - bzero((char *)frcache, sizeof(frcache)); - fr_savep = fr_checkp; - fr_checkp = fr_check; - fr_running = 1; - - SPL_X(s); - if (fr_pass & FR_PASS) - defpass = "pass"; - else if (fr_pass & FR_BLOCK) - defpass = "block"; - else - defpass = "no-match -> block"; - - printf("%s initialized. Default = %s all, Logging = %s\n", - ipfilter_version, defpass, -# ifdef IPFILTER_LOG - "enabled"); -# else - "disabled"); -# endif -#ifdef _KERNEL -# if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000) - callout_init(&ipfr_slowtimer_ch); - callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL); -# else -# if defined(__OpenBSD__) - timeout_set(&ipfr_slowtimer_ch, ipfr_slowtimer, NULL); - timeout_add(&ipfr_slowtimer_ch, hz/2); -# else -# if (__FreeBSD_version >= 300000) || defined(__sgi) - ipfr_slowtimer_ch = timeout(ipfr_slowtimer, NULL, hz/2); -# else - timeout(ipfr_slowtimer, NULL, hz/2); -# endif -# endif -# endif -#endif - return 0; -} - - -/* - * Disable the filter by removing the hooks from the IP input/output - * stream. 
- */ -# if defined(__NetBSD__) -int ipl_disable() -# else -int ipldetach() -# endif -{ - int s, i; -#if defined(NETBSD_PF) && \ - ((__NetBSD_Version__ >= 104200000) || (__FreeBSD_version >= 500011)) - int error = 0; -# if (__NetBSD_Version__ >= 105150000) || (__FreeBSD_version >= 501108) - struct pfil_head *ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); -# ifdef USE_INET6 - struct pfil_head *ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); -# endif -# endif -#endif - -#ifdef _KERNEL -# if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000) - callout_stop(&ipfr_slowtimer_ch); -# else -# if (__FreeBSD_version >= 300000) - untimeout(ipfr_slowtimer, NULL, ipfr_slowtimer_ch); -# else -# ifdef __sgi - untimeout(ipfr_slowtimer_ch); -# else -# if defined(__OpenBSD__) - timeout_del(&ipfr_slowtimer_ch); -# else - untimeout(ipfr_slowtimer, NULL); -# endif /* OpenBSD */ -# endif /* __sgi */ -# endif /* FreeBSD */ -# endif /* NetBSD */ -#endif - SPL_NET(s); - if (!fr_running) - { - printf("IP Filter: not initialized\n"); - SPL_X(s); - return 0; - } - - printf("%s unloaded\n", ipfilter_version); - - fr_checkp = fr_savep; - i = frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE); - i += frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE); - fr_running = 0; - -# ifdef NETBSD_PF -# if ((__NetBSD_Version__ >= 104200000) || (__FreeBSD_version >= 500011)) -# if (__NetBSD_Version__ >= 105110000) || (__FreeBSD_version >= 501108) - if (ph_inet != NULL) - error = pfil_remove_hook((void *)fr_check_wrapper, NULL, - PFIL_IN|PFIL_OUT, ph_inet); - else - error = 0; -# else - error = pfil_remove_hook((void *)fr_check, PFIL_IN|PFIL_OUT, - &inetsw[ip_protox[IPPROTO_IP]].pr_pfh); -# endif - if (error) { - SPL_X(s); - return error; - } -# else - pfil_remove_hook((void *)fr_check, PFIL_IN|PFIL_OUT); -# endif -# ifdef USE_INET6 -# if (__NetBSD_Version__ >= 105110000) || (__FreeBSD_version >= 501108) - if (ph_inet6 != NULL) - error = pfil_remove_hook((void *)fr_check_wrapper6, NULL, - 
PFIL_IN|PFIL_OUT, ph_inet6); - else - error = 0; -# else - error = pfil_remove_hook((void *)fr_check, PFIL_IN|PFIL_OUT, - &inet6sw[ip6_protox[IPPROTO_IPV6]].pr_pfh); -# endif - if (error) { - SPL_X(s); - return error; - } -# endif -# endif - -# ifdef __sgi - ipfilter_sgi_detach(); -# endif - - appr_unload(); - ipfr_unload(); - ip_natunload(); - fr_stateunload(); - fr_authunload(); - - SPL_X(s); - return 0; -} -#endif /* _KERNEL */ - - -static int frzerostats(data) -caddr_t data; -{ - friostat_t fio; - int error; - - fr_getstat(&fio); - error = IWCOPYPTR((caddr_t)&fio, data, sizeof(fio)); - if (error) - return EFAULT; - - bzero((char *)frstats, sizeof(*frstats) * 2); - - return 0; -} - - -/* - * Filter ioctl interface. - */ -#ifdef __sgi -int IPL_EXTERN(ioctl)(dev_t dev, int cmd, caddr_t data, int mode -# ifdef _KERNEL - , cred_t *cp, int *rp -# endif -) -#else -int IPL_EXTERN(ioctl)(dev, cmd, data, mode -# if (defined(_KERNEL) && ((_BSDI_VERSION >= 199510) || (BSD >= 199506) || \ - (NetBSD >= 199511) || (__FreeBSD_version >= 220000) || \ - defined(__OpenBSD__))) -, td) -struct thread *td; -# else -) -# endif -#ifdef _KERNEL -struct cdev *dev; -#else -dev_t dev; -#endif -# if defined(__NetBSD__) || defined(__OpenBSD__) || \ - (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) -u_long cmd; -# else -int cmd; -# endif -caddr_t data; -int mode; -#endif /* __sgi */ -{ -#if defined(_KERNEL) && !SOLARIS - int s; -#endif - int error = 0, unit = 0, tmp; - -#if (BSD >= 199306) && defined(_KERNEL) - if ((securelevel >= 3) && (mode & FWRITE)) - return EPERM; -#endif -#ifdef _KERNEL - unit = GET_MINOR(dev); - if ((IPL_LOGMAX < unit) || (unit < 0)) - return ENXIO; -#else - unit = dev; -#endif - - if (fr_running == 0 && (cmd != SIOCFRENB || unit != IPL_LOGIPF)) - return ENODEV; - - SPL_NET(s); - - if (unit == IPL_LOGNAT) { - if (fr_running) - error = nat_ioctl(data, cmd, mode); - else - error = EIO; - SPL_X(s); - return error; - } - if (unit == IPL_LOGSTATE) { - if 
(fr_running) - error = fr_state_ioctl(data, cmd, mode); - else - error = EIO; - SPL_X(s); - return error; - } - if (unit == IPL_LOGAUTH) { - if (!fr_running) - error = EIO; - else - if ((cmd == SIOCADAFR) || (cmd == SIOCRMAFR)) { - if (!(mode & FWRITE)) { - error = EPERM; - } else { - error = frrequest(unit, cmd, data, - fr_active); - } - } else { - error = fr_auth_ioctl(data, mode, cmd); - } - SPL_X(s); - return error; - } - - switch (cmd) { - case FIONREAD : -#ifdef IPFILTER_LOG - error = IWCOPY((caddr_t)&iplused[IPL_LOGIPF], (caddr_t)data, - sizeof(iplused[IPL_LOGIPF])); -#endif - break; -#if (!defined(IPFILTER_LKM) || defined(__NetBSD__)) && defined(_KERNEL) - case SIOCFRENB : - { - u_int enable; - - if (!(mode & FWRITE)) - error = EPERM; - else { - error = IRCOPY(data, (caddr_t)&enable, sizeof(enable)); - if (error) - break; - if (enable) -# if defined(__NetBSD__) || defined(__OpenBSD__) - error = ipl_enable(); -# else - error = iplattach(); -# endif - else -# if defined(__NetBSD__) - error = ipl_disable(); -# else - error = ipldetach(); -# endif - } - break; - } -#endif - case SIOCSETFF : - if (!(mode & FWRITE)) - error = EPERM; - else - error = IRCOPY(data, (caddr_t)&fr_flags, - sizeof(fr_flags)); - break; - case SIOCGETFF : - error = IWCOPY((caddr_t)&fr_flags, data, sizeof(fr_flags)); - break; - case SIOCINAFR : - case SIOCRMAFR : - case SIOCADAFR : - case SIOCZRLST : - if (!(mode & FWRITE)) - error = EPERM; - else - error = frrequest(unit, cmd, data, fr_active); - break; - case SIOCINIFR : - case SIOCRMIFR : - case SIOCADIFR : - if (!(mode & FWRITE)) - error = EPERM; - else - error = frrequest(unit, cmd, data, 1 - fr_active); - break; - case SIOCSWAPA : - if (!(mode & FWRITE)) - error = EPERM; - else { - bzero((char *)frcache, sizeof(frcache[0]) * 2); - *(u_int *)data = fr_active; - fr_active = 1 - fr_active; - } - break; - case SIOCGETFS : - { - friostat_t fio; - - fr_getstat(&fio); - error = IWCOPYPTR((caddr_t)&fio, data, sizeof(fio)); - if (error) - 
error = EFAULT; - break; - } - case SIOCFRZST : - if (!(mode & FWRITE)) - error = EPERM; - else - error = frzerostats(data); - break; - case SIOCIPFFL : - if (!(mode & FWRITE)) - error = EPERM; - else { - error = IRCOPY(data, (caddr_t)&tmp, sizeof(tmp)); - if (!error) { - tmp = frflush(unit, 4, tmp); - error = IWCOPY((caddr_t)&tmp, data, - sizeof(tmp)); - } - } - break; -#ifdef USE_INET6 - case SIOCIPFL6 : - if (!(mode & FWRITE)) - error = EPERM; - else { - error = IRCOPY(data, (caddr_t)&tmp, sizeof(tmp)); - if (!error) { - tmp = frflush(unit, 6, tmp); - error = IWCOPY((caddr_t)&tmp, data, - sizeof(tmp)); - } - } - break; -#endif - case SIOCSTLCK : - error = IRCOPY(data, (caddr_t)&tmp, sizeof(tmp)); - if (!error) { - fr_state_lock = tmp; - fr_nat_lock = tmp; - fr_frag_lock = tmp; - fr_auth_lock = tmp; - } else - error = EFAULT; - break; -#ifdef IPFILTER_LOG - case SIOCIPFFB : - if (!(mode & FWRITE)) - error = EPERM; - else - *(int *)data = ipflog_clear(unit); - break; -#endif /* IPFILTER_LOG */ - case SIOCGFRST : - error = IWCOPYPTR((caddr_t)ipfr_fragstats(), data, - sizeof(ipfrstat_t)); - if (error) - error = EFAULT; - break; - case SIOCFRSYN : - if (!(mode & FWRITE)) - error = EPERM; - else { -#if defined(_KERNEL) && defined(__sgi) - ipfsync(); -#endif - frsync(); - } - break; - default : - error = EINVAL; - break; - } - SPL_X(s); - return error; -} - - -void fr_forgetifp(ifp) -void *ifp; -{ - register frentry_t *f; - - WRITE_ENTER(&ipf_mutex); - for (f = ipacct[0][fr_active]; (f != NULL); f = f->fr_next) - if (f->fr_ifa == ifp) - f->fr_ifa = (void *)-1; - for (f = ipacct[1][fr_active]; (f != NULL); f = f->fr_next) - if (f->fr_ifa == ifp) - f->fr_ifa = (void *)-1; - for (f = ipfilter[0][fr_active]; (f != NULL); f = f->fr_next) - if (f->fr_ifa == ifp) - f->fr_ifa = (void *)-1; - for (f = ipfilter[1][fr_active]; (f != NULL); f = f->fr_next) - if (f->fr_ifa == ifp) - f->fr_ifa = (void *)-1; -#ifdef USE_INET6 - for (f = ipacct6[0][fr_active]; (f != NULL); f = 
f->fr_next) - if (f->fr_ifa == ifp) - f->fr_ifa = (void *)-1; - for (f = ipacct6[1][fr_active]; (f != NULL); f = f->fr_next) - if (f->fr_ifa == ifp) - f->fr_ifa = (void *)-1; - for (f = ipfilter6[0][fr_active]; (f != NULL); f = f->fr_next) - if (f->fr_ifa == ifp) - f->fr_ifa = (void *)-1; - for (f = ipfilter6[1][fr_active]; (f != NULL); f = f->fr_next) - if (f->fr_ifa == ifp) - f->fr_ifa = (void *)-1; -#endif - RWLOCK_EXIT(&ipf_mutex); - ip_natsync(ifp); -} - - -static int frrequest(unit, req, data, set) -int unit; -#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003) -u_long req; -#else -int req; -#endif -int set; -caddr_t data; -{ - register frentry_t *fp, *f, **fprev; - register frentry_t **ftail; - frgroup_t *fg = NULL; - int error = 0, in, i; - u_int *p, *pp; - frentry_t frd; - frdest_t *fdp; - u_int group; - - fp = &frd; - error = IRCOPYPTR(data, (caddr_t)fp, sizeof(*fp)); - if (error) - return EFAULT; - fp->fr_ref = 0; -#if (BSD >= 199306) && defined(_KERNEL) - if ((securelevel > 0) && (fp->fr_func != NULL)) - return EPERM; -#endif - - /* - * Check that the group number does exist and that if a head group - * has been specified, doesn't exist. - */ - if ((req != SIOCZRLST) && ((req == SIOCINAFR) || (req == SIOCINIFR) || - (req == SIOCADAFR) || (req == SIOCADIFR)) && fp->fr_grhead && - fr_findgroup((u_int)fp->fr_grhead, fp->fr_flags, unit, set, NULL)) - return EEXIST; - if ((req != SIOCZRLST) && fp->fr_group && - !fr_findgroup((u_int)fp->fr_group, fp->fr_flags, unit, set, NULL)) - return ESRCH; - - in = (fp->fr_flags & FR_INQUE) ? 
0 : 1; - - if (unit == IPL_LOGAUTH) - ftail = fprev = &ipauth; - else if ((fp->fr_flags & FR_ACCOUNT) && (fp->fr_v == 4)) - ftail = fprev = &ipacct[in][set]; - else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) && (fp->fr_v == 4)) - ftail = fprev = &ipfilter[in][set]; -#ifdef USE_INET6 - else if ((fp->fr_flags & FR_ACCOUNT) && (fp->fr_v == 6)) - ftail = fprev = &ipacct6[in][set]; - else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) && (fp->fr_v == 6)) - ftail = fprev = &ipfilter6[in][set]; -#endif - else - return ESRCH; - - if ((group = fp->fr_group)) { - if (!(fg = fr_findgroup(group, fp->fr_flags, unit, set, NULL))) - return ESRCH; - ftail = fprev = fg->fg_start; - } - - bzero((char *)frcache, sizeof(frcache[0]) * 2); - - for (i = 0; i < 4; i++) { - if ((fp->fr_ifnames[i][1] == '\0') && - ((fp->fr_ifnames[i][0] == '-') || - (fp->fr_ifnames[i][0] == '*'))) { - fp->fr_ifas[i] = NULL; - } else if (*fp->fr_ifnames[i]) { - fp->fr_ifas[i] = GETUNIT(fp->fr_ifnames[i], fp->fr_v); - if (!fp->fr_ifas[i]) - fp->fr_ifas[i] = (void *)-1; - } - } - - fdp = &fp->fr_dif; - fp->fr_flags &= ~FR_DUP; - if (*fdp->fd_ifname) { - fdp->fd_ifp = GETUNIT(fdp->fd_ifname, fp->fr_v); - if (!fdp->fd_ifp) - fdp->fd_ifp = (struct ifnet *)-1; - else - fp->fr_flags |= FR_DUP; - } - - fdp = &fp->fr_tif; - if (*fdp->fd_ifname) { - fdp->fd_ifp = GETUNIT(fdp->fd_ifname, fp->fr_v); - if (!fdp->fd_ifp) - fdp->fd_ifp = (struct ifnet *)-1; - } - - /* - * Look for a matching filter rule, but don't include the next or - * interface pointer in the comparison (fr_next, fr_ifa). - */ - for (fp->fr_cksum = 0, p = (u_int *)&fp->fr_ip, pp = &fp->fr_cksum; - p < pp; p++) - fp->fr_cksum += *p; - - for (; (f = *ftail); ftail = &f->fr_next) - if ((fp->fr_cksum == f->fr_cksum) && - !bcmp((char *)&f->fr_ip, (char *)&fp->fr_ip, FR_CMPSIZ)) - break; - - /* - * If zero'ing statistics, copy current to caller and zero. 
- */ - if (req == SIOCZRLST) { - if (!f) - return ESRCH; - error = IWCOPYPTR((caddr_t)f, data, sizeof(*f)); - if (error) - return EFAULT; - f->fr_hits = 0; - f->fr_bytes = 0; - return 0; - } - - if (!f) { - if (req != SIOCINAFR && req != SIOCINIFR) - while ((f = *ftail)) - ftail = &f->fr_next; - else { - ftail = fprev; - if (fp->fr_hits) { - while (--fp->fr_hits && (f = *ftail)) - ftail = &f->fr_next; - } - f = NULL; - } - } - - if (req == SIOCRMAFR || req == SIOCRMIFR) { - if (!f) - error = ESRCH; - else { - /* - * Only return EBUSY if there is a group list, else - * it's probably just state information referencing - * the rule. - */ - if ((f->fr_ref > 1) && f->fr_grp) - return EBUSY; - if (fg && fg->fg_head) - fg->fg_head->fr_ref--; - if (unit == IPL_LOGAUTH) { - return fr_preauthcmd(req, f, ftail); - } - if (f->fr_grhead) - fr_delgroup((u_int)f->fr_grhead, fp->fr_flags, - unit, set); - fixskip(fprev, f, -1); - *ftail = f->fr_next; - f->fr_next = NULL; - f->fr_ref--; - if (f->fr_ref == 0) - KFREE(f); - } - } else { - if (f) - error = EEXIST; - else { - if (unit == IPL_LOGAUTH) { - return fr_preauthcmd(req, fp, ftail); - } - KMALLOC(f, frentry_t *); - if (f != NULL) { - if (fg && fg->fg_head) - fg->fg_head->fr_ref++; - bcopy((char *)fp, (char *)f, sizeof(*f)); - f->fr_ref = 1; - f->fr_hits = 0; - f->fr_next = *ftail; - *ftail = f; - if (req == SIOCINIFR || req == SIOCINAFR) - fixskip(fprev, f, 1); - f->fr_grp = NULL; - if ((group = f->fr_grhead)) - fg = fr_addgroup(group, f, unit, set); - } else - error = ENOMEM; - } - } - return (error); -} - - -#ifdef _KERNEL -/* - * routines below for saving IP headers to buffer - */ -# ifdef __sgi -# ifdef _KERNEL -int IPL_EXTERN(open)(dev_t *pdev, int flags, int devtype, cred_t *cp) -# else -int IPL_EXTERN(open)(dev_t dev, int flags) -# endif -# else -int IPL_EXTERN(open)(dev, flags -# if ((_BSDI_VERSION >= 199510) || (BSD >= 199506) || (NetBSD >= 199511) || \ - (__FreeBSD_version >= 220000) || defined(__OpenBSD__)) && 
defined(_KERNEL) -, devtype, td) -int devtype; -struct thread *td; -# else -) -# endif -struct cdev *dev; -int flags; -# endif /* __sgi */ -{ -# if defined(__sgi) && defined(_KERNEL) - u_int min = geteminor(*pdev); -# else - u_int min = GET_MINOR(dev); -# endif - - if (IPL_LOGMAX < min) - min = ENXIO; - else - min = 0; - return min; -} - - -# ifdef __sgi -int IPL_EXTERN(close)(dev_t dev, int flags, int devtype, cred_t *cp) -#else -int IPL_EXTERN(close)(dev, flags -# if ((_BSDI_VERSION >= 199510) || (BSD >= 199506) || (NetBSD >= 199511) || \ - (__FreeBSD_version >= 220000) || defined(__OpenBSD__)) && defined(_KERNEL) -, devtype, td) -int devtype; -struct thread *td; -# else -) -# endif -struct cdev *dev; -int flags; -# endif /* __sgi */ -{ - u_int min = GET_MINOR(dev); - - if (IPL_LOGMAX < min) - min = ENXIO; - else - min = 0; - return min; -} - -/* - * iplread/ipllog - * both of these must operate with at least splnet() lest they be - * called during packet processing and cause an inconsistancy to appear in - * the filter lists. - */ -# ifdef __sgi -int IPL_EXTERN(read)(dev_t dev, uio_t *uio, cred_t *crp) -# else -# if BSD >= 199306 -int IPL_EXTERN(read)(dev, uio, ioflag) -int ioflag; -# else -int IPL_EXTERN(read)(dev, uio) -# endif -struct cdev *dev; -register struct uio *uio; -# endif /* __sgi */ -{ -# ifdef IPFILTER_LOG - return ipflog_read(GET_MINOR(dev), uio); -# else - return ENXIO; -# endif -} - - -/* - * send_reset - this could conceivably be a call to tcp_respond(), but that - * requires a large amount of setting up and isn't any more efficient. 
- */ -int send_reset(oip, fin) -struct ip *oip; -fr_info_t *fin; -{ - struct tcphdr *tcp, *tcp2; - int tlen = 0, hlen; - struct mbuf *m; -#ifdef USE_INET6 - ip6_t *ip6, *oip6 = (ip6_t *)oip; -#endif - ip_t *ip; - - tcp = (struct tcphdr *)fin->fin_dp; - if (tcp->th_flags & TH_RST) - return -1; /* feedback loop */ -# if (BSD < 199306) || defined(__sgi) - m = m_get(M_DONTWAIT, MT_HEADER); -# else - m = m_gethdr(M_DONTWAIT, MT_HEADER); -# endif - if (m == NULL) - return ENOBUFS; - if (m == NULL) - return -1; - - tlen = fin->fin_dlen - (tcp->th_off << 2) + - ((tcp->th_flags & TH_SYN) ? 1 : 0) + - ((tcp->th_flags & TH_FIN) ? 1 : 0); - -#ifdef USE_INET6 - hlen = (fin->fin_v == 6) ? sizeof(ip6_t) : sizeof(ip_t); -#else - hlen = sizeof(ip_t); -#endif - m->m_len = sizeof(*tcp2) + hlen; -# if BSD >= 199306 - m->m_data += max_linkhdr; - m->m_pkthdr.len = m->m_len; - m->m_pkthdr.rcvif = (struct ifnet *)0; -# endif - ip = mtod(m, struct ip *); -# ifdef USE_INET6 - ip6 = (ip6_t *)ip; -# endif - bzero((char *)ip, sizeof(*tcp2) + hlen); - tcp2 = (struct tcphdr *)((char *)ip + hlen); - - tcp2->th_sport = tcp->th_dport; - tcp2->th_dport = tcp->th_sport; - if (tcp->th_flags & TH_ACK) { - tcp2->th_seq = tcp->th_ack; - tcp2->th_flags = TH_RST; - } else { - tcp2->th_ack = ntohl(tcp->th_seq); - tcp2->th_ack += tlen; - tcp2->th_ack = htonl(tcp2->th_ack); - tcp2->th_flags = TH_RST|TH_ACK; - } - tcp2->th_off = sizeof(*tcp2) >> 2; -# ifdef USE_INET6 - if (fin->fin_v == 6) { - ip6->ip6_plen = htons(sizeof(struct tcphdr)); - ip6->ip6_nxt = IPPROTO_TCP; - ip6->ip6_src = oip6->ip6_dst; - ip6->ip6_dst = oip6->ip6_src; - tcp2->th_sum = in6_cksum(m, IPPROTO_TCP, - sizeof(*ip6), sizeof(*tcp2)); - return send_ip(oip, fin, &m); - } -# endif - ip->ip_p = IPPROTO_TCP; - ip->ip_len = htons(sizeof(struct tcphdr)); - ip->ip_src.s_addr = oip->ip_dst.s_addr; - ip->ip_dst.s_addr = oip->ip_src.s_addr; - tcp2->th_sum = in_cksum(m, hlen + sizeof(*tcp2)); - ip->ip_len = hlen + sizeof(*tcp2); - return send_ip(oip, 
fin, &m); -} - - -/* - * Send an IP(v4/v6) datagram out into the network - */ -static int send_ip(oip, fin, mp) -ip_t *oip; -fr_info_t *fin; -struct mbuf **mp; -{ - struct mbuf *m = *mp; - int error, hlen; - fr_info_t frn; - ip_t *ip; - - bzero((char *)&frn, sizeof(frn)); - frn.fin_ifp = fin->fin_ifp; - frn.fin_v = fin->fin_v; - frn.fin_out = fin->fin_out; - frn.fin_mp = mp; - - ip = mtod(m, ip_t *); - hlen = sizeof(*ip); - - ip->ip_v = fin->fin_v; - if (ip->ip_v == 4) { - ip->ip_hl = (sizeof(*oip) >> 2); - ip->ip_v = IPVERSION; - ip->ip_tos = oip->ip_tos; - ip->ip_id = oip->ip_id; - -# if defined(__NetBSD__) || \ - (defined(__OpenBSD__) && (OpenBSD >= 200012)) - if (ip_mtudisc != 0) - ip->ip_off = IP_DF; -# else -# if defined(__sgi) - if (ip->ip_p == IPPROTO_TCP && tcp_mtudisc != 0) - ip->ip_off = IP_DF; -# endif -# endif - -# if (BSD < 199306) || defined(__sgi) - ip->ip_ttl = tcp_ttl; -# else - ip->ip_ttl = ip_defttl; -# endif - ip->ip_sum = 0; - frn.fin_dp = (char *)(ip + 1); - } -# ifdef USE_INET6 - else if (ip->ip_v == 6) { - ip6_t *ip6 = (ip6_t *)ip; - - hlen = sizeof(*ip6); - ip6->ip6_hlim = 127; - frn.fin_dp = (char *)(ip6 + 1); - } -# endif -# ifdef IPSEC - m->m_pkthdr.rcvif = NULL; -# endif - - if (fr_makefrip(hlen, ip, &frn) == 0) - error = ipfr_fastroute(m, mp, &frn, NULL); - else - error = EINVAL; - return error; -} - - -int send_icmp_err(oip, type, fin, dst) -ip_t *oip; -int type; -fr_info_t *fin; -int dst; -{ - int err, hlen = 0, xtra = 0, iclen, ohlen = 0, avail, code; - u_short shlen, slen = 0, soff = 0; - struct in_addr dst4; - struct icmp *icmp; - struct mbuf *m; - void *ifp; -#ifdef USE_INET6 - ip6_t *ip6, *oip6 = (ip6_t *)oip; - struct in6_addr dst6; -#endif - ip_t *ip; - - if ((type < 0) || (type > ICMP_MAXTYPE)) - return -1; - - code = fin->fin_icode; -#ifdef USE_INET6 - if ((code < 0) || (code > sizeof(icmptoicmp6unreach)/sizeof(int))) - return -1; -#endif - - avail = 0; - m = NULL; - ifp = fin->fin_ifp; - if (fin->fin_v == 4) { - if 
((oip->ip_p == IPPROTO_ICMP) && - !(fin->fin_fi.fi_fl & FI_SHORT)) - switch (ntohs(fin->fin_data[0]) >> 8) - { - case ICMP_ECHO : - case ICMP_TSTAMP : - case ICMP_IREQ : - case ICMP_MASKREQ : - break; - default : - return 0; - } - -# if (BSD < 199306) || defined(__sgi) - avail = MLEN; - m = m_get(M_DONTWAIT, MT_HEADER); -# else - avail = MHLEN; - m = m_gethdr(M_DONTWAIT, MT_HEADER); -# endif - if (m == NULL) - return ENOBUFS; - - if (dst == 0) { - if (fr_ifpaddr(4, ifp, &dst4) == -1) - return -1; - } else - dst4.s_addr = oip->ip_dst.s_addr; - - hlen = sizeof(ip_t); - ohlen = oip->ip_hl << 2; - xtra = 8; - } - -#ifdef USE_INET6 - else if (fin->fin_v == 6) { - hlen = sizeof(ip6_t); - ohlen = sizeof(ip6_t); - type = icmptoicmp6types[type]; - if (type == ICMP6_DST_UNREACH) - code = icmptoicmp6unreach[code]; - - MGETHDR(m, M_DONTWAIT, MT_HEADER); - if (!m) - return ENOBUFS; - - MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { - m_freem(m); - return ENOBUFS; - } -# ifdef M_TRAILINGSPACE - m->m_len = 0; - avail = M_TRAILINGSPACE(m); -# else - avail = MCLBYTES; -# endif - xtra = MIN(ntohs(oip6->ip6_plen) + sizeof(ip6_t), - avail - hlen - sizeof(*icmp) - max_linkhdr); - if (dst == 0) { - if (fr_ifpaddr(6, ifp, (struct in_addr *)&dst6) == -1) - return -1; - } else - dst6 = oip6->ip6_dst; - } -#endif - - iclen = hlen + sizeof(*icmp); -# if BSD >= 199306 - avail -= (max_linkhdr + iclen); - m->m_data += max_linkhdr; - m->m_pkthdr.rcvif = (struct ifnet *)0; - if (xtra > avail) - xtra = avail; - iclen += xtra; - m->m_pkthdr.len = iclen; -#else - avail -= (m->m_off + iclen); - if (xtra > avail) - xtra = avail; - iclen += xtra; -#endif - m->m_len = iclen; - ip = mtod(m, ip_t *); - icmp = (struct icmp *)((char *)ip + hlen); - bzero((char *)ip, iclen); - - icmp->icmp_type = type; - icmp->icmp_code = fin->fin_icode; - icmp->icmp_cksum = 0; -#ifdef icmp_nextmtu - if (type == ICMP_UNREACH && - fin->fin_icode == ICMP_UNREACH_NEEDFRAG && ifp) - icmp->icmp_nextmtu = htons(((struct 
ifnet *) ifp)->if_mtu); -#endif - - if (avail) { - slen = oip->ip_len; - oip->ip_len = htons(oip->ip_len); - soff = oip->ip_off; - oip->ip_off = htons(oip->ip_off); - bcopy((char *)oip, (char *)&icmp->icmp_ip, MIN(ohlen, avail)); - oip->ip_len = slen; - oip->ip_off = soff; - avail -= MIN(ohlen, avail); - } - -#ifdef USE_INET6 - ip6 = (ip6_t *)ip; - if (fin->fin_v == 6) { - ip6->ip6_flow = 0; - ip6->ip6_plen = htons(iclen - hlen); - ip6->ip6_nxt = IPPROTO_ICMPV6; - ip6->ip6_hlim = 0; - ip6->ip6_src = dst6; - ip6->ip6_dst = oip6->ip6_src; - if (avail) - bcopy((char *)oip + ohlen, - (char *)&icmp->icmp_ip + ohlen, avail); - icmp->icmp_cksum = in6_cksum(m, IPPROTO_ICMPV6, - sizeof(*ip6), iclen - hlen); - } else -#endif - { - - ip->ip_src.s_addr = dst4.s_addr; - ip->ip_dst.s_addr = oip->ip_src.s_addr; - - if (avail > 8) - avail = 8; - if (avail) - bcopy((char *)oip + ohlen, - (char *)&icmp->icmp_ip + ohlen, avail); - icmp->icmp_cksum = ipf_cksum((u_short *)icmp, - sizeof(*icmp) + 8); - ip->ip_len = iclen; - ip->ip_p = IPPROTO_ICMP; - } - - shlen = fin->fin_hlen; - fin->fin_hlen = hlen; - err = send_ip(oip, fin, &m); - fin->fin_hlen = shlen; - - return err; -} - - -# if !defined(IPFILTER_LKM) && !defined(__sgi) && \ - (!defined(__FreeBSD_version) || (__FreeBSD_version < 300000)) -# if (BSD < 199306) -int iplinit __P((void)); - -int -# else -void iplinit __P((void)); - -void -# endif -iplinit() -{ - -# if defined(__NetBSD__) || defined(__OpenBSD__) - if (ipl_enable() != 0) -# else - if (iplattach() != 0) -# endif - { - printf("IP Filter failed to attach\n"); - } - ip_init(); -} -# endif /* ! __NetBSD__ */ - - -/* - * Return the length of the entire mbuf. 
- */ -size_t mbufchainlen(m0) -register struct mbuf *m0; -{ -#if BSD >= 199306 - return m0->m_pkthdr.len; -#else - register size_t len = 0; - - for (; m0; m0 = m0->m_next) - len += m0->m_len; - return len; -#endif -} - - -int ipfr_fastroute(m0, mpp, fin, fdp) -struct mbuf *m0, **mpp; -fr_info_t *fin; -frdest_t *fdp; -{ - register struct ip *ip, *mhip; - register struct mbuf *m = m0; - register struct route *ro; - int len, off, error = 0, hlen, code, sout; - struct ifnet *ifp, *sifp; - struct sockaddr_in *dst; - struct route iproute; - frentry_t *fr; - - ip = NULL; - ro = NULL; - ifp = NULL; - ro = &iproute; - ro->ro_rt = NULL; - -#ifdef USE_INET6 - if (fin->fin_v == 6) { - error = ipfr_fastroute6(m0, mpp, fin, fdp); - if (error != 0) - goto bad; - goto done; - } -#else - if (fin->fin_v == 6) - goto bad; -#endif - -#ifdef M_WRITABLE - /* - * HOT FIX/KLUDGE: - * - * If the mbuf we're about to send is not writable (because of - * a cluster reference, for example) we'll need to make a copy - * of it since this routine modifies the contents. - * - * If you have non-crappy network hardware that can transmit data - * from the mbuf, rather than making a copy, this is gonna be a - * problem. - */ - if (M_WRITABLE(m) == 0) { - if ((m0 = m_dup(m, M_DONTWAIT)) != NULL) { - m_freem(*mpp); - *mpp = m0; - m = m0; - } else { - error = ENOBUFS; - m_freem(*mpp); - goto done; - } - } -#endif - - hlen = fin->fin_hlen; - ip = mtod(m0, struct ip *); - -#if defined(__NetBSD__) && defined(M_CSUM_IPv4) - /* - * Clear any in-bound checksum flags for this packet. - */ -# if (__NetBSD_Version__ > 105009999) - m0->m_pkthdr.csum_flags = 0; -# else - m0->m_pkthdr.csuminfo = 0; -# endif -#endif /* __NetBSD__ && M_CSUM_IPv4 */ - - /* - * Route packet. 
- */ -#if (defined(IRIX) && (IRIX >= 605)) - ROUTE_RDLOCK(); -#endif - bzero((caddr_t)ro, sizeof (*ro)); - dst = (struct sockaddr_in *)&ro->ro_dst; - dst->sin_family = AF_INET; - dst->sin_addr = ip->ip_dst; - - fr = fin->fin_fr; - if (fdp != NULL) - ifp = fdp->fd_ifp; - else - ifp = fin->fin_ifp; - - /* - * In case we're here due to "to " being used with "keep state", - * check that we're going in the correct direction. - */ - if ((fr != NULL) && (fin->fin_rev != 0)) { - if ((ifp != NULL) && (fdp == &fr->fr_tif)) { -# if (defined(IRIX) && (IRIX >= 605)) - ROUTE_UNLOCK(); -# endif - return 0; - } - } else if (fdp != NULL) { - if (fdp->fd_ip.s_addr != 0) - dst->sin_addr = fdp->fd_ip; - } - -# if BSD >= 199306 - dst->sin_len = sizeof(*dst); -# endif -# if (BSD >= 199306) && !defined(__NetBSD__) && !defined(__bsdi__) && \ - !defined(__OpenBSD__) -# ifdef RTF_CLONING - rtalloc_ign(ro, RTF_CLONING); -# else - rtalloc_ign(ro, RTF_PRCLONING); -# endif -# else - rtalloc(ro); -# endif - - if (!ifp) { - if (!fr || !(fr->fr_flags & FR_FASTROUTE)) { - error = -2; -# if (defined(IRIX) && (IRIX >= 605)) - ROUTE_UNLOCK(); -# endif - goto bad; - } - } - - if ((ifp == NULL) && (ro->ro_rt != NULL)) - ifp = ro->ro_rt->rt_ifp; - - if ((ro->ro_rt == NULL) || (ifp == NULL)) { - if (in_localaddr(ip->ip_dst)) - error = EHOSTUNREACH; - else - error = ENETUNREACH; -# if (defined(IRIX) && (IRIX >= 605)) - ROUTE_UNLOCK(); -# endif - goto bad; - } - - if (ro->ro_rt->rt_flags & RTF_GATEWAY) { -#if (BSD >= 199306) || (defined(IRIX) && (IRIX >= 605)) - dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; -#else - dst = (struct sockaddr_in *)&ro->ro_rt->rt_gateway; -#endif - } - ro->ro_rt->rt_use++; - -#if (defined(IRIX) && (IRIX > 602)) - ROUTE_UNLOCK(); -#endif - - /* - * For input packets which are being "fastrouted", they won't - * go back through output filtering and miss their chance to get - * NAT'd and counted. 
- */ - if (fin->fin_out == 0) { - sifp = fin->fin_ifp; - sout = fin->fin_out; - fin->fin_ifp = ifp; - fin->fin_out = 1; - if ((fin->fin_fr = ipacct[1][fr_active]) && - (fr_scanlist(FR_NOMATCH, ip, fin, m) & FR_ACCOUNT)) { - ATOMIC_INCL(frstats[1].fr_acct); - } - fin->fin_fr = NULL; - if (!fr || !(fr->fr_flags & FR_RETMASK)) - (void) fr_checkstate(ip, fin); - - switch (ip_natout(ip, fin)) - { - case 0 : - break; - case 1 : - ip->ip_sum = 0; - break; - case -1 : - error = EINVAL; - goto done; - break; - } - - fin->fin_ifp = sifp; - fin->fin_out = sout; - } else - ip->ip_sum = 0; - - /* - * If small enough for interface, can just send directly. - */ - if (ip->ip_len <= ifp->if_mtu) { -# ifndef sparc -# if (!defined(__FreeBSD__) && !(_BSDI_VERSION >= 199510)) && \ - !(__NetBSD_Version__ >= 105110000) - ip->ip_id = htons(ip->ip_id); -# endif - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); -# endif -# if defined(__NetBSD__) && defined(M_CSUM_IPv4) -# if (__NetBSD_Version__ > 105009999) - if (ifp->if_csum_flags_tx & IFCAP_CSUM_IPv4) - m->m_pkthdr.csum_flags |= M_CSUM_IPv4; - else if (ip->ip_sum == 0) - ip->ip_sum = in_cksum(m, hlen); -# else - if (ifp->if_capabilities & IFCAP_CSUM_IPv4) - m->m_pkthdr.csuminfo |= M_CSUM_IPv4; - else if (ip->ip_sum == 0) - ip->ip_sum = in_cksum(m, hlen); -# endif -# else - if (!ip->ip_sum) - ip->ip_sum = in_cksum(m, hlen); -# endif /* __NetBSD__ && M_CSUM_IPv4 */ -# if (BSD >= 199306) || (defined(IRIX) && (IRIX >= 605)) -# ifdef IRIX - IFNET_UPPERLOCK(ifp); -# endif - error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, - ro->ro_rt); -# ifdef IRIX - IFNET_UPPERUNLOCK(ifp); -# endif -# else - error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst); -# endif - goto done; - } - - /* - * Too large for interface; fragment if possible. - * Must be able to put at least 8 bytes per fragment. 
- */ - if (ip->ip_off & IP_DF) { - error = EMSGSIZE; - goto bad; - } - len = (ifp->if_mtu - hlen) &~ 7; - if (len < 8) { - error = EMSGSIZE; - goto bad; - } - - { - int mhlen, firstlen = len; - struct mbuf **mnext = &m->m_act; - - /* - * Loop through length of segment after first fragment, - * make new header and copy data of each part and link onto chain. - */ - m0 = m; - mhlen = sizeof (struct ip); - for (off = hlen + len; off < ip->ip_len; off += len) { -# ifdef MGETHDR - MGETHDR(m, M_DONTWAIT, MT_HEADER); -# else - MGET(m, M_DONTWAIT, MT_HEADER); -# endif - if (m == 0) { - error = ENOBUFS; - goto bad; - } -# if BSD >= 199306 - m->m_data += max_linkhdr; -# else - m->m_off = MMAXOFF - hlen; -# endif - mhip = mtod(m, struct ip *); - bcopy((char *)ip, (char *)mhip, sizeof(*ip)); - if (hlen > sizeof (struct ip)) { - mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); - mhip->ip_hl = mhlen >> 2; - } - m->m_len = mhlen; - mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); - if (ip->ip_off & IP_MF) - mhip->ip_off |= IP_MF; - if (off + len >= ip->ip_len) - len = ip->ip_len - off; - else - mhip->ip_off |= IP_MF; - mhip->ip_len = htons((u_short)(len + mhlen)); - m->m_next = m_copy(m0, off, len); - if (m->m_next == 0) { - error = ENOBUFS; /* ??? */ - goto sendorfree; - } -# if BSD >= 199306 - m->m_pkthdr.len = mhlen + len; - m->m_pkthdr.rcvif = NULL; -# endif - mhip->ip_off = htons((u_short)mhip->ip_off); - mhip->ip_sum = 0; - mhip->ip_sum = in_cksum(m, mhlen); - *mnext = m; - mnext = &m->m_act; - } - /* - * Update first fragment by trimming what's been copied out - * and updating header, then send each fragment (in order). 
- */ - m_adj(m0, hlen + firstlen - ip->ip_len); - ip->ip_len = htons((u_short)(hlen + firstlen)); - ip->ip_off = htons((u_short)(ip->ip_off | IP_MF)); - ip->ip_sum = 0; - ip->ip_sum = in_cksum(m0, hlen); -sendorfree: - for (m = m0; m; m = m0) { - m0 = m->m_act; - m->m_act = 0; - if (error == 0) -# if (BSD >= 199306) || (defined(IRIX) && (IRIX >= 605)) - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, ro->ro_rt); -# else - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst); -# endif - else - m_freem(m); - } - } -done: - if (!error) - ipl_frouteok[0]++; - else - ipl_frouteok[1]++; - - if (ro->ro_rt != NULL) { - RTFREE(ro->ro_rt); - } - *mpp = NULL; - return error; -bad: - if ((error == EMSGSIZE) && (fin->fin_v == 4)) { - sifp = fin->fin_ifp; - code = fin->fin_icode; - fin->fin_icode = ICMP_UNREACH_NEEDFRAG; - fin->fin_ifp = ifp; - (void) send_icmp_err(ip, ICMP_UNREACH, fin, 1); - fin->fin_ifp = sifp; - fin->fin_icode = code; - } - m_freem(m); - goto done; -} - - -/* - * Return true or false depending on whether the route to the - * given IP address uses the same interface as the one passed. 
- */ -int fr_verifysrc(ipa, ifp) -struct in_addr ipa; -void *ifp; -{ - struct sockaddr_in *dst; - struct route iproute; - - bzero((char *)&iproute, sizeof(iproute)); - dst = (struct sockaddr_in *)&iproute.ro_dst; -# if (BSD >= 199306) - dst->sin_len = sizeof(*dst); -# endif - dst->sin_family = AF_INET; - dst->sin_addr = ipa; -# if (BSD >= 199306) && !defined(__NetBSD__) && !defined(__bsdi__) && \ - !defined(__OpenBSD__) -# ifdef RTF_CLONING - rtalloc_ign(&iproute, RTF_CLONING); -# else - rtalloc_ign(&iproute, RTF_PRCLONING); -# endif -# else - rtalloc(&iproute); -# endif - if (iproute.ro_rt == NULL) - return 0; - return (ifp == iproute.ro_rt->rt_ifp); -} - - -# ifdef USE_GETIFNAME -char * -get_ifname(ifp) -struct ifnet *ifp; -{ - static char workbuf[64]; - - sprintf(workbuf, "%s%d", ifp->if_name, ifp->if_unit); - return workbuf; -} -# endif - - -# if defined(USE_INET6) -/* - * This is the IPv6 specific fastroute code. It doesn't clean up the mbuf's - * or ensure that it is an IPv6 packet that is being forwarded, those are - * expected to be done by the called (ipfr_fastroute). 
- */ -static int ipfr_fastroute6(m0, mpp, fin, fdp) -struct mbuf *m0, **mpp; -fr_info_t *fin; -frdest_t *fdp; -{ - struct route_in6 ip6route; - struct sockaddr_in6 *dst6; - struct route_in6 *ro; - struct ifnet *ifp; - frentry_t *fr; -#if defined(OpenBSD) && (OpenBSD >= 200211) - struct route_in6 *ro_pmtu = NULL; - struct in6_addr finaldst; - ip6_t *ip6; -#endif - u_long mtu; - int error; - - ro = &ip6route; - fr = fin->fin_fr; - bzero((caddr_t)ro, sizeof(*ro)); - dst6 = (struct sockaddr_in6 *)&ro->ro_dst; - dst6->sin6_family = AF_INET6; - dst6->sin6_len = sizeof(struct sockaddr_in6); - dst6->sin6_addr = fin->fin_fi.fi_dst.in6; - - if (fdp != NULL) - ifp = fdp->fd_ifp; - else - ifp = fin->fin_ifp; - - if ((fr != NULL) && (fin->fin_rev != 0)) { - if ((ifp != NULL) && (fdp == &fr->fr_tif)) - return 0; - } else if (fdp != NULL) { - if (IP6_NOTZERO(&fdp->fd_ip6)) - dst6->sin6_addr = fdp->fd_ip6.in6; - } - if (ifp == NULL) - return -2; - -#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) - /* KAME */ - if (IN6_IS_ADDR_LINKLOCAL(&dst6->sin6_addr)) - dst6->sin6_addr.s6_addr16[1] = htons(ifp->if_index); -#endif - rtalloc((struct route *)ro); - - if ((ifp == NULL) && (ro->ro_rt != NULL)) - ifp = ro->ro_rt->rt_ifp; - - if ((ro->ro_rt == NULL) || (ifp == NULL) || - (ifp != ro->ro_rt->rt_ifp)) { - error = EHOSTUNREACH; - } else { - if (ro->ro_rt->rt_flags & RTF_GATEWAY) - dst6 = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway; - ro->ro_rt->rt_use++; - -#if defined(OpenBSD) && (OpenBSD >= 200211) - ip6 = mtod(m0, ip6_t *); - ro_pmtu = ro; - finaldst = ip6->ip6_dst; - error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu); - if (error == 0) { -#else -# ifdef IN6_LINKMTU - mtu = IN6_LINKMTU(ifp); -# else -# ifdef ND_IFINFO - mtu = ND_IFINFO(ifp)->linkmtu; -# else - mtu = nd_ifinfo[ifp->if_index].linkmtu; -# endif -# endif -#endif - if (m0->m_pkthdr.len <= mtu) - error = nd6_output(ifp, fin->fin_ifp, m0, - dst6, ro->ro_rt); - else - error = EMSGSIZE; -#if 
defined(OpenBSD) && (OpenBSD >= 200211) - } -#endif - } - - if (ro->ro_rt != NULL) { - RTFREE(ro->ro_rt); - } - return error; -} -# endif -#else /* #ifdef _KERNEL */ - - -# if defined(__sgi) && (IRIX < 605) -static int no_output __P((struct ifnet *ifp, struct mbuf *m, - struct sockaddr *s)) -# else -static int no_output __P((struct ifnet *ifp, struct mbuf *m, - struct sockaddr *s, struct rtentry *rt)) -# endif -{ - return 0; -} - - -# ifdef __STDC__ -# if defined(__sgi) && (IRIX < 605) -static int write_output __P((struct ifnet *ifp, struct mbuf *m, - struct sockaddr *s)) -# else -static int write_output __P((struct ifnet *ifp, struct mbuf *m, - struct sockaddr *s, struct rtentry *rt)) -# endif -{ - ip_t *ip = (ip_t *)m; -# else -static int write_output(ifp, ip) -struct ifnet *ifp; -ip_t *ip; -{ -# endif - char fname[32]; - int fd; - -# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) - sprintf(fname, "%s", ifp->if_xname); -# else - sprintf(fname, "%s%d", ifp->if_name, ifp->if_unit); -# endif - fd = open(fname, O_WRONLY|O_APPEND); - if (fd == -1) { - perror("open"); - return -1; - } - write(fd, (char *)ip, ntohs(ip->ip_len)); - close(fd); - return 0; -} - - -char *get_ifname(ifp) -struct ifnet *ifp; -{ -# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) - return ifp->if_xname; -# else - static char fullifname[LIFNAMSIZ]; - - sprintf(fullifname, "%s%d", ifp->if_name, ifp->if_unit); - return fullifname; -# endif -} - - -struct ifnet *get_unit(ifname, v) -char *ifname; -int v; -{ - struct ifnet *ifp, **ifa, **old_ifneta; - - for (ifa = ifneta; ifa && (ifp = *ifa); ifa++) { -# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || \ - 
(defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) - if (!strncmp(ifname, ifp->if_xname, sizeof(ifp->if_xname))) -# else - char fullname[LIFNAMSIZ]; - - sprintf(fullname, "%s%d", ifp->if_name, ifp->if_unit); - if (!strcmp(ifname, fullname)) -# endif - return ifp; - } - - if (!ifneta) { - ifneta = (struct ifnet **)malloc(sizeof(ifp) * 2); - if (!ifneta) - return NULL; - ifneta[1] = NULL; - ifneta[0] = (struct ifnet *)calloc(1, sizeof(*ifp)); - if (!ifneta[0]) { - free(ifneta); - return NULL; - } - nifs = 1; - } else { - old_ifneta = ifneta; - nifs++; - ifneta = (struct ifnet **)realloc(ifneta, - (nifs + 1) * sizeof(*ifa)); - if (!ifneta) { - free(old_ifneta); - nifs = 0; - return NULL; - } - ifneta[nifs] = NULL; - ifneta[nifs - 1] = (struct ifnet *)malloc(sizeof(*ifp)); - if (!ifneta[nifs - 1]) { - nifs--; - return NULL; - } - } - ifp = ifneta[nifs - 1]; - -# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) - strncpy(ifp->if_xname, ifname, sizeof(ifp->if_xname)); -# else - ifp->if_name = strdup(ifname); - - ifname = ifp->if_name; - while (*ifname && !isdigit(*ifname)) - ifname++; - if (*ifname && isdigit(*ifname)) { - ifp->if_unit = atoi(ifname); - *ifname = '\0'; - } else - ifp->if_unit = -1; -# endif - ifp->if_output = no_output; - return ifp; -} - - - -void init_ifp() -{ - struct ifnet *ifp, **ifa; - char fname[32]; - int fd; - -# if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199606)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || \ - (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) - for (ifa = ifneta; ifa && (ifp = *ifa); ifa++) { - ifp->if_output = write_output; - sprintf(fname, "/tmp/%s", ifp->if_xname); - fd = open(fname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC, 0600); - if (fd == -1) - perror("open"); - else - close(fd); - } -# else - - for (ifa = ifneta; ifa && (ifp = *ifa); ifa++) { - ifp->if_output = 
write_output; - sprintf(fname, "/tmp/%s%d", ifp->if_name, ifp->if_unit); - fd = open(fname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC, 0600); - if (fd == -1) - perror("open"); - else - close(fd); - } -# endif -} - - -int send_reset(ip, fin) -ip_t *ip; -fr_info_t *fin; -{ - verbose("- TCP RST sent\n"); - return 0; -} - - -int send_icmp_err(ip, code, fin, dst) -ip_t *ip; -int code; -fr_info_t *fin; -int dst; -{ - verbose("- ICMP UNREACHABLE sent\n"); - return 0; -} - - -void frsync() -{ - return; -} - -void m_copydata(m, off, len, cp) -mb_t *m; -int off, len; -caddr_t cp; -{ - bcopy((char *)m + off, cp, len); -} - - -int ipfuiomove(buf, len, rwflag, uio) -caddr_t buf; -int len, rwflag; -struct uio *uio; -{ - int left, ioc, num, offset; - struct iovec *io; - char *start; - - if (rwflag == UIO_READ) { - left = len; - ioc = 0; - - offset = uio->uio_offset; - - while ((left > 0) && (ioc < uio->uio_iovcnt)) { - io = uio->uio_iov + ioc; - num = io->iov_len; - if (num > left) - num = left; - start = (char *)io->iov_base + offset; - if (start > (char *)io->iov_base + io->iov_len) { - offset -= io->iov_len; - ioc++; - continue; - } - bcopy(buf, start, num); - uio->uio_resid -= num; - uio->uio_offset += num; - left -= num; - if (left > 0) - ioc++; - } - if (left > 0) - return EFAULT; - } - return 0; -} -#endif /* _KERNEL */ diff --git a/sys/contrib/ipfilter/netinet/ip_fil.h b/sys/contrib/ipfilter/netinet/ip_fil.h index aaca0dc..d2bfc07 100644 --- a/sys/contrib/ipfilter/netinet/ip_fil.h +++ b/sys/contrib/ipfilter/netinet/ip_fil.h @@ -1,32 +1,22 @@ +/* $FreeBSD$ */ + /* - * Copyright (C) 1993-2002 by Darren Reed. + * Copyright (C) 1993-2001, 2003 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. * * @(#)ip_fil.h 1.35 6/5/96 - * $Id: ip_fil.h,v 2.29.2.4 2000/11/12 11:54:53 darrenr Exp $ * $FreeBSD$ + * Id: ip_fil.h,v 2.170.2.18 2005/03/28 10:47:52 darrenr Exp */ #ifndef __IP_FIL_H__ #define __IP_FIL_H__ -/* - * Pathnames for various IP Filter control devices. 
Used by LKM - * and userland, so defined here. - */ -#define IPNAT_NAME "/dev/ipnat" -#define IPSTATE_NAME "/dev/ipstate" -#define IPAUTH_NAME "/dev/ipauth" - #ifndef SOLARIS # define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) #endif -#if defined(KERNEL) && !defined(_KERNEL) -# define _KERNEL -#endif - #ifndef __P # ifdef __STDC__ # define __P(x) x @@ -35,137 +25,321 @@ # endif #endif -#ifndef offsetof -# define offsetof(t,m) (int)((&((t *)0L)->m)) -#endif - #if defined(__STDC__) || defined(__GNUC__) -# define SIOCADAFR _IOW('r', 60, struct frentry *) -# define SIOCRMAFR _IOW('r', 61, struct frentry *) +# define SIOCADAFR _IOW('r', 60, struct ipfobj) +# define SIOCRMAFR _IOW('r', 61, struct ipfobj) # define SIOCSETFF _IOW('r', 62, u_int) # define SIOCGETFF _IOR('r', 63, u_int) -# define SIOCGETFS _IOWR('r', 64, struct friostat *) +# define SIOCGETFS _IOWR('r', 64, struct ipfobj) # define SIOCIPFFL _IOWR('r', 65, int) # define SIOCIPFFB _IOR('r', 66, int) -# define SIOCADIFR _IOW('r', 67, struct frentry *) -# define SIOCRMIFR _IOW('r', 68, struct frentry *) +# define SIOCADIFR _IOW('r', 67, struct ipfobj) +# define SIOCRMIFR _IOW('r', 68, struct ipfobj) # define SIOCSWAPA _IOR('r', 69, u_int) -# define SIOCINAFR _IOW('r', 70, struct frentry *) -# define SIOCINIFR _IOW('r', 71, struct frentry *) +# define SIOCINAFR _IOW('r', 70, struct ipfobj) +# define SIOCINIFR _IOW('r', 71, struct ipfobj) # define SIOCFRENB _IOW('r', 72, u_int) # define SIOCFRSYN _IOW('r', 73, u_int) -# define SIOCFRZST _IOWR('r', 74, struct friostat *) -# define SIOCZRLST _IOWR('r', 75, struct frentry *) -# define SIOCAUTHW _IOWR('r', 76, struct frauth *) -# define SIOCAUTHR _IOWR('r', 77, struct frauth *) -# define SIOCATHST _IOWR('r', 78, struct fr_authstat *) +# define SIOCFRZST _IOWR('r', 74, struct ipfobj) +# define SIOCZRLST _IOWR('r', 75, struct ipfobj) +# define SIOCAUTHW _IOWR('r', 76, struct ipfobj) +# define SIOCAUTHR _IOWR('r', 77, struct ipfobj) +# define 
SIOCATHST _IOWR('r', 78, struct ipfobj) # define SIOCSTLCK _IOWR('r', 79, u_int) -# define SIOCSTPUT _IOWR('r', 80, struct ipstate_save *) -# define SIOCSTGET _IOWR('r', 81, struct ipstate_save *) -# define SIOCSTGSZ _IOWR('r', 82, struct natget) -# define SIOCGFRST _IOWR('r', 83, struct ipfrstat *) -# define SIOCIPFL6 _IOWR('r', 84, int) +# define SIOCSTPUT _IOWR('r', 80, struct ipfobj) +# define SIOCSTGET _IOWR('r', 81, struct ipfobj) +# define SIOCSTGSZ _IOWR('r', 82, struct ipfobj) +# define SIOCGFRST _IOWR('r', 83, struct ipfobj) +# define SIOCSETLG _IOWR('r', 84, int) +# define SIOCGETLG _IOWR('r', 85, int) +# define SIOCFUNCL _IOWR('r', 86, struct ipfunc_resolve) +# define SIOCIPFGETNEXT _IOWR('r', 87, struct ipfobj) +# define SIOCIPFGET _IOWR('r', 88, struct ipfobj) +# define SIOCIPFSET _IOWR('r', 89, struct ipfobj) +# define SIOCIPFL6 _IOWR('r', 90, int) #else -# define SIOCADAFR _IOW(r, 60, struct frentry *) -# define SIOCRMAFR _IOW(r, 61, struct frentry *) +# define SIOCADAFR _IOW(r, 60, struct ipfobj) +# define SIOCRMAFR _IOW(r, 61, struct ipfobj) # define SIOCSETFF _IOW(r, 62, u_int) # define SIOCGETFF _IOR(r, 63, u_int) -# define SIOCGETFS _IOWR(r, 64, struct friostat *) +# define SIOCGETFS _IOWR(r, 64, struct ipfobj) # define SIOCIPFFL _IOWR(r, 65, int) # define SIOCIPFFB _IOR(r, 66, int) -# define SIOCADIFR _IOW(r, 67, struct frentry *) -# define SIOCRMIFR _IOW(r, 68, struct frentry *) +# define SIOCADIFR _IOW(r, 67, struct ipfobj) +# define SIOCRMIFR _IOW(r, 68, struct ipfobj) # define SIOCSWAPA _IOR(r, 69, u_int) -# define SIOCINAFR _IOW(r, 70, struct frentry *) -# define SIOCINIFR _IOW(r, 71, struct frentry *) +# define SIOCINAFR _IOW(r, 70, struct ipfobj) +# define SIOCINIFR _IOW(r, 71, struct ipfobj) # define SIOCFRENB _IOW(r, 72, u_int) # define SIOCFRSYN _IOW(r, 73, u_int) -# define SIOCFRZST _IOWR(r, 74, struct friostat *) -# define SIOCZRLST _IOWR(r, 75, struct frentry *) -# define SIOCAUTHW _IOWR(r, 76, struct frauth *) -# define SIOCAUTHR 
_IOWR(r, 77, struct frauth *) -# define SIOCATHST _IOWR(r, 78, struct fr_authstat *) +# define SIOCFRZST _IOWR(r, 74, struct ipfobj) +# define SIOCZRLST _IOWR(r, 75, struct ipfobj) +# define SIOCAUTHW _IOWR(r, 76, struct ipfobj) +# define SIOCAUTHR _IOWR(r, 77, struct ipfobj) +# define SIOCATHST _IOWR(r, 78, struct ipfobj) # define SIOCSTLCK _IOWR(r, 79, u_int) -# define SIOCSTPUT _IOWR(r, 80, struct ipstate_save *) -# define SIOCSTGET _IOWR(r, 81, struct ipstate_save *) -# define SIOCSTGSZ _IOWR(r, 82, struct natget) -# define SIOCGFRST _IOWR(r, 83, struct ipfrstat *) -# define SIOCIPFL6 _IOWR(r, 84, int) +# define SIOCSTPUT _IOWR(r, 80, struct ipfobj) +# define SIOCSTGET _IOWR(r, 81, struct ipfobj) +# define SIOCSTGSZ _IOWR(r, 82, struct ipfobj) +# define SIOCGFRST _IOWR(r, 83, struct ipfobj) +# define SIOCSETLG _IOWR(r, 84, int) +# define SIOCGETLG _IOWR(r, 85, int) +# define SIOCFUNCL _IOWR(r, 86, struct ipfunc_resolve) +# define SIOCIPFGETNEXT _IOWR(r, 87, struct ipfobj) +# define SIOCIPFGET _IOWR(r, 88, struct ipfobj) +# define SIOCIPFSET _IOWR(r, 89, struct ipfobj) +# define SIOCIPFL6 _IOWR(r, 90, int) #endif #define SIOCADDFR SIOCADAFR #define SIOCDELFR SIOCRMAFR #define SIOCINSFR SIOCINAFR +struct ipscan; +struct ifnet; + + +typedef int (* lookupfunc_t) __P((void *, int, void *)); + +/* + * i6addr is used as a container for both IPv4 and IPv6 addresses, as well + * as other types of objects, depending on its qualifier. + */ +#ifdef USE_INET6 +typedef union i6addr { + u_32_t i6[4]; + struct in_addr in4; + struct in6_addr in6; + void *vptr[2]; + lookupfunc_t lptr[2]; +} i6addr_t; +#else +typedef union i6addr { + u_32_t i6[4]; + struct in_addr in4; + void *vptr[2]; + lookupfunc_t lptr[2]; +} i6addr_t; +#endif + +#define in4_addr in4.s_addr +#define iplookupnum i6[0] +#define iplookuptype i6[1] +/* + * NOTE: These DO overlap the above on 64bit systems and this IS recognised. 
+ */
+#define iplookupptr vptr[0]
+#define iplookupfunc lptr[1]
+
+#define I60(x) (((i6addr_t *)(x))->i6[0])
+#define I61(x) (((i6addr_t *)(x))->i6[1])
+#define I62(x) (((i6addr_t *)(x))->i6[2])
+#define I63(x) (((i6addr_t *)(x))->i6[3])
+#define HI60(x) ntohl(((i6addr_t *)(x))->i6[0])
+#define HI61(x) ntohl(((i6addr_t *)(x))->i6[1])
+#define HI62(x) ntohl(((i6addr_t *)(x))->i6[2])
+#define HI63(x) ntohl(((i6addr_t *)(x))->i6[3])
+
+#define IP6_EQ(a,b) ((I63(a) == I63(b)) && (I62(a) == I62(b)) && \
+		(I61(a) == I61(b)) && (I60(a) == I60(b)))
+#define IP6_NEQ(a,b) ((I63(a) != I63(b)) || (I62(a) != I62(b)) || \
+		(I61(a) != I61(b)) || (I60(a) != I60(b)))
+#define IP6_ISZERO(a) ((I60(a) | I61(a) | I62(a) | I63(a)) == 0)
+#define IP6_NOTZERO(a) ((I60(a) | I61(a) | I62(a) | I63(a)) != 0)
+#define IP6_GT(a,b) (HI60(a) > HI60(b) || (HI60(a) == HI60(b) && \
+		(HI61(a) > HI61(b) || (HI61(a) == HI61(b) && \
+		(HI62(a) > HI62(b) || (HI62(a) == HI62(b) && \
+		HI63(a) > HI63(b)))))))
+#define IP6_LT(a,b) (HI60(a) < HI60(b) || (HI60(a) == HI60(b) && \
+		(HI61(a) < HI61(b) || (HI61(a) == HI61(b) && \
+		(HI62(a) < HI62(b) || (HI62(a) == HI62(b) && \
+		HI63(a) < HI63(b)))))))
+#define NLADD(n,x) htonl(ntohl(n) + (x)) /* add x to a 32-bit word stored in network byte order */
+#define IP6_INC(a) /* add 1 to the address; i6[3] is the low word, as in IP6_GT/IP6_LT */ \
+		{ i6addr_t *_i6 = (i6addr_t *)(a); \
+		  _i6->i6[3] = NLADD(_i6->i6[3], 1); \
+		  if (_i6->i6[3] == 0) { /* word wrapped: carry into the next word */ \
+			_i6->i6[2] = NLADD(_i6->i6[2], 1); \
+			if (_i6->i6[2] == 0) { \
+				_i6->i6[1] = NLADD(_i6->i6[1], 1); \
+				if (_i6->i6[1] == 0) { \
+					_i6->i6[0] = NLADD(_i6->i6[0], 1); \
+				} \
+			} \
+		  } \
+		}
+#define IP6_ADD(a,x,d) /* NOTE(review): carry runs i6[0] -> i6[3], opposite to the IP6_GT/IP6_LT word order - confirm intent */ \
+		{ i6addr_t *_s = (i6addr_t *)(a); \
+		  i6addr_t *_d = (i6addr_t *)(d); \
+		  _d->i6[0] = NLADD(_s->i6[0], x); \
+		  if (ntohl(_d->i6[0]) < ntohl(_s->i6[0])) { \
+			_d->i6[1] = NLADD(_d->i6[1], 1); \
+			if (ntohl(_d->i6[1]) < ntohl(_s->i6[1])) { \
+				_d->i6[2] = NLADD(_d->i6[2], 1); \
+				if (ntohl(_d->i6[2]) < ntohl(_s->i6[2])) { \
+					_d->i6[3] = NLADD(_d->i6[3], 1); \
+				} \
+			} \
+		  } \
+		}
+#define IP6_AND(a,b,d) { i6addr_t *_s1 = (i6addr_t *)(a); /* d = a & b, word by word */ \
+		  i6addr_t *_s2 = (i6addr_t *)(b); \
+		  i6addr_t *_d = (i6addr_t *)(d); \
+		  _d->i6[0] = _s1->i6[0] & _s2->i6[0]; \
+		  _d->i6[1] = _s1->i6[1] & _s2->i6[1]; \
+		  _d->i6[2] = _s1->i6[2] & _s2->i6[2]; \
+		  _d->i6[3] = _s1->i6[3] & _s2->i6[3]; \
+		}
+#define IP6_MERGE(a,b,c) /* a |= b & ~c, word by word */ \
+		{ i6addr_t *_d, *_s1, *_s2; \
+		  _d = (i6addr_t *)(a); \
+		  _s1 = (i6addr_t *)(b); \
+		  _s2 = (i6addr_t *)(c); \
+		  _d->i6[0] |= _s1->i6[0] & ~_s2->i6[0]; \
+		  _d->i6[1] |= _s1->i6[1] & ~_s2->i6[1]; \
+		  _d->i6[2] |= _s1->i6[2] & ~_s2->i6[2]; \
+		  _d->i6[3] |= _s1->i6[3] & ~_s2->i6[3]; \
+		}
+
+
 typedef struct fr_ip {
 	u_32_t fi_v:4; /* IP version */
-	u_32_t fi_fl:4; /* packet flags */
+	u_32_t fi_xx:4; /* spare */
 	u_32_t fi_tos:8; /* IP packet TOS */
 	u_32_t fi_ttl:8; /* IP packet TTL */
 	u_32_t fi_p:8; /* IP packet protocol */
-	union i6addr fi_src; /* source address from packet */
-	union i6addr fi_dst; /* destination address from packet */
 	u_32_t fi_optmsk; /* bitmask composed from IP options */
+	i6addr_t fi_src; /* source address from packet */
+	i6addr_t fi_dst; /* destination address from packet */
 	u_short fi_secmsk; /* bitmask composed from IP security options */
 	u_short fi_auth; /* authentication code from IP sec.
options */ + u_32_t fi_flx; /* packet flags */ + u_32_t fi_tcpmsk; /* TCP options set/reset */ + u_32_t fi_res1; /* RESERVED */ } fr_ip_t; -#define FI_OPTIONS (FF_OPTIONS >> 24) -#define FI_TCPUDP (FF_TCPUDP >> 24) /* TCP/UCP implied comparison*/ -#define FI_FRAG (FF_FRAG >> 24) -#define FI_SHORT (FF_SHORT >> 24) -#define FI_CMP (FI_OPTIONS|FI_TCPUDP|FI_SHORT) +/* + * For use in fi_flx + */ +#define FI_TCPUDP 0x0001 /* TCP/UCP implied comparison*/ +#define FI_OPTIONS 0x0002 +#define FI_FRAG 0x0004 +#define FI_SHORT 0x0008 +#define FI_NATED 0x0010 +#define FI_MULTICAST 0x0020 +#define FI_BROADCAST 0x0040 +#define FI_MBCAST 0x0080 +#define FI_STATE 0x0100 +#define FI_BADNAT 0x0200 +#define FI_BAD 0x0400 +#define FI_OOW 0x0800 /* Out of state window, else match */ +#define FI_ICMPERR 0x1000 +#define FI_FRAGBODY 0x2000 +#define FI_BADSRC 0x4000 +#define FI_LOWTTL 0x8000 +#define FI_CMP 0xcfe3 /* Not FI_FRAG,FI_NATED,FI_FRAGTAIL */ +#define FI_ICMPCMP 0x0003 /* Flags we can check for ICMP error packets */ +#define FI_WITH 0xeffe /* Not FI_TCPUDP */ +#define FI_V6EXTHDR 0x10000 +#define FI_COALESCE 0x20000 +#define FI_NOCKSUM 0x20000000 /* don't do a L4 checksum validation */ +#define FI_DONTCACHE 0x40000000 /* don't cache the result */ +#define FI_IGNORE 0x80000000 #define fi_saddr fi_src.in4.s_addr #define fi_daddr fi_dst.in4.s_addr +#define fi_srcnum fi_src.iplookupnum +#define fi_dstnum fi_dst.iplookupnum +#define fi_srctype fi_src.iplookuptype +#define fi_dsttype fi_dst.iplookuptype +#define fi_srcptr fi_src.iplookupptr +#define fi_dstptr fi_dst.iplookupptr +#define fi_srcfunc fi_src.iplookupfunc +#define fi_dstfunc fi_dst.iplookupfunc /* * These are both used by the state and NAT code to indicate that one port or * the other should be treated as a wildcard. + * NOTE: When updating, check bit masks in ip_state.h and update there too. 
*/ -#define FI_W_SPORT 0x00000100 -#define FI_W_DPORT 0x00000200 -#define FI_WILDP (FI_W_SPORT|FI_W_DPORT) -#define FI_W_SADDR 0x00000400 -#define FI_W_DADDR 0x00000800 -#define FI_WILDA (FI_W_SADDR|FI_W_DADDR) -#define FI_NEWFR 0x00001000 /* Create a filter rule */ -#define FI_IGNOREPKT 0x00002000 /* Do not treat as a real packet */ -#define FI_NORULE 0x00004000 /* Not direct a result of a rule */ +#define SI_W_SPORT 0x00000100 +#define SI_W_DPORT 0x00000200 +#define SI_WILDP (SI_W_SPORT|SI_W_DPORT) +#define SI_W_SADDR 0x00000400 +#define SI_W_DADDR 0x00000800 +#define SI_WILDA (SI_W_SADDR|SI_W_DADDR) +#define SI_NEWFR 0x00001000 +#define SI_CLONE 0x00002000 +#define SI_CLONED 0x00004000 + typedef struct fr_info { void *fin_ifp; /* interface packet is `on' */ - struct fr_ip fin_fi; /* IP Packet summary */ - u_short fin_data[2]; /* TCP/UDP ports, ICMP code/type */ - u_int fin_out; /* in or out ? 1 == out, 0 == in */ + fr_ip_t fin_fi; /* IP Packet summary */ + union { + u_short fid_16[2]; /* TCP/UDP ports, ICMP code/type */ + u_32_t fid_32; + } fin_dat; + int fin_out; /* in or out ? 
1 == out, 0 == in */ + int fin_rev; /* state only: 1 = reverse */ u_short fin_hlen; /* length of IP header in bytes */ - u_char fin_rev; /* state only: 1 = reverse */ u_char fin_tcpf; /* TCP header flags (SYN, ACK, etc) */ - u_int fin_icode; /* ICMP error to return */ + u_char fin_icode; /* ICMP error to return */ u_32_t fin_rule; /* rule # last matched */ - u_32_t fin_group; /* group number, -1 for none */ + char fin_group[FR_GROUPLEN]; /* group number, -1 for none */ struct frentry *fin_fr; /* last matching rule */ - char *fin_dp; /* start of data past IP header */ - u_short fin_plen; - u_short fin_off; - u_short fin_dlen; /* length of data portion of packet */ + void *fin_dp; /* start of data past IP header */ + int fin_dlen; /* length of data portion of packet */ + int fin_plen; + int fin_ipoff; /* # bytes from buffer start to hdr */ u_short fin_id; /* IP packet id field */ - u_int fin_misc; + u_short fin_off; + int fin_depth; /* Group nesting depth */ + int fin_error; /* Error code to return */ + void *fin_nat; + void *fin_state; + void *fin_nattag; + ip_t *fin_ip; mb_t **fin_mp; /* pointer to pointer to mbuf */ -#if SOLARIS - void *fin_qfm; /* pointer to mblk where pkt starts */ - void *fin_qif; + mb_t *fin_m; /* pointer to mbuf */ +#ifdef MENTAT + mb_t *fin_qfm; /* pointer to mblk where pkt starts */ + void *fin_qpi; +#endif +#ifdef __sgi + void *fin_hbuf; #endif } fr_info_t; #define fin_v fin_fi.fi_v #define fin_p fin_fi.fi_p -#define fin_saddr fin_fi.fi_saddr +#define fin_flx fin_fi.fi_flx +#define fin_optmsk fin_fi.fi_optmsk +#define fin_secmsk fin_fi.fi_secmsk +#define fin_auth fin_fi.fi_auth #define fin_src fin_fi.fi_src.in4 -#define fin_daddr fin_fi.fi_daddr +#define fin_src6 fin_fi.fi_src.in6 +#define fin_saddr fin_fi.fi_saddr #define fin_dst fin_fi.fi_dst.in4 -#define fin_fl fin_fi.fi_fl +#define fin_dst6 fin_fi.fi_dst.in6 +#define fin_daddr fin_fi.fi_daddr +#define fin_data fin_dat.fid_16 +#define fin_sport fin_dat.fid_16[0] +#define fin_dport 
fin_dat.fid_16[1] +#define fin_ports fin_dat.fid_32 + +#define IPF_IN 0 +#define IPF_OUT 1 + +typedef struct frentry *(*ipfunc_t) __P((fr_info_t *, u_32_t *)); +typedef int (*ipfuncinit_t) __P((struct frentry *)); + +typedef struct ipfunc_resolve { + char ipfu_name[32]; + ipfunc_t ipfu_addr; + ipfuncinit_t ipfu_init; +} ipfunc_resolve_t; /* * Size for compares on fr_info structures @@ -174,36 +348,66 @@ typedef struct fr_info { #define FI_LCSIZE offsetof(fr_info_t, fin_dp) /* - * For fin_misc + * Size for copying cache fr_info structure */ -#define FM_BADSTATE 0x00000001 +#define FI_COPYSIZE offsetof(fr_info_t, fin_dp) /* - * Size for copying cache fr_info structure + * Structure for holding IPFilter's tag information */ -#define FI_COPYSIZE offsetof(fr_info_t, fin_dp) +#define IPFTAG_LEN 16 +typedef struct { + union { + u_32_t iptu_num[4]; + char iptu_tag[IPFTAG_LEN]; + } ipt_un; + int ipt_not; +} ipftag_t; + +#define ipt_tag ipt_un.iptu_tag +#define ipt_num ipt_un.iptu_num + +/* + * This structure is used to hold information about the next hop for where + * to forward a packet. + */ typedef struct frdest { void *fd_ifp; - union i6addr fd_ip6; + i6addr_t fd_ip6; char fd_ifname[LIFNAMSIZ]; -#if SOLARIS - mb_t *fd_mp; /* cache resolver for to/dup-to */ -#endif } frdest_t; #define fd_ip fd_ip6.in4 +/* + * This structure holds information about a port comparison. + */ typedef struct frpcmp { int frp_cmp; /* data for port comparisons */ u_short frp_port; /* top port for <> and >< */ u_short frp_top; /* top port for <> and >< */ } frpcmp_t; +#define FR_NONE 0 +#define FR_EQUAL 1 +#define FR_NEQUAL 2 +#define FR_LESST 3 +#define FR_GREATERT 4 +#define FR_LESSTE 5 +#define FR_GREATERTE 6 +#define FR_OUTRANGE 7 +#define FR_INRANGE 8 +#define FR_INCRANGE 9 + +/* + * Structure containing all the relevant TCP things that can be checked in + * a filter rule. 
+ */ typedef struct frtuc { - u_char ftu_tcpfm; /* tcp flags mask */ - u_char ftu_tcpf; /* tcp flags */ + u_char ftu_tcpfm; /* tcp flags mask */ + u_char ftu_tcpf; /* tcp flags */ frpcmp_t ftu_src; frpcmp_t ftu_dst; } frtuc_t; @@ -215,47 +419,123 @@ typedef struct frtuc { #define ftu_stop ftu_src.frp_top #define ftu_dtop ftu_dst.frp_top +#define FR_TCPFMAX 0x3f + +/* + * This structure makes up what is considered to be the IPFilter specific + * matching components of a filter rule, as opposed to the data structures + * used to define the result which are in frentry_t and not here. + */ +typedef struct fripf { + fr_ip_t fri_ip; + fr_ip_t fri_mip; /* mask structure */ + + u_short fri_icmpm; /* data for ICMP packets (mask) */ + u_short fri_icmp; + + frtuc_t fri_tuc; + int fri_satype; /* address type */ + int fri_datype; /* address type */ + int fri_sifpidx; /* doing dynamic addressing */ + int fri_difpidx; /* index into fr_ifps[] to use when */ +} fripf_t; + +#define fri_dstnum fri_ip.fi_dstnum +#define fri_srcnum fri_mip.fi_srcnum +#define fri_dstptr fri_ip.fi_dstptr +#define fri_srcptr fri_mip.fi_srcptr + +#define FRI_NORMAL 0 /* Normal address */ +#define FRI_DYNAMIC 1 /* dynamic address */ +#define FRI_LOOKUP 2 /* address is a pool # */ +#define FRI_RANGE 3 /* address/mask is a range */ +#define FRI_NETWORK 4 /* network address from if */ +#define FRI_BROADCAST 5 /* broadcast address from if */ +#define FRI_PEERADDR 6 /* Peer address for P-to-P */ +#define FRI_NETMASKED 7 /* network address with netmask from if */ + + +typedef struct frentry * (* frentfunc_t) __P((fr_info_t *)); + typedef struct frentry { + ipfmutex_t fr_lock; struct frentry *fr_next; - struct frentry *fr_grp; - int fr_ref; /* reference count - for grouping */ + struct frentry **fr_grp; + struct ipscan *fr_isc; void *fr_ifas[4]; + void *fr_ptr; /* for use with fr_arg */ + char *fr_comment; /* text comment for rule */ + int fr_ref; /* reference count - for grouping */ + int fr_statecnt; /* state 
count - for limit rules */ /* * These are only incremented when a packet matches this rule and * it is the last match */ U_QUAD_T fr_hits; U_QUAD_T fr_bytes; + /* - * Fields after this may not change whilst in the kernel. + * For PPS rate limiting */ - struct fr_ip fr_ip; - struct fr_ip fr_mip; /* mask structure */ - + struct timeval fr_lastpkt; + int fr_curpps; - u_short fr_icmpm; /* data for ICMP packets (mask) */ - u_short fr_icmp; + union { + void *fru_data; + caddr_t fru_caddr; + fripf_t *fru_ipf; + frentfunc_t fru_func; + } fr_dun; - u_int fr_age[2]; /* aging for state */ - frtuc_t fr_tuc; - u_32_t fr_group; /* group to which this rule belongs */ - u_32_t fr_grhead; /* group # which this rule starts */ + /* + * Fields after this may not change whilst in the kernel. + */ + ipfunc_t fr_func; /* call this function */ + int fr_dsize; + int fr_pps; + int fr_statemax; /* max reference count */ + int fr_flineno; /* line number from conf file */ + u_32_t fr_type; u_32_t fr_flags; /* per-rule flags && options (see below) */ - u_int fr_skip; /* # of rules to skip */ + u_32_t fr_logtag; /* user defined log tag # */ + u_32_t fr_collect; /* collection number */ + u_int fr_arg; /* misc. numeric arg for rule */ u_int fr_loglevel; /* syslog log facility + priority */ - int (*fr_func) __P((int, ip_t *, fr_info_t *)); /* call this function */ - int fr_sap; /* For solaris only */ + u_int fr_age[2]; /* non-TCP timeouts */ + u_char fr_v; u_char fr_icode; /* return ICMP code */ + char fr_group[FR_GROUPLEN]; /* group to which this rule belongs */ + char fr_grhead[FR_GROUPLEN]; /* group # which this rule starts */ + ipftag_t fr_nattag; char fr_ifnames[4][LIFNAMSIZ]; - struct frdest fr_tif; /* "to" interface */ - struct frdest fr_dif; /* duplicate packet interfaces */ + char fr_isctag[16]; + frdest_t fr_tifs[2]; /* "to"/"reply-to" interface */ + frdest_t fr_dif; /* duplicate packet interface */ + /* + * This must be last and will change after loaded into the kernel. 
+ */ u_int fr_cksum; /* checksum on filter rules for performance */ } frentry_t; -#define fr_v fr_ip.fi_v +#define fr_caddr fr_dun.fru_caddr +#define fr_data fr_dun.fru_data +#define fr_dfunc fr_dun.fru_func +#define fr_ipf fr_dun.fru_ipf +#define fr_ip fr_ipf->fri_ip +#define fr_mip fr_ipf->fri_mip +#define fr_icmpm fr_ipf->fri_icmpm +#define fr_icmp fr_ipf->fri_icmp +#define fr_tuc fr_ipf->fri_tuc +#define fr_satype fr_ipf->fri_satype +#define fr_datype fr_ipf->fri_datype +#define fr_sifpidx fr_ipf->fri_sifpidx +#define fr_difpidx fr_ipf->fri_difpidx #define fr_proto fr_ip.fi_p +#define fr_mproto fr_mip.fi_p #define fr_ttl fr_ip.fi_ttl +#define fr_mttl fr_mip.fi_ttl #define fr_tos fr_ip.fi_tos +#define fr_mtos fr_mip.fi_tos #define fr_tcpfm fr_tuc.ftu_tcpfm #define fr_tcpf fr_tuc.ftu_tcpf #define fr_scmp fr_tuc.ftu_scmp @@ -265,58 +545,110 @@ typedef struct frentry { #define fr_stop fr_tuc.ftu_stop #define fr_dtop fr_tuc.ftu_dtop #define fr_dst fr_ip.fi_dst.in4 +#define fr_daddr fr_ip.fi_dst.in4.s_addr #define fr_src fr_ip.fi_src.in4 +#define fr_saddr fr_ip.fi_src.in4.s_addr #define fr_dmsk fr_mip.fi_dst.in4 +#define fr_dmask fr_mip.fi_dst.in4.s_addr #define fr_smsk fr_mip.fi_src.in4 +#define fr_smask fr_mip.fi_src.in4.s_addr +#define fr_dstnum fr_ip.fi_dstnum +#define fr_srcnum fr_ip.fi_srcnum +#define fr_dsttype fr_ip.fi_dsttype +#define fr_srctype fr_ip.fi_srctype +#define fr_dstptr fr_mip.fi_dstptr +#define fr_srcptr fr_mip.fi_srcptr +#define fr_dstfunc fr_mip.fi_dstfunc +#define fr_srcfunc fr_mip.fi_srcfunc +#define fr_optbits fr_ip.fi_optmsk +#define fr_optmask fr_mip.fi_optmsk +#define fr_secbits fr_ip.fi_secmsk +#define fr_secmask fr_mip.fi_secmsk +#define fr_authbits fr_ip.fi_auth +#define fr_authmask fr_mip.fi_auth +#define fr_flx fr_ip.fi_flx +#define fr_mflx fr_mip.fi_flx #define fr_ifname fr_ifnames[0] #define fr_oifname fr_ifnames[2] #define fr_ifa fr_ifas[0] #define fr_oifa fr_ifas[2] +#define fr_tif fr_tifs[0] +#define fr_rif fr_tifs[1] + +#define 
FR_NOLOGTAG 0 -#define FR_CMPSIZ (sizeof(struct frentry) - offsetof(frentry_t, fr_ip)) +#ifndef offsetof +#define offsetof(t,m) (int)((&((t *)0L)->m)) +#endif +#define FR_CMPSIZ (sizeof(struct frentry) - \ + offsetof(struct frentry, fr_func)) + +/* + * fr_type + */ +#define FR_T_NONE 0 +#define FR_T_IPF 1 /* IPF structures */ +#define FR_T_BPFOPC 2 /* BPF opcode */ +#define FR_T_CALLFUNC 3 /* callout to function in fr_func only */ +#define FR_T_COMPIPF 4 /* compiled C code */ +#define FR_T_BUILTIN 0x80000000 /* rule is in kernel space */ /* * fr_flags */ +#define FR_CALL 0x00000 /* call rule */ #define FR_BLOCK 0x00001 /* do not allow packet to pass */ #define FR_PASS 0x00002 /* allow packet to pass */ -#define FR_OUTQUE 0x00004 /* outgoing packets */ -#define FR_INQUE 0x00008 /* ingoing packets */ +#define FR_AUTH 0x00003 /* use authentication */ +#define FR_PREAUTH 0x00004 /* require preauthentication */ +#define FR_ACCOUNT 0x00005 /* Accounting rule */ +#define FR_SKIP 0x00006 /* skip rule */ +#define FR_DIVERT 0x00007 /* divert rule */ +#define FR_CMDMASK 0x0000f #define FR_LOG 0x00010 /* Log */ #define FR_LOGB 0x00011 /* Log-fail */ #define FR_LOGP 0x00012 /* Log-pass */ -#define FR_NOTSRCIP 0x00020 /* not the src IP# */ -#define FR_NOTDSTIP 0x00040 /* not the dst IP# */ -#define FR_RETRST 0x00080 /* Return TCP RST packet - reset connection */ -#define FR_RETICMP 0x00100 /* Return ICMP unreachable packet */ -#define FR_FAKEICMP 0x00180 /* Return ICMP unreachable with fake source */ -#define FR_NOMATCH 0x00200 /* no match occured */ -#define FR_ACCOUNT 0x00400 /* count packet bytes */ -#define FR_KEEPFRAG 0x00800 /* keep fragment information */ -#define FR_KEEPSTATE 0x01000 /* keep `connection' state information */ -#define FR_INACTIVE 0x02000 -#define FR_QUICK 0x04000 /* match & stop processing list */ -#define FR_FASTROUTE 0x08000 /* bypass normal routing */ -#define FR_CALLNOW 0x10000 /* call another function (fr_func) if matches */ -#define FR_DUP 0x20000 
/* duplicate packet */ +#define FR_LOGMASK (FR_LOG|FR_CMDMASK) +#define FR_CALLNOW 0x00020 /* call another function (fr_func) if matches */ +#define FR_NOTSRCIP 0x00040 +#define FR_NOTDSTIP 0x00080 +#define FR_QUICK 0x00100 /* match & stop processing list */ +#define FR_KEEPFRAG 0x00200 /* keep fragment information */ +#define FR_KEEPSTATE 0x00400 /* keep `connection' state information */ +#define FR_FASTROUTE 0x00800 /* bypass normal routing */ +#define FR_RETRST 0x01000 /* Return TCP RST packet - reset connection */ +#define FR_RETICMP 0x02000 /* Return ICMP unreachable packet */ +#define FR_FAKEICMP 0x03000 /* Return ICMP unreachable with fake source */ +#define FR_OUTQUE 0x04000 /* outgoing packets */ +#define FR_INQUE 0x08000 /* ingoing packets */ +#define FR_LOGBODY 0x10000 /* Log the body */ +#define FR_LOGFIRST 0x20000 /* Log the first byte if state held */ #define FR_LOGORBLOCK 0x40000 /* block the packet if it can't be logged */ -#define FR_LOGBODY 0x80000 /* Log the body */ -#define FR_LOGFIRST 0x100000 /* Log the first byte if state held */ -#define FR_AUTH 0x200000 /* use authentication */ -#define FR_PREAUTH 0x400000 /* require preauthentication */ -#define FR_DONTCACHE 0x800000 /* don't cache the result */ +#define FR_DUP 0x80000 /* duplicate packet */ +#define FR_FRSTRICT 0x100000 /* strict frag. 
cache */ +#define FR_STSTRICT 0x200000 /* strict keep state */ +#define FR_NEWISN 0x400000 /* new ISN for outgoing TCP */ +#define FR_NOICMPERR 0x800000 /* do not match ICMP errors in state */ +#define FR_STATESYNC 0x1000000 /* synchronize state to slave */ +#define FR_NOMATCH 0x8000000 /* no match occured */ + /* 0x10000000 FF_LOGPASS */ + /* 0x20000000 FF_LOGBLOCK */ + /* 0x40000000 FF_LOGNOMATCH */ + /* 0x80000000 FF_BLOCKNONIP */ +#define FR_COPIED 0x40000000 /* copied from user space */ +#define FR_INACTIVE 0x80000000 /* only used when flush'ing rules */ -#define FR_LOGMASK (FR_LOG|FR_LOGP|FR_LOGB) #define FR_RETMASK (FR_RETICMP|FR_RETRST|FR_FAKEICMP) +#define FR_ISBLOCK(x) (((x) & FR_CMDMASK) == FR_BLOCK) +#define FR_ISPASS(x) (((x) & FR_CMDMASK) == FR_PASS) +#define FR_ISAUTH(x) (((x) & FR_CMDMASK) == FR_AUTH) +#define FR_ISPREAUTH(x) (((x) & FR_CMDMASK) == FR_PREAUTH) +#define FR_ISACCOUNT(x) (((x) & FR_CMDMASK) == FR_ACCOUNT) +#define FR_ISSKIP(x) (((x) & FR_CMDMASK) == FR_SKIP) +#define FR_ISNOMATCH(x) ((x) & FR_NOMATCH) +#define FR_INOUT (FR_INQUE|FR_OUTQUE) /* - * These correspond to #define's for FI_* and are stored in fr_flags - */ -#define FF_OPTIONS 0x01000000 -#define FF_TCPUDP 0x02000000 -#define FF_FRAG 0x04000000 -#define FF_SHORT 0x08000000 -/* * recognized flags for SIOCGETFF and SIOCSETFF, and get put in fr_flags */ #define FF_LOGPASS 0x10000000 @@ -325,16 +657,40 @@ typedef struct frentry { #define FF_LOGGING (FF_LOGPASS|FF_LOGBLOCK|FF_LOGNOMATCH) #define FF_BLOCKNONIP 0x80000000 /* Solaris2 Only */ -#define FR_NONE 0 -#define FR_EQUAL 1 -#define FR_NEQUAL 2 -#define FR_LESST 3 -#define FR_GREATERT 4 -#define FR_LESSTE 5 -#define FR_GREATERTE 6 -#define FR_OUTRANGE 7 -#define FR_INRANGE 8 +/* + * Structure that passes information on what/how to flush to the kernel. 
+ */ +typedef struct ipfflush { + int ipflu_how; + int ipflu_arg; +} ipfflush_t; + + +/* + * + */ +typedef struct ipfgetctl { + u_int ipfg_min; /* min value */ + u_int ipfg_current; /* current value */ + u_int ipfg_max; /* max value */ + u_int ipfg_default; /* default value */ + u_int ipfg_steps; /* value increments */ + char ipfg_name[40]; /* tag name for this control */ +} ipfgetctl_t; + +typedef struct ipfsetctl { + int ipfs_which; /* 0 = min 1 = current 2 = max 3 = default */ + u_int ipfs_value; /* min value */ + char ipfs_name[40]; /* tag name for this control */ +} ipfsetctl_t; + + +/* + * Some of the statistics below are in their own counters, but most are kept + * in this single structure so that they can all easily be collected and + * copied back as required. + */ typedef struct filterstats { u_long fr_pass; /* packets allowed */ u_long fr_block; /* packets denied */ @@ -357,59 +713,13 @@ typedef struct filterstats { u_long fr_pull[2]; /* good and bad pullup attempts */ u_long fr_badsrc; /* source received doesn't match route */ u_long fr_badttl; /* TTL in packet doesn't reach minimum */ -#if SOLARIS - u_long fr_notdata; /* PROTO/PCPROTO that have no data */ - u_long fr_nodata; /* mblks that have no data */ u_long fr_bad; /* bad IP packets to the filter */ - u_long fr_notip; /* packets passed through no on ip queue */ - u_long fr_drop; /* packets dropped - no info for them! 
*/ - u_long fr_copy; /* messages copied due to db_ref > 1 */ -#endif - u_long fr_ipv6[2]; /* IPv6 packets in/out */ + u_long fr_ipv6; /* IPv6 packets in/out */ + u_long fr_ppshit; /* dropped because of pps ceiling */ + u_long fr_ipud; /* IP id update failures */ } filterstats_t; /* - * For SIOCGETFS - */ -typedef struct friostat { - struct filterstats f_st[2]; - struct frentry *f_fin[2]; - struct frentry *f_fout[2]; - struct frentry *f_acctin[2]; - struct frentry *f_acctout[2]; - struct frentry *f_fin6[2]; - struct frentry *f_fout6[2]; - struct frentry *f_acctin6[2]; - struct frentry *f_acctout6[2]; - struct frentry *f_auth; - struct frgroup *f_groups[3][2]; - u_long f_froute[2]; - int f_defpass; /* default pass - from fr_pass */ - char f_active; /* 1 or 0 - active rule set */ - char f_running; /* 1 if running, else 0 */ - char f_logging; /* 1 if enabled, else 0 */ - char f_version[32]; /* version string */ - int f_locks[4]; -} friostat_t; - -typedef struct optlist { - u_short ol_val; - int ol_bit; -} optlist_t; - - -/* - * Group list structure. - */ -typedef struct frgroup { - u_32_t fg_num; - struct frgroup *fg_next; - struct frentry *fg_head; - struct frentry **fg_start; -} frgroup_t; - - -/* * Log structure. Each packet header logged is prepended by one of these. * Following this in the log records read from the device will be an ipflog * structure which is then followed by any packet data. 
@@ -417,40 +727,39 @@ typedef struct frgroup { typedef struct iplog { u_32_t ipl_magic; u_int ipl_count; - struct timeval ipl_tv; + struct timeval ipl_time; size_t ipl_dsize; struct iplog *ipl_next; } iplog_t; -#define ipl_sec ipl_tv.tv_sec -#define ipl_usec ipl_tv.tv_usec +#define ipl_sec ipl_time.tv_sec +#define ipl_usec ipl_time.tv_usec -#define IPL_MAGIC 0x49504c4d /* 'IPLM' */ +#define IPL_MAGIC 0x49504c4d /* 'IPLM' */ +#define IPL_MAGIC_NAT 0x49504c4e /* 'IPLN' */ +#define IPL_MAGIC_STATE 0x49504c53 /* 'IPLS' */ #define IPLOG_SIZE sizeof(iplog_t) typedef struct ipflog { #if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \ (defined(OpenBSD) && (OpenBSD >= 199603)) - char fl_ifname[LIFNAMSIZ]; #else u_int fl_unit; - char fl_ifname[LIFNAMSIZ]; #endif - u_char fl_plen; /* extra data after hlen */ - u_char fl_hlen; /* length of IP headers saved */ - u_short fl_loglevel; /* syslog log level */ u_32_t fl_rule; - u_32_t fl_group; u_32_t fl_flags; + u_32_t fl_lflags; + u_32_t fl_logtag; + ipftag_t fl_nattag; + u_short fl_plen; /* extra data after hlen */ + u_short fl_loglevel; /* syslog log level */ + char fl_group[FR_GROUPLEN]; + u_char fl_hlen; /* length of IP headers saved */ u_char fl_dir; - u_char fl_pad[3]; + u_char fl_xxx[2]; /* pad */ + char fl_ifname[LIFNAMSIZ]; } ipflog_t; - -#ifndef ICMP_UNREACH_FILTER -# define ICMP_UNREACH_FILTER 13 -#endif - #ifndef IPF_LOGGING # define IPF_LOGGING 0 #endif @@ -458,8 +767,14 @@ typedef struct ipflog { # define IPF_DEFAULT_PASS FR_PASS #endif -#define IPMINLEN(i, h) ((i)->ip_len >= ((i)->ip_hl * 4 + sizeof(struct h))) -#define IPLLOGSIZE 8192 +#define DEFAULT_IPFLOGSIZE 8192 +#ifndef IPFILTER_LOGSIZE +# define IPFILTER_LOGSIZE DEFAULT_IPFLOGSIZE +#else +# if IPFILTER_LOGSIZE < DEFAULT_IPFLOGSIZE +# error IPFILTER_LOGSIZE too small. 
Must be >= DEFAULT_IPFLOGSIZE +# endif +#endif #define IPF_OPTCOPY 0x07ff00 /* bit mask of copied options */ @@ -474,15 +789,309 @@ typedef struct ipflog { # define IPL_NAME "/dev/ipl" # endif #endif -#define IPL_NAT IPNAT_NAME -#define IPL_STATE IPSTATE_NAME -#define IPL_AUTH IPAUTH_NAME +/* + * Pathnames for various IP Filter control devices. Used by LKM + * and userland, so defined here. + */ +#define IPNAT_NAME "/dev/ipnat" +#define IPSTATE_NAME "/dev/ipstate" +#define IPAUTH_NAME "/dev/ipauth" +#define IPSYNC_NAME "/dev/ipsync" +#define IPSCAN_NAME "/dev/ipscan" +#define IPLOOKUP_NAME "/dev/iplookup" #define IPL_LOGIPF 0 /* Minor device #'s for accessing logs */ #define IPL_LOGNAT 1 #define IPL_LOGSTATE 2 #define IPL_LOGAUTH 3 -#define IPL_LOGMAX 3 +#define IPL_LOGSYNC 4 +#define IPL_LOGSCAN 5 +#define IPL_LOGLOOKUP 6 +#define IPL_LOGCOUNT 7 +#define IPL_LOGMAX 7 +#define IPL_LOGSIZE IPL_LOGMAX + 1 +#define IPL_LOGALL -1 +#define IPL_LOGNONE -2 + +/* + * For SIOCGETFS + */ +typedef struct friostat { + struct filterstats f_st[2]; + struct frentry *f_ipf[2][2]; + struct frentry *f_acct[2][2]; + struct frentry *f_ipf6[2][2]; + struct frentry *f_acct6[2][2]; + struct frentry *f_auth; + struct frgroup *f_groups[IPL_LOGSIZE][2]; + u_long f_froute[2]; + u_long f_ticks; + int f_locks[IPL_LOGMAX]; + size_t f_kmutex_sz; + size_t f_krwlock_sz; + int f_defpass; /* default pass - from fr_pass */ + int f_active; /* 1 or 0 - active rule set */ + int f_running; /* 1 if running, else 0 */ + int f_logging; /* 1 if enabled, else 0 */ + int f_features; + char f_version[32]; /* version string */ +} friostat_t; + +#define f_fin f_ipf[0] +#define f_fin6 f_ipf6[0] +#define f_fout f_ipf[1] +#define f_fout6 f_ipf6[1] +#define f_acctin f_acct[0] +#define f_acctin6 f_acct6[0] +#define f_acctout f_acct[1] +#define f_acctout6 f_acct6[1] + +#define IPF_FEAT_LKM 0x001 +#define IPF_FEAT_LOG 0x002 +#define IPF_FEAT_LOOKUP 0x004 +#define IPF_FEAT_BPF 0x008 +#define IPF_FEAT_COMPILED 0x010 
+#define IPF_FEAT_CKSUM 0x020 +#define IPF_FEAT_SYNC 0x040 +#define IPF_FEAT_SCAN 0x080 +#define IPF_FEAT_IPV6 0x100 + +typedef struct optlist { + u_short ol_val; + int ol_bit; +} optlist_t; + + +/* + * Group list structure. + */ +typedef struct frgroup { + struct frgroup *fg_next; + struct frentry *fg_head; + struct frentry *fg_start; + u_32_t fg_flags; + int fg_ref; + char fg_name[FR_GROUPLEN]; +} frgroup_t; + +#define FG_NAME(g) (*(g)->fg_name == '\0' ? "" : (g)->fg_name) + + +/* + * Used by state and NAT tables + */ +typedef struct icmpinfo { + u_short ici_id; + u_short ici_seq; + u_char ici_type; +} icmpinfo_t; + +typedef struct udpinfo { + u_short us_sport; + u_short us_dport; +} udpinfo_t; + + +typedef struct tcpdata { + u_32_t td_end; + u_32_t td_maxend; + u_32_t td_maxwin; + u_32_t td_winscale; + u_32_t td_maxseg; + int td_winflags; +} tcpdata_t; + +#define TCP_WSCALE_MAX 14 + +#define TCP_WSCALE_SEEN 0x00000001 +#define TCP_WSCALE_FIRST 0x00000002 + + +typedef struct tcpinfo { + u_short ts_sport; + u_short ts_dport; + tcpdata_t ts_data[2]; +} tcpinfo_t; + + +struct grebits { + u_32_t grb_C:1; + u_32_t grb_R:1; + u_32_t grb_K:1; + u_32_t grb_S:1; + u_32_t grb_s:1; + u_32_t grb_recur:1; + u_32_t grb_A:1; + u_32_t grb_flags:3; + u_32_t grb_ver:3; + u_short grb_ptype; +}; + +typedef struct grehdr { + union { + struct grebits gru_bits; + u_short gru_flags; + } gr_un; + u_short gr_len; + u_short gr_call; +} grehdr_t; + +#define gr_flags gr_un.gru_flags +#define gr_bits gr_un.gru_bits +#define gr_ptype gr_bits.grb_ptype +#define gr_C gr_bits.grb_C +#define gr_R gr_bits.grb_R +#define gr_K gr_bits.grb_K +#define gr_S gr_bits.grb_S +#define gr_s gr_bits.grb_s +#define gr_recur gr_bits.grb_recur +#define gr_A gr_bits.grb_A +#define gr_ver gr_bits.grb_ver + + +typedef struct greinfo { + u_short gs_call[2]; + u_short gs_flags; + u_short gs_ptype; +} greinfo_t; + +#define GRE_REV(x) ((ntohs(x) >> 13) & 7) + + +/* + * Timeout tail queue list member + */ +typedef struct 
ipftqent { + struct ipftqent **tqe_pnext; + struct ipftqent *tqe_next; + struct ipftq *tqe_ifq; + void *tqe_parent; /* pointer back to NAT/state struct */ + u_long tqe_die; /* when this entriy is to die */ + u_long tqe_touched; + int tqe_flags; + int tqe_state[2]; /* current state of this entry */ +} ipftqent_t; + +#define TQE_RULEBASED 0x00000001 + + +/* + * Timeout tail queue head for IPFilter + */ +typedef struct ipftq { + ipfmutex_t ifq_lock; + u_int ifq_ttl; + ipftqent_t *ifq_head; + ipftqent_t **ifq_tail; + struct ipftq *ifq_next; + struct ipftq **ifq_pnext; + int ifq_ref; + u_int ifq_flags; +} ipftq_t; + +#define IFQF_USER 0x01 /* User defined aging */ +#define IFQF_DELETE 0x02 /* Marked for deletion */ +#define IFQF_PROXY 0x04 /* Timeout queue in use by a proxy */ + +#define IPF_HZ_MULT 1 +#define IPF_HZ_DIVIDE 2 /* How many times a second ipfilter */ + /* checks its timeout queues. */ +#define IPF_TTLVAL(x) (((x) / IPF_HZ_MULT) * IPF_HZ_DIVIDE) + +/* + * Structure to define address for pool lookups. + */ +typedef struct { + u_char adf_len; + i6addr_t adf_addr; +} addrfamily_t; + + +/* + * Object structure description. For passing through in ioctls. 
+ */ +typedef struct ipfobj { + u_32_t ipfo_rev; /* IPFilter version number */ + u_32_t ipfo_size; /* size of object at ipfo_ptr */ + void *ipfo_ptr; /* pointer to object */ + int ipfo_type; /* type of object being pointed to */ + int ipfo_offset; /* bytes from ipfo_ptr where to start */ + u_char ipfo_xxxpad[32]; /* reserved for future use */ +} ipfobj_t; + +#define IPFOBJ_FRENTRY 0 /* struct frentry */ +#define IPFOBJ_IPFSTAT 1 /* struct friostat */ +#define IPFOBJ_IPFINFO 2 /* struct fr_info */ +#define IPFOBJ_AUTHSTAT 3 /* struct fr_authstat */ +#define IPFOBJ_FRAGSTAT 4 /* struct ipfrstat */ +#define IPFOBJ_IPNAT 5 /* struct ipnat */ +#define IPFOBJ_NATSTAT 6 /* struct natstat */ +#define IPFOBJ_STATESAVE 7 /* struct ipstate_save */ +#define IPFOBJ_NATSAVE 8 /* struct nat_save */ +#define IPFOBJ_NATLOOKUP 9 /* struct natlookup */ +#define IPFOBJ_IPSTATE 10 /* struct ipstate */ +#define IPFOBJ_STATESTAT 11 /* struct ips_stat */ +#define IPFOBJ_FRAUTH 12 /* struct frauth */ +#define IPFOBJ_TUNEABLE 13 /* struct ipftune */ + + +typedef union ipftunevalptr { + void *ipftp_void; + u_long *ipftp_long; + u_int *ipftp_int; + u_short *ipftp_short; + u_char *ipftp_char; +} ipftunevalptr_t; + +typedef struct ipftuneable { + ipftunevalptr_t ipft_una; + char *ipft_name; + u_long ipft_min; + u_long ipft_max; + int ipft_sz; + int ipft_flags; + struct ipftuneable *ipft_next; +} ipftuneable_t; + +#define ipft_addr ipft_una.ipftp_void +#define ipft_plong ipft_una.ipftp_long +#define ipft_pint ipft_una.ipftp_int +#define ipft_pshort ipft_una.ipftp_short +#define ipft_pchar ipft_una.ipftp_char + +#define IPFT_RDONLY 1 /* read-only */ +#define IPFT_WRDISABLED 2 /* write when disabled only */ + +typedef union ipftuneval { + u_long ipftu_long; + u_int ipftu_int; + u_short ipftu_short; + u_char ipftu_char; +} ipftuneval_t; + +typedef struct ipftune { + void *ipft_cookie; + ipftuneval_t ipft_un; + u_long ipft_min; + u_long ipft_max; + int ipft_sz; + int ipft_flags; + char 
ipft_name[80]; +} ipftune_t; + +#define ipft_vlong ipft_un.ipftu_long +#define ipft_vint ipft_un.ipftu_int +#define ipft_vshort ipft_un.ipftu_short +#define ipft_vchar ipft_un.ipftu_char + + +/* +** HPUX Port +*/ +#ifdef __hpux +/* HP-UX locking sequence deadlock detection module lock MAJOR ID */ +# define IPF_SMAJ 0 /* temp assignment XXX, not critical */ +#endif #if !defined(CDEV_MAJOR) && defined (__FreeBSD_version) && \ (__FreeBSD_version >= 220000) @@ -495,9 +1104,9 @@ typedef struct ipflog { * with this! */ #if (defined(NetBSD) && (NetBSD > 199609) && (NetBSD <= 1991011)) || \ - (defined(NetBSD1_2) && NetBSD1_2 > 1) || (defined(__FreeBSD_version) && \ - (__FreeBSD_version >= 500011)) -# if (NetBSD >= 199905) || (__FreeBSD_version >= 600001) + (defined(NetBSD1_2) && NetBSD1_2 > 1) || \ + (defined(__FreeBSD__) && (__FreeBSD_version >= 500043)) +# if (NetBSD >= 199905) # define PFIL_HOOKS # endif # ifdef PFIL_HOOKS @@ -505,63 +1114,62 @@ typedef struct ipflog { # endif #endif - #ifndef _KERNEL -extern char *get_ifname __P((struct ifnet *)); -extern int fr_check __P((ip_t *, int, void *, int, mb_t **)); +extern int fr_check __P((struct ip *, int, void *, int, mb_t **)); extern int (*fr_checkp) __P((ip_t *, int, void *, int, mb_t **)); -extern int send_reset __P((ip_t *, fr_info_t *)); -extern int send_icmp_err __P((ip_t *, int, fr_info_t *, int)); extern int ipf_log __P((void)); extern struct ifnet *get_unit __P((char *, int)); -extern int mbuflen __P((mb_t *)); +extern char *get_ifname __P((struct ifnet *)); # if defined(__NetBSD__) || defined(__OpenBSD__) || \ (_BSDI_VERSION >= 199701) || (__FreeBSD_version >= 300000) -extern int iplioctl __P((dev_t, u_long, caddr_t, int)); +extern int iplioctl __P((int, ioctlcmd_t, caddr_t, int)); # else -extern int iplioctl __P((dev_t, int, caddr_t, int)); +extern int iplioctl __P((int, ioctlcmd_t, caddr_t, int)); # endif extern int iplopen __P((dev_t, int)); extern int iplclose __P((dev_t, int)); +extern void m_freem 
__P((mb_t *)); #else /* #ifndef _KERNEL */ # if defined(__NetBSD__) && defined(PFIL_HOOKS) extern void ipfilterattach __P((int)); # endif -extern int iplattach __P((void)); extern int ipl_enable __P((void)); extern int ipl_disable __P((void)); -extern int send_icmp_err __P((ip_t *, int, fr_info_t *, int)); -extern int send_reset __P((ip_t *, fr_info_t *)); -# if SOLARIS -extern int fr_check __P((ip_t *, int, void *, int, qif_t *, mb_t **)); -extern int (*fr_checkp) __P((ip_t *, int, void *, - int, qif_t *, mb_t **)); -# if SOLARIS2 >= 7 +# ifdef MENTAT +extern int fr_check __P((struct ip *, int, void *, int, void *, + mblk_t **)); +# if SOLARIS +# if SOLARIS2 >= 7 extern int iplioctl __P((dev_t, int, intptr_t, int, cred_t *, int *)); -# else +# else extern int iplioctl __P((dev_t, int, int *, int, cred_t *, int *)); -# endif +# endif extern int iplopen __P((dev_t *, int, int, cred_t *)); extern int iplclose __P((dev_t, int, int, cred_t *)); +extern int iplread __P((dev_t, uio_t *, cred_t *)); +extern int iplwrite __P((dev_t, uio_t *, cred_t *)); +# endif +# ifdef __hpux +extern int iplopen __P((dev_t, int, intptr_t, int)); +extern int iplclose __P((dev_t, int, int)); +extern int iplioctl __P((dev_t, int, caddr_t, int)); +extern int iplread __P((dev_t, uio_t *)); +extern int iplwrite __P((dev_t, uio_t *)); +extern int iplselect __P((dev_t, int)); +# endif extern int ipfsync __P((void)); -extern int ipfr_fastroute __P((ip_t *, mblk_t *, mblk_t **, - fr_info_t *, frdest_t *)); -extern void copyin_mblk __P((mblk_t *, size_t, size_t, char *)); -extern void copyout_mblk __P((mblk_t *, size_t, size_t, char *)); -extern int fr_qin __P((queue_t *, mblk_t *)); extern int fr_qout __P((queue_t *, mblk_t *)); -extern int iplread __P((dev_t, struct uio *, cred_t *)); -# else /* SOLARIS */ -extern int fr_check __P((ip_t *, int, void *, int, mb_t **)); +# else /* MENTAT */ +extern int fr_check __P((struct ip *, int, void *, int, mb_t **)); extern int (*fr_checkp) __P((ip_t *, int, 
void *, int, mb_t **)); -extern int ipfr_fastroute __P((mb_t *, mb_t **, fr_info_t *, frdest_t *)); extern size_t mbufchainlen __P((mb_t *)); # ifdef __sgi # include extern int iplioctl __P((dev_t, int, caddr_t, int, cred_t *, int *)); extern int iplopen __P((dev_t *, int, int, cred_t *)); extern int iplclose __P((dev_t, int, int, cred_t *)); -extern int iplread __P((dev_t, struct uio *, cred_t *)); +extern int iplread __P((dev_t, uio_t *, cred_t *)); +extern int iplwrite __P((dev_t, uio_t *, cred_t *)); extern int ipfsync __P((void)); extern int ipfilter_sgi_attach __P((void)); extern void ipfilter_sgi_detach __P((void)); @@ -574,88 +1182,188 @@ extern int iplidentify __P((char *)); (NetBSD >= 199511) || defined(__OpenBSD__) # if defined(__NetBSD__) || (_BSDI_VERSION >= 199701) || \ defined(__OpenBSD__) || (__FreeBSD_version >= 300000) -extern int iplioctl __P((struct cdev *, u_long, caddr_t, int, struct thread *)); +# if (__FreeBSD_version >= 500024) +# if (__FreeBSD_version >= 502116) +extern int iplioctl __P((struct cdev*, u_long, caddr_t, int, struct thread *)); +# else +extern int iplioctl __P((dev_t, u_long, caddr_t, int, struct thread *)); +# endif /* __FreeBSD_version >= 502116 */ +# else +extern int iplioctl __P((dev_t, u_long, caddr_t, int, struct proc *)); +# endif /* __FreeBSD_version >= 500024 */ # else extern int iplioctl __P((dev_t, int, caddr_t, int, struct thread *)); # endif -extern int iplopen __P((struct cdev *, int, int, struct thread *)); -extern int iplclose __P((struct cdev *, int, int, struct thread *)); +# if (__FreeBSD_version >= 500024) +# if (__FreeBSD_version >= 502116) +extern int iplopen __P((struct cdev*, int, int, struct thread *)); +extern int iplclose __P((struct cdev*, int, int, struct thread *)); +# else +extern int iplopen __P((dev_t, int, int, struct thread *)); +extern int iplclose __P((dev_t, int, int, struct thread *)); +# endif /* __FreeBSD_version >= 502116 */ +# else +extern int iplopen __P((dev_t, int, int, struct 
proc *)); +extern int iplclose __P((dev_t, int, int, struct proc *)); +# endif /* __FreeBSD_version >= 500024 */ # else -# ifndef linux +# ifdef linux +extern int iplioctl __P((struct inode *, struct file *, u_int, u_long)); +# else extern int iplopen __P((dev_t, int)); extern int iplclose __P((dev_t, int)); extern int iplioctl __P((dev_t, int, caddr_t, int)); -# else -extern int iplioctl(struct inode *, struct file *, u_int, u_long); -extern int iplopen __P((struct inode *, struct file *)); -extern void iplclose __P((struct inode *, struct file *)); -# endif /* !linux */ +# endif # endif /* (_BSDI_VERSION >= 199510) */ # if BSD >= 199306 -extern int iplread __P((struct cdev *, struct uio *, int)); +# if (__FreeBSD_version >= 502116) +extern int iplread __P((struct cdev*, struct uio *, int)); +extern int iplwrite __P((struct cdev*, struct uio *, int)); +# else +extern int iplread __P((dev_t, struct uio *, int)); +extern int iplwrite __P((dev_t, struct uio *, int)); +# endif /* __FreeBSD_version >= 502116 */ # else # ifndef linux extern int iplread __P((dev_t, struct uio *)); -# else -extern int iplread(struct inode *, struct file *, char *, int); -# endif /* !linux */ +extern int iplwrite __P((dev_t, struct uio *)); +# endif # endif /* BSD >= 199306 */ # endif /* __ sgi */ -# endif /* SOLARIS */ +# endif /* MENTAT */ + #endif /* #ifndef _KERNEL */ +extern ipfmutex_t ipl_mutex, ipf_authmx, ipf_rw, ipf_hostmap; +extern ipfmutex_t ipf_timeoutlock, ipf_stinsert, ipf_natio, ipf_nat_new; +extern ipfrwlock_t ipf_mutex, ipf_global, ip_poolrw, ipf_ipidfrag; +extern ipfrwlock_t ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth; + extern char *memstr __P((char *, char *, int, int)); -extern void fixskip __P((frentry_t **, frentry_t *, int)); -extern int countbits __P((u_32_t)); +extern int count4bits __P((u_32_t)); +extern int frrequest __P((int, ioctlcmd_t, caddr_t, int, int)); +extern char *getifname __P((struct ifnet *)); +extern int iplattach __P((void)); extern int 
ipldetach __P((void)); extern u_short ipf_cksum __P((u_short *, int)); -extern int ircopyptr __P((void *, void *, size_t)); -extern int iwcopyptr __P((void *, void *, size_t)); +extern int copyinptr __P((void *, void *, size_t)); +extern int copyoutptr __P((void *, void *, size_t)); +extern int fr_fastroute __P((mb_t *, mb_t **, fr_info_t *, frdest_t *)); +extern int fr_inobj __P((void *, void *, int)); +extern int fr_inobjsz __P((void *, void *, int, int)); +extern int fr_ioctlswitch __P((int, void *, ioctlcmd_t, int)); +extern int fr_ipftune __P((ioctlcmd_t, void *)); +extern int fr_outobj __P((void *, void *, int)); +extern int fr_outobjsz __P((void *, void *, int, int)); +extern void *fr_pullup __P((mb_t *, fr_info_t *, int)); +extern void fr_resolvedest __P((struct frdest *, int)); +extern int fr_resolvefunc __P((void *)); +extern void *fr_resolvenic __P((char *, int)); +extern int fr_send_icmp_err __P((int, fr_info_t *, int)); +extern int fr_send_reset __P((fr_info_t *)); +#if (__FreeBSD_version < 490000) || !defined(_KERNEL) +extern int ppsratecheck __P((struct timeval *, int *, int)); +#endif +extern ipftq_t *fr_addtimeoutqueue __P((ipftq_t **, u_int)); +extern void fr_deletequeueentry __P((ipftqent_t *)); +extern int fr_deletetimeoutqueue __P((ipftq_t *)); +extern void fr_freetimeoutqueue __P((ipftq_t *)); +extern void fr_movequeue __P((ipftqent_t *, ipftq_t *, ipftq_t *)); +extern void fr_queueappend __P((ipftqent_t *, ipftq_t *, void *)); +extern void fr_queueback __P((ipftqent_t *)); +extern void fr_queuefront __P((ipftqent_t *)); +extern void fr_checkv4sum __P((fr_info_t *)); +extern int fr_checkl4sum __P((fr_info_t *)); +extern int fr_ifpfillv4addr __P((int, struct sockaddr_in *, + struct sockaddr_in *, struct in_addr *, + struct in_addr *)); +extern int fr_coalesce __P((fr_info_t *)); +#ifdef USE_INET6 +extern void fr_checkv6sum __P((fr_info_t *)); +extern int fr_ifpfillv6addr __P((int, struct sockaddr_in6 *, + struct sockaddr_in6 *, struct in_addr 
*, + struct in_addr *)); +#endif + +extern int fr_addipftune __P((ipftuneable_t *)); +extern int fr_delipftune __P((ipftuneable_t *)); -extern void ipflog_init __P((void)); +extern int frflush __P((minor_t, int, int)); +extern void frsync __P((void *)); +extern frgroup_t *fr_addgroup __P((char *, void *, u_32_t, minor_t, int)); +extern int fr_derefrule __P((frentry_t **)); +extern void fr_delgroup __P((char *, minor_t, int)); +extern frgroup_t *fr_findgroup __P((char *, minor_t, int, frgroup_t ***)); + +extern int fr_loginit __P((void)); extern int ipflog_clear __P((minor_t)); -extern int ipflog __P((u_int, ip_t *, fr_info_t *, mb_t *)); +extern int ipflog_read __P((minor_t, uio_t *)); +extern int ipflog __P((fr_info_t *, u_int)); extern int ipllog __P((int, fr_info_t *, void **, size_t *, int *, int)); -extern int ipflog_read __P((minor_t, struct uio *)); +extern void fr_logunload __P((void)); + +extern frentry_t *fr_acctpkt __P((fr_info_t *, u_32_t *)); +extern int fr_copytolog __P((int, char *, int)); +extern u_short fr_cksum __P((mb_t *, ip_t *, int, void *)); +extern void fr_deinitialise __P((void)); +extern frentry_t *fr_dolog __P((fr_info_t *, u_32_t *)); +extern frentry_t *fr_dstgrpmap __P((fr_info_t *, u_32_t *)); +extern void fr_fixskip __P((frentry_t **, frentry_t *, int)); +extern void fr_forgetifp __P((void *)); +extern frentry_t *fr_getrulen __P((int, char *, u_32_t)); +extern void fr_getstat __P((struct friostat *)); +extern int fr_icmp4errortype __P((int)); +extern int fr_ifpaddr __P((int, int, void *, + struct in_addr *, struct in_addr *)); +extern int fr_initialise __P((void)); +extern void fr_lock __P((caddr_t, int *)); +extern int fr_makefrip __P((int, ip_t *, fr_info_t *)); +extern int fr_matchtag __P((ipftag_t *, ipftag_t *)); +extern int fr_matchicmpqueryreply __P((int, icmpinfo_t *, + struct icmp *, int)); +extern u_32_t fr_newisn __P((fr_info_t *)); +extern u_short fr_nextipid __P((fr_info_t *)); +extern int fr_rulen __P((int, frentry_t 
*)); +extern int fr_scanlist __P((fr_info_t *, u_32_t)); +extern frentry_t *fr_srcgrpmap __P((fr_info_t *, u_32_t *)); +extern int fr_tcpudpchk __P((fr_info_t *, frtuc_t *)); +extern int fr_verifysrc __P((fr_info_t *fin)); +extern int fr_zerostats __P((char *)); -extern int frflush __P((minor_t, int, int)); -extern void frsync __P((void)); -extern frgroup_t *fr_addgroup __P((u_32_t, frentry_t *, minor_t, int)); -extern void fr_delgroup __P((u_32_t, u_32_t, minor_t, int)); -extern frgroup_t *fr_findgroup __P((u_32_t, u_32_t, minor_t, int, - frgroup_t ***)); - -extern int fr_copytolog __P((int, char *, int)); -extern void fr_forgetifp __P((void *)); -extern void fr_getstat __P((struct friostat *)); -extern int fr_ifpaddr __P((int, void *, struct in_addr *)); -extern int fr_lock __P((caddr_t, int *)); -extern int fr_makefrip __P((int, ip_t *, fr_info_t *)); -extern u_short fr_tcpsum __P((mb_t *, ip_t *, tcphdr_t *)); -extern int fr_scanlist __P((u_32_t, ip_t *, fr_info_t *, void *)); -extern int fr_tcpudpchk __P((frtuc_t *, fr_info_t *)); -extern int fr_verifysrc __P((struct in_addr, void *)); - -extern int ipl_unreach; extern int fr_running; -extern u_long ipl_frouteok[2]; +extern u_long fr_frouteok[2]; extern int fr_pass; extern int fr_flags; extern int fr_active; extern int fr_chksrc; extern int fr_minttl; -extern int fr_minttllog; -extern fr_info_t frcache[2]; +extern int fr_refcnt; +extern int fr_control_forwarding; +extern int fr_update_ipid; +extern int nat_logging; +extern int ipstate_logging; +extern int ipl_suppress; +extern int ipl_buffer_sz; +extern int ipl_logmax; +extern int ipl_logall; +extern int ipl_logsize; +extern u_long fr_ticks; +extern fr_info_t frcache[2][8]; extern char ipfilter_version[]; extern iplog_t **iplh[IPL_LOGMAX+1], *iplt[IPL_LOGMAX+1]; -extern size_t iplused[IPL_LOGMAX + 1]; +extern int iplused[IPL_LOGMAX + 1]; extern struct frentry *ipfilter[2][2], *ipacct[2][2]; #ifdef USE_INET6 extern struct frentry *ipfilter6[2][2], 
*ipacct6[2][2]; extern int icmptoicmp6types[ICMP_MAXTYPE+1]; extern int icmptoicmp6unreach[ICMP_MAX_UNREACH]; +extern int icmpreplytype6[ICMP6_MAXTYPE + 1]; #endif -extern struct frgroup *ipfgroups[3][2]; +extern int icmpreplytype4[ICMP_MAXTYPE + 1]; +extern struct frgroup *ipfgroups[IPL_LOGSIZE][2]; extern struct filterstats frstats[]; +extern frentry_t *ipfrule_match __P((fr_info_t *)); +extern u_char ipf_iss_secret[32]; +extern ipftuneable_t ipf_tuneables[]; #endif /* __IP_FIL_H__ */ diff --git a/sys/contrib/ipfilter/netinet/ip_frag.c b/sys/contrib/ipfilter/netinet/ip_frag.c index 9683932..32867c9 100644 --- a/sys/contrib/ipfilter/netinet/ip_frag.c +++ b/sys/contrib/ipfilter/netinet/ip_frag.c @@ -1,45 +1,55 @@ +/* $FreeBSD$ */ + /* - * Copyright (C) 1993-2001 by Darren Reed. + * Copyright (C) 1993-2003 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. */ -#if defined(KERNEL) && !defined(_KERNEL) -# define _KERNEL -#endif - -#if defined(__sgi) && (IRIX > 602) -# include +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 #endif #include #include #include #include #include -#if !defined(_KERNEL) && !defined(KERNEL) +#ifdef __hpux +# include +#endif +#if !defined(_KERNEL) # include # include # include +# define _KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include +# undef _KERNEL #endif -#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && (__FreeBSD_version >= 220000) # include # include #else # include #endif -#ifndef linux +#if !defined(linux) # include #endif #include -#if defined(_KERNEL) && !defined(linux) +#if defined(_KERNEL) # include +# if !defined(__SVR4) && !defined(__svr4__) +# include +# endif #endif #if !defined(__SVR4) && !defined(__svr4__) # if defined(_KERNEL) && !defined(__sgi) # include # endif -# ifndef linux -# include -# endif #else # include # ifdef _KERNEL @@ -56,7 +66,7 @@ #include #include 
#include -#ifndef linux +#if !defined(linux) # include #endif #include @@ -69,63 +79,130 @@ #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_auth.h" +#include "netinet/ip_proxy.h" #if (__FreeBSD_version >= 300000) # include -# if (defined(KERNEL) || defined(_KERNEL)) +# if defined(_KERNEL) # ifndef IPFILTER_LKM # include # include # endif -extern struct callout_handle ipfr_slowtimer_ch; +extern struct callout_handle fr_slowtimer_ch; # endif #endif #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000) # include -extern struct callout ipfr_slowtimer_ch; +extern struct callout fr_slowtimer_ch; #endif #if defined(__OpenBSD__) # include -extern struct timeout ipfr_slowtimer_ch; +extern struct timeout fr_slowtimer_ch; #endif +/* END OF INCLUDES */ #if !defined(lint) static const char sccsid[] = "@(#)ip_frag.c 1.11 3/24/96 (C) 1993-2000 Darren Reed"; static const char rcsid[] = "@(#)$FreeBSD$"; +static const char rcsid[] = "@(#)Id: ip_frag.c,v 2.77 2004/01/27 00:24:54 darrenr Exp"; #endif -static ipfr_t *ipfr_heads[IPFT_SIZE]; -static ipfr_t *ipfr_nattab[IPFT_SIZE]; +static ipfr_t *ipfr_list = NULL; +static ipfr_t **ipfr_tail = &ipfr_list; +static ipfr_t **ipfr_heads; + +static ipfr_t *ipfr_natlist = NULL; +static ipfr_t **ipfr_nattail = &ipfr_natlist; +static ipfr_t **ipfr_nattab; + +static ipfr_t *ipfr_ipidlist = NULL; +static ipfr_t **ipfr_ipidtail = &ipfr_ipidlist; +static ipfr_t **ipfr_ipidtab; + static ipfrstat_t ipfr_stats; static int ipfr_inuse = 0; +int ipfr_size = IPFT_SIZE; int fr_ipfrttl = 120; /* 60 seconds */ int fr_frag_lock = 0; +int fr_frag_init = 0; +u_long fr_ticks = 0; -#ifdef _KERNEL -# if SOLARIS2 >= 7 -extern timeout_id_t ipfr_timer_id; -# else -extern int ipfr_timer_id; -# endif -#endif -#ifdef USE_MUTEX -extern KRWLOCK_T ipf_frag, ipf_natfrag, ipf_nat, ipf_mutex; -# if SOLARIS -extern KRWLOCK_T ipf_solaris; -# else -KRWLOCK_T ipf_solaris; -# endif -extern kmutex_t ipf_rw; -#endif +static ipfr_t *ipfr_newfrag 
__P((fr_info_t *, u_32_t, ipfr_t **)); +static ipfr_t *fr_fraglookup __P((fr_info_t *, ipfr_t **)); +static void fr_fragdelete __P((ipfr_t *, ipfr_t ***)); + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_fraginit */ +/* Returns: int - 0 == success, -1 == error */ +/* Parameters: Nil */ +/* */ +/* Initialise the hash tables for the fragment cache lookups. */ +/* ------------------------------------------------------------------------ */ +int fr_fraginit() +{ + KMALLOCS(ipfr_heads, ipfr_t **, ipfr_size * sizeof(ipfr_t *)); + if (ipfr_heads == NULL) + return -1; + bzero((char *)ipfr_heads, ipfr_size * sizeof(ipfr_t *)); + + KMALLOCS(ipfr_nattab, ipfr_t **, ipfr_size * sizeof(ipfr_t *)); + if (ipfr_nattab == NULL) + return -1; + bzero((char *)ipfr_nattab, ipfr_size * sizeof(ipfr_t *)); + + KMALLOCS(ipfr_ipidtab, ipfr_t **, ipfr_size * sizeof(ipfr_t *)); + if (ipfr_ipidtab == NULL) + return -1; + bzero((char *)ipfr_ipidtab, ipfr_size * sizeof(ipfr_t *)); + + RWLOCK_INIT(&ipf_frag, "ipf fragment rwlock"); + fr_frag_init = 1; + + return 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_fragunload */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Free all memory allocated whilst running and from initialisation. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_fragunload() +{ + if (fr_frag_init == 1) { + fr_fragclear(); + + RW_DESTROY(&ipf_frag); + fr_frag_init = 0; + } + + if (ipfr_heads != NULL) + KFREES(ipfr_heads, ipfr_size * sizeof(ipfr_t *)); + ipfr_heads = NULL; + + if (ipfr_nattab != NULL) + KFREES(ipfr_nattab, ipfr_size * sizeof(ipfr_t *)); + ipfr_nattab = NULL; -static ipfr_t *ipfr_new __P((ip_t *, fr_info_t *, ipfr_t **)); -static ipfr_t *ipfr_lookup __P((ip_t *, fr_info_t *, ipfr_t **)); -static void ipfr_delete __P((ipfr_t *)); + if (ipfr_ipidtab != NULL) + KFREES(ipfr_ipidtab, ipfr_size * sizeof(ipfr_t *)); + ipfr_ipidtab = NULL; +} -ipfrstat_t *ipfr_fragstats() +/* ------------------------------------------------------------------------ */ +/* Function: fr_fragstats */ +/* Returns: ipfrstat_t* - pointer to struct with current frag stats */ +/* Parameters: Nil */ +/* */ +/* Updates ipfr_stats with current information and returns a pointer to it */ +/* ------------------------------------------------------------------------ */ +ipfrstat_t *fr_fragstats() { ipfr_stats.ifs_table = ipfr_heads; ipfr_stats.ifs_nattab = ipfr_nattab; @@ -134,24 +211,36 @@ ipfrstat_t *ipfr_fragstats() } -/* - * add a new entry to the fragment cache, registering it as having come - * through this box, with the result of the filter operation. - */ -static ipfr_t *ipfr_new(ip, fin, table) -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: ipfr_newfrag */ +/* Returns: ipfr_t * - pointer to fragment cache state info or NULL */ +/* Parameters: fin(I) - pointer to packet information */ +/* table(I) - pointer to frag table to add to */ +/* */ +/* Add a new entry to the fragment cache, registering it as having come */ +/* through this box, with the result of the filter operation. 
*/ +/* ------------------------------------------------------------------------ */ +static ipfr_t *ipfr_newfrag(fin, pass, table) fr_info_t *fin; +u_32_t pass; ipfr_t *table[]; { - ipfr_t **fp, *fra, frag; + ipfr_t *fra, frag; u_int idx, off; + ip_t *ip; if (ipfr_inuse >= IPFT_SIZE) return NULL; - if (!(fin->fin_fl & FI_FRAG)) + if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG) return NULL; + ip = fin->fin_ip; + + if (pass & FR_FRSTRICT) + if ((ip->ip_off & IP_OFFMASK) != 0) + return NULL; + frag.ipfr_p = ip->ip_p; idx = ip->ip_p; frag.ipfr_id = ip->ip_id; @@ -172,10 +261,10 @@ ipfr_t *table[]; /* * first, make sure it isn't already there... */ - for (fp = &table[idx]; (fra = *fp); fp = &fra->ipfr_next) - if (!bcmp((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, + for (fra = table[idx]; (fra != NULL); fra = fra->ipfr_hnext) + if (!bcmp((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp, IPFR_CMPSZ)) { - ATOMIC_INCL(ipfr_stats.ifs_exists); + ipfr_stats.ifs_exists++; return NULL; } @@ -185,98 +274,157 @@ ipfr_t *table[]; */ KMALLOC(fra, ipfr_t *); if (fra == NULL) { - ATOMIC_INCL(ipfr_stats.ifs_nomem); + ipfr_stats.ifs_nomem++; return NULL; } - if ((fra->ipfr_rule = fin->fin_fr) != NULL) { - ATOMIC_INC32(fin->fin_fr->fr_ref); - } - + if ((fra->ipfr_rule = fin->fin_fr) != NULL) + fin->fin_fr->fr_ref++; /* * Insert the fragment into the fragment table, copy the struct used * in the search using bcopy rather than reassign each field. * Set the ttl to the default. 
*/ - if ((fra->ipfr_next = table[idx])) - table[idx]->ipfr_prev = fra; - fra->ipfr_prev = NULL; + if ((fra->ipfr_hnext = table[idx]) != NULL) + table[idx]->ipfr_hprev = &fra->ipfr_hnext; + fra->ipfr_hprev = table + idx; fra->ipfr_data = NULL; table[idx] = fra; - bcopy((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, IPFR_CMPSZ); - fra->ipfr_ttl = fr_ipfrttl; + bcopy((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp, IPFR_CMPSZ); + fra->ipfr_ttl = fr_ticks + fr_ipfrttl; + /* * Compute the offset of the expected start of the next packet. */ off = ip->ip_off & IP_OFFMASK; - if (!off) + if (off == 0) fra->ipfr_seen0 = 1; fra->ipfr_off = off + (fin->fin_dlen >> 3); - ATOMIC_INCL(ipfr_stats.ifs_new); - ATOMIC_INC32(ipfr_inuse); + fra->ipfr_pass = pass; + ipfr_stats.ifs_new++; + ipfr_inuse++; return fra; } -int ipfr_newfrag(ip, fin) -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: fr_newfrag */ +/* Returns: int - 0 == success, -1 == error */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Add a new entry to the fragment cache table based on the current packet */ +/* ------------------------------------------------------------------------ */ +int fr_newfrag(fin, pass) +u_32_t pass; fr_info_t *fin; { - ipfr_t *ipf; + ipfr_t *fra; - if ((ip->ip_v != 4) || (fr_frag_lock)) + if ((fin->fin_v != 4) || (fr_frag_lock != 0)) return -1; + WRITE_ENTER(&ipf_frag); - ipf = ipfr_new(ip, fin, ipfr_heads); - RWLOCK_EXIT(&ipf_frag); - if (ipf == NULL) { - ATOMIC_INCL(frstats[fin->fin_out].fr_bnfr); - return -1; + fra = ipfr_newfrag(fin, pass, ipfr_heads); + if (fra != NULL) { + *ipfr_tail = fra; + fra->ipfr_prev = ipfr_tail; + ipfr_tail = &fra->ipfr_next; + if (ipfr_list == NULL) + ipfr_list = fra; + fra->ipfr_next = NULL; } - ATOMIC_INCL(frstats[fin->fin_out].fr_nfr); - return 0; + RWLOCK_EXIT(&ipf_frag); + return fra ? 
0 : -1; } -int ipfr_nat_newfrag(ip, fin, nat) -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: fr_nat_newfrag */ +/* Returns: int - 0 == success, -1 == error */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT structure */ +/* */ +/* Create a new NAT fragment cache entry based on the current packet and */ +/* the NAT structure for this "session". */ +/* ------------------------------------------------------------------------ */ +int fr_nat_newfrag(fin, pass, nat) fr_info_t *fin; +u_32_t pass; nat_t *nat; { - ipfr_t *ipf; - int off; + ipfr_t *fra; - if ((ip->ip_v != 4) || (fr_frag_lock)) - return -1; - - off = fin->fin_off; - off <<= 3; - if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0)) - return -1; + if ((fin->fin_v != 4) || (fr_frag_lock != 0)) + return 0; WRITE_ENTER(&ipf_natfrag); - ipf = ipfr_new(ip, fin, ipfr_nattab); - if (ipf != NULL) { - ipf->ipfr_data = nat; - nat->nat_data = ipf; + fra = ipfr_newfrag(fin, pass, ipfr_nattab); + if (fra != NULL) { + fra->ipfr_data = nat; + nat->nat_data = fra; + *ipfr_nattail = fra; + fra->ipfr_prev = ipfr_nattail; + ipfr_nattail = &fra->ipfr_next; + fra->ipfr_next = NULL; } RWLOCK_EXIT(&ipf_natfrag); - return ipf ? 0 : -1; + return fra ? 0 : -1; } -/* - * check the fragment cache to see if there is already a record of this packet - * with its filter result known. - */ -static ipfr_t *ipfr_lookup(ip, fin, table) -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: fr_ipid_newfrag */ +/* Returns: int - 0 == success, -1 == error */ +/* Parameters: fin(I) - pointer to packet information */ +/* ipid(I) - new IP ID for this fragmented packet */ +/* */ +/* Create a new fragment cache entry for this packet and store, as a data */ +/* pointer, the new IP ID value. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_ipid_newfrag(fin, ipid) +fr_info_t *fin; +u_32_t ipid; +{ + ipfr_t *fra; + + if ((fin->fin_v != 4) || (fr_frag_lock)) + return 0; + + WRITE_ENTER(&ipf_ipidfrag); + fra = ipfr_newfrag(fin, 0, ipfr_ipidtab); + if (fra != NULL) { + fra->ipfr_data = (void *)ipid; + *ipfr_ipidtail = fra; + fra->ipfr_prev = ipfr_ipidtail; + ipfr_ipidtail = &fra->ipfr_next; + fra->ipfr_next = NULL; + } + RWLOCK_EXIT(&ipf_ipidfrag); + return fra ? 0 : -1; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_fraglookup */ +/* Returns: ipfr_t * - pointer to ipfr_t structure if there's a */ +/* matching entry in the frag table, else NULL */ +/* Parameters: fin(I) - pointer to packet information */ +/* table(I) - pointer to fragment cache table to search */ +/* */ +/* Check the fragment cache to see if there is already a record of this */ +/* packet with its filter result known. */ +/* ------------------------------------------------------------------------ */ +static ipfr_t *fr_fraglookup(fin, table) fr_info_t *fin; ipfr_t *table[]; { - ipfr_t *f, frag; - u_int idx; + ipfr_t *f, frag; + u_int idx; + ip_t *ip; + + if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG) + return NULL; /* * For fragments, we record protocol, packet id, TOS and both IP#'s @@ -284,6 +432,7 @@ ipfr_t *table[]; * * build up a hash value to index the table with. 
*/ + ip = fin->fin_ip; frag.ipfr_p = ip->ip_p; idx = ip->ip_p; frag.ipfr_id = ip->ip_id; @@ -304,48 +453,71 @@ ipfr_t *table[]; /* * check the table, careful to only compare the right amount of data */ - for (f = table[idx]; f; f = f->ipfr_next) - if (!bcmp((char *)&frag.ipfr_src, (char *)&f->ipfr_src, + for (f = table[idx]; f; f = f->ipfr_hnext) + if (!bcmp((char *)&frag.ipfr_ifp, (char *)&f->ipfr_ifp, IPFR_CMPSZ)) { - u_short atoff, off; + u_short off; - off = fin->fin_off; + /* + * We don't want to let short packets match because + * they could be compromising the security of other + * rules that want to match on layer 4 fields (and + * can't because they have been fragmented off.) + * Why do this check here? The counter acts as an + * indicator of this kind of attack, whereas if it was + * elsewhere, it wouldn't know if other matching + * packets had been seen. + */ + if (fin->fin_flx & FI_SHORT) { + ATOMIC_INCL(ipfr_stats.ifs_short); + continue; + } /* * XXX - We really need to be guarding against the * retransmission of (src,dst,id,offset-range) here * because a fragmented packet is never resent with - * the same IP ID#. + * the same IP ID# (or shouldn't). */ + off = ip->ip_off & IP_OFFMASK; if (f->ipfr_seen0) { - if (!off || (fin->fin_fl & FI_SHORT)) + if (off == 0) { + ATOMIC_INCL(ipfr_stats.ifs_retrans0); continue; - } else if (!off) + } + } else if (off == 0) f->ipfr_seen0 = 1; if (f != table[idx]) { + ipfr_t **fp; + /* - * move fragment info. to the top of the list - * to speed up searches. + * Move fragment info. to the top of the list + * to speed up searches. First, delink... */ - if ((f->ipfr_prev->ipfr_next = f->ipfr_next)) - f->ipfr_next->ipfr_prev = f->ipfr_prev; - f->ipfr_next = table[idx]; - table[idx]->ipfr_prev = f; - f->ipfr_prev = NULL; + fp = f->ipfr_hprev; + (*fp) = f->ipfr_hnext; + if (f->ipfr_hnext != NULL) + f->ipfr_hnext->ipfr_hprev = fp; + /* + * Then put back at the top of the chain. 
+ */ + f->ipfr_hnext = table[idx]; + table[idx]->ipfr_hprev = &f->ipfr_hnext; + f->ipfr_hprev = table + idx; table[idx] = f; } - atoff = off + (fin->fin_dlen >> 3); + /* * If we've follwed the fragments, and this is the * last (in order), shrink expiration time. */ if (off == f->ipfr_off) { if (!(ip->ip_off & IP_MF)) - f->ipfr_ttl = 1; - else - f->ipfr_off = atoff; - } + f->ipfr_ttl = fr_ticks + 1; + f->ipfr_off = (fin->fin_dlen >> 3) + off; + } else if (f->ipfr_pass & FR_FRSTRICT) + continue; ATOMIC_INCL(ipfr_stats.ifs_hits); return f; } @@ -353,33 +525,30 @@ ipfr_t *table[]; } -/* - * functional interface for NAT lookups of the NAT fragment cache - */ -nat_t *ipfr_nat_knownfrag(ip, fin) -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: fr_nat_knownfrag */ +/* Returns: nat_t* - pointer to 'parent' NAT structure if frag table */ +/* match found, else NULL */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Functional interface for NAT lookups of the NAT fragment cache */ +/* ------------------------------------------------------------------------ */ +nat_t *fr_nat_knownfrag(fin) fr_info_t *fin; { - ipfr_t *ipf; - nat_t *nat; - int off; - - if ((fin->fin_v != 4) || (fr_frag_lock)) - return NULL; + nat_t *nat; + ipfr_t *ipf; - off = fin->fin_off; - off <<= 3; - if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0)) + if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_natlist) return NULL; - READ_ENTER(&ipf_natfrag); - ipf = ipfr_lookup(ip, fin, ipfr_nattab); + ipf = fr_fraglookup(fin, ipfr_nattab); if (ipf != NULL) { nat = ipf->ipfr_data; /* * This is the last fragment for this packet. 
*/ - if ((ipf->ipfr_ttl == 1) && (nat != NULL)) { + if ((ipf->ipfr_ttl == fr_ticks + 1) && (nat != NULL)) { nat->nat_data = NULL; ipf->ipfr_data = NULL; } @@ -390,136 +559,196 @@ fr_info_t *fin; } -/* - * functional interface for normal lookups of the fragment cache - */ -frentry_t *ipfr_knownfrag(ip, fin) -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: fr_ipid_knownfrag */ +/* Returns: u_32_t - IPv4 ID for this packet if match found, else */ +/* return 0xfffffff to indicate no match. */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* Functional interface for IP ID lookups of the IP ID fragment cache */ +/* ------------------------------------------------------------------------ */ +u_32_t fr_ipid_knownfrag(fin) fr_info_t *fin; { - frentry_t *fr; - ipfr_t *fra; - int off; + ipfr_t *ipf; + u_32_t id; + + if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_ipidlist) + return 0xffffffff; + + READ_ENTER(&ipf_ipidfrag); + ipf = fr_fraglookup(fin, ipfr_ipidtab); + if (ipf != NULL) + id = (u_32_t)ipf->ipfr_data; + else + id = 0xffffffff; + RWLOCK_EXIT(&ipf_ipidfrag); + return id; +} - if ((fin->fin_v != 4) || (fr_frag_lock)) - return NULL; - off = fin->fin_off; - off <<= 3; - if ((off + fin->fin_dlen) > 0xffff || (fin->fin_dlen == 0)) +/* ------------------------------------------------------------------------ */ +/* Function: fr_knownfrag */ +/* Returns: frentry_t* - pointer to filter rule if a match is found in */ +/* the frag cache table, else NULL. */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(O) - pointer to where to store rule flags resturned */ +/* */ +/* Functional interface for normal lookups of the fragment cache. If a */ +/* match is found, return the rule pointer and flags from the rule, except */ +/* that if FR_LOGFIRST is set, reset FR_LOG. 
*/ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_knownfrag(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + frentry_t *fr = NULL; + ipfr_t *fra; + u_32_t pass; + + if ((fin->fin_v != 4) || (fr_frag_lock) || (ipfr_list == NULL)) return NULL; READ_ENTER(&ipf_frag); - fra = ipfr_lookup(ip, fin, ipfr_heads); - if (fra != NULL) + fra = fr_fraglookup(fin, ipfr_heads); + if (fra != NULL) { fr = fra->ipfr_rule; - else - fr = NULL; + fin->fin_fr = fr; + if (fr != NULL) { + pass = fr->fr_flags; + if ((pass & FR_LOGFIRST) != 0) + pass &= ~(FR_LOGFIRST|FR_LOG); + *passp = pass; + } + } RWLOCK_EXIT(&ipf_frag); return fr; } -/* - * forget any references to this external object. - */ -void ipfr_forget(ptr) +/* ------------------------------------------------------------------------ */ +/* Function: fr_forget */ +/* Returns: Nil */ +/* Parameters: ptr(I) - pointer to data structure */ +/* */ +/* Search through all of the fragment cache entries and wherever a pointer */ +/* is found to match ptr, reset it to NULL. */ +/* ------------------------------------------------------------------------ */ +void fr_forget(ptr) void *ptr; { ipfr_t *fr; - int idx; WRITE_ENTER(&ipf_frag); - for (idx = IPFT_SIZE - 1; idx >= 0; idx--) - for (fr = ipfr_heads[idx]; fr; fr = fr->ipfr_next) - if (fr->ipfr_data == ptr) - fr->ipfr_data = NULL; - + for (fr = ipfr_list; fr; fr = fr->ipfr_next) + if (fr->ipfr_data == ptr) + fr->ipfr_data = NULL; RWLOCK_EXIT(&ipf_frag); } -/* - * forget any references to this external object. - */ -void ipfr_forgetnat(nat) -void *nat; +/* ------------------------------------------------------------------------ */ +/* Function: fr_forgetnat */ +/* Returns: Nil */ +/* Parameters: ptr(I) - pointer to data structure */ +/* */ +/* Search through all of the fragment cache entries for NAT and wherever a */ +/* pointer is found to match ptr, reset it to NULL. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_forgetnat(ptr) +void *ptr; { ipfr_t *fr; - int idx; WRITE_ENTER(&ipf_natfrag); - for (idx = IPFT_SIZE - 1; idx >= 0; idx--) - for (fr = ipfr_nattab[idx]; fr; fr = fr->ipfr_next) - if (fr->ipfr_data == nat) - fr->ipfr_data = NULL; - + for (fr = ipfr_natlist; fr; fr = fr->ipfr_next) + if (fr->ipfr_data == ptr) + fr->ipfr_data = NULL; RWLOCK_EXIT(&ipf_natfrag); } -static void ipfr_delete(fra) -ipfr_t *fra; +/* ------------------------------------------------------------------------ */ +/* Function: fr_fragdelete */ +/* Returns: Nil */ +/* Parameters: fra(I) - pointer to fragment structure to delete */ +/* tail(IO) - pointer to the pointer to the tail of the frag */ +/* list */ +/* */ +/* Remove a fragment cache table entry from the table & list. Also free */ +/* the filter rule it is associated with it if it is no longer used as a */ +/* result of decreasing the reference count. */ +/* ------------------------------------------------------------------------ */ +static void fr_fragdelete(fra, tail) +ipfr_t *fra, ***tail; { frentry_t *fr; fr = fra->ipfr_rule; - if (fr != NULL) { - ATOMIC_DEC32(fr->fr_ref); - if (fr->fr_ref == 0) - KFREE(fr); - } - if (fra->ipfr_prev) - fra->ipfr_prev->ipfr_next = fra->ipfr_next; + if (fr != NULL) + (void)fr_derefrule(&fr); + if (fra->ipfr_next) fra->ipfr_next->ipfr_prev = fra->ipfr_prev; + *fra->ipfr_prev = fra->ipfr_next; + if (*tail == &fra->ipfr_next) + *tail = fra->ipfr_prev; + + if (fra->ipfr_hnext) + fra->ipfr_hnext->ipfr_hprev = fra->ipfr_hprev; + *fra->ipfr_hprev = fra->ipfr_hnext; KFREE(fra); } -/* - * Free memory in use by fragment state info. kept. - */ -void ipfr_unload() +/* ------------------------------------------------------------------------ */ +/* Function: fr_fragclear */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Free memory in use by fragment state information kept. 
Do the normal */ +/* fragment state stuff first and then the NAT-fragment table. */ +/* ------------------------------------------------------------------------ */ +void fr_fragclear() { - ipfr_t **fp, *fra; + ipfr_t *fra; nat_t *nat; - int idx; WRITE_ENTER(&ipf_frag); - for (idx = IPFT_SIZE - 1; idx >= 0; idx--) - for (fp = &ipfr_heads[idx]; (fra = *fp); ) { - *fp = fra->ipfr_next; - ipfr_delete(fra); - } + while ((fra = ipfr_list) != NULL) + fr_fragdelete(fra, &ipfr_tail); + ipfr_tail = &ipfr_list; RWLOCK_EXIT(&ipf_frag); WRITE_ENTER(&ipf_nat); WRITE_ENTER(&ipf_natfrag); - for (idx = IPFT_SIZE - 1; idx >= 0; idx--) - for (fp = &ipfr_nattab[idx]; (fra = *fp); ) { - *fp = fra->ipfr_next; - nat = fra->ipfr_data; - if (nat != NULL) { - if (nat->nat_data == fra) - nat->nat_data = NULL; - } - ipfr_delete(fra); + while ((fra = ipfr_natlist) != NULL) { + nat = fra->ipfr_data; + if (nat != NULL) { + if (nat->nat_data == fra) + nat->nat_data = NULL; } + fr_fragdelete(fra, &ipfr_nattail); + } + ipfr_nattail = &ipfr_natlist; RWLOCK_EXIT(&ipf_natfrag); RWLOCK_EXIT(&ipf_nat); } -void ipfr_fragexpire() +/* ------------------------------------------------------------------------ */ +/* Function: fr_fragexpire */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Expire entries in the fragment cache table that have been there too long */ +/* ------------------------------------------------------------------------ */ +void fr_fragexpire() { ipfr_t **fp, *fra; nat_t *nat; - int idx; -#if defined(_KERNEL) -# if !SOLARIS +#if defined(USE_SPL) && defined(_KERNEL) int s; -# endif #endif if (fr_frag_lock) @@ -527,25 +756,29 @@ void ipfr_fragexpire() SPL_NET(s); WRITE_ENTER(&ipf_frag); - /* * Go through the entire table, looking for entries to expire, - * decreasing the ttl by one for each entry. If it reaches 0, - * remove it from the chain and free it. + * which is indicated by the ttl being less than or equal to fr_ticks. 
*/ - for (idx = IPFT_SIZE - 1; idx >= 0; idx--) - for (fp = &ipfr_heads[idx]; (fra = *fp); ) { - --fra->ipfr_ttl; - if (fra->ipfr_ttl == 0) { - *fp = fra->ipfr_next; - ipfr_delete(fra); - ATOMIC_INCL(ipfr_stats.ifs_expire); - ATOMIC_DEC32(ipfr_inuse); - } else - fp = &fra->ipfr_next; - } + for (fp = &ipfr_list; ((fra = *fp) != NULL); ) { + if (fra->ipfr_ttl > fr_ticks) + break; + fr_fragdelete(fra, &ipfr_tail); + ipfr_stats.ifs_expire++; + ipfr_inuse--; + } RWLOCK_EXIT(&ipf_frag); + WRITE_ENTER(&ipf_ipidfrag); + for (fp = &ipfr_ipidlist; ((fra = *fp) != NULL); ) { + if (fra->ipfr_ttl > fr_ticks) + break; + fr_fragdelete(fra, &ipfr_ipidtail); + ipfr_stats.ifs_expire++; + ipfr_inuse--; + } + RWLOCK_EXIT(&ipf_ipidfrag); + /* * Same again for the NAT table, except that if the structure also * still points to a NAT structure, and the NAT structure points back @@ -555,83 +788,72 @@ void ipfr_fragexpire() */ WRITE_ENTER(&ipf_nat); WRITE_ENTER(&ipf_natfrag); - for (idx = IPFT_SIZE - 1; idx >= 0; idx--) - for (fp = &ipfr_nattab[idx]; (fra = *fp); ) { - --fra->ipfr_ttl; - if (fra->ipfr_ttl == 0) { - ATOMIC_INCL(ipfr_stats.ifs_expire); - ATOMIC_DEC32(ipfr_inuse); - nat = fra->ipfr_data; - if (nat != NULL) { - if (nat->nat_data == fra) - nat->nat_data = NULL; - } - *fp = fra->ipfr_next; - ipfr_delete(fra); - } else - fp = &fra->ipfr_next; + for (fp = &ipfr_natlist; ((fra = *fp) != NULL); ) { + if (fra->ipfr_ttl > fr_ticks) + break; + nat = fra->ipfr_data; + if (nat != NULL) { + if (nat->nat_data == fra) + nat->nat_data = NULL; } + fr_fragdelete(fra, &ipfr_nattail); + ipfr_stats.ifs_expire++; + ipfr_inuse--; + } RWLOCK_EXIT(&ipf_natfrag); RWLOCK_EXIT(&ipf_nat); SPL_X(s); } -/* - * Slowly expire held state for fragments. Timeouts are set * in expectation - * of this being called twice per second. 
- */ -#ifdef _KERNEL -# if (BSD >= 199306) || SOLARIS || defined(__sgi) -# if defined(SOLARIS2) && (SOLARIS2 < 7) -void ipfr_slowtimer() -# else -void ipfr_slowtimer __P((void *ptr)) -# endif +/* ------------------------------------------------------------------------ */ +/* Function: fr_slowtimer */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Slowly expire held state for fragments. Timeouts are set * in */ +/* expectation of this being called twice per second. */ +/* ------------------------------------------------------------------------ */ +#if !defined(_KERNEL) || (!SOLARIS && !defined(__hpux) && !defined(__sgi) && \ + !defined(__osf__)) +# if defined(_KERNEL) && ((BSD >= 199103) || defined(__sgi)) +void fr_slowtimer __P((void *ptr)) # else -int ipfr_slowtimer() +int fr_slowtimer() # endif -#else -void ipfr_slowtimer() -#endif { -#if defined(_KERNEL) && SOLARIS - extern int fr_running; - - if (fr_running <= 0) - return; - READ_ENTER(&ipf_solaris); -#endif + READ_ENTER(&ipf_global); -#if defined(__sgi) && defined(_KERNEL) - ipfilter_sgi_intfsync(); -#endif - - ipfr_fragexpire(); + fr_fragexpire(); fr_timeoutstate(); - ip_natexpire(); + fr_natexpire(); fr_authexpire(); -#if defined(_KERNEL) -# if SOLARIS - ipfr_timer_id = timeout(ipfr_slowtimer, NULL, drv_usectohz(500000)); - RWLOCK_EXIT(&ipf_solaris); -# else + fr_ticks++; + if (fr_running <= 0) + goto done; +# ifdef _KERNEL # if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000) - callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL); + callout_reset(&fr_slowtimer_ch, hz / 2, fr_slowtimer, NULL); # else -# if (__FreeBSD_version >= 300000) - ipfr_slowtimer_ch = timeout(ipfr_slowtimer, NULL, hz/2); +# if defined(__OpenBSD__) + timeout_add(&fr_slowtimer_ch, hz/2); # else -# if defined(__OpenBSD__) - timeout_add(&ipfr_slowtimer_ch, hz/2); +# if (__FreeBSD_version >= 300000) + fr_slowtimer_ch = timeout(fr_slowtimer, NULL, hz/2); # else - timeout(ipfr_slowtimer, NULL, hz/2); -# endif -# 
endif -# if (BSD < 199306) && !defined(__sgi) - return 0; -# endif /* FreeBSD */ +# ifdef linux + ; +# else + timeout(fr_slowtimer, NULL, hz/2); +# endif +# endif /* FreeBSD */ +# endif /* OpenBSD */ # endif /* NetBSD */ -# endif /* SOLARIS */ -#endif /* defined(_KERNEL) */ +# endif +done: + RWLOCK_EXIT(&ipf_global); +# if (BSD < 199103) || !defined(_KERNEL) + return 0; +# endif } +#endif /* !SOLARIS && !defined(__hpux) && !defined(__sgi) */ diff --git a/sys/contrib/ipfilter/netinet/ip_frag.h b/sys/contrib/ipfilter/netinet/ip_frag.h index 4055cb1..5198999 100644 --- a/sys/contrib/ipfilter/netinet/ip_frag.h +++ b/sys/contrib/ipfilter/netinet/ip_frag.h @@ -1,11 +1,13 @@ +/* $FreeBSD$ */ + /* * Copyright (C) 1993-2001 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. * * @(#)ip_frag.h 1.5 3/24/96 - * $Id: ip_frag.h,v 2.4.2.2 2000/11/10 13:10:54 darrenr Exp $ * $FreeBSD$ + * Id: ip_frag.h,v 2.23.2.1 2004/03/29 16:21:56 darrenr Exp */ #ifndef __IP_FRAG_H__ @@ -14,17 +16,19 @@ #define IPFT_SIZE 257 typedef struct ipfr { - struct ipfr *ipfr_next, *ipfr_prev; + struct ipfr *ipfr_hnext, **ipfr_hprev; + struct ipfr *ipfr_next, **ipfr_prev; void *ipfr_data; + void *ipfr_ifp; struct in_addr ipfr_src; struct in_addr ipfr_dst; - void *ipfr_ifp; u_32_t ipfr_optmsk; u_short ipfr_secmsk; u_short ipfr_auth; u_short ipfr_id; u_char ipfr_p; u_char ipfr_tos; + u_32_t ipfr_pass; u_short ipfr_off; u_char ipfr_ttl; u_char ipfr_seen0; @@ -39,37 +43,45 @@ typedef struct ipfrstat { u_long ifs_hits; u_long ifs_expire; u_long ifs_inuse; + u_long ifs_retrans0; + u_long ifs_short; struct ipfr **ifs_table; struct ipfr **ifs_nattab; } ipfrstat_t; -#define IPFR_CMPSZ (offsetof(ipfr_t, ipfr_off) - \ - offsetof(ipfr_t, ipfr_src)) +#define IPFR_CMPSZ (offsetof(ipfr_t, ipfr_pass) - \ + offsetof(ipfr_t, ipfr_ifp)) +extern int ipfr_size; extern int fr_ipfrttl; extern int fr_frag_lock; -extern ipfrstat_t *ipfr_fragstats __P((void)); -extern int ipfr_newfrag __P((ip_t *, fr_info_t 
*)); -extern int ipfr_nat_newfrag __P((ip_t *, fr_info_t *, struct nat *)); -extern nat_t *ipfr_nat_knownfrag __P((ip_t *, fr_info_t *)); -extern frentry_t *ipfr_knownfrag __P((ip_t *, fr_info_t *)); -extern void ipfr_forget __P((void *)); -extern void ipfr_forgetnat __P((void *)); -extern void ipfr_unload __P((void)); -extern void ipfr_fragexpire __P((void)); +extern int fr_fraginit __P((void)); +extern void fr_fragunload __P((void)); +extern ipfrstat_t *fr_fragstats __P((void)); + +extern int fr_newfrag __P((fr_info_t *, u_32_t)); +extern frentry_t *fr_knownfrag __P((fr_info_t *, u_32_t *)); + +extern int fr_nat_newfrag __P((fr_info_t *, u_32_t, struct nat *)); +extern nat_t *fr_nat_knownfrag __P((fr_info_t *)); + +extern int fr_ipid_newfrag __P((fr_info_t *, u_32_t)); +extern u_32_t fr_ipid_knownfrag __P((fr_info_t *)); + +extern void fr_forget __P((void *)); +extern void fr_forgetnat __P((void *)); +extern void fr_fragclear __P((void)); +extern void fr_fragexpire __P((void)); -#ifdef _KERNEL -# if (BSD >= 199306) || SOLARIS || defined(__sgi) -# if defined(SOLARIS2) && (SOLARIS2 < 7) -extern void ipfr_slowtimer __P((void)); -# else -extern void ipfr_slowtimer __P((void *)); -# endif +#if defined(_KERNEL) && ((BSD >= 199306) || SOLARIS || defined(__sgi) \ + || defined(__osf__) || (defined(__sgi) && (IRIX >= 60500))) +# if defined(SOLARIS2) && (SOLARIS2 < 7) +extern void fr_slowtimer __P((void)); # else -extern int ipfr_slowtimer __P((void)); -# endif /* (BSD >= 199306) || SOLARIS */ +extern void fr_slowtimer __P((void *)); +# endif #else -extern void ipfr_slowtimer __P((void)); -#endif /* _KERNEL */ +extern int fr_slowtimer __P((void)); +#endif -#endif /* __IP_FIL_H__ */ +#endif /* __IP_FRAG_H__ */ diff --git a/sys/contrib/ipfilter/netinet/ip_ftp_pxy.c b/sys/contrib/ipfilter/netinet/ip_ftp_pxy.c index e8bf216..91a48c0 100644 --- a/sys/contrib/ipfilter/netinet/ip_ftp_pxy.c +++ b/sys/contrib/ipfilter/netinet/ip_ftp_pxy.c @@ -1,18 +1,16 @@ +/* $FreeBSD$ */ + /* + * 
Copyright (C) 1997-2003 by Darren Reed + * + * See the IPFILTER.LICENCE file for details on licencing. + * * Simple FTP transparent proxy for in-kernel use. For use with the NAT * code. * * $FreeBSD$ + * Id: ip_ftp_pxy.c,v 2.88.2.15 2005/03/19 19:38:10 darrenr Exp */ -#if SOLARIS && defined(_KERNEL) -extern kmutex_t ipf_rw; -#endif - -#define isdigit(x) ((x) >= '0' && (x) <= '9') -#define isupper(x) (((unsigned)(x) >= 'A') && ((unsigned)(x) <= 'Z')) -#define islower(x) (((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z')) -#define isalpha(x) (isupper(x) || islower(x)) -#define toupper(x) (isupper(x) ? (x) : (x) - 'a' + 'A') #define IPF_FTP_PROXY @@ -20,7 +18,8 @@ extern kmutex_t ipf_rw; #define IPF_MAXPORTLEN 30 #define IPF_MIN227LEN 39 #define IPF_MAX227LEN 51 -#define IPF_FTPBUFSZ 96 /* This *MUST* be >= 53! */ +#define IPF_MIN229LEN 47 +#define IPF_MAX229LEN 51 #define FTPXY_GO 0 #define FTPXY_INIT 1 @@ -46,23 +45,53 @@ extern kmutex_t ipf_rw; int ippr_ftp_client __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int)); int ippr_ftp_complete __P((char *, size_t)); -int ippr_ftp_in __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *)); +int ippr_ftp_in __P((fr_info_t *, ap_session_t *, nat_t *)); int ippr_ftp_init __P((void)); -int ippr_ftp_new __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *)); -int ippr_ftp_out __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *)); +void ippr_ftp_fini __P((void)); +int ippr_ftp_new __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_ftp_out __P((fr_info_t *, ap_session_t *, nat_t *)); int ippr_ftp_pasv __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int)); +int ippr_ftp_epsv __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, int)); int ippr_ftp_port __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, int)); -int ippr_ftp_process __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int)); +int ippr_ftp_process __P((fr_info_t *, nat_t *, ftpinfo_t *, int)); int ippr_ftp_server __P((fr_info_t *, ip_t *, nat_t *, ftpinfo_t *, int)); int 
ippr_ftp_valid __P((ftpinfo_t *, int, char *, size_t)); int ippr_ftp_server_valid __P((ftpside_t *, char *, size_t)); int ippr_ftp_client_valid __P((ftpside_t *, char *, size_t)); u_short ippr_ftp_atoi __P((char **)); +int ippr_ftp_pasvreply __P((fr_info_t *, ip_t *, nat_t *, ftpside_t *, + u_int, char *, char *, u_int)); -static frentry_t ftppxyfr; + +int ftp_proxy_init = 0; int ippr_ftp_pasvonly = 0; -int ippr_ftp_insecure = 0; -int ippr_ftp_forcepasv = 0; +int ippr_ftp_insecure = 0; /* Do not require logins before transfers */ +int ippr_ftp_pasvrdr = 0; +int ippr_ftp_forcepasv = 0; /* PASV must be last command prior to 227 */ +#if defined(_KERNEL) +int ippr_ftp_debug = 0; +#else +int ippr_ftp_debug = 2; +#endif +/* + * 1 - security + * 2 - errors + * 3 - error debugging + * 4 - parsing errors + * 5 - parsing info + * 6 - parsing debug + */ + +static frentry_t ftppxyfr; +static ipftuneable_t ftptune = { + { &ippr_ftp_debug }, + "ippr_ftp_debug", + 0, + 10, + sizeof(ippr_ftp_debug), + 0, + NULL +}; /* @@ -73,13 +102,27 @@ int ippr_ftp_init() bzero((char *)&ftppxyfr, sizeof(ftppxyfr)); ftppxyfr.fr_ref = 1; ftppxyfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&ftppxyfr.fr_lock, "FTP Proxy Mutex"); + ftp_proxy_init = 1; + (void) fr_addipftune(&ftptune); + return 0; } -int ippr_ftp_new(fin, ip, aps, nat) +void ippr_ftp_fini() +{ + (void) fr_delipftune(&ftptune); + + if (ftp_proxy_init == 1) { + MUTEX_DESTROY(&ftppxyfr.fr_lock); + ftp_proxy_init = 0; + } +} + + +int ippr_ftp_new(fin, aps, nat) fr_info_t *fin; -ip_t *ip; ap_session_t *aps; nat_t *nat; { @@ -89,6 +132,10 @@ nat_t *nat; KMALLOC(ftp, ftpinfo_t *); if (ftp == NULL) return -1; + + fin = fin; /* LINT */ + nat = nat; /* LINT */ + aps->aps_data = ftp; aps->aps_psiz = sizeof(ftpinfo_t); @@ -100,6 +147,7 @@ nat_t *nat; f->ftps_rptr = f->ftps_buf; f->ftps_wptr = f->ftps_buf; ftp->ftp_passok = FTPXY_INIT; + ftp->ftp_incok = 0; return 0; } @@ -113,30 +161,28 @@ int dlen; { tcphdr_t *tcp, tcph, 
*tcp2 = &tcph; char newbuf[IPF_FTPBUFSZ], *s; + struct in_addr swip, swip2; u_int a1, a2, a3, a4; - struct in_addr swip; + int inc, off, flags; u_short a5, a6, sp; size_t nlen, olen; fr_info_t fi; - int inc, off; - nat_t *ipn; + nat_t *nat2; mb_t *m; -#if SOLARIS && defined(_KERNEL) - mb_t *m1; -#endif + m = fin->fin_m; tcp = (tcphdr_t *)fin->fin_dp; + off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff; + /* * Check for client sending out PORT message. */ if (dlen < IPF_MINPORTLEN) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, - "ippr_ftp_port:dlen(%d) < IPF_MINPORTLEN\n", dlen); -#endif + if (ippr_ftp_debug > 1) + printf("ippr_ftp_port:dlen(%d) < IPF_MINPORTLEN\n", + dlen); return 0; } - off = fin->fin_hlen + (tcp->th_off << 2); /* * Skip the PORT command + space */ @@ -146,36 +192,36 @@ int dlen; */ a1 = ippr_ftp_atoi(&s); if (s == NULL) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_port:ippr_ftp_atoi(1) failed\n"); -#endif + if (ippr_ftp_debug > 1) + printf("ippr_ftp_port:ippr_ftp_atoi(%d) failed\n", 1); return 0; } a2 = ippr_ftp_atoi(&s); if (s == NULL) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_port:ippr_ftp_atoi(2) failed\n"); -#endif + if (ippr_ftp_debug > 1) + printf("ippr_ftp_port:ippr_ftp_atoi(%d) failed\n", 2); return 0; } + /* - * check that IP address in the PORT/PASV reply is the same as the + * Check that IP address in the PORT/PASV reply is the same as the * sender of the command - prevents using PORT for port scanning. 
*/ a1 <<= 16; a1 |= a2; - if (a1 != ntohl(nat->nat_inip.s_addr)) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_port:a1 != nat->nat_inip\n"); -#endif - return 0; + if (((nat->nat_dir == NAT_OUTBOUND) && + (a1 != ntohl(nat->nat_inip.s_addr))) || + ((nat->nat_dir == NAT_INBOUND) && + (a1 != ntohl(nat->nat_oip.s_addr)))) { + if (ippr_ftp_debug > 0) + printf("ippr_ftp_port:%s != nat->nat_inip\n", "a1"); + return APR_ERR(1); } a5 = ippr_ftp_atoi(&s); if (s == NULL) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_port:ippr_ftp_atoi(3) failed\n"); -#endif + if (ippr_ftp_debug > 1) + printf("ippr_ftp_port:ippr_ftp_atoi(%d) failed\n", 3); return 0; } if (*s == ')') @@ -190,26 +236,39 @@ int dlen; s += 2; a6 = a5 & 0xff; } else { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_port:missing cr-lf\n"); -#endif + if (ippr_ftp_debug > 1) + printf("ippr_ftp_port:missing %s\n", "cr-lf"); return 0; } + a5 >>= 8; a5 &= 0xff; + sp = a5 << 8 | a6; + /* + * Don't allow the PORT command to specify a port < 1024 due to + * security crap. + */ + if (sp < 1024) { + if (ippr_ftp_debug > 0) + printf("ippr_ftp_port:sp(%d) < 1024\n", sp); + return 0; + } /* * Calculate new address parts for PORT command */ - a1 = ntohl(ip->ip_src.s_addr); + if (nat->nat_dir == NAT_INBOUND) + a1 = ntohl(nat->nat_oip.s_addr); + else + a1 = ntohl(ip->ip_src.s_addr); a2 = (a1 >> 16) & 0xff; a3 = (a1 >> 8) & 0xff; a4 = a1 & 0xff; a1 >>= 24; olen = s - f->ftps_rptr; /* DO NOT change this to snprintf! 
*/ -#if defined(OpenBSD) && (200311 >= 200311) - (void) snprintf(newbuf, sizeof(newbuf), "%s %u,%u,%u,%u,%u,%u\r\n", - "PORT", a1, a2, a3, a4, a5, a6); +#if defined(SNPRINTF) && defined(_KERNEL) + SNPRINTF(newbuf, sizeof(newbuf), "%s %u,%u,%u,%u,%u,%u\r\n", + "PORT", a1, a2, a3, a4, a5, a6); #else (void) sprintf(newbuf, "%s %u,%u,%u,%u,%u,%u\r\n", "PORT", a1, a2, a3, a4, a5, a6); @@ -218,87 +277,34 @@ int dlen; nlen = strlen(newbuf); inc = nlen - olen; if ((inc + ip->ip_len) > 65535) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, - "ippr_ftp_port:inc(%d) + ip->ip_len > 65535\n", inc); -#endif + if (ippr_ftp_debug > 0) + printf("ippr_ftp_port:inc(%d) + ip->ip_len > 65535\n", + inc); return 0; } #if !defined(_KERNEL) - m = *fin->fin_mp; - bcopy(newbuf, (char *)m + off, nlen); + bcopy(newbuf, MTOD(m, char *) + off, nlen); #else -# if SOLARIS - m = fin->fin_qfm; - for (m1 = m; m1->b_cont; m1 = m1->b_cont) - ; - if ((inc > 0) && (m1->b_datap->db_lim - m1->b_wptr < inc)) { - mblk_t *nm; - - /* alloc enough to keep same trailer space for lower driver */ - nm = allocb(nlen, BPRI_MED); - PANIC((!nm),("ippr_ftp_out: allocb failed")); - - nm->b_band = m1->b_band; - nm->b_wptr += nlen; - - m1->b_wptr -= olen; - PANIC((m1->b_wptr < m1->b_rptr), - ("ippr_ftp_out: cannot handle fragmented data block")); - - linkb(m1, nm); - } else { - if (m1->b_datap->db_struiolim == m1->b_wptr) - m1->b_datap->db_struiolim += inc; - m1->b_datap->db_struioflag &= ~STRUIO_IP; - m1->b_wptr += inc; - } - copyin_mblk(m, off, nlen, newbuf); -# else - m = *fin->fin_mp; +# if defined(MENTAT) + if (inc < 0) + (void)adjmsg(m, inc); +# else /* defined(MENTAT) */ + /* + * m_adj takes care of pkthdr.len, if required and treats inc<0 to + * mean remove -len bytes from the end of the packet. + * The mbuf chain will be extended if necessary by m_copyback(). 
+ */ if (inc < 0) m_adj(m, inc); - /* the mbuf chain will be extended if necessary by m_copyback() */ - m_copyback(m, off, nlen, newbuf); -# ifdef M_PKTHDR - if (!(m->m_flags & M_PKTHDR)) - m->m_pkthdr.len += inc; -# endif -# endif -#endif - if (inc != 0) { -#if ((SOLARIS || defined(__sgi)) && defined(_KERNEL)) || !defined(_KERNEL) - register u_32_t sum1, sum2; - - sum1 = ip->ip_len; - sum2 = ip->ip_len + inc; +# endif /* defined(MENTAT) */ +#endif /* !defined(_KERNEL) */ + COPYBACK(m, off, nlen, newbuf); - /* Because ~1 == -2, We really need ~1 == -1 */ - if (sum1 > sum2) - sum2--; - sum2 -= sum1; - sum2 = (sum2 & 0xffff) + (sum2 >> 16); - - fix_outcksum(fin, &ip->ip_sum, sum2); -#endif + if (inc != 0) { ip->ip_len += inc; - } - - /* - * Add skeleton NAT entry for connection which will come back the - * other way. - */ - sp = (a5 << 8 | a6); - /* - * Don't allow the PORT command to specify a port < 1024 due to - * security crap. - */ - if (sp < 1024) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_port:sp(%d) < 1024\n", sp); -#endif - return 0; + fin->fin_dlen += inc; + fin->fin_plen += inc; } /* @@ -307,11 +313,22 @@ int dlen; * mapping. */ bcopy((char *)fin, (char *)&fi, sizeof(fi)); + fi.fin_state = NULL; + fi.fin_nat = NULL; + fi.fin_flx |= FI_IGNORE; fi.fin_data[0] = sp; fi.fin_data[1] = fin->fin_data[1] - 1; - ipn = nat_outlookup(&fi, IPN_TCP, nat->nat_p, nat->nat_inip, - ip->ip_dst, 0); - if (ipn == NULL) { + /* + * Add skeleton NAT entry for connection which will come back the + * other way. 
+ */ + if (nat->nat_dir == NAT_OUTBOUND) + nat2 = nat_outlookup(&fi, NAT_SEARCH|IPN_TCP, nat->nat_p, + nat->nat_inip, nat->nat_oip); + else + nat2 = nat_inlookup(&fi, NAT_SEARCH|IPN_TCP, nat->nat_p, + nat->nat_inip, nat->nat_oip); + if (nat2 == NULL) { int slen; slen = ip->ip_len; @@ -319,28 +336,61 @@ int dlen; bzero((char *)tcp2, sizeof(*tcp2)); tcp2->th_win = htons(8192); tcp2->th_sport = htons(sp); - tcp2->th_off = 5; + TCP_OFF_A(tcp2, 5); tcp2->th_flags = TH_SYN; tcp2->th_dport = 0; /* XXX - don't specify remote port */ fi.fin_data[1] = 0; fi.fin_dlen = sizeof(*tcp2); + fi.fin_plen = fi.fin_hlen + sizeof(*tcp2); fi.fin_dp = (char *)tcp2; fi.fin_fr = &ftppxyfr; - fi.fin_out = 1; + fi.fin_out = nat->nat_dir; + fi.fin_flx &= FI_LOWTTL|FI_FRAG|FI_TCPUDP|FI_OPTIONS|FI_IGNORE; swip = ip->ip_src; - fi.fin_fi.fi_saddr = nat->nat_inip.s_addr; - ip->ip_src = nat->nat_inip; - ipn = nat_new(&fi, ip, nat->nat_ptr, NULL, IPN_TCP|FI_W_DPORT, - NAT_OUTBOUND); - if (ipn != NULL) { - ipn->nat_age = fr_defnatage; - (void) fr_addstate(ip, &fi, NULL, - FI_W_DPORT|FI_IGNOREPKT); + swip2 = ip->ip_dst; + if (nat->nat_dir == NAT_OUTBOUND) { + fi.fin_fi.fi_saddr = nat->nat_inip.s_addr; + ip->ip_src = nat->nat_inip; + } else if (nat->nat_dir == NAT_INBOUND) { + fi.fin_fi.fi_saddr = nat->nat_oip.s_addr; + ip->ip_src = nat->nat_oip; + } + + flags = NAT_SLAVE|IPN_TCP|SI_W_DPORT; + if (nat->nat_dir == NAT_INBOUND) + flags |= NAT_NOTRULEPORT; + nat2 = nat_new(&fi, nat->nat_ptr, NULL, flags, nat->nat_dir); + + if (nat2 != NULL) { + (void) nat_proto(&fi, nat2, IPN_TCP); + nat_update(&fi, nat2, nat->nat_ptr); + fi.fin_ifp = NULL; + if (nat->nat_dir == NAT_INBOUND) { + fi.fin_fi.fi_daddr = nat->nat_inip.s_addr; + ip->ip_dst = nat->nat_inip; + } + (void) fr_addstate(&fi, &nat2->nat_state, SI_W_DPORT); + if (fi.fin_state != NULL) + fr_statederef(&fi, (ipstate_t **)&fi.fin_state); } ip->ip_len = slen; ip->ip_src = swip; + ip->ip_dst = swip2; + } else { + ipstate_t *is; + + nat_update(&fi, nat2, 
nat->nat_ptr); + READ_ENTER(&ipf_state); + is = nat2->nat_state; + if (is != NULL) { + MUTEX_ENTER(&is->is_lock); + (void)fr_tcp_age(&is->is_sti, &fi, ips_tqtqb, + is->is_flags); + MUTEX_EXIT(&is->is_lock); + } + RWLOCK_EXIT(&ipf_state); } - return inc; + return APR_INC(inc); } @@ -362,8 +412,8 @@ int dlen; for (i = 0; (i < 5) && (i < dlen); i++) { c = rptr[i]; - if (isalpha(c)) { - cmd[i] = toupper(c); + if (ISALPHA(c)) { + cmd[i] = TOUPPER(c); } else { cmd[i] = c; } @@ -422,23 +472,17 @@ nat_t *nat; ftpinfo_t *ftp; int dlen; { - tcphdr_t *tcp, tcph, *tcp2 = &tcph; - struct in_addr swip, swip2; - u_int a1, a2, a3, a4; - u_short a5, a6, dp; - fr_info_t fi; + u_int a1, a2, a3, a4, data_ip; + char newbuf[IPF_FTPBUFSZ]; + char *s, *brackets[2]; + u_short a5, a6; ftpside_t *f; - nat_t *ipn; - int inc; - char *s; if (ippr_ftp_forcepasv != 0 && ftp->ftp_side[0].ftps_cmds != FTPXY_C_PASV) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, - "ippr_ftp_pasv:ftps_cmds(%d) != FTPXY_C_PASV\n", - ftp->ftp_side[0].ftps_cmds); -#endif + if (ippr_ftp_debug > 0) + printf("ippr_ftp_pasv:ftps_cmds(%d) != FTPXY_C_PASV\n", + ftp->ftp_side[0].ftps_cmds); return 0; } @@ -449,63 +493,67 @@ int dlen; * Check for PASV reply message. 
*/ if (dlen < IPF_MIN227LEN) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, - "ippr_ftp_pasv:dlen(%d) < IPF_MIN227LEN\n", dlen); -#endif + if (ippr_ftp_debug > 1) + printf("ippr_ftp_pasv:dlen(%d) < IPF_MIN227LEN\n", + dlen); return 0; } else if (strncmp(f->ftps_rptr, "227 Entering Passive Mod", PASV_REPLEN)) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_pasv:227 reply wrong\n"); -#endif + if (ippr_ftp_debug > 0) + printf("ippr_ftp_pasv:%d reply wrong\n", 227); return 0; } - tcp = (tcphdr_t *)fin->fin_dp; - + brackets[0] = ""; + brackets[1] = ""; /* * Skip the PASV reply + space */ s = f->ftps_rptr + PASV_REPLEN; - while (*s && !isdigit(*s)) + while (*s && !ISDIGIT(*s)) { + if (*s == '(') { + brackets[0] = "("; + brackets[1] = ")"; + } s++; + } + /* * Pick out the address components, two at a time. */ a1 = ippr_ftp_atoi(&s); if (s == NULL) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_pasv:ippr_ftp_atoi(1) failed\n"); -#endif + if (ippr_ftp_debug > 1) + printf("ippr_ftp_pasv:ippr_ftp_atoi(%d) failed\n", 1); return 0; } a2 = ippr_ftp_atoi(&s); if (s == NULL) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_pasv:ippr_ftp_atoi(2) failed\n"); -#endif + if (ippr_ftp_debug > 1) + printf("ippr_ftp_pasv:ippr_ftp_atoi(%d) failed\n", 2); return 0; } /* - * check that IP address in the PORT/PASV reply is the same as the - * sender of the command - prevents using PORT for port scanning. + * check that IP address in the PASV reply is the same as the + * sender of the command - prevents using PASV for port scanning. 
*/ a1 <<= 16; a1 |= a2; - if (a1 != ntohl(nat->nat_oip.s_addr)) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_pasv:a1 != nat->nat_oip\n"); -#endif + + if (((nat->nat_dir == NAT_INBOUND) && + (a1 != ntohl(nat->nat_inip.s_addr))) || + ((nat->nat_dir == NAT_OUTBOUND) && + (a1 != ntohl(nat->nat_oip.s_addr)))) { + if (ippr_ftp_debug > 0) + printf("ippr_ftp_pasv:%s != nat->nat_oip\n", "a1"); return 0; } a5 = ippr_ftp_atoi(&s); if (s == NULL) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_pasv:ippr_ftp_atoi(3) failed\n"); -#endif + if (ippr_ftp_debug > 1) + printf("ippr_ftp_pasv:ippr_ftp_atoi(%d) failed\n", 3); return 0; } @@ -520,97 +568,123 @@ int dlen; */ if ((*s == '\r') && (*(s + 1) == '\n')) { s += 2; - a6 = a5 & 0xff; } else { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_pasv:missing cr-lf\n"); -#endif + if (ippr_ftp_debug > 1) + printf("ippr_ftp_pasv:missing %s", "cr-lf\n"); return 0; } + + a6 = a5 & 0xff; a5 >>= 8; /* * Calculate new address parts for 227 reply */ - a1 = ntohl(ip->ip_src.s_addr); + if (nat->nat_dir == NAT_INBOUND) { + data_ip = nat->nat_outip.s_addr; + a1 = ntohl(data_ip); + } else + data_ip = htonl(a1); + a2 = (a1 >> 16) & 0xff; a3 = (a1 >> 8) & 0xff; a4 = a1 & 0xff; a1 >>= 24; - inc = 0; -#if 0 - olen = s - f->ftps_rptr; - (void) sprintf(newbuf, "%s %u,%u,%u,%u,%u,%u\r\n", - "227 Entering Passive Mode", a1, a2, a3, a4, a5, a6); - nlen = strlen(newbuf); - inc = nlen - olen; - if ((inc + ip->ip_len) > 65535) - return 0; -#if !defined(_KERNEL) - m = *fin->fin_mp; - m_copyback(m, off, nlen, newbuf); +#if defined(SNPRINTF) && defined(_KERNEL) + SNPRINTF(newbuf, sizeof(newbuf), "%s %s%u,%u,%u,%u,%u,%u%s\r\n", + "227 Entering Passive Mode", brackets[0], a1, a2, a3, a4, + a5, a6, brackets[1]); #else -# if SOLARIS - m = fin->fin_qfm; - for (m1 = m; m1->b_cont; m1 = m1->b_cont) - ; - if ((inc > 0) && (m1->b_datap->db_lim - m1->b_wptr < inc)) { - mblk_t *nm; + (void) 
sprintf(newbuf, "%s %s%u,%u,%u,%u,%u,%u%s\r\n", + "227 Entering Passive Mode", brackets[0], a1, a2, a3, a4, + a5, a6, brackets[1]); +#endif + return ippr_ftp_pasvreply(fin, ip, nat, f, (a5 << 8 | a6), + newbuf, s, data_ip); +} - /* alloc enough to keep same trailer space for lower driver */ - nm = allocb(nlen, BPRI_MED); - PANIC((!nm),("ippr_ftp_out: allocb failed")); +int ippr_ftp_pasvreply(fin, ip, nat, f, port, newmsg, s, data_ip) +fr_info_t *fin; +ip_t *ip; +nat_t *nat; +ftpside_t *f; +u_int port; +char *newmsg; +char *s; +u_int data_ip; +{ + int inc, off, nflags, sflags; + tcphdr_t *tcp, tcph, *tcp2; + struct in_addr swip, swip2; + struct in_addr data_addr; + size_t nlen, olen; + fr_info_t fi; + nat_t *nat2; + mb_t *m; - nm->b_band = m1->b_band; - nm->b_wptr += nlen; + m = fin->fin_m; + tcp = (tcphdr_t *)fin->fin_dp; + off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff; - m1->b_wptr -= olen; - PANIC((m1->b_wptr < m1->b_rptr), - ("ippr_ftp_out: cannot handle fragmented data block")); + data_addr.s_addr = data_ip; + tcp2 = &tcph; + inc = 0; - linkb(m1, nm); - } else { - m1->b_wptr += inc; + + olen = s - f->ftps_rptr; + nlen = strlen(newmsg); + inc = nlen - olen; + if ((inc + ip->ip_len) > 65535) { + if (ippr_ftp_debug > 0) + printf("ippr_ftp_pasv:inc(%d) + ip->ip_len > 65535\n", + inc); + return 0; } - /*copyin_mblk(m, off, nlen, newbuf);*/ -# else /* SOLARIS */ - m = *fin->fin_mp; + +#if !defined(_KERNEL) + bcopy(newmsg, MTOD(m, char *) + off, nlen); +#else +# if defined(MENTAT) + if (inc < 0) + (void)adjmsg(m, inc); +# else /* defined(MENTAT) */ + /* + * m_adj takes care of pkthdr.len, if required and treats inc<0 to + * mean remove -len bytes from the end of the packet. + * The mbuf chain will be extended if necessary by m_copyback(). 
+ */ if (inc < 0) m_adj(m, inc); - /* the mbuf chain will be extended if necessary by m_copyback() */ - /*m_copyback(m, off, nlen, newbuf);*/ -# endif /* SOLARIS */ -#endif /* _KERNEL */ - if (inc != 0) { -#if ((SOLARIS || defined(__sgi)) && defined(_KERNEL)) || !defined(_KERNEL) - register u_32_t sum1, sum2; +# endif /* defined(MENTAT) */ +#endif /* !defined(_KERNEL) */ + COPYBACK(m, off, nlen, newmsg); - sum1 = ip->ip_len; - sum2 = ip->ip_len + inc; - - /* Because ~1 == -2, We really need ~1 == -1 */ - if (sum1 > sum2) - sum2--; - sum2 -= sum1; - sum2 = (sum2 & 0xffff) + (sum2 >> 16); - - fix_outcksum(fin, &ip->ip_sum, sum2); -#endif /* SOLARIS || defined(__sgi) */ + if (inc != 0) { ip->ip_len += inc; + fin->fin_dlen += inc; + fin->fin_plen += inc; } -#endif /* 0 */ /* * Add skeleton NAT entry for connection which will come back the * other way. */ bcopy((char *)fin, (char *)&fi, sizeof(fi)); + fi.fin_state = NULL; + fi.fin_nat = NULL; + fi.fin_flx |= FI_IGNORE; fi.fin_data[0] = 0; - dp = htons(fin->fin_data[1] - 1); - fi.fin_data[1] = ntohs(dp); - ipn = nat_outlookup(&fi, IPN_TCP, nat->nat_p, nat->nat_inip, - ip->ip_dst, 0); - if (ipn == NULL) { + fi.fin_data[1] = port; + nflags = IPN_TCP|SI_W_SPORT; + if (ippr_ftp_pasvrdr && f->ftps_ifp) + nflags |= SI_W_DPORT; + if (nat->nat_dir == NAT_OUTBOUND) + nat2 = nat_outlookup(&fi, nflags|NAT_SEARCH, + nat->nat_p, nat->nat_inip, nat->nat_oip); + else + nat2 = nat_inlookup(&fi, nflags|NAT_SEARCH, + nat->nat_p, nat->nat_inip, nat->nat_oip); + if (nat2 == NULL) { int slen; slen = ip->ip_len; @@ -618,31 +692,65 @@ int dlen; bzero((char *)tcp2, sizeof(*tcp2)); tcp2->th_win = htons(8192); tcp2->th_sport = 0; /* XXX - fake it for nat_new */ - tcp2->th_off = 5; + TCP_OFF_A(tcp2, 5); tcp2->th_flags = TH_SYN; - fi.fin_data[1] = a5 << 8 | a6; + fi.fin_data[1] = port; fi.fin_dlen = sizeof(*tcp2); - tcp2->th_dport = htons(fi.fin_data[1]); + tcp2->th_dport = htons(port); fi.fin_data[0] = 0; fi.fin_dp = (char *)tcp2; + fi.fin_plen = 
fi.fin_hlen + sizeof(*tcp); fi.fin_fr = &ftppxyfr; - fi.fin_out = 1; + fi.fin_out = nat->nat_dir; + fi.fin_flx &= FI_LOWTTL|FI_FRAG|FI_TCPUDP|FI_OPTIONS|FI_IGNORE; swip = ip->ip_src; swip2 = ip->ip_dst; - fi.fin_fi.fi_daddr = ip->ip_src.s_addr; - fi.fin_fi.fi_saddr = nat->nat_inip.s_addr; - ip->ip_dst = ip->ip_src; - ip->ip_src = nat->nat_inip; - ipn = nat_new(&fi, ip, nat->nat_ptr, NULL, IPN_TCP|FI_W_SPORT, - NAT_OUTBOUND); - if (ipn != NULL) { - ipn->nat_age = fr_defnatage; - (void) fr_addstate(ip, &fi, NULL, - FI_W_SPORT|FI_IGNOREPKT); + if (nat->nat_dir == NAT_OUTBOUND) { + fi.fin_fi.fi_daddr = data_addr.s_addr; + fi.fin_fi.fi_saddr = nat->nat_inip.s_addr; + ip->ip_dst = data_addr; + ip->ip_src = nat->nat_inip; + } else if (nat->nat_dir == NAT_INBOUND) { + fi.fin_fi.fi_saddr = nat->nat_oip.s_addr; + fi.fin_fi.fi_daddr = nat->nat_outip.s_addr; + ip->ip_src = nat->nat_oip; + ip->ip_dst = nat->nat_outip; } + + sflags = nflags; + nflags |= NAT_SLAVE; + if (nat->nat_dir == NAT_INBOUND) + nflags |= NAT_NOTRULEPORT; + nat2 = nat_new(&fi, nat->nat_ptr, NULL, nflags, nat->nat_dir); + if (nat2 != NULL) { + (void) nat_proto(&fi, nat2, IPN_TCP); + nat_update(&fi, nat2, nat->nat_ptr); + fi.fin_ifp = NULL; + if (nat->nat_dir == NAT_INBOUND) { + fi.fin_fi.fi_daddr = nat->nat_inip.s_addr; + ip->ip_dst = nat->nat_inip; + } + (void) fr_addstate(&fi, &nat2->nat_state, sflags); + if (fi.fin_state != NULL) + fr_statederef(&fi, (ipstate_t **)&fi.fin_state); + } + ip->ip_len = slen; ip->ip_src = swip; ip->ip_dst = swip2; + } else { + ipstate_t *is; + + nat_update(&fi, nat2, nat->nat_ptr); + READ_ENTER(&ipf_state); + is = nat2->nat_state; + if (is != NULL) { + MUTEX_ENTER(&is->is_lock); + (void)fr_tcp_age(&is->is_sti, &fi, ips_tqtqb, + is->is_flags); + MUTEX_EXIT(&is->is_lock); + } + RWLOCK_EXIT(&ipf_state); } return inc; } @@ -664,13 +772,19 @@ int dlen; rptr = f->ftps_rptr; wptr = f->ftps_wptr; - if (!isdigit(*rptr) || !isdigit(*(rptr + 1)) || !isdigit(*(rptr + 2))) + if (*rptr == ' 
') + goto server_cmd_ok; + if (!ISDIGIT(*rptr) || !ISDIGIT(*(rptr + 1)) || !ISDIGIT(*(rptr + 2))) return 0; if (ftp->ftp_passok == FTPXY_GO) { if (!strncmp(rptr, "227 ", 4)) inc = ippr_ftp_pasv(fin, ip, nat, ftp, dlen); + else if (!strncmp(rptr, "229 ", 4)) + inc = ippr_ftp_epsv(fin, ip, nat, f, dlen); } else if (ippr_ftp_insecure && !strncmp(rptr, "227 ", 4)) { inc = ippr_ftp_pasv(fin, ip, nat, ftp, dlen); + } else if (ippr_ftp_insecure && !strncmp(rptr, "229 ", 4)) { + inc = ippr_ftp_epsv(fin, ip, nat, f, dlen); } else if (*rptr == '5' || *rptr == '4') ftp->ftp_passok = FTPXY_INIT; else if (ftp->ftp_incok) { @@ -695,6 +809,7 @@ int dlen; } } } +server_cmd_ok: ftp->ftp_incok = 0; while ((*rptr++ != '\n') && (rptr < wptr)) @@ -713,35 +828,38 @@ ftpside_t *ftps; char *buf; size_t len; { - register char *s, c; + register char *s, c, pc; register size_t i = len; char cmd[5]; + s = buf; + + if (ftps->ftps_junk == 1) + return 1; + if (i < 5) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_client_valid:i(%lu) < 5\n", - (u_long)i); -#endif + if (ippr_ftp_debug > 3) + printf("ippr_ftp_client_valid:i(%d) < 5\n", (int)i); return 2; } - s = buf; - c = *s++; + i--; + c = *s++; - if (isalpha(c)) { - cmd[0] = toupper(c); + if (ISALPHA(c)) { + cmd[0] = TOUPPER(c); c = *s++; i--; - if (isalpha(c)) { - cmd[1] = toupper(c); + if (ISALPHA(c)) { + cmd[1] = TOUPPER(c); c = *s++; i--; - if (isalpha(c)) { - cmd[2] = toupper(c); + if (ISALPHA(c)) { + cmd[2] = TOUPPER(c); c = *s++; i--; - if (isalpha(c)) { - cmd[3] = toupper(c); + if (ISALPHA(c)) { + cmd[3] = TOUPPER(c); c = *s++; i--; if ((c != ' ') && (c != '\r')) @@ -754,17 +872,18 @@ size_t len; goto bad_client_command; } else { bad_client_command: -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, - "ippr_ftp_client_valid:bad cmd:len %lu i %lu c 0x%x\n", - (u_long)i, (u_long)len, c); -#endif + if (ippr_ftp_debug > 3) + printf("%s:bad:junk %d len %d/%d c 0x%x buf [%*.*s]\n", + "ippr_ftp_client_valid", 
+ ftps->ftps_junk, (int)len, (int)i, c, + (int)len, (int)len, buf); return 1; } for (; i; i--) { + pc = c; c = *s++; - if (c == '\n') { + if ((pc == '\r') && (c == '\n')) { cmd[4] = '\0'; if (!strcmp(cmd, "PASV")) ftps->ftps_cmds = FTPXY_C_PASV; @@ -773,8 +892,9 @@ bad_client_command: return 0; } } -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_client_valid:junk after cmd[%s]\n", buf); +#if !defined(_KERNEL) + printf("ippr_ftp_client_valid:junk after cmd[%*.*s]\n", + (int)len, (int)len, buf); #endif return 2; } @@ -785,26 +905,36 @@ ftpside_t *ftps; char *buf; size_t len; { - register char *s, c; + register char *s, c, pc; register size_t i = len; int cmd; - if (i < 5) - return 2; s = buf; - c = *s++; cmd = 0; + + if (ftps->ftps_junk == 1) + return 1; + + if (i < 5) { + if (ippr_ftp_debug > 3) + printf("ippr_ftp_servert_valid:i(%d) < 5\n", (int)i); + return 2; + } + + c = *s++; i--; + if (c == ' ') + goto search_eol; - if (isdigit(c)) { + if (ISDIGIT(c)) { cmd = (c - '0') * 100; c = *s++; i--; - if (isdigit(c)) { + if (ISDIGIT(c)) { cmd += (c - '0') * 10; c = *s++; i--; - if (isdigit(c)) { + if (ISDIGIT(c)) { cmd += (c - '0'); c = *s++; i--; @@ -816,24 +946,25 @@ size_t len; goto bad_server_command; } else { bad_server_command: -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, - "ippr_ftp_server_valid:bad cmd:len %lu i %lu c 0x%x\n", - (u_long)i, (u_long)len, c); -#endif + if (ippr_ftp_debug > 3) + printf("%s:bad:junk %d len %d/%d c 0x%x buf [%*.*s]\n", + "ippr_ftp_server_valid", + ftps->ftps_junk, (int)len, (int)i, + c, (int)len, (int)len, buf); return 1; } - +search_eol: for (; i; i--) { + pc = c; c = *s++; - if (c == '\n') { + if ((pc == '\r') && (c == '\n')) { ftps->ftps_cmds = cmd; return 0; } } -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, "ippr_ftp_server_valid:junk after cmd[%s]\n", buf); -#endif + if (ippr_ftp_debug > 3) + printf("ippr_ftp_server_valid:junk after cmd[%*.*s]\n", + (int)len, (int)len, buf); 
return 2; } @@ -858,48 +989,54 @@ size_t len; /* + * For map rules, the following applies: * rv == 0 for outbound processing, * rv == 1 for inbound processing. + * For rdr rules, the following applies: + * rv == 0 for inbound processing, + * rv == 1 for outbound processing. */ -int ippr_ftp_process(fin, ip, nat, ftp, rv) +int ippr_ftp_process(fin, nat, ftp, rv) fr_info_t *fin; -ip_t *ip; nat_t *nat; ftpinfo_t *ftp; int rv; { int mlen, len, off, inc, i, sel, sel2, ok, ackoff, seqoff; + char *rptr, *wptr, *s; u_32_t thseq, thack; - char *rptr, *wptr; ap_session_t *aps; ftpside_t *f, *t; tcphdr_t *tcp; + ip_t *ip; mb_t *m; + m = fin->fin_m; + ip = fin->fin_ip; tcp = (tcphdr_t *)fin->fin_dp; - off = fin->fin_hlen + (tcp->th_off << 2); -#if SOLARIS && defined(_KERNEL) - m = fin->fin_qfm; -#else - m = *fin->fin_mp; -#endif + off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff; + + f = &ftp->ftp_side[rv]; + t = &ftp->ftp_side[1 - rv]; + thseq = ntohl(tcp->th_seq); + thack = ntohl(tcp->th_ack); -#ifndef _KERNEL - mlen = mbuflen(m); +#ifdef __sgi + mlen = fin->fin_plen - off; #else -# if SOLARIS - mlen = msgdsize(m); -# else - mlen = mbufchainlen(m); -# endif + mlen = MSGDSIZE(m) - off; #endif - mlen -= off; + if (ippr_ftp_debug > 4) + printf("ippr_ftp_process: mlen %d\n", mlen); + if (mlen <= 0) { + if ((tcp->th_flags & TH_OPENING) == TH_OPENING) { + f->ftps_seq[0] = thseq + 1; + t->ftps_seq[0] = thack; + } + return 0; + } aps = nat->nat_aps; - t = &ftp->ftp_side[1 - rv]; - f = &ftp->ftp_side[rv]; - thseq = ntohl(tcp->th_seq); - thack = ntohl(tcp->th_ack); sel = aps->aps_sel[1 - rv]; sel2 = aps->aps_sel[rv]; @@ -911,19 +1048,17 @@ int rv; if (aps->aps_ackmin[sel2] > ackoff + thack) ackoff = aps->aps_ackoff[!sel2]; } else { -#if PROXY_DEBUG - printf("seqoff %d thseq %x ackmin %x\n", seqoff, thseq, - aps->aps_ackmin[sel]); -#endif seqoff = aps->aps_ackoff[sel]; + if (ippr_ftp_debug > 2) + printf("seqoff %d thseq %x ackmin %x\n", seqoff, thseq, + 
aps->aps_ackmin[sel]); if (aps->aps_ackmin[sel] > seqoff + thseq) seqoff = aps->aps_ackoff[!sel]; -#if PROXY_DEBUG - printf("ackoff %d thack %x seqmin %x\n", ackoff, thack, - aps->aps_seqmin[sel2]); -#endif ackoff = aps->aps_seqoff[sel2]; + if (ippr_ftp_debug > 2) + printf("ackoff %d thack %x seqmin %x\n", ackoff, thack, + aps->aps_seqmin[sel2]); if (ackoff > 0) { if (aps->aps_seqmin[sel2] > ackoff + thack) ackoff = aps->aps_seqoff[!sel2]; @@ -932,26 +1067,27 @@ int rv; ackoff = aps->aps_seqoff[!sel2]; } } -#if PROXY_DEBUG - printf("%s: %x seq %x/%d ack %x/%d len %d\n", rv ? "IN" : "OUT", - tcp->th_flags, thseq, seqoff, thack, ackoff, mlen); - printf("sel %d seqmin %x/%x offset %d/%d\n", sel, - aps->aps_seqmin[sel], aps->aps_seqmin[sel2], - aps->aps_seqoff[sel], aps->aps_seqoff[sel2]); - printf("sel %d ackmin %x/%x offset %d/%d\n", sel2, - aps->aps_ackmin[sel], aps->aps_ackmin[sel2], - aps->aps_ackoff[sel], aps->aps_ackoff[sel2]); -#endif + if (ippr_ftp_debug > 2) { + printf("%s: %x seq %x/%d ack %x/%d len %d/%d off %d\n", + rv ? "IN" : "OUT", tcp->th_flags, thseq, seqoff, + thack, ackoff, mlen, fin->fin_plen, off); + printf("sel %d seqmin %x/%x offset %d/%d\n", sel, + aps->aps_seqmin[sel], aps->aps_seqmin[sel2], + aps->aps_seqoff[sel], aps->aps_seqoff[sel2]); + printf("sel %d ackmin %x/%x offset %d/%d\n", sel2, + aps->aps_ackmin[sel], aps->aps_ackmin[sel2], + aps->aps_ackoff[sel], aps->aps_ackoff[sel2]); + } /* * XXX - Ideally, this packet should get dropped because we now know * that it is out of order (and there is no real danger in doing so * apart from causing packets to go through here ordered). 
*/ -#if PROXY_DEBUG - printf("rv %d t:seq[0] %x seq[1] %x %d/%d\n", - rv, t->ftps_seq[0], t->ftps_seq[1], seqoff, ackoff); -#endif + if (ippr_ftp_debug > 2) { + printf("rv %d t:seq[0] %x seq[1] %x %d/%d\n", + rv, t->ftps_seq[0], t->ftps_seq[1], seqoff, ackoff); + } ok = 0; if (t->ftps_seq[0] == 0) { @@ -980,33 +1116,35 @@ int rv; } } -#if PROXY_DEBUG - if (!ok) - printf("not ok\n"); -#endif + if (ippr_ftp_debug > 2) { + if (!ok) + printf("%s ok\n", "not"); + } if (!mlen) { if (t->ftps_seq[0] + ackoff != thack) { -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, - "ippr_ftp_process:seq[0](%x) + ackoff(%x) != thack(%x)\n", - t->ftps_seq[0], ackoff, thack); -#endif + if (ippr_ftp_debug > 1) { + printf("%s:seq[0](%x) + (%x) != (%x)\n", + "ippr_ftp_process", t->ftps_seq[0], + ackoff, thack); + } return APR_ERR(1); } -#if PROXY_DEBUG - printf("f:seq[0] %x seq[1] %x\n", f->ftps_seq[0], f->ftps_seq[1]); -#endif + if (ippr_ftp_debug > 2) { + printf("ippr_ftp_process:f:seq[0] %x seq[1] %x\n", + f->ftps_seq[0], f->ftps_seq[1]); + } + if (tcp->th_flags & TH_FIN) { if (thseq == f->ftps_seq[1]) { f->ftps_seq[0] = f->ftps_seq[1] - seqoff; f->ftps_seq[1] = thseq + 1 - seqoff; } else { -#if PROXY_DEBUG || (!defined(_KERNEL) && !defined(KERNEL)) - printf("FIN: thseq %x seqoff %d ftps_seq %x\n", - thseq, seqoff, f->ftps_seq[0]); -#endif + if (ippr_ftp_debug > 1) { + printf("FIN: thseq %x seqoff %d ftps_seq %x\n", + thseq, seqoff, f->ftps_seq[0]); + } return APR_ERR(1); } } @@ -1027,15 +1165,15 @@ int rv; if (ok == 0) { inc = thseq - f->ftps_seq[0]; -#if PROXY_DEBUG || (!defined(_KERNEL) && !defined(KERNEL)) - printf("inc %d sel %d rv %d\n", inc, sel, rv); - printf("th_seq %x ftps_seq %x/%x\n", thseq, f->ftps_seq[0], - f->ftps_seq[1]); - printf("ackmin %x ackoff %d\n", (u_int)aps->aps_ackmin[sel], - aps->aps_ackoff[sel]); - printf("seqmin %x seqoff %d\n", (u_int)aps->aps_seqmin[sel], - aps->aps_seqoff[sel]); -#endif + if (ippr_ftp_debug > 1) { + printf("inc %d sel %d rv 
%d\n", inc, sel, rv); + printf("th_seq %x ftps_seq %x/%x\n", + thseq, f->ftps_seq[0], f->ftps_seq[1]); + printf("ackmin %x ackoff %d\n", aps->aps_ackmin[sel], + aps->aps_ackoff[sel]); + printf("seqmin %x seqoff %d\n", aps->aps_seqmin[sel], + aps->aps_seqoff[sel]); + } return APR_ERR(1); } @@ -1048,31 +1186,62 @@ int rv; f->ftps_len = mlen; while (mlen > 0) { - len = MIN(mlen, FTP_BUFSZ / 2); - -#if !defined(_KERNEL) - bcopy((char *)m + off, wptr, len); -#else -# if SOLARIS - copyout_mblk(m, off, len, wptr); -# else - m_copydata(m, off, len, wptr); -# endif -#endif + len = MIN(mlen, sizeof(f->ftps_buf) - (wptr - rptr)); + COPYDATA(m, off, len, wptr); mlen -= len; off += len; wptr += len; + + if (ippr_ftp_debug > 3) + printf("%s:len %d/%d off %d wptr %lx junk %d [%*.*s]\n", + "ippr_ftp_process", + len, mlen, off, (u_long)wptr, f->ftps_junk, + len, len, rptr); + f->ftps_wptr = wptr; - if (f->ftps_junk == 2) + if (f->ftps_junk != 0) { + i = f->ftps_junk; f->ftps_junk = ippr_ftp_valid(ftp, rv, rptr, wptr - rptr); + if (ippr_ftp_debug > 5) + printf("%s:junk %d -> %d\n", + "ippr_ftp_process", i, f->ftps_junk); + + if (f->ftps_junk != 0) { + if (wptr - rptr == sizeof(f->ftps_buf)) { + if (ippr_ftp_debug > 4) + printf("%s:full buffer\n", + "ippr_ftp_process"); + f->ftps_rptr = f->ftps_buf; + f->ftps_wptr = f->ftps_buf; + rptr = f->ftps_rptr; + wptr = f->ftps_wptr; + /* + * Because we throw away data here that + * we would otherwise parse, set the + * junk flag to indicate just ignore + * any data upto the next CRLF. 
+ */ + f->ftps_junk = 1; + continue; + } + } + } + while ((f->ftps_junk == 0) && (wptr > rptr)) { - f->ftps_junk = ippr_ftp_valid(ftp, rv, rptr, - wptr - rptr); + len = wptr - rptr; + f->ftps_junk = ippr_ftp_valid(ftp, rv, rptr, len); + + if (ippr_ftp_debug > 3) { + printf("%s=%d len %d rv %d ptr %lx/%lx ", + "ippr_ftp_valid", + f->ftps_junk, len, rv, (u_long)rptr, + (u_long)wptr); + printf("buf [%*.*s]\n", len, len, rptr); + } + if (f->ftps_junk == 0) { - f->ftps_cmds++; - len = wptr - rptr; f->ftps_rptr = rptr; if (rv) inc += ippr_ftp_server(fin, ip, nat, @@ -1091,66 +1260,56 @@ int rv; */ if ((f->ftps_cmds == 0) && (f->ftps_junk == 1)) { /* f->ftps_seq[1] += inc; */ -#if !defined(_KERNEL) && !defined(KERNEL) - fprintf(stdout, - "ippr_ftp_process:cmds == 0 junk == 1\n"); -#endif + + if (ippr_ftp_debug > 1) + printf("%s:cmds == 0 junk == 1\n", + "ippr_ftp_process"); return APR_ERR(2); } - while ((f->ftps_junk == 1) && (rptr < wptr)) { - while ((rptr < wptr) && (*rptr != '\r')) - rptr++; - - if (*rptr == '\r') { - if (rptr + 1 < wptr) { - if (*(rptr + 1) == '\n') { - rptr += 2; - f->ftps_junk = 0; - } else - rptr++; - } else + if ((f->ftps_junk != 0) && (rptr < wptr)) { + for (s = rptr; s < wptr; s++) { + if ((*s == '\r') && (s + 1 < wptr) && + (*(s + 1) == '\n')) { + rptr = s + 2; + f->ftps_junk = 0; break; + } } } - f->ftps_rptr = rptr; if (rptr == wptr) { rptr = wptr = f->ftps_buf; } else { - if ((wptr > f->ftps_buf + FTP_BUFSZ / 2)) { - i = wptr - rptr; - if ((rptr == f->ftps_buf) || - (wptr - rptr > FTP_BUFSZ / 2)) { - f->ftps_junk = 1; - rptr = wptr = f->ftps_buf; - } else { - bcopy(rptr, f->ftps_buf, i); - wptr = f->ftps_buf + i; - rptr = f->ftps_buf; - } + /* + * Compact the buffer back to the start. The junk + * flag should already be set and because we're not + * throwing away any data, it is preserved from its + * current state. 
+ */ + if (rptr > f->ftps_buf) { + bcopy(rptr, f->ftps_buf, len); + wptr -= rptr - f->ftps_buf; + rptr = f->ftps_buf; } - f->ftps_rptr = rptr; - f->ftps_wptr = wptr; } + f->ftps_rptr = rptr; + f->ftps_wptr = wptr; } /* f->ftps_seq[1] += inc; */ if (tcp->th_flags & TH_FIN) f->ftps_seq[1]++; -#if PROXY_DEBUG -# ifndef _KERNEL - mlen = mbuflen(m); -# else -# if SOLARIS - mlen = msgdsize(m); -# else - mlen = mbufchainlen(m); -# endif -# endif - mlen -= off; - printf("ftps_seq[1] = %x inc %d len %d\n", f->ftps_seq[1], inc, mlen); + if (ippr_ftp_debug > 3) { +#ifdef __sgi + mlen = fin->fin_plen; +#else + mlen = MSGDSIZE(m); #endif + mlen -= off; + printf("ftps_seq[1] = %x inc %d len %d\n", + f->ftps_seq[1], inc, mlen); + } f->ftps_rptr = rptr; f->ftps_wptr = wptr; @@ -1158,33 +1317,43 @@ int rv; } -int ippr_ftp_out(fin, ip, aps, nat) +int ippr_ftp_out(fin, aps, nat) fr_info_t *fin; -ip_t *ip; ap_session_t *aps; nat_t *nat; { ftpinfo_t *ftp; + int rev; ftp = aps->aps_data; if (ftp == NULL) return 0; - return ippr_ftp_process(fin, ip, nat, ftp, 0); + + rev = (nat->nat_dir == NAT_OUTBOUND) ? 0 : 1; + if (ftp->ftp_side[1 - rev].ftps_ifp == NULL) + ftp->ftp_side[1 - rev].ftps_ifp = fin->fin_ifp; + + return ippr_ftp_process(fin, nat, ftp, rev); } -int ippr_ftp_in(fin, ip, aps, nat) +int ippr_ftp_in(fin, aps, nat) fr_info_t *fin; -ip_t *ip; ap_session_t *aps; nat_t *nat; { ftpinfo_t *ftp; + int rev; ftp = aps->aps_data; if (ftp == NULL) return 0; - return ippr_ftp_process(fin, ip, nat, ftp, 1); + + rev = (nat->nat_dir == NAT_OUTBOUND) ? 
0 : 1; + if (ftp->ftp_side[rev].ftps_ifp == NULL) + ftp->ftp_side[rev].ftps_ifp = fin->fin_ifp; + + return ippr_ftp_process(fin, nat, ftp, 1 - rev); } @@ -1200,7 +1369,7 @@ char **ptr; register char *s = *ptr, c; register u_char i = 0, j = 0; - while ((c = *s++) && isdigit(c)) { + while (((c = *s++) != '\0') && ISDIGIT(c)) { i *= 10; i += c - '0'; } @@ -1208,7 +1377,7 @@ char **ptr; *ptr = NULL; return 0; } - while ((c = *s++) && isdigit(c)) { + while (((c = *s++) != '\0') && ISDIGIT(c)) { j *= 10; j += c - '0'; } @@ -1217,3 +1386,70 @@ char **ptr; j &= 0xff; return (i << 8) | j; } + + +int ippr_ftp_epsv(fin, ip, nat, f, dlen) +fr_info_t *fin; +ip_t *ip; +nat_t *nat; +ftpside_t *f; +int dlen; +{ + char newbuf[IPF_FTPBUFSZ]; + char *s; + u_short ap = 0; + +#define EPSV_REPLEN 33 + /* + * Check for EPSV reply message. + */ + if (dlen < IPF_MIN229LEN) + return (0); + else if (strncmp(f->ftps_rptr, + "229 Entering Extended Passive Mode", EPSV_REPLEN)) + return (0); + + /* + * Skip the EPSV command + space + */ + s = f->ftps_rptr + 33; + while (*s && !ISDIGIT(*s)) + s++; + + /* + * As per RFC 2428, there are no addres components in the EPSV + * response. So we'll go straight to getting the port. + */ + while (*s && ISDIGIT(*s)) { + ap *= 10; + ap += *s++ - '0'; + } + + if (!s) + return 0; + + if (*s == '|') + s++; + if (*s == ')') + s++; + if (*s == '\n') + s--; + /* + * check for CR-LF at the end. 
+ */ + if ((*s == '\r') && (*(s + 1) == '\n')) { + s += 2; + } else + return 0; + +#if defined(SNPRINTF) && defined(_KERNEL) + SNPRINTF(newbuf, sizeof(newbuf), "%s (|||%u|)\r\n", + "229 Entering Extended Passive Mode", ap); +#else + (void) sprintf(newbuf, "%s (|||%u|)\r\n", + "229 Entering Extended Passive Mode", ap); +#endif + + return ippr_ftp_pasvreply(fin, ip, nat, f, (u_int)ap, newbuf, s, + ip->ip_src.s_addr); +} diff --git a/sys/contrib/ipfilter/netinet/ip_log.c b/sys/contrib/ipfilter/netinet/ip_log.c index abca4d8..c42ce0a 100644 --- a/sys/contrib/ipfilter/netinet/ip_log.c +++ b/sys/contrib/ipfilter/netinet/ip_log.c @@ -1,150 +1,178 @@ +/* $FreeBSD$ */ + /* - * Copyright (C) 1997-2001 by Darren Reed. + * Copyright (C) 1997-2003 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. * - * $Id: ip_log.c,v 2.5.2.1 2000/07/19 13:11:47 darrenr Exp $ * $FreeBSD$ + * Id: ip_log.c,v 2.75.2.6 2004/10/16 07:59:27 darrenr Exp */ #include -#if defined(KERNEL) && !defined(_KERNEL) -# define _KERNEL +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 #endif #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ defined(_KERNEL) # include "opt_ipfilter_log.h" #endif -#ifdef __FreeBSD__ -# if defined(IPFILTER_LKM) || defined(_KERNEL) -# if !defined(__FreeBSD_version) -# include -# endif -# if !defined(IPFILTER_LKM) -# if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) -# include "opt_ipfilter.h" -# endif +#if defined(__FreeBSD__) && !defined(IPFILTER_LKM) +# if defined(_KERNEL) +# if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) +# include "opt_ipfilter.h" # endif # else -# ifdef KLD_MODULE -# ifndef __FreeBSD_cc_version -# include -# else -# if __FreeBSD_cc_version < 430000 -# include -# endif -# endif -# endif +# include # endif #endif -#ifdef IPFILTER_LOG -# ifndef SOLARIS -# define SOLARIS (defined(sun) && (defined(__svr4__) || 
defined(__SVR4))) +#ifndef SOLARIS +# define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) +#endif +#include +#include +#include +#ifndef _KERNEL +# include +# include +# include +# include +# define _KERNEL +# define KERNEL +# ifdef __OpenBSD__ +struct file; # endif -# ifndef _KERNEL -# include -# include -# include -# include +# include +# undef _KERNEL +# undef KERNEL +#endif +#if __FreeBSD_version >= 220000 && defined(_KERNEL) +# include +# include +#else +# include +#endif +#include +#if defined(_KERNEL) +# include +# if defined(NetBSD) && (__NetBSD_Version__ >= 104000000) +# include # endif -# include -# include -# include -# if __FreeBSD_version >= 220000 && defined(_KERNEL) -# include -# include +#endif /* _KERNEL */ +#if !SOLARIS && !defined(__hpux) && !defined(linux) +# if (NetBSD > 199609) || (OpenBSD > 199603) || (__FreeBSD_version >= 300000) +# include # else -# include +# include # endif -# include -# if defined(_KERNEL) -# include -# endif -# if !SOLARIS -# if (NetBSD > 199609) || (OpenBSD > 199603) || (__FreeBSD_version >= 300000) -# include -# else -# include -# endif -# include -# else +# include +#else +# if !defined(__hpux) && defined(_KERNEL) # include # include +# include +# include +# include # include -# ifdef _KERNEL -# include -# include -# include -# include -# include -# endif -# endif +# include +# include +# include +# endif /* !__hpux */ +#endif /* !SOLARIS && !__hpux */ +#if !defined(linux) # include -# include +#endif +#include -# include -# ifdef sun -# include -# endif -# if __FreeBSD_version >= 300000 -# include -# endif -# include -# include -# ifdef __sgi -# define _KMEMUSER -# include -# ifdef IFF_DRVRLOCK /* IRIX6 */ -# include -# endif -# endif -# if !(defined(__sgi) && !defined(IFF_DRVRLOCK)) /*IRIX<6*/ -# include -# endif -# include -# include -# include -# include -# include -# ifdef USE_INET6 -# include +#include +#ifdef sun +# include +#endif +#if __FreeBSD_version >= 300000 +# include +#endif 
+#include +#include +#ifdef __sgi +# include +# ifdef IFF_DRVRLOCK /* IRIX6 */ +# include # endif +#endif +#if !defined(__hpux) && !defined(linux) && \ + !(defined(__sgi) && !defined(IFF_DRVRLOCK)) /*IRIX<6*/ +# include +#endif +#include +#include +#include +#include +#include +#ifdef USE_INET6 +# include +#endif +#if !defined(linux) # include -# ifndef _KERNEL -# include -# endif -# include "netinet/ip_compat.h" -# include -# include "netinet/ip_fil.h" -# if (__FreeBSD_version >= 300000) -# include -# endif +#endif +#ifndef _KERNEL +# include +#endif +#include "netinet/ip_compat.h" +#include +#include "netinet/ip_fil.h" +#include "netinet/ip_nat.h" +#include "netinet/ip_frag.h" +#include "netinet/ip_state.h" +#include "netinet/ip_auth.h" +#if (__FreeBSD_version >= 300000) || defined(__NetBSD__) +# include +#endif +/* END OF INCLUDES */ -# ifndef MIN -# define MIN(a,b) (((a)<(b))?(a):(b)) -# endif -# ifdef IPFILTER_LOGSIZE -# undef IPLLOGSIZE -# define IPLLOGSIZE IPFILTER_LOGSIZE -# endif +#ifdef IPFILTER_LOG +# if defined(IPL_SELECT) +# include +# include +# define READ_COLLISION 0x001 -# if USE_MUTEX -extern kmutex_t ipl_mutex; -# if SOLARIS +iplog_select_t iplog_ss[IPL_LOGMAX+1]; + +extern int selwait; +# endif /* IPL_SELECT */ + +# if defined(linux) && defined(_KERNEL) +wait_queue_head_t iplh_linux[IPL_LOGSIZE]; +# endif +# if SOLARIS extern kcondvar_t iplwait; -# endif # endif -iplog_t **iplh[IPL_LOGMAX+1], *iplt[IPL_LOGMAX+1], *ipll[IPL_LOGMAX+1]; -size_t iplused[IPL_LOGMAX+1]; -static fr_info_t iplcrc[IPL_LOGMAX+1]; +iplog_t **iplh[IPL_LOGSIZE], *iplt[IPL_LOGSIZE], *ipll[IPL_LOGSIZE]; +int iplused[IPL_LOGSIZE]; +static fr_info_t iplcrc[IPL_LOGSIZE]; +int ipl_suppress = 1; +int ipl_buffer_sz; +int ipl_logmax = IPL_LOGMAX; +int ipl_logall = 0; +int ipl_log_init = 0; +int ipl_logsize = IPFILTER_LOGSIZE; +int ipl_magic[IPL_LOGSIZE] = { IPL_MAGIC, IPL_MAGIC_NAT, IPL_MAGIC_STATE, + IPL_MAGIC, IPL_MAGIC, IPL_MAGIC, + IPL_MAGIC, IPL_MAGIC }; -/* - * Initialise log 
buffers & pointers. Also iniialised the CRC to a local - * secret for use in calculating the "last log checksum". - */ -void ipflog_init() +/* ------------------------------------------------------------------------ */ +/* Function: fr_loginit */ +/* Returns: int - 0 == success (always returned) */ +/* Parameters: Nil */ +/* */ +/* Initialise log buffers & pointers. Also iniialised the CRC to a local */ +/* secret for use in calculating the "last log checksum". */ +/* ------------------------------------------------------------------------ */ +int fr_loginit() { int i; @@ -154,40 +182,88 @@ void ipflog_init() iplh[i] = &iplt[i]; iplused[i] = 0; bzero((char *)&iplcrc[i], sizeof(iplcrc[i])); +# ifdef IPL_SELECT + iplog_ss[i].read_waiter = 0; + iplog_ss[i].state = 0; +# endif +# if defined(linux) && defined(_KERNEL) + init_waitqueue_head(iplh_linux + i); +# endif } + +# if SOLARIS && defined(_KERNEL) + cv_init(&iplwait, "ipl condvar", CV_DRIVER, NULL); +# endif + MUTEX_INIT(&ipl_mutex, "ipf log mutex"); + + ipl_log_init = 1; + + return 0; } -/* - * ipflog - * Create a log record for a packet given that it has been triggered by a - * rule (or the default setting). Calculate the transport protocol header - * size using predetermined size of a couple of popular protocols and thus - * how much data to copy into the log, including part of the data body if - * requested. - */ -int ipflog(flags, ip, fin, m) -u_int flags; -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: fr_logunload */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Clean up any log data that has accumulated without being read. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_logunload() +{ + int i; + + if (ipl_log_init == 0) + return; + + for (i = IPL_LOGMAX; i >= 0; i--) + (void) ipflog_clear(i); + +# if SOLARIS && defined(_KERNEL) + cv_destroy(&iplwait); +# endif + MUTEX_DESTROY(&ipl_mutex); + + ipl_log_init = 0; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: ipflog */ +/* Returns: int - 0 == success, -1 == failure */ +/* Parameters: fin(I) - pointer to packet information */ +/* flags(I) - flags from filter rules */ +/* */ +/* Create a log record for a packet given that it has been triggered by a */ +/* rule (or the default setting). Calculate the transport protocol header */ +/* size using predetermined size of a couple of popular protocols and thus */ +/* how much data to copy into the log, including part of the data body if */ +/* requested. */ +/* ------------------------------------------------------------------------ */ +int ipflog(fin, flags) fr_info_t *fin; -mb_t *m; +u_int flags; { - ipflog_t ipfl; - register size_t mlen, hlen; + register size_t hlen; + int types[2], mlen; size_t sizes[2]; void *ptrs[2]; - int types[2]; + ipflog_t ipfl; u_char p; -# if SOLARIS && defined(_KERNEL) - ill_t *ifp = fin->fin_ifp; + mb_t *m; +# if (SOLARIS || defined(__hpux)) && defined(_KERNEL) + qif_t *ifp; # else - struct ifnet *ifp = fin->fin_ifp; -# endif + struct ifnet *ifp; +# endif /* SOLARIS || __hpux */ + ipfl.fl_nattag.ipt_num[0] = 0; + m = fin->fin_m; + ifp = fin->fin_ifp; + hlen = fin->fin_hlen; /* * calculate header size. 
*/ - hlen = fin->fin_hlen; if (fin->fin_off == 0) { p = fin->fin_fi.fi_p; if (p == IPPROTO_TCP) @@ -198,7 +274,7 @@ mb_t *m; struct icmp *icmp; icmp = (struct icmp *)fin->fin_dp; - + /* * For ICMP, if the packet is an error packet, also * include the information about the packet which @@ -220,12 +296,12 @@ mb_t *m; break; } } -#ifdef USE_INET6 +# ifdef USE_INET6 else if (p == IPPROTO_ICMPV6) { struct icmp6_hdr *icmp; icmp = (struct icmp6_hdr *)fin->fin_dp; - + /* * For ICMPV6, if the packet is an error packet, also * include the information about the packet which @@ -239,53 +315,71 @@ mb_t *m; fin->fin_dlen); } } -#endif +# endif } /* * Get the interface number and name to which this packet is * currently associated. */ - bzero((char *)ipfl.fl_ifname, sizeof(ipfl.fl_ifname)); -# if SOLARIS && defined(_KERNEL) - ipfl.fl_unit = (u_int)ifp->ill_ppa; - bcopy(ifp->ill_name, ipfl.fl_ifname, - MIN(ifp->ill_name_length, sizeof(ipfl.fl_ifname))); - mlen = (flags & FR_LOGBODY) ? MIN(msgdsize(m) - hlen, 128) : 0; +# if (SOLARIS || defined(__hpux)) && defined(_KERNEL) + ipfl.fl_unit = (u_int)ifp->qf_ppa; + COPYIFNAME(ifp, ipfl.fl_ifname); # else # if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \ - (defined(OpenBSD) && (OpenBSD >= 199603)) || \ + (defined(OpenBSD) && (OpenBSD >= 199603)) || defined(linux) || \ (defined(__FreeBSD__) && (__FreeBSD_version >= 501113)) - strncpy(ipfl.fl_ifname, ifp->if_xname, IFNAMSIZ); + COPYIFNAME(ifp, ipfl.fl_ifname); # else ipfl.fl_unit = (u_int)ifp->if_unit; - strncpy(ipfl.fl_ifname, ifp->if_name, MIN(sizeof(ipfl.fl_ifname), - sizeof(ifp->if_name))); +# if defined(_KERNEL) + if ((ipfl.fl_ifname[0] = ifp->if_name[0])) + if ((ipfl.fl_ifname[1] = ifp->if_name[1])) + if ((ipfl.fl_ifname[2] = ifp->if_name[2])) + ipfl.fl_ifname[3] = ifp->if_name[3]; +# else + (void) strncpy(ipfl.fl_ifname, IFNAME(ifp), sizeof(ipfl.fl_ifname)); + ipfl.fl_ifname[sizeof(ipfl.fl_ifname) - 1] = '\0'; +# endif # endif - mlen = (flags & FR_LOGBODY) ? 
MIN(fin->fin_plen - hlen, 128) : 0; -# endif +# endif /* __hpux || SOLARIS */ + mlen = fin->fin_plen - hlen; + if (!ipl_logall) { + mlen = (flags & FR_LOGBODY) ? MIN(mlen, 128) : 0; + } else if ((flags & FR_LOGBODY) == 0) { + mlen = 0; + } + if (mlen < 0) + mlen = 0; ipfl.fl_plen = (u_char)mlen; ipfl.fl_hlen = (u_char)hlen; ipfl.fl_rule = fin->fin_rule; - ipfl.fl_group = fin->fin_group; - if (fin->fin_fr != NULL) + (void) strncpy(ipfl.fl_group, fin->fin_group, FR_GROUPLEN); + if (fin->fin_fr != NULL) { ipfl.fl_loglevel = fin->fin_fr->fr_loglevel; - else + ipfl.fl_logtag = fin->fin_fr->fr_logtag; + } else { ipfl.fl_loglevel = 0xffff; + ipfl.fl_logtag = FR_NOLOGTAG; + } + if (fin->fin_nattag != NULL) + bcopy(fin->fin_nattag, (void *)&ipfl.fl_nattag, + sizeof(ipfl.fl_nattag)); ipfl.fl_flags = flags; ipfl.fl_dir = fin->fin_out; + ipfl.fl_lflags = fin->fin_flx; ptrs[0] = (void *)&ipfl; sizes[0] = sizeof(ipfl); types[0] = 0; -# if SOLARIS && defined(_KERNEL) +# if defined(MENTAT) && defined(_KERNEL) /* * Are we copied from the mblk or an aligned array ? 
*/ - if (ip == (ip_t *)m->b_rptr) { + if (fin->fin_ip == (ip_t *)m->b_rptr) { ptrs[1] = m; sizes[1] = hlen + mlen; types[1] = 1; } else { - ptrs[1] = ip; + ptrs[1] = fin->fin_ip; sizes[1] = hlen + mlen; types[1] = 0; } @@ -293,14 +387,25 @@ mb_t *m; ptrs[1] = m; sizes[1] = hlen + mlen; types[1] = 1; -# endif +# endif /* MENTAT */ return ipllog(IPL_LOGIPF, fin, ptrs, sizes, types, 2); } -/* - * ipllog - */ +/* ------------------------------------------------------------------------ */ +/* Function: ipllog */ +/* Returns: int - 0 == success, -1 == failure */ +/* Parameters: dev(I) - device that owns this log record */ +/* fin(I) - pointer to packet information */ +/* items(I) - array of pointers to log data */ +/* itemsz(I) - array of size of valid memory pointed to */ +/* types(I) - type of data pointed to by items pointers */ +/* cnt(I) - number of elements in arrays items/itemsz/types */ +/* */ +/* Takes an array of parameters and constructs one record to include the */ +/* miscellaneous packet information, as well as packet data, for reading */ +/* from the log device. */ +/* ------------------------------------------------------------------------ */ int ipllog(dev, fin, items, itemsz, types, cnt) int dev; fr_info_t *fin; @@ -308,33 +413,39 @@ void **items; size_t *itemsz; int *types, cnt; { - caddr_t buf, s; + caddr_t buf, ptr; iplog_t *ipl; size_t len; int i; - +# if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) + int s; +# endif + /* * Check to see if this log record has a CRC which matches the last * record logged. If it does, just up the count on the previous one * rather than create a new one. 
*/ - MUTEX_ENTER(&ipl_mutex); - if ((fin != NULL) && (fin->fin_off == 0)) { - if ((ipll[dev] != NULL) && - bcmp((char *)fin, (char *)&iplcrc[dev], FI_LCSIZE) == 0) { - ipll[dev]->ipl_count++; - MUTEX_EXIT(&ipl_mutex); - return 1; - } - bcopy((char *)fin, (char *)&iplcrc[dev], FI_LCSIZE); - } else - bzero((char *)&iplcrc[dev], FI_LCSIZE); - MUTEX_EXIT(&ipl_mutex); + if (ipl_suppress) { + MUTEX_ENTER(&ipl_mutex); + if ((fin != NULL) && (fin->fin_off == 0)) { + if ((ipll[dev] != NULL) && + bcmp((char *)fin, (char *)&iplcrc[dev], + FI_LCSIZE) == 0) { + ipll[dev]->ipl_count++; + MUTEX_EXIT(&ipl_mutex); + return 0; + } + bcopy((char *)fin, (char *)&iplcrc[dev], FI_LCSIZE); + } else + bzero((char *)&iplcrc[dev], FI_CSIZE); + MUTEX_EXIT(&ipl_mutex); + } /* * Get the total amount of data to be logged. */ - for (i = 0, len = IPLOG_SIZE; i < cnt; i++) + for (i = 0, len = sizeof(iplog_t); i < cnt; i++) len += itemsz[i]; /* @@ -342,70 +453,85 @@ int *types, cnt; * allocate that much. */ KMALLOCS(buf, caddr_t, len); - if (!buf) - return 0; + if (buf == NULL) + return -1; + SPL_NET(s); MUTEX_ENTER(&ipl_mutex); - if ((iplused[dev] + len) > IPLLOGSIZE) { + if ((iplused[dev] + len) > ipl_logsize) { MUTEX_EXIT(&ipl_mutex); + SPL_X(s); KFREES(buf, len); - return 0; + return -1; } iplused[dev] += len; MUTEX_EXIT(&ipl_mutex); + SPL_X(s); /* * advance the log pointer to the next empty record and deduct the * amount of space we're going to use. */ ipl = (iplog_t *)buf; - ipl->ipl_magic = IPL_MAGIC; + ipl->ipl_magic = ipl_magic[dev]; ipl->ipl_count = 1; ipl->ipl_next = NULL; ipl->ipl_dsize = len; -# ifdef _KERNEL -# if SOLARIS || defined(sun) - uniqtime(&ipl->ipl_tv); -# else -# if BSD >= 199306 || defined(__FreeBSD__) || defined(__sgi) - microtime(&ipl->ipl_tv); -# endif -# endif -# else +#ifdef _KERNEL + GETKTIME(&ipl->ipl_sec); +#else ipl->ipl_sec = 0; ipl->ipl_usec = 0; -# endif +#endif /* * Loop through all the items to be logged, copying each one to the * buffer. 
Use bcopy for normal data or the mb_t copyout routine. */ - for (i = 0, s = buf + IPLOG_SIZE; i < cnt; i++) { - if (types[i] == 0) - bcopy(items[i], s, itemsz[i]); - else if (types[i] == 1) { -# if SOLARIS && defined(_KERNEL) - copyout_mblk(items[i], 0, itemsz[i], s); -# else - m_copydata(items[i], 0, itemsz[i], s); -# endif + for (i = 0, ptr = buf + sizeof(*ipl); i < cnt; i++) { + if (types[i] == 0) { + bcopy(items[i], ptr, itemsz[i]); + } else if (types[i] == 1) { + COPYDATA(items[i], 0, itemsz[i], ptr); } - s += itemsz[i]; + ptr += itemsz[i]; } + SPL_NET(s); MUTEX_ENTER(&ipl_mutex); ipll[dev] = ipl; *iplh[dev] = ipl; iplh[dev] = &ipl->ipl_next; + + /* + * Now that the log record has been completed and added to the queue, + * wake up any listeners who may want to read it. + */ # if SOLARIS && defined(_KERNEL) cv_signal(&iplwait); - mutex_exit(&ipl_mutex); + MUTEX_EXIT(&ipl_mutex); # else MUTEX_EXIT(&ipl_mutex); - WAKEUP(&iplh[dev]); + WAKEUP(iplh,dev); +# endif + SPL_X(s); +# ifdef IPL_SELECT + iplog_input_ready(dev); # endif - return 1; + return 0; } +/* ------------------------------------------------------------------------ */ +/* Function: ipflog_read */ +/* Returns: int - 0 == success, else error value. */ +/* Parameters: unit(I) - device we are reading from */ +/* uio(O) - pointer to information about where to store data */ +/* */ +/* Called to handle a read on an IPFilter device. Returns only complete */ +/* log messages - will not partially copy a log record out to userland. */ +/* */ +/* NOTE: This function will block and wait for a signal to return data if */ +/* there is none present. Asynchronous I/O is not implemented. 
*/ +/* ------------------------------------------------------------------------ */ int ipflog_read(unit, uio) minor_t unit; struct uio *uio; @@ -413,7 +539,7 @@ struct uio *uio; size_t dlen, copied; int error = 0; iplog_t *ipl; -# if defined(_KERNEL) && !SOLARIS +# if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) int s; # endif @@ -423,11 +549,12 @@ struct uio *uio; */ if (IPL_LOGMAX < unit) return ENXIO; - if (!uio->uio_resid) + if (uio->uio_resid == 0) return 0; - if (uio->uio_resid < IPLOG_SIZE) + if ((uio->uio_resid < sizeof(iplog_t)) || + (uio->uio_resid > ipl_logsize)) return EINVAL; - + /* * Lock the log so we can snapshot the variables. Wait for a signal * if the log is empty. @@ -437,26 +564,48 @@ struct uio *uio; while (iplt[unit] == NULL) { # if SOLARIS && defined(_KERNEL) - if (!cv_wait_sig(&iplwait, &ipl_mutex)) { + if (!cv_wait_sig(&iplwait, &ipl_mutex.ipf_lk)) { MUTEX_EXIT(&ipl_mutex); return EINTR; } # else +# if defined(__hpux) && defined(_KERNEL) + lock_t *l; + +# ifdef IPL_SELECT + if (uio->uio_fpflags & (FNBLOCK|FNDELAY)) { + /* this is no blocking system call */ + MUTEX_EXIT(&ipl_mutex); + return 0; + } +# endif + MUTEX_EXIT(&ipl_mutex); - error = SLEEP(&iplh[unit], "ipl sleep"); - if (error) { - SPL_X(s); + l = get_sleep_lock(&iplh[unit]); + error = sleep(&iplh[unit], PZERO+1); + spinunlock(l); +# else +# if defined(__osf__) && defined(_KERNEL) + error = mpsleep(&iplh[unit], PSUSP|PCATCH, "iplread", 0, + &ipl_mutex, MS_LOCK_SIMPLE); +# else + MUTEX_EXIT(&ipl_mutex); + SPL_X(s); + error = SLEEP(unit + iplh, "ipl sleep"); +# endif /* __osf__ */ +# endif /* __hpux */ + if (error) return error; - } + SPL_NET(s); MUTEX_ENTER(&ipl_mutex); # endif /* SOLARIS */ } -# if BSD >= 199306 || defined(__FreeBSD__) +# if (BSD >= 199101) || defined(__FreeBSD__) || defined(__osf__) uio->uio_rw = UIO_READ; # endif - for (copied = 0; (ipl = iplt[unit]); copied += dlen) { + for (copied = 0; (ipl = iplt[unit]) != NULL; copied += dlen) { dlen = 
ipl->ipl_dsize; if (dlen > uio->uio_resid) break; @@ -466,15 +615,19 @@ struct uio *uio; iplt[unit] = ipl->ipl_next; iplused[unit] -= dlen; MUTEX_EXIT(&ipl_mutex); + SPL_X(s); error = UIOMOVE((caddr_t)ipl, dlen, UIO_READ, uio); - MUTEX_ENTER(&ipl_mutex); if (error) { + SPL_NET(s); + MUTEX_ENTER(&ipl_mutex); ipl->ipl_next = iplt[unit]; iplt[unit] = ipl; iplused[unit] += dlen; break; } + MUTEX_ENTER(&ipl_mutex); KFREES((caddr_t)ipl, dlen); + SPL_NET(s); } if (!iplt[unit]) { iplused[unit] = 0; @@ -488,14 +641,25 @@ struct uio *uio; } +/* ------------------------------------------------------------------------ */ +/* Function: ipflog_clear */ +/* Returns: int - number of log bytes cleared. */ +/* Parameters: unit(I) - device we are reading from */ +/* */ +/* Deletes all queued up log records for a given output device. */ +/* ------------------------------------------------------------------------ */ int ipflog_clear(unit) minor_t unit; { iplog_t *ipl; int used; +# if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) + int s; +# endif + SPL_NET(s); MUTEX_ENTER(&ipl_mutex); - while ((ipl = iplt[unit])) { + while ((ipl = iplt[unit]) != NULL) { iplt[unit] = ipl->ipl_next; KFREES((caddr_t)ipl, ipl->ipl_dsize); } @@ -503,8 +667,9 @@ minor_t unit; ipll[unit] = NULL; used = iplused[unit]; iplused[unit] = 0; - bzero((char *)&iplcrc[unit], FI_LCSIZE); + bzero((char *)&iplcrc[unit], FI_CSIZE); MUTEX_EXIT(&ipl_mutex); + SPL_X(s); return used; } #endif /* IPFILTER_LOG */ diff --git a/sys/contrib/ipfilter/netinet/ip_nat.c b/sys/contrib/ipfilter/netinet/ip_nat.c index 5ebdcfc..144c7ff 100644 --- a/sys/contrib/ipfilter/netinet/ip_nat.c +++ b/sys/contrib/ipfilter/netinet/ip_nat.c @@ -1,17 +1,15 @@ +/* $FreeBSD$ */ + /* - * Copyright (C) 1995-2001 by Darren Reed. + * Copyright (C) 1995-2003 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. - * - * Added redirect stuff and a LOT of bug fixes. 
(mcn@EnGarde.com) */ - -#if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL) -#define _KERNEL -#endif - -#if defined(__sgi) && (IRIX > 602) -# include +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 #endif #include #include @@ -22,30 +20,35 @@ defined(_KERNEL) # include "opt_ipfilter_log.h" #endif -#if !defined(_KERNEL) && !defined(KERNEL) +#if !defined(_KERNEL) # include # include # include +# define _KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include +# undef _KERNEL #endif -#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && (__FreeBSD_version >= 220000) # include # include #else # include #endif #include -#ifndef linux +#if !defined(linux) # include #endif #include -#if defined(_KERNEL) && !defined(linux) +#if defined(_KERNEL) # include -#endif -#if !defined(__SVR4) && !defined(__svr4__) -# ifndef linux +# if !defined(__SVR4) && !defined(__svr4__) # include # endif -#else +#endif +#if defined(__SVR4) || defined(__svr4__) # include # include # ifdef _KERNEL @@ -72,22 +75,14 @@ #include #include -#ifdef __sgi -# ifdef IFF_DRVRLOCK /* IRIX6 */ -#include -#include -# endif -#endif - #ifdef RFC1825 # include # include extern struct ifnet vpnif; #endif -#ifndef linux +#if !defined(linux) # include -# include #endif #include #include @@ -99,21 +94,46 @@ extern struct ifnet vpnif; #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" +#ifdef IPFILTER_SYNC +#include "netinet/ip_sync.h" +#endif #if (__FreeBSD_version >= 300000) # include #endif -#ifndef MIN -# define MIN(a,b) (((a)<(b))?(a):(b)) -#endif +/* END OF INCLUDES */ + #undef SOCKADDR_IN #define SOCKADDR_IN struct sockaddr_in #if !defined(lint) static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed"; -/* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.37.2.44 2001/07/21 07:17:22 darrenr Exp $"; */ static const char 
rcsid[] = "@(#)$FreeBSD$"; +static const char rcsid[] = "@(#)Id: ip_nat.c,v 2.195.2.38 2005/03/28 11:09:54 darrenr Exp"; #endif + +/* ======================================================================== */ +/* How the NAT is organised and works. */ +/* */ +/* Inside (interface y) NAT Outside (interface x) */ +/* -------------------- -+- ------------------------------------- */ +/* Packet going | out, processsed by fr_checknatout() for x */ +/* ------------> | ------------> */ +/* src=10.1.1.1 | src=192.1.1.1 */ +/* | */ +/* | in, processed by fr_checknatin() for x */ +/* <------------ | <------------ */ +/* dst=10.1.1.1 | dst=192.1.1.1 */ +/* -------------------- -+- ------------------------------------- */ +/* fr_checknatout() - changes ip_src and if required, sport */ +/* - creates a new mapping, if required. */ +/* fr_checknatin() - changes ip_dst and if required, dport */ +/* */ +/* In the NAT table, internal source is recorded as "in" and externally */ +/* seen as "out". */ +/* ======================================================================== */ + + nat_t **nat_table[2] = { NULL, NULL }, *nat_instances = NULL; ipnat_t *nat_list = NULL; @@ -122,40 +142,73 @@ u_int ipf_nattable_sz = NAT_TABLE_SZ; u_int ipf_natrules_sz = NAT_SIZE; u_int ipf_rdrrules_sz = RDR_SIZE; u_int ipf_hostmap_sz = HOSTMAP_SIZE; +u_int fr_nat_maxbucket = 0, + fr_nat_maxbucket_reset = 1; u_32_t nat_masks = 0; u_32_t rdr_masks = 0; ipnat_t **nat_rules = NULL; ipnat_t **rdr_rules = NULL; hostmap_t **maptable = NULL; +ipftq_t nat_tqb[IPF_TCP_NSTATES]; +ipftq_t nat_udptq; +ipftq_t nat_icmptq; +ipftq_t nat_iptq; +ipftq_t *nat_utqe = NULL; +#ifdef IPFILTER_LOG +int nat_logging = 1; +#else +int nat_logging = 0; +#endif u_long fr_defnatage = DEF_NAT_AGE, + fr_defnatipage = 120, /* 60 seconds */ fr_defnaticmpage = 6; /* 3 seconds */ natstat_t nat_stats; int fr_nat_lock = 0; -#ifdef USE_MUTEX -extern kmutex_t ipf_rw; -extern KRWLOCK_T ipf_nat; +int fr_nat_init = 0; +#if SOLARIS +extern int 
pfil_delayed_copy; #endif static int nat_flushtable __P((void)); +static int nat_clearlist __P((void)); static void nat_addnat __P((struct ipnat *)); static void nat_addrdr __P((struct ipnat *)); -static void nat_delete __P((struct nat *)); +static void nat_delete __P((struct nat *, int)); static void nat_delrdr __P((struct ipnat *)); static void nat_delnat __P((struct ipnat *)); static int fr_natgetent __P((caddr_t)); static int fr_natgetsz __P((caddr_t)); -static int fr_natputent __P((caddr_t)); -static void nat_tabmove __P((fr_info_t *, nat_t *)); -static int nat_match __P((fr_info_t *, ipnat_t *, ip_t *)); +static int fr_natputent __P((caddr_t, int)); +static void nat_tabmove __P((nat_t *)); +static int nat_match __P((fr_info_t *, ipnat_t *)); +static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *)); +static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *)); static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr, - struct in_addr)); + struct in_addr, struct in_addr, u_32_t)); static void nat_hostmapdel __P((struct hostmap *)); +static INLINE int nat_icmpquerytype4 __P((int)); +static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int)); +static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int)); +static INLINE int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *, + tcphdr_t *, nat_t **, int)); +static void nat_resolverule __P((ipnat_t *)); +static nat_t *fr_natclone __P((fr_info_t *, nat_t *)); static void nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *)); +static INLINE int nat_wildok __P((nat_t *, int, int, int, int)); -int nat_init() +/* ------------------------------------------------------------------------ */ +/* Function: fr_natinit */ +/* Returns: int - 0 == success, -1 == failure */ +/* Parameters: Nil */ +/* */ +/* Initialise all of the NAT locks, tables and other structures. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_natinit() { + int i; + KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz); if (nat_table[0] != NULL) bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *)); @@ -166,29 +219,109 @@ int nat_init() if (nat_table[1] != NULL) bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *)); else - return -1; + return -2; KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz); if (nat_rules != NULL) bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *)); else - return -1; + return -3; KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz); if (rdr_rules != NULL) bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *)); else - return -1; + return -4; KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz); if (maptable != NULL) bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz); else - return -1; + return -5; + + KMALLOCS(nat_stats.ns_bucketlen[0], u_long *, + ipf_nattable_sz * sizeof(u_long)); + if (nat_stats.ns_bucketlen[0] == NULL) + return -6; + bzero((char *)nat_stats.ns_bucketlen[0], + ipf_nattable_sz * sizeof(u_long)); + + KMALLOCS(nat_stats.ns_bucketlen[1], u_long *, + ipf_nattable_sz * sizeof(u_long)); + if (nat_stats.ns_bucketlen[1] == NULL) + return -7; + + bzero((char *)nat_stats.ns_bucketlen[1], + ipf_nattable_sz * sizeof(u_long)); + + if (fr_nat_maxbucket == 0) { + for (i = ipf_nattable_sz; i > 0; i >>= 1) + fr_nat_maxbucket++; + fr_nat_maxbucket *= 2; + } + + fr_sttab_init(nat_tqb); + /* + * Increase this because we may have "keep state" following this too + * and packet storms can occur if this is removed too quickly. 
+ */ + nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = fr_tcplastack; + nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &nat_udptq; + nat_udptq.ifq_ttl = fr_defnatage; + nat_udptq.ifq_ref = 1; + nat_udptq.ifq_head = NULL; + nat_udptq.ifq_tail = &nat_udptq.ifq_head; + MUTEX_INIT(&nat_udptq.ifq_lock, "nat ipftq udp tab"); + nat_udptq.ifq_next = &nat_icmptq; + nat_icmptq.ifq_ttl = fr_defnaticmpage; + nat_icmptq.ifq_ref = 1; + nat_icmptq.ifq_head = NULL; + nat_icmptq.ifq_tail = &nat_icmptq.ifq_head; + MUTEX_INIT(&nat_icmptq.ifq_lock, "nat icmp ipftq tab"); + nat_icmptq.ifq_next = &nat_iptq; + nat_iptq.ifq_ttl = fr_defnatipage; + nat_iptq.ifq_ref = 1; + nat_iptq.ifq_head = NULL; + nat_iptq.ifq_tail = &nat_iptq.ifq_head; + MUTEX_INIT(&nat_iptq.ifq_lock, "nat ip ipftq tab"); + nat_iptq.ifq_next = NULL; + + for (i = 0; i < IPF_TCP_NSTATES; i++) { + if (nat_tqb[i].ifq_ttl < fr_defnaticmpage) + nat_tqb[i].ifq_ttl = fr_defnaticmpage; +#ifdef LARGE_NAT + else if (nat_tqb[i].ifq_ttl > fr_defnatage) + nat_tqb[i].ifq_ttl = fr_defnatage; +#endif + } + + /* + * Increase this because we may have "keep state" following + * this too and packet storms can occur if this is removed + * too quickly. + */ + nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl; + + RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock"); + RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock"); + MUTEX_INIT(&ipf_nat_new, "ipf nat new mutex"); + MUTEX_INIT(&ipf_natio, "ipf nat io mutex"); + + fr_nat_init = 1; + return 0; } +/* ------------------------------------------------------------------------ */ +/* Function: nat_addrdr */ +/* Returns: Nil */ +/* Parameters: n(I) - pointer to NAT rule to add */ +/* */ +/* Adds a redirect rule to the hash table of redirect rules and the list of */ +/* loaded NAT rules. Updates the bitmask indicating which netmasks are in */ +/* use by redirect rules. 
*/ +/* ------------------------------------------------------------------------ */ static void nat_addrdr(n) ipnat_t *n; { @@ -197,7 +330,7 @@ ipnat_t *n; u_int hv; int k; - k = countbits(n->in_outmsk); + k = count4bits(n->in_outmsk); if ((k >= 0) && (k != 32)) rdr_masks |= 1 << k; j = (n->in_outip & n->in_outmsk); @@ -207,10 +340,20 @@ ipnat_t *n; np = &(*np)->in_rnext; n->in_rnext = NULL; n->in_prnext = np; + n->in_hv = hv; *np = n; } +/* ------------------------------------------------------------------------ */ +/* Function: nat_addnat */ +/* Returns: Nil */ +/* Parameters: n(I) - pointer to NAT rule to add */ +/* */ +/* Adds a NAT map rule to the hash table of rules and the list of loaded */ +/* NAT rules. Updates the bitmask indicating which netmasks are in use by */ +/* redirect rules. */ +/* ------------------------------------------------------------------------ */ static void nat_addnat(n) ipnat_t *n; { @@ -219,7 +362,7 @@ ipnat_t *n; u_int hv; int k; - k = countbits(n->in_inmsk); + k = count4bits(n->in_inmsk); if ((k >= 0) && (k != 32)) nat_masks |= 1 << k; j = (n->in_inip & n->in_inmsk); @@ -229,10 +372,18 @@ ipnat_t *n; np = &(*np)->in_mnext; n->in_mnext = NULL; n->in_pmnext = np; + n->in_hv = hv; *np = n; } +/* ------------------------------------------------------------------------ */ +/* Function: nat_delrdr */ +/* Returns: Nil */ +/* Parameters: n(I) - pointer to NAT rule to delete */ +/* */ +/* Removes a redirect rule from the hash table of redirect rules. */ +/* ------------------------------------------------------------------------ */ static void nat_delrdr(n) ipnat_t *n; { @@ -242,60 +393,93 @@ ipnat_t *n; } +/* ------------------------------------------------------------------------ */ +/* Function: nat_delnat */ +/* Returns: Nil */ +/* Parameters: n(I) - pointer to NAT rule to delete */ +/* */ +/* Removes a NAT map rule from the hash table of NAT map rules. 
*/ +/* ------------------------------------------------------------------------ */ static void nat_delnat(n) ipnat_t *n; { - if (n->in_mnext) + if (n->in_mnext != NULL) n->in_mnext->in_pmnext = n->in_pmnext; *n->in_pmnext = n->in_mnext; } -/* - * check if an ip address has already been allocated for a given mapping that - * is not doing port based translation. - * - * Must be called with ipf_nat held as a write lock. - */ -static struct hostmap *nat_hostmap(np, real, map) +/* ------------------------------------------------------------------------ */ +/* Function: nat_hostmap */ +/* Returns: struct hostmap* - NULL if no hostmap could be created, */ +/* else a pointer to the hostmapping to use */ +/* Parameters: np(I) - pointer to NAT rule */ +/* real(I) - real IP address */ +/* map(I) - mapped IP address */ +/* port(I) - destination port number */ +/* Write Locks: ipf_nat */ +/* */ +/* Check if an ip address has already been allocated for a given mapping */ +/* that is not doing port based translation. If is not yet allocated, then */ +/* create a new entry if a non-NULL NAT rule pointer has been supplied. 
*/ +/* ------------------------------------------------------------------------ */ +static struct hostmap *nat_hostmap(np, src, dst, map, port) ipnat_t *np; -struct in_addr real; +struct in_addr src; +struct in_addr dst; struct in_addr map; +u_32_t port; { hostmap_t *hm; u_int hv; - hv = real.s_addr % HOSTMAP_SIZE; + hv = (src.s_addr ^ dst.s_addr); + hv += src.s_addr; + hv += dst.s_addr; + hv %= HOSTMAP_SIZE; for (hm = maptable[hv]; hm; hm = hm->hm_next) - if ((hm->hm_realip.s_addr == real.s_addr) && - (np == hm->hm_ipnat)) { + if ((hm->hm_srcip.s_addr == src.s_addr) && + (hm->hm_dstip.s_addr == dst.s_addr) && + ((np == NULL) || (np == hm->hm_ipnat)) && + ((port == 0) || (port == hm->hm_port))) { hm->hm_ref++; return hm; } + if (np == NULL) + return NULL; + KMALLOC(hm, hostmap_t *); if (hm) { hm->hm_next = maptable[hv]; hm->hm_pnext = maptable + hv; - if (maptable[hv]) + if (maptable[hv] != NULL) maptable[hv]->hm_pnext = &hm->hm_next; maptable[hv] = hm; hm->hm_ipnat = np; - hm->hm_realip = real; + hm->hm_srcip = src; + hm->hm_dstip = dst; hm->hm_mapip = map; hm->hm_ref = 1; + hm->hm_port = port; } return hm; } -/* - * Must be called with ipf_nat held as a write lock. - */ +/* ------------------------------------------------------------------------ */ +/* Function: nat_hostmapdel */ +/* Returns: Nil */ +/* Parameters: hm(I) - pointer to hostmap structure */ +/* Write Locks: ipf_nat */ +/* */ +/* Decrement the references to this hostmap structure by one. If this */ +/* reaches zero then remove it and free it. 
*/ +/* ------------------------------------------------------------------------ */ static void nat_hostmapdel(hm) struct hostmap *hm; { - ATOMIC_DEC32(hm->hm_ref); + hm->hm_ref--; if (hm->hm_ref == 0) { if (hm->hm_next) hm->hm_next->hm_pnext = hm->hm_pnext; @@ -305,17 +489,27 @@ struct hostmap *hm; } +/* ------------------------------------------------------------------------ */ +/* Function: fix_outcksum */ +/* Returns: Nil */ +/* Parameters: fin(I) - pointer to packet information */ +/* sp(I) - location of 16bit checksum to update */ +/* n((I) - amount to adjust checksum by */ +/* */ +/* Adjusts the 16bit checksum by "n" for packets going out. */ +/* ------------------------------------------------------------------------ */ void fix_outcksum(fin, sp, n) fr_info_t *fin; u_short *sp; u_32_t n; { - register u_short sumshort; - register u_32_t sum1; + u_short sumshort; + u_32_t sum1; - if (!n) + if (n == 0) return; - else if (n & NAT_HW_CKSUM) { + + if (n & NAT_HW_CKSUM) { n &= 0xffff; n += fin->fin_dlen; n = (n & 0xffff) + (n >> 16); @@ -332,28 +526,34 @@ u_32_t n; } +/* ------------------------------------------------------------------------ */ +/* Function: fix_incksum */ +/* Returns: Nil */ +/* Parameters: fin(I) - pointer to packet information */ +/* sp(I) - location of 16bit checksum to update */ +/* n((I) - amount to adjust checksum by */ +/* */ +/* Adjusts the 16bit checksum by "n" for packets going in. 
*/ +/* ------------------------------------------------------------------------ */ void fix_incksum(fin, sp, n) fr_info_t *fin; u_short *sp; u_32_t n; { - register u_short sumshort; - register u_32_t sum1; + u_short sumshort; + u_32_t sum1; - if (!n) + if (n == 0) return; - else if (n & NAT_HW_CKSUM) { + + if (n & NAT_HW_CKSUM) { n &= 0xffff; n += fin->fin_dlen; n = (n & 0xffff) + (n >> 16); *sp = n & 0xffff; return; } -#ifdef sparc - sum1 = (~(*sp)) & 0xffff; -#else sum1 = (~ntohs(*sp)) & 0xffff; -#endif sum1 += ~(n) & 0xffff; sum1 = (sum1 >> 16) + (sum1 & 0xffff); /* Again */ @@ -363,27 +563,32 @@ u_32_t n; } -/* - * fix_datacksum is used *only* for the adjustments of checksums in the data - * section of an IP packet. - * - * The only situation in which you need to do this is when NAT'ing an - * ICMP error message. Such a message, contains in its body the IP header - * of the original IP packet, that causes the error. - * - * You can't use fix_incksum or fix_outcksum in that case, because for the - * kernel the data section of the ICMP error is just data, and no special - * processing like hardware cksum or ntohs processing have been done by the - * kernel on the data section. - */ +/* ------------------------------------------------------------------------ */ +/* Function: fix_datacksum */ +/* Returns: Nil */ +/* Parameters: sp(I) - location of 16bit checksum to update */ +/* n((I) - amount to adjust checksum by */ +/* */ +/* Fix_datacksum is used *only* for the adjustments of checksums in the */ +/* data section of an IP packet. */ +/* */ +/* The only situation in which you need to do this is when NAT'ing an */ +/* ICMP error message. Such a message, contains in its body the IP header */ +/* of the original IP packet, that causes the error. 
*/ +/* */ +/* You can't use fix_incksum or fix_outcksum in that case, because for the */ +/* kernel the data section of the ICMP error is just data, and no special */ +/* processing like hardware cksum or ntohs processing have been done by the */ +/* kernel on the data section. */ +/* ------------------------------------------------------------------------ */ void fix_datacksum(sp, n) u_short *sp; u_32_t n; { - register u_short sumshort; - register u_32_t sum1; + u_short sumshort; + u_32_t sum1; - if (!n) + if (n == 0) return; sum1 = (~ntohs(*sp)) & 0xffff; @@ -395,76 +600,65 @@ u_32_t n; *(sp) = htons(sumshort); } -/* - * How the NAT is organised and works. - * - * Inside (interface y) NAT Outside (interface x) - * -------------------- -+- ------------------------------------- - * Packet going | out, processsed by ip_natout() for x - * ------------> | ------------> - * src=10.1.1.1 | src=192.1.1.1 - * | - * | in, processed by ip_natin() for x - * <------------ | <------------ - * dst=10.1.1.1 | dst=192.1.1.1 - * -------------------- -+- ------------------------------------- - * ip_natout() - changes ip_src and if required, sport - * - creates a new mapping, if required. - * ip_natin() - changes ip_dst and if required, dport - * - * In the NAT table, internal source is recorded as "in" and externally - * seen as "out". - */ -/* - * Handle ioctls which manipulate the NAT. - */ -int nat_ioctl(data, cmd, mode) -#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003) -u_long cmd; -#else -int cmd; -#endif +/* ------------------------------------------------------------------------ */ +/* Function: fr_nat_ioctl */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: data(I) - pointer to ioctl data */ +/* cmd(I) - ioctl command integer */ +/* mode(I) - file mode bits used with open */ +/* */ +/* Processes an ioctl call made to operate on the IP Filter NAT device. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_nat_ioctl(data, cmd, mode) +ioctlcmd_t cmd; caddr_t data; int mode; { - register ipnat_t *nat, *nt, *n = NULL, **np = NULL; + ipnat_t *nat, *nt, *n = NULL, **np = NULL; int error = 0, ret, arg, getlock; ipnat_t natd; - u_32_t i, j; #if (BSD >= 199306) && defined(_KERNEL) if ((securelevel >= 3) && (mode & FWRITE)) return EPERM; #endif - nat = NULL; /* XXX gcc -Wuninitialized */ - KMALLOC(nt, ipnat_t *); +#if defined(__osf__) && defined(_KERNEL) + getlock = 0; +#else getlock = (mode & NAT_LOCKHELD) ? 0 : 1; - if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) { +#endif + + nat = NULL; /* XXX gcc -Wuninitialized */ + if (cmd == (ioctlcmd_t)SIOCADNAT) { + KMALLOC(nt, ipnat_t *); + } else { + nt = NULL; + } + + if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { if (mode & NAT_SYSSPACE) { bcopy(data, (char *)&natd, sizeof(natd)); error = 0; } else { - error = IRCOPYPTR(data, (char *)&natd, sizeof(natd)); + error = fr_inobj(data, &natd, IPFOBJ_IPNAT); } - } else if (cmd == SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */ - error = IRCOPY(data, (char *)&arg, sizeof(arg)); - if (error) - error = EFAULT; + + } else if (cmd == (ioctlcmd_t)SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */ + BCOPYIN(data, &arg, sizeof(arg)); } - if (error) + if (error != 0) goto done; /* * For add/delete, look to see if the NAT entry is already present */ - if (getlock == 1) { - WRITE_ENTER(&ipf_nat); - } - if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) { + if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) { nat = &natd; + if (nat->in_v == 0) /* For backward compat. 
*/ + nat->in_v = 4; nat->in_flags &= IPN_USERFLAGS; if ((nat->in_redir & NAT_MAPBLK) == 0) { if ((nat->in_flags & IPN_SPLIT) == 0) @@ -472,14 +666,11 @@ int mode; if ((nat->in_flags & IPN_IPRANGE) == 0) nat->in_outip &= nat->in_outmsk; } - for (np = &nat_list; (n = *np); np = &n->in_next) + MUTEX_ENTER(&ipf_natio); + for (np = &nat_list; ((n = *np) != NULL); np = &n->in_next) if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags, - IPN_CMPSIZ)) { - if (n->in_redir == NAT_REDIRECT && - n->in_pnext != nat->in_pnext) - continue; + IPN_CMPSIZ)) break; - } } switch (cmd) @@ -493,182 +684,94 @@ int mode; error = EPERM; else { tmp = ipflog_clear(IPL_LOGNAT); - IWCOPY((char *)&tmp, (char *)data, sizeof(tmp)); + BCOPYOUT((char *)&tmp, (char *)data, sizeof(tmp)); } break; } + case SIOCSETLG : + if (!(mode & FWRITE)) + error = EPERM; + else { + BCOPYIN((char *)data, (char *)&nat_logging, + sizeof(nat_logging)); + } + break; + case SIOCGETLG : + BCOPYOUT((char *)&nat_logging, (char *)data, + sizeof(nat_logging)); + break; + case FIONREAD : + arg = iplused[IPL_LOGNAT]; + BCOPYOUT(&arg, data, sizeof(arg)); + break; #endif case SIOCADNAT : if (!(mode & FWRITE)) { error = EPERM; - break; - } - if (n) { + } else if (n != NULL) { error = EEXIST; - break; - } - if (nt == NULL) { + } else if (nt == NULL) { error = ENOMEM; - break; - } - n = nt; - nt = NULL; - bcopy((char *)nat, (char *)n, sizeof(*n)); - n->in_ifp = (void *)GETUNIT(n->in_ifname, 4); - if (!n->in_ifp) - n->in_ifp = (void *)-1; - if (n->in_plabel[0] != '\0') { - n->in_apr = appr_lookup(n->in_p, n->in_plabel); - if (!n->in_apr) { - error = ENOENT; - break; - } } - n->in_next = NULL; - *np = n; - - if (n->in_redir & NAT_REDIRECT) { - n->in_flags &= ~IPN_NOTDST; - nat_addrdr(n); - } - if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { - n->in_flags &= ~IPN_NOTSRC; - nat_addnat(n); - } - - n->in_use = 0; - if (n->in_redir & NAT_MAPBLK) - n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk); - else if (n->in_flags & IPN_AUTOPORTMAP) - 
n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk); - else if (n->in_flags & IPN_IPRANGE) - n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip); - else if (n->in_flags & IPN_SPLIT) - n->in_space = 2; - else - n->in_space = ~ntohl(n->in_outmsk); - /* - * Calculate the number of valid IP addresses in the output - * mapping range. In all cases, the range is inclusive of - * the start and ending IP addresses. - * If to a CIDR address, lose 2: broadcast + network address - * (so subtract 1) - * If to a range, add one. - * If to a single IP address, set to 1. - */ - if (n->in_space) { - if ((n->in_flags & IPN_IPRANGE) != 0) - n->in_space += 1; - else - n->in_space -= 1; - } else - n->in_space = 1; - if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) && - ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0)) - n->in_nip = ntohl(n->in_outip) + 1; - else if ((n->in_flags & IPN_SPLIT) && - (n->in_redir & NAT_REDIRECT)) - n->in_nip = ntohl(n->in_inip); - else - n->in_nip = ntohl(n->in_outip); - if (n->in_redir & NAT_MAP) { - n->in_pnext = ntohs(n->in_pmin); - /* - * Multiply by the number of ports made available. - */ - if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) { - n->in_space *= (ntohs(n->in_pmax) - - ntohs(n->in_pmin) + 1); - /* - * Because two different sources can map to - * different destinations but use the same - * local IP#/port #. - * If the result is smaller than in_space, then - * we may have wrapped around 32bits. - */ - i = n->in_inmsk; - if ((i != 0) && (i != 0xffffffff)) { - j = n->in_space * (~ntohl(i) + 1); - if (j >= n->in_space) - n->in_space = j; - else - n->in_space = 0xffffffff; - } - } - /* - * If no protocol is specified, multiple by 256. 
- */ - if ((n->in_flags & IPN_TCPUDP) == 0) { - j = n->in_space * 256; - if (j >= n->in_space) - n->in_space = j; - else - n->in_space = 0xffffffff; - } + if (error != 0) { + MUTEX_EXIT(&ipf_natio); + break; } - /* Otherwise, these fields are preset */ - n = NULL; - nat_stats.ns_rules++; + bcopy((char *)nat, (char *)nt, sizeof(*n)); + error = nat_siocaddnat(nt, np, getlock); + MUTEX_EXIT(&ipf_natio); + if (error == 0) + nt = NULL; break; case SIOCRMNAT : if (!(mode & FWRITE)) { error = EPERM; n = NULL; - break; - } - if (!n) { + } else if (n == NULL) { error = ESRCH; - break; } - if (n->in_redir & NAT_REDIRECT) - nat_delrdr(n); - if (n->in_redir & (NAT_MAPBLK|NAT_MAP)) - nat_delnat(n); - if (nat_list == NULL) { - nat_masks = 0; - rdr_masks = 0; - } - *np = n->in_next; - if (!n->in_use) { - if (n->in_apr) - appr_free(n->in_apr); - KFREE(n); - nat_stats.ns_rules--; - } else { - n->in_flags |= IPN_DELETE; - n->in_next = NULL; + + if (error != 0) { + MUTEX_EXIT(&ipf_natio); + break; } + nat_siocdelnat(n, np, getlock); + + MUTEX_EXIT(&ipf_natio); n = NULL; break; case SIOCGNATS : - MUTEX_DOWNGRADE(&ipf_nat); nat_stats.ns_table[0] = nat_table[0]; nat_stats.ns_table[1] = nat_table[1]; nat_stats.ns_list = nat_list; nat_stats.ns_maptable = maptable; nat_stats.ns_nattab_sz = ipf_nattable_sz; + nat_stats.ns_nattab_max = ipf_nattable_max; nat_stats.ns_rultab_sz = ipf_natrules_sz; nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz; nat_stats.ns_hostmap_sz = ipf_hostmap_sz; nat_stats.ns_instances = nat_instances; nat_stats.ns_apslist = ap_sess_list; - error = IWCOPYPTR((char *)&nat_stats, (char *)data, - sizeof(nat_stats)); + error = fr_outobj(data, &nat_stats, IPFOBJ_NATSTAT); break; case SIOCGNATL : { natlookup_t nl; - MUTEX_DOWNGRADE(&ipf_nat); - error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl)); - if (error) - break; - - if (nat_lookupredir(&nl)) { - error = IWCOPYPTR((char *)&nl, (char *)data, - sizeof(nl)); - } else - error = ESRCH; + if (getlock) { + READ_ENTER(&ipf_nat); + 
} + error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP); + if (error == 0) { + if (nat_lookupredir(&nl) != NULL) { + error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP); + } else { + error = ESRCH; + } + } + if (getlock) { + RWLOCK_EXIT(&ipf_nat); + } break; } case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */ @@ -676,6 +779,9 @@ int mode; error = EPERM; break; } + if (getlock) { + WRITE_ENTER(&ipf_nat); + } error = 0; if (arg == 0) ret = nat_flushtable(); @@ -683,57 +789,54 @@ int mode; ret = nat_clearlist(); else error = EINVAL; - MUTEX_DOWNGRADE(&ipf_nat); - if (!error) { - error = IWCOPY((caddr_t)&ret, data, sizeof(ret)); - if (error) - error = EFAULT; + if (getlock) { + RWLOCK_EXIT(&ipf_nat); + } + if (error == 0) { + BCOPYOUT(&ret, data, sizeof(ret)); } break; + case SIOCPROXY : + error = appr_ioctl(data, cmd, mode); + break; case SIOCSTLCK : - error = IRCOPY(data, (caddr_t)&arg, sizeof(arg)); - if (!error) { - error = IWCOPY((caddr_t)&fr_nat_lock, data, - sizeof(fr_nat_lock)); - if (!error) - fr_nat_lock = arg; - } else - error = EFAULT; + fr_lock(data, &fr_nat_lock); break; case SIOCSTPUT : - if (fr_nat_lock) - error = fr_natputent(data); - else + if (fr_nat_lock) { + error = fr_natputent(data, getlock); + } else { error = EACCES; + } break; case SIOCSTGSZ : - if (fr_nat_lock) + if (fr_nat_lock) { + if (getlock) { + READ_ENTER(&ipf_nat); + } error = fr_natgetsz(data); - else + if (getlock) { + RWLOCK_EXIT(&ipf_nat); + } + } else error = EACCES; break; case SIOCSTGET : - if (fr_nat_lock) + if (fr_nat_lock) { + if (getlock) { + READ_ENTER(&ipf_nat); + } error = fr_natgetent(data); - else + if (getlock) { + RWLOCK_EXIT(&ipf_nat); + } + } else error = EACCES; break; - case FIONREAD : -#ifdef IPFILTER_LOG - arg = (int)iplused[IPL_LOGNAT]; - MUTEX_DOWNGRADE(&ipf_nat); - error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg)); - if (error) - error = EFAULT; -#endif - break; default : error = EINVAL; break; } - if (getlock == 1) { - RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */ - } 
done: if (nt) KFREE(nt); @@ -741,27 +844,264 @@ done: } +/* ------------------------------------------------------------------------ */ +/* Function: nat_siocaddnat */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: n(I) - pointer to new NAT rule */ +/* np(I) - pointer to where to insert new NAT rule */ +/* getlock(I) - flag indicating if lock on ipf_nat is held */ +/* Mutex Locks: ipf_natio */ +/* */ +/* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ +/* from information passed to the kernel, then add it to the appropriate */ +/* NAT rule table(s). */ +/* ------------------------------------------------------------------------ */ +static int nat_siocaddnat(n, np, getlock) +ipnat_t *n, **np; +int getlock; +{ + int error = 0, i, j; + + nat_resolverule(n); + if (n->in_plabel[0] != '\0') { + if (n->in_apr == NULL) + return ENOENT; + } + + if ((n->in_age[0] == 0) && (n->in_age[1] != 0)) + return EINVAL; + + n->in_use = 0; + if (n->in_redir & NAT_MAPBLK) + n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk); + else if (n->in_flags & IPN_AUTOPORTMAP) + n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk); + else if (n->in_flags & IPN_IPRANGE) + n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip); + else if (n->in_flags & IPN_SPLIT) + n->in_space = 2; + else if (n->in_outmsk != 0) + n->in_space = ~ntohl(n->in_outmsk); + else + n->in_space = 1; + + /* + * Calculate the number of valid IP addresses in the output + * mapping range. In all cases, the range is inclusive of + * the start and ending IP addresses. + * If to a CIDR address, lose 2: broadcast + network address + * (so subtract 1) + * If to a range, add one. + * If to a single IP address, set to 1. 
+ */ + if (n->in_space) { + if ((n->in_flags & IPN_IPRANGE) != 0) + n->in_space += 1; + else + n->in_space -= 1; + } else + n->in_space = 1; + + if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) && + ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0)) + n->in_nip = ntohl(n->in_outip) + 1; + else if ((n->in_flags & IPN_SPLIT) && + (n->in_redir & NAT_REDIRECT)) + n->in_nip = ntohl(n->in_inip); + else + n->in_nip = ntohl(n->in_outip); + if (n->in_redir & NAT_MAP) { + n->in_pnext = ntohs(n->in_pmin); + /* + * Multiply by the number of ports made available. + */ + if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) { + n->in_space *= (ntohs(n->in_pmax) - + ntohs(n->in_pmin) + 1); + /* + * Because two different sources can map to + * different destinations but use the same + * local IP#/port #. + * If the result is smaller than in_space, then + * we may have wrapped around 32bits. + */ + i = n->in_inmsk; + if ((i != 0) && (i != 0xffffffff)) { + j = n->in_space * (~ntohl(i) + 1); + if (j >= n->in_space) + n->in_space = j; + else + n->in_space = 0xffffffff; + } + } + /* + * If no protocol is specified, multiple by 256 to allow for + * at least one IP:IP mapping per protocol. 
+ */ + if ((n->in_flags & IPN_TCPUDPICMP) == 0) { + j = n->in_space * 256; + if (j >= n->in_space) + n->in_space = j; + else + n->in_space = 0xffffffff; + } + } + + /* Otherwise, these fields are preset */ + + if (getlock) { + WRITE_ENTER(&ipf_nat); + } + n->in_next = NULL; + *np = n; + + if (n->in_age[0] != 0) + n->in_tqehead[0] = fr_addtimeoutqueue(&nat_utqe, n->in_age[0]); + + if (n->in_age[1] != 0) + n->in_tqehead[1] = fr_addtimeoutqueue(&nat_utqe, n->in_age[1]); + + if (n->in_redir & NAT_REDIRECT) { + n->in_flags &= ~IPN_NOTDST; + nat_addrdr(n); + } + if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { + n->in_flags &= ~IPN_NOTSRC; + nat_addnat(n); + } + n = NULL; + nat_stats.ns_rules++; +#if SOLARIS + pfil_delayed_copy = 0; +#endif + if (getlock) { + RWLOCK_EXIT(&ipf_nat); /* WRITE */ + } + + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_resolvrule */ +/* Returns: Nil */ +/* Parameters: n(I) - pointer to NAT rule */ +/* */ +/* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ +/* from information passed to the kernel, then add it to the appropriate */ +/* NAT rule table(s). 
*/ +/* ------------------------------------------------------------------------ */ +static void nat_resolverule(n) +ipnat_t *n; +{ + n->in_ifnames[0][LIFNAMSIZ - 1] = '\0'; + n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4); + + n->in_ifnames[1][LIFNAMSIZ - 1] = '\0'; + if (n->in_ifnames[1][0] == '\0') { + (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ); + n->in_ifps[1] = n->in_ifps[0]; + } else { + n->in_ifps[1] = fr_resolvenic(n->in_ifnames[0], 4); + } + + if (n->in_plabel[0] != '\0') { + n->in_apr = appr_lookup(n->in_p, n->in_plabel); + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_siocdelnat */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: n(I) - pointer to new NAT rule */ +/* np(I) - pointer to where to insert new NAT rule */ +/* getlock(I) - flag indicating if lock on ipf_nat is held */ +/* Mutex Locks: ipf_natio */ +/* */ +/* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ +/* from information passed to the kernel, then add it to the appropriate */ +/* NAT rule table(s). 
*/ +/* ------------------------------------------------------------------------ */ +static void nat_siocdelnat(n, np, getlock) +ipnat_t *n, **np; +int getlock; +{ + if (getlock) { + WRITE_ENTER(&ipf_nat); + } + if (n->in_redir & NAT_REDIRECT) + nat_delrdr(n); + if (n->in_redir & (NAT_MAPBLK|NAT_MAP)) + nat_delnat(n); + if (nat_list == NULL) { + nat_masks = 0; + rdr_masks = 0; + } + + if (n->in_tqehead[0] != NULL) { + if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) { + fr_freetimeoutqueue(n->in_tqehead[1]); + } + } + + if (n->in_tqehead[1] != NULL) { + if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) { + fr_freetimeoutqueue(n->in_tqehead[1]); + } + } + + *np = n->in_next; + + if (n->in_use == 0) { + if (n->in_apr) + appr_free(n->in_apr); + KFREE(n); + nat_stats.ns_rules--; +#if SOLARIS + if (nat_stats.ns_rules == 0) + pfil_delayed_copy = 1; +#endif + } else { + n->in_flags |= IPN_DELETE; + n->in_next = NULL; + } + if (getlock) { + RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */ + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_natgetsz */ +/* Returns: int - 0 == success, != 0 is the error value. */ +/* Parameters: data(I) - pointer to natget structure with kernel pointer */ +/* get the size of. */ +/* */ +/* Handle SIOCSTGSZ. */ +/* Return the size of the nat list entry to be copied back to user space. */ +/* The size of the entry is stored in the ng_sz field and the enture natget */ +/* structure is copied back to the user. */ +/* ------------------------------------------------------------------------ */ static int fr_natgetsz(data) caddr_t data; { ap_session_t *aps; nat_t *nat, *n; - int error = 0; natget_t ng; - error = IRCOPY(data, (caddr_t)&ng, sizeof(ng)); - if (error) - return EFAULT; + BCOPYIN(data, &ng, sizeof(ng)); nat = ng.ng_ptr; if (!nat) { nat = nat_instances; ng.ng_sz = 0; + /* + * Empty list so the size returned is 0. Simple. 
+ */ if (nat == NULL) { - error = IWCOPY((caddr_t)&ng, data, sizeof(ng)); - if (error) - error = EFAULT; - return error; + BCOPYOUT(&ng, data, sizeof(ng)); + return 0; } } else { /* @@ -776,45 +1116,59 @@ caddr_t data; return ESRCH; } + /* + * Incluse any space required for proxy data structures. + */ ng.ng_sz = sizeof(nat_save_t); aps = nat->nat_aps; - if ((aps != NULL) && (aps->aps_data != 0)) { - ng.ng_sz += sizeof(ap_session_t); - ng.ng_sz += aps->aps_psiz; - if (aps->aps_psiz > 4) /* XXX - sizeof(ipn_data) */ - ng.ng_sz -= 4; + if (aps != NULL) { + ng.ng_sz += sizeof(ap_session_t) - 4; + if (aps->aps_data != 0) + ng.ng_sz += aps->aps_psiz; } - error = IWCOPY((caddr_t)&ng, data, sizeof(ng)); - if (error) - error = EFAULT; - return error; + BCOPYOUT(&ng, data, sizeof(ng)); + return 0; } +/* ------------------------------------------------------------------------ */ +/* Function: fr_natgetent */ +/* Returns: int - 0 == success, != 0 is the error value. */ +/* Parameters: data(I) - pointer to natget structure with kernel pointer */ +/* to NAT structure to copy out. */ +/* */ +/* Handle SIOCSTGET. */ +/* Copies out NAT entry to user space. 
Any additional data held for a */ +/* proxy is also copied, as to is the NAT rule which was responsible for it */ +/* ------------------------------------------------------------------------ */ static int fr_natgetent(data) caddr_t data; { - nat_save_t ipn, *ipnp, *ipnn = NULL; - register nat_t *n, *nat; + int error, outsize; ap_session_t *aps; - size_t dsz; - int error; + nat_save_t *ipn, ipns; + nat_t *n, *nat; - error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp)); - if (error) - return EFAULT; - error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn)); - if (error) - return EFAULT; + error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE); + if (error != 0) + return error; - nat = ipn.ipn_next; - if (!nat) { + if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920)) + return EINVAL; + + KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize); + if (ipn == NULL) + return ENOMEM; + + ipn->ipn_dsize = ipns.ipn_dsize; + nat = ipns.ipn_next; + if (nat == NULL) { nat = nat_instances; if (nat == NULL) { if (nat_instances == NULL) - return ENOENT; - return 0; + error = ENOENT; + goto finished; } } else { /* @@ -825,150 +1179,215 @@ caddr_t data; for (n = nat_instances; n; n = n->nat_next) if (n == nat) break; - if (!n) - return ESRCH; - } + if (n == NULL) { + error = ESRCH; + goto finished; + } + } + ipn->ipn_next = nat->nat_next; - ipn.ipn_next = nat->nat_next; - bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat)); - ipn.ipn_nat.nat_data = NULL; + /* + * Copy the NAT structure. + */ + bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat)); - if (nat->nat_ptr) { - bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat, - sizeof(ipn.ipn_ipnat)); - } + /* + * If we have a pointer to the NAT rule it belongs to, save that too. 
+ */ + if (nat->nat_ptr != NULL) + bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, + sizeof(ipn->ipn_ipnat)); - if (nat->nat_fr) - bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule, - sizeof(ipn.ipn_rule)); + /* + * If we also know the NAT entry has an associated filter rule, + * save that too. + */ + if (nat->nat_fr != NULL) + bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr, + sizeof(ipn->ipn_fr)); - if ((aps = nat->nat_aps)) { - dsz = sizeof(*aps); - if (aps->aps_data) - dsz += aps->aps_psiz; - ipn.ipn_dsize = dsz; - if (dsz > sizeof(ipn.ipn_data)) - dsz -= sizeof(ipn.ipn_data); - KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + dsz); - if (ipnn == NULL) - return ENOMEM; - bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn)); + /* + * Last but not least, if there is an application proxy session set + * up for this NAT entry, then copy that out too, including any + * private data saved along side it by the proxy. + */ + aps = nat->nat_aps; + outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data); + if (aps != NULL) { + char *s; - bcopy((char *)aps, (char *)ipnn->ipn_data, sizeof(*aps)); - if (aps->aps_data) { - bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps), - aps->aps_psiz); + if (outsize < sizeof(*aps)) { + error = ENOBUFS; + goto finished; } - error = IWCOPY((caddr_t)ipnn, ipnp, - sizeof(ipn) + dsz); - if (error) - error = EFAULT; - KFREES(ipnn, sizeof(*ipnn) + dsz); - } else { - ipn.ipn_dsize = 0; - error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn)); - if (error) - error = EFAULT; + + s = ipn->ipn_data; + bcopy((char *)aps, s, sizeof(*aps)); + s += sizeof(*aps); + outsize -= sizeof(*aps); + if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz)) + bcopy(aps->aps_data, s, aps->aps_psiz); + else + error = ENOBUFS; + } + if (error == 0) { + error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); + } + +finished: + if (ipn != NULL) { + KFREES(ipn, ipns.ipn_dsize); } return error; } -static int fr_natputent(data) +/* 
------------------------------------------------------------------------ */ +/* Function: fr_natputent */ +/* Returns: int - 0 == success, != 0 is the error value. */ +/* Parameters: data(I) - pointer to natget structure with NAT */ +/* structure information to load into the kernel */ +/* getlock(I) - flag indicating whether or not a write lock */ +/* on ipf_nat is already held. */ +/* */ +/* Handle SIOCSTPUT. */ +/* Loads a NAT table entry from user space, including a NAT rule, proxy and */ +/* firewall rule data structures, if pointers to them indicate so. */ +/* ------------------------------------------------------------------------ */ +static int fr_natputent(data, getlock) caddr_t data; +int getlock; { - nat_save_t ipn, *ipnp, *ipnn = NULL; - register nat_t *n, *nat; + nat_save_t ipn, *ipnn; ap_session_t *aps; + nat_t *n, *nat; frentry_t *fr; + fr_info_t fin; ipnat_t *in; - int error; - error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp)); - if (error) - return EFAULT; - error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn)); - if (error) - return EFAULT; + error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE); + if (error != 0) + return error; + + /* + * Initialise early because of code at junkput label. + */ + in = NULL; + aps = NULL; nat = NULL; - if (ipn.ipn_dsize) { - KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize); + ipnn = NULL; + + /* + * New entry, copy in the rest of the NAT entry if its size is more + * than just the nat_t structure.
+ */ + fr = NULL; + if (ipn.ipn_dsize > sizeof(ipn)) { + if (ipn.ipn_dsize > 81920) { + error = ENOMEM; + goto junkput; + } + + KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize); if (ipnn == NULL) return ENOMEM; - bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn)); - error = IRCOPY((caddr_t)ipnp + offsetof(nat_save_t, ipn_data), - (caddr_t)ipnn->ipn_data, ipn.ipn_dsize); - if (error) { + + error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize); + if (error != 0) { error = EFAULT; goto junkput; } } else - ipnn = NULL; + ipnn = &ipn; KMALLOC(nat, nat_t *); if (nat == NULL) { - error = EFAULT; + error = ENOMEM; goto junkput; } - bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat)); + bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat)); /* * Initialize all these so that nat_delete() doesn't cause a crash. */ - nat->nat_phnext[0] = NULL; - nat->nat_phnext[1] = NULL; - fr = nat->nat_fr; - nat->nat_fr = NULL; - aps = nat->nat_aps; - nat->nat_aps = NULL; - in = nat->nat_ptr; - nat->nat_ptr = NULL; - nat->nat_hm = NULL; - nat->nat_data = NULL; - nat->nat_ifp = GETUNIT(nat->nat_ifname, 4); + bzero((char *)nat, offsetof(struct nat, nat_tqe)); + nat->nat_tqe.tqe_pnext = NULL; + nat->nat_tqe.tqe_next = NULL; + nat->nat_tqe.tqe_ifq = NULL; + nat->nat_tqe.tqe_parent = nat; /* * Restore the rule associated with this nat session */ - if (in) { + in = ipnn->ipn_nat.nat_ptr; + if (in != NULL) { KMALLOC(in, ipnat_t *); + nat->nat_ptr = in; if (in == NULL) { error = ENOMEM; goto junkput; } - nat->nat_ptr = in; - bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in)); + bzero((char *)in, offsetof(struct ipnat, in_next6)); + bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in)); in->in_use = 1; in->in_flags |= IPN_DELETE; - in->in_next = NULL; - in->in_rnext = NULL; - in->in_prnext = NULL; - in->in_mnext = NULL; - in->in_pmnext = NULL; - in->in_ifp = GETUNIT(in->in_ifname, 4); - if (in->in_plabel[0] != '\0') { - in->in_apr = appr_lookup(in->in_p, in->in_plabel); + + 
ATOMIC_INC(nat_stats.ns_rules); + + nat_resolverule(in); + } + + /* + * Check that the NAT entry doesn't already exist in the kernel. + */ + bzero((char *)&fin, sizeof(fin)); + fin.fin_p = nat->nat_p; + if (nat->nat_dir == NAT_OUTBOUND) { + fin.fin_data[0] = ntohs(nat->nat_oport); + fin.fin_data[1] = ntohs(nat->nat_outport); + fin.fin_ifp = nat->nat_ifps[1]; + if (nat_inlookup(&fin, 0, fin.fin_p, nat->nat_oip, + nat->nat_inip) != NULL) { + error = EEXIST; + goto junkput; + } + } else if (nat->nat_dir == NAT_INBOUND) { + fin.fin_data[0] = ntohs(nat->nat_outport); + fin.fin_data[1] = ntohs(nat->nat_oport); + fin.fin_ifp = nat->nat_ifps[0]; + if (nat_outlookup(&fin, 0, fin.fin_p, nat->nat_outip, + nat->nat_oip) != NULL) { + error = EEXIST; + goto junkput; } + } else { + error = EINVAL; + goto junkput; } /* * Restore ap_session_t structure. Include the private data allocated * if it was there. */ - if (aps) { + aps = nat->nat_aps; + if (aps != NULL) { KMALLOC(aps, ap_session_t *); + nat->nat_aps = aps; if (aps == NULL) { error = ENOMEM; goto junkput; } - nat->nat_aps = aps; - aps->aps_next = ap_sess_list; - ap_sess_list = aps; bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps)); - if (in) + if (in != NULL) aps->aps_apr = in->in_apr; - if (aps->aps_psiz) { + else + aps->aps_apr = NULL; + if (aps->aps_psiz != 0) { + if (aps->aps_psiz > 81920) { + error = ENOMEM; + goto junkput; + } KMALLOCS(aps->aps_data, void *, aps->aps_psiz); if (aps->aps_data == NULL) { error = ENOMEM; @@ -986,25 +1405,34 @@ caddr_t data; * If there was a filtering rule associated with this entry then * build up a new one. 
*/ + fr = nat->nat_fr; if (fr != NULL) { - if (nat->nat_flags & FI_NEWFR) { + if ((nat->nat_flags & SI_NEWFR) != 0) { KMALLOC(fr, frentry_t *); nat->nat_fr = fr; if (fr == NULL) { error = ENOMEM; goto junkput; } - bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr)); - ipn.ipn_nat.nat_fr = fr; - error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn)); - if (error) { - error = EFAULT; - goto junkput; - } + ipnn->ipn_nat.nat_fr = fr; + fr->fr_ref = 1; + (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE); + bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr)); + MUTEX_NUKE(&fr->fr_lock); + MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock"); } else { + READ_ENTER(&ipf_nat); for (n = nat_instances; n; n = n->nat_next) if (n->nat_fr == fr) break; + + if (n != NULL) { + MUTEX_ENTER(&fr->fr_lock); + fr->fr_ref++; + MUTEX_EXIT(&fr->fr_lock); + } + RWLOCK_EXIT(&ipf_nat); + if (!n) { error = ESRCH; goto junkput; @@ -1012,82 +1440,184 @@ caddr_t data; } } - if (ipnn) - KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize); - nat_insert(nat); - return 0; + if (ipnn != &ipn) { + KFREES(ipnn, ipn.ipn_dsize); + ipnn = NULL; + } + + if (getlock) { + WRITE_ENTER(&ipf_nat); + } + error = nat_insert(nat, nat->nat_rev); + if ((error == 0) && (aps != NULL)) { + aps->aps_next = ap_sess_list; + ap_sess_list = aps; + } + if (getlock) { + RWLOCK_EXIT(&ipf_nat); + } + + if (error == 0) + return 0; + + error = ENOMEM; + junkput: - if (ipnn) - KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize); - if (nat) - nat_delete(nat); + if (fr != NULL) + fr_derefrule(&fr); + + if ((ipnn != NULL) && (ipnn != &ipn)) { + KFREES(ipnn, ipn.ipn_dsize); + } + if (nat != NULL) { + if (aps != NULL) { + if (aps->aps_data != NULL) { + KFREES(aps->aps_data, aps->aps_psiz); + } + KFREE(aps); + } + if (in != NULL) { + if (in->in_apr) + appr_free(in->in_apr); + KFREE(in); + } + KFREE(nat); + } return error; } -/* - * Delete a nat entry from the various lists and table. 
- */ -static void nat_delete(natd) -struct nat *natd; +/* ------------------------------------------------------------------------ */ +/* Function: nat_delete */ +/* Returns: Nil */ +/* Parameters: natd(I) - pointer to NAT structure to delete */ +/* logtype(I) - type of LOG record to create before deleting */ +/* Write Lock: ipf_nat */ +/* */ +/* Delete a nat entry from the various lists and table. If NAT logging is */ +/* enabled then generate a NAT log record for this event. */ +/* ------------------------------------------------------------------------ */ +static void nat_delete(nat, logtype) +struct nat *nat; +int logtype; { struct ipnat *ipn; - if (natd->nat_flags & FI_WILDP) - nat_stats.ns_wilds--; - if (natd->nat_hnext[0]) - natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0]; - *natd->nat_phnext[0] = natd->nat_hnext[0]; - if (natd->nat_hnext[1]) - natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1]; - *natd->nat_phnext[1] = natd->nat_hnext[1]; - if (natd->nat_me != NULL) - *natd->nat_me = NULL; + if (logtype != 0 && nat_logging != 0) + nat_log(nat, logtype); + + MUTEX_ENTER(&ipf_nat_new); + + /* + * Take it as a general indication that all the pointers are set if + * nat_pnext is set. 
+ */ + if (nat->nat_pnext != NULL) { + nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; + nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; + + *nat->nat_pnext = nat->nat_next; + if (nat->nat_next != NULL) { + nat->nat_next->nat_pnext = nat->nat_pnext; + nat->nat_next = NULL; + } + nat->nat_pnext = NULL; + + *nat->nat_phnext[0] = nat->nat_hnext[0]; + if (nat->nat_hnext[0] != NULL) { + nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; + nat->nat_hnext[0] = NULL; + } + nat->nat_phnext[0] = NULL; + + *nat->nat_phnext[1] = nat->nat_hnext[1]; + if (nat->nat_hnext[1] != NULL) { + nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; + nat->nat_hnext[1] = NULL; + } + nat->nat_phnext[1] = NULL; + + if ((nat->nat_flags & SI_WILDP) != 0) + nat_stats.ns_wilds--; + } - if (natd->nat_fr != NULL) { - ATOMIC_DEC32(natd->nat_fr->fr_ref); + if (nat->nat_me != NULL) { + *nat->nat_me = NULL; + nat->nat_me = NULL; } - if (natd->nat_hm != NULL) - nat_hostmapdel(natd->nat_hm); + fr_deletequeueentry(&nat->nat_tqe); + + nat->nat_ref--; + if (nat->nat_ref > 0) { + MUTEX_EXIT(&ipf_nat_new); + return; + } + +#ifdef IPFILTER_SYNC + if (nat->nat_sync) + ipfsync_del(nat->nat_sync); +#endif + + if (nat->nat_fr != NULL) + (void)fr_derefrule(&nat->nat_fr); + + if (nat->nat_hm != NULL) + nat_hostmapdel(nat->nat_hm); /* * If there is an active reference from the nat entry to its parent * rule, decrement the rule's reference count and free it too if no * longer being used. 
*/ - ipn = natd->nat_ptr; + ipn = nat->nat_ptr; if (ipn != NULL) { ipn->in_space++; ipn->in_use--; - if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) { + if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) { if (ipn->in_apr) appr_free(ipn->in_apr); KFREE(ipn); nat_stats.ns_rules--; +#if SOLARIS + if (nat_stats.ns_rules == 0) + pfil_delayed_copy = 1; +#endif } } - MUTEX_DESTROY(&natd->nat_lock); + MUTEX_DESTROY(&nat->nat_lock); + + aps_free(nat->nat_aps); + nat_stats.ns_inuse--; + MUTEX_EXIT(&ipf_nat_new); + /* * If there's a fragment table entry too for this nat entry, then - * dereference that as well. + * dereference that as well. This is after nat_lock is released + * because of Tru64. */ - ipfr_forgetnat((void *)natd); - aps_free(natd->nat_aps); - nat_stats.ns_inuse--; - KFREE(natd); + fr_forgetnat((void *)nat); + + KFREE(nat); } +/* ------------------------------------------------------------------------ */ +/* Function: nat_flushtable */ +/* Returns: int - number of NAT rules deleted */ +/* Parameters: Nil */ +/* */ +/* Deletes all currently active NAT sessions. In deleting each NAT entry a */ +/* log record should be emitted in nat_delete() if NAT logging is enabled. */ +/* ------------------------------------------------------------------------ */ /* * nat_flushtable - clear the NAT table of all mapping entries. 
- * (this is for the dynamic mappings) */ static int nat_flushtable() { - register nat_t *nat, **natp; - register int j = 0; + nat_t *nat; + int j = 0; /* * ALL NAT mappings deleted, so lets just make the deletions @@ -1100,26 +1630,28 @@ static int nat_flushtable() bzero((char *)nat_table[1], sizeof(nat_table[1]) * ipf_nattable_sz); - for (natp = &nat_instances; (nat = *natp); ) { - *natp = nat->nat_next; -#ifdef IPFILTER_LOG - nat_log(nat, NL_FLUSH); -#endif - nat_delete(nat); + while ((nat = nat_instances) != NULL) { + nat_delete(nat, NL_FLUSH); j++; } + nat_stats.ns_inuse = 0; return j; } -/* - * nat_clearlist - delete all rules in the active NAT mapping list. - * (this is for NAT/RDR rules) - */ -int nat_clearlist() +/* ------------------------------------------------------------------------ */ +/* Function: nat_clearlist */ +/* Returns: int - number of NAT/RDR rules deleted */ +/* Parameters: Nil */ +/* */ +/* Delete all rules in the current list of rules. There is nothing elegant */ +/* about this cleanup: simply free all entries on the list of rules and */ +/* clear out the tables used for hashed NAT rule lookups. */ +/* ------------------------------------------------------------------------ */ +static int nat_clearlist() { - register ipnat_t *n, **np = &nat_list; + ipnat_t *n, **np = &nat_list; int i = 0; if (nat_rules != NULL) @@ -1127,10 +1659,10 @@ int nat_clearlist() if (rdr_rules != NULL) bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz); - while ((n = *np)) { + while ((n = *np) != NULL) { *np = n->in_next; - if (!n->in_use) { - if (n->in_apr) + if (n->in_use == 0) { + if (n->in_apr != NULL) appr_free(n->in_apr); KFREE(n); nat_stats.ns_rules--; @@ -1140,35 +1672,481 @@ int nat_clearlist() } i++; } +#if SOLARIS + pfil_delayed_copy = 1; +#endif nat_masks = 0; rdr_masks = 0; return i; } -/* - * Create a new NAT table entry. - * NOTE: Assumes write lock on ipf_nat has been obtained already. 
- * If you intend on changing this, beware: appr_new() may call nat_new() - * recursively! - */ -nat_t *nat_new(fin, ip, np, natsave, flags, direction) +/* ------------------------------------------------------------------------ */ +/* Function: nat_newmap */ +/* Returns: int - -1 == error, 0 == success */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT entry */ +/* ni(I) - pointer to structure with misc. information needed */ +/* to create new NAT entry. */ +/* */ +/* Given an empty NAT structure, populate it with new information about a */ +/* new NAT session, as defined by the matching NAT rule. */ +/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ +/* to the new IP address for the translation. */ +/* ------------------------------------------------------------------------ */ +static INLINE int nat_newmap(fin, nat, ni) +fr_info_t *fin; +nat_t *nat; +natinfo_t *ni; +{ + u_short st_port, dport, sport, port, sp, dp; + struct in_addr in, inb; + hostmap_t *hm; + u_32_t flags; + u_32_t st_ip; + ipnat_t *np; + nat_t *natl; + int l; + + /* + * If it's an outbound packet which doesn't match any existing + * record, then create a new port + */ + l = 0; + hm = NULL; + np = ni->nai_np; + st_ip = np->in_nip; + st_port = np->in_pnext; + flags = ni->nai_flags; + sport = ni->nai_sport; + dport = ni->nai_dport; + + /* + * Do a loop until we either run out of entries to try or we find + * a NAT mapping that isn't currently being used. This is done + * because the change to the source is not (usually) being fixed. + */ + do { + port = 0; + in.s_addr = htonl(np->in_nip); + if (l == 0) { + /* + * Check to see if there is an existing NAT + * setup for this IP address pair. 
+ */ + hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, + in, 0); + if (hm != NULL) + in.s_addr = hm->hm_mapip.s_addr; + } else if ((l == 1) && (hm != NULL)) { + nat_hostmapdel(hm); + hm = NULL; + } + in.s_addr = ntohl(in.s_addr); + + nat->nat_hm = hm; + + if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) { + if (l > 0) + return -1; + } + + if (np->in_redir == NAT_BIMAP && + np->in_inmsk == np->in_outmsk) { + /* + * map the address block in a 1:1 fashion + */ + in.s_addr = np->in_outip; + in.s_addr |= fin->fin_saddr & ~np->in_inmsk; + in.s_addr = ntohl(in.s_addr); + + } else if (np->in_redir & NAT_MAPBLK) { + if ((l >= np->in_ppip) || ((l > 0) && + !(flags & IPN_TCPUDP))) + return -1; + /* + * map-block - Calculate destination address. + */ + in.s_addr = ntohl(fin->fin_saddr); + in.s_addr &= ntohl(~np->in_inmsk); + inb.s_addr = in.s_addr; + in.s_addr /= np->in_ippip; + in.s_addr &= ntohl(~np->in_outmsk); + in.s_addr += ntohl(np->in_outip); + /* + * Calculate destination port. + */ + if ((flags & IPN_TCPUDP) && + (np->in_ppip != 0)) { + port = ntohs(sport) + l; + port %= np->in_ppip; + port += np->in_ppip * + (inb.s_addr % np->in_ippip); + port += MAPBLK_MINPORT; + port = htons(port); + } + + } else if ((np->in_outip == 0) && + (np->in_outmsk == 0xffffffff)) { + /* + * 0/32 - use the interface's IP address. + */ + if ((l > 0) || + fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, + &in, NULL) == -1) + return -1; + in.s_addr = ntohl(in.s_addr); + + } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) { + /* + * 0/0 - use the original source address/port. 
+ */ + if (l > 0) + return -1; + in.s_addr = ntohl(fin->fin_saddr); + + } else if ((np->in_outmsk != 0xffffffff) && + (np->in_pnext == 0) && ((l > 0) || (hm == NULL))) + np->in_nip++; + + natl = NULL; + + if ((flags & IPN_TCPUDP) && + ((np->in_redir & NAT_MAPBLK) == 0) && + (np->in_flags & IPN_AUTOPORTMAP)) { + /* + * "ports auto" (without map-block) + */ + if ((l > 0) && (l % np->in_ppip == 0)) { + if (l > np->in_space) { + return -1; + } else if ((l > np->in_ppip) && + np->in_outmsk != 0xffffffff) + np->in_nip++; + } + if (np->in_ppip != 0) { + port = ntohs(sport); + port += (l % np->in_ppip); + port %= np->in_ppip; + port += np->in_ppip * + (ntohl(fin->fin_saddr) % + np->in_ippip); + port += MAPBLK_MINPORT; + port = htons(port); + } + + } else if (((np->in_redir & NAT_MAPBLK) == 0) && + (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) { + /* + * Standard port translation. Select next port. + */ + port = htons(np->in_pnext++); + + if (np->in_pnext > ntohs(np->in_pmax)) { + np->in_pnext = ntohs(np->in_pmin); + if (np->in_outmsk != 0xffffffff) + np->in_nip++; + } + } + + if (np->in_flags & IPN_IPRANGE) { + if (np->in_nip > ntohl(np->in_outmsk)) + np->in_nip = ntohl(np->in_outip); + } else { + if ((np->in_outmsk != 0xffffffff) && + ((np->in_nip + 1) & ntohl(np->in_outmsk)) > + ntohl(np->in_outip)) + np->in_nip = ntohl(np->in_outip) + 1; + } + + if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY))) + port = sport; + + /* + * Here we do a lookup of the connection as seen from + * the outside. If an IP# pair already exists, try + * again. So if you have A->B becomes C->B, you can + * also have D->E become C->E but not D->B causing + * another C->B. Also take protocol and ports into + * account when determining whether a pre-existing + * NAT setup will cause an external conflict where + * this is appropriate. 
+ */ + inb.s_addr = htonl(in.s_addr); + sp = fin->fin_data[0]; + dp = fin->fin_data[1]; + fin->fin_data[0] = fin->fin_data[1]; + fin->fin_data[1] = htons(port); + natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), + (u_int)fin->fin_p, fin->fin_dst, inb); + fin->fin_data[0] = sp; + fin->fin_data[1] = dp; + + /* + * Has the search wrapped around and come back to the + * start ? + */ + if ((natl != NULL) && + (np->in_pnext != 0) && (st_port == np->in_pnext) && + (np->in_nip != 0) && (st_ip == np->in_nip)) + return -1; + l++; + } while (natl != NULL); + + if (np->in_space > 0) + np->in_space--; + + /* Setup the NAT table */ + nat->nat_inip = fin->fin_src; + nat->nat_outip.s_addr = htonl(in.s_addr); + nat->nat_oip = fin->fin_dst; + if (nat->nat_hm == NULL) + nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, + nat->nat_outip, 0); + + /* + * The ICMP checksum does not have a pseudo header containing + * the IP addresses + */ + ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr)); + ni->nai_sum2 = LONG_SUM(in.s_addr); + if ((flags & IPN_TCPUDP)) { + ni->nai_sum1 += ntohs(sport); + ni->nai_sum2 += ntohs(port); + } + + if (flags & IPN_TCPUDP) { + nat->nat_inport = sport; + nat->nat_outport = port; /* sport */ + nat->nat_oport = dport; + ((tcphdr_t *)fin->fin_dp)->th_sport = port; + } else if (flags & IPN_ICMPQUERY) { + ((icmphdr_t *)fin->fin_dp)->icmp_id = port; + nat->nat_inport = port; + nat->nat_outport = port; + } else if (fin->fin_p == IPPROTO_GRE) { +#if 0 + nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags; + if (GRE_REV(nat->nat_gre.gs_flags) == 1) { + nat->nat_oport = 0;/*fin->fin_data[1];*/ + nat->nat_inport = 0;/*fin->fin_data[0];*/ + nat->nat_outport = 0;/*fin->fin_data[0];*/ + nat->nat_call[0] = fin->fin_data[0]; + nat->nat_call[1] = fin->fin_data[0]; + } +#endif + } + ni->nai_ip.s_addr = in.s_addr; + ni->nai_port = port; + ni->nai_nport = dport; + return 0; +} + + +/* ------------------------------------------------------------------------ */ 
+/* Function: nat_newrdr */ +/* Returns: int - -1 == error, 0 == success (no move), 1 == success and */ +/* allow rule to be moved if IPN_ROUNDR is set. */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT entry */ +/* ni(I) - pointer to structure with misc. information needed */ +/* to create new NAT entry. */ +/* */ +/* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ +/* to the new IP address for the translation. */ +/* ------------------------------------------------------------------------ */ +static INLINE int nat_newrdr(fin, nat, ni) +fr_info_t *fin; +nat_t *nat; +natinfo_t *ni; +{ + u_short nport, dport, sport; + struct in_addr in; + hostmap_t *hm; + u_32_t flags; + ipnat_t *np; + int move; + + move = 1; + hm = NULL; + in.s_addr = 0; + np = ni->nai_np; + flags = ni->nai_flags; + sport = ni->nai_sport; + dport = ni->nai_dport; + + /* + * If the matching rule has IPN_STICKY set, then we want to have the + * same rule kick in as before. Why would this happen? If you have + * a collection of rdr rules with "round-robin sticky", the current + * packet might match a different one to the previous connection but + * we want the same destination to be used. + */ + if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == + (IPN_ROUNDR|IPN_STICKY)) { + hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in, + (u_32_t)dport); + if (hm != NULL) { + in.s_addr = ntohl(hm->hm_mapip.s_addr); + np = hm->hm_ipnat; + ni->nai_np = np; + move = 0; + } + } + + /* + * Otherwise, it's an inbound packet. Most likely, we don't + * want to rewrite source ports and source addresses. Instead, + * we want to rewrite to a fixed internal address and fixed + * internal port. 
+ */ + if (np->in_flags & IPN_SPLIT) { + in.s_addr = np->in_nip; + + if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) { + hm = nat_hostmap(np, fin->fin_src, fin->fin_dst, + in, (u_32_t)dport); + if (hm != NULL) { + in.s_addr = hm->hm_mapip.s_addr; + move = 0; + } + } + + if (hm == NULL || hm->hm_ref == 1) { + if (np->in_inip == htonl(in.s_addr)) { + np->in_nip = ntohl(np->in_inmsk); + move = 0; + } else { + np->in_nip = ntohl(np->in_inip); + } + } + + } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) { + /* + * 0/32 - use the interface's IP address. + */ + if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL) == -1) + return -1; + in.s_addr = ntohl(in.s_addr); + + } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) { + /* + * 0/0 - use the original destination address/port. + */ + in.s_addr = ntohl(fin->fin_daddr); + + } else if (np->in_redir == NAT_BIMAP && + np->in_inmsk == np->in_outmsk) { + /* + * map the address block in a 1:1 fashion + */ + in.s_addr = np->in_inip; + in.s_addr |= fin->fin_daddr & ~np->in_inmsk; + in.s_addr = ntohl(in.s_addr); + } else { + in.s_addr = ntohl(np->in_inip); + } + + if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0)) + nport = dport; + else { + /* + * Whilst not optimized for the case where + * pmin == pmax, the gain is not significant. + */ + if (((np->in_flags & IPN_FIXEDDPORT) == 0) && + (np->in_pmin != np->in_pmax)) { + nport = ntohs(dport) - ntohs(np->in_pmin) + + ntohs(np->in_pnext); + nport = htons(nport); + } else + nport = np->in_pnext; + } + + /* + * When the redirect-to address is set to 0.0.0.0, just + * assume a blank `forwarding' of the packet. We don't + * setup any translation for this either. 
+ */ + if (in.s_addr == 0) { + if (nport == dport) + return -1; + in.s_addr = ntohl(fin->fin_daddr); + } + + nat->nat_inip.s_addr = htonl(in.s_addr); + nat->nat_outip = fin->fin_dst; + nat->nat_oip = fin->fin_src; + + ni->nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport); + ni->nai_sum2 = LONG_SUM(in.s_addr) + ntohs(nport); + + ni->nai_ip.s_addr = in.s_addr; + ni->nai_nport = nport; + ni->nai_port = sport; + + if (flags & IPN_TCPUDP) { + nat->nat_inport = nport; + nat->nat_outport = dport; + nat->nat_oport = sport; + ((tcphdr_t *)fin->fin_dp)->th_dport = nport; + } else if (flags & IPN_ICMPQUERY) { + ((icmphdr_t *)fin->fin_dp)->icmp_id = nport; + nat->nat_inport = nport; + nat->nat_outport = nport; + } else if (fin->fin_p == IPPROTO_GRE) { +#if 0 + nat->nat_gre.gs_flags = ((grehdr_t *)fin->fin_dp)->gr_flags; + if (GRE_REV(nat->nat_gre.gs_flags) == 1) { + nat->nat_call[0] = fin->fin_data[0]; + nat->nat_call[1] = fin->fin_data[1]; + nat->nat_oport = 0; /*fin->fin_data[0];*/ + nat->nat_inport = 0; /*fin->fin_data[1];*/ + nat->nat_outport = 0; /*fin->fin_data[1];*/ + } +#endif + } + + return move; +} + +/* ------------------------------------------------------------------------ */ +/* Function: nat_new */ +/* Returns: nat_t* - NULL == failure to create new NAT structure, */ +/* else pointer to new NAT structure */ +/* Parameters: fin(I) - pointer to packet information */ +/* np(I) - pointer to NAT rule */ +/* natsave(I) - pointer to where to store NAT struct pointer */ +/* flags(I) - flags describing the current packet */ +/* direction(I) - direction of packet (in/out) */ +/* Write Lock: ipf_nat */ +/* */ +/* Attempts to create a new NAT entry. Does not actually change the packet */ +/* in any way. 
*/ +/* */ +/* This function is in three main parts: (1) deal with creating a new NAT */ +/* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */ +/* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */ +/* and (3) building that structure and putting it into the NAT table(s). */ +/* ------------------------------------------------------------------------ */ +nat_t *nat_new(fin, np, natsave, flags, direction) fr_info_t *fin; -ip_t *ip; ipnat_t *np; nat_t **natsave; u_int flags; int direction; { - register u_32_t sum1, sum2, sumd, l; u_short port = 0, sport = 0, dport = 0, nport = 0; - struct in_addr in, inb; - u_short nflags, sp, dp; tcphdr_t *tcp = NULL; hostmap_t *hm = NULL; + struct in_addr in; nat_t *nat, *natl; -#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) - qif_t *qf = fin->fin_qif; + u_int nflags; + natinfo_t ni; + u_32_t sumd; + int move; +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC) + qpktinfo_t *qpi = fin->fin_qpi; #endif if (nat_stats.ns_inuse >= ipf_nattable_max) { @@ -1176,12 +2154,13 @@ int direction; return NULL; } - nflags = flags & np->in_flags; - if (flags & IPN_TCPUDP) { - tcp = (tcphdr_t *)fin->fin_dp; - sport = htons(fin->fin_data[0]); - dport = htons(fin->fin_data[1]); - } + move = 1; + nflags = np->in_flags & flags; + nflags &= NAT_FROMRULE; + + ni.nai_np = np; + ni.nai_nflags = nflags; + ni.nai_flags = flags; /* Give me a new nat */ KMALLOC(nat, nat_t *); @@ -1201,352 +2180,237 @@ int direction; return NULL; } + if (flags & IPN_TCPUDP) { + tcp = fin->fin_dp; + ni.nai_sport = htons(fin->fin_sport); + ni.nai_dport = htons(fin->fin_dport); + } else if (flags & IPN_ICMPQUERY) { + /* + * In the ICMP query NAT code, we translate the ICMP id fields + * to make them unique. This is independent of the ICMP type + * (e.g.
in the unlikely event that a host sends an echo and + * a tstamp request with the same id, both packets will have + * their ip address/id field changed in the same way). + */ + /* The icmp_id field is used by the sender to identify the + * process making the icmp request. (the receiver just + * copies it back in its response). So, it closely matches + * the concept of source port. We overlay sport, so we can + * maximally reuse the existing code. + */ + ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id; + ni.nai_dport = ni.nai_sport; + } + bzero((char *)nat, sizeof(*nat)); nat->nat_flags = flags; - if (flags & FI_WILDP) - nat_stats.ns_wilds++; + + if ((flags & NAT_SLAVE) == 0) { + MUTEX_ENTER(&ipf_nat_new); + } + /* * Search the current table for a match. */ if (direction == NAT_OUTBOUND) { /* - * Values at which the search for a free resouce starts. - */ - u_32_t st_ip; - u_short st_port; - - /* - * If it's an outbound packet which doesn't match any existing - * record, then create a new port + * We can now arrange to call this for the same connection + * because ipf_nat_new doesn't protect the code path into + * this function. */ - l = 0; - st_ip = np->in_nip; - st_port = np->in_pnext; - - do { - port = 0; - in.s_addr = htonl(np->in_nip); - if (l == 0) { - /* - * Check to see if there is an existing NAT - * setup for this IP address pair. - */ - hm = nat_hostmap(np, fin->fin_src, in); - if (hm != NULL) - in.s_addr = hm->hm_mapip.s_addr; - } else if ((l == 1) && (hm != NULL)) { - nat_hostmapdel(hm); - hm = NULL; - } - in.s_addr = ntohl(in.s_addr); - - nat->nat_hm = hm; - - if ((np->in_outmsk == 0xffffffff) && - (np->in_pnext == 0)) { - if (l > 0) - goto badnat; - } - - if (np->in_redir & NAT_MAPBLK) { - if ((l >= np->in_ppip) || ((l > 0) && - !(flags & IPN_TCPUDP))) - goto badnat; - /* - * map-block - Calculate destination address.
- */ - in.s_addr = ntohl(fin->fin_saddr); - in.s_addr &= ntohl(~np->in_inmsk); - inb.s_addr = in.s_addr; - in.s_addr /= np->in_ippip; - in.s_addr &= ntohl(~np->in_outmsk); - in.s_addr += ntohl(np->in_outip); - /* - * Calculate destination port. - */ - if ((flags & IPN_TCPUDP) && - (np->in_ppip != 0)) { - port = ntohs(sport) + l; - port %= np->in_ppip; - port += np->in_ppip * - (inb.s_addr % np->in_ippip); - port += MAPBLK_MINPORT; - port = htons(port); - } - } else if (!np->in_outip && - (np->in_outmsk == 0xffffffff)) { - /* - * 0/32 - use the interface's IP address. - */ - if ((l > 0) || - fr_ifpaddr(4, fin->fin_ifp, &in) == -1) - goto badnat; - in.s_addr = ntohl(in.s_addr); - } else if (!np->in_outip && !np->in_outmsk) { - /* - * 0/0 - use the original source address/port. - */ - if (l > 0) - goto badnat; - in.s_addr = ntohl(fin->fin_saddr); - } else if ((np->in_outmsk != 0xffffffff) && - (np->in_pnext == 0) && - ((l > 0) || (hm == NULL))) - np->in_nip++; - natl = NULL; - - if ((nflags & IPN_TCPUDP) && - ((np->in_redir & NAT_MAPBLK) == 0) && - (np->in_flags & IPN_AUTOPORTMAP)) { - if ((l > 0) && (l % np->in_ppip == 0)) { - if (l > np->in_space) { - goto badnat; - } else if ((l > np->in_ppip) && - np->in_outmsk != 0xffffffff) - np->in_nip++; - } - if (np->in_ppip != 0) { - port = ntohs(sport); - port += (l % np->in_ppip); - port %= np->in_ppip; - port += np->in_ppip * - (ntohl(fin->fin_saddr) % - np->in_ippip); - port += MAPBLK_MINPORT; - port = htons(port); - } - } else if (((np->in_redir & NAT_MAPBLK) == 0) && - (nflags & IPN_TCPUDP) && - (np->in_pnext != 0)) { - port = htons(np->in_pnext++); - if (np->in_pnext > ntohs(np->in_pmax)) { - np->in_pnext = ntohs(np->in_pmin); - if (np->in_outmsk != 0xffffffff) - np->in_nip++; - } - } - - if (np->in_flags & IPN_IPRANGE) { - if (np->in_nip > ntohl(np->in_outmsk)) - np->in_nip = ntohl(np->in_outip); - } else { - if ((np->in_outmsk != 0xffffffff) && - ((np->in_nip + 1) & ntohl(np->in_outmsk)) > - ntohl(np->in_outip)) - 
np->in_nip = ntohl(np->in_outip) + 1; - } - - if (!port && (flags & IPN_TCPUDP)) - port = sport; + natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p, + fin->fin_src, fin->fin_dst); + if (natl != NULL) { + nat = natl; + goto done; + } - /* - * Here we do a lookup of the connection as seen from - * the outside. If an IP# pair already exists, try - * again. So if you have A->B becomes C->B, you can - * also have D->E become C->E but not D->B causing - * another C->B. Also take protocol and ports into - * account when determining whether a pre-existing - * NAT setup will cause an external conflict where - * this is appropriate. - */ - inb.s_addr = htonl(in.s_addr); - sp = fin->fin_data[0]; - dp = fin->fin_data[1]; - fin->fin_data[0] = fin->fin_data[1]; - fin->fin_data[1] = htons(port); - natl = nat_inlookup(fin, flags & ~FI_WILDP, - (u_int)fin->fin_p, fin->fin_dst, - inb, 1); - fin->fin_data[0] = sp; - fin->fin_data[1] = dp; + move = nat_newmap(fin, nat, &ni); + if (move == -1) + goto badnat; - /* - * Has the search wrapped around and come back to the - * start ? - */ - if ((natl != NULL) && - (np->in_pnext != 0) && (st_port == np->in_pnext) && - (np->in_nip != 0) && (st_ip == np->in_nip)) - goto badnat; - l++; - } while (natl != NULL); - - if (np->in_space > 0) - np->in_space--; - - /* Setup the NAT table */ - nat->nat_inip = fin->fin_src; - nat->nat_outip.s_addr = htonl(in.s_addr); - nat->nat_oip = fin->fin_dst; - if (nat->nat_hm == NULL) - nat->nat_hm = nat_hostmap(np, fin->fin_src, - nat->nat_outip); - - sum1 = LONG_SUM(ntohl(fin->fin_saddr)) + ntohs(sport); - sum2 = LONG_SUM(in.s_addr) + ntohs(port); - - if (flags & IPN_TCPUDP) { - nat->nat_inport = sport; - nat->nat_outport = port; /* sport */ - nat->nat_oport = dport; - } + np = ni.nai_np; + in = ni.nai_ip; } else { /* - * Otherwise, it's an inbound packet. Most likely, we don't - * want to rewrite source ports and source addresses. 
Instead, - * we want to rewrite to a fixed internal address and fixed - * internal port. + * NAT_INBOUND is used only for redirects rules */ - if (np->in_flags & IPN_SPLIT) { - in.s_addr = np->in_nip; - if (np->in_inip == htonl(in.s_addr)) - np->in_nip = ntohl(np->in_inmsk); - else { - np->in_nip = ntohl(np->in_inip); - if (np->in_flags & IPN_ROUNDR) { - nat_delrdr(np); - nat_addrdr(np); - } - } - } else { - in.s_addr = ntohl(np->in_inip); - if (np->in_flags & IPN_ROUNDR) { - nat_delrdr(np); - nat_addrdr(np); - } - } - if (!np->in_pnext) - nport = dport; - else { - /* - * Whilst not optimized for the case where - * pmin == pmax, the gain is not significant. - */ - if (np->in_pmin != np->in_pmax) { - nport = ntohs(dport) - ntohs(np->in_pmin) + - ntohs(np->in_pnext); - nport = ntohs(nport); - } else - nport = np->in_pnext; + natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p, + fin->fin_src, fin->fin_dst); + if (natl != NULL) { + nat = natl; + goto done; } - /* - * When the redirect-to address is set to 0.0.0.0, just - * assume a blank `forwarding' of the packet. 
- */ - if (in.s_addr == 0) - in.s_addr = ntohl(fin->fin_daddr); - - nat->nat_inip.s_addr = htonl(in.s_addr); - nat->nat_outip = fin->fin_dst; - nat->nat_oip = fin->fin_src; + move = nat_newrdr(fin, nat, &ni); + if (move == -1) + goto badnat; - sum1 = LONG_SUM(ntohl(fin->fin_daddr)) + ntohs(dport); - sum2 = LONG_SUM(in.s_addr) + ntohs(nport); - - if (flags & IPN_TCPUDP) { - nat->nat_inport = nport; - nat->nat_outport = dport; - nat->nat_oport = sport; + np = ni.nai_np; + in = ni.nai_ip; + } + port = ni.nai_port; + nport = ni.nai_nport; + + if ((move == 1) && (np->in_flags & IPN_ROUNDR)) { + if (np->in_redir == NAT_REDIRECT) { + nat_delrdr(np); + nat_addrdr(np); + } else if (np->in_redir == NAT_MAP) { + nat_delnat(np); + nat_addnat(np); } } - CALC_SUMD(sum1, sum2, sumd); + if (flags & IPN_TCPUDP) { + sport = ni.nai_sport; + dport = ni.nai_dport; + } else if (flags & IPN_ICMPQUERY) { + sport = ni.nai_sport; + dport = 0; + } + + CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd); nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); -#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC) if ((flags & IPN_TCP) && dohwcksum && - (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) { + (((ill_t *)qpi->qpi_ill)->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) { if (direction == NAT_OUTBOUND) - sum1 = LONG_SUM(ntohl(in.s_addr)); + ni.nai_sum1 = LONG_SUM(in.s_addr); else - sum1 = LONG_SUM(ntohl(fin->fin_saddr)); - sum1 += LONG_SUM(ntohl(fin->fin_daddr)); - sum1 += IPPROTO_TCP; - sum1 = (sum1 & 0xffff) + (sum1 >> 16); - nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff); + ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr)); + ni.nai_sum1 += LONG_SUM(ntohl(fin->fin_daddr)); + ni.nai_sum1 += 30; + ni.nai_sum1 = (ni.nai_sum1 & 0xffff) + (ni.nai_sum1 >> 16); + nat->nat_sumd[1] = NAT_HW_CKSUM|(ni.nai_sum1 & 0xffff); } else #endif nat->nat_sumd[1] = nat->nat_sumd[0]; - if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) { 
+ if ((flags & IPN_TCPUDPICMP) && ((sport != port) || (dport != nport))) { if (direction == NAT_OUTBOUND) - sum1 = LONG_SUM(ntohl(fin->fin_saddr)); + ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_saddr)); else - sum1 = LONG_SUM(ntohl(fin->fin_daddr)); + ni.nai_sum1 = LONG_SUM(ntohl(fin->fin_daddr)); - sum2 = LONG_SUM(in.s_addr); + ni.nai_sum2 = LONG_SUM(in.s_addr); - CALC_SUMD(sum1, sum2, sumd); + CALC_SUMD(ni.nai_sum1, ni.nai_sum2, sumd); nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16); - } else + } else { nat->nat_ipsumd = nat->nat_sumd[0]; + if (!(flags & IPN_TCPUDPICMP)) { + nat->nat_sumd[0] = 0; + nat->nat_sumd[1] = 0; + } + } + + if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) { + goto badnat; + } + if (flags & SI_WILDP) + nat_stats.ns_wilds++; + goto done; +badnat: + nat_stats.ns_badnat++; + if ((hm = nat->nat_hm) != NULL) + nat_hostmapdel(hm); + KFREE(nat); + nat = NULL; +done: + if ((flags & NAT_SLAVE) == 0) { + MUTEX_EXIT(&ipf_nat_new); + } + return nat; +} - in.s_addr = htonl(in.s_addr); - strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ); +/* ------------------------------------------------------------------------ */ +/* Function: nat_finalise */ +/* Returns: int - 0 == sucess, -1 == failure */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT entry */ +/* ni(I) - pointer to structure with misc. information needed */ +/* to create new NAT entry. */ +/* Write Lock: ipf_nat */ +/* */ +/* This is the tail end of constructing a new NAT entry and is the same */ +/* for both IPv4 and IPv6. 
*/ +/* ------------------------------------------------------------------------ */ +/*ARGSUSED*/ +static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction) +fr_info_t *fin; +nat_t *nat; +natinfo_t *ni; +tcphdr_t *tcp; +nat_t **natsave; +int direction; +{ + frentry_t *fr; + ipnat_t *np; + + np = ni->nai_np; + + COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0]); +#ifdef IPFILTER_SYNC + if ((nat->nat_flags & SI_CLONE) == 0) + nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat); +#endif nat->nat_me = natsave; nat->nat_dir = direction; - nat->nat_ifp = fin->fin_ifp; + nat->nat_ifps[0] = fin->fin_ifp; nat->nat_ptr = np; nat->nat_p = fin->fin_p; - nat->nat_bytes = 0; - nat->nat_pkts = 0; nat->nat_mssclamp = np->in_mssclamp; - nat->nat_fr = fin->fin_fr; - if (nat->nat_fr != NULL) { - ATOMIC_INC32(nat->nat_fr->fr_ref); - } - if (direction == NAT_OUTBOUND) { - if (flags & IPN_TCPUDP) - tcp->th_sport = port; - } else { - if (flags & IPN_TCPUDP) - tcp->th_dport = nport; + fr = fin->fin_fr; + nat->nat_fr = fr; + + if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0)) + if (appr_new(fin, nat) == -1) + return -1; + + if (nat_insert(nat, fin->fin_rev) == 0) { + if (nat_logging) + nat_log(nat, (u_int)np->in_redir); + np->in_use++; + if (fr != NULL) { + MUTEX_ENTER(&fr->fr_lock); + fr->fr_ref++; + MUTEX_EXIT(&fr->fr_lock); + } + return 0; } - nat_insert(nat); - - if ((np->in_apr != NULL) && (np->in_dport == 0 || - (tcp != NULL && dport == np->in_dport))) - (void) appr_new(fin, ip, nat); - - np->in_use++; -#ifdef IPFILTER_LOG - nat_log(nat, (u_int)np->in_redir); -#endif - return nat; -badnat: - nat_stats.ns_badnat++; - if ((hm = nat->nat_hm) != NULL) - nat_hostmapdel(hm); - KFREE(nat); - return NULL; + /* + * nat_insert failed, so cleanup time... + */ + return -1; } -/* - * Insert a NAT entry into the hash tables for searching and add it to the - * list of active NAT entries. Adjust global counters when complete. 
- */ -void nat_insert(nat) +/* ------------------------------------------------------------------------ */ +/* Function: nat_insert */ +/* Returns: int - 0 == sucess, -1 == failure */ +/* Parameters: nat(I) - pointer to NAT structure */ +/* rev(I) - flag indicating forward/reverse direction of packet */ +/* Write Lock: ipf_nat */ +/* */ +/* Insert a NAT entry into the hash tables for searching and add it to the */ +/* list of active NAT entries. Adjust global counters when complete. */ +/* ------------------------------------------------------------------------ */ +int nat_insert(nat, rev) nat_t *nat; +int rev; { u_int hv1, hv2; nat_t **natp; - MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL); - - nat->nat_age = fr_defnatage; - nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0'; - if (nat->nat_ifname[0] !='\0') { - nat->nat_ifp = GETUNIT(nat->nat_ifname, 4); - } - - nat->nat_next = nat_instances; - nat_instances = nat; - - if (!(nat->nat_flags & (FI_W_SPORT|FI_W_DPORT))) { + /* + * Try and return an error as early as possible, so calculate the hash + * entry numbers first and then proceed. 
+ */ + if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) { hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff); hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport, @@ -1554,20 +2418,57 @@ nat_t *nat; hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff); hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport, - ipf_nattable_sz); - } else { - hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_inip.s_addr, - ipf_nattable_sz); - hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, nat->nat_outip.s_addr, ipf_nattable_sz); + } else { + hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff); + hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1, ipf_nattable_sz); + hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff); + hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2, ipf_nattable_sz); } + if (nat_stats.ns_bucketlen[0][hv1] >= fr_nat_maxbucket || + nat_stats.ns_bucketlen[1][hv2] >= fr_nat_maxbucket) { + return -1; + } + + nat->nat_hv[0] = hv1; + nat->nat_hv[1] = hv2; + + MUTEX_INIT(&nat->nat_lock, "nat entry lock"); + + nat->nat_rev = rev; + nat->nat_ref = 1; + nat->nat_bytes[0] = 0; + nat->nat_pkts[0] = 0; + nat->nat_bytes[1] = 0; + nat->nat_pkts[1] = 0; + + nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0'; + nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4); + + if (nat->nat_ifnames[1][0] !='\0') { + nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; + nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4); + } else { + (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0], + LIFNAMSIZ); + nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; + nat->nat_ifps[1] = nat->nat_ifps[0]; + } + + nat->nat_next = nat_instances; + nat->nat_pnext = &nat_instances; + if (nat_instances) + nat_instances->nat_pnext = &nat->nat_next; + nat_instances = nat; + natp = &nat_table[0][hv1]; if (*natp) (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; nat->nat_phnext[0] = natp; nat->nat_hnext[0] = *natp; *natp = nat; + nat_stats.ns_bucketlen[0][hv1]++; natp = &nat_table[1][hv2]; if 
(*natp) @@ -1575,44 +2476,56 @@ nat_t *nat; nat->nat_phnext[1] = natp; nat->nat_hnext[1] = *natp; *natp = nat; + nat_stats.ns_bucketlen[1][hv2]++; + + fr_setnatqueue(nat, rev); nat_stats.ns_added++; nat_stats.ns_inuse++; + return 0; } -nat_t *nat_icmplookup(ip, fin, dir) -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: nat_icmperrorlookup */ +/* Returns: nat_t* - point to matching NAT structure */ +/* Parameters: fin(I) - pointer to packet information */ +/* dir(I) - direction of packet (in/out) */ +/* */ +/* Check if the ICMP error message is related to an existing TCP, UDP or */ +/* ICMP query nat entry. It is assumed that the packet is already of the */ +/* the required length. */ +/* ------------------------------------------------------------------------ */ +nat_t *nat_icmperrorlookup(fin, dir) fr_info_t *fin; int dir; { - icmphdr_t *icmp; + int flags = 0, type, minlen; + icmphdr_t *icmp, *orgicmp; tcphdr_t *tcp = NULL; + u_short data[2]; + nat_t *nat; ip_t *oip; - int flags = 0, type, minlen; + u_int p; - icmp = (icmphdr_t *)fin->fin_dp; + icmp = fin->fin_dp; + type = icmp->icmp_type; /* * Does it at least have the return (basic) IP header ? * Only a basic IP header (no options) should be with an ICMP error - * header. + * header. Also, if it's not an error type, then return. */ - if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN)) + if ((fin->fin_hlen != sizeof(ip_t)) || + !fr_icmp4errortype(type)) return NULL; - type = icmp->icmp_type; + /* - * If it's not an error type, then return. 
+ * Check packet size */ - if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) && - (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) && - (type != ICMP_PARAMPROB)) - return NULL; - oip = (ip_t *)((char *)fin->fin_dp + 8); - minlen = (oip->ip_hl << 2); - if (minlen < sizeof(ip_t)) - return NULL; - if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen) + minlen = IP_HL(oip) << 2; + if ((minlen < sizeof(ip_t)) || + (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)) return NULL; /* * Is the buffer big enough for all of it ? It's the size of the IP @@ -1627,105 +2540,146 @@ int dir; { mb_t *m; -# if SOLARIS - m = fin->fin_qfm; + m = fin->fin_m; +# if defined(MENTAT) if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr) return NULL; # else - m = *(mb_t **)fin->fin_mp; if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > - (char *)ip + m->m_len) + (char *)fin->fin_ip + M_LEN(m)) return NULL; # endif } #endif - if (oip->ip_p == IPPROTO_TCP) + if (fin->fin_daddr != oip->ip_src.s_addr) + return NULL; + + p = oip->ip_p; + if (p == IPPROTO_TCP) flags = IPN_TCP; - else if (oip->ip_p == IPPROTO_UDP) + else if (p == IPPROTO_UDP) flags = IPN_UDP; - if (flags & IPN_TCPUDP) { - u_short data[2]; - nat_t *nat; + else if (p == IPPROTO_ICMP) { + orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); + + /* see if this is related to an ICMP query */ + if (nat_icmpquerytype4(orgicmp->icmp_type)) { + data[0] = fin->fin_data[0]; + data[1] = fin->fin_data[1]; + fin->fin_data[0] = 0; + fin->fin_data[1] = orgicmp->icmp_id; + flags = IPN_ICMPERR|IPN_ICMPQUERY; + /* + * NOTE : dir refers to the direction of the original + * ip packet. By definition the icmp error + * message flows in the opposite direction. 
+ */ + if (dir == NAT_INBOUND) + nat = nat_inlookup(fin, flags, p, oip->ip_dst, + oip->ip_src); + else + nat = nat_outlookup(fin, flags, p, oip->ip_dst, + oip->ip_src); + fin->fin_data[0] = data[0]; + fin->fin_data[1] = data[1]; + return nat; + } + } + + if (flags & IPN_TCPUDP) { minlen += 8; /* + 64bits of data to get ports */ - if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen) + if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen) return NULL; data[0] = fin->fin_data[0]; data[1] = fin->fin_data[1]; - tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2)); + tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); fin->fin_data[0] = ntohs(tcp->th_dport); fin->fin_data[1] = ntohs(tcp->th_sport); if (dir == NAT_INBOUND) { - nat = nat_inlookup(fin, flags, (u_int)oip->ip_p, - oip->ip_dst, oip->ip_src, 0); + nat = nat_inlookup(fin, flags, p, oip->ip_dst, + oip->ip_src); } else { - nat = nat_outlookup(fin, flags, (u_int)oip->ip_p, - oip->ip_dst, oip->ip_src, 0); + nat = nat_outlookup(fin, flags, p, oip->ip_dst, + oip->ip_src); } fin->fin_data[0] = data[0]; fin->fin_data[1] = data[1]; return nat; } if (dir == NAT_INBOUND) - return nat_inlookup(fin, 0, (u_int)oip->ip_p, - oip->ip_dst, oip->ip_src, 0); + return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src); else - return nat_outlookup(fin, 0, (u_int)oip->ip_p, - oip->ip_dst, oip->ip_src, 0); + return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src); } -/* - * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP - * packet gets correctly recognised. - */ -nat_t *nat_icmp(ip, fin, nflags, dir) -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: nat_icmperror */ +/* Returns: nat_t* - point to matching NAT structure */ +/* Parameters: fin(I) - pointer to packet information */ +/* nflags(I) - NAT flags for this packet */ +/* dir(I) - direction of packet (in/out) */ +/* */ +/* Fix up an ICMP packet which is an error message for an existing NAT */ +/* session. 
This will correct both packet header data and checksums. */ +/* */ +/* This should *ONLY* be used for incoming ICMP error packets to make sure */ +/* a NAT'd ICMP packet gets correctly recognised. */ +/* ------------------------------------------------------------------------ */ +nat_t *nat_icmperror(fin, nflags, dir) fr_info_t *fin; u_int *nflags; int dir; { - u_32_t sum1, sum2, sumd, sumd2 = 0; + u_32_t sum1, sum2, sumd, sumd2; struct in_addr in; - int flags, dlen; icmphdr_t *icmp; - udphdr_t *udp; + int flags, dlen; + u_short *csump; tcphdr_t *tcp; nat_t *nat; ip_t *oip; + void *dp; - if ((fin->fin_fl & FI_SHORT) || (fin->fin_off != 0)) + if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) return NULL; /* - * nat_icmplookup() will return NULL for `defective' packets. + * nat_icmperrorlookup() will return NULL for `defective' packets. */ - if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir))) + if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir))) return NULL; + tcp = NULL; + csump = NULL; flags = 0; sumd2 = 0; *nflags = IPN_ICMPERR; - icmp = (icmphdr_t *)fin->fin_dp; + icmp = fin->fin_dp; oip = (ip_t *)&icmp->icmp_ip; - if (oip->ip_p == IPPROTO_TCP) + dp = (((char *)oip) + (IP_HL(oip) << 2)); + if (oip->ip_p == IPPROTO_TCP) { + tcp = (tcphdr_t *)dp; + csump = (u_short *)&tcp->th_sum; flags = IPN_TCP; - else if (oip->ip_p == IPPROTO_UDP) + } else if (oip->ip_p == IPPROTO_UDP) { + udphdr_t *udp; + + udp = (udphdr_t *)dp; + tcp = (tcphdr_t *)dp; + csump = (u_short *)&udp->uh_sum; flags = IPN_UDP; - udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2))); - dlen = ip->ip_len - ((char *)udp - (char *)ip); - /* - * XXX - what if this is bogus hl and we go off the end ? - * In this case, nat_icmplookup() will have returned NULL. - */ - tcp = (tcphdr_t *)udp; + } else if (oip->ip_p == IPPROTO_ICMP) + flags = IPN_ICMPQUERY; + dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip); /* * Need to adjust ICMP header to include the real IP#'s and * port #'s. 
Only apply a checksum change relative to the - * IP address change as it will be modified again in ip_natout + * IP address change as it will be modified again in fr_checknatout * for both address and port. Two checksum changes are * necessary for the two header address changes. Be careful * to only modify the checksum once for the port # and twice @@ -1743,7 +2697,6 @@ int dir; * checksum. So, we must compensate that as well. Even worse, the * change in the UDP and TCP checksums require yet another * adjustment of the ICMP checksum of the ICMP error message. - * */ if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) { @@ -1764,14 +2717,14 @@ int dir; * Fix IP checksum of the offending IP packet to adjust for * the change in the IP address. * - * Normally, you would expect that the ICMP checksum of the + * Normally, you would expect that the ICMP checksum of the * ICMP error message needs to be adjusted as well for the * IP address change in oip. - * However, this is a NOP, because the ICMP checksum is + * However, this is a NOP, because the ICMP checksum is * calculated over the complete ICMP packet, which includes the - * changed oip IP addresses and oip->ip_sum. However, these + * changed oip IP addresses and oip->ip_sum. However, these * two changes cancel each other out (if the delta for - * the IP address is x, then the delta for ip_sum is minus x), + * the IP address is x, then the delta for ip_sum is minus x), * so no change in the icmp_cksum is necessary. * * Be careful that nat_dir refers to the direction of the @@ -1779,22 +2732,23 @@ int dir; */ fix_datacksum(&oip->ip_sum, sumd); /* Fix icmp cksum : IP Addr + Cksum */ + sumd2 = (sumd >> 16); /* * Fix UDP pseudo header checksum to compensate for the * IP address change. 
*/ - if ((oip->ip_p == IPPROTO_UDP) && (dlen >= 8) && udp->uh_sum) { + if ((oip->ip_p == IPPROTO_UDP) && (dlen >= 8) && (*csump != 0)) { /* - * The UDP checksum is optional, only adjust it + * The UDP checksum is optional, only adjust it * if it has been set. */ - sum1 = ntohs(udp->uh_sum); - fix_datacksum(&udp->uh_sum, sumd); - sum2 = ntohs(udp->uh_sum); + sum1 = ntohs(*csump); + fix_datacksum(csump, sumd); + sum2 = ntohs(*csump); /* - * Fix ICMP checksum to compensate the UDP + * Fix ICMP checksum to compensate the UDP * checksum adjustment. */ sumd2 = sumd << 1; @@ -1803,25 +2757,25 @@ int dir; } /* - * Fix TCP pseudo header checksum to compensate for the + * Fix TCP pseudo header checksum to compensate for the * IP address change. Before we can do the change, we * must make sure that oip is sufficient large to hold * the TCP checksum (normally it does not!). + * 18 = offsetof(tcphdr_t, th_sum) + 2 */ - else if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) { - sum1 = ntohs(tcp->th_sum); - fix_datacksum(&tcp->th_sum, sumd); - sum2 = ntohs(tcp->th_sum); + else if (oip->ip_p == IPPROTO_TCP && dlen >= 18) { + sum1 = ntohs(*csump); + fix_datacksum(csump, sumd); + sum2 = ntohs(*csump); /* - * Fix ICMP checksum to compensate the TCP + * Fix ICMP checksum to compensate the TCP * checksum adjustment. */ sumd2 = sumd << 1; CALC_SUMD(sum1, sum2, sumd); sumd2 += sumd; } else { - sumd2 = (sumd >> 16); if (nat->nat_dir == NAT_OUTBOUND) sumd2 = ~sumd2; else @@ -1829,6 +2783,8 @@ int dir; } if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) { + int mode = 0; + /* * Step 2 : * For offending TCP/UDP IP packets, translate the ports as @@ -1848,166 +2804,215 @@ int dir; * include the TCP checksum. So we have to check if the * ip->ip_len actually holds the TCP checksum of the oip! */ - if (nat->nat_oport == tcp->th_dport) { + + if (nat->nat_oport == tcp->th_dport) { if (tcp->th_sport != nat->nat_inport) { - /* - * Fix ICMP checksum to compensate port - * adjustment. 
- */ + mode = 1; sum1 = ntohs(nat->nat_inport); sum2 = ntohs(tcp->th_sport); - tcp->th_sport = nat->nat_inport; + } + } else if (tcp->th_sport == nat->nat_oport) { + mode = 2; + sum1 = ntohs(nat->nat_outport); + sum2 = ntohs(tcp->th_dport); + } + + if (mode == 1) { + /* + * Fix ICMP checksum to compensate port adjustment. + */ + tcp->th_sport = htons(sum1); + + /* + * Fix udp checksum to compensate port adjustment. + * NOTE : the offending IP packet flows the other + * direction compared to the ICMP message. + * + * The UDP checksum is optional, only adjust it if + * it has been set. + */ + if ((oip->ip_p == IPPROTO_UDP) && + (dlen >= 8) && (*csump != 0)) { + sumd = sum1 - sum2; + sumd2 += sumd; + + sum1 = ntohs(*csump); + fix_datacksum(csump, sumd); + sum2 = ntohs(*csump); /* - * Fix udp checksum to compensate port - * adjustment. NOTE : the offending IP packet - * flows the other direction compared to the - * ICMP message. - * - * The UDP checksum is optional, only adjust - * it if it has been set. + * Fix ICMP checksum to compenstate + * UDP checksum adjustment. */ - if ((oip->ip_p == IPPROTO_UDP) && - (dlen >= 8) && udp->uh_sum) { + CALC_SUMD(sum1, sum2, sumd); + sumd2 += sumd; + } + + /* + * Fix TCP checksum (if present) to compensate port + * adjustment. NOTE : the offending IP packet flows + * the other direction compared to the ICMP message. + */ + if (oip->ip_p == IPPROTO_TCP) { + if (dlen >= 18) { sumd = sum1 - sum2; sumd2 += sumd; - sum1 = ntohs(udp->uh_sum); - fix_datacksum(&udp->uh_sum, sumd); - sum2 = ntohs(udp->uh_sum); + sum1 = ntohs(*csump); + fix_datacksum(csump, sumd); + sum2 = ntohs(*csump); /* * Fix ICMP checksum to compensate - * UDP checksum adjustment. + * TCP checksum adjustment. */ CALC_SUMD(sum1, sum2, sumd); sumd2 += sumd; - } - - /* - * Fix tcp checksum (if present) to compensate - * port adjustment. NOTE : the offending IP - * packet flows the other direction compared to - * the ICMP message. 
- */ - if (oip->ip_p == IPPROTO_TCP) { - if (dlen >= 18) { - sumd = sum1 - sum2; - sumd2 += sumd; - - sum1 = ntohs(tcp->th_sum); - fix_datacksum(&tcp->th_sum, - sumd); - sum2 = ntohs(tcp->th_sum); - - /* - * Fix ICMP checksum to - * compensate TCP checksum - * adjustment. - */ - CALC_SUMD(sum1, sum2, sumd); - sumd2 += sumd; - } else { - sumd = sum2 - sum1 + 1; - sumd2 += sumd; - } + } else { + sumd = sum2 - sum1 + 1; + sumd2 += sumd; } } - } else if (tcp->th_dport != nat->nat_outport) { + } else if (mode == 2) { /* - * Fix ICMP checksum to compensate port - * adjustment. + * Fix ICMP checksum to compensate port adjustment. */ - sum1 = ntohs(nat->nat_outport); - sum2 = ntohs(tcp->th_dport); - tcp->th_dport = nat->nat_outport; + tcp->th_dport = htons(sum1); /* - * Fix udp checksum to compensate port - * adjustment. NOTE : the offending IP - * packet flows the other direction compared - * to the ICMP message. + * Fix UDP checksum to compensate port adjustment. + * NOTE : the offending IP packet flows the other + * direction compared to the ICMP message. * * The UDP checksum is optional, only adjust * it if it has been set. */ if ((oip->ip_p == IPPROTO_UDP) && - (dlen >= 8) && udp->uh_sum) { + (dlen >= 8) && (*csump != 0)) { sumd = sum1 - sum2; sumd2 += sumd; - sum1 = ntohs(udp->uh_sum); - fix_datacksum(&udp->uh_sum, sumd); - sum2 = ntohs(udp->uh_sum); + sum1 = ntohs(*csump); + fix_datacksum(csump, sumd); + sum2 = ntohs(*csump); /* * Fix ICMP checksum to compensate * UDP checksum adjustment. */ CALC_SUMD(sum1, sum2, sumd); + sumd2 += sumd; } /* - * Fix tcp checksum (if present) to compensate - * port adjustment. NOTE : the offending IP - * packet flows the other direction compared to - * the ICMP message. + * Fix TCP checksum (if present) to compensate port + * adjustment. NOTE : the offending IP packet flows + * the other direction compared to the ICMP message. 
*/ if (oip->ip_p == IPPROTO_TCP) { if (dlen >= 18) { sumd = sum1 - sum2; sumd2 += sumd; - sum1 = ntohs(tcp->th_sum); - fix_datacksum(&tcp->th_sum, sumd); - sum2 = ntohs(tcp->th_sum); + sum1 = ntohs(*csump); + fix_datacksum(csump, sumd); + sum2 = ntohs(*csump); /* * Fix ICMP checksum to compensate - * UDP checksum adjustment. + * TCP checksum adjustment. */ CALC_SUMD(sum1, sum2, sumd); + sumd2 += sumd; } else { - sumd = sum2 - sum1; - if (nat->nat_dir == NAT_OUTBOUND) - sumd++; + if (nat->nat_dir == NAT_INBOUND) + sumd = sum2 - sum1; + else + sumd = sum2 - sum1 + 1; + sumd2 += sumd; } } - sumd2 += sumd; } - if (sumd2) { + if (sumd2 != 0) { sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); fix_incksum(fin, &icmp->icmp_cksum, sumd2); } + } else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) { + icmphdr_t *orgicmp; + + /* + * XXX - what if this is bogus hl and we go off the end ? + * In this case, nat_icmperrorlookup() will have returned NULL. + */ + orgicmp = (icmphdr_t *)dp; + + if (nat->nat_dir == NAT_OUTBOUND) { + if (orgicmp->icmp_id != nat->nat_inport) { + + /* + * Fix ICMP checksum (of the offening ICMP + * query packet) to compensate the change + * in the ICMP id of the offending ICMP + * packet. + * + * Since you modify orgicmp->icmp_id with + * a delta (say x) and you compensate that + * in origicmp->icmp_cksum with a delta + * minus x, you don't have to adjust the + * overall icmp->icmp_cksum + */ + sum1 = ntohs(orgicmp->icmp_id); + sum2 = ntohs(nat->nat_inport); + CALC_SUMD(sum1, sum2, sumd); + orgicmp->icmp_id = nat->nat_inport; + fix_datacksum(&orgicmp->icmp_cksum, sumd); + } + } /* nat_dir == NAT_INBOUND is impossible for icmp queries */ } - if (oip->ip_p == IPPROTO_ICMP) - nat->nat_age = fr_defnaticmpage; return nat; } /* - * NB: these lookups don't lock access to the list, it assume it has already - * been done! 
- */ -/* - * Lookup a nat entry based on the mapped destination ip address/port and - * real source address/port. We use this lookup when receiving a packet, - * we're looking for a table entry, based on the destination address. - * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. + * NB: these lookups don't lock access to the list, it assumed that it has + * already been done! */ -nat_t *nat_inlookup(fin, flags, p, src, mapdst, rw) + +/* ------------------------------------------------------------------------ */ +/* Function: nat_inlookup */ +/* Returns: nat_t* - NULL == no match, */ +/* else pointer to matching NAT entry */ +/* Parameters: fin(I) - pointer to packet information */ +/* flags(I) - NAT flags for this packet */ +/* p(I) - protocol for this packet */ +/* src(I) - source IP address */ +/* mapdst(I) - destination IP address */ +/* */ +/* Lookup a nat entry based on the mapped destination ip address/port and */ +/* real source address/port. We use this lookup when receiving a packet, */ +/* we're looking for a table entry, based on the destination address. */ +/* */ +/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ +/* */ +/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ +/* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. 
*/ +/* */ +/* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ +/* the packet is of said protocol */ +/* ------------------------------------------------------------------------ */ +nat_t *nat_inlookup(fin, flags, p, src, mapdst) fr_info_t *fin; -register u_int flags, p; +u_int flags, p; struct in_addr src , mapdst; -int rw; { - register u_short sport, dport; - register nat_t *nat; - register int nflags; - register u_32_t dst; + u_short sport, dport; + grehdr_t *gre; ipnat_t *ipn; + u_int sflags; + nat_t *nat; + int nflags; + u_32_t dst; void *ifp; u_int hv; @@ -2015,26 +3020,71 @@ int rw; ifp = fin->fin_ifp; else ifp = NULL; + sport = 0; + dport = 0; + gre = NULL; dst = mapdst.s_addr; - if (flags & IPN_TCPUDP) { + sflags = flags & NAT_TCPUDPICMP; + + switch (p) + { + case IPPROTO_TCP : + case IPPROTO_UDP : sport = htons(fin->fin_data[0]); dport = htons(fin->fin_data[1]); - } else { - sport = 0; - dport = 0; + break; + case IPPROTO_ICMP : + if (flags & IPN_ICMPERR) + sport = fin->fin_data[1]; + else + dport = fin->fin_data[1]; + break; + default : + break; } + + if ((flags & SI_WILDP) != 0) + goto find_in_wild_ports; + hv = NAT_HASH_FN(dst, dport, 0xffffffff); hv = NAT_HASH_FN(src.s_addr, hv + sport, ipf_nattable_sz); nat = nat_table[1][hv]; for (; nat; nat = nat->nat_hnext[1]) { nflags = nat->nat_flags; - if ((!ifp || ifp == nat->nat_ifp) && - nat->nat_oip.s_addr == src.s_addr && + + if (ifp != NULL) { + if (nat->nat_dir == NAT_REDIRECT) { + if (ifp != nat->nat_ifps[0]) + continue; + } else { + if (ifp != nat->nat_ifps[1]) + continue; + } + } + + if (nat->nat_oip.s_addr == src.s_addr && nat->nat_outip.s_addr == dst && - ((p == 0) || (p == nat->nat_p))) { + (((p == 0) && + (sflags == (nat->nat_flags & IPN_TCPUDPICMP))) + || (p == nat->nat_p))) { switch (p) { +#if 0 + case IPPROTO_GRE : + if (nat->nat_call[1] != fin->fin_data[0]) + continue; + break; +#endif + case IPPROTO_ICMP : + if ((flags & IPN_ICMPERR) != 0) { + if (nat->nat_outport != 
sport) + continue; + } else { + if (nat->nat_outport != dport) + continue; + } + break; case IPPROTO_TCP : case IPPROTO_UDP : if (nat->nat_oport != sport) @@ -2053,56 +3103,94 @@ int rw; return nat; } } - if (!nat_stats.ns_wilds || !(flags & FI_WILDP)) + + /* + * So if we didn't find it but there are wildcard members in the hash + * table, go back and look for them. We do this search and update here + * because it is modifying the NAT table and we want to do this only + * for the first packet that matches. The exception, of course, is + * for "dummy" (FI_IGNORE) lookups. + */ +find_in_wild_ports: + if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) return NULL; - if (!rw) { - RWLOCK_EXIT(&ipf_nat); - } + if (nat_stats.ns_wilds == 0) + return NULL; + + RWLOCK_EXIT(&ipf_nat); + hv = NAT_HASH_FN(dst, 0, 0xffffffff); - hv = NAT_HASH_FN(src.s_addr, dst, ipf_nattable_sz); - if (!rw) { - WRITE_ENTER(&ipf_nat); - } + hv = NAT_HASH_FN(src.s_addr, hv, ipf_nattable_sz); + + WRITE_ENTER(&ipf_nat); + nat = nat_table[1][hv]; for (; nat; nat = nat->nat_hnext[1]) { - nflags = nat->nat_flags; - if (ifp && ifp != nat->nat_ifp) - continue; - if (!(nflags & FI_WILDP)) + if (ifp != NULL) { + if (nat->nat_dir == NAT_REDIRECT) { + if (ifp != nat->nat_ifps[0]) + continue; + } else { + if (ifp != nat->nat_ifps[1]) + continue; + } + } + + if (nat->nat_p != fin->fin_p) continue; if (nat->nat_oip.s_addr != src.s_addr || nat->nat_outip.s_addr != dst) continue; - if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) && - ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) { - nat_tabmove(fin, nat); + + nflags = nat->nat_flags; + if (!(nflags & (NAT_TCPUDP|SI_WILDP))) + continue; + + if (nat_wildok(nat, (int)sport, (int)dport, nflags, + NAT_INBOUND) == 1) { + if ((fin->fin_flx & FI_IGNORE) != 0) + break; + if ((nflags & SI_CLONE) != 0) { + nat = fr_natclone(fin, nat); + if (nat == NULL) + break; + } else { + MUTEX_ENTER(&ipf_nat_new); + nat_stats.ns_wilds--; + MUTEX_EXIT(&ipf_nat_new); + 
} + nat->nat_oport = sport; + nat->nat_outport = dport; + nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); + nat_tabmove(nat); break; } } - if (!rw) { - MUTEX_DOWNGRADE(&ipf_nat); - } + + MUTEX_DOWNGRADE(&ipf_nat); + return nat; } -/* - * This function is only called for TCP/UDP NAT table entries where the - * original was placed in the table without hashing on the ports and we now - * want to include hashing on port numbers. - */ -static void nat_tabmove(fin, nat) -fr_info_t *fin; +/* ------------------------------------------------------------------------ */ +/* Function: nat_tabmove */ +/* Returns: Nil */ +/* Parameters: nat(I) - pointer to NAT structure */ +/* Write Lock: ipf_nat */ +/* */ +/* This function is only called for TCP/UDP NAT table entries where the */ +/* original was placed in the table without hashing on the ports and we now */ +/* want to include hashing on port numbers. */ +/* ------------------------------------------------------------------------ */ +static void nat_tabmove(nat) nat_t *nat; { - register u_short sport, dport; - u_int hv, nflags; nat_t **natp; + u_int hv; - nflags = nat->nat_flags; - - sport = ntohs(fin->fin_data[0]); - dport = ntohs(fin->fin_data[1]); + if (nat->nat_flags & SI_CLONE) + return; /* * Remove the NAT entry from the old location @@ -2110,76 +3198,133 @@ nat_t *nat; if (nat->nat_hnext[0]) nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; *nat->nat_phnext[0] = nat->nat_hnext[0]; + nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--; if (nat->nat_hnext[1]) nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; *nat->nat_phnext[1] = nat->nat_hnext[1]; + nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--; /* * Add into the NAT table in the new position */ - hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, 0xffffffff); - hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz); + hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff); + hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, + ipf_nattable_sz); + 
nat->nat_hv[0] = hv; natp = &nat_table[0][hv]; if (*natp) (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; nat->nat_phnext[0] = natp; nat->nat_hnext[0] = *natp; *natp = nat; + nat_stats.ns_bucketlen[0][hv]++; - hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, 0xffffffff); - hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + dport, ipf_nattable_sz); + hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff); + hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport, + ipf_nattable_sz); + nat->nat_hv[1] = hv; natp = &nat_table[1][hv]; if (*natp) (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; nat->nat_phnext[1] = natp; nat->nat_hnext[1] = *natp; *natp = nat; + nat_stats.ns_bucketlen[1][hv]++; } -/* - * Lookup a nat entry based on the source 'real' ip address/port and - * destination address/port. We use this lookup when sending a packet out, - * we're looking for a table entry, based on the source address. - * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. - */ -nat_t *nat_outlookup(fin, flags, p, src, dst, rw) +/* ------------------------------------------------------------------------ */ +/* Function: nat_outlookup */ +/* Returns: nat_t* - NULL == no match, */ +/* else pointer to matching NAT entry */ +/* Parameters: fin(I) - pointer to packet information */ +/* flags(I) - NAT flags for this packet */ +/* p(I) - protocol for this packet */ +/* src(I) - source IP address */ +/* dst(I) - destination IP address */ +/* rw(I) - 1 == write lock on ipf_nat held, 0 == read lock. */ +/* */ +/* Lookup a nat entry based on the source 'real' ip address/port and */ +/* destination address/port. We use this lookup when sending a packet out, */ +/* we're looking for a table entry, based on the source address. */ +/* */ +/* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ +/* */ +/* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */ +/* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. 
*/ +/* */ +/* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ +/* the packet is of said protocol */ +/* ------------------------------------------------------------------------ */ +nat_t *nat_outlookup(fin, flags, p, src, dst) fr_info_t *fin; -register u_int flags, p; +u_int flags, p; struct in_addr src , dst; -int rw; { - register u_short sport, dport; - register nat_t *nat; - register int nflags; + u_short sport, dport; + u_int sflags; ipnat_t *ipn; u_32_t srcip; + nat_t *nat; + int nflags; void *ifp; u_int hv; ifp = fin->fin_ifp; srcip = src.s_addr; - if (flags & IPN_TCPUDP) { - sport = ntohs(fin->fin_data[0]); - dport = ntohs(fin->fin_data[1]); - } else { - sport = 0; - dport = 0; + sflags = flags & IPN_TCPUDPICMP; + sport = 0; + dport = 0; + + switch (p) + { + case IPPROTO_TCP : + case IPPROTO_UDP : + sport = htons(fin->fin_data[0]); + dport = htons(fin->fin_data[1]); + break; + case IPPROTO_ICMP : + if (flags & IPN_ICMPERR) + sport = fin->fin_data[1]; + else + dport = fin->fin_data[1]; + break; + default : + break; } + if ((flags & SI_WILDP) != 0) + goto find_out_wild_ports; + hv = NAT_HASH_FN(srcip, sport, 0xffffffff); hv = NAT_HASH_FN(dst.s_addr, hv + dport, ipf_nattable_sz); nat = nat_table[0][hv]; for (; nat; nat = nat->nat_hnext[0]) { nflags = nat->nat_flags; - if ((!ifp || ifp == nat->nat_ifp) && - nat->nat_inip.s_addr == srcip && + if (ifp != NULL) { + if (nat->nat_dir == NAT_REDIRECT) { + if (ifp != nat->nat_ifps[1]) + continue; + } else { + if (ifp != nat->nat_ifps[0]) + continue; + } + } + + if (nat->nat_inip.s_addr == srcip && nat->nat_oip.s_addr == dst.s_addr && - ((p == 0) || (p == nat->nat_p))) { + (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP))) + || (p == nat->nat_p))) { switch (p) { +#if 0 + case IPPROTO_GRE : + if (nat->nat_call[1] != fin->fin_data[0]) + continue; + break; +#endif case IPPROTO_TCP : case IPPROTO_UDP : if (nat->nat_oport != dport) @@ -2198,77 +3343,170 @@ int rw; return nat; } } - if 
(!nat_stats.ns_wilds || !(flags & FI_WILDP)) + + /* + * So if we didn't find it but there are wildcard members in the hash + * table, go back and look for them. We do this search and update here + * because it is modifying the NAT table and we want to do this only + * for the first packet that matches. The exception, of course, is + * for "dummy" (FI_IGNORE) lookups. + */ +find_out_wild_ports: + if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) + return NULL; + if (nat_stats.ns_wilds == 0) return NULL; - if (!rw) { - RWLOCK_EXIT(&ipf_nat); - } - hv = NAT_HASH_FN(dst.s_addr, srcip, ipf_nattable_sz); - if (!rw) { - WRITE_ENTER(&ipf_nat); - } + RWLOCK_EXIT(&ipf_nat); + + hv = NAT_HASH_FN(srcip, 0, 0xffffffff); + hv = NAT_HASH_FN(dst.s_addr, hv, ipf_nattable_sz); + + WRITE_ENTER(&ipf_nat); + nat = nat_table[0][hv]; for (; nat; nat = nat->nat_hnext[0]) { - nflags = nat->nat_flags; - if (ifp && ifp != nat->nat_ifp) - continue; - if (!(nflags & FI_WILDP)) + if (ifp != NULL) { + if (nat->nat_dir == NAT_REDIRECT) { + if (ifp != nat->nat_ifps[1]) + continue; + } else { + if (ifp != nat->nat_ifps[0]) + continue; + } + } + + if (nat->nat_p != fin->fin_p) continue; if ((nat->nat_inip.s_addr != srcip) || (nat->nat_oip.s_addr != dst.s_addr)) continue; - if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) && - ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) { - nat_tabmove(fin, nat); + + nflags = nat->nat_flags; + if (!(nflags & (NAT_TCPUDP|SI_WILDP))) + continue; + + if (nat_wildok(nat, (int)sport, (int)dport, nflags, + NAT_OUTBOUND) == 1) { + if ((fin->fin_flx & FI_IGNORE) != 0) + break; + if ((nflags & SI_CLONE) != 0) { + nat = fr_natclone(fin, nat); + if (nat == NULL) + break; + } else { + MUTEX_ENTER(&ipf_nat_new); + nat_stats.ns_wilds--; + MUTEX_EXIT(&ipf_nat_new); + } + nat->nat_inport = sport; + nat->nat_oport = dport; + if (nat->nat_outport == 0) + nat->nat_outport = sport; + nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); + nat_tabmove(nat); break; } } - if 
(!rw) { - MUTEX_DOWNGRADE(&ipf_nat); - } + + MUTEX_DOWNGRADE(&ipf_nat); + return nat; } -/* - * Lookup the NAT tables to search for a matching redirect - */ +/* ------------------------------------------------------------------------ */ +/* Function: nat_lookupredir */ +/* Returns: nat_t* - NULL == no match, */ +/* else pointer to matching NAT entry */ +/* Parameters: np(I) - pointer to description of packet to find NAT table */ +/* entry for. */ +/* */ +/* Lookup the NAT tables to search for a matching redirect */ +/* ------------------------------------------------------------------------ */ nat_t *nat_lookupredir(np) -register natlookup_t *np; +natlookup_t *np; { - nat_t *nat; fr_info_t fi; + nat_t *nat; bzero((char *)&fi, sizeof(fi)); - fi.fin_data[0] = ntohs(np->nl_inport); - fi.fin_data[1] = ntohs(np->nl_outport); + if (np->nl_flags & IPN_IN) { + fi.fin_data[0] = ntohs(np->nl_realport); + fi.fin_data[1] = ntohs(np->nl_outport); + } else { + fi.fin_data[0] = ntohs(np->nl_inport); + fi.fin_data[1] = ntohs(np->nl_outport); + } + if (np->nl_flags & IPN_TCP) + fi.fin_p = IPPROTO_TCP; + else if (np->nl_flags & IPN_UDP) + fi.fin_p = IPPROTO_UDP; + else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY)) + fi.fin_p = IPPROTO_ICMP; /* - * If nl_inip is non null, this is a lookup based on the real - * ip address. Else, we use the fake. + * We can do two sorts of lookups: + * - IPN_IN: we have the `real' and `out' address, look for `in'. + * - default: we have the `in' and `out' address, look for `real'. */ - if ((nat = nat_outlookup(&fi, np->nl_flags, 0, np->nl_inip, - np->nl_outip, 0))) { - np->nl_realip = nat->nat_outip; - np->nl_realport = nat->nat_outport; - } + if (np->nl_flags & IPN_IN) { + if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p, + np->nl_realip, np->nl_outip))) { + np->nl_inip = nat->nat_inip; + np->nl_inport = nat->nat_inport; + } + } else { + /* + * If nl_inip is non null, this is a lookup based on the real + * ip address. Else, we use the fake. 
+ */ + if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p, + np->nl_inip, np->nl_outip))) { + + if ((np->nl_flags & IPN_FINDFORWARD) != 0) { + fr_info_t fin; + bzero((char *)&fin, sizeof(fin)); + fin.fin_p = nat->nat_p; + fin.fin_data[0] = ntohs(nat->nat_outport); + fin.fin_data[1] = ntohs(nat->nat_oport); + if (nat_inlookup(&fin, np->nl_flags, fin.fin_p, + nat->nat_outip, + nat->nat_oip) != NULL) { + np->nl_flags &= ~IPN_FINDFORWARD; + } + } + + np->nl_realip = nat->nat_outip; + np->nl_realport = nat->nat_outport; + } + } + return nat; } -static int nat_match(fin, np, ip) +/* ------------------------------------------------------------------------ */ +/* Function: nat_match */ +/* Returns: int - 0 == no match, 1 == match */ +/* Parameters: fin(I) - pointer to packet information */ +/* np(I) - pointer to NAT rule */ +/* */ +/* Pull the matching of a packet against a NAT rule out of that complex */ +/* loop inside fr_checknatin() and lay it out properly in its own function. */ +/* ------------------------------------------------------------------------ */ +static int nat_match(fin, np) fr_info_t *fin; ipnat_t *np; -ip_t *ip; { frtuc_t *ft; - if (ip->ip_v != 4) + if (fin->fin_v != 4) return 0; if (np->in_p && fin->fin_p != np->in_p) return 0; + if (fin->fin_out) { if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK))) return 0; @@ -2290,319 +3528,439 @@ ip_t *ip; } ft = &np->in_tuc; - if (!(fin->fin_fl & FI_TCPUDP) || - (fin->fin_fl & FI_SHORT) || (fin->fin_off != 0)) { + if (!(fin->fin_flx & FI_TCPUDP) || + (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { if (ft->ftu_scmp || ft->ftu_dcmp) return 0; return 1; } - return fr_tcpudpchk(ft, fin); + return fr_tcpudpchk(fin, ft); } -/* - * Packets going out on the external interface go through this. - * Here, the source address requires alteration, if anything. 
- */ -int ip_natout(ip, fin) -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: nat_update */ +/* Returns: Nil */ +/* Parameters: nat(I) - pointer to NAT structure */ +/* np(I) - pointer to NAT rule */ +/* */ +/* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */ +/* called with fin_rev updated - i.e. after calling nat_proto(). */ +/* ------------------------------------------------------------------------ */ +void nat_update(fin, nat, np) +fr_info_t *fin; +nat_t *nat; +ipnat_t *np; +{ + ipftq_t *ifq, *ifq2; + ipftqent_t *tqe; + + MUTEX_ENTER(&nat->nat_lock); + tqe = &nat->nat_tqe; + ifq = tqe->tqe_ifq; + + /* + * We allow over-riding of NAT timeouts from NAT rules, even for + * TCP, however, if it is TCP and there is no rule timeout set, + * then do not update the timeout here. + */ + if (np != NULL) + ifq2 = np->in_tqehead[fin->fin_rev]; + else + ifq2 = NULL; + + if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) { + (void) fr_tcp_age(&nat->nat_tqe, fin, nat_tqb, 0); + } else { + if (ifq2 == NULL) { + if (nat->nat_p == IPPROTO_UDP) + ifq2 = &nat_udptq; + else if (nat->nat_p == IPPROTO_ICMP) + ifq2 = &nat_icmptq; + else + ifq2 = &nat_iptq; + } + + fr_movequeue(tqe, ifq, ifq2); + } + MUTEX_EXIT(&nat->nat_lock); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_checknatout */ +/* Returns: int - -1 == packet failed NAT checks so block it, */ +/* 0 == no packet translation occurred, */ +/* 1 == packet was successfully translated. */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(I) - pointer to filtering result flags */ +/* */ +/* Check to see if an outcoming packet should be changed. ICMP packets are */ +/* first checked to see if they match an existing entry (if an error), */ +/* otherwise a search of the current NAT table is made. If neither results */ +/* in a match then a search for a matching NAT rule is made. 
Create a new */ +/* NAT entry if a we matched a NAT rule. Lastly, actually change the */ +/* packet header(s) as required. */ +/* ------------------------------------------------------------------------ */ +int fr_checknatout(fin, passp) fr_info_t *fin; +u_32_t *passp; { - register ipnat_t *np = NULL; - register u_32_t ipa; + struct ifnet *ifp, *sifp; + icmphdr_t *icmp = NULL; tcphdr_t *tcp = NULL; - u_short sport = 0, dport = 0, *csump = NULL; - int natadd = 1, i, icmpset = 1; - u_int nflags = 0, hv, msk; - struct ifnet *ifp; + int rval, natfailed; + ipnat_t *np = NULL; + u_int nflags = 0; + u_32_t ipa, iph; + int natadd = 1; frentry_t *fr; - void *sifp; - u_32_t iph; nat_t *nat; - if (nat_list == NULL || (fr_nat_lock)) + if (nat_stats.ns_rules == 0 || fr_nat_lock != 0) return 0; - if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) && - fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) { - sifp = fin->fin_ifp; + natfailed = 0; + fr = fin->fin_fr; + sifp = fin->fin_ifp; + if ((fr != NULL) && !(fr->fr_flags & FR_DUP) && + fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) fin->fin_ifp = fr->fr_tif.fd_ifp; - } else - sifp = fin->fin_ifp; ifp = fin->fin_ifp; - if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) { - if (fin->fin_p == IPPROTO_TCP) + if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { + switch (fin->fin_p) + { + case IPPROTO_TCP : nflags = IPN_TCP; - else if (fin->fin_p == IPPROTO_UDP) + break; + case IPPROTO_UDP : nflags = IPN_UDP; - if ((nflags & IPN_TCPUDP)) { - tcp = (tcphdr_t *)fin->fin_dp; - sport = tcp->th_sport; - dport = tcp->th_dport; + break; + case IPPROTO_ICMP : + icmp = fin->fin_dp; + + /* + * This is an incoming packet, so the destination is + * the icmp_id and the source port equals 0 + */ + if (nat_icmpquerytype4(icmp->icmp_type)) + nflags = IPN_ICMPQUERY; + break; + default : + break; } + + if ((nflags & IPN_TCPUDP)) + tcp = fin->fin_dp; } ipa = fin->fin_saddr; READ_ENTER(&ipf_nat); - if ((fin->fin_p == IPPROTO_ICMP) && - 
(nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND))) - icmpset = 1; - else if ((fin->fin_fl & FI_FRAG) && - (nat = ipfr_nat_knownfrag(ip, fin))) + if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && + (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND))) + /*EMPTY*/; + else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) natadd = 0; - else if ((nat = nat_outlookup(fin, nflags|FI_WILDP|FI_WILDA, - (u_int)fin->fin_p, fin->fin_src, - fin->fin_dst, 0))) { + else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, + fin->fin_src, fin->fin_dst))) { nflags = nat->nat_flags; - if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) { - if ((nflags & FI_W_SPORT) && - (nat->nat_inport != sport)) - nat->nat_inport = sport; - if ((nflags & FI_W_DPORT) && - (nat->nat_oport != dport)) - nat->nat_oport = dport; - - if (nat->nat_outport == 0) - nat->nat_outport = sport; - nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT); - nflags = nat->nat_flags; - nat_stats.ns_wilds--; - } } else { - RWLOCK_EXIT(&ipf_nat); - - msk = 0xffffffff; - i = 32; + u_32_t hv, msk, nmsk; - WRITE_ENTER(&ipf_nat); /* * If there is no current entry in the nat table for this IP#, * create one for it (if there is a matching rule). 
*/ + RWLOCK_EXIT(&ipf_nat); + msk = 0xffffffff; + nmsk = nat_masks; + WRITE_ENTER(&ipf_nat); maskloop: iph = ipa & htonl(msk); hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz); for (np = nat_rules[hv]; np; np = np->in_mnext) { - if (np->in_ifp && (np->in_ifp != ifp)) + if ((np->in_ifps[0] && (np->in_ifps[0] != ifp))) + continue; + if (np->in_v != fin->fin_v) continue; - if ((np->in_flags & IPN_RF) && - !(np->in_flags & nflags)) + if (np->in_p && (np->in_p != fin->fin_p)) + continue; + if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) continue; if (np->in_flags & IPN_FILTER) { - if (!nat_match(fin, np, ip)) + if (!nat_match(fin, np)) continue; } else if ((ipa & np->in_inmsk) != np->in_inip) continue; - if (*np->in_plabel && !appr_ok(ip, tcp, np)) + + if ((fr != NULL) && + !fr_matchtag(&np->in_tag, &fr->fr_nattag)) continue; - nat = nat_new(fin, ip, np, NULL, - (u_int)nflags, NAT_OUTBOUND); - if (nat != NULL) { + + if (*np->in_plabel != '\0') { + if (((np->in_flags & IPN_FILTER) == 0) && + (np->in_dport != tcp->th_dport)) + continue; + if (appr_ok(fin, tcp, np) == 0) + continue; + } + + if ((nat = nat_new(fin, np, NULL, nflags, + NAT_OUTBOUND))) { np->in_hits++; break; - } + } else + natfailed = -1; } - if ((np == NULL) && (i > 0)) { - do { - i--; + if ((np == NULL) && (nmsk != 0)) { + while (nmsk) { msk <<= 1; - } while ((i >= 0) && ((nat_masks & (1 << i)) == 0)); - if (i >= 0) + if (nmsk & 0x80000000) + break; + nmsk <<= 1; + } + if (nmsk != 0) { + nmsk <<= 1; goto maskloop; + } } MUTEX_DOWNGRADE(&ipf_nat); } - /* - * NOTE: ipf_nat must now only be held as a read lock - */ - if (nat) { - np = nat->nat_ptr; - if (natadd && (fin->fin_fl & FI_FRAG) && np) - ipfr_nat_newfrag(ip, fin, nat); - MUTEX_ENTER(&nat->nat_lock); - if (fin->fin_p != IPPROTO_TCP) { - if (np && np->in_age[1]) - nat->nat_age = np->in_age[1]; - else if (!icmpset && (fin->fin_p == IPPROTO_ICMP)) - nat->nat_age = fr_defnaticmpage; - else - nat->nat_age = fr_defnatage; + if (nat != NULL) { + rval = 
fr_natout(fin, nat, natadd, nflags); + if (rval == 1) { + MUTEX_ENTER(&nat->nat_lock); + nat->nat_ref++; + MUTEX_EXIT(&nat->nat_lock); + fin->fin_nat = nat; } - nat->nat_bytes += ip->ip_len; - nat->nat_pkts++; - MUTEX_EXIT(&nat->nat_lock); + } else + rval = natfailed; + RWLOCK_EXIT(&ipf_nat); - /* - * Fix up checksums, not by recalculating them, but - * simply computing adjustments. - */ + if (rval == -1) { + if (passp != NULL) + *passp = FR_BLOCK; + fin->fin_flx |= FI_BADNAT; + } + fin->fin_ifp = sifp; + return rval; +} + +/* ------------------------------------------------------------------------ */ +/* Function: fr_natout */ +/* Returns: int - -1 == packet failed NAT checks so block it, */ +/* 1 == packet was successfully translated. */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT structure */ +/* natadd(I) - flag indicating if it is safe to add frag cache */ +/* nflags(I) - NAT flags set for this packet */ +/* */ +/* Translate a packet coming "out" on an interface. */ +/* ------------------------------------------------------------------------ */ +int fr_natout(fin, nat, natadd, nflags) +fr_info_t *fin; +nat_t *nat; +int natadd; +u_32_t nflags; +{ + icmphdr_t *icmp; + u_short *csump; + tcphdr_t *tcp; + ipnat_t *np; + int i; + + tcp = NULL; + icmp = NULL; + csump = NULL; + np = nat->nat_ptr; + + if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL)) + (void) fr_nat_newfrag(fin, 0, nat); + + MUTEX_ENTER(&nat->nat_lock); + nat->nat_bytes[1] += fin->fin_plen; + nat->nat_pkts[1]++; + MUTEX_EXIT(&nat->nat_lock); + + /* + * Fix up checksums, not by recalculating them, but + * simply computing adjustments. + * This is only done for STREAMS based IP implementations where the + * checksum has already been calculated by IP. In all other cases, + * IPFilter is called before the checksum needs calculating so there + * is no call to modify whatever is in the header now. 
+ */ + if (fin->fin_v == 4) { if (nflags == IPN_ICMPERR) { u_32_t s1, s2, sumd; s1 = LONG_SUM(ntohl(fin->fin_saddr)); s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr)); CALC_SUMD(s1, s2, sumd); - fix_outcksum(fin, &ip->ip_sum, sumd); + fix_outcksum(fin, &fin->fin_ip->ip_sum, sumd); } -#if (SOLARIS || defined(__sgi)) || !defined(_KERNEL) +#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || defined(linux) else { if (nat->nat_dir == NAT_OUTBOUND) - fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd); + fix_outcksum(fin, &fin->fin_ip->ip_sum, + nat->nat_ipsumd); else - fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd); + fix_incksum(fin, &fin->fin_ip->ip_sum, + nat->nat_ipsumd); } #endif - /* - * Only change the packet contents, not what is filtered upon. - */ - ip->ip_src = nat->nat_outip; - - if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) { + } - if ((nat->nat_outport != 0) && (tcp != NULL)) { - tcp->th_sport = nat->nat_outport; - fin->fin_data[0] = ntohs(tcp->th_sport); - } + if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { + if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) { + tcp = fin->fin_dp; - if (fin->fin_p == IPPROTO_TCP) { - csump = &tcp->th_sum; - MUTEX_ENTER(&nat->nat_lock); - fr_tcp_age(&nat->nat_age, - nat->nat_tcpstate, fin, 1, 0); - if (nat->nat_age < fr_defnaticmpage) - nat->nat_age = fr_defnaticmpage; -#ifdef LARGE_NAT - else if ((!np || !np->in_age[1]) && - (nat->nat_age > fr_defnatage)) - nat->nat_age = fr_defnatage; -#endif - /* - * Increase this because we may have - * "keep state" following this too and - * packet storms can occur if this is - * removed too quickly. - */ - if (nat->nat_age == fr_tcpclosed) - nat->nat_age = fr_tcplastack; - - /* - * Do a MSS CLAMPING on a SYN packet, - * only deal IPv4 for now. 
- */ - if (nat->nat_mssclamp && - (tcp->th_flags & TH_SYN) != 0) - nat_mssclamp(tcp, nat->nat_mssclamp, - fin, csump); - - MUTEX_EXIT(&nat->nat_lock); - } else if (fin->fin_p == IPPROTO_UDP) { - udphdr_t *udp = (udphdr_t *)tcp; - - if (udp->uh_sum) - csump = &udp->uh_sum; - } + tcp->th_sport = nat->nat_outport; + fin->fin_data[0] = ntohs(nat->nat_outport); + } - if (csump) { - if (nat->nat_dir == NAT_OUTBOUND) - fix_outcksum(fin, csump, - nat->nat_sumd[1]); - else - fix_incksum(fin, csump, - nat->nat_sumd[1]); - } + if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) { + icmp = fin->fin_dp; + icmp->icmp_id = nat->nat_outport; } - if (np && (np->in_apr != NULL) && (np->in_dport == 0 || - (tcp != NULL && dport == np->in_dport))) { - i = appr_check(ip, fin, nat); - if (i == 0) - i = 1; - else if (i == -1) - nat->nat_drop[1]++; - } else - i = 1; - ATOMIC_INCL(nat_stats.ns_mapped[1]); - RWLOCK_EXIT(&ipf_nat); /* READ */ - fin->fin_ifp = sifp; - return i; + csump = nat_proto(fin, nat, nflags); } - RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */ - fin->fin_ifp = sifp; - return 0; + + fin->fin_ip->ip_src = nat->nat_outip; + + nat_update(fin, nat, np); + + /* + * The above comments do not hold for layer 4 (or higher) checksums... + */ + if (csump != NULL) { + if (nat->nat_dir == NAT_OUTBOUND) + fix_outcksum(fin, csump, nat->nat_sumd[1]); + else + fix_incksum(fin, csump, nat->nat_sumd[1]); + } +#ifdef IPFILTER_SYNC + ipfsync_update(SMC_NAT, fin, nat->nat_sync); +#endif + /* ------------------------------------------------------------- */ + /* A few quick notes: */ + /* Following are test conditions prior to calling the */ + /* appr_check routine. */ + /* */ + /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ + /* with a redirect rule, we attempt to match the packet's */ + /* source port against in_dport, otherwise we'd compare the */ + /* packet's destination. 
*/ + /* ------------------------------------------------------------- */ + if ((np != NULL) && (np->in_apr != NULL)) { + i = appr_check(fin, nat); + if (i == 0) + i = 1; + } else + i = 1; + ATOMIC_INCL(nat_stats.ns_mapped[1]); + fin->fin_flx |= FI_NATED; + return i; } -/* - * Packets coming in from the external interface go through this. - * Here, the destination address requires alteration, if anything. - */ -int ip_natin(ip, fin) -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: fr_checknatin */ +/* Returns: int - -1 == packet failed NAT checks so block it, */ +/* 0 == no packet translation occurred, */ +/* 1 == packet was successfully translated. */ +/* Parameters: fin(I) - pointer to packet information */ +/* passp(I) - pointer to filtering result flags */ +/* */ +/* Check to see if an incoming packet should be changed. ICMP packets are */ +/* first checked to see if they match an existing entry (if an error), */ +/* otherwise a search of the current NAT table is made. If neither results */ +/* in a match then a search for a matching NAT rule is made. Create a new */ +/* NAT entry if a we matched a NAT rule. Lastly, actually change the */ +/* packet header(s) as required. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_checknatin(fin, passp) fr_info_t *fin; +u_32_t *passp; { - register struct in_addr src; - register struct in_addr in; - register ipnat_t *np; - u_short sport = 0, dport = 0, *csump = NULL; - u_int nflags = 0, natadd = 1, hv, msk; - struct ifnet *ifp = fin->fin_ifp; - tcphdr_t *tcp = NULL; - int i, icmpset = 0; + u_int nflags, natadd; + int rval, natfailed; + struct ifnet *ifp; + struct in_addr in; + icmphdr_t *icmp; + tcphdr_t *tcp; + u_short dport; + ipnat_t *np; nat_t *nat; u_32_t iph; - if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock)) + if (nat_stats.ns_rules == 0 || fr_nat_lock != 0) return 0; - if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) { - if (fin->fin_p == IPPROTO_TCP) + tcp = NULL; + icmp = NULL; + dport = 0; + natadd = 1; + nflags = 0; + natfailed = 0; + ifp = fin->fin_ifp; + + if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { + switch (fin->fin_p) + { + case IPPROTO_TCP : nflags = IPN_TCP; - else if (fin->fin_p == IPPROTO_UDP) + break; + case IPPROTO_UDP : nflags = IPN_UDP; + break; + case IPPROTO_ICMP : + icmp = fin->fin_dp; + + /* + * This is an incoming packet, so the destination is + * the icmp_id and the source port equals 0 + */ + if (nat_icmpquerytype4(icmp->icmp_type)) { + nflags = IPN_ICMPQUERY; + dport = icmp->icmp_id; + } break; + default : + break; + } + if ((nflags & IPN_TCPUDP)) { - tcp = (tcphdr_t *)fin->fin_dp; - sport = tcp->th_sport; + tcp = fin->fin_dp; dport = tcp->th_dport; } } in = fin->fin_dst; - /* make sure the source address is to be redirected */ - src = fin->fin_src; READ_ENTER(&ipf_nat); - if ((fin->fin_p == IPPROTO_ICMP) && - (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND))) - icmpset = 1; - else if ((fin->fin_fl & FI_FRAG) && - (nat = ipfr_nat_knownfrag(ip, fin))) + if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && + (nat = nat_icmperror(fin, &nflags, NAT_INBOUND))) + /*EMPTY*/; + else if 
((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin))) natadd = 0; - else if ((nat = nat_inlookup(fin, nflags|FI_WILDP|FI_WILDA, - (u_int)fin->fin_p, fin->fin_src, in, 0))) { + else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, + fin->fin_src, in))) { nflags = nat->nat_flags; - if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) { - if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT)) - nat->nat_oport = sport; - if ((nat->nat_outport != dport) && - (nflags & FI_W_SPORT)) - nat->nat_outport = dport; - nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT); - nflags = nat->nat_flags; - nat_stats.ns_wilds--; - } } else { - RWLOCK_EXIT(&ipf_nat); + u_32_t hv, msk, rmsk; + RWLOCK_EXIT(&ipf_nat); + rmsk = rdr_masks; msk = 0xffffffff; - i = 32; - WRITE_ENTER(&ipf_nat); /* * If there is no current entry in the nat table for this IP#, @@ -2612,152 +3970,288 @@ maskloop: iph = in.s_addr & htonl(msk); hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz); for (np = rdr_rules[hv]; np; np = np->in_rnext) { - if ((np->in_ifp && (np->in_ifp != ifp)) || - (np->in_p && (np->in_p != fin->fin_p))) + if (np->in_ifps[0] && (np->in_ifps[0] != ifp)) + continue; + if (np->in_v != fin->fin_v) + continue; + if (np->in_p && (np->in_p != fin->fin_p)) continue; - if ((np->in_flags & IPN_RF) && - !(nflags & np->in_flags)) + if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) continue; if (np->in_flags & IPN_FILTER) { - if (!nat_match(fin, np, ip)) + if (!nat_match(fin, np)) + continue; + } else { + if ((in.s_addr & np->in_outmsk) != np->in_outip) + continue; + if (np->in_pmin && + ((ntohs(np->in_pmax) < ntohs(dport)) || + (ntohs(dport) < ntohs(np->in_pmin)))) + continue; + } + + if (*np->in_plabel != '\0') { + if (!appr_ok(fin, tcp, np)) { continue; - } else if ((in.s_addr & np->in_outmsk) != np->in_outip) - continue; - if ((!np->in_pmin || (np->in_flags & IPN_FILTER) || - ((ntohs(np->in_pmax) >= ntohs(dport)) && - (ntohs(dport) >= ntohs(np->in_pmin))))) - if ((nat = nat_new(fin, ip, np, 
NULL, nflags, - NAT_INBOUND))) { - np->in_hits++; - break; } + } + + nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND); + if (nat != NULL) { + np->in_hits++; + break; + } else + natfailed = -1; } - if ((np == NULL) && (i > 0)) { - do { - i--; + if ((np == NULL) && (rmsk != 0)) { + while (rmsk) { msk <<= 1; - } while ((i >= 0) && ((rdr_masks & (1 << i)) == 0)); - if (i >= 0) + if (rmsk & 0x80000000) + break; + rmsk <<= 1; + } + if (rmsk != 0) { + rmsk <<= 1; goto maskloop; + } } MUTEX_DOWNGRADE(&ipf_nat); } + if (nat != NULL) { + rval = fr_natin(fin, nat, natadd, nflags); + if (rval == 1) { + MUTEX_ENTER(&nat->nat_lock); + nat->nat_ref++; + MUTEX_EXIT(&nat->nat_lock); + fin->fin_nat = nat; + fin->fin_state = nat->nat_state; + } + } else + rval = natfailed; + RWLOCK_EXIT(&ipf_nat); + + if (rval == -1) { + if (passp != NULL) + *passp = FR_BLOCK; + fin->fin_flx |= FI_BADNAT; + } + return rval; +} - /* - * NOTE: ipf_nat must now only be held as a read lock - */ - if (nat) { - np = nat->nat_ptr; - fin->fin_fr = nat->nat_fr; - if (natadd && (fin->fin_fl & FI_FRAG) && np) - ipfr_nat_newfrag(ip, fin, nat); - if (np && (np->in_apr != NULL) && (np->in_dport == 0 || - (tcp != NULL && sport == np->in_dport))) { - i = appr_check(ip, fin, nat); + +/* ------------------------------------------------------------------------ */ +/* Function: fr_natin */ +/* Returns: int - -1 == packet failed NAT checks so block it, */ +/* 1 == packet was successfully translated. */ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT structure */ +/* natadd(I) - flag indicating if it is safe to add frag cache */ +/* nflags(I) - NAT flags set for this packet */ +/* Locks Held: ipf_nat (READ) */ +/* */ +/* Translate a packet coming "in" on an interface. 
*/ +/* ------------------------------------------------------------------------ */ +int fr_natin(fin, nat, natadd, nflags) +fr_info_t *fin; +nat_t *nat; +int natadd; +u_32_t nflags; +{ + icmphdr_t *icmp; + u_short *csump; + tcphdr_t *tcp; + ipnat_t *np; + int i; + + tcp = NULL; + csump = NULL; + np = nat->nat_ptr; + fin->fin_fr = nat->nat_fr; + + if (np != NULL) { + if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) + (void) fr_nat_newfrag(fin, 0, nat); + + /* ------------------------------------------------------------- */ + /* A few quick notes: */ + /* Following are test conditions prior to calling the */ + /* appr_check routine. */ + /* */ + /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ + /* with a map rule, we attempt to match the packet's */ + /* source port against in_dport, otherwise we'd compare the */ + /* packet's destination. */ + /* ------------------------------------------------------------- */ + if (np->in_apr != NULL) { + i = appr_check(fin, nat); if (i == -1) { - nat->nat_drop[0]++; - RWLOCK_EXIT(&ipf_nat); - return i; + return -1; } } + } - MUTEX_ENTER(&nat->nat_lock); - if (fin->fin_p != IPPROTO_TCP) { - if (np && np->in_age[0]) - nat->nat_age = np->in_age[0]; - else if (!icmpset && (fin->fin_p == IPPROTO_ICMP)) - nat->nat_age = fr_defnaticmpage; - else - nat->nat_age = fr_defnatage; +#ifdef IPFILTER_SYNC + ipfsync_update(SMC_NAT, fin, nat->nat_sync); +#endif + + MUTEX_ENTER(&nat->nat_lock); + nat->nat_bytes[0] += fin->fin_plen; + nat->nat_pkts[0]++; + MUTEX_EXIT(&nat->nat_lock); + + fin->fin_ip->ip_dst = nat->nat_inip; + fin->fin_fi.fi_daddr = nat->nat_inip.s_addr; + if (nflags & IPN_TCPUDP) + tcp = fin->fin_dp; + + /* + * Fix up checksums, not by recalculating them, but + * simply computing adjustments. + * Why only do this for some platforms on inbound packets ? + * Because for those that it is done, IP processing is yet to happen + * and so the IPv4 header checksum has not yet been evaluated. 
+ * Perhaps it should always be done for the benefit of things like + * fast forwarding (so that it doesn't need to be recomputed) but with + * header checksum offloading, perhaps it is a moot point. + */ +#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ + defined(__osf__) || defined(linux) + if (nat->nat_dir == NAT_OUTBOUND) + fix_incksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd); + else + fix_outcksum(fin, &fin->fin_ip->ip_sum, nat->nat_ipsumd); +#endif + + if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { + if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) { + tcp->th_dport = nat->nat_inport; + fin->fin_data[1] = ntohs(nat->nat_inport); } - nat->nat_bytes += ip->ip_len; - nat->nat_pkts++; - MUTEX_EXIT(&nat->nat_lock); - /* - * Fix up checksums, not by recalculating them, but - * simply computing adjustments. - */ + + if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) { + icmp = fin->fin_dp; + + icmp->icmp_id = nat->nat_inport; + } + + csump = nat_proto(fin, nat, nflags); + } + + nat_update(fin, nat, np); + + /* + * The above comments do not hold for layer 4 (or higher) checksums... + */ + if (csump != NULL) { if (nat->nat_dir == NAT_OUTBOUND) - fix_incksum(fin, &ip->ip_sum, nat->nat_ipsumd); + fix_incksum(fin, csump, nat->nat_sumd[0]); else - fix_outcksum(fin, &ip->ip_sum, nat->nat_ipsumd); + fix_outcksum(fin, csump, nat->nat_sumd[0]); + } + ATOMIC_INCL(nat_stats.ns_mapped[0]); + fin->fin_flx |= FI_NATED; + if (np != NULL && np->in_tag.ipt_num[0] != 0) + fin->fin_nattag = &np->in_tag; + return 1; +} + - ip->ip_dst = nat->nat_inip; - fin->fin_fi.fi_daddr = nat->nat_inip.s_addr; +/* ------------------------------------------------------------------------ */ +/* Function: nat_proto */ +/* Returns: u_short* - pointer to transport header checksum to update, */ +/* NULL if the transport protocol is not recognised */ +/* as needing a checksum update. 
*/ +/* Parameters: fin(I) - pointer to packet information */ +/* nat(I) - pointer to NAT structure */ +/* nflags(I) - NAT flags set for this packet */ +/* */ +/* Return the pointer to the checksum field for each protocol so understood.*/ +/* If support for making other changes to a protocol header is required, */ +/* that is not strictly 'address' translation, such as clamping the MSS in */ +/* TCP down to a specific value, then do it from here. */ +/* ------------------------------------------------------------------------ */ +u_short *nat_proto(fin, nat, nflags) +fr_info_t *fin; +nat_t *nat; +u_int nflags; +{ + icmphdr_t *icmp; + u_short *csump; + tcphdr_t *tcp; + udphdr_t *udp; - if ((fin->fin_off == 0) && !(fin->fin_fl & FI_SHORT)) { + csump = NULL; + if (fin->fin_out == 0) { + fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND); + } else { + fin->fin_rev = (nat->nat_dir == NAT_INBOUND); + } - if ((nat->nat_inport != 0) && (tcp != NULL)) { - tcp->th_dport = nat->nat_inport; - fin->fin_data[1] = ntohs(tcp->th_dport); - } + switch (fin->fin_p) + { + case IPPROTO_TCP : + tcp = fin->fin_dp; - if (fin->fin_p == IPPROTO_TCP) { - csump = &tcp->th_sum; - MUTEX_ENTER(&nat->nat_lock); - fr_tcp_age(&nat->nat_age, - nat->nat_tcpstate, fin, 0, 0); - if (nat->nat_age < fr_defnaticmpage) - nat->nat_age = fr_defnaticmpage; -#ifdef LARGE_NAT - else if ((!np || !np->in_age[0]) && - (nat->nat_age > fr_defnatage)) - nat->nat_age = fr_defnatage; -#endif - /* - * Increase this because we may have - * "keep state" following this too and - * packet storms can occur if this is - * removed too quickly. - */ - if (nat->nat_age == fr_tcpclosed) - nat->nat_age = fr_tcplastack; - /* - * Do a MSS CLAMPING on a SYN packet, - * only deal IPv4 for now. 
- */ - if (nat->nat_mssclamp && - (tcp->th_flags & TH_SYN) != 0) - nat_mssclamp(tcp, nat->nat_mssclamp, - fin, csump); - - MUTEX_EXIT(&nat->nat_lock); - } else if (fin->fin_p == IPPROTO_UDP) { - udphdr_t *udp = (udphdr_t *)tcp; - - if (udp->uh_sum) - csump = &udp->uh_sum; - } + csump = &tcp->th_sum; - if (csump) { - if (nat->nat_dir == NAT_OUTBOUND) - fix_incksum(fin, csump, - nat->nat_sumd[0]); - else - fix_outcksum(fin, csump, - nat->nat_sumd[0]); - } + /* + * Do a MSS CLAMPING on a SYN packet, + * only deal IPv4 for now. + */ + if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0) + nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump); + + break; + + case IPPROTO_UDP : + udp = fin->fin_dp; + + if (udp->uh_sum) + csump = &udp->uh_sum; + break; + + case IPPROTO_ICMP : + icmp = fin->fin_dp; + + if ((nflags & IPN_ICMPQUERY) != 0) { + if (icmp->icmp_cksum != 0) + csump = &icmp->icmp_cksum; } - ATOMIC_INCL(nat_stats.ns_mapped[0]); - RWLOCK_EXIT(&ipf_nat); /* READ */ - return 1; + break; } - RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */ - return 0; + return csump; } -/* - * Free all memory used by NAT structures allocated at runtime. - */ -void ip_natunload() +/* ------------------------------------------------------------------------ */ +/* Function: fr_natunload */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Free all memory used by NAT structures allocated at runtime. */ +/* ------------------------------------------------------------------------ */ +void fr_natunload() { - WRITE_ENTER(&ipf_nat); + ipftq_t *ifq, *ifqnext; + (void) nat_clearlist(); (void) nat_flushtable(); - RWLOCK_EXIT(&ipf_nat); + + /* + * Proxy timeout queues are not cleaned here because although they + * exist on the NAT list, appr_unload is called after fr_natunload + * and the proxies actually are responsible for them being created. + * Should the proxy timeouts have their own list? There's no real + * justification as this is the only complication. 
+ */ + for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) { + ifqnext = ifq->ifq_next; + if (((ifq->ifq_flags & IFQF_PROXY) == 0) && + (fr_deletetimeoutqueue(ifq) == 0)) + fr_freetimeoutqueue(ifq); + } if (nat_table[0] != NULL) { KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz); @@ -2779,72 +4273,152 @@ void ip_natunload() KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz); maptable = NULL; } + if (nat_stats.ns_bucketlen[0] != NULL) { + KFREES(nat_stats.ns_bucketlen[0], + sizeof(u_long *) * ipf_nattable_sz); + nat_stats.ns_bucketlen[0] = NULL; + } + if (nat_stats.ns_bucketlen[1] != NULL) { + KFREES(nat_stats.ns_bucketlen[1], + sizeof(u_long *) * ipf_nattable_sz); + nat_stats.ns_bucketlen[1] = NULL; + } + + if (fr_nat_maxbucket_reset == 1) + fr_nat_maxbucket = 0; + + if (fr_nat_init == 1) { + fr_nat_init = 0; + fr_sttab_destroy(nat_tqb); + + RW_DESTROY(&ipf_natfrag); + RW_DESTROY(&ipf_nat); + + MUTEX_DESTROY(&ipf_nat_new); + MUTEX_DESTROY(&ipf_natio); + + MUTEX_DESTROY(&nat_udptq.ifq_lock); + MUTEX_DESTROY(&nat_icmptq.ifq_lock); + MUTEX_DESTROY(&nat_iptq.ifq_lock); + } } -/* - * Slowly expire held state for NAT entries. Timeouts are set in - * expectation of this being called twice per second. - */ -void ip_natexpire() +/* ------------------------------------------------------------------------ */ +/* Function: fr_natexpire */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Check all of the timeout queues for entries at the top which need to be */ +/* expired. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_natexpire() { - register struct nat *nat, **natp; -#if defined(_KERNEL) && !SOLARIS + ipftq_t *ifq, *ifqnext; + ipftqent_t *tqe, *tqn; +#if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) int s; #endif + int i; SPL_NET(s); WRITE_ENTER(&ipf_nat); - for (natp = &nat_instances; (nat = *natp); ) { - nat->nat_age--; - if (nat->nat_age) { - natp = &nat->nat_next; - continue; + for (ifq = nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) { + for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { + if (tqe->tqe_die > fr_ticks) + break; + tqn = tqe->tqe_next; + nat_delete(tqe->tqe_parent, NL_EXPIRE); + } + } + + for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) { + ifqnext = ifq->ifq_next; + + for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { + if (tqe->tqe_die > fr_ticks) + break; + tqn = tqe->tqe_next; + nat_delete(tqe->tqe_parent, NL_EXPIRE); + } + } + + for (ifq = nat_utqe; ifq != NULL; ifq = ifqnext) { + ifqnext = ifq->ifq_next; + + if (((ifq->ifq_flags & IFQF_DELETE) != 0) && + (ifq->ifq_ref == 0)) { + fr_freetimeoutqueue(ifq); } - *natp = nat->nat_next; -#ifdef IPFILTER_LOG - nat_log(nat, NL_EXPIRE); -#endif - nat_delete(nat); - nat_stats.ns_expire++; } + RWLOCK_EXIT(&ipf_nat); SPL_X(s); } -/* - */ -void ip_natsync(ifp) +/* ------------------------------------------------------------------------ */ +/* Function: fr_natsync */ +/* Returns: Nil */ +/* Parameters: ifp(I) - pointer to network interface */ +/* */ +/* Walk through all of the currently active NAT sessions, looking for those */ +/* which need to have their translated address updated. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_natsync(ifp) void *ifp; { - register ipnat_t *n; - register nat_t *nat; - register u_32_t sum1, sum2, sumd; + u_32_t sum1, sum2, sumd; struct in_addr in; - ipnat_t *np; + ipnat_t *n; + nat_t *nat; void *ifp2; -#if defined(_KERNEL) && !SOLARIS +#if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) int s; #endif + if (fr_running <= 0) + return; + /* * Change IP addresses for NAT sessions for any protocol except TCP - * since it will break the TCP connection anyway. + * since it will break the TCP connection anyway. The only rules + * which will get changed are those which are "map ... -> 0/32", + * where the rule specifies the address is taken from the interface. */ SPL_NET(s); WRITE_ENTER(&ipf_nat); - for (nat = nat_instances; nat; nat = nat->nat_next) - if (((ifp == NULL) || (ifp == nat->nat_ifp)) && - !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) && - (np->in_outmsk == 0xffffffff) && !np->in_nip) { - ifp2 = nat->nat_ifp; + + if (fr_running <= 0) { + RWLOCK_EXIT(&ipf_nat); + return; + } + + for (nat = nat_instances; nat; nat = nat->nat_next) { + if ((nat->nat_flags & IPN_TCP) != 0) + continue; + n = nat->nat_ptr; + if ((n == NULL) || + (n->in_outip != 0) || (n->in_outmsk != 0xffffffff)) + continue; + if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) || + (ifp == nat->nat_ifps[1]))) { + nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], 4); + if (nat->nat_ifnames[1][0] != '\0') { + nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1], + 4); + } else + nat->nat_ifps[1] = nat->nat_ifps[0]; + ifp2 = nat->nat_ifps[0]; + if (ifp2 == NULL) + continue; + /* * Change the map-to address to be the same as the * new one. 
*/ sum1 = nat->nat_outip.s_addr; - if (fr_ifpaddr(4, ifp2, &in) != -1) + if (fr_ifpaddr(4, FRI_NORMAL, ifp2, &in, NULL) != -1) nat->nat_outip = in; sum2 = nat->nat_outip.s_addr; @@ -2862,23 +4436,74 @@ void *ifp; nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); nat->nat_sumd[1] = nat->nat_sumd[0]; } + } - for (n = nat_list; (n != NULL); n = n->in_next) - if (n->in_ifp == ifp) { - n->in_ifp = (void *)GETUNIT(n->in_ifname, 4); - if (!n->in_ifp) - n->in_ifp = (void *)-1; - } + for (n = nat_list; (n != NULL); n = n->in_next) { + if ((ifp == NULL) || (n->in_ifps[0] == ifp)) + n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], 4); + if ((ifp == NULL) || (n->in_ifps[1] == ifp)) + n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], 4); + } RWLOCK_EXIT(&ipf_nat); SPL_X(s); } -#ifdef IPFILTER_LOG +/* ------------------------------------------------------------------------ */ +/* Function: nat_icmpquerytype4 */ +/* Returns: int - 1 == success, 0 == failure */ +/* Parameters: icmptype(I) - ICMP type number */ +/* */ +/* Tests to see if the ICMP type number passed is a query/response type or */ +/* not. */ +/* ------------------------------------------------------------------------ */ +static INLINE int nat_icmpquerytype4(icmptype) +int icmptype; +{ + + /* + * For the ICMP query NAT code, it is essential that both the query + * and the reply match on the NAT rule. Because the NAT structure + * does not keep track of the icmptype, and a single NAT structure + * is used for all icmp types with the same src, dest and id, we + * simply define the replies as queries as well. 
The funny thing is, + * altough it seems silly to call a reply a query, this is exactly + * as it is defined in the IPv4 specification + */ + + switch (icmptype) + { + + case ICMP_ECHOREPLY: + case ICMP_ECHO: + /* route aedvertisement/solliciation is currently unsupported: */ + /* it would require rewriting the ICMP data section */ + case ICMP_TSTAMP: + case ICMP_TSTAMPREPLY: + case ICMP_IREQ: + case ICMP_IREQREPLY: + case ICMP_MASKREQ: + case ICMP_MASKREPLY: + return 1; + default: + return 0; + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_log */ +/* Returns: Nil */ +/* Parameters: nat(I) - pointer to NAT structure */ +/* type(I) - type of log entry to create */ +/* */ +/* Creates a NAT log entry. */ +/* ------------------------------------------------------------------------ */ void nat_log(nat, type) struct nat *nat; u_int type; { +#ifdef IPFILTER_LOG # ifndef LARGE_NAT struct ipnat *np; int rulen; @@ -2891,15 +4516,17 @@ u_int type; natl.nl_inip = nat->nat_inip; natl.nl_outip = nat->nat_outip; natl.nl_origip = nat->nat_oip; - natl.nl_bytes = nat->nat_bytes; - natl.nl_pkts = nat->nat_pkts; + natl.nl_bytes[0] = nat->nat_bytes[0]; + natl.nl_bytes[1] = nat->nat_bytes[1]; + natl.nl_pkts[0] = nat->nat_pkts[0]; + natl.nl_pkts[1] = nat->nat_pkts[1]; natl.nl_origport = nat->nat_oport; natl.nl_inport = nat->nat_inport; natl.nl_outport = nat->nat_outport; natl.nl_p = nat->nat_p; natl.nl_type = type; natl.nl_rule = -1; -#ifndef LARGE_NAT +# ifndef LARGE_NAT if (nat->nat_ptr != NULL) { for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++) if (np == nat->nat_ptr) { @@ -2907,29 +4534,202 @@ u_int type; break; } } -#endif +# endif items[0] = &natl; sizes[0] = sizeof(natl); types[0] = 0; (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1); -} #endif +} #if defined(__OpenBSD__) +/* ------------------------------------------------------------------------ */ +/* Function: nat_ifdetach */ +/* Returns: Nil */ 
+/* Parameters: ifp(I) - pointer to network interface */ +/* */ +/* Compatibility interface for OpenBSD to trigger the correct updating of */ +/* interface references within IPFilter. */ +/* ------------------------------------------------------------------------ */ void nat_ifdetach(ifp) void *ifp; { - frsync(); + frsync(ifp); return; } #endif -/* - * Check for MSS option and clamp it if necessary. - */ +/* ------------------------------------------------------------------------ */ +/* Function: fr_natderef */ +/* Returns: Nil */ +/* Parameters: isp(I) - pointer to pointer to NAT table entry */ +/* */ +/* Decrement the reference counter for this NAT table entry and free it if */ +/* there are no more things using it. */ +/* ------------------------------------------------------------------------ */ +void fr_natderef(natp) +nat_t **natp; +{ + nat_t *nat; + + nat = *natp; + *natp = NULL; + WRITE_ENTER(&ipf_nat); + nat->nat_ref--; + if (nat->nat_ref == 0) + nat_delete(nat, NL_EXPIRE); + RWLOCK_EXIT(&ipf_nat); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_natclone */ +/* Returns: ipstate_t* - NULL == cloning failed, */ +/* else pointer to new state structure */ +/* Parameters: fin(I) - pointer to packet information */ +/* is(I) - pointer to master state structure */ +/* Write Lock: ipf_nat */ +/* */ +/* Create a "duplcate" state table entry from the master. 
*/ +/* ------------------------------------------------------------------------ */ +static nat_t *fr_natclone(fin, nat) +fr_info_t *fin; +nat_t *nat; +{ + frentry_t *fr; + nat_t *clone; + ipnat_t *np; + + KMALLOC(clone, nat_t *); + if (clone == NULL) + return NULL; + bcopy((char *)nat, (char *)clone, sizeof(*clone)); + + MUTEX_NUKE(&clone->nat_lock); + + clone->nat_flags &= ~SI_CLONE; + clone->nat_flags |= SI_CLONED; + + + if (nat_insert(clone, fin->fin_rev) == -1) { + KFREE(clone); + return NULL; + } + np = clone->nat_ptr; + if (np != NULL) { + if (nat_logging) + nat_log(clone, (u_int)np->in_redir); + np->in_use++; + } + fr = clone->nat_fr; + if (fr != NULL) { + MUTEX_ENTER(&fr->fr_lock); + fr->fr_ref++; + MUTEX_EXIT(&fr->fr_lock); + } + + + /* + * Because the clone is created outside the normal loop of things and + * TCP has special needs in terms of state, initialise the timeout + * state of the new NAT from here. + */ + if (clone->nat_p == IPPROTO_TCP) { + (void) fr_tcp_age(&clone->nat_tqe, fin, nat_tqb, \ + clone->nat_flags); + } +#ifdef IPFILTER_SYNC + clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone); +#endif + if (nat_logging) + nat_log(clone, NL_CLONE); + return clone; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_wildok */ +/* Returns: int - 1 == packet's ports match wildcards */ +/* 0 == packet's ports don't match wildcards */ +/* Parameters: nat(I) - NAT entry */ +/* sport(I) - source port */ +/* dport(I) - destination port */ +/* flags(I) - wildcard flags */ +/* dir(I) - packet direction */ +/* */ +/* Use NAT entry and packet direction to determine which combination of */ +/* wildcard flags should be used. 
*/ +/* ------------------------------------------------------------------------ */ +static INLINE int nat_wildok(nat, sport, dport, flags, dir) +nat_t *nat; +int sport; +int dport; +int flags; +int dir; +{ + /* + * When called by dir is set to + * nat_inlookup NAT_INBOUND (0) + * nat_outlookup NAT_OUTBOUND (1) + * + * We simply combine the packet's direction in dir with the original + * "intended" direction of that NAT entry in nat->nat_dir to decide + * which combination of wildcard flags to allow. + */ + + switch ((dir << 1) | nat->nat_dir) + { + case 3: /* outbound packet / outbound entry */ + if (((nat->nat_inport == sport) || + (flags & SI_W_SPORT)) && + ((nat->nat_oport == dport) || + (flags & SI_W_DPORT))) + return 1; + break; + case 2: /* outbound packet / inbound entry */ + if (((nat->nat_outport == sport) || + (flags & SI_W_DPORT)) && + ((nat->nat_oport == dport) || + (flags & SI_W_SPORT))) + return 1; + break; + case 1: /* inbound packet / outbound entry */ + if (((nat->nat_oport == sport) || + (flags & SI_W_DPORT)) && + ((nat->nat_outport == dport) || + (flags & SI_W_SPORT))) + return 1; + break; + case 0: /* inbound packet / inbound entry */ + if (((nat->nat_oport == sport) || + (flags & SI_W_SPORT)) && + ((nat->nat_outport == dport) || + (flags & SI_W_DPORT))) + return 1; + break; + default: + break; + } + + return(0); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: nat_mssclamp */ +/* Returns: Nil */ +/* Parameters: tcp(I) - pointer to TCP header */ +/* maxmss(I) - value to clamp the TCP MSS to */ +/* fin(I) - pointer to packet information */ +/* csump(I) - pointer to TCP checksum */ +/* */ +/* Check for MSS option and clamp it if necessary. If found and changed, */ +/* then the TCP header checksum will be updated to reflect the change in */ +/* the MSS. 
*/ +/* ------------------------------------------------------------------------ */ static void nat_mssclamp(tcp, maxmss, fin, csump) tcphdr_t *tcp; u_32_t maxmss; @@ -2939,9 +4739,8 @@ u_short *csump; u_char *cp, *ep, opt; int hlen, advance; u_32_t mss, sumd; - u_short v; - hlen = tcp->th_off << 2; + hlen = TCP_OFF(tcp) << 2; if (hlen > sizeof(*tcp)) { cp = (u_char *)tcp + sizeof(*tcp); ep = (u_char *)tcp + hlen; @@ -2954,21 +4753,21 @@ u_short *csump; cp++; continue; } - - if (&cp[1] >= ep) + + if (cp + 1 >= ep) break; advance = cp[1]; - if (&cp[advance] > ep) + if ((cp + advance > ep) || (advance <= 0)) break; - switch (opt) { + switch (opt) + { case TCPOPT_MAXSEG: if (advance != 4) break; - bcopy(&cp[2], &v, sizeof(v)); - mss = ntohs(v); + mss = cp[2] * 256 + cp[3]; if (mss > maxmss) { - v = htons(maxmss); - bcopy(&v, &cp[2], sizeof(v)); + cp[2] = maxmss / 256; + cp[3] = maxmss & 0xff; CALC_SUMD(mss, maxmss, sumd); fix_outcksum(fin, csump, sumd); } @@ -2977,8 +4776,60 @@ u_short *csump; /* ignore unknown options */ break; } - - cp += advance; - } - } -} + + cp += advance; + } + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_setnatqueue */ +/* Returns: Nil */ +/* Parameters: nat(I)- pointer to NAT structure */ +/* rev(I) - forward(0) or reverse(1) direction */ +/* Locks: ipf_nat (read or write) */ +/* */ +/* Put the NAT entry on its default queue entry, using rev as a helped in */ +/* determining which queue it should be placed on. 
*/ +/* ------------------------------------------------------------------------ */ +void fr_setnatqueue(nat, rev) +nat_t *nat; +int rev; +{ + ipftq_t *oifq, *nifq; + + if (nat->nat_ptr != NULL) + nifq = nat->nat_ptr->in_tqehead[rev]; + else + nifq = NULL; + + if (nifq == NULL) { + switch (nat->nat_p) + { + case IPPROTO_UDP : + nifq = &nat_udptq; + break; + case IPPROTO_ICMP : + nifq = &nat_icmptq; + break; + case IPPROTO_TCP : + nifq = nat_tqb + nat->nat_tqe.tqe_state[rev]; + break; + default : + nifq = &nat_iptq; + break; + } + } + + oifq = nat->nat_tqe.tqe_ifq; + /* + * If it's currently on a timeout queue, move it from one queue to + * another, else put it on the end of the newly determined queue. + */ + if (oifq != NULL) + fr_movequeue(&nat->nat_tqe, oifq, nifq); + else + fr_queueappend(&nat->nat_tqe, nifq, nat); + return; +} diff --git a/sys/contrib/ipfilter/netinet/ip_nat.h b/sys/contrib/ipfilter/netinet/ip_nat.h index bdadb60..a90a792 100644 --- a/sys/contrib/ipfilter/netinet/ip_nat.h +++ b/sys/contrib/ipfilter/netinet/ip_nat.h @@ -1,69 +1,77 @@ +/* $FreeBSD$ */ + /* - * Copyright (C) 1995-2001 by Darren Reed. + * Copyright (C) 1995-2001, 2003 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. 
* * @(#)ip_nat.h 1.5 2/4/96 - * $Id: ip_nat.h,v 2.17.2.14 2000/11/18 03:58:04 darrenr Exp $ * $FreeBSD$ + * Id: ip_nat.h,v 2.90.2.9 2005/03/28 11:09:55 darrenr Exp */ #ifndef __IP_NAT_H__ #define __IP_NAT_H__ #ifndef SOLARIS -#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) +#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) #endif #if defined(__STDC__) || defined(__GNUC__) -#define SIOCADNAT _IOW('r', 60, struct ipnat *) -#define SIOCRMNAT _IOW('r', 61, struct ipnat *) -#define SIOCGNATS _IOWR('r', 62, struct natstat *) -#define SIOCGNATL _IOWR('r', 63, struct natlookup *) +#define SIOCADNAT _IOW('r', 60, struct ipfobj) +#define SIOCRMNAT _IOW('r', 61, struct ipfobj) +#define SIOCGNATS _IOWR('r', 62, struct ipfobj) +#define SIOCGNATL _IOWR('r', 63, struct ipfobj) +#define SIOCPROXY _IOWR('r', 64, struct ap_control) #else -#define SIOCADNAT _IOW(r, 60, struct ipnat *) -#define SIOCRMNAT _IOW(r, 61, struct ipnat *) -#define SIOCGNATS _IOWR(r, 62, struct natstat *) -#define SIOCGNATL _IOWR(r, 63, struct natlookup *) +#define SIOCADNAT _IOW(r, 60, struct ipfobj) +#define SIOCRMNAT _IOW(r, 61, struct ipfobj) +#define SIOCGNATS _IOWR(r, 62, struct ipfobj) +#define SIOCGNATL _IOWR(r, 63, struct ipfobj) +#define SIOCPROXY _IOWR(r, 64, struct ap_control) #endif -#undef LARGE_NAT /* define this if you're setting up a system to NAT +#undef LARGE_NAT /* define this if you're setting up a system to NAT * LARGE numbers of networks/hosts - i.e. in the * hundreds or thousands. In such a case, you should * also change the RDR_SIZE and NAT_SIZE below to more * appropriate sizes. The figures below were used for * a setup with 1000-2000 networks to NAT. 
*/ -#ifndef NAT_SIZE -# ifdef LARGE_NAT +#ifndef NAT_SIZE +# ifdef LARGE_NAT # define NAT_SIZE 2047 # else # define NAT_SIZE 127 # endif #endif -#ifndef RDR_SIZE -# ifdef LARGE_NAT +#ifndef RDR_SIZE +# ifdef LARGE_NAT # define RDR_SIZE 2047 # else # define RDR_SIZE 127 # endif #endif -#ifndef HOSTMAP_SIZE -# ifdef LARGE_NAT +#ifndef HOSTMAP_SIZE +# ifdef LARGE_NAT # define HOSTMAP_SIZE 8191 # else # define HOSTMAP_SIZE 2047 # endif #endif #ifndef NAT_TABLE_MAX +/* + * This is newly introduced and for the sake of "least surprise", the numbers + * present aren't what we'd normally use for creating a proper hash table. + */ # ifdef LARGE_NAT # define NAT_TABLE_MAX 180000 # else # define NAT_TABLE_MAX 30000 # endif #endif -#ifndef NAT_TABLE_SZ -# ifdef LARGE_NAT +#ifndef NAT_TABLE_SZ +# ifdef LARGE_NAT # define NAT_TABLE_SZ 16383 # else # define NAT_TABLE_SZ 2047 @@ -76,82 +84,141 @@ #define DEF_NAT_AGE 1200 /* 10 minutes (600 seconds) */ +struct ipstate; struct ap_session; typedef struct nat { - u_long nat_age; - int nat_flags; - u_32_t nat_sumd[2]; - u_32_t nat_ipsumd; - void *nat_data; - struct ap_session *nat_aps; /* proxy session */ - struct frentry *nat_fr; /* filter rule ptr if appropriate */ - struct in_addr nat_inip; - struct in_addr nat_outip; - struct in_addr nat_oip; /* other ip */ - U_QUAD_T nat_pkts; - U_QUAD_T nat_bytes; - u_int nat_drop[2]; - u_short nat_oport; /* other port */ - u_short nat_inport; - u_short nat_outport; - u_short nat_use; - u_char nat_tcpstate[2]; - u_char nat_p; /* protocol for NAT */ - u_32_t nat_mssclamp; /* if != zero clamp MSS to this */ - struct ipnat *nat_ptr; /* pointer back to the rule */ - struct hostmap *nat_hm; + ipfmutex_t nat_lock; struct nat *nat_next; + struct nat **nat_pnext; struct nat *nat_hnext[2]; struct nat **nat_phnext[2]; + struct hostmap *nat_hm; + void *nat_data; struct nat **nat_me; - void *nat_ifp; - int nat_dir; - char nat_ifname[IFNAMSIZ]; -#if SOLARIS || defined(__sgi) || (__FreeBSD_version >= 500043) - 
kmutex_t nat_lock; -#endif + struct ipstate *nat_state; + struct ap_session *nat_aps; /* proxy session */ + frentry_t *nat_fr; /* filter rule ptr if appropriate */ + struct ipnat *nat_ptr; /* pointer back to the rule */ + void *nat_ifps[2]; + void *nat_sync; + ipftqent_t nat_tqe; + u_32_t nat_flags; + u_32_t nat_sumd[2]; /* ip checksum delta for data segment*/ + u_32_t nat_ipsumd; /* ip checksum delta for ip header */ + u_32_t nat_mssclamp; /* if != zero clamp MSS to this */ + i6addr_t nat_inip6; + i6addr_t nat_outip6; + i6addr_t nat_oip6; /* other ip */ + U_QUAD_T nat_pkts[2]; + U_QUAD_T nat_bytes[2]; + union { + udpinfo_t nat_unu; + tcpinfo_t nat_unt; + icmpinfo_t nat_uni; + greinfo_t nat_ugre; + } nat_un; + u_short nat_oport; /* other port */ + u_short nat_use; + u_char nat_p; /* protocol for NAT */ + int nat_dir; + int nat_ref; /* reference count */ + int nat_hv[2]; + char nat_ifnames[2][LIFNAMSIZ]; + int nat_rev; /* 0 = forward, 1 = reverse */ } nat_t; +#define nat_inip nat_inip6.in4 +#define nat_outip nat_outip6.in4 +#define nat_oip nat_oip6.in4 +#define nat_age nat_tqe.tqe_die +#define nat_inport nat_un.nat_unt.ts_sport +#define nat_outport nat_un.nat_unt.ts_dport +#define nat_type nat_un.nat_uni.ici_type +#define nat_seq nat_un.nat_uni.ici_seq +#define nat_id nat_un.nat_uni.ici_id +#define nat_tcpstate nat_tqe.tqe_state + +/* + * Values for nat_dir + */ +#define NAT_INBOUND 0 +#define NAT_OUTBOUND 1 + +/* + * Definitions for nat_flags + */ +#define NAT_TCP 0x0001 /* IPN_TCP */ +#define NAT_UDP 0x0002 /* IPN_UDP */ +#define NAT_ICMPERR 0x0004 /* IPN_ICMPERR */ +#define NAT_ICMPQUERY 0x0008 /* IPN_ICMPQUERY */ +#define NAT_SEARCH 0x0010 +#define NAT_SLAVE 0x0020 /* Slave connection for a proxy */ +#define NAT_NOTRULEPORT 0x0040 + +#define NAT_TCPUDP (NAT_TCP|NAT_UDP) +#define NAT_TCPUDPICMP (NAT_TCP|NAT_UDP|NAT_ICMPERR) +#define NAT_TCPUDPICMPQ (NAT_TCP|NAT_UDP|NAT_ICMPQUERY) +#define NAT_FROMRULE (NAT_TCP|NAT_UDP) + +/* 0x0100 reserved for FI_W_SPORT */ +/* 
0x0200 reserved for FI_W_DPORT */ +/* 0x0400 reserved for FI_W_SADDR */ +/* 0x0800 reserved for FI_W_DADDR */ +/* 0x1000 reserved for FI_W_NEWFR */ +/* 0x2000 reserved for SI_CLONE */ +/* 0x4000 reserved for SI_CLONED */ +/* 0x8000 reserved for SI_IGNOREPKT */ + +#define NAT_DEBUG 0x800000 + typedef struct ipnat { - struct ipnat *in_next; - struct ipnat *in_rnext; - struct ipnat **in_prnext; - struct ipnat *in_mnext; - struct ipnat **in_pmnext; - void *in_ifp; - void *in_apr; - u_long in_space; - u_int in_use; - u_int in_hits; - struct in_addr in_nextip; - u_short in_pnext; - u_short in_ippip; /* IP #'s per IP# */ - u_32_t in_flags; /* From here to in_dport must be reflected */ - u_32_t in_mssclamp; /* if != zero clamp MSS to this */ - u_short in_spare; - u_short in_ppip; /* ports per IP */ - u_short in_port[2]; /* correctly in IPN_CMPSIZ */ - struct in_addr in_in[2]; - struct in_addr in_out[2]; - struct in_addr in_src[2]; - struct frtuc in_tuc; - u_int in_age[2]; /* Aging for NAT entries. Not for TCP */ - int in_redir; /* 0 if it's a mapping, 1 if it's a hard redir */ - char in_ifname[IFNAMSIZ]; - char in_plabel[APR_LABELLEN]; /* proxy label */ - char in_p; /* protocol */ + struct ipnat *in_next; /* NAT rule list next */ + struct ipnat *in_rnext; /* rdr rule hash next */ + struct ipnat **in_prnext; /* prior rdr next ptr */ + struct ipnat *in_mnext; /* map rule hash next */ + struct ipnat **in_pmnext; /* prior map next ptr */ + struct ipftq *in_tqehead[2]; + void *in_ifps[2]; + void *in_apr; + char *in_comment; + i6addr_t in_next6; + u_long in_space; + u_long in_hits; + u_int in_use; + u_int in_hv; + int in_flineno; /* conf. file line number */ + u_short in_pnext; + u_char in_v; + u_char in_xxx; + /* From here to the end is covered by IPN_CMPSIZ */ + u_32_t in_flags; + u_32_t in_mssclamp; /* if != 0 clamp MSS to this */ + u_int in_age[2]; + int in_redir; /* see below for values */ + int in_p; /* protocol. 
*/ + i6addr_t in_in[2]; + i6addr_t in_out[2]; + i6addr_t in_src[2]; + frtuc_t in_tuc; + u_short in_port[2]; + u_short in_ppip; /* ports per IP. */ + u_short in_ippip; /* IP #'s per IP# */ + char in_ifnames[2][LIFNAMSIZ]; + char in_plabel[APR_LABELLEN]; /* proxy label. */ + ipftag_t in_tag; } ipnat_t; #define in_pmin in_port[0] /* Also holds static redir port */ #define in_pmax in_port[1] -#define in_nip in_nextip.s_addr -#define in_inip in_in[0].s_addr -#define in_inmsk in_in[1].s_addr -#define in_outip in_out[0].s_addr -#define in_outmsk in_out[1].s_addr -#define in_srcip in_src[0].s_addr -#define in_srcmsk in_src[1].s_addr +#define in_nextip in_next6.in4 +#define in_nip in_next6.in4.s_addr +#define in_inip in_in[0].in4.s_addr +#define in_inmsk in_in[1].in4.s_addr +#define in_outip in_out[0].in4.s_addr +#define in_outmsk in_out[1].in4.s_addr +#define in_srcip in_src[0].in4.s_addr +#define in_srcmsk in_src[1].in4.s_addr #define in_scmp in_tuc.ftu_scmp #define in_dcmp in_tuc.ftu_dcmp #define in_stop in_tuc.ftu_stop @@ -159,18 +226,44 @@ typedef struct ipnat { #define in_sport in_tuc.ftu_sport #define in_dport in_tuc.ftu_dport -#define NAT_OUTBOUND 0 -#define NAT_INBOUND 1 +/* + * Bit definitions for in_flags + */ +#define IPN_ANY 0x00000 +#define IPN_TCP 0x00001 +#define IPN_UDP 0x00002 +#define IPN_TCPUDP (IPN_TCP|IPN_UDP) +#define IPN_ICMPERR 0x00004 +#define IPN_TCPUDPICMP (IPN_TCP|IPN_UDP|IPN_ICMPERR) +#define IPN_ICMPQUERY 0x00008 +#define IPN_TCPUDPICMPQ (IPN_TCP|IPN_UDP|IPN_ICMPQUERY) +#define IPN_RF (IPN_TCPUDP|IPN_DELETE|IPN_ICMPERR) +#define IPN_AUTOPORTMAP 0x00010 +#define IPN_IPRANGE 0x00020 +#define IPN_FILTER 0x00040 +#define IPN_SPLIT 0x00080 +#define IPN_ROUNDR 0x00100 +#define IPN_NOTSRC 0x04000 +#define IPN_NOTDST 0x08000 +#define IPN_DYNSRCIP 0x10000 /* dynamic src IP# */ +#define IPN_DYNDSTIP 0x20000 /* dynamic dst IP# */ +#define IPN_DELETE 0x40000 +#define IPN_STICKY 0x80000 +#define IPN_FRAG 0x100000 +#define IPN_FIXEDDPORT 0x200000 +#define 
IPN_FINDFORWARD 0x400000 +#define IPN_IN 0x800000 +#define IPN_USERFLAGS (IPN_TCPUDP|IPN_AUTOPORTMAP|IPN_IPRANGE|IPN_SPLIT|\ + IPN_ROUNDR|IPN_FILTER|IPN_NOTSRC|IPN_NOTDST|\ + IPN_FRAG|IPN_STICKY|IPN_FIXEDDPORT|IPN_ICMPQUERY) +/* + * Values for in_redir + */ #define NAT_MAP 0x01 #define NAT_REDIRECT 0x02 #define NAT_BIMAP (NAT_MAP|NAT_REDIRECT) #define NAT_MAPBLK 0x04 -/* 0x100 reserved for FI_W_SPORT */ -/* 0x200 reserved for FI_W_DPORT */ -/* 0x400 reserved for FI_W_SADDR */ -/* 0x800 reserved for FI_W_DADDR */ -/* 0x1000 reserved for FI_W_NEWFR */ #define MAPBLK_MINPORT 1024 /* don't use reserved ports for src port */ #define USABLE_PORTS (65536 - MAPBLK_MINPORT) @@ -205,16 +298,59 @@ typedef struct natget { } natget_t; +typedef struct nattrpnt { + struct in_addr tr_dstip; /* real destination IP# */ + struct in_addr tr_srcip; /* real source IP# */ + struct in_addr tr_locip; /* local source IP# */ + u_int tr_flags; + int tr_expire; + u_short tr_dstport; /* real destination port# */ + u_short tr_srcport; /* real source port# */ + u_short tr_locport; /* local source port# */ + struct nattrpnt *tr_hnext; + struct nattrpnt **tr_phnext; + struct nattrpnt *tr_next; + struct nattrpnt **tr_pnext; /* previous next */ +} nattrpnt_t; + +#define TN_CMPSIZ offsetof(nattrpnt_t, tr_hnext) + + +/* + * This structure gets used to help NAT sessions keep the same NAT rule (and + * thus translation for IP address) when: + * (a) round-robin redirects are in use + * (b) different IP add + */ typedef struct hostmap { struct hostmap *hm_next; struct hostmap **hm_pnext; struct ipnat *hm_ipnat; - struct in_addr hm_realip; + struct in_addr hm_srcip; + struct in_addr hm_dstip; struct in_addr hm_mapip; - int hm_ref; + u_32_t hm_port; + int hm_ref; } hostmap_t; +/* + * Structure used to pass information in to nat_newmap and nat_newrdr. 
+ */ +typedef struct natinfo { + ipnat_t *nai_np; + u_32_t nai_sum1; + u_32_t nai_sum2; + u_32_t nai_nflags; + u_32_t nai_flags; + struct in_addr nai_ip; + u_short nai_port; + u_short nai_nport; + u_short nai_sport; + u_short nai_dport; +} natinfo_t; + + typedef struct natstat { u_long ns_mapped[2]; u_long ns_rules; @@ -225,37 +361,23 @@ typedef struct natstat { u_long ns_logfail; u_long ns_memfail; u_long ns_badnat; + u_long ns_addtrpnt; nat_t **ns_table[2]; hostmap_t **ns_maptable; ipnat_t *ns_list; void *ns_apslist; + u_int ns_wilds; u_int ns_nattab_sz; + u_int ns_nattab_max; u_int ns_rultab_sz; u_int ns_rdrtab_sz; + u_int ns_trpntab_sz; u_int ns_hostmap_sz; nat_t *ns_instances; - u_int ns_wilds; + nattrpnt_t *ns_trpntlist; + u_long *ns_bucketlen[2]; } natstat_t; -#define IPN_ANY 0x000 -#define IPN_TCP 0x001 -#define IPN_UDP 0x002 -#define IPN_TCPUDP (IPN_TCP|IPN_UDP) -#define IPN_DELETE 0x004 -#define IPN_ICMPERR 0x008 -#define IPN_RF (IPN_TCPUDP|IPN_DELETE|IPN_ICMPERR) -#define IPN_AUTOPORTMAP 0x010 -#define IPN_IPRANGE 0x020 -#define IPN_USERFLAGS (IPN_TCPUDP|IPN_AUTOPORTMAP|IPN_IPRANGE|IPN_SPLIT|\ - IPN_ROUNDR|IPN_FILTER|IPN_NOTSRC|IPN_NOTDST|IPN_FRAG) -#define IPN_FILTER 0x040 -#define IPN_SPLIT 0x080 -#define IPN_ROUNDR 0x100 -#define IPN_NOTSRC 0x080000 -#define IPN_NOTDST 0x100000 -#define IPN_FRAG 0x200000 - - typedef struct natlog { struct in_addr nl_origip; struct in_addr nl_outip; @@ -265,8 +387,8 @@ typedef struct natlog { u_short nl_inport; u_short nl_type; int nl_rule; - U_QUAD_T nl_pkts; - U_QUAD_T nl_bytes; + U_QUAD_T nl_pkts[2]; + U_QUAD_T nl_bytes[2]; u_char nl_p; } natlog_t; @@ -275,6 +397,7 @@ typedef struct natlog { #define NL_NEWRDR NAT_REDIRECT #define NL_NEWBIMAP NAT_BIMAP #define NL_NEWBLOCK NAT_MAPBLK +#define NL_CLONE 0xfffd #define NL_FLUSH 0xfffe #define NL_EXPIRE 0xffff @@ -296,46 +419,60 @@ typedef struct natlog { #define NAT_SYSSPACE 0x80000000 #define NAT_LOCKHELD 0x40000000 + extern u_int ipf_nattable_sz; +extern u_int 
ipf_nattable_max; extern u_int ipf_natrules_sz; extern u_int ipf_rdrrules_sz; +extern u_int ipf_hostmap_sz; +extern u_int fr_nat_maxbucket; +extern u_int fr_nat_maxbucket_reset; extern int fr_nat_lock; -extern void ip_natsync __P((void *)); +extern void fr_natsync __P((void *)); extern u_long fr_defnatage; extern u_long fr_defnaticmpage; +extern u_long fr_defnatipage; + /* nat_table[0] -> hashed list sorted by inside (ip, port) */ + /* nat_table[1] -> hashed list sorted by outside (ip, port) */ extern nat_t **nat_table[2]; extern nat_t *nat_instances; +extern ipnat_t *nat_list; extern ipnat_t **nat_rules; extern ipnat_t **rdr_rules; -extern ipnat_t *nat_list; +extern ipftq_t *nat_utqe; extern natstat_t nat_stats; + #if defined(__OpenBSD__) extern void nat_ifdetach __P((void *)); #endif -#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003) -extern int nat_ioctl __P((caddr_t, u_long, int)); -#else -extern int nat_ioctl __P((caddr_t, int, int)); -#endif -extern int nat_init __P((void)); -extern nat_t *nat_new __P((fr_info_t *, ip_t *, ipnat_t *, nat_t **, - u_int, int)); +extern int fr_nat_ioctl __P((caddr_t, ioctlcmd_t, int)); +extern int fr_natinit __P((void)); +extern nat_t *nat_new __P((fr_info_t *, ipnat_t *, nat_t **, u_int, int)); extern nat_t *nat_outlookup __P((fr_info_t *, u_int, u_int, struct in_addr, - struct in_addr, int)); + struct in_addr)); +extern void fix_datacksum __P((u_short *, u_32_t)); extern nat_t *nat_inlookup __P((fr_info_t *, u_int, u_int, struct in_addr, - struct in_addr, int)); + struct in_addr)); +extern nat_t *nat_tnlookup __P((fr_info_t *, int)); +extern nat_t *nat_maplookup __P((void *, u_int, struct in_addr, + struct in_addr)); extern nat_t *nat_lookupredir __P((natlookup_t *)); -extern nat_t *nat_icmplookup __P((ip_t *, fr_info_t *, int)); -extern nat_t *nat_icmp __P((ip_t *, fr_info_t *, u_int *, int)); -extern int nat_clearlist __P((void)); -extern void nat_insert __P((nat_t *)); - -extern int ip_natout 
__P((ip_t *, fr_info_t *)); -extern int ip_natin __P((ip_t *, fr_info_t *)); -extern void ip_natunload __P((void)), ip_natexpire __P((void)); +extern nat_t *nat_icmperrorlookup __P((fr_info_t *, int)); +extern nat_t *nat_icmperror __P((fr_info_t *, u_int *, int)); +extern int nat_insert __P((nat_t *, int)); + +extern int fr_checknatout __P((fr_info_t *, u_32_t *)); +extern int fr_natout __P((fr_info_t *, nat_t *, int, u_32_t)); +extern int fr_checknatin __P((fr_info_t *, u_32_t *)); +extern int fr_natin __P((fr_info_t *, nat_t *, int, u_32_t)); +extern void fr_natunload __P((void)); +extern void fr_natexpire __P((void)); extern void nat_log __P((struct nat *, u_int)); extern void fix_incksum __P((fr_info_t *, u_short *, u_32_t)); extern void fix_outcksum __P((fr_info_t *, u_short *, u_32_t)); -extern void fix_datacksum __P((u_short *, u_32_t)); +extern void fr_natderef __P((nat_t **)); +extern u_short *nat_proto __P((fr_info_t *, nat_t *, u_int)); +extern void nat_update __P((fr_info_t *, nat_t *, ipnat_t *)); +extern void fr_setnatqueue __P((nat_t *, int)); #endif /* __IP_NAT_H__ */ diff --git a/sys/contrib/ipfilter/netinet/ip_proxy.c b/sys/contrib/ipfilter/netinet/ip_proxy.c index f7b82d9..062d8b4 100644 --- a/sys/contrib/ipfilter/netinet/ip_proxy.c +++ b/sys/contrib/ipfilter/netinet/ip_proxy.c @@ -1,46 +1,58 @@ +/* $FreeBSD$ */ + /* - * Copyright (C) 1997-2002 by Darren Reed. + * Copyright (C) 1997-2003 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. 
*/ - -#if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL) -# define _KERNEL -#endif - -#if defined(__sgi) && (IRIX > 602) -# include +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 #endif #include #include #include #include #include -#if !defined(__FreeBSD_version) -# include -#endif #include -#if !defined(_KERNEL) && !defined(KERNEL) +#if !defined(_KERNEL) && !defined(__KERNEL__) # include # include # include +# include +# define _KERNEL +# ifdef __OpenBSD__ +struct file; +# endif +# include +# undef _KERNEL #endif -#ifndef linux +#if !defined(linux) # include #endif #include #if defined(_KERNEL) -# if !defined(linux) -# include -# else -# include +# if !defined(__NetBSD__) && !defined(sun) && !defined(__osf__) && \ + !defined(__OpenBSD__) && !defined(__hpux) && !defined(__sgi) +# include # endif -#endif -#if !defined(__SVR4) && !defined(__svr4__) -# ifndef linux +# include +# if !defined(__SVR4) && !defined(__svr4__) # include # endif +#endif +#if defined(_KERNEL) && (__FreeBSD_version >= 220000) +# include +# include +# if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) +# include "opt_ipfilter.h" +# endif #else +# include +#endif +#if defined(__SVR4) || defined(__svr4__) # include # ifdef _KERNEL # include @@ -75,65 +87,93 @@ # include #endif -#if !defined(lint) -/* static const char rcsid[] = "@(#)$Id: ip_proxy.c,v 2.9.2.6 2001/07/15 22:06:15 darrenr Exp $"; */ -static const char rcsid[] = "@(#)$FreeBSD$"; +#include "netinet/ip_ftp_pxy.c" +#include "netinet/ip_rcmd_pxy.c" +# include "netinet/ip_pptp_pxy.c" +#if defined(_KERNEL) +# include "netinet/ip_irc_pxy.c" +# include "netinet/ip_raudio_pxy.c" +# include "netinet/ip_h323_pxy.c" +# ifdef IPFILTER_PRO +# include "netinet/ip_msnrpc_pxy.c" +# endif +# include "netinet/ip_netbios_pxy.c" #endif +#include "netinet/ip_ipsec_pxy.c" +#include "netinet/ip_rpcb_pxy.c" -#ifdef USE_MUTEX -extern KRWLOCK_T ipf_nat, ipf_state; +/* END OF 
INCLUDES */ + +#if !defined(lint) +static const char rcsid[] = "@(#)Id: ip_proxy.c,v 2.62.2.12 2005/03/03 14:28:24 darrenr Exp"; #endif static int appr_fixseqack __P((fr_info_t *, ip_t *, ap_session_t *, int )); - -#define PROXY_DEBUG 0 - #define AP_SESS_SIZE 53 -#include "netinet/ip_ftp_pxy.c" #if defined(_KERNEL) -#include "netinet/ip_rcmd_pxy.c" -#include "netinet/ip_raudio_pxy.c" -#include "netinet/ip_netbios_pxy.c" -#include "netinet/ip_ipsec_pxy.c" +int ipf_proxy_debug = 0; +#else +int ipf_proxy_debug = 2; #endif - ap_session_t *ap_sess_tab[AP_SESS_SIZE]; ap_session_t *ap_sess_list = NULL; aproxy_t *ap_proxylist = NULL; aproxy_t ap_proxies[] = { #ifdef IPF_FTP_PROXY - { NULL, "ftp", (char)IPPROTO_TCP, 0, 0, ippr_ftp_init, NULL, + { NULL, "ftp", (char)IPPROTO_TCP, 0, 0, ippr_ftp_init, ippr_ftp_fini, ippr_ftp_new, NULL, ippr_ftp_in, ippr_ftp_out, NULL }, #endif +#ifdef IPF_IRC_PROXY + { NULL, "irc", (char)IPPROTO_TCP, 0, 0, ippr_irc_init, ippr_irc_fini, + ippr_irc_new, NULL, NULL, ippr_irc_out, NULL, NULL }, +#endif #ifdef IPF_RCMD_PROXY - { NULL, "rcmd", (char)IPPROTO_TCP, 0, 0, ippr_rcmd_init, NULL, - ippr_rcmd_new, NULL, NULL, ippr_rcmd_out, NULL }, + { NULL, "rcmd", (char)IPPROTO_TCP, 0, 0, ippr_rcmd_init, ippr_rcmd_fini, + ippr_rcmd_new, NULL, ippr_rcmd_in, ippr_rcmd_out, NULL, NULL }, #endif #ifdef IPF_RAUDIO_PROXY - { NULL, "raudio", (char)IPPROTO_TCP, 0, 0, ippr_raudio_init, NULL, - ippr_raudio_new, NULL, ippr_raudio_in, ippr_raudio_out, NULL }, + { NULL, "raudio", (char)IPPROTO_TCP, 0, 0, ippr_raudio_init, ippr_raudio_fini, + ippr_raudio_new, NULL, ippr_raudio_in, ippr_raudio_out, NULL, NULL }, #endif -#ifdef IPF_IPSEC_PROXY - { NULL, "ipsec", (char)IPPROTO_UDP, 0, 0, ippr_ipsec_init, NULL, - ippr_ipsec_new, ippr_ipsec_del, NULL, ippr_ipsec_out, - ippr_ipsec_match }, +#ifdef IPF_MSNRPC_PROXY + { NULL, "msnrpc", (char)IPPROTO_TCP, 0, 0, ippr_msnrpc_init, ippr_msnrpc_fini, + ippr_msnrpc_new, NULL, ippr_msnrpc_in, ippr_msnrpc_out, NULL, NULL }, #endif 
#ifdef IPF_NETBIOS_PROXY - { NULL, "netbios", (char)IPPROTO_UDP, 0, 0, ippr_netbios_init, NULL, - NULL, NULL, NULL, ippr_netbios_out, NULL }, + { NULL, "netbios", (char)IPPROTO_UDP, 0, 0, ippr_netbios_init, ippr_netbios_fini, + NULL, NULL, NULL, ippr_netbios_out, NULL, NULL }, +#endif +#ifdef IPF_IPSEC_PROXY + { NULL, "ipsec", (char)IPPROTO_UDP, 0, 0, + ippr_ipsec_init, ippr_ipsec_fini, ippr_ipsec_new, ippr_ipsec_del, + ippr_ipsec_inout, ippr_ipsec_inout, ippr_ipsec_match, NULL }, +#endif +#ifdef IPF_PPTP_PROXY + { NULL, "pptp", (char)IPPROTO_TCP, 0, 0, + ippr_pptp_init, ippr_pptp_fini, ippr_pptp_new, ippr_pptp_del, + ippr_pptp_inout, ippr_pptp_inout, NULL, NULL }, #endif #ifdef IPF_H323_PROXY - { NULL, "h323", (char)IPPROTO_TCP, 0, 0, ippr_h323_init, NULL, - ippr_h323_new, ippr_h323_del, ippr_h323_in, ippr_h323_out, NULL }, - { NULL, "h245", (char)IPPROTO_TCP, 0, 0, ippr_h245_init, NULL, - ippr_h245_new, NULL, NULL, ippr_h245_out, NULL }, -#endif - { NULL, "", '\0', 0, 0, NULL, NULL, NULL } + { NULL, "h323", (char)IPPROTO_TCP, 0, 0, ippr_h323_init, ippr_h323_fini, + ippr_h323_new, ippr_h323_del, ippr_h323_in, NULL, NULL }, + { NULL, "h245", (char)IPPROTO_TCP, 0, 0, NULL, NULL, + ippr_h245_new, NULL, NULL, ippr_h245_out, NULL }, +#endif +#ifdef IPF_RPCB_PROXY +# if 0 + { NULL, "rpcbt", (char)IPPROTO_TCP, 0, 0, + ippr_rpcb_init, ippr_rpcb_fini, ippr_rpcb_new, ippr_rpcb_del, + ippr_rpcb_in, ippr_rpcb_out, NULL, NULL }, +# endif + { NULL, "rpcbu", (char)IPPROTO_UDP, 0, 0, + ippr_rpcb_init, ippr_rpcb_fini, ippr_rpcb_new, ippr_rpcb_del, + ippr_rpcb_in, ippr_rpcb_out, NULL, NULL }, +#endif + { NULL, "", '\0', 0, 0, NULL, NULL, NULL, NULL } }; - /* * Dynamically add a new kernel proxy. Ensure that it is unique in the * collection compiled in and dynamically added. 
@@ -146,17 +186,59 @@ aproxy_t *ap; for (a = ap_proxies; a->apr_p; a++) if ((a->apr_p == ap->apr_p) && !strncmp(a->apr_label, ap->apr_label, - sizeof(ap->apr_label))) + sizeof(ap->apr_label))) { + if (ipf_proxy_debug > 1) + printf("appr_add: %s/%d already present (B)\n", + a->apr_label, a->apr_p); return -1; + } - for (a = ap_proxylist; a && a->apr_p; a = a->apr_next) + for (a = ap_proxylist; a->apr_p; a = a->apr_next) if ((a->apr_p == ap->apr_p) && !strncmp(a->apr_label, ap->apr_label, - sizeof(ap->apr_label))) + sizeof(ap->apr_label))) { + if (ipf_proxy_debug > 1) + printf("appr_add: %s/%d already present (D)\n", + a->apr_label, a->apr_p); return -1; + } ap->apr_next = ap_proxylist; ap_proxylist = ap; - return (*ap->apr_init)(); + if (ap->apr_init != NULL) + return (*ap->apr_init)(); + return 0; +} + + +/* + * Check to see if the proxy this control request has come through for + * exists, and if it does and it has a control function then invoke that + * control function. + */ +int appr_ctl(ctl) +ap_ctl_t *ctl; +{ + aproxy_t *a; + int error; + + a = appr_lookup(ctl->apc_p, ctl->apc_label); + if (a == NULL) { + if (ipf_proxy_debug > 1) + printf("appr_ctl: can't find %s/%d\n", + ctl->apc_label, ctl->apc_p); + error = ESRCH; + } else if (a->apr_ctl == NULL) { + if (ipf_proxy_debug > 1) + printf("appr_ctl: no ctl function for %s/%d\n", + ctl->apc_label, ctl->apc_p); + error = ENXIO; + } else { + error = (*a->apr_ctl)(a, ctl); + if ((error != 0) && (ipf_proxy_debug > 1)) + printf("appr_ctl: %s/%d ctl error %d\n", + a->apr_label, a->apr_p, error); + } + return error; } @@ -170,14 +252,20 @@ aproxy_t *ap; { aproxy_t *a, **app; - for (app = &ap_proxylist; (a = *app); app = &a->apr_next) + for (app = &ap_proxylist; ((a = *app) != NULL); app = &a->apr_next) if (a == ap) { a->apr_flags |= APR_DELETE; *app = a->apr_next; - if (ap->apr_ref != 0) + if (ap->apr_ref != 0) { + if (ipf_proxy_debug > 2) + printf("appr_del: orphaning %s/%d\n", + ap->apr_label, ap->apr_p); return 1; + 
} return 0; } + if (ipf_proxy_debug > 1) + printf("appr_del: proxy %lx not found\n", (u_long)ap); return -1; } @@ -185,8 +273,8 @@ aproxy_t *ap; /* * Return 1 if the packet is a good match against a proxy, else 0. */ -int appr_ok(ip, tcp, nat) -ip_t *ip; +int appr_ok(fin, tcp, nat) +fr_info_t *fin; tcphdr_t *tcp; ipnat_t *nat; { @@ -194,14 +282,62 @@ ipnat_t *nat; u_short dport = nat->in_dport; if ((apr == NULL) || (apr->apr_flags & APR_DELETE) || - (ip->ip_p != apr->apr_p)) + (fin->fin_p != apr->apr_p)) return 0; - if (((tcp != NULL) && (tcp->th_dport != dport)) || (!tcp && dport)) + if ((tcp == NULL) && dport) return 0; return 1; } +int appr_ioctl(data, cmd, mode) +caddr_t data; +ioctlcmd_t cmd; +int mode; +{ + ap_ctl_t ctl; + caddr_t ptr; + int error; + + mode = mode; /* LINT */ + + switch (cmd) + { + case SIOCPROXY : + BCOPYIN(data, &ctl, sizeof(ctl)); + ptr = NULL; + + if (ctl.apc_dsize > 0) { + KMALLOCS(ptr, caddr_t, ctl.apc_dsize); + if (ptr == NULL) + error = ENOMEM; + else { + error = copyinptr(ctl.apc_data, ptr, + ctl.apc_dsize); + if (error == 0) + ctl.apc_data = ptr; + } + } else { + ctl.apc_data = NULL; + error = 0; + } + + if (error == 0) + error = appr_ctl(&ctl); + + if ((ctl.apc_dsize > 0) && (ptr != NULL) && + (ctl.apc_data == ptr)) { + KFREES(ptr, ctl.apc_dsize); + } + break; + + default : + error = EINVAL; + } + return error; +} + + /* * If a proxy has a match function, call that to do extended packet * matching. 
@@ -212,17 +348,37 @@ nat_t *nat; { aproxy_t *apr; ipnat_t *ipn; + int result; ipn = nat->nat_ptr; - if (ipn == NULL) + if (ipf_proxy_debug > 8) + printf("appr_match(%lx,%lx) aps %lx ptr %lx\n", + (u_long)fin, (u_long)nat, (u_long)nat->nat_aps, + (u_long)ipn); + + if ((fin->fin_flx & (FI_SHORT|FI_BAD)) != 0) { + if (ipf_proxy_debug > 0) + printf("appr_match: flx 0x%x (BAD|SHORT)\n", + fin->fin_flx); return -1; + } + apr = ipn->in_apr; - if ((apr == NULL) || (apr->apr_flags & APR_DELETE) || - (nat->nat_aps == NULL)) + if ((apr == NULL) || (apr->apr_flags & APR_DELETE)) { + if (ipf_proxy_debug > 0) + printf("appr_match:apr %lx apr_flags 0x%x\n", + (u_long)apr, apr ? apr->apr_flags : 0); return -1; - if (apr->apr_match != NULL) - if ((*apr->apr_match)(fin, nat->nat_aps, nat) != 0) + } + + if (apr->apr_match != NULL) { + result = (*apr->apr_match)(fin, nat->nat_aps, nat); + if (result != 0) { + if (ipf_proxy_debug > 4) + printf("appr_match: result %d\n", result); return -1; + } + } return 0; } @@ -232,36 +388,55 @@ nat_t *nat; * relevant details. call the init function once complete, prior to * returning. 
*/ -int appr_new(fin, ip, nat) +int appr_new(fin, nat) fr_info_t *fin; -ip_t *ip; nat_t *nat; { register ap_session_t *aps; aproxy_t *apr; - if ((nat->nat_ptr == NULL) || (nat->nat_aps != NULL)) + if (ipf_proxy_debug > 8) + printf("appr_new(%lx,%lx) \n", (u_long)fin, (u_long)nat); + + if ((nat->nat_ptr == NULL) || (nat->nat_aps != NULL)) { + if (ipf_proxy_debug > 0) + printf("appr_new: nat_ptr %lx nat_aps %lx\n", + (u_long)nat->nat_ptr, (u_long)nat->nat_aps); return -1; + } apr = nat->nat_ptr->in_apr; - if (!apr || (apr->apr_flags & APR_DELETE) || (ip->ip_p != apr->apr_p)) + if ((apr->apr_flags & APR_DELETE) || + (fin->fin_p != apr->apr_p)) { + if (ipf_proxy_debug > 2) + printf("appr_new: apr_flags 0x%x p %d/%d\n", + apr->apr_flags, fin->fin_p, apr->apr_p); return -1; + } KMALLOC(aps, ap_session_t *); - if (!aps) + if (!aps) { + if (ipf_proxy_debug > 0) + printf("appr_new: malloc failed (%lu)\n", + (u_long)sizeof(ap_session_t)); return -1; + } + bzero((char *)aps, sizeof(*aps)); - aps->aps_p = ip->ip_p; + aps->aps_p = fin->fin_p; aps->aps_data = NULL; aps->aps_apr = apr; aps->aps_psiz = 0; if (apr->apr_new != NULL) - if ((*apr->apr_new)(fin, ip, aps, nat) == -1) { + if ((*apr->apr_new)(fin, aps, nat) == -1) { if ((aps->aps_data != NULL) && (aps->aps_psiz != 0)) { KFREES(aps->aps_data, aps->aps_psiz); } KFREE(aps); + if (ipf_proxy_debug > 2) + printf("appr_new: new(%lx) failed\n", + (u_long)apr->apr_new); return -1; } aps->aps_nat = nat; @@ -274,95 +449,159 @@ nat_t *nat; /* - * check to see if a packet should be passed through an active proxy routine - * if one has been setup for it. + * Check to see if a packet should be passed through an active proxy routine + * if one has been setup for it. We don't need to check the checksum here if + * IPFILTER_CKSUM is defined because if it is, a failed check causes FI_BAD + * to be set. 
*/ -int appr_check(ip, fin, nat) -ip_t *ip; +int appr_check(fin, nat) fr_info_t *fin; nat_t *nat; { #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) - mb_t *m = fin->fin_qfm; +# if defined(ICK_VALID) + mb_t *m; +# endif int dosum = 1; #endif tcphdr_t *tcp = NULL; + udphdr_t *udp = NULL; ap_session_t *aps; aproxy_t *apr; - u_32_t sum; + ip_t *ip; short rv; int err; - - aps = nat->nat_aps; - if ((aps != NULL) && (aps->aps_p == ip->ip_p)) { - if (ip->ip_p == IPPROTO_TCP) { - tcp = (tcphdr_t *)fin->fin_dp; - /* - * verify that the checksum is correct. If not, then - * don't do anything with this packet. - */ -#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) - if (dohwcksum && (m->b_ick_flag == ICK_VALID)) { - sum = tcp->th_sum; - dosum = 0; - } - if (dosum) - sum = fr_tcpsum(fin->fin_qfm, ip, tcp); -#else - sum = fr_tcpsum(*(mb_t **)fin->fin_mp, ip, tcp); +#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) + u_32_t s1, s2, sd; #endif - if (sum != tcp->th_sum) { -#if PROXY_DEBUG || (!defined(_KERNEL) && !defined(KERNEL)) - printf("proxy tcp checksum failure\n"); + + if (fin->fin_flx & FI_BAD) { + if (ipf_proxy_debug > 0) + printf("appr_check: flx 0x%x (BAD)\n", fin->fin_flx); + return -1; + } + +#ifndef IPFILTER_CKSUM + if ((fin->fin_out == 0) && (fr_checkl4sum(fin) == -1)) { + if (ipf_proxy_debug > 0) + printf("appr_check: l4 checksum failure %d\n", + fin->fin_p); + if (fin->fin_p == IPPROTO_TCP) + frstats[fin->fin_out].fr_tcpbad++; + return -1; + } #endif - frstats[fin->fin_out].fr_tcpbad++; + + aps = nat->nat_aps; + if ((aps != NULL) && (aps->aps_p == fin->fin_p)) { + /* + * If there is data in this packet to be proxied then try and + * get it all into the one buffer, else drop it. 
+ */ +#if defined(MENTAT) || defined(HAVE_M_PULLDOWN) + if ((fin->fin_dlen > 0) && !(fin->fin_flx & FI_COALESCE)) + if (fr_coalesce(fin) == -1) { + if (ipf_proxy_debug > 0) + printf("appr_check: fr_coalesce failed %x\n", fin->fin_flx); return -1; } +#endif + ip = fin->fin_ip; + switch (fin->fin_p) + { + case IPPROTO_TCP : + tcp = (tcphdr_t *)fin->fin_dp; + +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_VALID) + m = fin->fin_qfm; + if (dohwcksum && (m->b_ick_flag == ICK_VALID)) + dosum = 0; +#endif /* * Don't bother the proxy with these...or in fact, * should we free up proxy stuff when seen? */ - if ((tcp->th_flags & TH_RST) != 0) - return 0; + if ((fin->fin_tcpf & TH_RST) != 0) + break; + /*FALLTHROUGH*/ + case IPPROTO_UDP : + udp = (udphdr_t *)fin->fin_dp; + break; + default : + break; } apr = aps->aps_apr; err = 0; if (fin->fin_out != 0) { if (apr->apr_outpkt != NULL) - err = (*apr->apr_outpkt)(fin, ip, aps, nat); + err = (*apr->apr_outpkt)(fin, aps, nat); } else { if (apr->apr_inpkt != NULL) - err = (*apr->apr_inpkt)(fin, ip, aps, nat); + err = (*apr->apr_inpkt)(fin, aps, nat); } rv = APR_EXIT(err); - if (rv == 1) { -#if PROXY_DEBUG || (!defined(_KERNEL) && !defined(KERNEL)) - printf("proxy says bad packet received\n"); -#endif + if (((ipf_proxy_debug > 0) && (rv != 0)) || + (ipf_proxy_debug > 8)) + printf("appr_check: out %d err %x rv %d\n", + fin->fin_out, err, rv); + if (rv == 1) return -1; - } + if (rv == 2) { -#if PROXY_DEBUG || (!defined(_KERNEL) && !defined(KERNEL)) - printf("proxy says free app proxy data\n"); -#endif appr_free(apr); nat->nat_aps = NULL; return -1; } + /* + * If err != 0 then the data size of the packet has changed + * so we need to recalculate the header checksums for the + * packet. 
+ */ +#if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) + if (err != 0) { + short adjlen = err & 0xffff; + + s1 = LONG_SUM(ip->ip_len - adjlen); + s2 = LONG_SUM(ip->ip_len); + CALC_SUMD(s1, s2, sd); + fix_outcksum(fin, &ip->ip_sum, sd); + } +#endif + + /* + * For TCP packets, we may need to adjust the sequence and + * acknowledgement numbers to reflect changes in size of the + * data stream. + * + * For both TCP and UDP, recalculate the layer 4 checksum, + * regardless, as we can't tell (here) if data has been + * changed or not. + */ if (tcp != NULL) { err = appr_fixseqack(fin, ip, aps, APR_INC(err)); #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) if (dosum) - tcp->th_sum = fr_tcpsum(fin->fin_qfm, ip, tcp); + tcp->th_sum = fr_cksum(fin->fin_qfm, ip, + IPPROTO_TCP, tcp); +#else + tcp->th_sum = fr_cksum(fin->fin_m, ip, + IPPROTO_TCP, tcp); +#endif + } else if ((udp != NULL) && (udp->uh_sum != 0)) { +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) + if (dosum) + udp->uh_sum = fr_cksum(fin->fin_qfm, ip, + IPPROTO_UDP, udp); #else - tcp->th_sum = fr_tcpsum(*(mb_t **)fin->fin_mp, ip, tcp); + udp->uh_sum = fr_cksum(fin->fin_m, ip, + IPPROTO_UDP, udp); #endif } - aps->aps_bytes += ip->ip_len; + aps->aps_bytes += fin->fin_plen; aps->aps_pkts++; return 1; } @@ -379,6 +618,9 @@ char *name; { aproxy_t *ap; + if (ipf_proxy_debug > 8) + printf("appr_lookup(%d,%s)\n", pr, name); + for (ap = ap_proxies; ap->apr_p; ap++) if ((ap->apr_p == pr) && !strncmp(name, ap->apr_label, sizeof(ap->apr_label))) { @@ -392,6 +634,8 @@ char *name; ap->apr_ref++; return ap; } + if (ipf_proxy_debug > 2) + printf("appr_lookup: failed for %d/%s\n", pr, name); return NULL; } @@ -412,7 +656,7 @@ ap_session_t *aps; if (!aps) return; - for (ap = &ap_sess_list; (a = *ap); ap = &a->aps_next) + for (ap = &ap_sess_list; ((a = *ap) != NULL); ap = &a->aps_next) if (a == aps) { *ap = a->aps_next; break; @@ -448,7 +692,7 @@ int inc; * ip_len has already been adjusted by 'inc'. 
*/ nlen = ip->ip_len; - nlen -= (ip->ip_hl << 2) + (tcp->th_off << 2); + nlen -= (IP_HL(ip) << 2) + (TCP_OFF(tcp) << 2); inc2 = inc; inc = (int)inc2; @@ -460,10 +704,10 @@ int inc; /* switch to other set ? */ if ((aps->aps_seqmin[!sel] > aps->aps_seqmin[sel]) && (seq1 > aps->aps_seqmin[!sel])) { -#if PROXY_DEBUG - printf("proxy out switch set seq %d -> %d %x > %x\n", - sel, !sel, seq1, aps->aps_seqmin[!sel]); -#endif + if (ipf_proxy_debug > 7) + printf("proxy out switch set seq %d -> %d %x > %x\n", + sel, !sel, seq1, + aps->aps_seqmin[!sel]); sel = aps->aps_sel[out] = !sel; } @@ -480,11 +724,10 @@ int inc; if (inc && (seq1 > aps->aps_seqmin[!sel])) { aps->aps_seqmin[sel] = seq1 + nlen - 1; aps->aps_seqoff[sel] = aps->aps_seqoff[sel] + inc; -#if PROXY_DEBUG - printf("proxy seq set %d at %x to %d + %d\n", sel, - aps->aps_seqmin[sel], aps->aps_seqoff[sel], - inc); -#endif + if (ipf_proxy_debug > 7) + printf("proxy seq set %d at %x to %d + %d\n", + sel, aps->aps_seqmin[sel], + aps->aps_seqoff[sel], inc); } /***/ @@ -495,10 +738,10 @@ int inc; /* switch to other set ? */ if ((aps->aps_ackmin[!sel] > aps->aps_ackmin[sel]) && (seq1 > aps->aps_ackmin[!sel])) { -#if PROXY_DEBUG - printf("proxy out switch set ack %d -> %d %x > %x\n", - sel, !sel, seq1, aps->aps_ackmin[!sel]); -#endif + if (ipf_proxy_debug > 7) + printf("proxy out switch set ack %d -> %d %x > %x\n", + sel, !sel, seq1, + aps->aps_ackmin[!sel]); sel = aps->aps_sel[1 - out] = !sel; } @@ -514,10 +757,9 @@ int inc; /* switch to other set ? 
*/ if ((aps->aps_ackmin[!sel] > aps->aps_ackmin[sel]) && (seq1 > aps->aps_ackmin[!sel])) { -#if PROXY_DEBUG - printf("proxy in switch set ack %d -> %d %x > %x\n", - sel, !sel, seq1, aps->aps_ackmin[!sel]); -#endif + if (ipf_proxy_debug > 7) + printf("proxy in switch set ack %d -> %d %x > %x\n", + sel, !sel, seq1, aps->aps_ackmin[!sel]); sel = aps->aps_sel[out] = !sel; } @@ -534,11 +776,11 @@ int inc; if (inc && (seq1 > aps->aps_ackmin[!sel])) { aps->aps_ackmin[!sel] = seq1 + nlen - 1; aps->aps_ackoff[!sel] = aps->aps_ackoff[sel] + inc; -#if PROXY_DEBUG - printf("proxy ack set %d at %x to %d + %d\n", !sel, - aps->aps_seqmin[!sel], aps->aps_seqoff[sel], - inc); -#endif + + if (ipf_proxy_debug > 7) + printf("proxy ack set %d at %x to %d + %d\n", + !sel, aps->aps_seqmin[!sel], + aps->aps_seqoff[sel], inc); } /***/ @@ -549,19 +791,17 @@ int inc; /* switch to other set ? */ if ((aps->aps_seqmin[!sel] > aps->aps_seqmin[sel]) && (seq1 > aps->aps_seqmin[!sel])) { -#if PROXY_DEBUG - printf("proxy in switch set seq %d -> %d %x > %x\n", - sel, !sel, seq1, aps->aps_seqmin[!sel]); -#endif + if (ipf_proxy_debug > 7) + printf("proxy in switch set seq %d -> %d %x > %x\n", + sel, !sel, seq1, aps->aps_seqmin[!sel]); sel = aps->aps_sel[1 - out] = !sel; } if (aps->aps_seqoff[sel] != 0) { -#if PROXY_DEBUG - printf("sel %d seqoff %d seq1 %x seqmin %x\n", sel, - aps->aps_seqoff[sel], seq1, - aps->aps_seqmin[sel]); -#endif + if (ipf_proxy_debug > 7) + printf("sel %d seqoff %d seq1 %x seqmin %x\n", + sel, aps->aps_seqoff[sel], seq1, + aps->aps_seqmin[sel]); if (seq1 > aps->aps_seqmin[sel]) { seq2 = aps->aps_seqoff[sel]; tcp->th_ack = htonl(seq1 - seq2); @@ -569,10 +809,10 @@ int inc; } } } -#if PROXY_DEBUG - printf("appr_fixseqack: seq %x ack %x\n", ntohl(tcp->th_seq), - ntohl(tcp->th_ack)); -#endif + + if (ipf_proxy_debug > 8) + printf("appr_fixseqack: seq %x ack %x\n", + ntohl(tcp->th_seq), ntohl(tcp->th_ack)); return ch ? 
2 : 0; } @@ -587,9 +827,11 @@ int appr_init() int err = 0; for (ap = ap_proxies; ap->apr_p; ap++) { - err = (*ap->apr_init)(); - if (err != 0) - break; + if (ap->apr_init != NULL) { + err = (*ap->apr_init)(); + if (err != 0) + break; + } } return err; } @@ -604,9 +846,9 @@ void appr_unload() aproxy_t *ap; for (ap = ap_proxies; ap->apr_p; ap++) - if (ap->apr_fini) + if (ap->apr_fini != NULL) (*ap->apr_fini)(); for (ap = ap_proxylist; ap; ap = ap->apr_next) - if (ap->apr_fini) + if (ap->apr_fini != NULL) (*ap->apr_fini)(); } diff --git a/sys/contrib/ipfilter/netinet/ip_proxy.h b/sys/contrib/ipfilter/netinet/ip_proxy.h index 8488188..ac5cb83 100644 --- a/sys/contrib/ipfilter/netinet/ip_proxy.h +++ b/sys/contrib/ipfilter/netinet/ip_proxy.h @@ -1,10 +1,12 @@ +/* $FreeBSD$ */ + /* * Copyright (C) 1997-2001 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. * - * $Id: ip_proxy.h,v 2.8.2.4 2000/12/02 00:15:03 darrenr Exp $ * $FreeBSD$ + * Id: ip_proxy.h,v 2.31.2.2 2005/03/12 19:33:48 darrenr Exp */ #ifndef __IP_PROXY_H__ @@ -65,6 +67,26 @@ typedef struct ap_session { #define aps_ackmin aps_un.apu_tcp.apt_ackmin +typedef struct ap_control { + char apc_label[APR_LABELLEN]; + u_char apc_p; + /* + * The following fields are up to the proxy's apr_ctl routine to deal + * with. When the proxy gets this in kernel space, apc_data will + * point to a malloc'd region of memory of apc_dsize bytes. If the + * proxy wants to keep that memory, it must set apc_data to NULL + * before it returns. It is expected if this happens that it will + * take care to free it in apr_fini or otherwise as appropriate. + * apc_cmd is provided as a standard place to put simple commands, + * with apc_arg being available to put a simple arg.
+ */ + u_long apc_cmd; + u_long apc_arg; + void *apc_data; + size_t apc_dsize; +} ap_ctl_t; + + typedef struct aproxy { struct aproxy *apr_next; char apr_label[APR_LABELLEN]; /* Proxy label # */ @@ -73,34 +95,65 @@ typedef struct aproxy { int apr_flags; int (* apr_init) __P((void)); void (* apr_fini) __P((void)); - int (* apr_new) __P((fr_info_t *, ip_t *, - ap_session_t *, struct nat *)); + int (* apr_new) __P((fr_info_t *, ap_session_t *, struct nat *)); void (* apr_del) __P((ap_session_t *)); - int (* apr_inpkt) __P((fr_info_t *, ip_t *, - ap_session_t *, struct nat *)); - int (* apr_outpkt) __P((fr_info_t *, ip_t *, - ap_session_t *, struct nat *)); + int (* apr_inpkt) __P((fr_info_t *, ap_session_t *, struct nat *)); + int (* apr_outpkt) __P((fr_info_t *, ap_session_t *, struct nat *)); int (* apr_match) __P((fr_info_t *, ap_session_t *, struct nat *)); + int (* apr_ctl) __P((struct aproxy *, struct ap_control *)); } aproxy_t; #define APR_DELETE 1 -#define APR_ERR(x) (((x) & 0xffff) << 16) +#define APR_ERR(x) ((x) << 16) #define APR_EXIT(x) (((x) >> 16) & 0xffff) #define APR_INC(x) ((x) & 0xffff) -#define FTP_BUFSZ 160 /* - * For the ftp proxy. + * Generic #define's to cover missing things in the kernel */ +#ifndef isdigit +#define isdigit(x) ((x) >= '0' && (x) <= '9') +#endif +#ifndef isupper +#define isupper(x) (((unsigned)(x) >= 'A') && ((unsigned)(x) <= 'Z')) +#endif +#ifndef islower +#define islower(x) (((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z')) +#endif +#ifndef isalpha +#define isalpha(x) (isupper(x) || islower(x)) +#endif +#ifndef toupper +#define toupper(x) (isupper(x) ? (x) : (x) - 'a' + 'A') +#endif +#ifndef isspace +#define isspace(x) (((x) == ' ') || ((x) == '\r') || ((x) == '\n') || \ + ((x) == '\t') || ((x) == '\b')) +#endif + +/* + * This is the scratch buffer size used to hold strings from the TCP stream + * that we may want to parse. It's an arbitrary size, really, but it must + * be at least as large as IPF_FTPBUFSZ. 
+ */ +#define FTP_BUFSZ 120 + +/* + * This buffer, however, doesn't need to be nearly so big. It just needs to + * be able to squeeze in the largest command it needs to rewrite. Which ones + * does it rewrite? EPRT, PORT, 227 replies. + */ +#define IPF_FTPBUFSZ 80 /* This *MUST* be >= 53! */ + typedef struct ftpside { char *ftps_rptr; char *ftps_wptr; + void *ftps_ifp; u_32_t ftps_seq[2]; u_32_t ftps_len; - int ftps_junk; + int ftps_junk; /* 2 = no cr/lf yet, 1 = cannot parse */ int ftps_cmds; - int ftps_cmd; char ftps_buf[FTP_BUFSZ]; } ftpside_t; @@ -110,6 +163,22 @@ typedef struct ftpinfo { ftpside_t ftp_side[2]; } ftpinfo_t; + +/* + * For the irc proxy. + */ +typedef struct ircinfo { + size_t irc_len; + char *irc_snick; + char *irc_dnick; + char *irc_type; + char *irc_arg; + char *irc_addr; + u_32_t irc_ipnum; + u_short irc_port; +} ircinfo_t; + + /* * Real audio proxy structure and #defines */ @@ -141,6 +210,19 @@ typedef struct raudio_s { #define RAP_M_TCP 4 #define RAP_M_UDP_ROBUST (RAP_M_UDP|RAP_M_ROBUST) + +/* + * MSN RPC proxy + */ +typedef struct msnrpcinfo { + u_int mri_flags; + int mri_cmd[2]; + u_int mri_valid; + struct in_addr mri_raddr; + u_short mri_rport; +} msnrpcinfo_t; + + /* * IPSec proxy */ @@ -155,21 +237,218 @@ typedef struct ipsec_pxy { ipstate_t *ipsc_state; } ipsec_pxy_t; +/* + * PPTP proxy + */ +typedef struct pptp_side { + u_32_t pptps_nexthdr; + u_32_t pptps_next; + int pptps_state; + int pptps_gothdr; + int pptps_len; + int pptps_bytes; + char *pptps_wptr; + char pptps_buffer[512]; +} pptp_side_t; + +typedef struct pptp_pxy { + ipnat_t pptp_rule; + nat_t *pptp_nat; + ipstate_t *pptp_state; + u_short pptp_call[2]; + pptp_side_t pptp_side[2]; +} pptp_pxy_t; + + +/* + * Sun RPCBIND proxy + */ +#define RPCB_MAXMSG 888 +#define RPCB_RES_PMAP 0 /* Response contains a v2 port. */ +#define RPCB_RES_STRING 1 /* " " " v3 (GETADDR) string. */ +#define RPCB_RES_LIST 2 /* " " " v4 (GETADDRLIST) list.
*/ +#define RPCB_MAXREQS 32 /* Arbitrary limit on tracked transactions */ + +#define RPCB_REQMIN 40 +#define RPCB_REQMAX 888 +#define RPCB_REPMIN 20 +#define RPCB_REPMAX 604 /* XXX double check this! */ + +/* + * These macros determine the number of bytes between p and the end of + * r->rs_buf relative to l. + */ +#define RPCB_BUF_END(r) (char *)((r)->rm_msgbuf + (r)->rm_buflen) +#define RPCB_BUF_GEQ(r, p, l) \ + ((RPCB_BUF_END((r)) > (char *)(p)) && \ + ((RPCB_BUF_END((r)) - (char *)(p)) >= (l))) +#define RPCB_BUF_EQ(r, p, l) \ + (RPCB_BUF_END((r)) == ((char *)(p) + (l))) + +/* + * The following correspond to RPC(B) detailed in RFC183[13]. + */ +#define RPCB_CALL 0 +#define RPCB_REPLY 1 +#define RPCB_MSG_VERSION 2 +#define RPCB_PROG 100000 +#define RPCB_GETPORT 3 +#define RPCB_GETADDR 3 +#define RPCB_GETADDRLIST 11 +#define RPCB_MSG_ACCEPTED 0 +#define RPCB_MSG_DENIED 1 + +/* BEGIN (Generic XDR structures) */ +typedef struct xdr_string { + u_32_t *xs_len; + char *xs_str; +} xdr_string_t; + +typedef struct xdr_auth { + /* u_32_t xa_flavor; */ + xdr_string_t xa_string; +} xdr_auth_t; + +typedef struct xdr_uaddr { + u_32_t xu_ip; + u_short xu_port; + xdr_string_t xu_str; +} xdr_uaddr_t; + +typedef struct xdr_proto { + u_int xp_proto; + xdr_string_t xp_str; +} xdr_proto_t; + +#define xu_xslen xu_str.xs_len +#define xu_xsstr xu_str.xs_str +#define xp_xslen xp_str.xs_len +#define xp_xsstr xp_str.xs_str +/* END (Generic XDR structures) */ + +/* BEGIN (RPC call structures) */ +typedef struct pmap_args { + /* u_32_t pa_prog; */ + /* u_32_t pa_vers; */ + u_32_t *pa_prot; + /* u_32_t pa_port; */ +} pmap_args_t; + +typedef struct rpcb_args { + /* u_32_t *ra_prog; */ + /* u_32_t *ra_vers; */ + xdr_proto_t ra_netid; + xdr_uaddr_t ra_maddr; + /* xdr_string_t ra_owner; */ +} rpcb_args_t; + +typedef struct rpc_call { + /* u_32_t rc_rpcvers; */ + /* u_32_t rc_prog; */ + u_32_t *rc_vers; + u_32_t *rc_proc; + xdr_auth_t rc_authcred; + xdr_auth_t rc_authverf; + union { + pmap_args_t 
ra_pmapargs; + rpcb_args_t ra_rpcbargs; + } rpcb_args; +} rpc_call_t; + +#define rc_pmapargs rpcb_args.ra_pmapargs +#define rc_rpcbargs rpcb_args.ra_rpcbargs +/* END (RPC call structures) */ + +/* BEGIN (RPC reply structures) */ +typedef struct rpcb_entry { + xdr_uaddr_t re_maddr; + xdr_proto_t re_netid; + /* u_32_t re_semantics; */ + xdr_string_t re_family; + xdr_proto_t re_proto; + u_32_t *re_more; /* 1 == another entry follows */ +} rpcb_entry_t; + +typedef struct rpcb_listp { + u_32_t *rl_list; /* 1 == list follows */ + int rl_cnt; + rpcb_entry_t rl_entries[2]; /* TCP / UDP only */ +} rpcb_listp_t; + +typedef struct rpc_resp { + /* u_32_t rr_acceptdeny; */ + /* Omitted 'message denied' fork; we don't care about rejects. */ + xdr_auth_t rr_authverf; + /* u_32_t *rr_astat; */ + union { + u_32_t *resp_pmap; + xdr_uaddr_t resp_getaddr; + rpcb_listp_t resp_getaddrlist; + } rpcb_reply; +} rpc_resp_t; + +#define rr_v2 rpcb_reply.resp_pmap +#define rr_v3 rpcb_reply.resp_getaddr +#define rr_v4 rpcb_reply.resp_getaddrlist +/* END (RPC reply structures) */ + +/* BEGIN (RPC message structure & macros) */ +typedef struct rpc_msg { + char rm_msgbuf[RPCB_MAXMSG]; /* RPCB data buffer */ + u_int rm_buflen; + u_32_t *rm_xid; + /* u_32_t Call vs Reply */ + union { + rpc_call_t rb_call; + rpc_resp_t rb_resp; + } rm_body; +} rpc_msg_t; + +#define rm_call rm_body.rb_call +#define rm_resp rm_body.rb_resp +/* END (RPC message structure & macros) */ + +/* + * These code paths aren't hot enough to warrant per transaction + * mutexes. 
+ */ +typedef struct rpcb_xact { + struct rpcb_xact *rx_next; + struct rpcb_xact **rx_pnext; + u_32_t rx_xid; /* RPC transmission ID */ + u_int rx_type; /* RPCB response type */ + u_int rx_ref; /* reference count */ + u_int rx_proto; /* transport protocol (v2 only) */ +} rpcb_xact_t; + +typedef struct rpcb_session { + ipfmutex_t rs_rxlock; + rpcb_xact_t *rs_rxlist; +} rpcb_session_t; + +/* + * For an explanation, please see the following: + * RFC1832 - Sections 3.11, 4.4, and 4.5. + */ +#define XDRALIGN(x) ((((x) % 4) != 0) ? ((((x) + 3) / 4) * 4) : (x)) + extern ap_session_t *ap_sess_tab[AP_SESS_SIZE]; extern ap_session_t *ap_sess_list; extern aproxy_t ap_proxies[]; extern int ippr_ftp_pasvonly; extern int appr_add __P((aproxy_t *)); +extern int appr_ctl __P((ap_ctl_t *)); extern int appr_del __P((aproxy_t *)); extern int appr_init __P((void)); extern void appr_unload __P((void)); -extern int appr_ok __P((ip_t *, tcphdr_t *, struct ipnat *)); +extern int appr_ok __P((fr_info_t *, tcphdr_t *, struct ipnat *)); extern int appr_match __P((fr_info_t *, struct nat *)); extern void appr_free __P((aproxy_t *)); extern void aps_free __P((ap_session_t *)); -extern int appr_check __P((ip_t *, fr_info_t *, struct nat *)); +extern int appr_check __P((fr_info_t *, struct nat *)); extern aproxy_t *appr_lookup __P((u_int, char *)); -extern int appr_new __P((fr_info_t *, ip_t *, struct nat *)); +extern int appr_new __P((fr_info_t *, struct nat *)); +extern int appr_ioctl __P((caddr_t, ioctlcmd_t, int)); #endif /* __IP_PROXY_H__ */ diff --git a/sys/contrib/ipfilter/netinet/ip_raudio_pxy.c b/sys/contrib/ipfilter/netinet/ip_raudio_pxy.c index 933e7ea..d24519f 100644 --- a/sys/contrib/ipfilter/netinet/ip_raudio_pxy.c +++ b/sys/contrib/ipfilter/netinet/ip_raudio_pxy.c @@ -1,20 +1,27 @@ +/* $FreeBSD$ */ + /* * $FreeBSD$ + * Copyright (C) 1998-2003 by Darren Reed + * + * See the IPFILTER.LICENCE file for details on licencing. 
+ * + * Id: ip_raudio_pxy.c,v 1.40.2.3 2005/02/04 10:22:55 darrenr Exp */ -#if SOLARIS && defined(_KERNEL) -extern kmutex_t ipf_rw; -#endif #define IPF_RAUDIO_PROXY int ippr_raudio_init __P((void)); -int ippr_raudio_new __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *)); -int ippr_raudio_in __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *)); -int ippr_raudio_out __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *)); +void ippr_raudio_fini __P((void)); +int ippr_raudio_new __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_raudio_in __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_raudio_out __P((fr_info_t *, ap_session_t *, nat_t *)); static frentry_t raudiofr; +int raudio_proxy_init = 0; + /* * Real Audio application proxy initialization. @@ -24,26 +31,39 @@ int ippr_raudio_init() bzero((char *)&raudiofr, sizeof(raudiofr)); raudiofr.fr_ref = 1; raudiofr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&raudiofr.fr_lock, "Real Audio proxy rule lock"); + raudio_proxy_init = 1; + return 0; } +void ippr_raudio_fini() +{ + if (raudio_proxy_init == 1) { + MUTEX_DESTROY(&raudiofr.fr_lock); + raudio_proxy_init = 0; + } +} + + /* * Setup for a new proxy to handle Real Audio. 
*/ -int ippr_raudio_new(fin, ip, aps, nat) +int ippr_raudio_new(fin, aps, nat) fr_info_t *fin; -ip_t *ip; ap_session_t *aps; nat_t *nat; { raudio_t *rap; - KMALLOCS(aps->aps_data, void *, sizeof(raudio_t)); if (aps->aps_data == NULL) return -1; + fin = fin; /* LINT */ + nat = nat; /* LINT */ + bzero(aps->aps_data, sizeof(raudio_t)); rap = aps->aps_data; aps->aps_psiz = sizeof(raudio_t); @@ -53,20 +73,21 @@ nat_t *nat; -int ippr_raudio_out(fin, ip, aps, nat) +int ippr_raudio_out(fin, aps, nat) fr_info_t *fin; -ip_t *ip; ap_session_t *aps; nat_t *nat; { raudio_t *rap = aps->aps_data; unsigned char membuf[512 + 1], *s; u_short id = 0; - int off, dlen; tcphdr_t *tcp; + int off, dlen; int len = 0; mb_t *m; + nat = nat; /* LINT */ + /* * If we've already processed the start messages, then nothing left * for the proxy to do. @@ -74,26 +95,24 @@ nat_t *nat; if (rap->rap_eos == 1) return 0; + m = fin->fin_m; tcp = (tcphdr_t *)fin->fin_dp; - off = fin->fin_hlen + (tcp->th_off << 2); - bzero(membuf, sizeof(membuf)); -#if SOLARIS - m = fin->fin_qfm; + off = (char *)tcp - (char *)fin->fin_ip; + off += (TCP_OFF(tcp) << 2) + fin->fin_ipoff; - dlen = msgdsize(m) - off; - if (dlen <= 0) - return 0; - dlen = MIN(sizeof(membuf), dlen); - copyout_mblk(m, off, dlen, (char *)membuf); +#ifdef __sgi + dlen = fin->fin_plen - off; #else - m = *(mb_t **)fin->fin_mp; - - dlen = mbufchainlen(m) - off; + dlen = MSGDSIZE(m) - off; +#endif if (dlen <= 0) return 0; - dlen = MIN(sizeof(membuf), dlen); - m_copydata(m, off, dlen, (char *)membuf); -#endif + + if (dlen > sizeof(membuf)) + dlen = sizeof(membuf); + + bzero((char *)membuf, sizeof(membuf)); + COPYDATA(m, off, dlen, (char *)membuf); /* * In all the startup parsing, ensure that we don't go outside * the packet buffer boundary. 
@@ -160,23 +179,23 @@ nat_t *nat; } -int ippr_raudio_in(fin, ip, aps, nat) +int ippr_raudio_in(fin, aps, nat) fr_info_t *fin; -ip_t *ip; ap_session_t *aps; nat_t *nat; { unsigned char membuf[IPF_MAXPORTLEN + 1], *s; tcphdr_t *tcp, tcph, *tcp2 = &tcph; raudio_t *rap = aps->aps_data; - int off, dlen, slen, clen; struct in_addr swa, swb; + int off, dlen, slen; int a1, a2, a3, a4; u_short sp, dp; fr_info_t fi; tcp_seq seq; - nat_t *ipn; + nat_t *nat2; u_char swp; + ip_t *ip; mb_t *m; /* @@ -187,27 +206,24 @@ nat_t *nat; if (rap->rap_sdone != 0) return 0; + m = fin->fin_m; tcp = (tcphdr_t *)fin->fin_dp; - off = fin->fin_hlen + (tcp->th_off << 2); - m = *(mb_t **)fin->fin_mp; - -#if SOLARIS - m = fin->fin_qfm; + off = (char *)tcp - (char *)fin->fin_ip; + off += (TCP_OFF(tcp) << 2) + fin->fin_ipoff; - dlen = msgdsize(m) - off; - if (dlen <= 0) - return 0; - bzero(membuf, sizeof(membuf)); - clen = MIN(sizeof(membuf), dlen); - copyout_mblk(m, off, clen, (char *)membuf); +#ifdef __sgi + dlen = fin->fin_plen - off; #else - dlen = mbufchainlen(m) - off; + dlen = MSGDSIZE(m) - off; +#endif if (dlen <= 0) return 0; - bzero(membuf, sizeof(membuf)); - clen = MIN(sizeof(membuf), dlen); - m_copydata(m, off, clen, (char *)membuf); -#endif + + if (dlen > sizeof(membuf)) + dlen = sizeof(membuf); + + bzero((char *)membuf, sizeof(membuf)); + COPYDATA(m, off, dlen, (char *)membuf); seq = ntohl(tcp->th_seq); /* @@ -215,7 +231,7 @@ nat_t *nat; * We only care for the first 19 bytes coming back from the server. 
*/ if (rap->rap_sseq == 0) { - s = (u_char *)memstr("PNA", (char *)membuf, 3, clen); + s = (u_char *)memstr("PNA", (char *)membuf, 3, dlen); if (s == NULL) return 0; a1 = s - membuf; @@ -250,6 +266,7 @@ nat_t *nat; rap->rap_srport = (*s << 8) | *(s + 1); } + ip = fin->fin_ip; swp = ip->ip_p; swa = ip->ip_src; swb = ip->ip_dst; @@ -260,10 +277,14 @@ nat_t *nat; bcopy((char *)fin, (char *)&fi, sizeof(fi)); bzero((char *)tcp2, sizeof(*tcp2)); - tcp2->th_off = 5; + TCP_OFF_A(tcp2, 5); + fi.fin_state = NULL; + fi.fin_nat = NULL; + fi.fin_flx |= FI_IGNORE; fi.fin_dp = (char *)tcp2; fi.fin_fr = &raudiofr; fi.fin_dlen = sizeof(*tcp2); + fi.fin_plen = fi.fin_hlen + sizeof(*tcp2); tcp2->th_win = htons(8192); slen = ip->ip_len; ip->ip_len = fin->fin_hlen + sizeof(*tcp); @@ -277,13 +298,16 @@ nat_t *nat; fi.fin_data[0] = dp; fi.fin_data[1] = sp; fi.fin_out = 0; - ipn = nat_new(&fi, ip, nat->nat_ptr, NULL, - IPN_UDP | (sp ? 0 : FI_W_SPORT), NAT_OUTBOUND); - if (ipn != NULL) { - ipn->nat_age = fr_defnatage; - (void) fr_addstate(ip, &fi, NULL, - FI_IGNOREPKT|FI_NORULE| - (sp ? 0 : FI_W_SPORT)); + nat2 = nat_new(&fi, nat->nat_ptr, NULL, + NAT_SLAVE|IPN_UDP | (sp ? 0 : SI_W_SPORT), + NAT_OUTBOUND); + if (nat2 != NULL) { + (void) nat_proto(&fi, nat2, IPN_UDP); + nat_update(&fi, nat2, nat2->nat_ptr); + + (void) fr_addstate(&fi, NULL, (sp ? 
0 : SI_W_SPORT)); + if (fi.fin_state != NULL) + fr_statederef(&fi, (ipstate_t **)&fi.fin_state); } } @@ -294,12 +318,16 @@ nat_t *nat; fi.fin_data[0] = sp; fi.fin_data[1] = 0; fi.fin_out = 1; - ipn = nat_new(&fi, ip, nat->nat_ptr, NULL, IPN_UDP|FI_W_DPORT, - NAT_OUTBOUND); - if (ipn != NULL) { - ipn->nat_age = fr_defnatage; - (void) fr_addstate(ip, &fi, NULL, - FI_W_DPORT|FI_IGNOREPKT|FI_NORULE); + nat2 = nat_new(&fi, nat->nat_ptr, NULL, + NAT_SLAVE|IPN_UDP|SI_W_DPORT, + NAT_OUTBOUND); + if (nat2 != NULL) { + (void) nat_proto(&fi, nat2, IPN_UDP); + nat_update(&fi, nat2, nat2->nat_ptr); + + (void) fr_addstate(&fi, NULL, SI_W_DPORT); + if (fi.fin_state != NULL) + fr_statederef(&fi, (ipstate_t **)&fi.fin_state); } } diff --git a/sys/contrib/ipfilter/netinet/ip_rcmd_pxy.c b/sys/contrib/ipfilter/netinet/ip_rcmd_pxy.c index 8f450ea..540b837 100644 --- a/sys/contrib/ipfilter/netinet/ip_rcmd_pxy.c +++ b/sys/contrib/ipfilter/netinet/ip_rcmd_pxy.c @@ -1,28 +1,32 @@ +/* $FreeBSD$ */ + /* - * $Id: ip_rcmd_pxy.c,v 1.4.2.7 2003/04/26 05:59:39 darrenr Exp $ - */ -/* + * Copyright (C) 1998-2003 by Darren Reed + * + * See the IPFILTER.LICENCE file for details on licencing. + * + * Id: ip_rcmd_pxy.c,v 1.41.2.4 2005/02/04 10:22:55 darrenr Exp + * * Simple RCMD transparent proxy for in-kernel use. For use with the NAT * code. 
* $FreeBSD$ */ -#if SOLARIS && defined(_KERNEL) -extern kmutex_t ipf_rw; -#endif - -#define isdigit(x) ((x) >= '0' && (x) <= '9') #define IPF_RCMD_PROXY int ippr_rcmd_init __P((void)); -int ippr_rcmd_new __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *)); -int ippr_rcmd_out __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *)); +void ippr_rcmd_fini __P((void)); +int ippr_rcmd_new __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_rcmd_out __P((fr_info_t *, ap_session_t *, nat_t *)); +int ippr_rcmd_in __P((fr_info_t *, ap_session_t *, nat_t *)); u_short ipf_rcmd_atoi __P((char *)); -int ippr_rcmd_portmsg __P((fr_info_t *, ip_t *, ap_session_t *, nat_t *)); +int ippr_rcmd_portmsg __P((fr_info_t *, ap_session_t *, nat_t *)); static frentry_t rcmdfr; +int rcmd_proxy_init = 0; + /* * RCMD application proxy initialization. @@ -32,25 +36,43 @@ int ippr_rcmd_init() bzero((char *)&rcmdfr, sizeof(rcmdfr)); rcmdfr.fr_ref = 1; rcmdfr.fr_flags = FR_INQUE|FR_PASS|FR_QUICK|FR_KEEPSTATE; + MUTEX_INIT(&rcmdfr.fr_lock, "RCMD proxy rule lock"); + rcmd_proxy_init = 1; + return 0; } +void ippr_rcmd_fini() +{ + if (rcmd_proxy_init == 1) { + MUTEX_DESTROY(&rcmdfr.fr_lock); + rcmd_proxy_init = 0; + } +} + + /* * Setup for a new RCMD proxy. 
*/ -int ippr_rcmd_new(fin, ip, aps, nat) +int ippr_rcmd_new(fin, aps, nat) fr_info_t *fin; -ip_t *ip; ap_session_t *aps; nat_t *nat; { tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp; + fin = fin; /* LINT */ + nat = nat; /* LINT */ + aps->aps_psiz = sizeof(u_32_t); KMALLOCS(aps->aps_data, u_32_t *, sizeof(u_32_t)); - if (aps->aps_data == NULL) + if (aps->aps_data == NULL) { +#ifdef IP_RCMD_PROXY_DEBUG + printf("ippr_rcmd_new:KMALLOCS(%d) failed\n", (int)sizeof(u_32_t)); +#endif return -1; + } *(u_32_t *)aps->aps_data = 0; aps->aps_sport = tcp->th_sport; aps->aps_dport = tcp->th_dport; @@ -67,7 +89,7 @@ char *ptr; register char *s = ptr, c; register u_short i = 0; - while ((c = *s++) && isdigit(c)) { + while (((c = *s++) != '\0') && ISDIGIT(c)) { i *= 10; i += c - '0'; } @@ -75,19 +97,19 @@ char *ptr; } -int ippr_rcmd_portmsg(fin, ip, aps, nat) +int ippr_rcmd_portmsg(fin, aps, nat) fr_info_t *fin; -ip_t *ip; ap_session_t *aps; nat_t *nat; { - char portbuf[8], *s; - struct in_addr swip; - int off, dlen; tcphdr_t *tcp, tcph, *tcp2 = &tcph; + struct in_addr swip, swip2; + int off, dlen, nflags; + char portbuf[8], *s; fr_info_t fi; u_short sp; - nat_t *ipn; + nat_t *nat2; + ip_t *ip; mb_t *m; tcp = (tcphdr_t *)fin->fin_dp; @@ -101,37 +123,47 @@ nat_t *nat; (tcp->th_seq != *(u_32_t *)aps->aps_data)) return 0; - off = fin->fin_hlen + (tcp->th_off << 2); + m = fin->fin_m; + ip = fin->fin_ip; + off = (char *)tcp - (char *)ip + (TCP_OFF(tcp) << 2) + fin->fin_ipoff; -#if SOLARIS - m = fin->fin_qfm; - - dlen = msgdsize(m) - off; - bzero(portbuf, sizeof(portbuf)); - copyout_mblk(m, off, MIN(sizeof(portbuf), dlen), portbuf); +#ifdef __sgi + dlen = fin->fin_plen - off; #else - m = *(mb_t **)fin->fin_mp; - dlen = mbufchainlen(m) - off; - bzero(portbuf, sizeof(portbuf)); - m_copydata(m, off, MIN(sizeof(portbuf), dlen), portbuf); + dlen = MSGDSIZE(m) - off; #endif + if (dlen <= 0) + return 0; + + bzero(portbuf, sizeof(portbuf)); + COPYDATA(m, off, MIN(sizeof(portbuf), dlen), portbuf);
portbuf[sizeof(portbuf) - 1] = '\0'; s = portbuf; sp = ipf_rcmd_atoi(s); - if (!sp) + if (sp == 0) { +#ifdef IP_RCMD_PROXY_DEBUG + printf("ippr_rcmd_portmsg:sp == 0 dlen %d [%s]\n", + dlen, portbuf); +#endif return 0; + } /* * Add skeleton NAT entry for connection which will come back the * other way. */ bcopy((char *)fin, (char *)&fi, sizeof(fi)); + fi.fin_flx |= FI_IGNORE; fi.fin_data[0] = sp; - fi.fin_data[1] = fin->fin_data[1]; - ipn = nat_outlookup(&fi, IPN_TCP, nat->nat_p, nat->nat_inip, - ip->ip_dst, 0); - if (ipn == NULL) { + fi.fin_data[1] = 0; + if (nat->nat_dir == NAT_OUTBOUND) + nat2 = nat_outlookup(&fi, NAT_SEARCH|IPN_TCP, nat->nat_p, + nat->nat_inip, nat->nat_oip); + else + nat2 = nat_inlookup(&fi, NAT_SEARCH|IPN_TCP, nat->nat_p, + nat->nat_inip, nat->nat_oip); + if (nat2 == NULL) { int slen; slen = ip->ip_len; @@ -140,33 +172,66 @@ nat_t *nat; tcp2->th_win = htons(8192); tcp2->th_sport = htons(sp); tcp2->th_dport = 0; /* XXX - don't specify remote port */ - tcp2->th_off = 5; + TCP_OFF_A(tcp2, 5); tcp2->th_flags = TH_SYN; - fi.fin_data[1] = 0; fi.fin_dp = (char *)tcp2; + fi.fin_fr = &rcmdfr; fi.fin_dlen = sizeof(*tcp2); + fi.fin_plen = fi.fin_hlen + sizeof(*tcp2); + fi.fin_flx &= FI_LOWTTL|FI_FRAG|FI_TCPUDP|FI_OPTIONS|FI_IGNORE; + nflags = NAT_SLAVE|IPN_TCP|SI_W_DPORT; + swip = ip->ip_src; - ip->ip_src = nat->nat_inip; - ipn = nat_new(&fi, ip, nat->nat_ptr, NULL, IPN_TCP|FI_W_DPORT, - NAT_OUTBOUND); - if (ipn != NULL) { - ipn->nat_age = fr_defnatage; - fi.fin_fr = &rcmdfr; - (void) fr_addstate(ip, &fi, NULL, - FI_W_DPORT|FI_IGNOREPKT); + swip2 = ip->ip_dst; + + if (nat->nat_dir == NAT_OUTBOUND) { + fi.fin_fi.fi_saddr = nat->nat_inip.s_addr; + ip->ip_src = nat->nat_inip; + } else { + fi.fin_fi.fi_saddr = nat->nat_oip.s_addr; + ip->ip_src = nat->nat_oip; + nflags |= NAT_NOTRULEPORT; + } + + nat2 = nat_new(&fi, nat->nat_ptr, NULL, nflags, nat->nat_dir); + + if (nat2 != NULL) { + (void) nat_proto(&fi, nat2, IPN_TCP); + nat_update(&fi, nat2, 
nat2->nat_ptr); + fi.fin_ifp = NULL; + if (nat->nat_dir == NAT_INBOUND) { + fi.fin_fi.fi_daddr = nat->nat_inip.s_addr; + ip->ip_dst = nat->nat_inip; + } + (void) fr_addstate(&fi, &nat2->nat_state, SI_W_DPORT); + if (fi.fin_state != NULL) + fr_statederef(&fi, (ipstate_t **)&fi.fin_state); } ip->ip_len = slen; ip->ip_src = swip; + ip->ip_dst = swip2; } return 0; } -int ippr_rcmd_out(fin, ip, aps, nat) +int ippr_rcmd_out(fin, aps, nat) fr_info_t *fin; -ip_t *ip; ap_session_t *aps; nat_t *nat; { - return ippr_rcmd_portmsg(fin, ip, aps, nat); + if (nat->nat_dir == NAT_OUTBOUND) + return ippr_rcmd_portmsg(fin, aps, nat); + return 0; +} + + +int ippr_rcmd_in(fin, aps, nat) +fr_info_t *fin; +ap_session_t *aps; +nat_t *nat; +{ + if (nat->nat_dir == NAT_INBOUND) + return ippr_rcmd_portmsg(fin, aps, nat); + return 0; } diff --git a/sys/contrib/ipfilter/netinet/ip_state.c b/sys/contrib/ipfilter/netinet/ip_state.c index 87bebfb..7c1b311 100644 --- a/sys/contrib/ipfilter/netinet/ip_state.c +++ b/sys/contrib/ipfilter/netinet/ip_state.c @@ -1,11 +1,15 @@ +/* $FreeBSD$ */ + /* - * Copyright (C) 1995-2002 by Darren Reed. + * Copyright (C) 1995-2003 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. 
*/ - -#if defined(__sgi) && (IRIX > 602) -# include +#if defined(KERNEL) || defined(_KERNEL) +# undef KERNEL +# undef _KERNEL +# define KERNEL 1 +# define _KERNEL 1 #endif #include #include @@ -19,17 +23,18 @@ (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) #include "opt_inet6.h" #endif -#if !defined(_KERNEL) && !defined(KERNEL) && !defined(__KERNEL__) +#if !defined(_KERNEL) && !defined(__KERNEL__) # include # include # include -#else -# ifdef linux -# include -# include +# define _KERNEL +# ifdef __OpenBSD__ +struct file; # endif +# include +# undef _KERNEL #endif -#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000) +#if defined(_KERNEL) && (__FreeBSD_version >= 220000) # include # include # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) @@ -39,18 +44,17 @@ # include #endif #include -#ifndef linux +#if !defined(linux) # include #endif #include -#if (defined(_KERNEL) || defined(KERNEL)) && !defined(linux) +#if defined(_KERNEL) # include -#endif -#if !defined(__SVR4) && !defined(__svr4__) -# ifndef linux +# if !defined(__SVR4) && !defined(__svr4__) # include # endif -#else +#endif +#if defined(__SVR4) || defined(__svr4__) # include # include # ifdef _KERNEL @@ -69,8 +73,10 @@ #include #include #include -#ifndef linux +#if !defined(linux) # include +#endif +#if !defined(__hpux) && !defined(linux) # include #endif #include @@ -81,89 +87,129 @@ #include "netinet/ip_nat.h" #include "netinet/ip_frag.h" #include "netinet/ip_state.h" +#include "netinet/ip_proxy.h" +#ifdef IPFILTER_SYNC +#include "netinet/ip_sync.h" +#endif +#ifdef IPFILTER_SCAN +#include "netinet/ip_scan.h" +#endif #ifdef USE_INET6 #include #endif #if (__FreeBSD_version >= 300000) # include -# if (defined(_KERNEL) || defined(KERNEL)) && !defined(IPFILTER_LKM) +# if defined(_KERNEL) && !defined(IPFILTER_LKM) # include # include # endif #endif +/* END OF INCLUDES */ + #if !defined(lint) static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren 
Reed"; -/* static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.30.2.38 2001/07/23 13:49:46 darrenr Exp $"; */ -static const char rcsid[] = "@(#)$FreeBSD$"; -#endif - -#ifndef MIN -# define MIN(a,b) (((a)<(b))?(a):(b)) +static const char rcsid[] = "@(#)Id: ip_state.c,v 2.186.2.29 2005/03/28 10:47:54 darrenr Exp"; #endif -#define TCP_CLOSE (TH_FIN|TH_RST) - -static ipstate_t **ips_table = NULL; -static int ips_num = 0; -static int ips_wild = 0; -static ips_stat_t ips_stats; -#ifdef USE_MUTEX -extern KRWLOCK_T ipf_state, ipf_mutex; -extern kmutex_t ipf_rw; -#endif +static ipstate_t **ips_table = NULL; +static u_long *ips_seed = NULL; +static int ips_num = 0; +static u_long ips_last_force_flush = 0; +ips_stat_t ips_stats; #ifdef USE_INET6 -static frentry_t *fr_checkicmp6matchingstate __P((ip6_t *, fr_info_t *)); +static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); #endif -static int fr_matchsrcdst __P((ipstate_t *, union i6addr, union i6addr, - fr_info_t *, tcphdr_t *)); -static frentry_t *fr_checkicmpmatchingstate __P((ip_t *, fr_info_t *)); -static int fr_matchicmpqueryreply __P((int, ipstate_t *, icmphdr_t *, int)); +static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, + i6addr_t *, tcphdr_t *, u_32_t)); +static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); static int fr_state_flush __P((int, int)); static ips_stat_t *fr_statetstats __P((void)); -static void fr_delstate __P((ipstate_t *)); +static void fr_delstate __P((ipstate_t *, int)); static int fr_state_remove __P((caddr_t)); -static void fr_ipsmove __P((ipstate_t **, ipstate_t *, u_int)); -static int fr_tcpoptions __P((tcphdr_t *)); +static void fr_ipsmove __P((ipstate_t *, u_int)); +static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); +static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); +static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); +static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); +static void 
fr_fixoutisn __P((fr_info_t *, ipstate_t *)); +static void fr_checknewisn __P((fr_info_t *, ipstate_t *)); + int fr_stputent __P((caddr_t)); int fr_stgetent __P((caddr_t)); -void fr_stinsert __P((ipstate_t *)); - -#define FIVE_DAYS (2 * 5 * 86400) /* 5 days: half closed session */ +#define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */ +#define FIVE_DAYS (5 * ONE_DAY) +#define DOUBLE_HASH(x) (((x) + ips_seed[(x) % fr_statesize]) % fr_statesize) -#define TCP_MSL 240 /* 2 minutes */ u_long fr_tcpidletimeout = FIVE_DAYS, - fr_tcpclosewait = 2 * TCP_MSL, - fr_tcplastack = 2 * TCP_MSL, - fr_tcptimeout = 2 * TCP_MSL, - fr_tcpclosed = 120, - fr_tcphalfclosed = 2 * 2 * 3600, /* 2 hours */ - fr_udptimeout = 240, - fr_udpacktimeout = 24, - fr_icmptimeout = 120, - fr_icmpacktimeout = 12; + fr_tcpclosewait = IPF_TTLVAL(2 * TCP_MSL), + fr_tcplastack = IPF_TTLVAL(2 * TCP_MSL), + fr_tcptimeout = IPF_TTLVAL(2 * TCP_MSL), + fr_tcpclosed = IPF_TTLVAL(60), + fr_tcphalfclosed = IPF_TTLVAL(2 * 3600), /* 2 hours */ + fr_udptimeout = IPF_TTLVAL(120), + fr_udpacktimeout = IPF_TTLVAL(12), + fr_icmptimeout = IPF_TTLVAL(60), + fr_icmpacktimeout = IPF_TTLVAL(6), + fr_iptimeout = IPF_TTLVAL(60); int fr_statemax = IPSTATE_MAX, fr_statesize = IPSTATE_SIZE; int fr_state_doflush = 0, - fr_state_lock = 0; + fr_state_lock = 0, + fr_state_maxbucket = 0, + fr_state_maxbucket_reset = 1, + fr_state_init = 0; +ipftq_t ips_tqtqb[IPF_TCP_NSTATES], + ips_udptq, + ips_udpacktq, + ips_iptq, + ips_icmptq, + ips_icmpacktq, + *ips_utqe = NULL; +#ifdef IPFILTER_LOG +int ipstate_logging = 1; +#else +int ipstate_logging = 0; +#endif ipstate_t *ips_list = NULL; -static int icmpreplytype4[ICMP_MAXTYPE + 1]; -#ifdef USE_INET6 -static int icmpreplytype6[ICMP6_MAXTYPE + 1]; -#endif +/* ------------------------------------------------------------------------ */ +/* Function: fr_stateinit */ +/* Returns: int - 0 == success, -1 == failure */ +/* Parameters: Nil */ +/* */ +/* Initialise all the global variables used within the 
state code. */ +/* This action also includes initialising locks. */ +/* ------------------------------------------------------------------------ */ int fr_stateinit() { int i; KMALLOCS(ips_table, ipstate_t **, fr_statesize * sizeof(ipstate_t *)); - if (ips_table != NULL) - bzero((char *)ips_table, fr_statesize * sizeof(ipstate_t *)); - else + if (ips_table == NULL) return -1; + bzero((char *)ips_table, fr_statesize * sizeof(ipstate_t *)); + + KMALLOCS(ips_seed, u_long *, fr_statesize * sizeof(*ips_seed)); + if (ips_seed == NULL) + return -2; + for (i = 0; i < fr_statesize; i++) { + /* + * XXX - ips_seed[X] should be a random number of sorts. + */ +#if (__FreeBSD_version >= 400000) + ips_seed[i] = arc4random(); +#else + ips_seed[i] = ((u_long)ips_seed + i) * fr_statesize; + ips_seed[i] ^= 0xa5a55a5a; + ips_seed[i] *= (u_long)ips_seed; + ips_seed[i] ^= 0x5a5aa5a5; + ips_seed[i] *= fr_statemax; +#endif + } /* fill icmp reply type table */ for (i = 0; i <= ICMP_MAXTYPE; i++) @@ -183,111 +229,155 @@ int fr_stateinit() icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT; #endif + KMALLOCS(ips_stats.iss_bucketlen, u_long *, + fr_statesize * sizeof(u_long)); + if (ips_stats.iss_bucketlen == NULL) + return -1; + bzero((char *)ips_stats.iss_bucketlen, fr_statesize * sizeof(u_long)); + + if (fr_state_maxbucket == 0) { + for (i = fr_statesize; i > 0; i >>= 1) + fr_state_maxbucket++; + fr_state_maxbucket *= 2; + } + + fr_sttab_init(ips_tqtqb); + ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ips_udptq; + ips_udptq.ifq_ttl = (u_long)fr_udptimeout; + ips_udptq.ifq_ref = 1; + ips_udptq.ifq_head = NULL; + ips_udptq.ifq_tail = &ips_udptq.ifq_head; + MUTEX_INIT(&ips_udptq.ifq_lock, "ipftq udp tab"); + ips_udptq.ifq_next = &ips_udpacktq; + ips_udpacktq.ifq_ttl = (u_long)fr_udpacktimeout; + ips_udpacktq.ifq_ref = 1; + ips_udpacktq.ifq_head = NULL; + ips_udpacktq.ifq_tail = &ips_udpacktq.ifq_head; + MUTEX_INIT(&ips_udpacktq.ifq_lock, "ipftq udpack tab"); + ips_udpacktq.ifq_next =
&ips_icmptq; + ips_icmptq.ifq_ttl = (u_long)fr_icmptimeout; + ips_icmptq.ifq_ref = 1; + ips_icmptq.ifq_head = NULL; + ips_icmptq.ifq_tail = &ips_icmptq.ifq_head; + MUTEX_INIT(&ips_icmptq.ifq_lock, "ipftq icmp tab"); + ips_icmptq.ifq_next = &ips_icmpacktq; + ips_icmpacktq.ifq_ttl = (u_long)fr_icmpacktimeout; + ips_icmpacktq.ifq_ref = 1; + ips_icmpacktq.ifq_head = NULL; + ips_icmpacktq.ifq_tail = &ips_icmpacktq.ifq_head; + MUTEX_INIT(&ips_icmpacktq.ifq_lock, "ipftq icmpack tab"); + ips_icmpacktq.ifq_next = &ips_iptq; + ips_iptq.ifq_ttl = (u_long)fr_iptimeout; + ips_iptq.ifq_ref = 1; + ips_iptq.ifq_head = NULL; + ips_iptq.ifq_tail = &ips_iptq.ifq_head; + MUTEX_INIT(&ips_iptq.ifq_lock, "ipftq ip tab"); + ips_iptq.ifq_next = NULL; + + RWLOCK_INIT(&ipf_state, "ipf IP state rwlock"); + MUTEX_INIT(&ipf_stinsert, "ipf state insert mutex"); + fr_state_init = 1; + + ips_last_force_flush = fr_ticks; return 0; } -static ips_stat_t *fr_statetstats() +/* ------------------------------------------------------------------------ */ +/* Function: fr_stateunload */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Release and destroy any resources acquired or initialised so that */ +/* IPFilter can be unloaded or re-initialised. */ +/* ------------------------------------------------------------------------ */ +void fr_stateunload() { - ips_stats.iss_active = ips_num; - ips_stats.iss_table = ips_table; - ips_stats.iss_list = ips_list; - return &ips_stats; -} + ipftq_t *ifq, *ifqnext; + ipstate_t *is; + while ((is = ips_list) != NULL) + fr_delstate(is, 0); -/* - * flush state tables. two actions currently defined: - * which == 0 : flush all state table entries - * which == 1 : flush TCP connections which have started to close but are - * stuck for some reason. - * which == 2 : flush TCP connections which have been idle for a long time, - * starting at > 4 days idle and working back in successive half- - * days to at most 12 hours old. 
- */ -static int fr_state_flush(which, proto) -int which, proto; -{ - ipstate_t *is, **isp; -#if defined(_KERNEL) && !SOLARIS - int s; -#endif - int delete, removed = 0, try; + /* + * Proxy timeout queues are not cleaned here because although they + * exist on the state list, appr_unload is called after fr_stateunload + * and the proxies actually are responsible for them being created. + * Should the proxy timeouts have their own list? There's no real + * justification as this is the only complication. + */ + for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) { + ifqnext = ifq->ifq_next; + if (((ifq->ifq_flags & IFQF_PROXY) == 0) && + (fr_deletetimeoutqueue(ifq) == 0)) + fr_freetimeoutqueue(ifq); + } - SPL_NET(s); - for (isp = &ips_list; (is = *isp); ) { - delete = 0; + ips_stats.iss_inuse = 0; + ips_num = 0; - if ((proto != 0) && (is->is_v != proto)) - continue; + if (fr_state_init == 1) { + fr_sttab_destroy(ips_tqtqb); + MUTEX_DESTROY(&ips_udptq.ifq_lock); + MUTEX_DESTROY(&ips_icmptq.ifq_lock); + MUTEX_DESTROY(&ips_udpacktq.ifq_lock); + MUTEX_DESTROY(&ips_icmpacktq.ifq_lock); + MUTEX_DESTROY(&ips_iptq.ifq_lock); + } - switch (which) - { - case 0 : - delete = 1; - break; - case 1 : - case 2 : - if (is->is_p != IPPROTO_TCP) - break; - if ((is->is_state[0] != TCPS_ESTABLISHED) || - (is->is_state[1] != TCPS_ESTABLISHED)) - delete = 1; - break; - } + if (ips_table != NULL) { + KFREES(ips_table, fr_statesize * sizeof(*ips_table)); + ips_table = NULL; + } - if (delete) { - if (is->is_p == IPPROTO_TCP) - ips_stats.iss_fin++; - else - ips_stats.iss_expire++; -#ifdef IPFILTER_LOG - ipstate_log(is, ISL_FLUSH); -#endif - fr_delstate(is); - removed++; - } else - isp = &is->is_next; + if (ips_seed != NULL) { + KFREES(ips_seed, fr_statesize * sizeof(*ips_seed)); + ips_seed = NULL; } - /* - * Asked to remove inactive entries, try again if first attempt - * failed. In this case, 86400 is half a day because the counter is - * activated every half second.
- */ - if ((which == 2) && (removed == 0)) { - try = 86400; /* half a day */ - for (; (try < FIVE_DAYS) && (removed == 0); try += 86400) { - for (isp = &ips_list; (is = *isp); ) { - delete = 0; - if ((is->is_p == IPPROTO_TCP) && - ((is->is_state[0] == TCPS_ESTABLISHED) || - (is->is_state[1] == TCPS_ESTABLISHED)) && - (is->is_age < try)) { - ips_stats.iss_fin++; - delete = 1; - } else if ((is->is_p != IPPROTO_TCP) && - (is->is_pkts > 1)) { - ips_stats.iss_expire++; - delete = 1; - } - if (delete) { -#ifdef IPFILTER_LOG - ipstate_log(is, ISL_FLUSH); -#endif - fr_delstate(is); - removed++; - } else - isp = &is->is_next; - } - } + if (ips_stats.iss_bucketlen != NULL) { + KFREES(ips_stats.iss_bucketlen, fr_statesize * sizeof(u_long)); + ips_stats.iss_bucketlen = NULL; } - SPL_X(s); - return removed; + if (fr_state_maxbucket_reset == 1) + fr_state_maxbucket = 0; + + if (fr_state_init == 1) { + fr_state_init = 0; + RW_DESTROY(&ipf_state); + MUTEX_DESTROY(&ipf_stinsert); + } } +/* ------------------------------------------------------------------------ */ +/* Function: fr_statetstats */ +/* Returns: ips_state_t* - pointer to state stats structure */ +/* Parameters: Nil */ +/* */ +/* Put all the current numbers and pointers into a single struct and return */ +/* a pointer to it. 
*/ +/* ------------------------------------------------------------------------ */ +static ips_stat_t *fr_statetstats() +{ + ips_stats.iss_active = ips_num; + ips_stats.iss_statesize = fr_statesize; + ips_stats.iss_statemax = fr_statemax; + ips_stats.iss_table = ips_table; + ips_stats.iss_list = ips_list; + ips_stats.iss_ticks = fr_ticks; + return &ips_stats; +} + +/* ------------------------------------------------------------------------ */ +/* Function: fr_state_remove */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: data(I) - pointer to state structure to delete from table */ +/* */ +/* Search for a state structure that matches the one passed, according to */ +/* the IP addresses and other protocol specific information. */ +/* ------------------------------------------------------------------------ */ static int fr_state_remove(data) caddr_t data; { @@ -295,23 +385,20 @@ caddr_t data; int error; sp = &st; - error = IRCOPYPTR(data, (caddr_t)&st, sizeof(st)); + error = fr_inobj(data, &st, IPFOBJ_IPSTATE); if (error) return EFAULT; WRITE_ENTER(&ipf_state); for (sp = ips_list; sp; sp = sp->is_next) if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && - !bcmp((char *)&sp->is_src, (char *)&st.is_src, + !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, sizeof(st.is_src)) && - !bcmp((char *)&sp->is_dst, (char *)&st.is_dst, + !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_src, sizeof(st.is_dst)) && - !bcmp((char *)&sp->is_ps, (char *)&st.is_ps, + !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, sizeof(st.is_ps))) { -#ifdef IPFILTER_LOG - ipstate_log(sp, ISL_REMOVE); -#endif - fr_delstate(sp); + fr_delstate(sp, ISL_REMOVE); RWLOCK_EXIT(&ipf_state); return 0; } @@ -320,49 +407,59 @@ caddr_t data; } +/* ------------------------------------------------------------------------ */ +/* Function: fr_state_ioctl */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: data(I) - pointer to ioctl data */ +/* cmd(I) - ioctl command integer */ +/* 
mode(I) - file mode bits used with open */ +/* */ +/* Processes an ioctl call made to operate on the IP Filter state device. */ +/* ------------------------------------------------------------------------ */ int fr_state_ioctl(data, cmd, mode) caddr_t data; -#if defined(__NetBSD__) || defined(__OpenBSD__) -u_long cmd; -#else -int cmd; -#endif +ioctlcmd_t cmd; int mode; { int arg, ret, error = 0; switch (cmd) { + /* + * Delete an entry from the state table. + */ case SIOCDELST : error = fr_state_remove(data); break; + /* + * Flush the state table + */ case SIOCIPFFL : - error = IRCOPY(data, (caddr_t)&arg, sizeof(arg)); - if (error) - break; + BCOPYIN(data, (char *)&arg, sizeof(arg)); if (arg == 0 || arg == 1) { WRITE_ENTER(&ipf_state); ret = fr_state_flush(arg, 4); RWLOCK_EXIT(&ipf_state); - error = IWCOPY((caddr_t)&ret, data, sizeof(ret)); + BCOPYOUT((char *)&ret, data, sizeof(ret)); } else error = EINVAL; break; -#ifdef USE_INET6 +#ifdef USE_INET6 case SIOCIPFL6 : - error = IRCOPY(data, (caddr_t)&arg, sizeof(arg)); - if (error) - break; + BCOPYIN(data, (char *)&arg, sizeof(arg)); if (arg == 0 || arg == 1) { WRITE_ENTER(&ipf_state); ret = fr_state_flush(arg, 6); RWLOCK_EXIT(&ipf_state); - error = IWCOPY((caddr_t)&ret, data, sizeof(ret)); + BCOPYOUT((char *)&ret, data, sizeof(ret)); } else error = EINVAL; break; #endif #ifdef IPFILTER_LOG + /* + * Flush the state log. + */ case SIOCIPFFB : if (!(mode & FWRITE)) error = EPERM; @@ -370,23 +467,51 @@ int mode; int tmp; tmp = ipflog_clear(IPL_LOGSTATE); - IWCOPY((char *)&tmp, data, sizeof(tmp)); + BCOPYOUT((char *)&tmp, data, sizeof(tmp)); } break; -#endif - case SIOCGETFS : - error = IWCOPYPTR((caddr_t)fr_statetstats(), data, - sizeof(ips_stat_t)); + /* + * Turn logging of state information on/off. + */ + case SIOCSETLG : + if (!(mode & FWRITE)) + error = EPERM; + else { + BCOPYIN((char *)data, (char *)&ipstate_logging, + sizeof(ipstate_logging)); + } + break; + /* + * Return the current state of logging. 
+ */ + case SIOCGETLG : + BCOPYOUT((char *)&ipstate_logging, (char *)data, + sizeof(ipstate_logging)); break; + /* + * Return the number of bytes currently waiting to be read. + */ case FIONREAD : -#ifdef IPFILTER_LOG - arg = (int)iplused[IPL_LOGSTATE]; - error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg)); + arg = iplused[IPL_LOGSTATE]; /* returned in an int */ + BCOPYOUT((char *)&arg, data, sizeof(arg)); + break; #endif + /* + * Get the current state statistics. + */ + case SIOCGETFS : + error = fr_outobj(data, fr_statetstats(), IPFOBJ_STATESTAT); break; + /* + * Lock/Unlock the state table. (Locking prevents any changes, which + * means no packets match). + */ case SIOCSTLCK : - error = fr_lock(data, &fr_state_lock); + fr_lock(data, &fr_state_lock); break; + /* + * Add an entry to the current state table. + */ case SIOCSTPUT : if (!fr_state_lock) { error = EACCES; @@ -394,6 +519,9 @@ int mode; } error = fr_stputent(data); break; + /* + * Get a state table entry. + */ case SIOCSTGET : if (!fr_state_lock) { error = EACCES; @@ -409,22 +537,30 @@ int mode; } -/* - * Copy out state information from the kernel to a user space process. - */ +/* ------------------------------------------------------------------------ */ +/* Function: fr_stgetent */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: data(I) - pointer to state structure to retrieve from table */ +/* */ +/* Copy out state information from the kernel to a user space process. If */ +/* there is a filter rule associated with the state entry, copy that out */ +/* as well. The entry to copy out is taken from the value of "ips_next" in */ +/* the struct passed in and if not null and not found in the list of current*/ +/* state entries, the retrieval fails. 
*/ +/* ------------------------------------------------------------------------ */ int fr_stgetent(data) caddr_t data; { - register ipstate_t *is, *isn; + ipstate_t *is, *isn; ipstate_save_t ips; int error; - error = IRCOPYPTR(data, (caddr_t)&ips, sizeof(ips)); + error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); if (error) - return error; + return EFAULT; isn = ips.ips_next; - if (!isn) { + if (isn == NULL) { isn = ips_list; if (isn == NULL) { if (ips.ips_next == NULL) @@ -445,171 +581,272 @@ caddr_t data; } ips.ips_next = isn->is_next; bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); - if (isn->is_rule) + ips.ips_rule = isn->is_rule; + if (isn->is_rule != NULL) bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, sizeof(ips.ips_fr)); - error = IWCOPYPTR((caddr_t)&ips, data, sizeof(ips)); + error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); if (error) - error = EFAULT; - return error; + return EFAULT; + return 0; } +/* ------------------------------------------------------------------------ */ +/* Function: fr_stputent */ +/* Returns: int - 0 == success, != 0 == failure */ +/* Parameters: data(I) - pointer to state information struct */ +/* */ +/* This function implements the SIOCSTPUT ioctl: insert a state entry into */ +/* the state table. If the state info. includes a pointer to a filter rule */ +/* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ +/* output. 
*/ +/* ------------------------------------------------------------------------ */ int fr_stputent(data) caddr_t data; { - register ipstate_t *is, *isn; + ipstate_t *is, *isn; ipstate_save_t ips; int error, out, i; frentry_t *fr; char *name; - error = IRCOPYPTR(data, (caddr_t)&ips, sizeof(ips)); + error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); if (error) - return error; + return EFAULT; KMALLOC(isn, ipstate_t *); if (isn == NULL) return ENOMEM; bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); - fr = isn->is_rule; - if (fr != NULL) { - if (isn->is_flags & FI_NEWFR) { - KMALLOC(fr, frentry_t *); - if (fr == NULL) { - KFREE(isn); - return ENOMEM; - } - bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); - out = fr->fr_flags & FR_OUTQUE ? 1 : 0; - isn->is_rule = fr; - ips.ips_is.is_rule = fr; + bzero((char *)isn, offsetof(struct ipstate, is_pkts)); + isn->is_sti.tqe_pnext = NULL; + isn->is_sti.tqe_next = NULL; + isn->is_sti.tqe_ifq = NULL; + isn->is_sti.tqe_parent = isn; + isn->is_ifp[0] = NULL; + isn->is_ifp[1] = NULL; + isn->is_ifp[2] = NULL; + isn->is_ifp[3] = NULL; + isn->is_sync = NULL; + fr = ips.ips_rule; + + if (fr == NULL) { + READ_ENTER(&ipf_state); + fr_stinsert(isn, 0); + RWLOCK_EXIT(&ipf_state); + return 0; + } - /* - * Look up all the interface names in the rule. - */ - for (i = 0; i < 4; i++) { - name = fr->fr_ifnames[i]; - if ((name[1] == '\0') && - ((name[0] == '-') || (name[0] == '*'))) { - fr->fr_ifas[i] = NULL; - } else if (*name != '\0') { - fr->fr_ifas[i] = GETUNIT(name, - fr->fr_v); - if (fr->fr_ifas[i] == NULL) - fr->fr_ifas[i] = (void *)-1; - else { - strncpy(isn->is_ifname[i], - IFNAME(fr->fr_ifas[i]), - IFNAMSIZ); - } - } - isn->is_ifp[out] = fr->fr_ifas[i]; - } + if (isn->is_flags & SI_NEWFR) { + KMALLOC(fr, frentry_t *); + if (fr == NULL) { + KFREE(isn); + return ENOMEM; + } + bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); + out = fr->fr_flags & FR_OUTQUE ? 
1 : 0; + isn->is_rule = fr; + ips.ips_is.is_rule = fr; + MUTEX_NUKE(&fr->fr_lock); + MUTEX_INIT(&fr->fr_lock, "state filter rule lock"); - /* - * send a copy back to userland of what we ended up - * to allow for verification. - */ - error = IWCOPYPTR((caddr_t)&ips, data, sizeof(ips)); - if (error) { - KFREE(isn); - KFREE(fr); - return EFAULT; - } - } else { - for (is = ips_list; is; is = is->is_next) - if (is->is_rule == fr) - break; - if (!is) { - KFREE(isn); - return ESRCH; + /* + * Look up all the interface names in the rule. + */ + for (i = 0; i < 4; i++) { + name = fr->fr_ifnames[i]; + fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v); + name = isn->is_ifname[i]; + isn->is_ifp[i] = fr_resolvenic(name, isn->is_v); + } + + fr->fr_ref = 0; + fr->fr_dsize = 0; + fr->fr_data = NULL; + + fr_resolvedest(&fr->fr_tif, fr->fr_v); + fr_resolvedest(&fr->fr_dif, fr->fr_v); + + /* + * send a copy back to userland of what we ended up + * to allow for verification. + */ + error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); + if (error) { + KFREE(isn); + MUTEX_DESTROY(&fr->fr_lock); + KFREE(fr); + return EFAULT; + } + READ_ENTER(&ipf_state); + fr_stinsert(isn, 0); + RWLOCK_EXIT(&ipf_state); + + } else { + READ_ENTER(&ipf_state); + for (is = ips_list; is; is = is->is_next) + if (is->is_rule == fr) { + fr_stinsert(isn, 0); + break; } + + if (is == NULL) { + KFREE(isn); + isn = NULL; } + RWLOCK_EXIT(&ipf_state); + + return (isn == NULL) ? ESRCH : 0; } - fr_stinsert(isn); + return 0; } -/* - * Insert a state table entry manually. - */ -void fr_stinsert(is) -register ipstate_t *is; +/* ------------------------------------------------------------------------ */ +/* Function: fr_stinsert */ +/* Returns: Nil */ +/* Parameters: is(I) - pointer to state structure */ +/* rev(I) - flag indicating forward/reverse direction of packet */ +/* */ +/* Inserts a state structure into the hash table (for lookups) and the list */ +/* of state entries (for enumeration). 
Resolves all of the interface names */ +/* to pointers and adjusts running stats for the hash table as appropriate. */ +/* */ +/* Locking: it is assumed that some kind of lock on ipf_state is held. */ +/* ------------------------------------------------------------------------ */ +void fr_stinsert(is, rev) +ipstate_t *is; +int rev; { - register u_int hv = is->is_hv; - char *name; + frentry_t *fr; + u_int hv; int i; - MUTEX_INIT(&is->is_lock, "ipf state entry", NULL); + MUTEX_INIT(&is->is_lock, "ipf state entry"); + + fr = is->is_rule; + if (fr != NULL) { + MUTEX_ENTER(&fr->fr_lock); + fr->fr_ref++; + fr->fr_statecnt++; + MUTEX_EXIT(&fr->fr_lock); + } /* * Look up all the interface names in the state entry. */ for (i = 0; i < 4; i++) { - name = is->is_ifname[i]; - if ((name[1] == '\0') && - ((name[0] == '-') || (name[0] == '*'))) { - is->is_ifp[0] = NULL; - } else if (*name != '\0') { - is->is_ifp[i] = GETUNIT(name, is->is_v); - if (is->is_ifp[i] == NULL) - is->is_ifp[i] = (void *)-1; - } + if (is->is_ifp[i] != NULL) + continue; + is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v); } + /* + * If we could trust is_hv, then the modulous would not be needed, but + * when running with IPFILTER_SYNC, this stops bad values. + */ + hv = is->is_hv % fr_statesize; + is->is_hv = hv; + + /* + * We need to get both of these locks...the first because it is + * possible that once the insert is complete another packet might + * come along, match the entry and want to update it. + */ + MUTEX_ENTER(&is->is_lock); + MUTEX_ENTER(&ipf_stinsert); /* * add into list table. 
*/ - if (ips_list) + if (ips_list != NULL) ips_list->is_pnext = &is->is_next; is->is_pnext = &ips_list; is->is_next = ips_list; ips_list = is; - if (ips_table[hv]) + + if (ips_table[hv] != NULL) ips_table[hv]->is_phnext = &is->is_hnext; else ips_stats.iss_inuse++; is->is_phnext = ips_table + hv; is->is_hnext = ips_table[hv]; ips_table[hv] = is; + ips_stats.iss_bucketlen[hv]++; ips_num++; + MUTEX_EXIT(&ipf_stinsert); + + fr_setstatequeue(is, rev); + MUTEX_EXIT(&is->is_lock); } -/* - * Create a new ipstate structure and hang it off the hash table. - */ -ipstate_t *fr_addstate(ip, fin, stsave, flags) -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: fr_addstate */ +/* Returns: ipstate_t* - NULL == failure, else pointer to new state */ +/* Parameters: fin(I) - pointer to packet information */ +/* stsave(O) - pointer to place to save pointer to created */ +/* state structure. */ +/* flags(I) - flags to use when creating the structure */ +/* */ +/* Creates a new IP state structure from the packet information collected. */ +/* Inserts it into the state table and appends to the bottom of the active */ +/* list. If the capacity of the table has reached the maximum allowed then */ +/* the call will fail and a flush is scheduled for the next timeout call. 
*/ +/* ------------------------------------------------------------------------ */ +ipstate_t *fr_addstate(fin, stsave, flags) fr_info_t *fin; ipstate_t **stsave; u_int flags; { - register tcphdr_t *tcp = NULL; - register ipstate_t *is; - register u_int hv; + ipstate_t *is, ips; struct icmp *ic; - ipstate_t ips; - int out, ws; - u_int pass; + u_int pass, hv; + frentry_t *fr; + tcphdr_t *tcp; + grehdr_t *gre; void *ifp; + int out; - if (fr_state_lock || (fin->fin_off != 0) || (fin->fin_fl & FI_SHORT) || - (fin->fin_misc & FM_BADSTATE)) + if (fr_state_lock || + (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) return NULL; - if (ips_num == fr_statemax) { - ips_stats.iss_max++; + + if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) + return NULL; + + fr = fin->fin_fr; + if ((fr->fr_statemax == 0) && (ips_num == fr_statemax)) { + ATOMIC_INCL(ips_stats.iss_max); fr_state_doflush = 1; return NULL; } + + /* + * If a "keep state" rule has reached the maximum number of references + * to it, then schedule an automatic flush in case we can clear out + * some "dead old wood". + */ + if ((fr != NULL) && (fr->fr_statemax != 0) && + (fr->fr_statecnt >= fr->fr_statemax)) { + MUTEX_EXIT(&fr->fr_lock); + ATOMIC_INCL(ips_stats.iss_maxref); + fr_state_doflush = 1; + return NULL; + } + + pass = (fr == NULL) ? 0 : fr->fr_flags; + + ic = NULL; + tcp = NULL; out = fin->fin_out; is = &ips; bzero((char *)is, sizeof(*is)); - ips.is_age = 1; + is->is_die = 1 + fr_ticks; + /* * Copy and calculate... */ @@ -620,12 +857,22 @@ u_int flags; hv += is->is_daddr; #ifdef USE_INET6 if (fin->fin_v == 6) { + /* + * For ICMPv6, we check to see if the destination address is + * a multicast address. If it is, do not include it in the + * calculation of the hash because the correct reply will come + * back from a real address, not a multicast address. + */ if ((is->is_p == IPPROTO_ICMPV6) && IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) { /* * So you can do keep state with neighbour discovery. 
+ * + * Here we could use the address from the neighbour + * solicit message to put in the state structure and + * we could use that without a wildcard flag too... */ - flags |= FI_W_DADDR; + flags |= SI_W_DADDR; hv -= is->is_daddr; } else { hv += is->is_dst.i6[1]; @@ -640,36 +887,30 @@ u_int flags; switch (is->is_p) { - int off; - #ifdef USE_INET6 case IPPROTO_ICMPV6 : - ic = (struct icmp *)fin->fin_dp; - if ((ic->icmp_type & ICMP6_INFOMSG_MASK) == 0) - return NULL; + ic = fin->fin_dp; switch (ic->icmp_type) { case ICMP6_ECHO_REQUEST : - is->is_icmp.ics_type = ic->icmp_type; - hv += (is->is_icmp.ics_id = ic->icmp_id); - hv += (is->is_icmp.ics_seq = ic->icmp_seq); + is->is_icmp.ici_type = ic->icmp_type; + hv += (is->is_icmp.ici_id = ic->icmp_id); break; case ICMP6_MEMBERSHIP_QUERY : case ND_ROUTER_SOLICIT : case ND_NEIGHBOR_SOLICIT : case ICMP6_NI_QUERY : - is->is_icmp.ics_type = ic->icmp_type; + is->is_icmp.ici_type = ic->icmp_type; break; default : return NULL; } ATOMIC_INCL(ips_stats.iss_icmp); - is->is_age = fr_icmptimeout; break; #endif case IPPROTO_ICMP : - ic = (struct icmp *)fin->fin_dp; + ic = fin->fin_dp; switch (ic->icmp_type) { @@ -677,18 +918,28 @@ u_int flags; case ICMP_TSTAMP : case ICMP_IREQ : case ICMP_MASKREQ : - is->is_icmp.ics_type = ic->icmp_type; - hv += (is->is_icmp.ics_id = ic->icmp_id); - hv += (is->is_icmp.ics_seq = ic->icmp_seq); + is->is_icmp.ici_type = ic->icmp_type; + hv += (is->is_icmp.ici_id = ic->icmp_id); break; default : return NULL; } ATOMIC_INCL(ips_stats.iss_icmp); - is->is_age = fr_icmptimeout; break; + + case IPPROTO_GRE : + gre = fin->fin_dp; + + is->is_gre.gs_flags = gre->gr_flags; + is->is_gre.gs_ptype = gre->gr_ptype; + if (GRE_REV(is->is_gre.gs_flags) == 1) { + is->is_call[0] = fin->fin_data[0]; + is->is_call[1] = fin->fin_data[1]; + } + break; + case IPPROTO_TCP : - tcp = (tcphdr_t *)fin->fin_dp; + tcp = fin->fin_dp; if (tcp->th_flags & TH_RST) return NULL; @@ -698,32 +949,55 @@ u_int flags; */ is->is_sport = 
htons(fin->fin_data[0]); is->is_dport = htons(fin->fin_data[1]); - if ((flags & (FI_W_DPORT|FI_W_SPORT)) == 0) { + if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { hv += is->is_sport; hv += is->is_dport; } - if ((flags & FI_IGNOREPKT) == 0) { + + /* + * If this is a real packet then initialise fields in the + * state information structure from the TCP header information. + */ + + is->is_maxdwin = 1; + is->is_maxswin = ntohs(tcp->th_win); + if (is->is_maxswin == 0) + is->is_maxswin = 1; + + if ((fin->fin_flx & FI_IGNORE) == 0) { is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen - - (off = (tcp->th_off << 2)) + + (TCP_OFF(tcp) << 2) + ((tcp->th_flags & TH_SYN) ? 1 : 0) + ((tcp->th_flags & TH_FIN) ? 1 : 0); is->is_maxsend = is->is_send; - if ((tcp->th_flags & TH_SYN) && - ((tcp->th_off << 2) >= (sizeof(*tcp) + 4))) { - ws = fr_tcpoptions(tcp); - if (ws >= 0) - is->is_swscale = ws; + /* + * Window scale option is only present in + * SYN/SYN-ACK packet. + */ + if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) == + TH_SYN && + (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { + if (fr_tcpoptions(fin, tcp, + &is->is_tcp.ts_data[0])) + is->is_swinflags = TCP_WSCALE_SEEN| + TCP_WSCALE_FIRST; } - } - is->is_maxdwin = 1; - is->is_maxswin = ntohs(tcp->th_win); - if (is->is_maxswin == 0) - is->is_maxswin = 1; + if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) { + fr_checknewisn(fin, is); + fr_fixoutisn(fin, is); + } - if ((tcp->th_flags & TH_OPENING) == TH_SYN) - is->is_fsm = 1; + if ((tcp->th_flags & TH_OPENING) == TH_SYN) + flags |= IS_TCPFSM; + else { + is->is_maxdwin = is->is_maxswin * 2; + is->is_dend = ntohl(tcp->th_ack); + is->is_maxdend = ntohl(tcp->th_ack); + is->is_maxdwin *= 2; + } + } /* * If we're creating state for a starting connection, start the @@ -734,213 +1008,462 @@ u_int flags; break; case IPPROTO_UDP : - tcp = (tcphdr_t *)fin->fin_dp; + tcp = fin->fin_dp; is->is_sport = htons(fin->fin_data[0]); is->is_dport = htons(fin->fin_data[1]); - if ((flags & 
(FI_W_DPORT|FI_W_SPORT)) == 0) { - hv += is->is_sport; - hv += is->is_dport; + if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { + hv += tcp->th_dport; + hv += tcp->th_sport; } ATOMIC_INCL(ips_stats.iss_udp); - is->is_age = fr_udptimeout; break; + default : - is->is_age = fr_udptimeout; break; } + hv = DOUBLE_HASH(hv); + is->is_hv = hv; + is->is_rule = fr; + is->is_flags = flags & IS_INHERITED; + + /* + * Look for identical state. + */ + for (is = ips_table[is->is_hv % fr_statesize]; is != NULL; + is = is->is_hnext) { + if (bcmp(&ips.is_src, &is->is_src, + offsetof(struct ipstate, is_ps) - + offsetof(struct ipstate, is_src)) == 0) + break; + } + if (is != NULL) + return NULL; + if (ips_stats.iss_bucketlen[hv] >= fr_state_maxbucket) { + ATOMIC_INCL(ips_stats.iss_bucketfull); + return NULL; + } KMALLOC(is, ipstate_t *); if (is == NULL) { ATOMIC_INCL(ips_stats.iss_nomem); return NULL; } bcopy((char *)&ips, (char *)is, sizeof(*is)); - hv %= fr_statesize; - is->is_hv = hv; - is->is_rule = fin->fin_fr; - if (is->is_rule != NULL) { - is->is_group = is->is_rule->fr_group; - ATOMIC_INC32(is->is_rule->fr_ref); - pass = is->is_rule->fr_flags; - is->is_frage[0] = is->is_rule->fr_age[0]; - is->is_frage[1] = is->is_rule->fr_age[1]; - if (is->is_frage[0] != 0) - is->is_age = is->is_frage[0]; - - is->is_ifp[(out << 1) + 1] = is->is_rule->fr_ifas[1]; - is->is_ifp[(1 - out) << 1] = is->is_rule->fr_ifas[2]; - is->is_ifp[((1 - out) << 1) + 1] = is->is_rule->fr_ifas[3]; - - if (((ifp = is->is_rule->fr_ifas[1]) != NULL) && - (ifp != (void *)-1)) - strncpy(is->is_ifname[(out << 1) + 1], - IFNAME(ifp), IFNAMSIZ); - if (((ifp = is->is_rule->fr_ifas[2]) != NULL) && - (ifp != (void *)-1)) - strncpy(is->is_ifname[(1 - out) << 1], - IFNAME(ifp), IFNAMSIZ); - if (((ifp = is->is_rule->fr_ifas[3]) != NULL) && - (ifp != (void *)-1)) - strncpy(is->is_ifname[((1 - out) << 1) + 1], - IFNAME(ifp), IFNAMSIZ); - } else + /* + * Do not do the modulous here, it is done in fr_stinsert(). 
+ */ + if (fr != NULL) { + (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN); + if (fr->fr_age[0] != 0) { + is->is_tqehead[0] = fr_addtimeoutqueue(&ips_utqe, + fr->fr_age[0]); + is->is_sti.tqe_flags |= TQE_RULEBASED; + } + if (fr->fr_age[1] != 0) { + is->is_tqehead[1] = fr_addtimeoutqueue(&ips_utqe, + fr->fr_age[1]); + is->is_sti.tqe_flags |= TQE_RULEBASED; + } + + is->is_tag = fr->fr_logtag; + + is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; + is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; + is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; + + if (((ifp = fr->fr_ifas[1]) != NULL) && + (ifp != (void *)-1)) { + COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1]); + } + if (((ifp = fr->fr_ifas[2]) != NULL) && + (ifp != (void *)-1)) { + COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1]); + } + if (((ifp = fr->fr_ifas[3]) != NULL) && + (ifp != (void *)-1)) { + COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1]); + } + } else { pass = fr_flags; + is->is_tag = FR_NOLOGTAG; + } is->is_ifp[out << 1] = fin->fin_ifp; - strncpy(is->is_ifname[out << 1], IFNAME(fin->fin_ifp), IFNAMSIZ); - - WRITE_ENTER(&ipf_state); + if (fin->fin_ifp != NULL) { + COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1]); + } + /* + * It may seem strange to set is_ref to 2, but fr_check() will call + * fr_statederef() after calling fr_addstate() and the idea is to + * have it exist at the end of fr_check() with is_ref == 1. 
+ */ + is->is_ref = 2; is->is_pass = pass; - if ((flags & FI_IGNOREPKT) == 0) { - is->is_pkts = 1; - is->is_bytes = fin->fin_dlen + fin->fin_hlen; + is->is_pkts[0] = 0, is->is_bytes[0] = 0; + is->is_pkts[1] = 0, is->is_bytes[1] = 0; + is->is_pkts[2] = 0, is->is_bytes[2] = 0; + is->is_pkts[3] = 0, is->is_bytes[3] = 0; + if ((fin->fin_flx & FI_IGNORE) == 0) { + is->is_pkts[out] = 1; + is->is_bytes[out] = fin->fin_plen; + is->is_flx[out][0] = fin->fin_flx & FI_CMP; + is->is_flx[out][0] &= ~FI_OOW; } + + if (pass & FR_STSTRICT) + is->is_flags |= IS_STRICT; + + if (pass & FR_STATESYNC) + is->is_flags |= IS_STATESYNC; + /* * We want to check everything that is a property of this packet, * but we don't (automatically) care about it's fragment status as * this may change. */ is->is_v = fin->fin_v; - is->is_rulen = fin->fin_rule; - is->is_opt = fin->fin_fi.fi_optmsk; + is->is_opt = fin->fin_optmsk; is->is_optmsk = 0xffffffff; - is->is_sec = fin->fin_fi.fi_secmsk; + is->is_sec = fin->fin_secmsk; is->is_secmsk = 0xffff; - is->is_auth = fin->fin_fi.fi_auth; + is->is_auth = fin->fin_auth; is->is_authmsk = 0xffff; - is->is_flags = fin->fin_fl & FI_CMP; - is->is_flags |= FI_CMP << 4; - is->is_flags |= flags & (FI_WILDP|FI_WILDA); - if (flags & (FI_WILDP|FI_WILDA)) - ips_wild++; + if (flags & (SI_WILDP|SI_WILDA)) { + ATOMIC_INCL(ips_stats.iss_wild); + } + is->is_rulen = fin->fin_rule; + if (pass & FR_LOGFIRST) is->is_pass &= ~(FR_LOGFIRST|FR_LOG); - fr_stinsert(is); + + READ_ENTER(&ipf_state); is->is_me = stsave; - if (is->is_p == IPPROTO_TCP) { - fr_tcp_age(&is->is_age, is->is_state, fin, - 0, is->is_fsm); /* 0 = packet from the source */ + + fr_stinsert(is, fin->fin_rev); + + if (fin->fin_p == IPPROTO_TCP) { + /* + * If we're creating state for a starting connection, start the + * timer on it as we'll never see an error if it fails to + * connect. 
+ */ + MUTEX_ENTER(&is->is_lock); + (void) fr_tcp_age(&is->is_sti, fin, ips_tqtqb, is->is_flags); + MUTEX_EXIT(&is->is_lock); +#ifdef IPFILTER_SCAN + if ((is->is_flags & SI_CLONE) == 0) + (void) ipsc_attachis(is); +#endif } -#ifdef IPFILTER_LOG - ipstate_log(is, ISL_NEW); +#ifdef IPFILTER_SYNC + if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) + is->is_sync = ipfsync_new(SMC_STATE, fin, is); #endif + if (ipstate_logging) + ipstate_log(is, ISL_NEW); + RWLOCK_EXIT(&ipf_state); - fin->fin_rev = IP6NEQ(is->is_dst, fin->fin_fi.fi_dst); - if ((fin->fin_fl & FI_FRAG) && (pass & FR_KEEPFRAG)) - ipfr_newfrag(ip, fin); + fin->fin_state = is; + fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); + fin->fin_flx |= FI_STATE; + if (fin->fin_flx & FI_FRAG) + (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); + return is; } -static int fr_tcpoptions(tcp) +/* ------------------------------------------------------------------------ */ +/* Function: fr_tcpoptions */ +/* Returns: int - 1 == packet matches state entry, 0 == it does not */ +/* Parameters: fin(I) - pointer to packet information */ +/* tcp(I) - pointer to TCP packet header */ +/* td(I) - pointer to TCP data held as part of the state */ +/* */ +/* Look after the TCP header for any options and deal with those that are */ +/* present. Record details about those that we recogise. 
*/ +/* ------------------------------------------------------------------------ */ +static int fr_tcpoptions(fin, tcp, td) +fr_info_t *fin; tcphdr_t *tcp; +tcpdata_t *td; { - u_char *opt, *last; - int wscale; + int off, mlen, ol, i, len, retval; + char buf[64], *s, opt; + mb_t *m = NULL; + + off = fin->fin_hlen + sizeof(*tcp); + len = (TCP_OFF(tcp) << 2) - sizeof(*tcp); + if (fin->fin_plen < off + len) + return 0; - opt = (u_char *) (tcp + 1); - last = ((u_char *)tcp) + (tcp->th_off << 2); + m = fin->fin_m; + off += fin->fin_ipoff; + mlen = MSGDSIZE(m) - off; + if (len > mlen) { + len = mlen; + retval = 0; + } else { + retval = 1; + } - /* If we don't find wscale here, we need to clear it */ - wscale = -2; + COPYDATA(m, off, len, buf); - /* Termination condition picked such that opt[0 .. 2] exist */ - while ((opt < last - 2) && (*opt != TCPOPT_EOL)) { - switch (*opt) { - case TCPOPT_NOP: - opt++; - continue; - case TCPOPT_WSCALE: - /* Proper length ? */ - if (opt[1] == 3) { - if (opt[2] > 14) - wscale = 14; - else - wscale = opt[2]; - } + for (s = buf; len > 0; ) { + opt = *s; + if (opt == TCPOPT_EOL) break; - default: - /* Unknown options must be two bytes+ */ - if (opt[1] < 2) + else if (opt == TCPOPT_NOP) + ol = 1; + else { + if (len < 2) break; - opt += opt[1]; - continue; + ol = (int)*(s + 1); + if (ol < 2 || ol > len) + break; + + /* + * Extract the TCP options we are interested in out of + * the header and store them in the the tcpdata struct. + */ + switch (opt) + { + case TCPOPT_WINDOW : + if (ol == TCPOLEN_WINDOW) { + i = (int)*(s + 2); + if (i > TCP_WSCALE_MAX) + i = TCP_WSCALE_MAX; + else if (i < 0) + i = 0; + td->td_winscale = i; + } + break; + case TCPOPT_MAXSEG : + /* + * So, if we wanted to set the TCP MAXSEG, + * it should be done here... 
+ */ + if (ol == TCPOLEN_MAXSEG) { + i = (int)*(s + 2); + i <<= 8; + i += (int)*(s + 3); + td->td_maxseg = i; + } + break; + } } - break; + len -= ol; + s += ol; } - return wscale; + return retval; } - -/* - * check to see if a packet with TCP headers fits within the TCP window. - * change timeout depending on whether new packet is a SYN-ACK returning for a - * SYN or a RST or FIN which indicate time to close up shop. - */ -int fr_tcpstate(is, fin, ip, tcp) -register ipstate_t *is; +/* ------------------------------------------------------------------------ */ +/* Function: fr_tcpstate */ +/* Returns: int - 1 == packet matches state entry, 0 == it does not */ +/* Parameters: fin(I) - pointer to packet information */ +/* tcp(I) - pointer to TCP packet header */ +/* is(I) - pointer to master state structure */ +/* */ +/* Check to see if a packet with TCP headers fits within the TCP window. */ +/* Change timeout depending on whether new packet is a SYN-ACK returning */ +/* for a SYN or a RST or FIN which indicate time to close up shop. */ +/* ------------------------------------------------------------------------ */ +static int fr_tcpstate(fin, tcp, is) fr_info_t *fin; -ip_t *ip; tcphdr_t *tcp; +ipstate_t *is; { - register tcp_seq seq, ack, end; - register int ackskew; + int source, ret = 0, flags; tcpdata_t *fdata, *tdata; - u_32_t win, maxwin; - int ret = 0, off; - int source; - int wscale; - /* - * Find difference between last checked packet and this packet. - */ - source = IP6EQ(fin->fin_fi.fi_src, is->is_src); - if (source && (ntohs(is->is_sport) != fin->fin_data[0])) + source = !fin->fin_rev; + if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) && + (ntohs(is->is_sport) != fin->fin_data[0])) source = 0; fdata = &is->is_tcp.ts_data[!source]; tdata = &is->is_tcp.ts_data[source]; - off = tcp->th_off << 2; - seq = ntohl(tcp->th_seq); - ack = ntohl(tcp->th_ack); - win = ntohs(tcp->th_win); - end = seq + fin->fin_dlen - off + - ((tcp->th_flags & TH_SYN) ? 
1 : 0) + - ((tcp->th_flags & TH_FIN) ? 1 : 0); + MUTEX_ENTER(&is->is_lock); + if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { +#ifdef IPFILTER_SCAN + if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { + ipsc_packet(fin, is); + if (FR_ISBLOCK(is->is_pass)) { + MUTEX_EXIT(&is->is_lock); + return 1; + } + } +#endif + + /* + * Nearing end of connection, start timeout. + */ + ret = fr_tcp_age(&is->is_sti, fin, ips_tqtqb, is->is_flags); + if (ret == 0) { + MUTEX_EXIT(&is->is_lock); + return 0; + } + + /* + * set s0's as appropriate. Use syn-ack packet as it + * contains both pieces of required information. + */ + /* + * Window scale option is only present in SYN/SYN-ACK packet. + * Compare with ~TH_FIN to mask out T/TCP setups. + */ + flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL); + if (flags == (TH_SYN|TH_ACK)) { + is->is_s0[source] = ntohl(tcp->th_ack); + is->is_s0[!source] = ntohl(tcp->th_seq) + 1; + if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2)) && + tdata->td_winscale) { + if (fr_tcpoptions(fin, tcp, fdata)) { + fdata->td_winflags = TCP_WSCALE_SEEN| + TCP_WSCALE_FIRST; + } else { + if (!fdata->td_winscale) + tdata->td_winscale = 0; + } + } + if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) + fr_checknewisn(fin, is); + } else if (flags == TH_SYN) { + is->is_s0[source] = ntohl(tcp->th_seq) + 1; + if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) + if (fr_tcpoptions(fin, tcp, tdata)) { + tdata->td_winflags = TCP_WSCALE_SEEN| + TCP_WSCALE_FIRST; + } + + if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) + fr_checknewisn(fin, is); + + } + ret = 1; + } else + fin->fin_flx |= FI_OOW; + MUTEX_EXIT(&is->is_lock); + return ret; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_checknewisn */ +/* Returns: Nil */ +/* Parameters: fin(I) - pointer to packet information */ +/* is(I) - pointer to master state structure */ +/* */ +/* Check to see if this TCP connection is expecting and needs a new */ +/* sequence 
number for a particular direction of the connection. */ +/* */ +/* NOTE: This does not actually change the sequence numbers, only gets new */ +/* one ready. */ +/* ------------------------------------------------------------------------ */ +static void fr_checknewisn(fin, is) +fr_info_t *fin; +ipstate_t *is; +{ + u_32_t sumd, old, new; + tcphdr_t *tcp; + int i; + + i = fin->fin_rev; + tcp = fin->fin_dp; - if ((tcp->th_flags & TH_SYN) && (off >= sizeof(*tcp) + 4)) - wscale = fr_tcpoptions(tcp); + if (((i == 0) && !(is->is_flags & IS_ISNSYN)) || + ((i == 1) && !(is->is_flags & IS_ISNACK))) { + old = ntohl(tcp->th_seq); + new = fr_newisn(fin); + is->is_isninc[i] = new - old; + CALC_SUMD(old, new, sumd); + is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16); + + is->is_flags |= ((i == 0) ? IS_ISNSYN : IS_ISNACK); + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_tcpinwindow */ +/* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */ +/* Parameters: fin(I) - pointer to packet information */ +/* fdata(I) - pointer to tcp state informatio (forward) */ +/* tdata(I) - pointer to tcp state informatio (reverse) */ +/* tcp(I) - pointer to TCP packet header */ +/* */ +/* Given a packet has matched addresses and ports, check to see if it is */ +/* within the TCP data window. In a show of generosity, allow packets that */ +/* are within the window space behind the current sequence # as well. */ +/* ------------------------------------------------------------------------ */ +int fr_tcpinwindow(fin, fdata, tdata, tcp, flags) +fr_info_t *fin; +tcpdata_t *fdata, *tdata; +tcphdr_t *tcp; +int flags; +{ + tcp_seq seq, ack, end; + int ackskew, tcpflags; + u_32_t win, maxwin; + + /* + * Find difference between last checked packet and this packet. 
+ */ + tcpflags = tcp->th_flags; + seq = ntohl(tcp->th_seq); + ack = ntohl(tcp->th_ack); + if (tcpflags & TH_SYN) + win = ntohs(tcp->th_win); else - wscale = -1; + win = ntohs(tcp->th_win) << fdata->td_winscale; + if (win == 0) + win = 1; - MUTEX_ENTER(&is->is_lock); + /* + * if window scaling is present, the scaling is only allowed + * for windows not in the first SYN packet. In that packet the + * window is 65535 to specify the largest window possible + * for receivers not implementing the window scale option. + * Currently, we do not assume TTCP here. That means that + * if we see a second packet from a host (after the initial + * SYN), we can assume that the receiver of the SYN did + * already send back the SYN/ACK (and thus that we know if + * the receiver also does window scaling) + */ + if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) { + if (tdata->td_winflags & TCP_WSCALE_SEEN) { + fdata->td_winflags &= ~TCP_WSCALE_FIRST; + fdata->td_maxwin = win; + } else { + fdata->td_winscale = 0; + fdata->td_winflags = 0; + tdata->td_winscale = 0; + tdata->td_winflags = 0; + } + } - if (wscale >= 0) - fdata->td_wscale = wscale; - else if (wscale == -2) - fdata->td_wscale = tdata->td_wscale = 0; - if (!(tcp->th_flags & TH_SYN)) - win <<= fdata->td_wscale; + end = seq + fin->fin_dlen - (TCP_OFF(tcp) << 2) + + ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0); if ((fdata->td_end == 0) && - (!is->is_fsm || ((tcp->th_flags & TH_OPENING) == TH_OPENING))) { + (!(flags & IS_TCPFSM) || + ((tcpflags & TH_OPENING) == TH_OPENING))) { /* * Must be a (outgoing) SYN-ACK in reply to a SYN. 
*/ fdata->td_end = end; fdata->td_maxwin = 1; fdata->td_maxend = end + win; - if (win == 0) - fdata->td_maxend++; } - if (!(tcp->th_flags & TH_ACK)) { /* Pretend an ack was sent */ + if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */ ack = tdata->td_end; - } else if (((tcp->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) && + } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) && (ack == 0)) { /* gross hack to get around certain broken tcp stacks */ ack = tdata->td_end; @@ -952,14 +1475,26 @@ tcphdr_t *tcp; maxwin = tdata->td_maxwin; ackskew = tdata->td_end - ack; + /* + * Strict sequencing only allows in-order delivery. + */ + if ((flags & IS_STRICT) != 0) { + if (seq != fdata->td_end) { + return 0; + } + } + #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) #define SEQ_GT(a,b) ((int)((a) - (b)) > 0) - if ((SEQ_GE(fdata->td_maxend, end)) && + if ( +#if defined(_KERNEL) + (SEQ_GE(fdata->td_maxend, end)) && (SEQ_GE(seq, fdata->td_end - maxwin)) && +#endif /* XXX what about big packets */ #define MAXACKWINDOW 66000 - (-ackskew <= (MAXACKWINDOW << tdata->td_wscale)) && - ( ackskew <= (MAXACKWINDOW << tdata->td_wscale))) { + (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) && + ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { /* if ackskew < 0 then this should be due to fragmented * packets. There is no way to know the length of the @@ -975,220 +1510,358 @@ tcphdr_t *tcp; * Thus, when ackskew is negative but still seems to belong * to this session, we bump up the destinations end value. */ - /* - * Nearing end of connection, start timeout. - */ - /* source ? 
0 : 1 -> !source */ - if (fr_tcp_age(&is->is_age, is->is_state, fin, !source, - (int)is->is_fsm) == 0) { - if (ackskew < 0) - tdata->td_end = ack; - - /* update max window seen */ - if (fdata->td_maxwin < win) - fdata->td_maxwin = win; - if (SEQ_GT(end, fdata->td_end)) - fdata->td_end = end; - if (SEQ_GE(ack + win, tdata->td_maxend)) { - tdata->td_maxend = ack + win; - if (win == 0) - tdata->td_maxend++; - } - - ATOMIC_INCL(ips_stats.iss_hits); - ret = 1; - } + if (ackskew < 0) + tdata->td_end = ack; + + /* update max window seen */ + if (fdata->td_maxwin < win) + fdata->td_maxwin = win; + if (SEQ_GT(end, fdata->td_end)) + fdata->td_end = end; + if (SEQ_GE(ack + win, tdata->td_maxend)) + tdata->td_maxend = ack + win; + return 1; } - MUTEX_EXIT(&is->is_lock); - if ((ret == 0) && ((tcp->th_flags & TH_OPENING) != TH_SYN)) - fin->fin_misc |= FM_BADSTATE; - return ret; + return 0; } -/* - * Match a state table entry against an IP packet. - */ -static int fr_matchsrcdst(is, src, dst, fin, tcp) +/* ------------------------------------------------------------------------ */ +/* Function: fr_stclone */ +/* Returns: ipstate_t* - NULL == cloning failed, */ +/* else pointer to new state structure */ +/* Parameters: fin(I) - pointer to packet information */ +/* tcp(I) - pointer to TCP/UDP header */ +/* is(I) - pointer to master state structure */ +/* */ +/* Create a "duplcate" state table entry from the master. 
*/ +/* ------------------------------------------------------------------------ */ +static ipstate_t *fr_stclone(fin, tcp, is) +fr_info_t *fin; +tcphdr_t *tcp; ipstate_t *is; -union i6addr src, dst; +{ + ipstate_t *clone; + u_32_t send; + + if (ips_num == fr_statemax) { + ATOMIC_INCL(ips_stats.iss_max); + fr_state_doflush = 1; + return NULL; + } + KMALLOC(clone, ipstate_t *); + if (clone == NULL) + return NULL; + bcopy((char *)is, (char *)clone, sizeof(*clone)); + + MUTEX_NUKE(&clone->is_lock); + + clone->is_die = ONE_DAY + fr_ticks; + clone->is_state[0] = 0; + clone->is_state[1] = 0; + send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) + + ((tcp->th_flags & TH_SYN) ? 1 : 0) + + ((tcp->th_flags & TH_FIN) ? 1 : 0); + + if (fin->fin_rev == 1) { + clone->is_dend = send; + clone->is_maxdend = send; + clone->is_send = 0; + clone->is_maxswin = 1; + clone->is_maxdwin = ntohs(tcp->th_win); + if (clone->is_maxdwin == 0) + clone->is_maxdwin = 1; + } else { + clone->is_send = send; + clone->is_maxsend = send; + clone->is_dend = 0; + clone->is_maxdwin = 1; + clone->is_maxswin = ntohs(tcp->th_win); + if (clone->is_maxswin == 0) + clone->is_maxswin = 1; + } + + clone->is_flags &= ~SI_CLONE; + clone->is_flags |= SI_CLONED; + fr_stinsert(clone, fin->fin_rev); + MUTEX_ENTER(&clone->is_lock); + clone->is_ref = 1; + if (clone->is_p == IPPROTO_TCP) { + (void) fr_tcp_age(&clone->is_sti, fin, ips_tqtqb, + clone->is_flags); + } + MUTEX_EXIT(&clone->is_lock); +#ifdef IPFILTER_SCAN + (void) ipsc_attachis(is); +#endif +#ifdef IPFILTER_SYNC + if (is->is_flags & IS_STATESYNC) + clone->is_sync = ipfsync_new(SMC_STATE, fin, clone); +#endif + return clone; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_matchsrcdst */ +/* Returns: Nil */ +/* Parameters: fin(I) - pointer to packet information */ +/* is(I) - pointer to state structure */ +/* src(I) - pointer to source address */ +/* dst(I) - pointer to destination address */ +/* 
tcp(I) - pointer to TCP/UDP header */ +/* */ +/* Match a state table entry against an IP packet. The logic below is that */ +/* ret gets set to one if the match succeeds, else remains 0. If it is */ +/* still 0 after the test. no match. */ +/* ------------------------------------------------------------------------ */ +static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask) fr_info_t *fin; +ipstate_t *is; +i6addr_t *src, *dst; tcphdr_t *tcp; +u_32_t cmask; { - int ret = 0, rev, out, flags, idx; + int ret = 0, rev, out, flags, flx = 0, idx; u_short sp, dp; + u_32_t cflx; void *ifp; - rev = IP6NEQ(is->is_dst, dst); + rev = IP6_NEQ(&is->is_dst, dst); ifp = fin->fin_ifp; out = fin->fin_out; - flags = is->is_flags & (FI_WILDA|FI_WILDP); + flags = is->is_flags; sp = 0; dp = 0; if (tcp != NULL) { - flags = is->is_flags; - sp = tcp->th_sport; - dp = tcp->th_dport; - if (!rev) { - if (!(flags & FI_W_SPORT) && (sp != is->is_sport)) + sp = htons(fin->fin_sport); + dp = ntohs(fin->fin_dport); + } + if (!rev) { + if (tcp != NULL) { + if (!(flags & SI_W_SPORT) && (sp != is->is_sport)) rev = 1; - else if (!(flags & FI_W_DPORT) && (dp != is->is_dport)) + else if (!(flags & SI_W_DPORT) && (dp != is->is_dport)) rev = 1; } } idx = (out << 1) + rev; - if ((is->is_ifp[idx] == NULL && - (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) || + /* + * If the interface for this 'direction' is set, make sure it matches. + * An interface name that is not set matches any, as does a name of *. + */ + if ((is->is_ifp[idx] == NULL && + (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) || is->is_ifp[idx] == ifp) ret = 1; if (ret == 0) - return 0; + return NULL; ret = 0; + /* + * Match addresses and ports. 
+ */ if (rev == 0) { - if ((IP6EQ(is->is_dst, dst) || (flags & FI_W_DADDR)) && - (IP6EQ(is->is_src, src) || (flags & FI_W_SADDR)) && - (!tcp || ((sp == is->is_sport || flags & FI_W_SPORT) && - (dp == is->is_dport || flags & FI_W_DPORT)))) { - ret = 1; + if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) && + (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) { + if (tcp) { + if ((sp == is->is_sport || flags & SI_W_SPORT)&& + (dp == is->is_dport || flags & SI_W_DPORT)) + ret = 1; + } else { + ret = 1; + } } } else { - if ((IP6EQ(is->is_dst, src) || (flags & FI_W_DADDR)) && - (IP6EQ(is->is_src, dst) || (flags & FI_W_SADDR)) && - (!tcp || ((sp == is->is_dport || flags & FI_W_DPORT) && - (dp == is->is_sport || flags & FI_W_SPORT)))) { - ret = 1; + if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) && + (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) { + if (tcp) { + if ((dp == is->is_sport || flags & SI_W_SPORT)&& + (sp == is->is_dport || flags & SI_W_DPORT)) + ret = 1; + } else { + ret = 1; + } } } + if (ret == 0) - return 0; + return NULL; /* * Whether or not this should be here, is questionable, but the aim * is to get this out of the main line. */ if (tcp == NULL) - flags = is->is_flags & (FI_CMP|(FI_CMP<<4)); + flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED); - if (((fin->fin_fl & (flags >> 4)) != (flags & FI_CMP)) || - (fin->fin_fi.fi_optmsk != is->is_opt) || - (fin->fin_fi.fi_secmsk != is->is_sec) || - (fin->fin_fi.fi_auth != is->is_auth)) - return 0; + /* + * Only one of the source or destination address can be flaged as a + * wildcard. Fill in the missing address, if set. + * For IPv6, if the address being copied in is multicast, then + * don't reset the wild flag - multicast causes it to be set in the + * first place! 
+ */ + if ((flags & (SI_W_SADDR|SI_W_DADDR))) { + fr_ip_t *fi = &fin->fin_fi; - flags = is->is_flags & (FI_WILDA|FI_WILDP); - if ((flags & (FI_W_SADDR|FI_W_DADDR))) { - if ((flags & FI_W_SADDR) != 0) { + if ((flags & SI_W_SADDR) != 0) { if (rev == 0) { - is->is_src = fin->fin_fi.fi_src; +#ifdef USE_INET6 + if (is->is_v == 6 && + IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) + /*EMPTY*/; + else +#endif + { + is->is_src = fi->fi_src; + is->is_flags &= ~SI_W_SADDR; + } } else { - is->is_src = fin->fin_fi.fi_dst; +#ifdef USE_INET6 + if (is->is_v == 6 && + IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) + /*EMPTY*/; + else +#endif + { + is->is_src = fi->fi_dst; + is->is_flags &= ~SI_W_SADDR; + } } - } else if ((flags & FI_W_DADDR) != 0) { + } else if ((flags & SI_W_DADDR) != 0) { if (rev == 0) { - is->is_dst = fin->fin_fi.fi_dst; +#ifdef USE_INET6 + if (is->is_v == 6 && + IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) + /*EMPTY*/; + else +#endif + { + is->is_dst = fi->fi_dst; + is->is_flags &= ~SI_W_DADDR; + } } else { - is->is_dst = fin->fin_fi.fi_src; +#ifdef USE_INET6 + if (is->is_v == 6 && + IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) + /*EMPTY*/; + else +#endif + { + is->is_dst = fi->fi_src; + is->is_flags &= ~SI_W_DADDR; + } } } - is->is_flags &= ~(FI_W_SADDR|FI_W_DADDR); - if ((is->is_flags & (FI_WILDA|FI_WILDP)) == 0) - ips_wild--; + if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) { + ATOMIC_DECL(ips_stats.iss_wild); + } + } + + flx = fin->fin_flx & cmask; + cflx = is->is_flx[out][rev]; + + /* + * Match up any flags set from IP options. + */ + if ((cflx && (flx != (cflx & cmask))) || + ((fin->fin_optmsk & is->is_optmsk) != is->is_opt) || + ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) || + ((fin->fin_auth & is->is_authmsk) != is->is_auth)) + return NULL; + + /* + * Only one of the source or destination port can be flagged as a + * wildcard. When filling it in, fill in a copy of the matched entry + * if it has the cloning flag set. 
+ */ + if ((fin->fin_flx & FI_IGNORE) != 0) { + fin->fin_rev = rev; + return is; } - if ((flags & (FI_W_SPORT|FI_W_DPORT))) { - if ((flags & FI_W_SPORT) != 0) { + if ((flags & (SI_W_SPORT|SI_W_DPORT))) { + if ((flags & SI_CLONE) != 0) { + is = fr_stclone(fin, tcp, is); + if (is == NULL) + return NULL; + } else { + ATOMIC_DECL(ips_stats.iss_wild); + } + + if ((flags & SI_W_SPORT) != 0) { if (rev == 0) { is->is_sport = sp; - is->is_send = htonl(tcp->th_seq); + is->is_send = ntohl(tcp->th_seq); } else { is->is_sport = dp; - is->is_send = htonl(tcp->th_ack); + is->is_send = ntohl(tcp->th_ack); } is->is_maxsend = is->is_send + 1; - } else if ((flags & FI_W_DPORT) != 0) { + } else if ((flags & SI_W_DPORT) != 0) { if (rev == 0) { is->is_dport = dp; - is->is_dend = htonl(tcp->th_ack); + is->is_dend = ntohl(tcp->th_ack); } else { is->is_dport = sp; - is->is_dend = htonl(tcp->th_seq); + is->is_dend = ntohl(tcp->th_seq); } is->is_maxdend = is->is_dend + 1; } - is->is_flags &= ~(FI_W_SPORT|FI_W_DPORT); - ips_wild--; + is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT); + if ((flags & SI_CLONED) && ipstate_logging) + ipstate_log(is, ISL_CLONE); } ret = -1; - if (is->is_ifp[idx] == NULL && - (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) - ret = idx; + if (is->is_flx[out][rev] == 0) + is->is_flx[out][rev] = flx; - if (ret >= 0) { - is->is_ifp[ret] = ifp; - strncpy(is->is_ifname[ret], IFNAME(ifp), - sizeof(is->is_ifname[ret])); + /* + * Check if the interface name for this "direction" is set and if not, + * fill it in. + */ + if (is->is_ifp[idx] == NULL && + (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) { + is->is_ifp[idx] = ifp; + COPYIFNAME(ifp, is->is_ifname[idx]); } fin->fin_rev = rev; - return 1; + return is; } -static int fr_matchicmpqueryreply(v, is, icmp, rev) -int v; -ipstate_t *is; -icmphdr_t *icmp; -int rev; -{ - if (v == 4) { - /* - * If we matched its type on the way in, then when going out - * it will still be the same type. 
- */ - if ((!rev && (icmp->icmp_type == is->is_type)) || - (rev && (icmpreplytype4[is->is_type] == icmp->icmp_type))) { - if (icmp->icmp_type != ICMP_ECHOREPLY) - return 1; - if ((icmp->icmp_id == is->is_icmp.ics_id) && - (icmp->icmp_seq == is->is_icmp.ics_seq)) - return 1; - } - } -#ifdef USE_INET6 - else if (is->is_v == 6) { - if ((!rev && (icmp->icmp_type == is->is_type)) || - (rev && (icmpreplytype6[is->is_type] == icmp->icmp_type))) { - if (icmp->icmp_type != ICMP6_ECHO_REPLY) - return 1; - if ((icmp->icmp_id == is->is_icmp.ics_id) && - (icmp->icmp_seq == is->is_icmp.ics_seq)) - return 1; - } - } -#endif - return 0; -} -static frentry_t *fr_checkicmpmatchingstate(ip, fin) -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: fr_checkicmpmatchingstate */ +/* Returns: Nil */ +/* Parameters: fin(I) - pointer to packet information */ +/* */ +/* If we've got an ICMP error message, using the information stored in the */ +/* ICMP packet, look for a matching state table entry. */ +/* */ +/* If we return NULL then no lock on ipf_state is held. */ +/* If we return non-null then a read-lock on ipf_state is held. */ +/* ------------------------------------------------------------------------ */ +static ipstate_t *fr_checkicmpmatchingstate(fin) fr_info_t *fin; { - register ipstate_t *is, **isp; - register u_short sport, dport; - register u_char pr; - u_short savelen, ohlen; - union i6addr dst, src; + ipstate_t *is, **isp; + u_short sport, dport; + u_char pr; + int backward, i, oi; + i6addr_t dst, src; struct icmp *ic; + u_short savelen; icmphdr_t *icmp; fr_info_t ofin; - int type, len; tcphdr_t *tcp; - frentry_t *fr; + int type, len; ip_t *oip; u_int hv; @@ -1197,11 +1870,10 @@ fr_info_t *fin; * Only a basic IP header (no options) should be with * an ICMP error header. 
*/ - if (((ip->ip_v != 4) || (ip->ip_hl != 5)) || + if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) || (fin->fin_plen < ICMPERR_MINPKTLEN)) return NULL; - - ic = (struct icmp *)fin->fin_dp; + ic = fin->fin_dp; type = ic->icmp_type; /* * If it's not an error type, then return @@ -1212,19 +1884,18 @@ fr_info_t *fin; return NULL; oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN); - ohlen = oip->ip_hl << 2; /* * Check if the at least the old IP header (with options) and * 8 bytes of payload is present. */ - if (fin->fin_plen < ICMPERR_MAXPKTLEN + ohlen - sizeof(*oip)) + if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) return NULL; /* - * Sanity checks. + * Sanity Checks. */ len = fin->fin_dlen - ICMPERR_ICMPHLEN; - if ((len <= 0) || (ohlen > len)) + if ((len <= 0) || ((IP_HL(oip) << 2) > len)) return NULL; /* @@ -1240,69 +1911,68 @@ fr_info_t *fin; { mb_t *m; -# if SOLARIS - m = fin->fin_qfm; + m = fin->fin_m; +# if defined(MENTAT) if ((char *)oip + len > (char *)m->b_wptr) return NULL; # else - m = *(mb_t **)fin->fin_mp; - if ((char *)oip + len > (char *)ip + m->m_len) + if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) return NULL; # endif } #endif + bcopy((char *)fin, (char *)&ofin, sizeof(fin)); /* * in the IPv4 case we must zero the i6addr union otherwise - * the IP6EQ and IP6NEQ macros produce the wrong results because + * the IP6_EQ and IP6_NEQ macros produce the wrong results because * of the 'junk' in the unused part of the union */ bzero((char *)&src, sizeof(src)); bzero((char *)&dst, sizeof(dst)); - bzero((char *)&ofin, sizeof(ofin)); + /* - * We make an fin entry to be able to feed it to - * matchsrcdst. Note that not all fields are encessary - * but this is the cleanest way. Note further that we - * fill in fin_mp such that if someone uses it we'll get + * we make an fin entry to be able to feed it to + * matchsrcdst note that not all fields are encessary + * but this is the cleanest way. 
Note further we fill + * in fin_mp such that if someone uses it we'll get * a kernel panic. fr_matchsrcdst does not use this. - */ - ofin.fin_ifp = fin->fin_ifp; - ofin.fin_out = !fin->fin_out; - ofin.fin_mp = NULL; - ofin.fin_v = 4; - /* + * * watch out here, as ip is in host order and oip in network * order. Any change we make must be undone afterwards, like * oip->ip_off - it is still in network byte order so fix it. */ savelen = oip->ip_len; oip->ip_len = len; - oip->ip_off = ntohs(oip->ip_off); - (void) fr_makefrip(ohlen, oip, &ofin); - /* - * Reset the short flag here because in fr_matchsrcdst() the flags - * for the current packet (fin_fl) are compared against * those for - * the existing session. - */ - ofin.fin_fl &= ~FI_SHORT; + oip->ip_off = htons(oip->ip_off); - /* + ofin.fin_flx = FI_NOCKSUM; + ofin.fin_v = 4; + ofin.fin_ip = oip; + ofin.fin_m = NULL; /* if dereferenced, panic XXX */ + ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ + ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; + (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin); + ofin.fin_ifp = fin->fin_ifp; + ofin.fin_out = !fin->fin_out; + /* + * Reset the short and bad flag here because in fr_matchsrcdst() + * the flags for the current packet (fin_flx) are compared against + * those for the existing session. + */ + ofin.fin_flx &= ~(FI_BAD|FI_SHORT); + + /* * Put old values of ip_len and ip_off back as we don't know * if we have to forward the packet (or process it again. 
*/ oip->ip_len = savelen; oip->ip_off = htons(oip->ip_off); -#if SOLARIS - ofin.fin_qfm = NULL; -#endif - fr = NULL; - switch (oip->ip_p) { case IPPROTO_ICMP : - icmp = (icmphdr_t *)((char *)oip + ohlen); + icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); /* * an ICMP error can only be generated as a result of an @@ -1311,10 +1981,10 @@ fr_info_t *fin; * XXX theoretically ICMP_ECHOREP and the other reply's are * ICMP query's as well, but adding them here seems strange XXX */ - if ((icmp->icmp_type != ICMP_ECHO) && - (icmp->icmp_type != ICMP_TSTAMP) && - (icmp->icmp_type != ICMP_IREQ) && - (icmp->icmp_type != ICMP_MASKREQ)) + if ((icmp->icmp_type != ICMP_ECHO) && + (icmp->icmp_type != ICMP_TSTAMP) && + (icmp->icmp_type != ICMP_IREQ) && + (icmp->icmp_type != ICMP_MASKREQ)) return NULL; /* @@ -1326,35 +1996,52 @@ fr_info_t *fin; dst.in4 = oip->ip_dst; hv += dst.in4.s_addr; hv += icmp->icmp_id; - hv += icmp->icmp_seq; - hv %= fr_statesize; + hv = DOUBLE_HASH(hv); READ_ENTER(&ipf_state); - for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) - if ((is->is_p == pr) && (is->is_v == 4) && - (is->is_icmppkts < is->is_pkts) && - fr_matchsrcdst(is, src, dst, &ofin, NULL) && - fr_matchicmpqueryreply(is->is_v, is, icmp, - fin->fin_rev)) { + for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; + if ((is->is_p != pr) || (is->is_v != 4)) + continue; + if (is->is_pass & FR_NOICMPERR) + continue; + is = fr_matchsrcdst(&ofin, is, &src, &dst, + NULL, FI_ICMPCMP); + if (is != NULL) { + if ((is->is_pass & FR_NOICMPERR) != 0) { + RWLOCK_EXIT(&ipf_state); + return NULL; + } + /* + * i : the index of this packet (the icmp + * unreachable) + * oi : the index of the original packet found + * in the icmp header (i.e. 
the packet + * causing this icmp) + * backward : original packet was backward + * compared to the state + */ + backward = IP6_NEQ(&is->is_src, &src); + fin->fin_rev = !backward; + i = (!backward << 1) + fin->fin_out; + oi = (backward << 1) + ofin.fin_out; + if (is->is_icmppkts[i] > is->is_pkts[oi]) + continue; ips_stats.iss_hits++; - is->is_icmppkts++; - is->is_bytes += ip->ip_len; - fr = is->is_rule; - break; + is->is_icmppkts[i]++; + return is; } + } RWLOCK_EXIT(&ipf_state); - return fr; - + return NULL; case IPPROTO_TCP : case IPPROTO_UDP : - if (fin->fin_plen < ICMPERR_MAXPKTLEN) - return NULL; break; default : return NULL; } - tcp = (tcphdr_t *)((char *)oip + ohlen); + tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); dport = tcp->th_dport; sport = tcp->th_sport; @@ -1365,98 +2052,129 @@ fr_info_t *fin; hv += dst.in4.s_addr; hv += dport; hv += sport; - hv %= fr_statesize; + hv = DOUBLE_HASH(hv); READ_ENTER(&ipf_state); - for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) { + for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; /* * Only allow this icmp though if the * encapsulated packet was allowed through the * other way around. Note that the minimal amount * of info present does not allow for checking against - * tcp internals such as seq and ack numbers. Only the + * tcp internals such as seq and ack numbers. Only the * ports are known to be present and can be even if the * short flag is set. */ if ((is->is_p == pr) && (is->is_v == 4) && - (is->is_icmppkts < is->is_pkts) && - fr_matchsrcdst(is, src, dst, &ofin, tcp)) { - fr = is->is_rule; + (is = fr_matchsrcdst(&ofin, is, &src, &dst, + tcp, FI_ICMPCMP))) { + /* + * i : the index of this packet (the icmp unreachable) + * oi : the index of the original packet found in the + * icmp header (i.e. 
the packet causing this icmp) + * backward : original packet was backward compared to + * the state + */ + backward = IP6_NEQ(&is->is_src, &src); + fin->fin_rev = !backward; + i = (!backward << 1) + fin->fin_out; + oi = (backward << 1) + ofin.fin_out; + + if (((is->is_pass & FR_NOICMPERR) != 0) || + (is->is_icmppkts[i] > is->is_pkts[oi])) + break; ips_stats.iss_hits++; - is->is_icmppkts++; - is->is_bytes += fin->fin_plen; + is->is_icmppkts[i]++; /* * we deliberately do not touch the timeouts * for the accompanying state table entry. * It remains to be seen if that is correct. XXX */ - break; + return is; } } RWLOCK_EXIT(&ipf_state); - return fr; + return NULL; } -/* - * Move a state hash table entry from its old location at is->is_hv to - * its new location, indexed by hv % fr_statesize. - */ -static void fr_ipsmove(isp, is, hv) -ipstate_t **isp, *is; +/* ------------------------------------------------------------------------ */ +/* Function: fr_ipsmove */ +/* Returns: Nil */ +/* Parameters: is(I) - pointer to state table entry */ +/* hv(I) - new hash value for state table entry */ +/* Write Locks: ipf_state */ +/* */ +/* Move a state entry from one position in the hash table to another. */ +/* ------------------------------------------------------------------------ */ +static void fr_ipsmove(is, hv) +ipstate_t *is; u_int hv; { + ipstate_t **isp; u_int hvm; + ASSERT(rw_read_locked(&ipf_state.ipf_lk) == 0); + hvm = is->is_hv; /* * Remove the hash from the old location... */ + isp = is->is_phnext; if (is->is_hnext) is->is_hnext->is_phnext = isp; *isp = is->is_hnext; if (ips_table[hvm] == NULL) ips_stats.iss_inuse--; + ips_stats.iss_bucketlen[hvm]--; /* * ...and put the hash in the new one. 
*/ - hvm = hv % fr_statesize; + hvm = DOUBLE_HASH(hv); is->is_hv = hvm; isp = &ips_table[hvm]; if (*isp) (*isp)->is_phnext = &is->is_hnext; else ips_stats.iss_inuse++; + ips_stats.iss_bucketlen[hvm]++; is->is_phnext = isp; is->is_hnext = *isp; *isp = is; } -/* - * Check if a packet has a registered state. - */ -frentry_t *fr_checkstate(ip, fin) -ip_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: fr_stlookup */ +/* Returns: ipstate_t* - NULL == no matching state found, */ +/* else pointer to state information is returned */ +/* Parameters: fin(I) - pointer to packet information */ +/* tcp(I) - pointer to TCP/UDP header. */ +/* */ +/* Search the state table for a matching entry to the packet described by */ +/* the contents of *fin. */ +/* */ +/* If we return NULL then no lock on ipf_state is held. */ +/* If we return non-null then a read-lock on ipf_state is held. */ +/* ------------------------------------------------------------------------ */ +ipstate_t *fr_stlookup(fin, tcp, ifqp) fr_info_t *fin; +tcphdr_t *tcp; +ipftq_t **ifqp; { - union i6addr dst, src; - register ipstate_t *is, **isp; - register u_char pr; - u_int hv, hvm, hlen, tryagain, pass, v; + u_int hv, hvm, pr, v, tryagain; + ipstate_t *is, **isp; + u_short dport, sport; + i6addr_t src, dst; struct icmp *ic; - frentry_t *fr; - tcphdr_t *tcp; - int rev; - - if ((ips_list == NULL) || (fin->fin_off != 0) || fr_state_lock || - (fin->fin_fl & FI_SHORT)) - return NULL; + ipftq_t *ifq; + int oow; is = NULL; - hlen = fin->fin_hlen; - tcp = (tcphdr_t *)((char *)ip + hlen); + ifq = NULL; + tcp = fin->fin_dp; ic = (struct icmp *)tcp; hv = (pr = fin->fin_fi.fi_p); src = fin->fin_fi.fi_src; @@ -1464,18 +2182,12 @@ fr_info_t *fin; hv += src.in4.s_addr; hv += dst.in4.s_addr; - /* - * Search the hash table for matching packet header info. - * At the bottom of this switch statement, the following is expected: - * is == NULL, no lock on ipf_state is held. 
- * is != NULL, a lock on ipf_state is held. - */ v = fin->fin_fi.fi_v; #ifdef USE_INET6 if (v == 6) { - hv += fin->fin_fi.fi_src.i6[1]; - hv += fin->fin_fi.fi_src.i6[2]; - hv += fin->fin_fi.fi_src.i6[3]; + hv += fin->fin_fi.fi_src.i6[1]; + hv += fin->fin_fi.fi_src.i6[2]; + hv += fin->fin_fi.fi_src.i6[3]; if ((fin->fin_p == IPPROTO_ICMPV6) && IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) { @@ -1488,43 +2200,46 @@ fr_info_t *fin; } #endif - switch (fin->fin_p) + /* + * Search the hash table for matching packet header info. + */ + switch (pr) { #ifdef USE_INET6 case IPPROTO_ICMPV6 : - tcp = NULL; tryagain = 0; if (v == 6) { if ((ic->icmp_type == ICMP6_ECHO_REQUEST) || (ic->icmp_type == ICMP6_ECHO_REPLY)) { hv += ic->icmp_id; - hv += ic->icmp_seq; } } READ_ENTER(&ipf_state); icmp6again: - hvm = hv % fr_statesize; - for (isp = &ips_table[hvm]; (is = *isp); isp = &is->is_hnext) - if ((is->is_p == pr) && (is->is_v == v) && - fr_matchsrcdst(is, src, dst, fin, NULL) && - fr_matchicmpqueryreply(v, is, ic, fin->fin_rev)) { - rev = fin->fin_rev; - if (is->is_frage[rev] != 0) - is->is_age = is->is_frage[rev]; - else if (rev != 0) - is->is_age = fr_icmpacktimeout; + hvm = DOUBLE_HASH(hv); + for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; + if ((is->is_p != pr) || (is->is_v != v)) + continue; + is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); + if (is != NULL && + fr_matchicmpqueryreply(v, &is->is_icmp, + ic, fin->fin_rev)) { + if (fin->fin_rev) + ifq = &ips_icmpacktq; else - is->is_age = fr_icmptimeout; + ifq = &ips_icmptq; break; } + } if (is != NULL) { - if (tryagain && !(is->is_flags & FI_W_DADDR)) { + if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) { hv += fin->fin_fi.fi_src.i6[0]; hv += fin->fin_fi.fi_src.i6[1]; hv += fin->fin_fi.fi_src.i6[2]; hv += fin->fin_fi.fi_src.i6[3]; - fr_ipsmove(isp, is, hv); + fr_ipsmove(is, hv); MUTEX_DOWNGRADE(&ipf_state); } break; @@ -1534,8 +2249,14 @@ icmp6again: /* * No matching icmp state entry. 
Perhaps this is a * response to another state entry. + * + * XXX With some ICMP6 packets, the "other" address is already + * in the packet, after the ICMP6 header, and this could be + * used in place of the multicast address. However, taking + * advantage of this requires some significant code changes + * to handle the specific types where that is the case. */ - if ((ips_wild != 0) && (v == 6) && (tryagain == 0) && + if ((ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) && !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) { hv -= fin->fin_fi.fi_src.i6[0]; hv -= fin->fin_fi.fi_src.i6[1]; @@ -1546,113 +2267,107 @@ icmp6again: goto icmp6again; } - fr = fr_checkicmp6matchingstate((ip6_t *)ip, fin); - if (fr) - return fr; + is = fr_checkicmp6matchingstate(fin); + if (is != NULL) + return is; break; #endif + case IPPROTO_ICMP : - tcp = NULL; if (v == 4) { hv += ic->icmp_id; - hv += ic->icmp_seq; } - hvm = hv % fr_statesize; + hv = DOUBLE_HASH(hv); READ_ENTER(&ipf_state); - for (isp = &ips_table[hvm]; (is = *isp); isp = &is->is_hnext) - if ((is->is_p == pr) && (is->is_v == v) && - fr_matchsrcdst(is, src, dst, fin, NULL) && - fr_matchicmpqueryreply(v, is, ic, fin->fin_rev)) { - rev = fin->fin_rev; - if (is->is_frage[rev] != 0) - is->is_age = is->is_frage[rev]; - else if (fin->fin_rev) - is->is_age = fr_icmpacktimeout; + for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; + if ((is->is_p != pr) || (is->is_v != v)) + continue; + is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); + if (is != NULL && + fr_matchicmpqueryreply(v, &is->is_icmp, + ic, fin->fin_rev)) { + if (fin->fin_rev) + ifq = &ips_icmpacktq; else - is->is_age = fr_icmptimeout; + ifq = &ips_icmptq; break; } - - if (is != NULL) - break; - RWLOCK_EXIT(&ipf_state); - /* - * No matching icmp state entry. Perhaps this is a - * response to another state entry. 
- */ - fr = fr_checkicmpmatchingstate(ip, fin); - if (fr) - return fr; + } + if (is == NULL) { + RWLOCK_EXIT(&ipf_state); + } break; + case IPPROTO_TCP : - /* - * Just plain ignore RST flag set with either FIN or SYN. - */ - if ((tcp->th_flags & TH_RST) && - ((tcp->th_flags & (TH_FIN|TH_SYN|TH_RST)) != TH_RST)) - break; case IPPROTO_UDP : - { - register u_short dport, sport; - - dport = tcp->th_dport; - sport = tcp->th_sport; - tryagain = 0; - hv += dport; + ifqp = NULL; + sport = htons(fin->fin_data[0]); hv += sport; + dport = htons(fin->fin_data[1]); + hv += dport; + oow = 0; + tryagain = 0; READ_ENTER(&ipf_state); retry_tcpudp: - hvm = hv % fr_statesize; - for (isp = &ips_table[hvm]; (is = *isp); isp = &is->is_hnext) - if ((is->is_p == pr) && (is->is_v == v) && - fr_matchsrcdst(is, src, dst, fin, tcp)) { - rev = fin->fin_rev; - if ((pr == IPPROTO_TCP)) { - if (!fr_tcpstate(is, fin, ip, tcp)) - is = NULL; - } else if ((pr == IPPROTO_UDP)) { - if (is->is_frage[rev] != 0) - is->is_age = is->is_frage[rev]; - else if (fin->fin_rev) - is->is_age = fr_udpacktimeout; - else - is->is_age = fr_udptimeout; + hvm = DOUBLE_HASH(hv); + for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; + if ((is->is_p != pr) || (is->is_v != v)) + continue; + fin->fin_flx &= ~FI_OOW; + is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP); + if (is != NULL) { + if (pr == IPPROTO_TCP) { + if (!fr_tcpstate(fin, tcp, is)) { + oow |= fin->fin_flx & FI_OOW; + continue; + } } break; } + } if (is != NULL) { if (tryagain && - !(is->is_flags & (FI_WILDP|FI_WILDA))) { + !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) { hv += dport; hv += sport; - fr_ipsmove(isp, is, hv); + fr_ipsmove(is, hv); MUTEX_DOWNGRADE(&ipf_state); } break; } - RWLOCK_EXIT(&ipf_state); - if (!tryagain && ips_wild) { + + if (!tryagain && ips_stats.iss_wild) { hv -= dport; hv -= sport; tryagain = 1; WRITE_ENTER(&ipf_state); goto retry_tcpudp; } + fin->fin_flx |= oow; break; - } + +#if 0 + case IPPROTO_GRE 
: + gre = fin->fin_dp; + if (GRE_REV(gre->gr_flags) == 1) { + hv += gre->gr_call; + } + /* FALLTHROUGH */ +#endif default : - tcp = NULL; - hv %= fr_statesize; + ifqp = NULL; + hvm = DOUBLE_HASH(hv); READ_ENTER(&ipf_state); - for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) { - if ((is->is_p == pr) && (is->is_v == v) && - fr_matchsrcdst(is, src, dst, fin, NULL)) { - rev = fin->fin_rev; - if (is->is_frage[rev] != 0) - is->is_age = is->is_frage[rev]; - else - is->is_age = fr_udptimeout; + for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; + if ((is->is_p != pr) || (is->is_v != v)) + continue; + is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); + if (is != NULL) { + ifq = &ips_iptq; break; } } @@ -1662,463 +2377,1013 @@ retry_tcpudp: break; } + if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) && + (is->is_tqehead[fin->fin_rev] != NULL)) + ifq = is->is_tqehead[fin->fin_rev]; + if (ifq != NULL && ifqp != NULL) + *ifqp = ifq; + return is; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_updatestate */ +/* Returns: Nil */ +/* Parameters: fin(I) - pointer to packet information */ +/* is(I) - pointer to state table entry */ +/* Read Locks: ipf_state */ +/* */ +/* Updates packet and byte counters for a newly received packet. Seeds the */ +/* fragment cache with a new entry as required. */ +/* ------------------------------------------------------------------------ */ +void fr_updatestate(fin, is, ifq) +fr_info_t *fin; +ipstate_t *is; +ipftq_t *ifq; +{ + ipftqent_t *tqe; + int i, pass; + + i = (fin->fin_rev << 1) + fin->fin_out; + + /* + * For TCP packets, ifq == NULL. For all others, check if this new + * queue is different to the last one it was on and move it if so. 
+ */ + tqe = &is->is_sti; + MUTEX_ENTER(&is->is_lock); + if ((tqe->tqe_flags & TQE_RULEBASED) != 0) + ifq = is->is_tqehead[fin->fin_rev]; + + if (ifq != NULL) + fr_movequeue(tqe, tqe->tqe_ifq, ifq); + + is->is_pkts[i]++; + is->is_bytes[i] += fin->fin_plen; + MUTEX_EXIT(&is->is_lock); + +#ifdef IPFILTER_SYNC + if (is->is_flags & IS_STATESYNC) + ipfsync_update(SMC_STATE, fin, is->is_sync); +#endif + + ATOMIC_INCL(ips_stats.iss_hits); + + fin->fin_fr = is->is_rule; + + /* + * If this packet is a fragment and the rule says to track fragments, + * then create a new fragment cache entry. + */ + pass = is->is_pass; + if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass)) + (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_checkstate */ +/* Returns: frentry_t* - NULL == search failed, */ +/* else pointer to rule for matching state */ +/* Parameters: ifp(I) - pointer to interface */ +/* passp(I) - pointer to filtering result flags */ +/* */ +/* Check if a packet is associated with an entry in the state table. */ +/* ------------------------------------------------------------------------ */ +frentry_t *fr_checkstate(fin, passp) +fr_info_t *fin; +u_32_t *passp; +{ + ipstate_t *is; + frentry_t *fr; + tcphdr_t *tcp; + ipftq_t *ifq; + u_int pass; + + if (fr_state_lock || (ips_list == NULL) || + (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) + return NULL; + + is = NULL; + if ((fin->fin_flx & FI_TCPUDP) || + (fin->fin_fi.fi_p == IPPROTO_ICMP) +#ifdef USE_INET6 + || (fin->fin_fi.fi_p == IPPROTO_ICMPV6) +#endif + ) + tcp = fin->fin_dp; + else + tcp = NULL; + + /* + * Search the hash table for matching packet header info. 
+ */ + ifq = NULL; + is = fin->fin_state; + if (is == NULL) + is = fr_stlookup(fin, tcp, &ifq); + switch (fin->fin_p) + { +#ifdef USE_INET6 + case IPPROTO_ICMPV6 : + if (is != NULL) + break; + if (fin->fin_v == 6) { + is = fr_checkicmp6matchingstate(fin); + if (is != NULL) + goto matched; + } + break; +#endif + case IPPROTO_ICMP : + if (is != NULL) + break; + /* + * No matching icmp state entry. Perhaps this is a + * response to another state entry. + */ + is = fr_checkicmpmatchingstate(fin); + if (is != NULL) + goto matched; + break; + case IPPROTO_TCP : + if (is == NULL) + break; + + if (is->is_pass & FR_NEWISN) { + if (fin->fin_out == 0) + fr_fixinisn(fin, is); + else if (fin->fin_out == 1) + fr_fixoutisn(fin, is); + } + break; + default : + if (fin->fin_rev) + ifq = &ips_udpacktq; + else + ifq = &ips_udptq; + break; + } if (is == NULL) { ATOMIC_INCL(ips_stats.iss_miss); return NULL; } - MUTEX_ENTER(&is->is_lock); - is->is_bytes += fin->fin_plen; - ips_stats.iss_hits++; - is->is_pkts++; - MUTEX_EXIT(&is->is_lock); +matched: fr = is->is_rule; - fin->fin_rule = is->is_rulen; if (fr != NULL) { - fin->fin_group = fr->fr_group; + if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { + if (fin->fin_nattag == NULL) + return NULL; + if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) + return NULL; + } + (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); fin->fin_icode = fr->fr_icode; } - fin->fin_fr = fr; + + fin->fin_rule = is->is_rulen; pass = is->is_pass; + fr_updatestate(fin, is, ifq); + if (fin->fin_out == 1) + fin->fin_nat = is->is_nat[fin->fin_rev]; + + fin->fin_state = is; + is->is_touched = fr_ticks; + MUTEX_ENTER(&is->is_lock); + is->is_ref++; + MUTEX_EXIT(&is->is_lock); RWLOCK_EXIT(&ipf_state); - if ((fin->fin_fl & FI_FRAG) && (pass & FR_KEEPFRAG)) - ipfr_newfrag(ip, fin); -#ifndef _KERNEL - if ((tcp != NULL) && (tcp->th_flags & TCP_CLOSE)) - fr_delstate(is); -#endif + fin->fin_flx |= FI_STATE; + if ((pass & FR_LOGFIRST) != 0) + pass &= 
~(FR_LOGFIRST|FR_LOG); + *passp = pass; return fr; } -/* - * Sync. state entries. If interfaces come or go or just change position, - * this is needed. - */ -void ip_statesync(ifp) +/* ------------------------------------------------------------------------ */ +/* Function: fr_fixoutisn */ +/* Returns: Nil */ +/* Parameters: fin(I) - pointer to packet information */ +/* is(I) - pointer to master state structure */ +/* */ +/* Called only for outbound packets, adjusts the sequence number and the */ +/* TCP checksum to match that change. */ +/* ------------------------------------------------------------------------ */ +static void fr_fixoutisn(fin, is) +fr_info_t *fin; +ipstate_t *is; +{ + tcphdr_t *tcp; + int rev; + u_32_t seq; + + tcp = fin->fin_dp; + rev = fin->fin_rev; + if ((is->is_flags & IS_ISNSYN) != 0) { + if (rev == 0) { + seq = ntohl(tcp->th_seq); + seq += is->is_isninc[0]; + tcp->th_seq = htonl(seq); + fix_outcksum(fin, &tcp->th_sum, is->is_sumd[0]); + } + } + if ((is->is_flags & IS_ISNACK) != 0) { + if (rev == 1) { + seq = ntohl(tcp->th_seq); + seq += is->is_isninc[1]; + tcp->th_seq = htonl(seq); + fix_outcksum(fin, &tcp->th_sum, is->is_sumd[1]); + } + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_fixinisn */ +/* Returns: Nil */ +/* Parameters: fin(I) - pointer to packet information */ +/* is(I) - pointer to master state structure */ +/* */ +/* Called only for inbound packets, adjusts the acknowledge number and the */ +/* TCP checksum to match that change. 
*/ +/* ------------------------------------------------------------------------ */ +static void fr_fixinisn(fin, is) +fr_info_t *fin; +ipstate_t *is; +{ + tcphdr_t *tcp; + int rev; + u_32_t ack; + + tcp = fin->fin_dp; + rev = fin->fin_rev; + if ((is->is_flags & IS_ISNSYN) != 0) { + if (rev == 1) { + ack = ntohl(tcp->th_ack); + ack -= is->is_isninc[0]; + tcp->th_ack = htonl(ack); + fix_incksum(fin, &tcp->th_sum, is->is_sumd[0]); + } + } + if ((is->is_flags & IS_ISNACK) != 0) { + if (rev == 0) { + ack = ntohl(tcp->th_ack); + ack -= is->is_isninc[1]; + tcp->th_ack = htonl(ack); + fix_incksum(fin, &tcp->th_sum, is->is_sumd[1]); + } + } +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_statesync */ +/* Returns: Nil */ +/* Parameters: ifp(I) - pointer to interface */ +/* */ +/* Walk through all state entries and if an interface pointer match is */ +/* found then look it up again, based on its name in case the pointer has */ +/* changed since last time. */ +/* */ +/* If ifp is passed in as being non-null then we are only doing updates for */ +/* existing, matching, uses of it. */ +/* ------------------------------------------------------------------------ */ +void fr_statesync(ifp) void *ifp; { - register ipstate_t *is; + ipstate_t *is; int i; + if (fr_running <= 0) + return; + WRITE_ENTER(&ipf_state); + + if (fr_running <= 0) { + RWLOCK_EXIT(&ipf_state); + return; + } + for (is = ips_list; is; is = is->is_next) { + /* + * Look up all the interface names in the state entry. + */ for (i = 0; i < 4; i++) { - if (is->is_ifp[i] == ifp) { - is->is_ifp[i] = GETUNIT(is->is_ifname[i], - is->is_v); - if (!is->is_ifp[i]) - is->is_ifp[i] = (void *)-1; - } + if (ifp == NULL || ifp == is->is_ifp[i]) + is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], + is->is_v); } } RWLOCK_EXIT(&ipf_state); } -/* - * Must always be called with fr_ipfstate held as a write lock. 
- */ -static void fr_delstate(is) +/* ------------------------------------------------------------------------ */ +/* Function: fr_delstate */ +/* Returns: Nil */ +/* Parameters: is(I) - pointer to state structure to delete */ +/* why(I) - if not 0, log reason why it was deleted */ +/* Write Locks: ipf_state */ +/* */ +/* Deletes a state entry from the enumerated list as well as the hash table */ +/* and timeout queue lists. Make adjustments to hash table statistics and */ +/* global counters as required. */ +/* ------------------------------------------------------------------------ */ +static void fr_delstate(is, why) ipstate_t *is; +int why; { - frentry_t *fr; - if (is->is_flags & (FI_WILDP|FI_WILDA)) - ips_wild--; - if (is->is_next) - is->is_next->is_pnext = is->is_pnext; - *is->is_pnext = is->is_next; - if (is->is_hnext) - is->is_hnext->is_phnext = is->is_phnext; - *is->is_phnext = is->is_hnext; - if (ips_table[is->is_hv] == NULL) - ips_stats.iss_inuse--; - if (is->is_me) - *is->is_me = NULL; + ASSERT(rw_read_locked(&ipf_state.ipf_lk) == 0); - fr = is->is_rule; - if (fr != NULL) { - fr->fr_ref--; - if (fr->fr_ref == 0) { - KFREE(fr); + /* + * Since we want to delete this, remove it from the state table, + * where it can be found & used, first. + */ + if (is->is_pnext != NULL) { + *is->is_pnext = is->is_next; + + if (is->is_next != NULL) + is->is_next->is_pnext = is->is_pnext; + + is->is_pnext = NULL; + is->is_next = NULL; + } + + if (is->is_phnext != NULL) { + *is->is_phnext = is->is_hnext; + if (is->is_hnext != NULL) + is->is_hnext->is_phnext = is->is_phnext; + if (ips_table[is->is_hv] == NULL) + ips_stats.iss_inuse--; + ips_stats.iss_bucketlen[is->is_hv]--; + + is->is_phnext = NULL; + is->is_hnext = NULL; + } + + /* + * Because ips_stats.iss_wild is a count of entries in the state + * table that have wildcard flags set, only decerement it once + * and do it here. 
+ */ + if (is->is_flags & (SI_WILDP|SI_WILDA)) { + if (!(is->is_flags & SI_CLONED)) { + ATOMIC_DECL(ips_stats.iss_wild); } + is->is_flags &= ~(SI_WILDP|SI_WILDA); } -#ifdef _KERNEL - MUTEX_DESTROY(&is->is_lock); + + /* + * Next, remove it from the timeout queue it is in. + */ + fr_deletequeueentry(&is->is_sti); + + if (is->is_me != NULL) { + *is->is_me = NULL; + is->is_me = NULL; + } + + /* + * If it is still in use by something else, do not go any further, + * but note that at this point it is now an orphan. + */ + is->is_ref--; + if (is->is_ref > 0) + return; + + if (is->is_tqehead[0] != NULL) { + if (fr_deletetimeoutqueue(is->is_tqehead[0]) == 0) + fr_freetimeoutqueue(is->is_tqehead[0]); + } + if (is->is_tqehead[1] != NULL) { + if (fr_deletetimeoutqueue(is->is_tqehead[1]) == 0) + fr_freetimeoutqueue(is->is_tqehead[1]); + } + +#ifdef IPFILTER_SYNC + if (is->is_sync) + ipfsync_del(is->is_sync); +#endif +#ifdef IPFILTER_SCAN + (void) ipsc_detachis(is); #endif - KFREE(is); - ips_num--; -} + if (ipstate_logging != 0 && why != 0) + ipstate_log(is, why); -/* - * Free memory in use by all state info. kept. - */ -void fr_stateunload() -{ - register ipstate_t *is; + if (is->is_rule != NULL) { + is->is_rule->fr_statecnt--; + (void)fr_derefrule(&is->is_rule); + } - WRITE_ENTER(&ipf_state); - while ((is = ips_list)) - fr_delstate(is); - ips_stats.iss_inuse = 0; - ips_num = 0; - RWLOCK_EXIT(&ipf_state); - if (ips_table) - KFREES(ips_table, fr_statesize * sizeof(ipstate_t *)); - ips_table = NULL; + MUTEX_DESTROY(&is->is_lock); + KFREE(is); + ips_num--; } -/* - * Slowly expire held state for thingslike UDP and ICMP. Timeouts are set - * in expectation of this being called twice per second. - */ +/* ------------------------------------------------------------------------ */ +/* Function: fr_timeoutstate */ +/* Returns: Nil */ +/* Parameters: Nil */ +/* */ +/* Slowly expire held state for thingslike UDP and ICMP. 
The algorithm */ +/* used here is to keep the queue sorted with the oldest things at the top */ +/* and the youngest at the bottom. So if the top one doesn't need to be */ +/* expired then neither will any under it. */ +/* ------------------------------------------------------------------------ */ void fr_timeoutstate() { - register ipstate_t *is, **isp; -#if defined(_KERNEL) && !SOLARIS + ipftq_t *ifq, *ifqnext; + ipftqent_t *tqe, *tqn; + ipstate_t *is; +#if defined(USE_SPL) && defined(_KERNEL) int s; #endif SPL_NET(s); WRITE_ENTER(&ipf_state); - for (isp = &ips_list; (is = *isp); ) - if (is->is_age && !--is->is_age) { - if (is->is_p == IPPROTO_TCP) - ips_stats.iss_fin++; - else - ips_stats.iss_expire++; -#ifdef IPFILTER_LOG - ipstate_log(is, ISL_EXPIRE); -#endif - fr_delstate(is); - } else - isp = &is->is_next; + for (ifq = ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next) + for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { + if (tqe->tqe_die > fr_ticks) + break; + tqn = tqe->tqe_next; + is = tqe->tqe_parent; + fr_delstate(is, ISL_EXPIRE); + } + + for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) { + ifqnext = ifq->ifq_next; + + for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { + if (tqe->tqe_die > fr_ticks) + break; + tqn = tqe->tqe_next; + is = tqe->tqe_parent; + fr_delstate(is, ISL_EXPIRE); + } + } + + for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) { + ifqnext = ifq->ifq_next; + + if (((ifq->ifq_flags & IFQF_DELETE) != 0) && + (ifq->ifq_ref == 0)) { + fr_freetimeoutqueue(ifq); + } + } + if (fr_state_doflush) { (void) fr_state_flush(2, 0); fr_state_doflush = 0; } + RWLOCK_EXIT(&ipf_state); SPL_X(s); } -/* - * Original idea freom Pradeep Krishnan for use primarily with NAT code. - * (pkrishna@netcom.com) - * - * Rewritten by Arjan de Vet , 2000-07-29: - * - * - (try to) base state transitions on real evidence only, - * i.e. packets that are sent and have been received by ipfilter; - * diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. 
- * - * - deal with half-closed connections correctly; - * - * - store the state of the source in state[0] such that ipfstat - * displays the state as source/dest instead of dest/source; the calls - * to fr_tcp_age have been changed accordingly. - * - * Parameters: - * - * state[0] = state of source (host that initiated connection) - * state[1] = state of dest (host that accepted the connection) - * - * dir == 0 : a packet from source to dest - * dir == 1 : a packet from dest to source - * - */ -int fr_tcp_age(age, state, fin, dir, fsm) -u_long *age; -u_char *state; -fr_info_t *fin; -int dir, fsm; +/* ------------------------------------------------------------------------ */ +/* Function: fr_state_flush */ +/* Returns: int - 0 == success, -1 == failure */ +/* Parameters: Nil */ +/* Write Locks: ipf_state */ +/* */ +/* Flush state tables. Three actions currently defined: */ +/* which == 0 : flush all state table entries */ +/* which == 1 : flush TCP connections which have started to close but are */ +/* stuck for some reason. */ +/* which == 2 : flush TCP connections which have been idle for a long time, */ +/* starting at > 4 days idle and working back in successive half-*/ +/* days to at most 12 hours old. If this fails to free enough */ +/* slots then work backwards in half hour slots to 30 minutes. */ +/* If that too fails, then work backwards in 30 second intervals */ +/* for the last 30 minutes to at worst 30 seconds idle. 
*/ +/* ------------------------------------------------------------------------ */ +static int fr_state_flush(which, proto) +int which, proto; { - tcphdr_t *tcp = (tcphdr_t *)fin->fin_dp; - u_char flags = tcp->th_flags; - int dlen, ostate; - u_long newage; + ipftq_t *ifq, *ifqnext; + ipftqent_t *tqe, *tqn; + ipstate_t *is, **isp; + int delete, removed; + long try, maxtick; + u_long interval; +#if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) + int s; +#endif - ostate = state[1 - dir]; + removed = 0; - dlen = fin->fin_plen - fin->fin_hlen - (tcp->th_off << 2); + SPL_NET(s); + for (isp = &ips_list; ((is = *isp) != NULL); ) { + delete = 0; - if (flags & TH_RST) { - if (!(tcp->th_flags & TH_PUSH) && !dlen) { - *age = fr_tcpclosed; - state[dir] = TCPS_CLOSED; - } else { - *age = fr_tcpclosewait; - state[dir] = TCPS_CLOSE_WAIT; + if ((proto != 0) && (is->is_v != proto)) { + isp = &is->is_next; + continue; } - return 0; + + switch (which) + { + case 0 : + delete = 1; + break; + case 1 : + case 2 : + if (is->is_p != IPPROTO_TCP) + break; + if ((is->is_state[0] != IPF_TCPS_ESTABLISHED) || + (is->is_state[1] != IPF_TCPS_ESTABLISHED)) + delete = 1; + break; + } + + if (delete) { + if (is->is_p == IPPROTO_TCP) + ips_stats.iss_fin++; + else + ips_stats.iss_expire++; + fr_delstate(is, ISL_FLUSH); + removed++; + } else + isp = &is->is_next; } - newage = 0; + if (which != 2) { + SPL_X(s); + return removed; + } - switch(state[dir]) - { - case TCPS_CLOSED: /* 0 */ - if ((flags & TH_OPENING) == TH_OPENING) { - /* - * 'dir' received an S and sends SA in response, - * CLOSED -> SYN_RECEIVED - */ - state[dir] = TCPS_SYN_RECEIVED; - newage = fr_tcptimeout; - } else if ((flags & TH_OPENING) == TH_SYN) { - /* 'dir' sent S, CLOSED -> SYN_SENT */ - state[dir] = TCPS_SYN_SENT; - newage = fr_tcptimeout; - } - - /* - * It is apparently possible that a hosts sends two syncs - * before the remote party is able to respond with a SA. 
In - * such a case the remote server sometimes ACK's the second - * sync, and then responds with a SA. The following code - * is used to prevent this ack from being blocked. - * - * We do not reset the timeout here to fr_tcptimeout because - * a connection connect timeout does not renew after every - * packet that is sent. We need to set newage to something - * to indicate the packet has passed the check for its flags - * being valid in the TCP FSM. - */ - else if ((ostate == TCPS_SYN_SENT) && - ((flags & (TH_FIN|TH_SYN|TH_RST|TH_ACK)) == TH_ACK)) { - newage = *age; + /* + * Asked to remove inactive entries because the table is full, try + * again, 3 times, if first attempt failed with a different criteria + * each time. The order tried in must be in decreasing age. + * Another alternative is to implement random drop and drop N entries + * at random until N have been freed up. + */ + if (fr_ticks - ips_last_force_flush < IPF_TTLVAL(5)) + goto force_flush_skipped; + ips_last_force_flush = fr_ticks; + + if (fr_ticks > IPF_TTLVAL(43200)) + interval = IPF_TTLVAL(43200); + else if (fr_ticks > IPF_TTLVAL(1800)) + interval = IPF_TTLVAL(1800); + else if (fr_ticks > IPF_TTLVAL(30)) + interval = IPF_TTLVAL(30); + else + interval = IPF_TTLVAL(10); + try = fr_ticks - (fr_ticks - interval); + if (try < 0) + goto force_flush_skipped; + + while (removed == 0) { + maxtick = fr_ticks - interval; + if (maxtick < 0) + break; + + while (try < maxtick) { + for (ifq = ips_tqtqb; ifq != NULL; + ifq = ifq->ifq_next) { + for (tqn = ifq->ifq_head; + ((tqe = tqn) != NULL); ) { + if (tqe->tqe_die > try) + break; + tqn = tqe->tqe_next; + is = tqe->tqe_parent; + fr_delstate(is, ISL_EXPIRE); + removed++; + } + } + + for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) { + ifqnext = ifq->ifq_next; + + for (tqn = ifq->ifq_head; + ((tqe = tqn) != NULL); ) { + if (tqe->tqe_die > try) + break; + tqn = tqe->tqe_next; + is = tqe->tqe_parent; + fr_delstate(is, ISL_EXPIRE); + removed++; + } + } + if (try + 
interval > maxtick) + break; + try += interval; } - /* - * The next piece of code makes it possible to get - * already established connections into the state table - * after a restart or reload of the filter rules; this - * does not work when a strict 'flags S keep state' is - * used for tcp connections of course, however, use a - * lower time-out so the state disappears quickly if - * the other side does not pick it up. - */ - else if (!fsm && - (flags & (TH_FIN|TH_SYN|TH_RST|TH_ACK)) == TH_ACK) { - /* we saw an A, guess 'dir' is in ESTABLISHED mode */ - if (ostate == TCPS_CLOSED) { - state[dir] = TCPS_ESTABLISHED; - newage = fr_tcptimeout; - } else if (ostate == TCPS_ESTABLISHED) { - state[dir] = TCPS_ESTABLISHED; - newage = fr_tcpidletimeout; + if (removed == 0) { + if (interval == IPF_TTLVAL(43200)) { + interval = IPF_TTLVAL(1800); + } else if (interval == IPF_TTLVAL(1800)) { + interval = IPF_TTLVAL(30); + } else if (interval == IPF_TTLVAL(30)) { + interval = IPF_TTLVAL(10); + } else { + break; } } - /* - * TODO: besides regular ACK packets we can have other - * packets as well; it is yet to be determined how we - * should initialize the states in those cases - */ - break; + } +force_flush_skipped: + SPL_X(s); + return removed; +} - case TCPS_LISTEN: /* 1 */ - /* NOT USED */ - break; - case TCPS_SYN_SENT: /* 2 */ - if ((flags & ~(TH_ECN|TH_CWR)) == TH_SYN) { - /* - * A retransmitted SYN packet. We do not reset the - * timeout here to fr_tcptimeout because a connection - * connect timeout does not renew after every packet - * that is sent. We need to set newage to something - * to indicate the packet has passed the check for its - * flags being valid in the TCP FSM. 
- */ - newage = *age; - } else if ((flags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { - /* - * We see an A from 'dir' which is in SYN_SENT - * state: 'dir' sent an A in response to an SA - * which it received, SYN_SENT -> ESTABLISHED - */ - state[dir] = TCPS_ESTABLISHED; - newage = fr_tcpidletimeout; - } else if (flags & TH_FIN) { - /* - * We see an F from 'dir' which is in SYN_SENT - * state and wants to close its side of the - * connection; SYN_SENT -> FIN_WAIT_1 - */ - state[dir] = TCPS_FIN_WAIT_1; - newage = fr_tcpidletimeout; /* or fr_tcptimeout? */ - } else if ((flags & TH_OPENING) == TH_OPENING) { - /* - * We see an SA from 'dir' which is already in - * SYN_SENT state, this means we have a - * simultaneous open; SYN_SENT -> SYN_RECEIVED - */ - state[dir] = TCPS_SYN_RECEIVED; - newage = fr_tcptimeout; - } - break; - case TCPS_SYN_RECEIVED: /* 3 */ - if ((flags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { - /* - * We see an A from 'dir' which was in SYN_RECEIVED - * state so it must now be in established state, - * SYN_RECEIVED -> ESTABLISHED - */ - state[dir] = TCPS_ESTABLISHED; - newage = fr_tcpidletimeout; - } else if ((flags & ~(TH_ECN|TH_CWR)) == TH_OPENING) { +/* ------------------------------------------------------------------------ */ +/* Function: fr_tcp_age */ +/* Returns: int - 1 == state transition made, 0 == no change (rejected) */ +/* Parameters: tq(I) - pointer to timeout queue information */ +/* fin(I) - pointer to packet information */ +/* tqtab(I) - TCP timeout queue table this is in */ +/* flags(I) - flags from state/NAT entry */ +/* */ +/* Rewritten by Arjan de Vet , 2000-07-29: */ +/* */ +/* - (try to) base state transitions on real evidence only, */ +/* i.e. packets that are sent and have been received by ipfilter; */ +/* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. 
*/ +/* */ +/* - deal with half-closed connections correctly; */ +/* */ +/* - store the state of the source in state[0] such that ipfstat */ +/* displays the state as source/dest instead of dest/source; the calls */ +/* to fr_tcp_age have been changed accordingly. */ +/* */ +/* Internal Parameters: */ +/* */ +/* state[0] = state of source (host that initiated connection) */ +/* state[1] = state of dest (host that accepted the connection) */ +/* */ +/* dir == 0 : a packet from source to dest */ +/* dir == 1 : a packet from dest to source */ +/* */ +/* Locking: it is assumed that the parent of the tqe structure is locked. */ +/* ------------------------------------------------------------------------ */ +int fr_tcp_age(tqe, fin, tqtab, flags) +ipftqent_t *tqe; +fr_info_t *fin; +ipftq_t *tqtab; +int flags; +{ + int dlen, ostate, nstate, rval, dir; + u_char tcpflags; + tcphdr_t *tcp; + + tcp = fin->fin_dp; + + rval = 0; + dir = fin->fin_rev; + tcpflags = tcp->th_flags; + dlen = fin->fin_plen - fin->fin_hlen - (TCP_OFF(tcp) << 2); + + if (tcpflags & TH_RST) { + if (!(tcpflags & TH_PUSH) && !dlen) + nstate = IPF_TCPS_CLOSED; + else + nstate = IPF_TCPS_CLOSE_WAIT; + rval = 1; + } else { + ostate = tqe->tqe_state[1 - dir]; + nstate = tqe->tqe_state[dir]; + + switch (nstate) + { + case IPF_TCPS_CLOSED: /* 0 */ + if ((tcpflags & TH_OPENING) == TH_OPENING) { + /* + * 'dir' received an S and sends SA in + * response, CLOSED -> SYN_RECEIVED + */ + nstate = IPF_TCPS_SYN_RECEIVED; + rval = 1; + } else if ((tcpflags & TH_OPENING) == TH_SYN) { + /* 'dir' sent S, CLOSED -> SYN_SENT */ + nstate = IPF_TCPS_SYN_SENT; + rval = 1; + } /* - * We see an SA from 'dir' which is already in - * SYN_RECEIVED state. 
+ * the next piece of code makes it possible to get + * already established connections into the state table + * after a restart or reload of the filter rules; this + * does not work when a strict 'flags S keep state' is + * used for tcp connections of course */ - newage = fr_tcptimeout; - } else if (flags & TH_FIN) { + if (((flags & IS_TCPFSM) == 0) && + ((tcpflags & TH_ACKMASK) == TH_ACK)) { + /* + * we saw an A, guess 'dir' is in ESTABLISHED + * mode + */ + switch (ostate) + { + case IPF_TCPS_CLOSED : + case IPF_TCPS_SYN_RECEIVED : + nstate = IPF_TCPS_HALF_ESTAB; + rval = 1; + break; + case IPF_TCPS_HALF_ESTAB : + case IPF_TCPS_ESTABLISHED : + nstate = IPF_TCPS_ESTABLISHED; + rval = 1; + break; + default : + break; + } + } /* - * We see an F from 'dir' which is in SYN_RECEIVED - * state and wants to close its side of the connection; - * SYN_RECEIVED -> FIN_WAIT_1 + * TODO: besides regular ACK packets we can have other + * packets as well; it is yet to be determined how we + * should initialize the states in those cases */ - state[dir] = TCPS_FIN_WAIT_1; - newage = fr_tcpidletimeout; - } - break; + break; - case TCPS_ESTABLISHED: /* 4 */ - if (flags & TH_FIN) { - /* - * 'dir' closed its side of the connection; this - * gives us a half-closed connection; - * ESTABLISHED -> FIN_WAIT_1 - */ - state[dir] = TCPS_FIN_WAIT_1; - newage = fr_tcphalfclosed; - } else if (flags & TH_ACK) { - /* an ACK, should we exclude other flags here? */ - if (ostate == TCPS_FIN_WAIT_1) { + case IPF_TCPS_LISTEN: /* 1 */ + /* NOT USED */ + break; + + case IPF_TCPS_SYN_SENT: /* 2 */ + if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) { /* - * We know the other side did an active close, - * so we are ACKing the recvd FIN packet (does - * the window matching code guarantee this?) - * and go into CLOSE_WAIT state; this gives us - * a half-closed connection + * A retransmitted SYN packet. 
We do not reset + * the timeout here to fr_tcptimeout because a + * connection connect timeout does not renew + * after every packet that is sent. We need to + * set rval so as to indicate the packet has + * passed the check for its flags being valid + * in the TCP FSM. Setting rval to 2 has the + * result of not resetting the timeout. */ - state[dir] = TCPS_CLOSE_WAIT; - newage = fr_tcphalfclosed; - } else if (ostate < TCPS_CLOSE_WAIT) + rval = 2; + } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == + TH_ACK) { /* - * Still a fully established connection, - * reset timeout + * we see an A from 'dir' which is in SYN_SENT + * state: 'dir' sent an A in response to an SA + * which it received, SYN_SENT -> ESTABLISHED */ - newage = fr_tcpidletimeout; - } - break; + nstate = IPF_TCPS_ESTABLISHED; + rval = 1; + } else if (tcpflags & TH_FIN) { + /* + * we see an F from 'dir' which is in SYN_SENT + * state and wants to close its side of the + * connection; SYN_SENT -> FIN_WAIT_1 + */ + nstate = IPF_TCPS_FIN_WAIT_1; + rval = 1; + } else if ((tcpflags & TH_OPENING) == TH_OPENING) { + /* + * we see an SA from 'dir' which is already in + * SYN_SENT state, this means we have a + * simultaneous open; SYN_SENT -> SYN_RECEIVED + */ + nstate = IPF_TCPS_SYN_RECEIVED; + rval = 1; + } + break; - case TCPS_CLOSE_WAIT: /* 5 */ - if (flags & TH_FIN) { - /* - * Application closed and 'dir' sent a FIN, we're now - * going into LAST_ACK state - */ - newage = fr_tcplastack; - state[dir] = TCPS_LAST_ACK; - } else { - /* - * We remain in CLOSE_WAIT because the other side has - * closed already and we did not close our side yet; - * reset timeout - */ - newage = fr_tcphalfclosed; - } - break; + case IPF_TCPS_SYN_RECEIVED: /* 3 */ + if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { + /* + * we see an A from 'dir' which was in + * SYN_RECEIVED state so it must now be in + * established state, SYN_RECEIVED -> + * ESTABLISHED + */ + nstate = IPF_TCPS_ESTABLISHED; + rval = 1; + } else if 
((tcpflags & ~(TH_ECN|TH_CWR)) == + TH_OPENING) { + /* + * We see an SA from 'dir' which is already in + * SYN_RECEIVED state. + */ + rval = 2; + } else if (tcpflags & TH_FIN) { + /* + * we see an F from 'dir' which is in + * SYN_RECEIVED state and wants to close its + * side of the connection; SYN_RECEIVED -> + * FIN_WAIT_1 + */ + nstate = IPF_TCPS_FIN_WAIT_1; + rval = 1; + } + break; - case TCPS_FIN_WAIT_1: /* 6 */ - if ((flags & TH_ACK) && ostate > TCPS_CLOSE_WAIT) { - /* - * If the other side is not active anymore it has sent - * us a FIN packet that we are ack'ing now with an ACK; - * this means both sides have now closed the connection - * and we go into TIME_WAIT - */ - /* - * XXX: how do we know we really are ACKing the FIN - * packet here? does the window code guarantee that? - */ - state[dir] = TCPS_TIME_WAIT; - newage = fr_tcptimeout; - } else - /* - * We closed our side of the connection already but the - * other side is still active (ESTABLISHED/CLOSE_WAIT); - * continue with this half-closed connection - */ - newage = fr_tcphalfclosed; - break; + case IPF_TCPS_HALF_ESTAB: /* 4 */ + if (ostate >= IPF_TCPS_HALF_ESTAB) { + if ((tcpflags & TH_ACKMASK) == TH_ACK) { + nstate = IPF_TCPS_ESTABLISHED; + rval = 1; + } + } + + break; - case TCPS_CLOSING: /* 7 */ - /* NOT USED */ - break; + case IPF_TCPS_ESTABLISHED: /* 5 */ + rval = 1; + if (tcpflags & TH_FIN) { + /* + * 'dir' closed its side of the connection; + * this gives us a half-closed connection; + * ESTABLISHED -> FIN_WAIT_1 + */ + nstate = IPF_TCPS_FIN_WAIT_1; + } else if (tcpflags & TH_ACK) { + /* + * an ACK, should we exclude other flags here? + */ + if (ostate == IPF_TCPS_FIN_WAIT_1) { + /* + * We know the other side did an active + * close, so we are ACKing the recvd + * FIN packet (does the window matching + * code guarantee this?) 
and go into + * CLOSE_WAIT state; this gives us a + * half-closed connection + */ + nstate = IPF_TCPS_CLOSE_WAIT; + } else if (ostate < IPF_TCPS_CLOSE_WAIT) { + /* + * still a fully established + * connection reset timeout + */ + nstate = IPF_TCPS_ESTABLISHED; + } + } + break; - case TCPS_LAST_ACK: /* 8 */ - if (flags & TH_ACK) { - if ((flags & TH_PUSH) || dlen) + case IPF_TCPS_CLOSE_WAIT: /* 6 */ + rval = 1; + if (tcpflags & TH_FIN) { /* - * There is still data to be delivered, reset - * timeout + * application closed and 'dir' sent a FIN, + * we're now going into LAST_ACK state */ - newage = fr_tcplastack; - else - newage = *age; - } - /* - * We cannot detect when we go out of LAST_ACK state to CLOSED - * because that is based on the reception of ACK packets; - * ipfilter can only detect that a packet has been sent by a - * host - */ - break; + nstate = IPF_TCPS_LAST_ACK; + } else { + /* + * we remain in CLOSE_WAIT because the other + * side has closed already and we did not + * close our side yet; reset timeout + */ + nstate = IPF_TCPS_CLOSE_WAIT; + } + break; - case TCPS_FIN_WAIT_2: /* 9 */ - /* NOT USED */ - break; + case IPF_TCPS_FIN_WAIT_1: /* 7 */ + rval = 1; + if ((tcpflags & TH_ACK) && + ostate > IPF_TCPS_CLOSE_WAIT) { + /* + * if the other side is not active anymore + * it has sent us a FIN packet that we are + * ack'ing now with an ACK; this means both + * sides have now closed the connection and + * we go into TIME_WAIT + */ + /* + * XXX: how do we know we really are ACKing + * the FIN packet here? does the window code + * guarantee that? 
+ */ + nstate = IPF_TCPS_TIME_WAIT; + } else { + /* + * we closed our side of the connection + * already but the other side is still active + * (ESTABLISHED/CLOSE_WAIT); continue with + * this half-closed connection + */ + nstate = IPF_TCPS_FIN_WAIT_1; + } + break; - case TCPS_TIME_WAIT: /* 10 */ - newage = fr_tcptimeout; /* default 4 mins */ - /* we're in 2MSL timeout now */ - break; + case IPF_TCPS_CLOSING: /* 8 */ + /* NOT USED */ + break; + + case IPF_TCPS_LAST_ACK: /* 9 */ + if (tcpflags & TH_ACK) { + if ((tcpflags & TH_PUSH) || dlen) + /* + * there is still data to be delivered, + * reset timeout + */ + rval = 1; + else + rval = 2; + } + /* + * we cannot detect when we go out of LAST_ACK state to + * CLOSED because that is based on the reception of ACK + * packets; ipfilter can only detect that a packet + * has been sent by a host + */ + break; + + case IPF_TCPS_FIN_WAIT_2: /* 10 */ + rval = 1; + if ((tcpflags & TH_OPENING) == TH_OPENING) + nstate = IPF_TCPS_SYN_RECEIVED; + else if (tcpflags & TH_SYN) + nstate = IPF_TCPS_SYN_SENT; + break; + + case IPF_TCPS_TIME_WAIT: /* 11 */ + /* we're in 2MSL timeout now */ + rval = 1; + break; + + default : +#if defined(_KERNEL) +# if SOLARIS + cmn_err(CE_NOTE, + "tcp %lx flags %x si %lx nstate %d ostate %d\n", + (u_long)tcp, tcpflags, (u_long)tqe, + nstate, ostate); +# else + printf("tcp %lx flags %x si %lx nstate %d ostate %d\n", + (u_long)tcp, tcpflags, (u_long)tqe, + nstate, ostate); +# endif +# ifdef DIAGNOSTIC + panic("invalid TCP state"); +# endif +#else + abort(); +#endif + break; + } } - if (newage != 0) { - *age = newage; - return 0; + /* + * If rval == 2 then do not update the queue position, but treat the + * packet as being ok. 
+ */ + if (rval == 2) + rval = 1; + else if (rval == 1) { + tqe->tqe_state[dir] = nstate; + if ((tqe->tqe_flags & TQE_RULEBASED) == 0) + fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate); } - return -1; + + return rval; } -#ifdef IPFILTER_LOG +/* ------------------------------------------------------------------------ */ +/* Function: ipstate_log */ +/* Returns: Nil */ +/* Parameters: is(I) - pointer to state structure */ +/* type(I) - type of log entry to create */ +/* */ +/* Creates a state table log entry using the state structure and type info. */ +/* passed in. Log packet/byte counts, source/destination address and other */ +/* protocol specific information. */ +/* ------------------------------------------------------------------------ */ void ipstate_log(is, type) struct ipstate *is; u_int type; { +#ifdef IPFILTER_LOG struct ipslog ipsl; - void *items[1]; size_t sizes[1]; + void *items[1]; int types[1]; + /* + * Copy information out of the ipstate_t structure and into the + * structure used for logging. 
+ */ ipsl.isl_type = type; - ipsl.isl_pkts = is->is_pkts + is->is_icmppkts; - ipsl.isl_bytes = is->is_bytes; + ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0]; + ipsl.isl_bytes[0] = is->is_bytes[0]; + ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1]; + ipsl.isl_bytes[1] = is->is_bytes[1]; + ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2]; + ipsl.isl_bytes[2] = is->is_bytes[2]; + ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3]; + ipsl.isl_bytes[3] = is->is_bytes[3]; ipsl.isl_src = is->is_src; ipsl.isl_dst = is->is_dst; ipsl.isl_p = is->is_p; ipsl.isl_v = is->is_v; ipsl.isl_flags = is->is_flags; + ipsl.isl_tag = is->is_tag; ipsl.isl_rulen = is->is_rulen; - ipsl.isl_group = is->is_group; + (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN); + if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { ipsl.isl_sport = is->is_sport; ipsl.isl_dport = is->is_dport; @@ -2127,13 +3392,14 @@ u_int type; ipsl.isl_state[1] = is->is_state[1]; } } else if (ipsl.isl_p == IPPROTO_ICMP) { - ipsl.isl_itype = is->is_icmp.ics_type; + ipsl.isl_itype = is->is_icmp.ici_type; } else if (ipsl.isl_p == IPPROTO_ICMPV6) { - ipsl.isl_itype = is->is_icmp.ics_type; + ipsl.isl_itype = is->is_icmp.ici_type; } else { ipsl.isl_ps.isl_filler[0] = 0; ipsl.isl_ps.isl_filler[1] = 0; } + items[0] = &ipsl; sizes[0] = sizeof(ipsl); types[0] = 0; @@ -2143,26 +3409,35 @@ u_int type; } else { ATOMIC_INCL(ips_stats.iss_logfail); } -} #endif +} #ifdef USE_INET6 -frentry_t *fr_checkicmp6matchingstate(ip, fin) -ip6_t *ip; +/* ------------------------------------------------------------------------ */ +/* Function: fr_checkicmp6matchingstate */ +/* Returns: ipstate_t* - NULL == no match found, */ +/* else pointer to matching state entry */ +/* Parameters: fin(I) - pointer to packet information */ +/* Locks: NULL == no locks, else Read Lock on ipf_state */ +/* */ +/* If we've got an ICMPv6 error message, using the information stored in */ +/* the ICMPv6 packet, look for a 
matching state table entry. */ +/* ------------------------------------------------------------------------ */ +static ipstate_t *fr_checkicmp6matchingstate(fin) fr_info_t *fin; { - register ipstate_t *is, **isp; - register u_short sport, dport; - register u_char pr; - struct icmp6_hdr *ic, *oic; - union i6addr dst, src; + struct icmp6_hdr *ic6, *oic; + int type, backward, i; + ipstate_t *is, **isp; + u_short sport, dport; + i6addr_t dst, src; u_short savelen; + icmpinfo_t *ic; fr_info_t ofin; tcphdr_t *tcp; - frentry_t *fr; - ip6_t *oip; - int type; + ip6_t *oip6; + u_char pr; u_int hv; /* @@ -2172,8 +3447,9 @@ fr_info_t *fin; */ if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN)) return NULL; - ic = (struct icmp6_hdr *)fin->fin_dp; - type = ic->icmp6_type; + + ic6 = fin->fin_dp; + type = ic6->icmp6_type; /* * If it's not an error type, then return */ @@ -2181,22 +3457,17 @@ fr_info_t *fin; (type != ICMP6_TIME_EXCEEDED) && (type != ICMP6_PARAM_PROB)) return NULL; - oip = (ip6_t *)((char *)ic + ICMPERR_ICMPHLEN); - if (fin->fin_plen < sizeof(*oip)) - return NULL; - - if ((oip->ip6_nxt != IPPROTO_TCP) && (oip->ip6_nxt != IPPROTO_UDP) && - (oip->ip6_nxt != IPPROTO_ICMPV6)) + oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN); + if (fin->fin_plen < sizeof(*oip6)) return NULL; - bzero((char *)&ofin, sizeof(ofin)); - ofin.fin_out = !fin->fin_out; - ofin.fin_ifp = fin->fin_ifp; - ofin.fin_mp = NULL; + bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); ofin.fin_v = 6; -#if SOLARIS - ofin.fin_qfm = NULL; -#endif + ofin.fin_ifp = fin->fin_ifp; + ofin.fin_out = !fin->fin_out; + ofin.fin_m = NULL; /* if dereferenced, panic XXX */ + ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ + /* * We make a fin entry to be able to feed it to * matchsrcdst. Note that not all fields are necessary @@ -2204,16 +3475,20 @@ fr_info_t *fin; * in fin_mp such that if someone uses it we'll get * a kernel panic. fr_matchsrcdst does not use this.
* - * watch out here, as ip is in host order and oip in network + * watch out here, as ip is in host order and oip6 in network * order. Any change we make must be undone afterwards. */ - savelen = oip->ip6_plen; - oip->ip6_plen = ip->ip6_plen - sizeof(*ip) - ICMPERR_ICMPHLEN; - fr_makefrip(sizeof(*oip), (ip_t *)oip, &ofin); - oip->ip6_plen = savelen; - - if (oip->ip6_nxt == IPPROTO_ICMPV6) { - oic = (struct icmp6_hdr *)(oip + 1); + savelen = oip6->ip6_plen; + oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; + ofin.fin_flx = FI_NOCKSUM; + ofin.fin_ip = (ip_t *)oip6; + ofin.fin_plen = oip6->ip6_plen; + (void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin); + ofin.fin_flx &= ~(FI_BAD|FI_SHORT); + oip6->ip6_plen = savelen; + + if (oip6->ip6_nxt == IPPROTO_ICMPV6) { + oic = (struct icmp6_hdr *)(oip6 + 1); /* * an ICMP error can only be generated as a result of an * ICMP query, not as the response on an ICMP error @@ -2227,61 +3502,71 @@ fr_info_t *fin; /* * perform a lookup of the ICMP packet in the state table */ - hv = (pr = oip->ip6_nxt); - src.in6 = oip->ip6_src; + hv = (pr = oip6->ip6_nxt); + src.in6 = oip6->ip6_src; hv += src.in4.s_addr; - dst.in6 = oip->ip6_dst; + dst.in6 = oip6->ip6_dst; hv += dst.in4.s_addr; hv += oic->icmp6_id; hv += oic->icmp6_seq; - hv %= fr_statesize; + hv = DOUBLE_HASH(hv); READ_ENTER(&ipf_state); - for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) + for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + ic = &is->is_icmp; + isp = &is->is_hnext; if ((is->is_p == pr) && - (oic->icmp6_id == is->is_icmp.ics_id) && - (oic->icmp6_seq == is->is_icmp.ics_seq) && - fr_matchsrcdst(is, src, dst, &ofin, NULL)) { + !(is->is_pass & FR_NOICMPERR) && + (oic->icmp6_id == ic->ici_id) && + (oic->icmp6_seq == ic->ici_seq) && + (is = fr_matchsrcdst(&ofin, is, &src, + &dst, NULL, FI_ICMPCMP))) { /* * in the state table ICMP query's are stored * with the type of the corresponding ICMP * response. 
Correct here */ - if (((is->is_type == ICMP6_ECHO_REPLY) && + if (((ic->ici_type == ICMP6_ECHO_REPLY) && (oic->icmp6_type == ICMP6_ECHO_REQUEST)) || - (is->is_type - 1 == oic->icmp6_type )) { + (ic->ici_type - 1 == oic->icmp6_type )) { ips_stats.iss_hits++; - is->is_pkts++; - is->is_bytes += fin->fin_plen; - return is->is_rule; + backward = IP6_NEQ(&is->is_dst, &src); + fin->fin_rev = !backward; + i = (backward << 1) + fin->fin_out; + is->is_icmppkts[i]++; + return is; } } + } RWLOCK_EXIT(&ipf_state); - return NULL; } - tcp = (tcphdr_t *)(oip + 1); - dport = tcp->th_dport; - sport = tcp->th_sport; - - hv = (pr = oip->ip6_nxt); - src.in6 = oip->ip6_src; - hv += src.in4.s_addr; + hv = (pr = oip6->ip6_nxt); + src.in6 = oip6->ip6_src; + hv += src.i6[0]; hv += src.i6[1]; hv += src.i6[2]; hv += src.i6[3]; - dst.in6 = oip->ip6_dst; - hv += dst.in4.s_addr; + dst.in6 = oip6->ip6_dst; + hv += dst.i6[0]; hv += dst.i6[1]; hv += dst.i6[2]; hv += dst.i6[3]; - hv += dport; - hv += sport; - hv %= fr_statesize; + + if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) { + tcp = (tcphdr_t *)(oip6 + 1); + dport = tcp->th_dport; + sport = tcp->th_sport; + hv += dport; + hv += sport; + } else + tcp = NULL; + hv = DOUBLE_HASH(hv); READ_ENTER(&ipf_state); - for (isp = &ips_table[hv]; (is = *isp); isp = &is->is_hnext) { + for (isp = &ips_table[hv]; ((is = *isp) != NULL); ) { + isp = &is->is_hnext; /* * Only allow this icmp though if the * encapsulated packet was allowed through the @@ -2289,22 +3574,229 @@ fr_info_t *fin; * of info present does not allow for checking against * tcp internals such as seq and ack numbers. 
*/ - if ((is->is_p == pr) && (is->is_v == 6) && - fr_matchsrcdst(is, src, dst, &ofin, tcp)) { - fr = is->is_rule; + if ((is->is_p != pr) || (is->is_v != 6) || + (is->is_pass & FR_NOICMPERR)) + continue; + is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP); + if (is != NULL) { ips_stats.iss_hits++; - is->is_pkts++; - is->is_bytes += fin->fin_plen; + backward = IP6_NEQ(&is->is_dst, &src); + fin->fin_rev = !backward; + i = (backward << 1) + fin->fin_out; + is->is_icmppkts[i]++; /* * we deliberately do not touch the timeouts * for the accompanying state table entry. * It remains to be seen if that is correct. XXX */ - RWLOCK_EXIT(&ipf_state); - return fr; + return is; } } RWLOCK_EXIT(&ipf_state); return NULL; } #endif + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_sttab_init */ +/* Returns: Nil */ +/* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ +/* */ +/* Initialise the array of timeout queues for TCP. */ +/* ------------------------------------------------------------------------ */ +void fr_sttab_init(tqp) +ipftq_t *tqp; +{ + int i; + + for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) { + tqp[i].ifq_ttl = 0; + tqp[i].ifq_ref = 1; + tqp[i].ifq_head = NULL; + tqp[i].ifq_tail = &tqp[i].ifq_head; + tqp[i].ifq_next = tqp + i + 1; + MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab"); + } + tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL; + tqp[IPF_TCPS_CLOSED].ifq_ttl = fr_tcpclosed; + tqp[IPF_TCPS_LISTEN].ifq_ttl = fr_tcptimeout; + tqp[IPF_TCPS_SYN_SENT].ifq_ttl = fr_tcptimeout; + tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = fr_tcptimeout; + tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = fr_tcpidletimeout; + tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = fr_tcphalfclosed; + tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = fr_tcphalfclosed; + tqp[IPF_TCPS_CLOSING].ifq_ttl = fr_tcptimeout; + tqp[IPF_TCPS_LAST_ACK].ifq_ttl = fr_tcplastack; + tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = fr_tcpclosewait; + tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = fr_tcptimeout; + 
tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = fr_tcptimeout; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_sttab_destroy */ +/* Returns: Nil */ +/* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */ +/* */ +/* Do whatever is necessary to "destroy" each of the entries in the array */ +/* of timeout queues for TCP. */ +/* ------------------------------------------------------------------------ */ +void fr_sttab_destroy(tqp) +ipftq_t *tqp; +{ + int i; + + for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) + MUTEX_DESTROY(&tqp[i].ifq_lock); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_statederef */ +/* Returns: Nil */ +/* Parameters: isp(I) - pointer to pointer to state table entry */ +/* */ +/* Decrement the reference counter for this state table entry and free it */ +/* if there are no more things using it. */ +/* */ +/* When operating in userland (ipftest), we have no timers to clear a state */ +/* entry. Therefore, we make a few simple tests before deleting an entry */ +/* outright. We compare states on each side looking for a combination of */ +/* TIME_WAIT (should really be FIN_WAIT_2?) and LAST_ACK. Then we factor */ +/* in packet direction with the interface list to make sure we don't */ +/* prematurely delete an entry on a final inbound packet that we're also */ +/* supposed to route elsewhere. */ +/* */ +/* Internal parameters: */ +/* state[0] = state of source (host that initiated connection) */ +/* state[1] = state of dest (host that accepted the connection) */ +/* */ +/* dir == 0 : a packet from source to dest */ +/* dir == 1 : a packet from dest to source */ +/* ------------------------------------------------------------------------ */ +void fr_statederef(fin, isp) +fr_info_t *fin; +ipstate_t **isp; +{ + ipstate_t *is = *isp; +#if 0 + int nstate, ostate, dir, eol; + + eol = 0; /* End-of-the-line flag.
*/ + dir = fin->fin_rev; + ostate = is->is_state[1 - dir]; + nstate = is->is_state[dir]; + /* + * Determine whether this packet is local or routed. State entries + * with us as the destination will have an interface list of + * int1,-,-,int1. Entries with us as the origin run as -,int1,int1,-. + */ + if ((fin->fin_p == IPPROTO_TCP) && (fin->fin_out == 0)) { + if ((strcmp(is->is_ifname[0], is->is_ifname[3]) == 0) && + (strcmp(is->is_ifname[1], is->is_ifname[2]) == 0)) { + if ((dir == 0) && + (strcmp(is->is_ifname[1], "-") == 0) && + (strcmp(is->is_ifname[0], "-") != 0)) { + eol = 1; + } else if ((dir == 1) && + (strcmp(is->is_ifname[0], "-") == 0) && + (strcmp(is->is_ifname[1], "-") != 0)) { + eol = 1; + } + } + } +#endif + + fin = fin; /* LINT */ + is = *isp; + *isp = NULL; + WRITE_ENTER(&ipf_state); + is->is_ref--; + if (is->is_ref == 0) { + is->is_ref++; /* To counter ref-- in fr_delstate() */ + fr_delstate(is, ISL_EXPIRE); +#ifndef _KERNEL +#if 0 + } else if (((fin->fin_out == 1) || (eol == 1)) && + ((ostate == IPF_TCPS_LAST_ACK) && + (nstate == IPF_TCPS_TIME_WAIT))) { + ; +#else + } else if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || + (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { +#endif + fr_delstate(is, ISL_ORPHAN); +#endif + } + RWLOCK_EXIT(&ipf_state); +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_setstatequeue */ +/* Returns: Nil */ +/* Parameters: is(I) - pointer to state structure */ +/* rev(I) - forward(0) or reverse(1) direction */ +/* Locks: ipf_state (read or write) */ +/* */ +/* Put the state entry on its default queue entry, using rev as a helper in */ +/* determining which queue it should be placed on.
*/ +/* ------------------------------------------------------------------------ */ +void fr_setstatequeue(is, rev) +ipstate_t *is; +int rev; +{ + ipftq_t *oifq, *nifq; + + + if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) + nifq = is->is_tqehead[rev]; + else + nifq = NULL; + + if (nifq == NULL) { + switch (is->is_p) + { +#ifdef USE_INET6 + case IPPROTO_ICMPV6 : + if (rev == 1) + nifq = &ips_icmpacktq; + else + nifq = &ips_icmptq; + break; +#endif + case IPPROTO_ICMP : + if (rev == 1) + nifq = &ips_icmpacktq; + else + nifq = &ips_icmptq; + break; + case IPPROTO_TCP : + nifq = ips_tqtqb + is->is_state[rev]; + break; + + case IPPROTO_UDP : + if (rev == 1) + nifq = &ips_udpacktq; + else + nifq = &ips_udptq; + break; + + default : + nifq = &ips_iptq; + break; + } + } + + oifq = is->is_sti.tqe_ifq; + /* + * If it's currently on a timeout queue, move it from one queue to + * another, else put it on the end of the newly determined queue. + */ + if (oifq != NULL) + fr_movequeue(&is->is_sti, oifq, nifq); + else + fr_queueappend(&is->is_sti, nifq, is); + return; +} diff --git a/sys/contrib/ipfilter/netinet/ip_state.h b/sys/contrib/ipfilter/netinet/ip_state.h index 73d699f..5e3fe87 100644 --- a/sys/contrib/ipfilter/netinet/ip_state.h +++ b/sys/contrib/ipfilter/netinet/ip_state.h @@ -1,21 +1,25 @@ +/* $FreeBSD$ */ + /* * Copyright (C) 1995-2001 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. 
* * @(#)ip_state.h 1.3 1/12/96 (C) 1995 Darren Reed - * $Id: ip_state.h,v 2.13.2.1 2000/07/08 02:15:35 darrenr Exp $ * $FreeBSD$ + * Id: ip_state.h,v 2.68.2.3 2005/03/03 14:24:11 darrenr Exp */ #ifndef __IP_STATE_H__ #define __IP_STATE_H__ #if defined(__STDC__) || defined(__GNUC__) -# define SIOCDELST _IOW('r', 61, struct ipstate *) +# define SIOCDELST _IOW('r', 61, struct ipfobj) #else -# define SIOCDELST _IOW(r, 61, struct ipstate *) +# define SIOCDELST _IOW(r, 61, struct ipfobj) #endif +struct ipscan; + #ifndef IPSTATE_SIZE # define IPSTATE_SIZE 5737 #endif @@ -29,54 +33,34 @@ (s2).s_addr, (d2).s_addr) -typedef struct udpstate { - u_short us_sport; - u_short us_dport; -} udpstate_t; - -typedef struct icmpstate { - u_short ics_id; - u_short ics_seq; - u_char ics_type; -} icmpstate_t; - -typedef struct tcpdata { - u_32_t td_end; - u_32_t td_maxend; - u_32_t td_maxwin; - u_char td_wscale; -} tcpdata_t; - -typedef struct tcpstate { - u_short ts_sport; - u_short ts_dport; - tcpdata_t ts_data[2]; - u_char ts_state[2]; -} tcpstate_t; - typedef struct ipstate { + ipfmutex_t is_lock; struct ipstate *is_next; struct ipstate **is_pnext; struct ipstate *is_hnext; struct ipstate **is_phnext; struct ipstate **is_me; + void *is_ifp[4]; + void *is_sync; + struct nat *is_nat[2]; frentry_t *is_rule; - U_QUAD_T is_pkts; - U_QUAD_T is_bytes; - U_QUAD_T is_icmppkts; - union i6addr is_src; - union i6addr is_dst; - void *is_ifp[4]; - u_long is_age; - u_int is_frage[2]; /* age from filter rule, forward & reverse */ + struct ipftq *is_tqehead[2]; + struct ipscan *is_isc; + U_QUAD_T is_pkts[4]; + U_QUAD_T is_bytes[4]; + U_QUAD_T is_icmppkts[4]; + struct ipftqent is_sti; + u_int is_frage[2]; + int is_ref; /* reference count */ + int is_isninc[2]; + u_short is_sumd[2]; + i6addr_t is_src; + i6addr_t is_dst; u_int is_pass; u_char is_p; /* Protocol */ - u_char is_v; /* IP version */ - u_char is_fsm; /* 1 = following FSM, 0 = not */ - u_char is_xxx; /* pad */ - u_int is_hv; /* hash value for 
this in the table */ - u_32_t is_rulen; /* rule number */ - u_32_t is_flags; /* flags for this structure */ + u_char is_v; + u_32_t is_hv; + u_32_t is_tag; u_32_t is_opt; /* packet options set */ u_32_t is_optmsk; /* " " mask */ u_short is_sec; /* security options set */ @@ -84,37 +68,74 @@ typedef struct ipstate { u_short is_auth; /* authentication options set */ u_short is_authmsk; /* " " mask */ union { - icmpstate_t is_ics; - tcpstate_t is_ts; - udpstate_t is_us; + icmpinfo_t is_ics; + tcpinfo_t is_ts; + udpinfo_t is_us; + greinfo_t is_ug; } is_ps; - u_32_t is_group; - char is_ifname[4][IFNAMSIZ]; -#if SOLARIS || defined(__sgi) || (__FreeBSD_version >= 500043) - kmutex_t is_lock; -#endif + u_32_t is_flags; + int is_flx[2][2]; + u_32_t is_rulen; /* rule number when created */ + u_32_t is_s0[2]; + u_short is_smsk[2]; + char is_group[FR_GROUPLEN]; + char is_sbuf[2][16]; + char is_ifname[4][LIFNAMSIZ]; } ipstate_t; +#define is_die is_sti.tqe_die +#define is_state is_sti.tqe_state +#define is_touched is_sti.tqe_touched #define is_saddr is_src.in4.s_addr #define is_daddr is_dst.in4.s_addr #define is_icmp is_ps.is_ics -#define is_type is_icmp.ics_type -#define is_code is_icmp.ics_code +#define is_type is_icmp.ici_type +#define is_code is_icmp.ici_code #define is_tcp is_ps.is_ts #define is_udp is_ps.is_us #define is_send is_tcp.ts_data[0].td_end #define is_dend is_tcp.ts_data[1].td_end #define is_maxswin is_tcp.ts_data[0].td_maxwin #define is_maxdwin is_tcp.ts_data[1].td_maxwin -#define is_swscale is_tcp.ts_data[0].td_wscale -#define is_dwscale is_tcp.ts_data[1].td_wscale #define is_maxsend is_tcp.ts_data[0].td_maxend #define is_maxdend is_tcp.ts_data[1].td_maxend +#define is_swinscale is_tcp.ts_data[0].td_winscale +#define is_dwinscale is_tcp.ts_data[1].td_winscale +#define is_swinflags is_tcp.ts_data[0].td_winflags +#define is_dwinflags is_tcp.ts_data[1].td_winflags #define is_sport is_tcp.ts_sport #define is_dport is_tcp.ts_dport -#define is_state is_tcp.ts_state 
#define is_ifpin is_ifp[0] #define is_ifpout is_ifp[2] +#define is_gre is_ps.is_ug +#define is_call is_gre.gs_call + +#define IS_WSPORT SI_W_SPORT /* 0x00100 */ +#define IS_WDPORT SI_W_DPORT /* 0x00200 */ +#define IS_WSADDR SI_W_SADDR /* 0x00400 */ +#define IS_WDADDR SI_W_DADDR /* 0x00800 */ +#define IS_NEWFR SI_NEWFR /* 0x01000 */ +#define IS_CLONE SI_CLONE /* 0x02000 */ +#define IS_CLONED SI_CLONED /* 0x04000 */ +#define IS_TCPFSM 0x10000 +#define IS_STRICT 0x20000 +#define IS_ISNSYN 0x40000 +#define IS_ISNACK 0x80000 +#define IS_STATESYNC 0x100000 +/* + * IS_SC flags are for scan-operations that need to be recognised in state. + */ +#define IS_SC_CLIENT 0x10000000 +#define IS_SC_SERVER 0x20000000 +#define IS_SC_MATCHC 0x40000000 +#define IS_SC_MATCHS 0x80000000 +#define IS_SC_MATCHALL (IS_SC_MATCHC|IS_SC_MATCHS) +#define IS_SC_ALL (IS_SC_MATCHC|IS_SC_MATCHS|IS_SC_CLIENT|IS_SC_SERVER) + +/* + * Flags that can be passed into fr_addstate + */ +#define IS_INHERITED 0x0fffff00 #define TH_OPENING (TH_SYN|TH_ACK) /* @@ -124,6 +145,8 @@ typedef struct ipstate { * Bits 4 - 7 are set from the initial packet and contain what the packet * anded with bits 0-3 must match. * Bits 8,9 are used to indicate wildcard source/destination port matching. + * Bits 10,11 are reserved for other wildcard flag compatibility. + * Bits 12,13 are for scanning.
*/ typedef struct ipstate_save { @@ -136,10 +159,11 @@ typedef struct ipstate_save { typedef struct ipslog { - U_QUAD_T isl_pkts; - U_QUAD_T isl_bytes; - union i6addr isl_src; - union i6addr isl_dst; + U_QUAD_T isl_pkts[4]; + U_QUAD_T isl_bytes[4]; + i6addr_t isl_src; + i6addr_t isl_dst; + u_32_t isl_tag; u_short isl_type; union { u_short isl_filler[2]; @@ -151,23 +175,28 @@ typedef struct ipslog { u_char isl_flags; u_char isl_state[2]; u_32_t isl_rulen; - u_32_t isl_group; + char isl_group[FR_GROUPLEN]; } ipslog_t; #define isl_sport isl_ps.isl_ports[0] #define isl_dport isl_ps.isl_ports[1] #define isl_itype isl_ps.isl_icmp -#define ISL_NEW 0 -#define ISL_EXPIRE 0xffff -#define ISL_FLUSH 0xfffe -#define ISL_REMOVE 0xfffd +#define ISL_NEW 0 +#define ISL_CLONE 1 +#define ISL_EXPIRE 0xffff +#define ISL_FLUSH 0xfffe +#define ISL_REMOVE 0xfffd +#define ISL_INTERMEDIATE 0xfffc +#define ISL_KILLED 0xfffb +#define ISL_ORPHAN 0xfffa typedef struct ips_stat { u_long iss_hits; u_long iss_miss; u_long iss_max; + u_long iss_maxref; u_long iss_tcp; u_long iss_udp; u_long iss_icmp; @@ -178,8 +207,15 @@ typedef struct ips_stat { u_long iss_logged; u_long iss_logfail; u_long iss_inuse; + u_long iss_wild; + u_long iss_killed; + u_long iss_ticks; + u_long iss_bucketfull; + int iss_statesize; + int iss_statemax; ipstate_t **iss_table; ipstate_t *iss_list; + u_long *iss_bucketlen; } ips_stat_t; @@ -193,21 +229,34 @@ extern u_long fr_udptimeout; extern u_long fr_udpacktimeout; extern u_long fr_icmptimeout; extern u_long fr_icmpacktimeout; -extern ipstate_t *ips_list; +extern u_long fr_iptimeout; +extern int fr_statemax; +extern int fr_statesize; extern int fr_state_lock; +extern int fr_state_maxbucket; +extern int fr_state_maxbucket_reset; +extern ipstate_t *ips_list; +extern ipftq_t *ips_utqe; +extern ipftq_t ips_tqtqb[IPF_TCP_NSTATES]; + extern int fr_stateinit __P((void)); -extern int fr_tcpstate __P((ipstate_t *, fr_info_t *, ip_t *, tcphdr_t *)); -extern ipstate_t *fr_addstate 
__P((ip_t *, fr_info_t *, ipstate_t **, u_int)); -extern frentry_t *fr_checkstate __P((ip_t *, fr_info_t *)); -extern void ip_statesync __P((void *)); +extern ipstate_t *fr_addstate __P((fr_info_t *, ipstate_t **, u_int)); +extern frentry_t *fr_checkstate __P((struct fr_info *, u_32_t *)); +extern ipstate_t *fr_stlookup __P((fr_info_t *, tcphdr_t *, ipftq_t **)); +extern void fr_statesync __P((void *)); extern void fr_timeoutstate __P((void)); -extern int fr_tcp_age __P((u_long *, u_char *, fr_info_t *, int, int)); +extern int fr_tcp_age __P((struct ipftqent *, struct fr_info *, + struct ipftq *, int)); +extern int fr_tcpinwindow __P((struct fr_info *, struct tcpdata *, + struct tcpdata *, tcphdr_t *, int)); extern void fr_stateunload __P((void)); extern void ipstate_log __P((struct ipstate *, u_int)); -#if defined(__NetBSD__) || defined(__OpenBSD__) -extern int fr_state_ioctl __P((caddr_t, u_long, int)); -#else -extern int fr_state_ioctl __P((caddr_t, int, int)); -#endif +extern int fr_state_ioctl __P((caddr_t, ioctlcmd_t, int)); +extern void fr_stinsert __P((struct ipstate *, int)); +extern void fr_sttab_init __P((struct ipftq *)); +extern void fr_sttab_destroy __P((struct ipftq *)); +extern void fr_updatestate __P((fr_info_t *, ipstate_t *, ipftq_t *)); +extern void fr_statederef __P((fr_info_t *, ipstate_t **)); +extern void fr_setstatequeue __P((ipstate_t *, int)); #endif /* __IP_STATE_H__ */ diff --git a/sys/contrib/ipfilter/netinet/ipl.h b/sys/contrib/ipfilter/netinet/ipl.h index 31dfa15..364423c 100644 --- a/sys/contrib/ipfilter/netinet/ipl.h +++ b/sys/contrib/ipfilter/netinet/ipl.h @@ -1,15 +1,20 @@ +/* $FreeBSD$ */ + /* - * Copyright (C) 1993-2002 by Darren Reed. + * Copyright (C) 1993-2001, 2003 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. 
* * @(#)ipl.h 1.21 6/5/96 * $FreeBSD$ + * Id: ipl.h,v 2.52.2.9 2005/03/30 14:14:05 darrenr Exp */ #ifndef __IPL_H__ #define __IPL_H__ -#define IPL_VERSION "IP Filter: v3.4.35" +#define IPL_VERSION "IP Filter: v4.1.8" + +#define IPFILTER_VERSION 4010800 #endif diff --git a/sys/contrib/ipfilter/netinet/mlfk_ipl.c b/sys/contrib/ipfilter/netinet/mlfk_ipl.c index dc5a1c8..2869697 100644 --- a/sys/contrib/ipfilter/netinet/mlfk_ipl.c +++ b/sys/contrib/ipfilter/netinet/mlfk_ipl.c @@ -1,29 +1,10 @@ +/* $FreeBSD$ */ + /* - * Copyright 1999 Guido van Rooij. All rights reserved. - * - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS - * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE HOLDER OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * Copyright (C) 2000 by Darren Reed. * * $FreeBSD$ + * See the IPFILTER.LICENCE file for details on licencing. 
*/ @@ -37,13 +18,6 @@ #include #include #include -#include -#if (__FreeBSD_version >= 199511) -# include -# include -# include -# include -#endif #include @@ -53,156 +27,248 @@ #include #include #include -#include -static struct cdev *ipf_devs[IPL_LOGMAX + 1]; +#if __FreeBSD_version >= 502116 +static struct cdev *ipf_devs[IPL_LOGSIZE]; +#else +static dev_t ipf_devs[IPL_LOGSIZE]; +#endif + +static int sysctl_ipf_int ( SYSCTL_HANDLER_ARGS ); +static int ipf_modload(void); +static int ipf_modunload(void); SYSCTL_DECL(_net_inet); +#define SYSCTL_IPF(parent, nbr, name, access, ptr, val, descr) \ + SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \ + ptr, val, sysctl_ipf_int, "I", descr); +#define CTLFLAG_OFF 0x00800000 /* IPFilter must be disabled */ +#define CTLFLAG_RWO (CTLFLAG_RW|CTLFLAG_OFF) SYSCTL_NODE(_net_inet, OID_AUTO, ipf, CTLFLAG_RW, 0, "IPF"); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_flags, CTLFLAG_RW, &fr_flags, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_pass, CTLFLAG_RW, &fr_pass, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_active, CTLFLAG_RD, &fr_active, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcpidletimeout, CTLFLAG_RW, +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_flags, CTLFLAG_RW, &fr_flags, 0, ""); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_pass, CTLFLAG_RW, &fr_pass, 0, ""); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_active, CTLFLAG_RD, &fr_active, 0, ""); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpidletimeout, CTLFLAG_RWO, &fr_tcpidletimeout, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcpclosewait, CTLFLAG_RW, +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcphalfclosed, CTLFLAG_RWO, + &fr_tcphalfclosed, 0, ""); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosewait, CTLFLAG_RWO, &fr_tcpclosewait, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcplastack, CTLFLAG_RW, +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcplastack, CTLFLAG_RWO, &fr_tcplastack, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcptimeout, CTLFLAG_RW, +SYSCTL_IPF(_net_inet_ipf, 
OID_AUTO, fr_tcptimeout, CTLFLAG_RWO, &fr_tcptimeout, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcpclosed, CTLFLAG_RW, +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosed, CTLFLAG_RWO, &fr_tcpclosed, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcphalfclosed, CTLFLAG_RW, - &fr_tcphalfclosed, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_udptimeout, CTLFLAG_RW, +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_udptimeout, CTLFLAG_RWO, &fr_udptimeout, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_udpacktimeout, CTLFLAG_RW, +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_udpacktimeout, CTLFLAG_RWO, &fr_udpacktimeout, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_icmptimeout, CTLFLAG_RW, +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_icmptimeout, CTLFLAG_RWO, &fr_icmptimeout, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_icmpacktimeout, CTLFLAG_RW, - &fr_icmpacktimeout, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_defnatage, CTLFLAG_RW, +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_defnatage, CTLFLAG_RWO, &fr_defnatage, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_ipfrttl, CTLFLAG_RW, +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_ipfrttl, CTLFLAG_RW, &fr_ipfrttl, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, ipl_unreach, CTLFLAG_RW, - &ipl_unreach, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_running, CTLFLAG_RD, +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_running, CTLFLAG_RD, &fr_running, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_authsize, CTLFLAG_RD, +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_statesize, CTLFLAG_RWO, + &fr_statesize, 0, ""); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_statemax, CTLFLAG_RWO, + &fr_statemax, 0, ""); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_nattable_sz, CTLFLAG_RWO, + &ipf_nattable_sz, 0, ""); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_natrules_sz, CTLFLAG_RWO, + &ipf_natrules_sz, 0, ""); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_rdrrules_sz, CTLFLAG_RWO, + &ipf_rdrrules_sz, 0, ""); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_hostmap_sz, CTLFLAG_RWO, + 
&ipf_hostmap_sz, 0, ""); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_authsize, CTLFLAG_RWO, &fr_authsize, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_authused, CTLFLAG_RD, +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_authused, CTLFLAG_RD, &fr_authused, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_defaultauthage, CTLFLAG_RW, +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_defaultauthage, CTLFLAG_RW, &fr_defaultauthage, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_chksrc, CTLFLAG_RW, &fr_chksrc, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, ippr_ftp_pasvonly, CTLFLAG_RW, - &ippr_ftp_pasvonly, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_minttl, CTLFLAG_RW, &fr_minttl, 0, ""); -SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_minttllog, CTLFLAG_RW, - &fr_minttllog, 0, ""); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_chksrc, CTLFLAG_RW, &fr_chksrc, 0, ""); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_minttl, CTLFLAG_RW, &fr_minttl, 0, ""); +#define CDEV_MAJOR 79 +#if __FreeBSD_version >= 501000 static struct cdevsw ipl_cdevsw = { +# if __FreeBSD_version >= 502103 .d_version = D_VERSION, - .d_flags = 0, + .d_flags = 0, /* D_NEEDGIANT - Should be SMP safe */ +# endif .d_open = iplopen, .d_close = iplclose, .d_read = iplread, .d_ioctl = iplioctl, .d_name = "ipl", +# if __FreeBSD_version < 600000 + .d_maj = CDEV_MAJOR, +# endif +}; +#else +static struct cdevsw ipl_cdevsw = { + /* open */ iplopen, + /* close */ iplclose, + /* read */ iplread, + /* write */ iplwrite, + /* ioctl */ iplioctl, + /* poll */ nopoll, + /* mmap */ nommap, + /* strategy */ nostrategy, + /* name */ "ipl", + /* maj */ CDEV_MAJOR, + /* dump */ nodump, + /* psize */ nopsize, + /* flags */ 0, +# if (__FreeBSD_version < 500043) + /* bmaj */ -1, +# endif + /* kqfilter */ NULL }; - -#if (__FreeBSD_version >= 500000) -kmutex_t ipl_mutex, ipf_rw; -KRWLOCK_T ipf_mutex, ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth; #endif +static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, IPAUTH_NAME, + IPSYNC_NAME, 
IPSCAN_NAME, IPLOOKUP_NAME, NULL }; + + static int ipfilter_modevent(module_t mod, int type, void *unused) { - char *c; - int i, error = 0; + int error = 0; - switch (type) { + switch (type) + { case MOD_LOAD : + error = ipf_modload(); + break; - error = iplattach(); - if (error) - break; -#if (__FreeBSD_version >= 500000) - MUTEX_INIT(&ipl_mutex, "ipf log mutex", NULL); - MUTEX_INIT(&ipf_rw, "ipf rw mutex", NULL); - RWLOCK_INIT(&ipf_mutex, "ipf filter rwlock", NULL); - RWLOCK_INIT(&ipf_frag, "ipf fragment rwlock", NULL); - RWLOCK_INIT(&ipf_state, "ipf IP state rwlock", NULL); - RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock", NULL); - RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock", NULL); - RWLOCK_INIT(&ipf_auth, "ipf User-Auth rwlock", NULL); -#endif + case MOD_UNLOAD : + error = ipf_modunload(); + break; + default: + error = EINVAL; + break; + } + return error; +} - c = NULL; - for(i = strlen(IPL_NAME); i > 0; i--) - if (IPL_NAME[i] == '/') { - c = &IPL_NAME[i + 1]; - break; - } - if (!c) - c = IPL_NAME; - ipf_devs[IPL_LOGIPF] = - make_dev(&ipl_cdevsw, IPL_LOGIPF, 0, 0, 0600, c); - c = NULL; - for(i = strlen(IPL_NAT); i > 0; i--) - if (IPL_NAT[i] == '/') { - c = &IPL_NAT[i + 1]; - break; - } - if (!c) - c = IPL_NAT; - ipf_devs[IPL_LOGNAT] = - make_dev(&ipl_cdevsw, IPL_LOGNAT, 0, 0, 0600, c); +static int +ipf_modload() +{ + char *defpass, *c, *str; + int i, j, error; - c = NULL; - for(i = strlen(IPL_STATE); i > 0; i--) - if (IPL_STATE[i] == '/') { - c = &IPL_STATE[i + 1]; - break; - } - if (!c) - c = IPL_STATE; - ipf_devs[IPL_LOGSTATE] = - make_dev(&ipl_cdevsw, IPL_LOGSTATE, 0, 0, 0600, c); + error = iplattach(); + if (error) + return error; + + for (i = 0; i < IPL_LOGSIZE; i++) + ipf_devs[i] = NULL; + for (i = 0; (str = ipf_devfiles[i]); i++) { c = NULL; - for(i = strlen(IPL_AUTH); i > 0; i--) - if (IPL_AUTH[i] == '/') { - c = &IPL_AUTH[i + 1]; + for(j = strlen(str); j > 0; j--) + if (str[j] == '/') { + c = str + j + 1; break; } if (!c) - c = IPL_AUTH; - 
ipf_devs[IPL_LOGAUTH] = - make_dev(&ipl_cdevsw, IPL_LOGAUTH, 0, 0, 0600, c); + c = str; + ipf_devs[i] = make_dev(&ipl_cdevsw, i, 0, 0, 0600, c); + } - break; - case MOD_UNLOAD : - destroy_dev(ipf_devs[IPL_LOGIPF]); - destroy_dev(ipf_devs[IPL_LOGNAT]); - destroy_dev(ipf_devs[IPL_LOGSTATE]); - destroy_dev(ipf_devs[IPL_LOGAUTH]); + if (FR_ISPASS(fr_pass)) + defpass = "pass"; + else if (FR_ISBLOCK(fr_pass)) + defpass = "block"; + else + defpass = "no-match -> block"; + + printf("%s initialized. Default = %s all, Logging = %s%s\n", + ipfilter_version, defpass, +#ifdef IPFILTER_LOG + "enabled", +#else + "disabled", +#endif +#ifdef IPFILTER_COMPILED + " (COMPILED)" +#else + "" +#endif + ); + return 0; +} + + +static int +ipf_modunload() +{ + int error, i; + + if (fr_refcnt) + return EBUSY; + + if (fr_running >= 0) { error = ipldetach(); - break; - default: - error = EINVAL; - break; + if (error != 0) + return error; + } else + error = 0; + + fr_running = -2; + + for (i = 0; ipf_devfiles[i]; i++) { + if (ipf_devs[i] != NULL) + destroy_dev(ipf_devs[i]); } + + printf("%s unloaded\n", ipfilter_version); + return error; } + static moduledata_t ipfiltermod = { - IPL_VERSION, + "ipfilter", ipfilter_modevent, - 0 + 0 }; + + DECLARE_MODULE(ipfilter, ipfiltermod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); +#ifdef MODULE_VERSION +MODULE_VERSION(ipfilter, 1); +#endif + + +#ifdef SYSCTL_IPF +int +sysctl_ipf_int ( SYSCTL_HANDLER_ARGS ) +{ + int error = 0; + + if (arg1) + error = SYSCTL_OUT(req, arg1, sizeof(int)); + else + error = SYSCTL_OUT(req, &arg2, sizeof(int)); + + if (error || !req->newptr) + return (error); + + if (!arg1) + error = EPERM; + else { + if ((oidp->oid_kind & CTLFLAG_OFF) && (fr_running > 0)) + error = EBUSY; + else + error = SYSCTL_IN(req, arg1, sizeof(int)); + } + return (error); +} +#endif -- cgit v1.1