diff options
Diffstat (limited to 'sys/netinet/ip_nat.c')
-rw-r--r-- | sys/netinet/ip_nat.c | 2739 |
1 files changed, 2739 insertions, 0 deletions
diff --git a/sys/netinet/ip_nat.c b/sys/netinet/ip_nat.c new file mode 100644 index 0000000..816d8e7 --- /dev/null +++ b/sys/netinet/ip_nat.c @@ -0,0 +1,2739 @@ +/* + * Copyright (C) 1995-2000 by Darren Reed. + * + * Redistribution and use in source and binary forms are permitted + * provided that this notice is preserved and due credit is given + * to the original author and the contributors. + * + * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com) + */ +#if !defined(lint) +/*static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.37.2.16 2000/07/18 13:57:40 darrenr Exp $";*/ +static const char rcsid[] = "@(#)$FreeBSD$"; +#endif + +#if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL) +#define _KERNEL +#endif + +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> +#include <sys/file.h> +#if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ + defined(_KERNEL) +# include "opt_ipfilter_log.h" +#endif +#if !defined(_KERNEL) && !defined(KERNEL) +# include <stdio.h> +# include <string.h> +# include <stdlib.h> +#endif +#if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000) +# include <sys/filio.h> +# include <sys/fcntl.h> +#else +# include <sys/ioctl.h> +#endif +#include <sys/fcntl.h> +#include <sys/uio.h> +#ifndef linux +# include <sys/protosw.h> +#endif +#include <sys/socket.h> +#if defined(_KERNEL) && !defined(linux) +# include <sys/systm.h> +#endif +#if !defined(__SVR4) && !defined(__svr4__) +# ifndef linux +# include <sys/mbuf.h> +# endif +#else +# include <sys/filio.h> +# include <sys/byteorder.h> +# ifdef _KERNEL +# include <sys/dditypes.h> +# endif +# include <sys/stream.h> +# include <sys/kmem.h> +#endif +#if __FreeBSD_version >= 300000 +# include <sys/queue.h> +#endif +#include <net/if.h> +#if __FreeBSD_version >= 300000 +# include <net/if_var.h> +# if defined(_KERNEL) && !defined(IPFILTER_LKM) +# include "opt_ipfilter.h" +# endif +#endif +#ifdef sun +# include <net/af.h> +#endif +#include <net/route.h> +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> + +#ifdef __sgi +# ifdef IFF_DRVRLOCK /* IRIX6 */ +#include <sys/hashing.h> +#include <netinet/in_var.h> +# endif +#endif + +#ifdef RFC1825 +# include <vpn/md5.h> +# include <vpn/ipsec.h> +extern struct ifnet vpnif; +#endif + +#ifndef linux +# include <netinet/ip_var.h> +#endif +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <netinet/ip_icmp.h> +#include "netinet/ip_compat.h" +#include <netinet/tcpip.h> +#include "netinet/ip_fil.h" +#include "netinet/ip_proxy.h" +#include "netinet/ip_nat.h" +#include "netinet/ip_frag.h" +#include "netinet/ip_state.h" +#if (__FreeBSD_version >= 300000) +# include <sys/malloc.h> +#endif +#ifndef MIN +# define MIN(a,b) (((a)<(b))?(a):(b)) +#endif +#undef SOCKADDR_IN +#define SOCKADDR_IN struct sockaddr_in + +nat_t **nat_table[2] = { NULL, NULL }, + *nat_instances = NULL; +ipnat_t *nat_list = NULL; +u_int ipf_nattable_sz = NAT_TABLE_SZ; +u_int ipf_natrules_sz = NAT_SIZE; +u_int ipf_rdrrules_sz = RDR_SIZE; +u_int ipf_hostmap_sz = HOSTMAP_SIZE; +u_32_t nat_masks = 0; +u_32_t rdr_masks = 0; +ipnat_t **nat_rules = NULL; +ipnat_t **rdr_rules = NULL; +hostmap_t **maptable = NULL; + +u_long fr_defnatage = DEF_NAT_AGE, + fr_defnaticmpage = 6; /* 3 seconds */ +natstat_t nat_stats; +int fr_nat_lock = 0; +#if (SOLARIS || defined(__sgi)) && defined(_KERNEL) +extern kmutex_t ipf_rw; +extern KRWLOCK_T ipf_nat; +#endif + +static int nat_flushtable __P((void)); +static int nat_clearlist __P((void)); +static void nat_addnat __P((struct ipnat *)); +static void nat_addrdr __P((struct ipnat *)); +static void nat_delete __P((struct nat *)); +static void nat_delrdr __P((struct ipnat *)); +static void nat_delnat __P((struct ipnat *)); +static int fr_natgetent __P((caddr_t)); +static int fr_natgetsz __P((caddr_t)); +static int fr_natputent __P((caddr_t)); +static void nat_tabmove __P((nat_t *, u_32_t)); +static int nat_match __P((fr_info_t *, ipnat_t *, ip_t *)); +static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr, + struct in_addr)); +static void nat_hostmapdel __P((struct hostmap *)); + + +int nat_init() +{ + KMALLOCS(nat_table[0], nat_t **, sizeof(nat_t *) * ipf_nattable_sz); + if (nat_table[0] != NULL) + bzero((char *)nat_table[0], ipf_nattable_sz * sizeof(nat_t *)); + else + return -1; + + KMALLOCS(nat_table[1], nat_t **, sizeof(nat_t *) * ipf_nattable_sz); + if (nat_table[1] != NULL) + bzero((char *)nat_table[1], ipf_nattable_sz * sizeof(nat_t *)); + else + return -1; + + KMALLOCS(nat_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_natrules_sz); + if (nat_rules != NULL) + bzero((char *)nat_rules, ipf_natrules_sz * sizeof(ipnat_t *)); + else + return -1; + + KMALLOCS(rdr_rules, ipnat_t **, sizeof(ipnat_t *) * ipf_rdrrules_sz); + if (rdr_rules != NULL) + bzero((char *)rdr_rules, ipf_rdrrules_sz * sizeof(ipnat_t *)); + else + return -1; + + KMALLOCS(maptable, hostmap_t **, sizeof(hostmap_t *) * ipf_hostmap_sz); + if (maptable != NULL) + bzero((char *)maptable, sizeof(hostmap_t *) * ipf_hostmap_sz); + else + return -1; + return 0; +} + + +static void nat_addrdr(n) +ipnat_t *n; +{ + ipnat_t **np; + u_32_t j; + u_int hv; + int k; + + k = countbits(n->in_outmsk); + if ((k >= 0) && (k != 32)) + rdr_masks |= 1 << k; + j = (n->in_outip & n->in_outmsk); + hv = NAT_HASH_FN(j, 0, ipf_rdrrules_sz); + np = rdr_rules + hv; + while (*np != NULL) + np = &(*np)->in_rnext; + n->in_rnext = NULL; + n->in_prnext = np; + *np = n; +} + + +static void nat_addnat(n) +ipnat_t *n; +{ + ipnat_t **np; + u_32_t j; + u_int hv; + int k; + + k = countbits(n->in_inmsk); + if ((k >= 0) && (k != 32)) + nat_masks |= 1 << k; + j = (n->in_inip & n->in_inmsk); + hv = NAT_HASH_FN(j, 0, ipf_natrules_sz); + np = nat_rules + hv; + while (*np != NULL) + np = &(*np)->in_mnext; + n->in_mnext = NULL; + n->in_pmnext = np; + *np = n; +} + + +static void nat_delrdr(n) +ipnat_t *n; +{ + if (n->in_rnext) + n->in_rnext->in_prnext = n->in_prnext; + *n->in_prnext = n->in_rnext; +} + + +static void nat_delnat(n) +ipnat_t *n; +{ + if (n->in_mnext) + n->in_mnext->in_pmnext = n->in_pmnext; + *n->in_pmnext = n->in_mnext; +} + + +/* + * check if an ip address has already been allocated for a given mapping that + * is not doing port based translation. + * + * Must be called with ipf_nat held as a write lock. + */ +static struct hostmap *nat_hostmap(np, real, map) +ipnat_t *np; +struct in_addr real; +struct in_addr map; +{ + hostmap_t *hm; + u_int hv; + + hv = real.s_addr % HOSTMAP_SIZE; + for (hm = maptable[hv]; hm; hm = hm->hm_next) + if ((hm->hm_realip.s_addr == real.s_addr) && + (np == hm->hm_ipnat)) { + hm->hm_ref++; + return hm; + } + + KMALLOC(hm, hostmap_t *); + if (hm) { + hm->hm_next = maptable[hv]; + hm->hm_pnext = maptable + hv; + if (maptable[hv]) + maptable[hv]->hm_pnext = &hm->hm_next; + maptable[hv] = hm; + hm->hm_ipnat = np; + hm->hm_realip = real; + hm->hm_mapip = map; + hm->hm_ref = 1; + } + return hm; +} + + +/* + * Must be called with ipf_nat held as a write lock. + */ +static void nat_hostmapdel(hm) +struct hostmap *hm; +{ + ATOMIC_DEC32(hm->hm_ref); + if (hm->hm_ref == 0) { + if (hm->hm_next) + hm->hm_next->hm_pnext = hm->hm_pnext; + *hm->hm_pnext = hm->hm_next; + KFREE(hm); + } +} + + +void fix_outcksum(sp, n) +u_short *sp; +u_32_t n; +{ + register u_short sumshort; + register u_32_t sum1; + + if (!n) + return; +#if SOLARIS2 >= 6 + else if (n & NAT_HW_CKSUM) { + *sp = n & 0xffff; + return; + } +#endif + sum1 = (~ntohs(*sp)) & 0xffff; + sum1 += (n); + sum1 = (sum1 >> 16) + (sum1 & 0xffff); + /* Again */ + sum1 = (sum1 >> 16) + (sum1 & 0xffff); + sumshort = ~(u_short)sum1; + *(sp) = htons(sumshort); +} + + +void fix_incksum(sp, n) +u_short *sp; +u_32_t n; +{ + register u_short sumshort; + register u_32_t sum1; + + if (!n) + return; +#if SOLARIS2 >= 6 + else if (n & NAT_HW_CKSUM) { + *sp = n & 0xffff; + return; + } +#endif +#ifdef sparc + sum1 = (~(*sp)) & 0xffff; +#else + sum1 = (~ntohs(*sp)) & 0xffff; +#endif + sum1 += ~(n) & 0xffff; + sum1 = (sum1 >> 16) + (sum1 & 0xffff); + /* Again */ + sum1 = (sum1 >> 16) + (sum1 & 0xffff); + sumshort = ~(u_short)sum1; + *(sp) = htons(sumshort); +} + + +/* + * fix_datacksum is used *only* for the adjustments of checksums in the data + * section of an IP packet. + * + * The only situation in which you need to do this is when NAT'ing an + * ICMP error message. Such a message, contains in its body the IP header + * of the original IP packet, that causes the error. + * + * You can't use fix_incksum or fix_outcksum in that case, because for the + * kernel the data section of the ICMP error is just data, and no special + * processing like hardware cksum or ntohs processing have been done by the + * kernel on the data section. + */ +void fix_datacksum(sp, n) +u_short *sp; +u_32_t n; +{ + register u_short sumshort; + register u_32_t sum1; + + if (!n) + return; + + sum1 = (~ntohs(*sp)) & 0xffff; + sum1 += (n); + sum1 = (sum1 >> 16) + (sum1 & 0xffff); + /* Again */ + sum1 = (sum1 >> 16) + (sum1 & 0xffff); + sumshort = ~(u_short)sum1; + *(sp) = htons(sumshort); +} + +/* + * How the NAT is organised and works. + * + * Inside (interface y) NAT Outside (interface x) + * -------------------- -+- ------------------------------------- + * Packet going | out, processsed by ip_natout() for x + * ------------> | ------------> + * src=10.1.1.1 | src=192.1.1.1 + * | + * | in, processed by ip_natin() for x + * <------------ | <------------ + * dst=10.1.1.1 | dst=192.1.1.1 + * -------------------- -+- ------------------------------------- + * ip_natout() - changes ip_src and if required, sport + * - creates a new mapping, if required. + * ip_natin() - changes ip_dst and if required, dport + * + * In the NAT table, internal source is recorded as "in" and externally + * seen as "out". + */ + +/* + * Handle ioctls which manipulate the NAT. + */ +int nat_ioctl(data, cmd, mode) +#if defined(__NetBSD__) || defined(__OpenBSD__) || (__FreeBSD_version >= 300003) +u_long cmd; +#else +int cmd; +#endif +caddr_t data; +int mode; +{ + register ipnat_t *nat, *nt, *n = NULL, **np = NULL; + int error = 0, ret, arg; + ipnat_t natd; + u_32_t i, j; + +#if (BSD >= 199306) && defined(_KERNEL) + if ((securelevel >= 2) && (mode & FWRITE)) + return EPERM; +#endif + + nat = NULL; /* XXX gcc -Wuninitialized */ + KMALLOC(nt, ipnat_t *); + if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) + error = IRCOPYPTR(data, (char *)&natd, sizeof(natd)); + else if (cmd == SIOCIPFFL) { /* SIOCFLNAT & SIOCCNATL */ + error = IRCOPY(data, (char *)&arg, sizeof(arg)); + if (error) + error = EFAULT; + } + + if (error) + goto done; + + /* + * For add/delete, look to see if the NAT entry is already present + */ + WRITE_ENTER(&ipf_nat); + if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) { + nat = &natd; + nat->in_flags &= IPN_USERFLAGS; + if ((nat->in_redir & NAT_MAPBLK) == 0) { + if ((nat->in_flags & IPN_SPLIT) == 0) + nat->in_inip &= nat->in_inmsk; + if ((nat->in_flags & IPN_IPRANGE) == 0) + nat->in_outip &= nat->in_outmsk; + } + for (np = &nat_list; (n = *np); np = &n->in_next) + if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags, + IPN_CMPSIZ)) + break; + } + + switch (cmd) + { +#ifdef IPFILTER_LOG + case SIOCIPFFB : + { + int tmp; + + if (!(mode & FWRITE)) + error = EPERM; + else { + tmp = ipflog_clear(IPL_LOGNAT); + IWCOPY((char *)&tmp, (char *)data, sizeof(tmp)); + } + break; + } +#endif + case SIOCADNAT : + if (!(mode & FWRITE)) { + error = EPERM; + break; + } + if (n) { + error = EEXIST; + break; + } + if (nt == NULL) { + error = ENOMEM; + break; + } + n = nt; + nt = NULL; + bcopy((char *)nat, (char *)n, sizeof(*n)); + n->in_ifp = (void *)GETUNIT(n->in_ifname, 4); + if (!n->in_ifp) + n->in_ifp = (void *)-1; + if (n->in_plabel[0] != '\0') { + n->in_apr = appr_match(n->in_p, n->in_plabel); + if (!n->in_apr) { + error = ENOENT; + break; + } + } + n->in_next = NULL; + *np = n; + + if (n->in_redir & NAT_REDIRECT) { + n->in_flags &= ~IPN_NOTDST; + nat_addrdr(n); + } + if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { + n->in_flags &= ~IPN_NOTSRC; + nat_addnat(n); + } + + n->in_use = 0; + if (n->in_redir & NAT_MAPBLK) + n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk); + else if (n->in_flags & IPN_AUTOPORTMAP) + n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk); + else if (n->in_flags & IPN_IPRANGE) + n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip); + else if (n->in_flags & IPN_SPLIT) + n->in_space = 2; + else + n->in_space = ~ntohl(n->in_outmsk); + /* + * Calculate the number of valid IP addresses in the output + * mapping range. In all cases, the range is inclusive of + * the start and ending IP addresses. + * If to a CIDR address, lose 2: broadcast + network address + * (so subtract 1) + * If to a range, add one. + * If to a single IP address, set to 1. + */ + if (n->in_space) { + if ((n->in_flags & IPN_IPRANGE) != 0) + n->in_space += 1; + else + n->in_space -= 1; + } else + n->in_space = 1; + if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) && + ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0)) + n->in_nip = ntohl(n->in_outip) + 1; + else if ((n->in_flags & IPN_SPLIT) && + (n->in_redir & NAT_REDIRECT)) + n->in_nip = ntohl(n->in_inip); + else + n->in_nip = ntohl(n->in_outip); + if (n->in_redir & NAT_MAP) { + n->in_pnext = ntohs(n->in_pmin); + /* + * Multiply by the number of ports made available. + */ + if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) { + n->in_space *= (ntohs(n->in_pmax) - + ntohs(n->in_pmin) + 1); + /* + * Because two different sources can map to + * different destinations but use the same + * local IP#/port #. + * If the result is smaller than in_space, then + * we may have wrapped around 32bits. + */ + i = n->in_inmsk; + if ((i != 0) && (i != 0xffffffff)) { + j = n->in_space * (~ntohl(i) + 1); + if (j >= n->in_space) + n->in_space = j; + else + n->in_space = 0xffffffff; + } + } + /* + * If no protocol is specified, multiple by 256. + */ + if ((n->in_flags & IPN_TCPUDP) == 0) { + j = n->in_space * 256; + if (j >= n->in_space) + n->in_space = j; + else + n->in_space = 0xffffffff; + } + } + /* Otherwise, these fields are preset */ + n = NULL; + nat_stats.ns_rules++; + break; + case SIOCRMNAT : + if (!(mode & FWRITE)) { + error = EPERM; + n = NULL; + break; + } + if (!n) { + error = ESRCH; + break; + } + if (n->in_redir & NAT_REDIRECT) + nat_delrdr(n); + if (n->in_redir & (NAT_MAPBLK|NAT_MAP)) + nat_delnat(n); + if (nat_list == NULL) { + nat_masks = 0; + rdr_masks = 0; + } + *np = n->in_next; + if (!n->in_use) { + if (n->in_apr) + appr_free(n->in_apr); + KFREE(n); + nat_stats.ns_rules--; + } else { + n->in_flags |= IPN_DELETE; + n->in_next = NULL; + } + n = NULL; + break; + case SIOCGNATS : + MUTEX_DOWNGRADE(&ipf_nat); + nat_stats.ns_table[0] = nat_table[0]; + nat_stats.ns_table[1] = nat_table[1]; + nat_stats.ns_list = nat_list; + nat_stats.ns_nattab_sz = ipf_nattable_sz; + nat_stats.ns_rultab_sz = ipf_natrules_sz; + nat_stats.ns_rdrtab_sz = ipf_rdrrules_sz; + nat_stats.ns_instances = nat_instances; + nat_stats.ns_apslist = ap_sess_list; + error = IWCOPYPTR((char *)&nat_stats, (char *)data, + sizeof(nat_stats)); + break; + case SIOCGNATL : + { + natlookup_t nl; + + MUTEX_DOWNGRADE(&ipf_nat); + error = IRCOPYPTR((char *)data, (char *)&nl, sizeof(nl)); + if (error) + break; + + if (nat_lookupredir(&nl)) { + error = IWCOPYPTR((char *)&nl, (char *)data, + sizeof(nl)); + } else + error = ESRCH; + break; + } + case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */ + if (!(mode & FWRITE)) { + error = EPERM; + break; + } + error = 0; + if (arg == 0) + ret = nat_flushtable(); + else if (arg == 1) + ret = nat_clearlist(); + else + error = EINVAL; + MUTEX_DOWNGRADE(&ipf_nat); + if (!error) { + error = IWCOPY((caddr_t)&ret, data, sizeof(ret)); + if (error) + error = EFAULT; + } + break; + case SIOCSTLCK : + error = IRCOPY(data, (caddr_t)&arg, sizeof(arg)); + if (!error) { + error = IWCOPY((caddr_t)&fr_nat_lock, data, + sizeof(fr_nat_lock)); + if (!error) + fr_nat_lock = arg; + } else + error = EFAULT; + break; + case SIOCSTPUT : + if (fr_nat_lock) + error = fr_natputent(data); + else + error = EACCES; + break; + case SIOCSTGSZ : + if (fr_nat_lock) + error = fr_natgetsz(data); + else + error = EACCES; + break; + case SIOCSTGET : + if (fr_nat_lock) + error = fr_natgetent(data); + else + error = EACCES; + break; + case FIONREAD : +#ifdef IPFILTER_LOG + arg = (int)iplused[IPL_LOGNAT]; + MUTEX_DOWNGRADE(&ipf_nat); + error = IWCOPY((caddr_t)&arg, (caddr_t)data, sizeof(arg)); + if (error) + error = EFAULT; +#endif + break; + default : + error = EINVAL; + break; + } + RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */ +done: + if (nt) + KFREE(nt); + return error; +} + + +static int fr_natgetsz(data) +caddr_t data; +{ + ap_session_t *aps; + nat_t *nat, *n; + int error = 0; + natget_t ng; + + error = IRCOPY(data, (caddr_t)&ng, sizeof(ng)); + if (error) + return EFAULT; + + nat = ng.ng_ptr; + if (!nat) { + nat = nat_instances; + ng.ng_sz = 0; + if (nat == NULL) { + error = IWCOPY((caddr_t)&ng, data, sizeof(ng)); + if (error) + error = EFAULT; + return error; + } + } else { + /* + * Make sure the pointer we're copying from exists in the + * current list of entries. Security precaution to prevent + * copying of random kernel data. + */ + for (n = nat_instances; n; n = n->nat_next) + if (n == nat) + break; + if (!n) + return ESRCH; + } + + ng.ng_sz = sizeof(nat_save_t); + aps = nat->nat_aps; + if ((aps != NULL) && (aps->aps_data != 0)) { + ng.ng_sz += sizeof(ap_session_t); + ng.ng_sz += aps->aps_psiz; + } + + error = IWCOPY((caddr_t)&ng, data, sizeof(ng)); + if (error) + error = EFAULT; + return error; +} + + +static int fr_natgetent(data) +caddr_t data; +{ + nat_save_t ipn, *ipnp, *ipnn = NULL; + register nat_t *n, *nat; + ap_session_t *aps; + int error; + + error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp)); + if (error) + return EFAULT; + error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn)); + if (error) + return EFAULT; + + nat = ipn.ipn_next; + if (!nat) { + nat = nat_instances; + if (nat == NULL) { + if (nat_instances == NULL) + return ENOENT; + return 0; + } + } else { + /* + * Make sure the pointer we're copying from exists in the + * current list of entries. Security precaution to prevent + * copying of random kernel data. + */ + for (n = nat_instances; n; n = n->nat_next) + if (n == nat) + break; + if (!n) + return ESRCH; + } + + ipn.ipn_next = nat->nat_next; + ipn.ipn_dsize = 0; + bcopy((char *)nat, (char *)&ipn.ipn_nat, sizeof(ipn.ipn_nat)); + ipn.ipn_nat.nat_data = NULL; + + if (nat->nat_ptr) { + bcopy((char *)nat->nat_ptr, (char *)&ipn.ipn_ipnat, + sizeof(ipn.ipn_ipnat)); + } + + if (nat->nat_fr) + bcopy((char *)nat->nat_fr, (char *)&ipn.ipn_rule, + sizeof(ipn.ipn_rule)); + + if ((aps = nat->nat_aps)) { + ipn.ipn_dsize = sizeof(*aps); + if (aps->aps_data) + ipn.ipn_dsize += aps->aps_psiz; + KMALLOCS(ipnn, nat_save_t *, sizeof(*ipnn) + ipn.ipn_dsize); + if (ipnn == NULL) + return ENOMEM; + bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn)); + + bcopy((char *)aps, ipnn->ipn_data, sizeof(*aps)); + if (aps->aps_data) { + bcopy(aps->aps_data, ipnn->ipn_data + sizeof(*aps), + aps->aps_psiz); + ipnn->ipn_dsize += aps->aps_psiz; + } + error = IWCOPY((caddr_t)ipnn, ipnp, + sizeof(ipn) + ipn.ipn_dsize); + if (error) + error = EFAULT; + KFREES(ipnn, sizeof(*ipnn) + ipn.ipn_dsize); + } else { + error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn)); + if (error) + error = EFAULT; + } + return error; +} + + +static int fr_natputent(data) +caddr_t data; +{ + nat_save_t ipn, *ipnp, *ipnn = NULL; + register nat_t *n, *nat; + ap_session_t *aps; + frentry_t *fr; + ipnat_t *in; + + int error; + + error = IRCOPY(data, (caddr_t)&ipnp, sizeof(ipnp)); + if (error) + return EFAULT; + error = IRCOPY((caddr_t)ipnp, (caddr_t)&ipn, sizeof(ipn)); + if (error) + return EFAULT; + nat = NULL; + if (ipn.ipn_dsize) { + KMALLOCS(ipnn, nat_save_t *, sizeof(ipn) + ipn.ipn_dsize); + if (ipnn == NULL) + return ENOMEM; + bcopy((char *)&ipn, (char *)ipnn, sizeof(ipn)); + error = IRCOPY((caddr_t)ipnp, (caddr_t)ipn.ipn_data, + ipn.ipn_dsize); + if (error) { + error = EFAULT; + goto junkput; + } + } else + ipnn = NULL; + + KMALLOC(nat, nat_t *); + if (nat == NULL) { + error = EFAULT; + goto junkput; + } + + bcopy((char *)&ipn.ipn_nat, (char *)nat, sizeof(*nat)); + /* + * Initialize all these so that nat_delete() doesn't cause a crash. + */ + nat->nat_phnext[0] = NULL; + nat->nat_phnext[1] = NULL; + fr = nat->nat_fr; + nat->nat_fr = NULL; + aps = nat->nat_aps; + nat->nat_aps = NULL; + in = nat->nat_ptr; + nat->nat_ptr = NULL; + nat->nat_data = NULL; + + /* + * Restore the rule associated with this nat session + */ + if (in) { + KMALLOC(in, ipnat_t *); + if (in == NULL) { + error = ENOMEM; + goto junkput; + } + nat->nat_ptr = in; + bcopy((char *)&ipn.ipn_ipnat, (char *)in, sizeof(*in)); + in->in_use = 1; + in->in_flags |= IPN_DELETE; + in->in_next = NULL; + in->in_rnext = NULL; + in->in_prnext = NULL; + in->in_mnext = NULL; + in->in_pmnext = NULL; + in->in_ifp = GETUNIT(in->in_ifname, 4); + if (in->in_plabel[0] != '\0') { + in->in_apr = appr_match(in->in_p, in->in_plabel); + } + } + + /* + * Restore ap_session_t structure. Include the private data allocated + * if it was there. + */ + if (aps) { + KMALLOC(aps, ap_session_t *); + if (aps == NULL) { + error = ENOMEM; + goto junkput; + } + nat->nat_aps = aps; + aps->aps_next = ap_sess_list; + ap_sess_list = aps; + bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps)); + if (in) + aps->aps_apr = in->in_apr; + if (aps->aps_psiz) { + KMALLOCS(aps->aps_data, void *, aps->aps_psiz); + if (aps->aps_data == NULL) { + error = ENOMEM; + goto junkput; + } + bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data, + aps->aps_psiz); + } else { + aps->aps_psiz = 0; + aps->aps_data = NULL; + } + } + + /* + * If there was a filtering rule associated with this entry then + * build up a new one. + */ + if (fr != NULL) { + if (nat->nat_flags & FI_NEWFR) { + KMALLOC(fr, frentry_t *); + nat->nat_fr = fr; + if (fr == NULL) { + error = ENOMEM; + goto junkput; + } + bcopy((char *)&ipn.ipn_fr, (char *)fr, sizeof(*fr)); + ipn.ipn_nat.nat_fr = fr; + error = IWCOPY((caddr_t)&ipn, ipnp, sizeof(ipn)); + if (error) { + error = EFAULT; + goto junkput; + } + } else { + for (n = nat_instances; n; n = n->nat_next) + if (n->nat_fr == fr) + break; + if (!n) { + error = ESRCH; + goto junkput; + } + } + } + + if (ipnn) + KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize); + nat_insert(nat); + return 0; +junkput: + if (ipnn) + KFREES(ipnn, sizeof(ipn) + ipn.ipn_dsize); + if (nat) + nat_delete(nat); + return error; +} + + +/* + * Delete a nat entry from the various lists and table. + */ +static void nat_delete(natd) +struct nat *natd; +{ + struct ipnat *ipn; + + if (natd->nat_flags & FI_WILDP) + nat_stats.ns_wilds--; + if (natd->nat_hnext[0]) + natd->nat_hnext[0]->nat_phnext[0] = natd->nat_phnext[0]; + *natd->nat_phnext[0] = natd->nat_hnext[0]; + if (natd->nat_hnext[1]) + natd->nat_hnext[1]->nat_phnext[1] = natd->nat_phnext[1]; + *natd->nat_phnext[1] = natd->nat_hnext[1]; + + if (natd->nat_fr != NULL) { + ATOMIC_DEC32(natd->nat_fr->fr_ref); + } + + if (natd->nat_hm != NULL) + nat_hostmapdel(natd->nat_hm); + + /* + * If there is an active reference from the nat entry to its parent + * rule, decrement the rule's reference count and free it too if no + * longer being used. + */ + ipn = natd->nat_ptr; + if (ipn != NULL) { + ipn->in_space++; + ipn->in_use--; + if (!ipn->in_use && (ipn->in_flags & IPN_DELETE)) { + if (ipn->in_apr) + appr_free(ipn->in_apr); + KFREE(ipn); + nat_stats.ns_rules--; + } + } + + MUTEX_DESTROY(&natd->nat_lock); + /* + * If there's a fragment table entry too for this nat entry, then + * dereference that as well. + */ + ipfr_forget((void *)natd); + aps_free(natd->nat_aps); + nat_stats.ns_inuse--; + KFREE(natd); +} + + +/* + * nat_flushtable - clear the NAT table of all mapping entries. + */ +static int nat_flushtable() +{ + register nat_t *nat, **natp; + register int j = 0; + + /* + * ALL NAT mappings deleted, so lets just make the deletions + * quicker. + */ + if (nat_table[0] != NULL) + bzero((char *)nat_table[0], + sizeof(nat_table[0]) * ipf_nattable_sz); + if (nat_table[1] != NULL) + bzero((char *)nat_table[1], + sizeof(nat_table[1]) * ipf_nattable_sz); + + for (natp = &nat_instances; (nat = *natp); ) { + *natp = nat->nat_next; +#ifdef IPFILTER_LOG + nat_log(nat, NL_FLUSH); +#endif + nat_delete(nat); + j++; + } + nat_stats.ns_inuse = 0; + return j; +} + + +/* + * nat_clearlist - delete all rules in the active NAT mapping list. + */ +static int nat_clearlist() +{ + register ipnat_t *n, **np = &nat_list; + int i = 0; + + if (nat_rules != NULL) + bzero((char *)nat_rules, sizeof(*nat_rules) * ipf_natrules_sz); + if (rdr_rules != NULL) + bzero((char *)rdr_rules, sizeof(*rdr_rules) * ipf_rdrrules_sz); + + while ((n = *np)) { + *np = n->in_next; + if (!n->in_use) { + if (n->in_apr) + appr_free(n->in_apr); + KFREE(n); + nat_stats.ns_rules--; + } else { + n->in_flags |= IPN_DELETE; + n->in_next = NULL; + } + i++; + } + nat_masks = 0; + rdr_masks = 0; + return i; +} + + +/* + * Create a new NAT table entry. + * NOTE: assumes write lock on ipf_nat has been obtained already. + */ +nat_t *nat_new(np, ip, fin, flags, direction) +ipnat_t *np; +ip_t *ip; +fr_info_t *fin; +u_int flags; +int direction; +{ + register u_32_t sum1, sum2, sumd, l; + u_short port = 0, sport = 0, dport = 0, nport = 0; + struct in_addr in, inb; + tcphdr_t *tcp = NULL; + hostmap_t *hm = NULL; + nat_t *nat, *natl; + u_short nflags; +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) + qif_t *qf = fin->fin_qif; +#endif + + nflags = flags & np->in_flags; + if (flags & IPN_TCPUDP) { + tcp = (tcphdr_t *)fin->fin_dp; + sport = tcp->th_sport; + dport = tcp->th_dport; + } + + /* Give me a new nat */ + KMALLOC(nat, nat_t *); + if (nat == NULL) { + nat_stats.ns_memfail++; + return NULL; + } + + bzero((char *)nat, sizeof(*nat)); + nat->nat_flags = flags; + if (flags & FI_WILDP) + nat_stats.ns_wilds++; + /* + * Search the current table for a match. + */ + if (direction == NAT_OUTBOUND) { + /* + * Values at which the search for a free resouce starts. + */ + u_32_t st_ip; + u_short st_port; + + /* + * If it's an outbound packet which doesn't match any existing + * record, then create a new port + */ + l = 0; + st_ip = np->in_nip; + st_port = np->in_pnext; + + do { + port = 0; + in.s_addr = htonl(np->in_nip); + if (l == 0) { + /* + * Check to see if there is an existing NAT + * setup for this IP address pair. + */ + hm = nat_hostmap(np, ip->ip_src, in); + if (hm != NULL) + in.s_addr = hm->hm_mapip.s_addr; + } else if ((l == 1) && (hm != NULL)) { + nat_hostmapdel(hm); + hm = NULL; + } + in.s_addr = ntohl(in.s_addr); + + nat->nat_hm = hm; + + if ((np->in_outmsk == 0xffffffff) && + (np->in_pnext == 0)) { + if (l > 0) + goto badnat; + } + + if (np->in_redir & NAT_MAPBLK) { + if ((l >= np->in_ppip) || ((l > 0) && + !(flags & IPN_TCPUDP))) + goto badnat; + /* + * map-block - Calculate destination address. + */ + in.s_addr = ntohl(ip->ip_src.s_addr); + in.s_addr &= ntohl(~np->in_inmsk); + inb.s_addr = in.s_addr; + in.s_addr /= np->in_ippip; + in.s_addr &= ntohl(~np->in_outmsk); + in.s_addr += ntohl(np->in_outip); + /* + * Calculate destination port. + */ + if ((flags & IPN_TCPUDP) && + (np->in_ppip != 0)) { + port = ntohs(sport) + l; + port %= np->in_ppip; + port += np->in_ppip * + (inb.s_addr % np->in_ippip); + port += MAPBLK_MINPORT; + port = htons(port); + } + } else if (!np->in_outip && + (np->in_outmsk == 0xffffffff)) { + /* + * 0/32 - use the interface's IP address. + */ + if ((l > 0) || + fr_ifpaddr(4, fin->fin_ifp, &in) == -1) + goto badnat; + in.s_addr = ntohl(in.s_addr); + } else if (!np->in_outip && !np->in_outmsk) { + /* + * 0/0 - use the original source address/port. + */ + if (l > 0) + goto badnat; + in.s_addr = ntohl(ip->ip_src.s_addr); + } else if ((np->in_outmsk != 0xffffffff) && + (np->in_pnext == 0) && + ((l > 0) || (hm == NULL))) + np->in_nip++; + natl = NULL; + + if ((nflags & IPN_TCPUDP) && + ((np->in_redir & NAT_MAPBLK) == 0) && + (np->in_flags & IPN_AUTOPORTMAP)) { + if ((l > 0) && (l % np->in_ppip == 0)) { + if (l > np->in_space) { + goto badnat; + } else if ((l > np->in_ppip) && + np->in_outmsk != 0xffffffff) + np->in_nip++; + } + if (np->in_ppip != 0) { + port = ntohs(sport); + port += (l % np->in_ppip); + port %= np->in_ppip; + port += np->in_ppip * + (ntohl(ip->ip_src.s_addr) % + np->in_ippip); + port += MAPBLK_MINPORT; + port = htons(port); + } + } else if (((np->in_redir & NAT_MAPBLK) == 0) && + (nflags & IPN_TCPUDP) && + (np->in_pnext != 0)) { + port = htons(np->in_pnext++); + if (np->in_pnext > ntohs(np->in_pmax)) { + np->in_pnext = ntohs(np->in_pmin); + if (np->in_outmsk != 0xffffffff) + np->in_nip++; + } + } + + if (np->in_flags & IPN_IPRANGE) { + if (np->in_nip > ntohl(np->in_outmsk)) + np->in_nip = ntohl(np->in_outip); + } else { + if ((np->in_outmsk != 0xffffffff) && + ((np->in_nip + 1) & ntohl(np->in_outmsk)) > + ntohl(np->in_outip)) + np->in_nip = ntohl(np->in_outip) + 1; + } + + if (!port && (flags & IPN_TCPUDP)) + port = sport; + + /* + * Here we do a lookup of the connection as seen from + * the outside. If an IP# pair already exists, try + * again. So if you have A->B becomes C->B, you can + * also have D->E become C->E but not D->B causing + * another C->B. Also take protocol and ports into + * account when determining whether a pre-existing + * NAT setup will cause an external conflict where + * this is appropriate. + */ + inb.s_addr = htonl(in.s_addr); + natl = nat_inlookup(fin->fin_ifp, flags & ~FI_WILDP, + (u_int)ip->ip_p, ip->ip_dst, inb, + (port << 16) | dport, 1); + + /* + * Has the search wrapped around and come back to the + * start ? + */ + if ((natl != NULL) && + (np->in_pnext != 0) && (st_port == np->in_pnext) && + (np->in_nip != 0) && (st_ip == np->in_nip)) + goto badnat; + l++; + } while (natl != NULL); + + if (np->in_space > 0) + np->in_space--; + + /* Setup the NAT table */ + nat->nat_inip = ip->ip_src; + nat->nat_outip.s_addr = htonl(in.s_addr); + nat->nat_oip = ip->ip_dst; + if (nat->nat_hm == NULL) + nat->nat_hm = nat_hostmap(np, ip->ip_src, + nat->nat_outip); + + sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr)) + ntohs(sport); + sum2 = LONG_SUM(in.s_addr) + ntohs(port); + + if (flags & IPN_TCPUDP) { + nat->nat_inport = sport; + nat->nat_outport = port; /* sport */ + nat->nat_oport = dport; + } + } else { + /* + * Otherwise, it's an inbound packet. Most likely, we don't + * want to rewrite source ports and source addresses. Instead, + * we want to rewrite to a fixed internal address and fixed + * internal port. + */ + if (np->in_flags & IPN_SPLIT) { + in.s_addr = np->in_nip; + if (np->in_inip == htonl(in.s_addr)) + np->in_nip = ntohl(np->in_inmsk); + else { + np->in_nip = ntohl(np->in_inip); + if (np->in_flags & IPN_ROUNDR) { + nat_delrdr(np); + nat_addrdr(np); + } + } + } else { + in.s_addr = ntohl(np->in_inip); + if (np->in_flags & IPN_ROUNDR) { + nat_delrdr(np); + nat_addrdr(np); + } + } + if (!np->in_pnext) + nport = dport; + else { + /* + * Whilst not optimized for the case where + * pmin == pmax, the gain is not significant. + */ + nport = ntohs(dport) - ntohs(np->in_pmin) + + ntohs(np->in_pnext); + nport = htons(nport); + } + + /* + * When the redirect-to address is set to 0.0.0.0, just + * assume a blank `forwarding' of the packet. We don't + * setup any translation for this either. + */ + if (in.s_addr == 0) { + if (nport == dport) + goto badnat; + in.s_addr = ntohl(ip->ip_dst.s_addr); + } + + nat->nat_inip.s_addr = htonl(in.s_addr); + nat->nat_outip = ip->ip_dst; + nat->nat_oip = ip->ip_src; + + sum1 = LONG_SUM(ntohl(ip->ip_dst.s_addr)) + ntohs(dport); + sum2 = LONG_SUM(in.s_addr) + ntohs(nport); + + if (flags & IPN_TCPUDP) { + nat->nat_inport = nport; + nat->nat_outport = dport; + nat->nat_oport = sport; + } + } + + CALC_SUMD(sum1, sum2, sumd); + nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); +#if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) + if ((flags == IPN_TCP) && dohwcksum && + (qf->qf_ill->ill_ick.ick_magic == ICK_M_CTL_MAGIC)) { + if (direction == NAT_OUTBOUND) + sum1 = LONG_SUM(ntohl(in.s_addr)); + else + sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr)); + sum1 += LONG_SUM(ntohl(ip->ip_dst.s_addr)); + sum1 += 30; + sum1 = (sum1 & 0xffff) + (sum1 >> 16); + nat->nat_sumd[1] = NAT_HW_CKSUM|(sum1 & 0xffff); + } else +#endif + nat->nat_sumd[1] = nat->nat_sumd[0]; + + if ((flags & IPN_TCPUDP) && ((sport != port) || (dport != nport))) { + if (direction == NAT_OUTBOUND) + sum1 = LONG_SUM(ntohl(ip->ip_src.s_addr)); + else + sum1 = LONG_SUM(ntohl(ip->ip_dst.s_addr)); + + sum2 = LONG_SUM(in.s_addr); + + CALC_SUMD(sum1, sum2, sumd); + nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16); + } else + nat->nat_ipsumd = nat->nat_sumd[0]; + + in.s_addr = htonl(in.s_addr); + +#ifdef _KERNEL + strncpy(nat->nat_ifname, IFNAME(fin->fin_ifp), IFNAMSIZ); +#endif + nat_insert(nat); + + nat->nat_dir = direction; + nat->nat_ifp = fin->fin_ifp; + nat->nat_ptr = np; + nat->nat_p = ip->ip_p; + nat->nat_bytes = 0; + nat->nat_pkts = 0; + nat->nat_fr = fin->fin_fr; + if (nat->nat_fr != NULL) { + ATOMIC_INC32(nat->nat_fr->fr_ref); + } + if (direction == NAT_OUTBOUND) { + if (flags & IPN_TCPUDP) + tcp->th_sport = port; + } else { + if (flags & IPN_TCPUDP) + tcp->th_dport = nport; + } + np->in_use++; +#ifdef IPFILTER_LOG + nat_log(nat, (u_int)np->in_redir); +#endif + return nat; +badnat: + nat_stats.ns_badnat++; + if ((hm = nat->nat_hm) != NULL) + nat_hostmapdel(hm); + KFREE(nat); + return NULL; +} + + +void nat_insert(nat) +nat_t *nat; +{ + nat_t **natp; + u_int hv; + + MUTEX_INIT(&nat->nat_lock, "nat entry lock", NULL); + + nat->nat_age = fr_defnatage; + nat->nat_ifname[sizeof(nat->nat_ifname) - 1] = '\0'; + if (nat->nat_ifname[0] !='\0') { + nat->nat_ifp = GETUNIT(nat->nat_ifname, 4); + } + + nat->nat_next = nat_instances; + nat_instances = nat; + + hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, + ipf_nattable_sz); + natp = &nat_table[0][hv]; + if (*natp) + (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; + nat->nat_phnext[0] = natp; + nat->nat_hnext[0] = *natp; + *natp = nat; + + hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, + ipf_nattable_sz); + natp = &nat_table[1][hv]; + if (*natp) + (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; + nat->nat_phnext[1] = natp; + nat->nat_hnext[1] = *natp; + *natp = nat; + + nat_stats.ns_added++; + nat_stats.ns_inuse++; +} + + +nat_t *nat_icmplookup(ip, fin, dir) +ip_t *ip; +fr_info_t *fin; +int dir; +{ + icmphdr_t *icmp; + tcphdr_t *tcp = NULL; + ip_t *oip; + int flags = 0, type, minlen; + + icmp = (icmphdr_t *)fin->fin_dp; + /* + * Does it at least have the return (basic) IP header ? + * Only a basic IP header (no options) should be with an ICMP error + * header. + */ + if ((ip->ip_hl != 5) || (ip->ip_len < ICMPERR_MINPKTLEN)) + return NULL; + type = icmp->icmp_type; + /* + * If it's not an error type, then return. + */ + if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) && + (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) && + (type != ICMP_PARAMPROB)) + return NULL; + + oip = (ip_t *)((char *)fin->fin_dp + 8); + minlen = (oip->ip_hl << 2); + if (minlen < sizeof(ip_t)) + return NULL; + if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen) + return NULL; + /* + * Is the buffer big enough for all of it ? It's the size of the IP + * header claimed in the encapsulated part which is of concern. It + * may be too big to be in this buffer but not so big that it's + * outside the ICMP packet, leading to TCP deref's causing problems. + * This is possible because we don't know how big oip_hl is when we + * do the pullup early in fr_check() and thus can't gaurantee it is + * all here now. + */ +#ifdef _KERNEL + { + mb_t *m; + +# if SOLARIS + m = fin->fin_qfm; + if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr) + return NULL; +# else + m = *(mb_t **)fin->fin_mp; + if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > + (char *)ip + m->m_len) + return NULL; +# endif + } +#endif + + if (oip->ip_p == IPPROTO_TCP) + flags = IPN_TCP; + else if (oip->ip_p == IPPROTO_UDP) + flags = IPN_UDP; + if (flags & IPN_TCPUDP) { + minlen += 8; /* + 64bits of data to get ports */ + if (ip->ip_len < ICMPERR_IPICMPHLEN + minlen) + return NULL; + tcp = (tcphdr_t *)((char *)oip + (oip->ip_hl << 2)); + if (dir == NAT_INBOUND) + return nat_inlookup(fin->fin_ifp, flags, + (u_int)oip->ip_p, oip->ip_dst, oip->ip_src, + (tcp->th_sport << 16) | tcp->th_dport, 0); + else + return nat_outlookup(fin->fin_ifp, flags, + (u_int)oip->ip_p, oip->ip_dst, oip->ip_src, + (tcp->th_sport << 16) | tcp->th_dport, 0); + } + if (dir == NAT_INBOUND) + return nat_inlookup(fin->fin_ifp, 0, (u_int)oip->ip_p, + oip->ip_dst, oip->ip_src, 0, 0); + else + return nat_outlookup(fin->fin_ifp, 0, (u_int)oip->ip_p, + oip->ip_dst, oip->ip_src, 0, 0); +} + + +/* + * This should *ONLY* be used for incoming packets to make sure a NAT'd ICMP + * packet gets correctly recognised. + */ +nat_t *nat_icmp(ip, fin, nflags, dir) +ip_t *ip; +fr_info_t *fin; +u_int *nflags; +int dir; +{ + u_32_t sum1, sum2, sumd, sumd2 = 0; + struct in_addr in; + icmphdr_t *icmp; + udphdr_t *udp; + nat_t *nat; + ip_t *oip; + int flags = 0; + + if ((fin->fin_fi.fi_fl & FI_SHORT) || (ip->ip_off & IP_OFFMASK)) + return NULL; + /* + * nat_icmplookup() will return NULL for `defective' packets. + */ + if ((ip->ip_v != 4) || !(nat = nat_icmplookup(ip, fin, dir))) + return NULL; + *nflags = IPN_ICMPERR; + icmp = (icmphdr_t *)fin->fin_dp; + oip = (ip_t *)&icmp->icmp_ip; + if (oip->ip_p == IPPROTO_TCP) + flags = IPN_TCP; + else if (oip->ip_p == IPPROTO_UDP) + flags = IPN_UDP; + udp = (udphdr_t *)((((char *)oip) + (oip->ip_hl << 2))); + /* + * Need to adjust ICMP header to include the real IP#'s and + * port #'s. Only apply a checksum change relative to the + * IP address change as it will be modified again in ip_natout + * for both address and port. Two checksum changes are + * necessary for the two header address changes. Be careful + * to only modify the checksum once for the port # and twice + * for the IP#. + */ + + /* + * Step 1 + * Fix the IP addresses in the offending IP packet. You also need + * to adjust the IP header checksum of that offending IP packet + * and the ICMP checksum of the ICMP error message itself. + * + * Unfortunately, for UDP and TCP, the IP addresses are also contained + * in the pseudo header that is used to compute the UDP resp. TCP + * checksum. So, we must compensate that as well. Even worse, the + * change in the UDP and TCP checksums require yet another + * adjustment of the ICMP checksum of the ICMP error message. + * + * For the moment we forget about TCP, because that checksum is not + * in the first 8 bytes, so it will not be available in most cases. + */ + + if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) { + sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr)); + in = nat->nat_inip; + oip->ip_src = in; + } else { + sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr)); + in = nat->nat_outip; + oip->ip_dst = in; + } + + sum2 = LONG_SUM(ntohl(in.s_addr)); + + CALC_SUMD(sum1, sum2, sumd); + + if (nat->nat_dir == NAT_OUTBOUND) { + /* + * Fix IP checksum of the offending IP packet to adjust for + * the change in the IP address. + * + * Normally, you would expect that the ICMP checksum of the + * ICMP error message needs to be adjusted as well for the + * IP address change in oip. + * However, this is a NOP, because the ICMP checksum is + * calculated over the complete ICMP packet, which includes the + * changed oip IP addresses and oip->ip_sum. However, these + * two changes cancel each other out (if the delta for + * the IP address is x, then the delta for ip_sum is minus x), + * so no change in the icmp_cksum is necessary. + * + * Be careful that nat_dir refers to the direction of the + * offending IP packet (oip), not to its ICMP response (icmp) + */ + fix_datacksum(&oip->ip_sum, sumd); + + /* + * Fix UDP pseudo header checksum to compensate for the + * IP address change. + */ + if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) { + /* + * The UDP checksum is optional, only adjust it + * if it has been set. + */ + sum1 = ntohs(udp->uh_sum); + fix_datacksum(&udp->uh_sum, sumd); + sum2 = ntohs(udp->uh_sum); + + /* + * Fix ICMP checksum to compensate the UDP + * checksum adjustment. + */ + CALC_SUMD(sum1, sum2, sumd); + sumd2 = sumd; + } + +#if 0 + /* + * Fix TCP pseudo header checksum to compensate for the + * IP address change. Before we can do the change, we + * must make sure that oip is sufficient large to hold + * the TCP checksum (normally it does not!). + */ + if (oip->ip_p == IPPROTO_TCP) { + + } +#endif + } else { + + /* + * Fix IP checksum of the offending IP packet to adjust for + * the change in the IP address. + * + * Normally, you would expect that the ICMP checksum of the + * ICMP error message needs to be adjusted as well for the + * IP address change in oip. + * However, this is a NOP, because the ICMP checksum is + * calculated over the complete ICMP packet, which includes the + * changed oip IP addresses and oip->ip_sum. However, these + * two changes cancel each other out (if the delta for + * the IP address is x, then the delta for ip_sum is minus x), + * so no change in the icmp_cksum is necessary. + * + * Be careful that nat_dir refers to the direction of the + * offending IP packet (oip), not to its ICMP response (icmp) + */ + fix_datacksum(&oip->ip_sum, sumd); + +/* XXX FV : without having looked at Solaris source code, it seems unlikely + * that SOLARIS would compensate this in the kernel (a body of an IP packet + * in the data section of an ICMP packet). I have the feeling that this should + * be unconditional, but I'm not in a position to check. + */ +#if !SOLARIS && !defined(__sgi) + /* + * Fix UDP pseudo header checksum to compensate for the + * IP address change. + */ + if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) { + /* + * The UDP checksum is optional, only adjust it + * if it has been set + */ + sum1 = ntohs(udp->uh_sum); + fix_datacksum(&udp->uh_sum, sumd); + sum2 = ntohs(udp->uh_sum); + + /* + * Fix ICMP checksum to compensate the UDP + * checksum adjustment. + */ + CALC_SUMD(sum1, sum2, sumd); + sumd2 = sumd; + } + +#if 0 + /* + * Fix TCP pseudo header checksum to compensate for the + * IP address change. Before we can do the change, we + * must make sure that oip is sufficient large to hold + * the TCP checksum (normally it does not!). + */ + if (oip->ip_p == IPPROTO_TCP) { + + }; +#endif + +#endif + } + + if ((flags & IPN_TCPUDP) != 0) { + tcphdr_t *tcp; + + /* + * XXX - what if this is bogus hl and we go off the end ? + * In this case, nat_icmpinlookup() will have returned NULL. + */ + tcp = (tcphdr_t *)udp; + + /* + * Step 2 : + * For offending TCP/UDP IP packets, translate the ports as + * well, based on the NAT specification. Of course such + * a change must be reflected in the ICMP checksum as well. + * + * Advance notice : Now it becomes complicated :-) + * + * Since the port fields are part of the TCP/UDP checksum + * of the offending IP packet, you need to adjust that checksum + * as well... but, if you change, you must change the icmp + * checksum *again*, to reflect that change. + * + * To further complicate: the TCP checksum is not in the first + * 8 bytes of the offending ip packet, so it most likely is not + * available (we might have to fix that if the encounter a + * device that returns more than 8 data bytes on icmp error) + */ + + if (nat->nat_oport == tcp->th_dport) { + if (tcp->th_sport != nat->nat_inport) { + /* + * Fix ICMP checksum to compensate port + * adjustment. + */ + sum1 = ntohs(tcp->th_sport); + sum2 = ntohs(nat->nat_inport); + CALC_SUMD(sum1, sum2, sumd); + sumd2 += sumd; + tcp->th_sport = nat->nat_inport; + + /* + * Fix udp checksum to compensate port + * adjustment. NOTE : the offending IP packet + * flows the other direction compared to the + * ICMP message. + * + * The UDP checksum is optional, only adjust + * it if it has been set. + */ + if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) { + + sum1 = ntohs(udp->uh_sum); + fix_datacksum(&udp->uh_sum, sumd); + sum2 = ntohs(udp->uh_sum); + + /* + * Fix ICMP checksum to + * compensate UDP checksum + * adjustment. + */ + CALC_SUMD(sum1, sum2, sumd); + sumd2 += sumd; + } + } + } else { + if (tcp->th_dport != nat->nat_outport) { + /* + * Fix ICMP checksum to compensate port + * adjustment. + */ + sum1 = ntohs(tcp->th_dport); + sum2 = ntohs(nat->nat_outport); + CALC_SUMD(sum1, sum2, sumd); + sumd2 += sumd; + tcp->th_dport = nat->nat_outport; + + /* + * Fix udp checksum to compensate port + * adjustment. NOTE : the offending IP + * packet flows the other direction compared + * to the ICMP message. + * + * The UDP checksum is optional, only adjust + * it if it has been set. + */ + if (oip->ip_p == IPPROTO_UDP && udp->uh_sum) { + + sum1 = ntohs(udp->uh_sum); + fix_datacksum(&udp->uh_sum, sumd); + sum2 = ntohs(udp->uh_sum); + + /* + * Fix ICMP checksum to compensate + * UDP checksum adjustment. + */ + CALC_SUMD(sum1, sum2, sumd); + sumd2 += sumd; + } + } + } + if (sumd2) { + sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); + sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); + if (nat->nat_dir == NAT_OUTBOUND) { + fix_outcksum(&icmp->icmp_cksum, sumd2); + } else { + fix_incksum(&icmp->icmp_cksum, sumd2); + } + } + } + nat->nat_age = fr_defnaticmpage; + return nat; +} + + +/* + * NB: these lookups don't lock access to the list, it assume it has already + * been done! + */ +/* + * Lookup a nat entry based on the mapped destination ip address/port and + * real source address/port. We use this lookup when receiving a packet, + * we're looking for a table entry, based on the destination address. + * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. + */ +nat_t *nat_inlookup(ifp, flags, p, src, mapdst, ports, rw) +void *ifp; +register u_int flags, p; +struct in_addr src , mapdst; +u_32_t ports; +int rw; +{ + register u_short sport, dport; + register nat_t *nat; + register int nflags; + register u_32_t dst; + u_int hv; + + dst = mapdst.s_addr; + dport = ports >> 16; + sport = ports & 0xffff; + flags &= IPN_TCPUDP; + + hv = NAT_HASH_FN(dst, dport, ipf_nattable_sz); + nat = nat_table[1][hv]; + for (; nat; nat = nat->nat_hnext[1]) { + nflags = nat->nat_flags; + if ((!ifp || ifp == nat->nat_ifp) && + nat->nat_oip.s_addr == src.s_addr && + nat->nat_outip.s_addr == dst && + (((p == 0) && (flags == (nat->nat_flags & IPN_TCPUDP))) + || (p == nat->nat_p)) && (!flags || + (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) && + ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))))) + return nat; + } + if (!nat_stats.ns_wilds || !(flags & IPN_TCPUDP)) + return NULL; + if (!rw) { + RWLOCK_EXIT(&ipf_nat); + } + hv = NAT_HASH_FN(dst, 0, ipf_nattable_sz); + if (!rw) { + WRITE_ENTER(&ipf_nat); + } + nat = nat_table[1][hv]; + for (; nat; nat = nat->nat_hnext[1]) { + nflags = nat->nat_flags; + if (ifp && ifp != nat->nat_ifp) + continue; + if (!(nflags & IPN_TCPUDP)) + continue; + if (!(nflags & FI_WILDP)) + continue; + if (nat->nat_oip.s_addr != src.s_addr || + nat->nat_outip.s_addr != dst) + continue; + if (((nat->nat_oport == sport) || (nflags & FI_W_DPORT)) && + ((nat->nat_outport == dport) || (nflags & FI_W_SPORT))) { + nat_tabmove(nat, ports); + break; + } + } + if (!rw) { + MUTEX_DOWNGRADE(&ipf_nat); + } + return nat; +} + + +/* + * This function is only called for TCP/UDP NAT table entries where the + * original was placed in the table without hashing on the ports and we now + * want to include hashing on port numbers. + */ +static void nat_tabmove(nat, ports) +nat_t *nat; +u_32_t ports; +{ + register u_short sport, dport; + nat_t **natp; + u_int hv; + + dport = ports >> 16; + sport = ports & 0xffff; + + if (nat->nat_oport == dport) { + nat->nat_inport = sport; + nat->nat_outport = sport; + } + + /* + * Remove the NAT entry from the old location + */ + if (nat->nat_hnext[0]) + nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; + *nat->nat_phnext[0] = nat->nat_hnext[0]; + + if (nat->nat_hnext[1]) + nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; + *nat->nat_phnext[1] = nat->nat_hnext[1]; + + /* + * Add into the NAT table in the new position + */ + hv = NAT_HASH_FN(nat->nat_inip.s_addr, sport, ipf_nattable_sz); + natp = &nat_table[0][hv]; + if (*natp) + (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; + nat->nat_phnext[0] = natp; + nat->nat_hnext[0] = *natp; + *natp = nat; + + hv = NAT_HASH_FN(nat->nat_outip.s_addr, sport, ipf_nattable_sz); + natp = &nat_table[1][hv]; + if (*natp) + (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; + nat->nat_phnext[1] = natp; + nat->nat_hnext[1] = *natp; + *natp = nat; +} + + +/* + * Lookup a nat entry based on the source 'real' ip address/port and + * destination address/port. We use this lookup when sending a packet out, + * we're looking for a table entry, based on the source address. + * NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. + */ +nat_t *nat_outlookup(ifp, flags, p, src, dst, ports, rw) +void *ifp; +register u_int flags, p; +struct in_addr src , dst; +u_32_t ports; +int rw; +{ + register u_short sport, dport; + register nat_t *nat; + register int nflags; + u_32_t srcip; + u_int hv; + + sport = ports & 0xffff; + dport = ports >> 16; + flags &= IPN_TCPUDP; + srcip = src.s_addr; + + hv = NAT_HASH_FN(srcip, sport, ipf_nattable_sz); + nat = nat_table[0][hv]; + for (; nat; nat = nat->nat_hnext[0]) { + nflags = nat->nat_flags; + + if ((!ifp || ifp == nat->nat_ifp) && + nat->nat_inip.s_addr == srcip && + nat->nat_oip.s_addr == dst.s_addr && + (((p == 0) && (flags == (nflags & IPN_TCPUDP))) + || (p == nat->nat_p)) && (!flags || + ((nat->nat_inport == sport || nflags & FI_W_SPORT) && + (nat->nat_oport == dport || nflags & FI_W_DPORT)))) + return nat; + } + if (!nat_stats.ns_wilds || !(flags & IPN_TCPUDP)) + return NULL; + if (!rw) { + RWLOCK_EXIT(&ipf_nat); + } + hv = NAT_HASH_FN(srcip, 0, ipf_nattable_sz); + if (!rw) { + WRITE_ENTER(&ipf_nat); + } + nat = nat_table[0][hv]; + for (; nat; nat = nat->nat_hnext[0]) { + nflags = nat->nat_flags; + if (ifp && ifp != nat->nat_ifp) + continue; + if (!(nflags & IPN_TCPUDP)) + continue; + if (!(nflags & FI_WILDP)) + continue; + if ((nat->nat_inip.s_addr != srcip) || + (nat->nat_oip.s_addr != dst.s_addr)) + continue; + if (((nat->nat_inport == sport) || (nflags & FI_W_SPORT)) && + ((nat->nat_oport == dport) || (nflags & FI_W_DPORT))) { + nat_tabmove(nat, ports); + break; + } + } + if (!rw) { + MUTEX_DOWNGRADE(&ipf_nat); + } + return nat; +} + + +/* + * Lookup the NAT tables to search for a matching redirect + */ +nat_t *nat_lookupredir(np) +register natlookup_t *np; +{ + u_32_t ports; + nat_t *nat; + + ports = (np->nl_outport << 16) | np->nl_inport; + /* + * If nl_inip is non null, this is a lookup based on the real + * ip address. Else, we use the fake. + */ + if ((nat = nat_outlookup(NULL, np->nl_flags, 0, np->nl_inip, + np->nl_outip, ports, 0))) { + np->nl_realip = nat->nat_outip; + np->nl_realport = nat->nat_outport; + } + return nat; +} + + +static int nat_match(fin, np, ip) +fr_info_t *fin; +ipnat_t *np; +ip_t *ip; +{ + frtuc_t *ft; + + if (ip->ip_v != 4) + return 0; + + if (np->in_p && ip->ip_p != np->in_p) + return 0; + if (fin->fin_out) { + if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK))) + return 0; + if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip) + ^ ((np->in_flags & IPN_NOTSRC) != 0)) + return 0; + if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip) + ^ ((np->in_flags & IPN_NOTDST) != 0)) + return 0; + } else { + if (!(np->in_redir & NAT_REDIRECT)) + return 0; + if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip) + ^ ((np->in_flags & IPN_NOTSRC) != 0)) + return 0; + if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip) + ^ ((np->in_flags & IPN_NOTDST) != 0)) + return 0; + } + + ft = &np->in_tuc; + if (!(fin->fin_fi.fi_fl & FI_TCPUDP) || + (fin->fin_fi.fi_fl & FI_SHORT) || (ip->ip_off & IP_OFFMASK)) { + if (ft->ftu_scmp || ft->ftu_dcmp) + return 0; + return 1; + } + + return fr_tcpudpchk(ft, fin); +} + + +/* + * Packets going out on the external interface go through this. + * Here, the source address requires alteration, if anything. + */ +int ip_natout(ip, fin) +ip_t *ip; +fr_info_t *fin; +{ + register ipnat_t *np = NULL; + register u_32_t ipa; + tcphdr_t *tcp = NULL; + u_short sport = 0, dport = 0, *csump = NULL; + struct ifnet *ifp; + int natadd = 1; + frentry_t *fr; + u_int nflags = 0, hv, msk; + u_32_t iph; + nat_t *nat; + int i; + + if (nat_list == NULL || (fr_nat_lock)) + return 0; + + if ((fr = fin->fin_fr) && !(fr->fr_flags & FR_DUP) && + fr->fr_tif.fd_ifp && fr->fr_tif.fd_ifp != (void *)-1) + ifp = fr->fr_tif.fd_ifp; + else + ifp = fin->fin_ifp; + + if (!(ip->ip_off & IP_OFFMASK) && !(fin->fin_fi.fi_fl & FI_SHORT)) { + if (ip->ip_p == IPPROTO_TCP) + nflags = IPN_TCP; + else if (ip->ip_p == IPPROTO_UDP) + nflags = IPN_UDP; + if ((nflags & IPN_TCPUDP)) { + tcp = (tcphdr_t *)fin->fin_dp; + sport = tcp->th_sport; + dport = tcp->th_dport; + } + } + + ipa = ip->ip_src.s_addr; + + READ_ENTER(&ipf_nat); + + if ((ip->ip_p == IPPROTO_ICMP) && + (nat = nat_icmp(ip, fin, &nflags, NAT_OUTBOUND))) + ; + else if ((ip->ip_off & (IP_OFFMASK|IP_MF)) && + (nat = ipfr_nat_knownfrag(ip, fin))) + natadd = 0; + else if ((nat = nat_outlookup(ifp, nflags, (u_int)ip->ip_p, + ip->ip_src, ip->ip_dst, + (dport << 16) | sport, 0))) { + nflags = nat->nat_flags; + if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) { + if ((nflags & FI_W_SPORT) && + (nat->nat_inport != sport)) + nat->nat_inport = sport; + else if ((nflags & FI_W_DPORT) && + (nat->nat_oport != dport)) + nat->nat_oport = dport; + if (nat->nat_outport == 0) + nat->nat_outport = sport; + nat->nat_flags &= ~(FI_W_DPORT|FI_W_SPORT); + nflags = nat->nat_flags; + nat_stats.ns_wilds--; + } + } else { + RWLOCK_EXIT(&ipf_nat); + WRITE_ENTER(&ipf_nat); + /* + * If there is no current entry in the nat table for this IP#, + * create one for it (if there is a matching rule). + */ + msk = 0xffffffff; + i = 32; +maskloop: + iph = ipa & htonl(msk); + hv = NAT_HASH_FN(iph, 0, ipf_natrules_sz); + for (np = nat_rules[hv]; np; np = np->in_mnext) + { + if ((np->in_ifp && (np->in_ifp != ifp)) || + !np->in_space) + continue; + if ((np->in_flags & IPN_RF) && + !(np->in_flags & nflags)) + continue; + if (np->in_flags & IPN_FILTER) { + if (!nat_match(fin, np, ip)) + continue; + } else if ((ipa & np->in_inmsk) != np->in_inip) + continue; + if (np->in_redir & (NAT_MAP|NAT_MAPBLK)) { + if (*np->in_plabel && !appr_ok(ip, tcp, np)) + continue; + /* + * If it's a redirection, then we don't want to + * create new outgoing port stuff. + * Redirections are only for incoming + * connections. + */ + if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK))) + continue; + if ((nat = nat_new(np, ip, fin, (u_int)nflags, + NAT_OUTBOUND))) { + np->in_hits++; + break; + } + } + } + if ((np == NULL) && (i > 0)) { + do { + i--; + msk <<= 1; + } while ((i >= 0) && ((nat_masks & (1 << i)) == 0)); + if (i >= 0) + goto maskloop; + } + MUTEX_DOWNGRADE(&ipf_nat); + } + + /* + * NOTE: ipf_nat must now only be held as a read lock + */ + if (nat) { + np = nat->nat_ptr; + if (natadd && (fin->fin_fi.fi_fl & FI_FRAG) && + np && (np->in_flags & IPN_FRAG)) + ipfr_nat_newfrag(ip, fin, 0, nat); + MUTEX_ENTER(&nat->nat_lock); + nat->nat_age = fr_defnatage; + nat->nat_bytes += ip->ip_len; + nat->nat_pkts++; + MUTEX_EXIT(&nat->nat_lock); + + /* + * Fix up checksums, not by recalculating them, but + * simply computing adjustments. + */ + if (nflags == IPN_ICMPERR) { + u_32_t s1, s2, sumd; + + s1 = LONG_SUM(ntohl(ip->ip_src.s_addr)); + s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr)); + CALC_SUMD(s1, s2, sumd); + + if (nat->nat_dir == NAT_OUTBOUND) + fix_incksum(&ip->ip_sum, sumd); + else + fix_outcksum(&ip->ip_sum, sumd); + } +#if SOLARIS || defined(__sgi) + else { + if (nat->nat_dir == NAT_OUTBOUND) + fix_outcksum(&ip->ip_sum, nat->nat_ipsumd); + else + fix_incksum(&ip->ip_sum, nat->nat_ipsumd); + } +#endif + ip->ip_src = nat->nat_outip; + + if (!(ip->ip_off & IP_OFFMASK) && + !(fin->fin_fi.fi_fl & FI_SHORT)) { + + if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) { + tcp->th_sport = nat->nat_outport; + fin->fin_data[0] = ntohs(tcp->th_sport); + } + + if (ip->ip_p == IPPROTO_TCP) { + csump = &tcp->th_sum; + MUTEX_ENTER(&nat->nat_lock); + fr_tcp_age(&nat->nat_age, + nat->nat_tcpstate, fin, 1); + if (nat->nat_age < fr_defnaticmpage) + nat->nat_age = fr_defnaticmpage; +#ifdef LARGE_NAT + else if (nat->nat_age > fr_defnatage) + nat->nat_age = fr_defnatage; +#endif + /* + * Increase this because we may have + * "keep state" following this too and + * packet storms can occur if this is + * removed too quickly. + */ + if (nat->nat_age == fr_tcpclosed) + nat->nat_age = fr_tcplastack; + MUTEX_EXIT(&nat->nat_lock); + } else if (ip->ip_p == IPPROTO_UDP) { + udphdr_t *udp = (udphdr_t *)tcp; + + if (udp->uh_sum) + csump = &udp->uh_sum; + } else if (ip->ip_p == IPPROTO_ICMP) { + nat->nat_age = fr_defnaticmpage; + } + + if (csump) { + if (nat->nat_dir == NAT_OUTBOUND) + fix_outcksum(csump, nat->nat_sumd[1]); + else + fix_incksum(csump, nat->nat_sumd[1]); + } + } + + if ((np->in_apr != NULL) && (np->in_dport == 0 || + (tcp != NULL && dport == np->in_dport))) { + i = appr_check(ip, fin, nat); + if (i == 0) + i = 1; + } else + i = 1; + ATOMIC_INCL(nat_stats.ns_mapped[1]); + RWLOCK_EXIT(&ipf_nat); /* READ */ + return i; + } + RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */ + return 0; +} + + +/* + * Packets coming in from the external interface go through this. + * Here, the destination address requires alteration, if anything. + */ +int ip_natin(ip, fin) +ip_t *ip; +fr_info_t *fin; +{ + register struct in_addr src; + register struct in_addr in; + register ipnat_t *np; + u_int nflags = 0, natadd = 1, hv, msk; + struct ifnet *ifp = fin->fin_ifp; + tcphdr_t *tcp = NULL; + u_short sport = 0, dport = 0, *csump = NULL; + nat_t *nat; + u_32_t iph; + int i; + + if ((nat_list == NULL) || (ip->ip_v != 4) || (fr_nat_lock)) + return 0; + + if (!(ip->ip_off & IP_OFFMASK) && !(fin->fin_fi.fi_fl & FI_SHORT)) { + if (ip->ip_p == IPPROTO_TCP) + nflags = IPN_TCP; + else if (ip->ip_p == IPPROTO_UDP) + nflags = IPN_UDP; + if ((nflags & IPN_TCPUDP)) { + tcp = (tcphdr_t *)fin->fin_dp; + dport = tcp->th_dport; + sport = tcp->th_sport; + } + } + + in = ip->ip_dst; + /* make sure the source address is to be redirected */ + src = ip->ip_src; + + READ_ENTER(&ipf_nat); + + if ((ip->ip_p == IPPROTO_ICMP) && + (nat = nat_icmp(ip, fin, &nflags, NAT_INBOUND))) + ; + else if ((ip->ip_off & (IP_OFFMASK|IP_MF)) && + (nat = ipfr_nat_knownfrag(ip, fin))) + natadd = 0; + else if ((nat = nat_inlookup(fin->fin_ifp, nflags, (u_int)ip->ip_p, + ip->ip_src, in, (dport << 16) | sport, + 0))) { + nflags = nat->nat_flags; + if ((nflags & (FI_W_SPORT|FI_W_DPORT)) != 0) { + if ((nat->nat_oport != sport) && (nflags & FI_W_DPORT)) + nat->nat_oport = sport; + else if ((nat->nat_outport != dport) && + (nflags & FI_W_SPORT)) + nat->nat_outport = dport; + nat->nat_flags &= ~(FI_W_SPORT|FI_W_DPORT); + nflags = nat->nat_flags; + nat_stats.ns_wilds--; + } + } else { + RWLOCK_EXIT(&ipf_nat); + WRITE_ENTER(&ipf_nat); + /* + * If there is no current entry in the nat table for this IP#, + * create one for it (if there is a matching rule). + */ + msk = 0xffffffff; + i = 32; +maskloop: + iph = in.s_addr & htonl(msk); + hv = NAT_HASH_FN(iph, 0, ipf_rdrrules_sz); + for (np = rdr_rules[hv]; np; np = np->in_rnext) { + if ((np->in_ifp && (np->in_ifp != ifp)) || + (np->in_p && (np->in_p != ip->ip_p)) || + (np->in_flags && !(nflags & np->in_flags))) + continue; + if (np->in_flags & IPN_FILTER) { + if (!nat_match(fin, np, ip)) + continue; + } else if ((in.s_addr & np->in_outmsk) != np->in_outip) + continue; + if ((np->in_redir & NAT_REDIRECT) && + (!np->in_pmin || (np->in_flags & IPN_FILTER) || + ((ntohs(np->in_pmax) >= ntohs(dport)) && + (ntohs(dport) >= ntohs(np->in_pmin))))) + if ((nat = nat_new(np, ip, fin, nflags, + NAT_INBOUND))) { + np->in_hits++; + break; + } + } + + if ((np == NULL) && (i > 0)) { + do { + i--; + msk <<= 1; + } while ((i >= 0) && ((rdr_masks & (1 << i)) == 0)); + if (i >= 0) + goto maskloop; + } + MUTEX_DOWNGRADE(&ipf_nat); + } + + /* + * NOTE: ipf_nat must now only be held as a read lock + */ + if (nat) { + np = nat->nat_ptr; + fin->fin_fr = nat->nat_fr; + if (natadd && (fin->fin_fi.fi_fl & FI_FRAG) && + np && (np->in_flags & IPN_FRAG)) + ipfr_nat_newfrag(ip, fin, 0, nat); + if ((np->in_apr != NULL) && (np->in_dport == 0 || + (tcp != NULL && sport == np->in_dport))) { + i = appr_check(ip, fin, nat); + if (i == -1) { + RWLOCK_EXIT(&ipf_nat); + return i; + } + } + + MUTEX_ENTER(&nat->nat_lock); + if (nflags != IPN_ICMPERR) + nat->nat_age = fr_defnatage; + + nat->nat_bytes += ip->ip_len; + nat->nat_pkts++; + MUTEX_EXIT(&nat->nat_lock); + ip->ip_dst = nat->nat_inip; + fin->fin_fi.fi_daddr = nat->nat_inip.s_addr; + + /* + * Fix up checksums, not by recalculating them, but + * simply computing adjustments. + */ +#if SOLARIS || defined(__sgi) + if (nat->nat_dir == NAT_OUTBOUND) + fix_incksum(&ip->ip_sum, nat->nat_ipsumd); + else + fix_outcksum(&ip->ip_sum, nat->nat_ipsumd); +#endif + if (!(ip->ip_off & IP_OFFMASK) && + !(fin->fin_fi.fi_fl & FI_SHORT)) { + + if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) { + tcp->th_dport = nat->nat_inport; + fin->fin_data[1] = ntohs(tcp->th_dport); + } + + if (ip->ip_p == IPPROTO_TCP) { + csump = &tcp->th_sum; + MUTEX_ENTER(&nat->nat_lock); + fr_tcp_age(&nat->nat_age, + nat->nat_tcpstate, fin, 0); + if (nat->nat_age < fr_defnaticmpage) + nat->nat_age = fr_defnaticmpage; +#ifdef LARGE_NAT + else if (nat->nat_age > fr_defnatage) + nat->nat_age = fr_defnatage; +#endif + /* + * Increase this because we may have + * "keep state" following this too and + * packet storms can occur if this is + * removed too quickly. + */ + if (nat->nat_age == fr_tcpclosed) + nat->nat_age = fr_tcplastack; + MUTEX_EXIT(&nat->nat_lock); + } else if (ip->ip_p == IPPROTO_UDP) { + udphdr_t *udp = (udphdr_t *)tcp; + + if (udp->uh_sum) + csump = &udp->uh_sum; + } else if (ip->ip_p == IPPROTO_ICMP) { + nat->nat_age = fr_defnaticmpage; + } + + if (csump) { + if (nat->nat_dir == NAT_OUTBOUND) + fix_incksum(csump, nat->nat_sumd[0]); + else + fix_outcksum(csump, nat->nat_sumd[0]); + } + } + ATOMIC_INCL(nat_stats.ns_mapped[0]); + RWLOCK_EXIT(&ipf_nat); /* READ */ + return 1; + } + RWLOCK_EXIT(&ipf_nat); /* READ/WRITE */ + return 0; +} + + +/* + * Free all memory used by NAT structures allocated at runtime. + */ +void ip_natunload() +{ + WRITE_ENTER(&ipf_nat); + (void) nat_clearlist(); + (void) nat_flushtable(); + RWLOCK_EXIT(&ipf_nat); + + if (nat_table[0] != NULL) { + KFREES(nat_table[0], sizeof(nat_t *) * ipf_nattable_sz); + nat_table[0] = NULL; + } + if (nat_table[1] != NULL) { + KFREES(nat_table[1], sizeof(nat_t *) * ipf_nattable_sz); + nat_table[1] = NULL; + } + if (nat_rules != NULL) { + KFREES(nat_rules, sizeof(ipnat_t *) * ipf_natrules_sz); + nat_rules = NULL; + } + if (rdr_rules != NULL) { + KFREES(rdr_rules, sizeof(ipnat_t *) * ipf_rdrrules_sz); + rdr_rules = NULL; + } + if (maptable != NULL) { + KFREES(maptable, sizeof(hostmap_t *) * ipf_hostmap_sz); + maptable = NULL; + } +} + + +/* + * Slowly expire held state for NAT entries. Timeouts are set in + * expectation of this being called twice per second. + */ +void ip_natexpire() +{ + register struct nat *nat, **natp; +#if defined(_KERNEL) && !SOLARIS + int s; +#endif + + SPL_NET(s); + WRITE_ENTER(&ipf_nat); + for (natp = &nat_instances; (nat = *natp); ) { + nat->nat_age--; + if (nat->nat_age) { + natp = &nat->nat_next; + continue; + } + *natp = nat->nat_next; +#ifdef IPFILTER_LOG + nat_log(nat, NL_EXPIRE); +#endif + nat_delete(nat); + nat_stats.ns_expire++; + } + RWLOCK_EXIT(&ipf_nat); + SPL_X(s); +} + + +/* + */ +void ip_natsync(ifp) +void *ifp; +{ + register ipnat_t *n; + register nat_t *nat; + register u_32_t sum1, sum2, sumd; + struct in_addr in; + ipnat_t *np; + void *ifp2; +#if defined(_KERNEL) && !SOLARIS + int s; +#endif + + /* + * Change IP addresses for NAT sessions for any protocol except TCP + * since it will break the TCP connection anyway. + */ + SPL_NET(s); + WRITE_ENTER(&ipf_nat); + for (nat = nat_instances; nat; nat = nat->nat_next) + if (((ifp == NULL) || (ifp == nat->nat_ifp)) && + !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr) && + (np->in_outmsk == 0xffffffff) && !np->in_nip) { + ifp2 = nat->nat_ifp; + /* + * Change the map-to address to be the same as the + * new one. + */ + sum1 = nat->nat_outip.s_addr; + if (fr_ifpaddr(4, ifp2, &in) != -1) + nat->nat_outip = in; + sum2 = nat->nat_outip.s_addr; + + if (sum1 == sum2) + continue; + /* + * Readjust the checksum adjustment to take into + * account the new IP#. + */ + CALC_SUMD(sum1, sum2, sumd); + /* XXX - dont change for TCP when solaris does + * hardware checksumming. + */ + sumd += nat->nat_sumd[0]; + nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); + nat->nat_sumd[1] = nat->nat_sumd[0]; + } + + for (n = nat_list; (n != NULL); n = n->in_next) + if (n->in_ifp == ifp) { + n->in_ifp = (void *)GETUNIT(n->in_ifname, 4); + if (!n->in_ifp) + n->in_ifp = (void *)-1; + } + RWLOCK_EXIT(&ipf_nat); + SPL_X(s); +} + + +#ifdef IPFILTER_LOG +void nat_log(nat, type) +struct nat *nat; +u_int type; +{ + struct ipnat *np; + struct natlog natl; + void *items[1]; + size_t sizes[1]; + int rulen, types[1]; + + natl.nl_inip = nat->nat_inip; + natl.nl_outip = nat->nat_outip; + natl.nl_origip = nat->nat_oip; + natl.nl_bytes = nat->nat_bytes; + natl.nl_pkts = nat->nat_pkts; + natl.nl_origport = nat->nat_oport; + natl.nl_inport = nat->nat_inport; + natl.nl_outport = nat->nat_outport; + natl.nl_p = nat->nat_p; + natl.nl_type = type; + natl.nl_rule = -1; +#ifndef LARGE_NAT + if (nat->nat_ptr != NULL) { + for (rulen = 0, np = nat_list; np; np = np->in_next, rulen++) + if (np == nat->nat_ptr) { + natl.nl_rule = rulen; + break; + } + } +#endif + items[0] = &natl; + sizes[0] = sizeof(natl); + types[0] = 0; + + (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1); +} +#endif |