diff options
author | darrenr <darrenr@FreeBSD.org> | 2007-06-04 02:50:28 +0000 |
---|---|---|
committer | darrenr <darrenr@FreeBSD.org> | 2007-06-04 02:50:28 +0000 |
commit | b8cc98bd6c45a4a050a69e926139c5db53e1601d (patch) | |
tree | f99358cce4a313a1af08c9ace83b16818c199d93 /sys/contrib/ipfilter/netinet/ip_state.c | |
parent | 83043906d8f7c639d3fecb55801bf3f68a800501 (diff) | |
download | FreeBSD-src-b8cc98bd6c45a4a050a69e926139c5db53e1601d.zip FreeBSD-src-b8cc98bd6c45a4a050a69e926139c5db53e1601d.tar.gz |
Import IPFilter 4.1.23 to vendor branch.
See src/contrib/ipfilter/HISTORY for details of changes since 4.1.13
Diffstat (limited to 'sys/contrib/ipfilter/netinet/ip_state.c')
-rw-r--r-- | sys/contrib/ipfilter/netinet/ip_state.c | 759 |
1 files changed, 510 insertions, 249 deletions
diff --git a/sys/contrib/ipfilter/netinet/ip_state.c b/sys/contrib/ipfilter/netinet/ip_state.c index 32aa805..774dff3 100644 --- a/sys/contrib/ipfilter/netinet/ip_state.c +++ b/sys/contrib/ipfilter/netinet/ip_state.c @@ -15,7 +15,11 @@ #include <sys/file.h> #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ defined(_KERNEL) -# include "opt_ipfilter_log.h" +# if (__NetBSD_Version__ < 399001400) +# include "opt_ipfilter_log.h" +# else +# include "opt_ipfilter.h" +# endif #endif #if defined(_KERNEL) && defined(__FreeBSD_version) && \ (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) @@ -107,7 +111,7 @@ struct file; #if !defined(lint) static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; -static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.41 2006/04/01 10:16:28 darrenr Exp $"; +static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.69 2007/05/26 13:05:14 darrenr Exp $"; #endif static ipstate_t **ips_table = NULL; @@ -123,8 +127,9 @@ static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, i6addr_t *, tcphdr_t *, u_32_t)); static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); static int fr_state_flush __P((int, int)); +static int fr_state_flush_entry __P((void *)); static ips_stat_t *fr_statetstats __P((void)); -static void fr_delstate __P((ipstate_t *, int)); +static int fr_delstate __P((ipstate_t *, int)); static int fr_state_remove __P((caddr_t)); static void fr_ipsmove __P((ipstate_t *, u_int)); static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); @@ -133,6 +138,8 @@ static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); static void fr_fixoutisn __P((fr_info_t *, ipstate_t *)); static void fr_checknewisn __P((fr_info_t *, ipstate_t *)); +static int fr_stateiter __P((ipftoken_t *, ipfgeniter_t *)); +static int fr_stgettable __P((char *)); int fr_stputent __P((caddr_t)); int fr_stgetent __P((caddr_t)); @@ -143,9 +150,10 @@ int fr_stgetent __P((caddr_t)); u_long fr_tcpidletimeout = FIVE_DAYS, fr_tcpclosewait = IPF_TTLVAL(2 * TCP_MSL), - fr_tcplastack = IPF_TTLVAL(2 * TCP_MSL), + fr_tcplastack = IPF_TTLVAL(30), fr_tcptimeout = IPF_TTLVAL(2 * TCP_MSL), - fr_tcpclosed = IPF_TTLVAL(60), + fr_tcptimewait = IPF_TTLVAL(2 * TCP_MSL), + fr_tcpclosed = IPF_TTLVAL(30), fr_tcphalfclosed = IPF_TTLVAL(2 * 3600), /* 2 hours */ fr_udptimeout = IPF_TTLVAL(120), fr_udpacktimeout = IPF_TTLVAL(12), @@ -165,6 +173,7 @@ ipftq_t ips_tqtqb[IPF_TCP_NSTATES], ips_iptq, ips_icmptq, ips_icmpacktq, + ips_deletetq, *ips_utqe = NULL; #ifdef IPFILTER_LOG int ipstate_logging = 1; @@ -239,6 +248,7 @@ int fr_stateinit() fr_state_maxbucket *= 2; } + ips_stats.iss_tcptab = ips_tqtqb; fr_sttab_init(ips_tqtqb); ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ips_udptq; ips_udptq.ifq_ttl = (u_long)fr_udptimeout; @@ -270,7 +280,13 @@ int fr_stateinit() ips_iptq.ifq_head = NULL; ips_iptq.ifq_tail = &ips_iptq.ifq_head; MUTEX_INIT(&ips_iptq.ifq_lock, "ipftq ip tab"); - ips_iptq.ifq_next = NULL; + ips_iptq.ifq_next = &ips_deletetq; + ips_deletetq.ifq_ttl = (u_long)1; + ips_deletetq.ifq_ref = 1; + ips_deletetq.ifq_head = NULL; + ips_deletetq.ifq_tail = &ips_deletetq.ifq_head; + MUTEX_INIT(&ips_deletetq.ifq_lock, "state delete queue"); + ips_deletetq.ifq_next = NULL; RWLOCK_INIT(&ipf_state, "ipf IP state rwlock"); MUTEX_INIT(&ipf_stinsert, "ipf state insert mutex"); @@ -295,7 +311,7 @@ void fr_stateunload() ipstate_t *is; while ((is = ips_list) != NULL) - fr_delstate(is, 0); + fr_delstate(is, ISL_UNLOAD); /* * Proxy timeout queues are not cleaned here because although they @@ -321,6 +337,7 @@ void fr_stateunload() MUTEX_DESTROY(&ips_udpacktq.ifq_lock); MUTEX_DESTROY(&ips_icmpacktq.ifq_lock); MUTEX_DESTROY(&ips_iptq.ifq_lock); + MUTEX_DESTROY(&ips_deletetq.ifq_lock); } if (ips_table != NULL) { @@ -414,12 +431,14 @@ caddr_t data; /* */ /* Processes an ioctl call made to operate on the IP Filter state device. */ /* ------------------------------------------------------------------------ */ -int fr_state_ioctl(data, cmd, mode) +int fr_state_ioctl(data, cmd, mode, uid, ctx) caddr_t data; ioctlcmd_t cmd; -int mode; +int mode, uid; +void *ctx; { int arg, ret, error = 0; + SPL_INT(s); switch (cmd) { @@ -429,29 +448,37 @@ int mode; case SIOCDELST : error = fr_state_remove(data); break; + /* * Flush the state table */ case SIOCIPFFL : - BCOPYIN(data, (char *)&arg, sizeof(arg)); - if (arg == 0 || arg == 1) { + error = BCOPYIN(data, (char *)&arg, sizeof(arg)); + if (error != 0) { + error = EFAULT; + } else { WRITE_ENTER(&ipf_state); ret = fr_state_flush(arg, 4); RWLOCK_EXIT(&ipf_state); - BCOPYOUT((char *)&ret, data, sizeof(ret)); - } else - error = EINVAL; + error = BCOPYOUT((char *)&ret, data, sizeof(ret)); + if (error != 0) + error = EFAULT; + } break; + #ifdef USE_INET6 case SIOCIPFL6 : - BCOPYIN(data, (char *)&arg, sizeof(arg)); - if (arg == 0 || arg == 1) { + error = BCOPYIN(data, (char *)&arg, sizeof(arg)); + if (error != 0) { + error = EFAULT; + } else { WRITE_ENTER(&ipf_state); ret = fr_state_flush(arg, 6); RWLOCK_EXIT(&ipf_state); - BCOPYOUT((char *)&ret, data, sizeof(ret)); - } else - error = EINVAL; + error = BCOPYOUT((char *)&ret, data, sizeof(ret)); + if (error != 0) + error = EFAULT; + } break; #endif #ifdef IPFILTER_LOG @@ -465,9 +492,12 @@ int mode; int tmp; tmp = ipflog_clear(IPL_LOGSTATE); - BCOPYOUT((char *)&tmp, data, sizeof(tmp)); + error = BCOPYOUT((char *)&tmp, data, sizeof(tmp)); + if (error != 0) + error = EFAULT; } break; + /* * Turn logging of state information on/off. */ @@ -475,31 +505,41 @@ int mode; if (!(mode & FWRITE)) error = EPERM; else { - BCOPYIN((char *)data, (char *)&ipstate_logging, - sizeof(ipstate_logging)); + error = BCOPYIN((char *)data, (char *)&ipstate_logging, + sizeof(ipstate_logging)); + if (error != 0) + error = EFAULT; } break; + /* * Return the current state of logging. */ case SIOCGETLG : - BCOPYOUT((char *)&ipstate_logging, (char *)data, - sizeof(ipstate_logging)); + error = BCOPYOUT((char *)&ipstate_logging, (char *)data, + sizeof(ipstate_logging)); + if (error != 0) + error = EFAULT; break; + /* * Return the number of bytes currently waiting to be read. */ case FIONREAD : arg = iplused[IPL_LOGSTATE]; /* returned in an int */ - BCOPYOUT((char *)&arg, data, sizeof(arg)); + error = BCOPYOUT((char *)&arg, data, sizeof(arg)); + if (error != 0) + error = EFAULT; break; #endif + /* * Get the current state statistics. */ case SIOCGETFS : error = fr_outobj(data, fr_statetstats(), IPFOBJ_STATESTAT); break; + /* * Lock/Unlock the state table. (Locking prevents any changes, which * means no packets match). @@ -511,6 +551,7 @@ int mode; fr_lock(data, &fr_state_lock); } break; + /* * Add an entry to the current state table. */ @@ -521,6 +562,7 @@ int mode; } error = fr_stputent(data); break; + /* * Get a state table entry. */ @@ -531,6 +573,56 @@ int mode; } error = fr_stgetent(data); break; + + /* + * Return a copy of the hash table bucket lengths + */ + case SIOCSTAT1 : + error = BCOPYOUT(ips_stats.iss_bucketlen, data, + fr_statesize * sizeof(u_long)); + if (error != 0) + error = EFAULT; + break; + + case SIOCGENITER : + { + ipftoken_t *token; + ipfgeniter_t iter; + + error = fr_inobj(data, &iter, IPFOBJ_GENITER); + if (error != 0) + break; + + SPL_SCHED(s); + token = ipf_findtoken(IPFGENITER_STATE, uid, ctx); + if (token != NULL) + error = fr_stateiter(token, &iter); + else + error = ESRCH; + RWLOCK_EXIT(&ipf_tokens); + SPL_X(s); + break; + } + + case SIOCGTABL : + error = fr_stgettable(data); + break; + + case SIOCIPFDELTOK : + error = BCOPYIN(data, (char *)&arg, sizeof(arg)); + if (error != 0) { + error = EFAULT; + } else { + SPL_SCHED(s); + error = ipf_deltoken(arg, uid, ctx); + SPL_X(s); + } + break; + + case SIOCGTQTAB : + error = fr_outobj(data, ips_tqtqb, IPFOBJ_STATETQTAB); + break; + default : error = EINVAL; break; @@ -670,7 +762,8 @@ caddr_t data; fr->fr_data = NULL; fr->fr_type = FR_T_NONE; - fr_resolvedest(&fr->fr_tif, fr->fr_v); + fr_resolvedest(&fr->fr_tifs[0], fr->fr_v); + fr_resolvedest(&fr->fr_tifs[1], fr->fr_v); fr_resolvedest(&fr->fr_dif, fr->fr_v); /* @@ -819,7 +912,6 @@ u_int flags; frentry_t *fr; tcphdr_t *tcp; grehdr_t *gre; - void *ifp; int out; if (fr_state_lock || @@ -841,7 +933,7 @@ u_int flags; */ fr = fin->fin_fr; if (fr != NULL) { - if ((ips_num == fr_statemax) && (fr->fr_statemax == 0)) { + if ((ips_num >= fr_statemax) && (fr->fr_statemax == 0)) { ATOMIC_INCL(ips_stats.iss_max); fr_state_doflush = 1; return NULL; @@ -849,7 +941,6 @@ u_int flags; if ((fr->fr_statemax != 0) && (fr->fr_statecnt >= fr->fr_statemax)) { ATOMIC_INCL(ips_stats.iss_maxref); - fr_state_doflush = 1; return NULL; } } @@ -1084,30 +1175,41 @@ u_int flags; is->is_tag = fr->fr_logtag; + /* + * The name '-' is special for network interfaces and causes + * a NULL name to be present, always, allowing packets to + * match it, regardless of their interface. + */ + if ((fin->fin_ifp == NULL) || + (fr->fr_ifnames[out << 1][0] == '-' && + fr->fr_ifnames[out << 1][1] == '\0')) { + is->is_ifp[out << 1] = fr->fr_ifas[0]; + strncpy(is->is_ifname[out << 1], fr->fr_ifnames[0], + sizeof(fr->fr_ifnames[0])); + } else { + is->is_ifp[out << 1] = fin->fin_ifp; + COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1]); + } + is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; + strncpy(is->is_ifname[(out << 1) + 1], fr->fr_ifnames[1], + sizeof(fr->fr_ifnames[1])); + is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; - is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; + strncpy(is->is_ifname[((1 - out) << 1)], fr->fr_ifnames[2], + sizeof(fr->fr_ifnames[2])); - if (((ifp = fr->fr_ifas[1]) != NULL) && - (ifp != (void *)-1)) { - COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1]); - } - if (((ifp = fr->fr_ifas[2]) != NULL) && - (ifp != (void *)-1)) { - COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1]); - } - if (((ifp = fr->fr_ifas[3]) != NULL) && - (ifp != (void *)-1)) { - COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1]); - } + is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; + strncpy(is->is_ifname[((1 - out) << 1) + 1], fr->fr_ifnames[3], + sizeof(fr->fr_ifnames[3])); } else { pass = fr_flags; is->is_tag = FR_NOLOGTAG; - } - is->is_ifp[out << 1] = fin->fin_ifp; - if (fin->fin_ifp != NULL) { - COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1]); + if (fin->fin_ifp != NULL) { + is->is_ifp[out << 1] = fin->fin_ifp; + COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1]); + } } /* @@ -1324,6 +1426,22 @@ ipstate_t *is; tdata = &is->is_tcp.ts_data[source]; MUTEX_ENTER(&is->is_lock); + + /* + * If a SYN packet is received for a connection that is on the way out + * but hasn't yet departed then advance this session along the way. + */ + if ((tcp->th_flags & TH_OPENING) == TH_SYN) { + if ((is->is_state[0] > IPF_TCPS_ESTABLISHED) && + (is->is_state[1] > IPF_TCPS_ESTABLISHED)) { + is->is_state[!source] = IPF_TCPS_CLOSED; + fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq, + &ips_deletetq); + MUTEX_ENTER(&is->is_lock); + return 0; + } + } + if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { #ifdef IPFILTER_SCAN if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { @@ -1356,21 +1474,16 @@ ipstate_t *is; if (flags == (TH_SYN|TH_ACK)) { is->is_s0[source] = ntohl(tcp->th_ack); is->is_s0[!source] = ntohl(tcp->th_seq) + 1; - if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2)) && - (tdata->td_winflags & TCP_WSCALE_SEEN)) { + if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { if (fr_tcpoptions(fin, tcp, fdata) == -1) fin->fin_flx |= FI_BAD; - if (!(fdata->td_winflags & TCP_WSCALE_SEEN)) { - fdata->td_winscale = 0; - tdata->td_winscale = 0; - } } if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) fr_checknewisn(fin, is); } else if (flags == TH_SYN) { is->is_s0[source] = ntohl(tcp->th_seq) + 1; if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { - if (fr_tcpoptions(fin, tcp, tdata) == -1) + if (fr_tcpoptions(fin, tcp, fdata) == -1) fin->fin_flx |= FI_BAD; } @@ -1379,8 +1492,9 @@ ipstate_t *is; } ret = 1; - } else + } else { fin->fin_flx |= FI_OOW; + } MUTEX_EXIT(&is->is_lock); return ret; } @@ -1477,17 +1591,8 @@ int flags; * the receiver also does window scaling) */ if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) { - if (tdata->td_winflags & TCP_WSCALE_SEEN) { - fdata->td_winflags &= ~TCP_WSCALE_FIRST; - fdata->td_maxwin = win; - } else { - fdata->td_winscale = 0; - fdata->td_winflags &= ~(TCP_WSCALE_FIRST| - TCP_WSCALE_SEEN); - tdata->td_winscale = 0; - tdata->td_winflags &= ~(TCP_WSCALE_FIRST| - TCP_WSCALE_SEEN); - } + fdata->td_winflags &= ~TCP_WSCALE_FIRST; + fdata->td_maxwin = win; } end = seq + dsize; @@ -1530,7 +1635,7 @@ int flags; (SEQ_GE(seq, fdata->td_end - maxwin)) && /* XXX what about big packets */ #define MAXACKWINDOW 66000 - (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) && + (-ackskew <= (MAXACKWINDOW)) && ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { inseq = 1; /* @@ -1576,6 +1681,8 @@ int flags; } } + /* TRACE(inseq, fdata, tdata, seq, end, ack, ackskew, win, maxwin) */ + if (inseq) { /* if ackskew < 0 then this should be due to fragmented * packets. There is no way to know the length of the @@ -1733,9 +1840,9 @@ u_32_t cmask; * If the interface for this 'direction' is set, make sure it matches. * An interface name that is not set matches any, as does a name of *. */ - if ((is->is_ifp[idx] == NULL && - (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) || - is->is_ifp[idx] == ifp) + if ((is->is_ifp[idx] == ifp) || (is->is_ifp[idx] == NULL && + (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '-' || + *is->is_ifname[idx] == '*'))) ret = 1; if (ret == 0) @@ -2005,7 +2112,7 @@ fr_info_t *fin; # endif } #endif - bcopy((char *)fin, (char *)&ofin, sizeof(fin)); + bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); /* * in the IPv4 case we must zero the i6addr union otherwise @@ -2035,7 +2142,6 @@ fr_info_t *fin; ofin.fin_ip = oip; ofin.fin_m = NULL; /* if dereferenced, panic XXX */ ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ - ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin); ofin.fin_ifp = fin->fin_ifp; ofin.fin_out = !fin->fin_out; @@ -2295,6 +2401,13 @@ icmp6again: hvm = DOUBLE_HASH(hv); for (isp = &ips_table[hvm]; ((is = *isp) != NULL); ) { isp = &is->is_hnext; + /* + * If a connection is about to be deleted, no packets + * are allowed to match it. + */ + if (is->is_sti.tqe_ifq == &ips_deletetq) + continue; + if ((is->is_p != pr) || (is->is_v != v)) continue; is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); @@ -2624,8 +2737,6 @@ matched: fin->fin_rule = is->is_rulen; pass = is->is_pass; fr_updatestate(fin, is, ifq); - if (fin->fin_out == 1) - fin->fin_nat = is->is_nat[fin->fin_rev]; fin->fin_state = is; is->is_touched = fr_ticks; @@ -2761,7 +2872,7 @@ void *ifp; /* ------------------------------------------------------------------------ */ /* Function: fr_delstate */ -/* Returns: Nil */ +/* Returns: int - 0 = entry deleted, else reference count on struct */ /* Parameters: is(I) - pointer to state structure to delete */ /* why(I) - if not 0, log reason why it was deleted */ /* Write Locks: ipf_state */ @@ -2770,7 +2881,7 @@ void *ifp; /* and timeout queue lists. Make adjustments to hash table statistics and */ /* global counters as required. */ /* ------------------------------------------------------------------------ */ -static void fr_delstate(is, why) +static int fr_delstate(is, why) ipstate_t *is; int why; { @@ -2781,16 +2892,6 @@ int why; * Since we want to delete this, remove it from the state table, * where it can be found & used, first. */ - if (is->is_pnext != NULL) { - *is->is_pnext = is->is_next; - - if (is->is_next != NULL) - is->is_next->is_pnext = is->is_pnext; - - is->is_pnext = NULL; - is->is_next = NULL; - } - if (is->is_phnext != NULL) { *is->is_phnext = is->is_hnext; if (is->is_hnext != NULL) @@ -2818,7 +2919,8 @@ int why; /* * Next, remove it from the timeout queue it is in. */ - fr_deletequeueentry(&is->is_sti); + if (is->is_sti.tqe_ifq != NULL) + fr_deletequeueentry(&is->is_sti); if (is->is_me != NULL) { *is->is_me = NULL; @@ -2827,11 +2929,15 @@ int why; /* * If it is still in use by something else, do not go any further, - * but note that at this point it is now an orphan. + * but note that at this point it is now an orphan. How can this + * be? fr_state_flush() calls fr_delete() directly because it wants + * to empty the table out and if something has a hold on a state + * entry (such as ipfstat), it'll do the deref path that'll bring + * us back here to do the real delete & free. */ is->is_ref--; if (is->is_ref > 0) - return; + return is->is_ref; if (is->is_tqehead[0] != NULL) { if (fr_deletetimeoutqueue(is->is_tqehead[0]) == 0) @@ -2850,6 +2956,19 @@ int why; (void) ipsc_detachis(is); #endif + /* + * Now remove it from the linked list of known states + */ + if (is->is_pnext != NULL) { + *is->is_pnext = is->is_next; + + if (is->is_next != NULL) + is->is_next->is_pnext = is->is_pnext; + + is->is_pnext = NULL; + is->is_next = NULL; + } + if (ipstate_logging != 0 && why != 0) ipstate_log(is, why); @@ -2860,12 +2979,14 @@ int why; if (is->is_rule != NULL) { is->is_rule->fr_statecnt--; - (void)fr_derefrule(&is->is_rule); + (void) fr_derefrule(&is->is_rule); } MUTEX_DESTROY(&is->is_lock); KFREE(is); ips_num--; + + return 0; } @@ -2951,42 +3072,115 @@ int which, proto; ipftq_t *ifq, *ifqnext; ipftqent_t *tqe, *tqn; ipstate_t *is, **isp; - int delete, removed; - long try, maxtick; - u_long interval; + int removed; SPL_INT(s); removed = 0; SPL_NET(s); - for (isp = &ips_list; ((is = *isp) != NULL); ) { - delete = 0; - if ((proto != 0) && (is->is_v != proto)) { - isp = &is->is_next; - continue; + switch (which) + { + case 0 : + /* + * Style 0 flush removes everything... + */ + for (isp = &ips_list; ((is = *isp) != NULL); ) { + if ((proto != 0) && (is->is_v != proto)) { + isp = &is->is_next; + continue; + } + if (fr_delstate(is, ISL_FLUSH) == 0) + removed++; + else + isp = &is->is_next; } + break; - switch (which) - { - case 0 : - delete = 1; - break; - case 1 : - case 2 : - if (is->is_p != IPPROTO_TCP) - break; - if ((is->is_state[0] != IPF_TCPS_ESTABLISHED) || - (is->is_state[1] != IPF_TCPS_ESTABLISHED)) - delete = 1; - break; + case 1 : + /* + * Since we're only interested in things that are closing, + * we can start with the appropriate timeout queue. + */ + for (ifq = ips_tqtqb + IPF_TCPS_CLOSE_WAIT; ifq != NULL; + ifq = ifq->ifq_next) { + + for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { + tqn = tqe->tqe_next; + is = tqe->tqe_parent; + if (is->is_p != IPPROTO_TCP) + break; + if (fr_delstate(is, ISL_EXPIRE) == 0) + removed++; + } + } + + /* + * Also need to look through the user defined queues. + */ + for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) { + ifqnext = ifq->ifq_next; + for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { + tqn = tqe->tqe_next; + is = tqe->tqe_parent; + if (is->is_p != IPPROTO_TCP) + continue; + + if ((is->is_state[0] > IPF_TCPS_ESTABLISHED) && + (is->is_state[1] > IPF_TCPS_ESTABLISHED)) { + if (fr_delstate(is, ISL_EXPIRE) == 0) + removed++; + } + } } + break; - if (delete) { - fr_delstate(is, ISL_FLUSH); - removed++; - } else + case 2 : + break; + + /* + * Args 5-11 correspond to flushing those particular states + * for TCP connections. + */ + case IPF_TCPS_CLOSE_WAIT : + case IPF_TCPS_FIN_WAIT_1 : + case IPF_TCPS_CLOSING : + case IPF_TCPS_LAST_ACK : + case IPF_TCPS_FIN_WAIT_2 : + case IPF_TCPS_TIME_WAIT : + case IPF_TCPS_CLOSED : + tqn = ips_tqtqb[which].ifq_head; + while (tqn != NULL) { + tqe = tqn; + tqn = tqe->tqe_next; + is = tqe->tqe_parent; + if (fr_delstate(is, ISL_FLUSH) == 0) + removed++; + } + break; + + default : + if (which < 30) + break; + + /* + * Take a large arbitrary number to mean the number of seconds + * for which which consider to be the maximum value we'll allow + * the expiration to be. + */ + which = IPF_TTLVAL(which); + for (isp = &ips_list; ((is = *isp) != NULL); ) { + if ((proto == 0) || (is->is_v == proto)) { + if (fr_ticks - is->is_touched > which) { + if (fr_delstate(is, ISL_FLUSH) == 0) { + removed++; + continue; + } + } + } isp = &is->is_next; + } + break; } if (which != 2) { @@ -2995,83 +3189,35 @@ int which, proto; } /* - * Asked to remove inactive entries because the table is full, try - * again, 3 times, if first attempt failed with a different criteria - * each time. The order tried in must be in decreasing age. - * Another alternative is to implement random drop and drop N entries - * at random until N have been freed up. + * Asked to remove inactive entries because the table is full. */ - if (fr_ticks - ips_last_force_flush < IPF_TTLVAL(5)) - goto force_flush_skipped; - ips_last_force_flush = fr_ticks; - - if (fr_ticks > IPF_TTLVAL(43200)) - interval = IPF_TTLVAL(43200); - else if (fr_ticks > IPF_TTLVAL(1800)) - interval = IPF_TTLVAL(1800); - else if (fr_ticks > IPF_TTLVAL(30)) - interval = IPF_TTLVAL(30); - else - interval = IPF_TTLVAL(10); - try = fr_ticks - (fr_ticks - interval); - if (try < 0) - goto force_flush_skipped; - - while (removed == 0) { - maxtick = fr_ticks - interval; - if (maxtick < 0) - break; - - while (try < maxtick) { - for (ifq = ips_tqtqb; ifq != NULL; - ifq = ifq->ifq_next) { - for (tqn = ifq->ifq_head; - ((tqe = tqn) != NULL); ) { - if (tqe->tqe_die > try) - break; - tqn = tqe->tqe_next; - is = tqe->tqe_parent; - fr_delstate(is, ISL_EXPIRE); - removed++; - } - } - - for (ifq = ips_utqe; ifq != NULL; ifq = ifqnext) { - ifqnext = ifq->ifq_next; - - for (tqn = ifq->ifq_head; - ((tqe = tqn) != NULL); ) { - if (tqe->tqe_die > try) - break; - tqn = tqe->tqe_next; - is = tqe->tqe_parent; - fr_delstate(is, ISL_EXPIRE); - removed++; - } - } - if (try + interval > maxtick) - break; - try += interval; - } - - if (removed == 0) { - if (interval == IPF_TTLVAL(43200)) { - interval = IPF_TTLVAL(1800); - } else if (interval == IPF_TTLVAL(1800)) { - interval = IPF_TTLVAL(30); - } else if (interval == IPF_TTLVAL(30)) { - interval = IPF_TTLVAL(10); - } else { - break; - } - } + if (fr_ticks - ips_last_force_flush > IPF_TTLVAL(5)) { + ips_last_force_flush = fr_ticks; + removed = ipf_queueflush(fr_state_flush_entry, ips_tqtqb, + ips_utqe); } -force_flush_skipped: + SPL_X(s); return removed; } +/* ------------------------------------------------------------------------ */ +/* Function: fr_state_flush_entry */ +/* Returns: int - 0 = entry deleted, else not deleted */ +/* Parameters: entry(I) - pointer to state structure to delete */ +/* Write Locks: ipf_state */ +/* */ +/* This function is a stepping stone between ipf_queueflush() and */ +/* fr_delstate(). It is used so we can provide a uniform interface via the */ +/* ipf_queueflush() function. */ +/* ------------------------------------------------------------------------ */ +static int fr_state_flush_entry(entry) +void *entry; +{ + return fr_delstate(entry, ISL_FLUSH); +} + /* ------------------------------------------------------------------------ */ /* Function: fr_tcp_age */ @@ -3101,6 +3247,25 @@ force_flush_skipped: /* dir == 0 : a packet from source to dest */ /* dir == 1 : a packet from dest to source */ /* */ +/* A typical procession for a connection is as follows: */ +/* */ +/* +--------------+-------------------+ */ +/* | Side '0' | Side '1' | */ +/* +--------------+-------------------+ */ +/* | 0 -> 1 (SYN) | | */ +/* | | 0 -> 2 (SYN-ACK) | */ +/* | 1 -> 3 (ACK) | | */ +/* | | 2 -> 4 (ACK-PUSH) | */ +/* | 3 -> 4 (ACK) | | */ +/* | ... | ... | */ +/* | | 4 -> 6 (FIN-ACK) | */ +/* | 4 -> 5 (ACK) | | */ +/* | | 6 -> 6 (ACK-PUSH) | */ +/* | 5 -> 5 (ACK) | | */ +/* | 5 -> 8 (FIN) | | */ +/* | | 6 -> 10 (ACK) | */ +/* +--------------+-------------------+ */ +/* */ /* Locking: it is assumed that the parent of the tqe structure is locked. */ /* ------------------------------------------------------------------------ */ int fr_tcp_age(tqe, fin, tqtab, flags) @@ -3132,16 +3297,16 @@ int flags; switch (nstate) { - case IPF_TCPS_CLOSED: /* 0 */ + case IPF_TCPS_LISTEN: /* 0 */ if ((tcpflags & TH_OPENING) == TH_OPENING) { /* * 'dir' received an S and sends SA in - * response, CLOSED -> SYN_RECEIVED + * response, LISTEN -> SYN_RECEIVED */ nstate = IPF_TCPS_SYN_RECEIVED; rval = 1; } else if ((tcpflags & TH_OPENING) == TH_SYN) { - /* 'dir' sent S, CLOSED -> SYN_SENT */ + /* 'dir' sent S, LISTEN -> SYN_SENT */ nstate = IPF_TCPS_SYN_SENT; rval = 1; } @@ -3160,7 +3325,7 @@ int flags; */ switch (ostate) { - case IPF_TCPS_CLOSED : + case IPF_TCPS_LISTEN : case IPF_TCPS_SYN_RECEIVED : nstate = IPF_TCPS_HALF_ESTAB; rval = 1; @@ -3181,11 +3346,7 @@ int flags; */ break; - case IPF_TCPS_LISTEN: /* 1 */ - /* NOT USED */ - break; - - case IPF_TCPS_SYN_SENT: /* 2 */ + case IPF_TCPS_SYN_SENT: /* 1 */ if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) { /* * A retransmitted SYN packet. We do not reset @@ -3226,7 +3387,7 @@ int flags; } break; - case IPF_TCPS_SYN_RECEIVED: /* 3 */ + case IPF_TCPS_SYN_RECEIVED: /* 2 */ if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { /* * we see an A from 'dir' which was in @@ -3255,7 +3416,7 @@ int flags; } break; - case IPF_TCPS_HALF_ESTAB: /* 4 */ + case IPF_TCPS_HALF_ESTAB: /* 3 */ if (tcpflags & TH_FIN) { nstate = IPF_TCPS_FIN_WAIT_1; rval = 1; @@ -3270,7 +3431,7 @@ int flags; */ switch (ostate) { - case IPF_TCPS_CLOSED : + case IPF_TCPS_LISTEN : case IPF_TCPS_SYN_SENT : case IPF_TCPS_SYN_RECEIVED : rval = 1; @@ -3286,7 +3447,7 @@ int flags; } break; - case IPF_TCPS_ESTABLISHED: /* 5 */ + case IPF_TCPS_ESTABLISHED: /* 4 */ rval = 1; if (tcpflags & TH_FIN) { /* @@ -3294,7 +3455,11 @@ int flags; * this gives us a half-closed connection; * ESTABLISHED -> FIN_WAIT_1 */ - nstate = IPF_TCPS_FIN_WAIT_1; + if (ostate == IPF_TCPS_FIN_WAIT_1) { + nstate = IPF_TCPS_CLOSING; + } else { + nstate = IPF_TCPS_FIN_WAIT_1; + } } else if (tcpflags & TH_ACK) { /* * an ACK, should we exclude other flags here? @@ -3319,7 +3484,7 @@ int flags; } break; - case IPF_TCPS_CLOSE_WAIT: /* 6 */ + case IPF_TCPS_CLOSE_WAIT: /* 5 */ rval = 1; if (tcpflags & TH_FIN) { /* @@ -3337,7 +3502,7 @@ int flags; } break; - case IPF_TCPS_FIN_WAIT_1: /* 7 */ + case IPF_TCPS_FIN_WAIT_1: /* 6 */ rval = 1; if ((tcpflags & TH_ACK) && ostate > IPF_TCPS_CLOSE_WAIT) { @@ -3365,11 +3530,14 @@ int flags; } break; - case IPF_TCPS_CLOSING: /* 8 */ - /* NOT USED */ + case IPF_TCPS_CLOSING: /* 7 */ + if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) { + nstate = IPF_TCPS_TIME_WAIT; + } + rval = 1; break; - case IPF_TCPS_LAST_ACK: /* 9 */ + case IPF_TCPS_LAST_ACK: /* 8 */ if (tcpflags & TH_ACK) { if ((tcpflags & TH_PUSH) || dlen) /* @@ -3388,17 +3556,30 @@ int flags; */ break; - case IPF_TCPS_FIN_WAIT_2: /* 10 */ + case IPF_TCPS_FIN_WAIT_2: /* 9 */ + /* NOT USED */ +#if 0 rval = 1; - if ((tcpflags & TH_OPENING) == TH_OPENING) + if ((tcpflags & TH_OPENING) == TH_OPENING) { nstate = IPF_TCPS_SYN_RECEIVED; - else if (tcpflags & TH_SYN) + } else if (tcpflags & TH_SYN) { nstate = IPF_TCPS_SYN_SENT; + } else if ((tcpflags & (TH_FIN|TH_ACK)) != 0) { + nstate = IPF_TCPS_TIME_WAIT; + } +#endif break; - case IPF_TCPS_TIME_WAIT: /* 11 */ + case IPF_TCPS_TIME_WAIT: /* 10 */ /* we're in 2MSL timeout now */ - rval = 1; + rval = 2; + if (ostate == IPF_TCPS_LAST_ACK) { + nstate = IPF_TCPS_CLOSED; + } + break; + + case IPF_TCPS_CLOSED: /* 11 */ + rval = 2; break; default : @@ -3551,7 +3732,7 @@ fr_info_t *fin; if (fin->fin_plen < sizeof(*oip6)) return NULL; - bcopy((char *)fin, (char *)&ofin, sizeof(fin)); + bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); ofin.fin_v = 6; ofin.fin_ifp = fin->fin_ifp; ofin.fin_out = !fin->fin_out; @@ -3572,7 +3753,6 @@ fr_info_t *fin; oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; ofin.fin_flx = FI_NOCKSUM; ofin.fin_ip = (ip_t *)oip6; - ofin.fin_plen = oip6->ip6_plen; (void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin); ofin.fin_flx &= ~(FI_BAD|FI_SHORT); oip6->ip6_plen = savelen; @@ -3719,7 +3899,7 @@ ipftq_t *tqp; tqp[IPF_TCPS_CLOSING].ifq_ttl = fr_tcptimeout; tqp[IPF_TCPS_LAST_ACK].ifq_ttl = fr_tcplastack; tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = fr_tcpclosewait; - tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = fr_tcptimeout; + tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = fr_tcptimewait; tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = fr_tcptimeout; } @@ -3765,40 +3945,11 @@ ipftq_t *tqp; /* dir == 0 : a packet from source to dest */ /* dir == 1 : a packet from dest to source */ /* ------------------------------------------------------------------------ */ -void fr_statederef(fin, isp) -fr_info_t *fin; +void fr_statederef(isp) ipstate_t **isp; { - ipstate_t *is = *isp; -#if 0 - int nstate, ostate, dir, eol; - - eol = 0; /* End-of-the-line flag. */ - dir = fin->fin_rev; - ostate = is->is_state[1 - dir]; - nstate = is->is_state[dir]; - /* - * Determine whether this packet is local or routed. State entries - * with us as the destination will have an interface list of - * int1,-,-,int1. Entries with us as the origin run as -,int1,int1,-. - */ - if ((fin->fin_p == IPPROTO_TCP) && (fin->fin_out == 0)) { - if ((strcmp(is->is_ifname[0], is->is_ifname[3]) == 0) && - (strcmp(is->is_ifname[1], is->is_ifname[2]) == 0)) { - if ((dir == 0) && - (strcmp(is->is_ifname[1], "-") == 0) && - (strcmp(is->is_ifname[0], "-") != 0)) { - eol = 1; - } else if ((dir == 1) && - (strcmp(is->is_ifname[0], "-") == 0) && - (strcmp(is->is_ifname[1], "-") != 0)) { - eol = 1; - } - } - } -#endif + ipstate_t *is; - fin = fin; /* LINT */ is = *isp; *isp = NULL; WRITE_ENTER(&ipf_state); @@ -3807,15 +3958,8 @@ ipstate_t **isp; is->is_ref++; /* To counter ref-- in fr_delstate() */ fr_delstate(is, ISL_EXPIRE); #ifndef _KERNEL -#if 0 - } else if (((fin->fin_out == 1) || (eol == 1)) && - ((ostate == IPF_TCPS_LAST_ACK) && - (nstate == IPF_TCPS_TIME_WAIT))) { - ; -#else } else if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { -#endif fr_delstate(is, ISL_ORPHAN); #endif } @@ -3890,3 +4034,120 @@ int rev; fr_queueappend(&is->is_sti, nifq, is); return; } + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_stateiter */ +/* Returns: int - 0 == success, else error */ +/* Parameters: token(I) - pointer to ipftoken structure */ +/* itp(I) - pointer to ipfgeniter structure */ +/* */ +/* This function handles the SIOCGENITER ioctl for the state tables and */ +/* walks through the list of entries in the state table list (ips_list.) */ +/* ------------------------------------------------------------------------ */ +static int fr_stateiter(token, itp) +ipftoken_t *token; +ipfgeniter_t *itp; +{ + ipstate_t *is, *next, zero; + int error, count; + char *dst; + + if (itp->igi_data == NULL) + return EFAULT; + + if (itp->igi_nitems == 0) + return ENOSPC; + + if (itp->igi_type != IPFGENITER_STATE) + return EINVAL; + + is = token->ipt_data; + if (is == (void *)-1) { + ipf_freetoken(token); + return ESRCH; + } + + error = 0; + dst = itp->igi_data; + + READ_ENTER(&ipf_state); + if (is == NULL) { + next = ips_list; + } else { + next = is->is_next; + } + + for (count = itp->igi_nitems; count > 0; count--) { + if (next != NULL) { + /* + * If we find a state entry to use, bump its + * reference count so that it can be used for + * is_next when we come back. + */ + MUTEX_ENTER(&next->is_lock); + next->is_ref++; + MUTEX_EXIT(&next->is_lock); + token->ipt_data = next; + } else { + bzero(&zero, sizeof(zero)); + next = &zero; + token->ipt_data = (void *)-1; + count = 1; + } + RWLOCK_EXIT(&ipf_state); + + /* + * If we had a prior pointer to a state entry, release it. + */ + if (is != NULL) { + fr_statederef(&is); + } + + /* + * This should arguably be via fr_outobj() so that the state + * structure can (if required) be massaged going out. + */ + error = COPYOUT(next, dst, sizeof(*next)); + if (error != 0) + error = EFAULT; + if ((count == 1) || (error != 0)) + break; + + dst += sizeof(*next); + READ_ENTER(&ipf_state); + is = next; + next = is->is_next; + } + + return error; +} + + +/* ------------------------------------------------------------------------ */ +/* Function: fr_stgettable */ +/* Returns: int - 0 = success, else error */ +/* Parameters: data(I) - pointer to ioctl data */ +/* */ +/* This function handles ioctl requests for tables of state information. */ +/* At present the only table it deals with is the hash bucket statistics. */ +/* ------------------------------------------------------------------------ */ +static int fr_stgettable(data) +char *data; +{ + ipftable_t table; + int error; + + error = fr_inobj(data, &table, IPFOBJ_GTABLE); + if (error != 0) + return error; + + if (table.ita_type != IPFTABLE_BUCKETS) + return EINVAL; + + error = COPYOUT(ips_stats.iss_bucketlen, table.ita_table, + fr_statesize * sizeof(u_long)); + if (error != 0) + error = EFAULT; + return error; +} |