diff options
author | darrenr <darrenr@FreeBSD.org> | 2007-10-18 21:52:14 +0000 |
---|---|---|
committer | darrenr <darrenr@FreeBSD.org> | 2007-10-18 21:52:14 +0000 |
commit | fd172ed3272b523c5499832d7098b6766bac7e4f (patch) | |
tree | 7eb0ed562f560c2289c5b113e742797727d126db /sys/contrib/ipfilter/netinet/ip_state.c | |
parent | 6f755e940898e80d77f95031600e671c36e0a7a6 (diff) | |
download | FreeBSD-src-fd172ed3272b523c5499832d7098b6766bac7e4f.zip FreeBSD-src-fd172ed3272b523c5499832d7098b6766bac7e4f.tar.gz |
Pullup IPFilter 4.1.28 from the vendor branch into HEAD.
MFC after: 7 days
Diffstat (limited to 'sys/contrib/ipfilter/netinet/ip_state.c')
-rw-r--r-- | sys/contrib/ipfilter/netinet/ip_state.c | 172 |
1 files changed, 108 insertions, 64 deletions
diff --git a/sys/contrib/ipfilter/netinet/ip_state.c b/sys/contrib/ipfilter/netinet/ip_state.c index 39a094a..cfb7974 100644 --- a/sys/contrib/ipfilter/netinet/ip_state.c +++ b/sys/contrib/ipfilter/netinet/ip_state.c @@ -113,7 +113,7 @@ struct file; #if !defined(lint) static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; -static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.69 2007/05/26 13:05:14 darrenr Exp $"; +static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.80 2007/10/16 09:33:23 darrenr Exp $"; #endif static ipstate_t **ips_table = NULL; @@ -550,7 +550,7 @@ void *ctx; if (!(mode & FWRITE)) { error = EPERM; } else { - fr_lock(data, &fr_state_lock); + error = fr_lock(data, &fr_state_lock); } break; @@ -652,8 +652,8 @@ caddr_t data; int error; error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); - if (error) - return EFAULT; + if (error != 0) + return error; isn = ips.ips_next; if (isn == NULL) { @@ -682,9 +682,7 @@ caddr_t data; bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, sizeof(ips.ips_fr)); error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); - if (error) - return EFAULT; - return 0; + return error; } @@ -993,6 +991,16 @@ u_int flags; hv += is->is_src.i6[3]; } #endif + if ((fin->fin_v == 4) && + (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { + if (fin->fin_out == 0) { + flags |= SI_W_DADDR|SI_CLONE; + hv -= is->is_daddr; + } else { + flags |= SI_W_SADDR|SI_CLONE; + hv -= is->is_saddr; + } + } switch (is->is_p) { @@ -1190,7 +1198,8 @@ u_int flags; sizeof(fr->fr_ifnames[0])); } else { is->is_ifp[out << 1] = fin->fin_ifp; - COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1]); + COPYIFNAME(is->is_v, fin->fin_ifp, + is->is_ifname[out << 1]); } is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; @@ -1210,7 +1219,8 @@ u_int flags; if (fin->fin_ifp != NULL) { is->is_ifp[out << 1] = fin->fin_ifp; - COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1]); + COPYIFNAME(is->is_v, fin->fin_ifp, + is->is_ifname[out << 1]); } } @@ -1439,12 +1449,13 @@ ipstate_t *is; is->is_state[!source] = IPF_TCPS_CLOSED; fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq, &ips_deletetq); - MUTEX_ENTER(&is->is_lock); + MUTEX_EXIT(&is->is_lock); return 0; } } - if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { + ret = fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags); + if (ret > 0) { #ifdef IPFILTER_SCAN if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { ipsc_packet(fin, is); @@ -1540,7 +1551,8 @@ ipstate_t *is; /* ------------------------------------------------------------------------ */ /* Function: fr_tcpinwindow */ -/* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */ +/* Returns: int - 1 == packet inside TCP "window", 0 == not inside, */ +/* 2 == packet seq number matches next expected */ /* Parameters: fin(I) - pointer to packet information */ /* fdata(I) - pointer to tcp state informatio (forward) */ /* tdata(I) - pointer to tcp state informatio (reverse) */ @@ -1624,14 +1636,12 @@ int flags; /* * Strict sequencing only allows in-order delivery. */ - if ((flags & IS_STRICT) != 0) { - if (seq != fdata->td_end) { + if (seq != fdata->td_end) { + if ((flags & IS_STRICT) != 0) { return 0; } } -#define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) -#define SEQ_GT(a,b) ((int)((a) - (b)) > 0) inseq = 0; if ((SEQ_GE(fdata->td_maxend, end)) && (SEQ_GE(seq, fdata->td_end - maxwin)) && @@ -2028,7 +2038,7 @@ u_32_t cmask; if (is->is_ifp[idx] == NULL && (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) { is->is_ifp[idx] = ifp; - COPYIFNAME(ifp, is->is_ifname[idx]); + COPYIFNAME(is->is_v, ifp, is->is_ifname[idx]); } fin->fin_rev = rev; return is; @@ -2299,8 +2309,6 @@ u_int hv; ipstate_t **isp; u_int hvm; - ASSERT(rw_read_locked(&ipf_state.ipf_lk) == 0); - hvm = is->is_hv; /* * Remove the hash from the old location... @@ -2383,6 +2391,14 @@ ipftq_t **ifqp; } } #endif + if ((v == 4) && + (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { + if (fin->fin_out == 0) { + hv -= src.in4.s_addr; + } else { + hv -= dst.in4.s_addr; + } + } /* * Search the hash table for matching packet header info. @@ -2531,12 +2547,31 @@ retry_tcpudp: } RWLOCK_EXIT(&ipf_state); - if (!tryagain && ips_stats.iss_wild) { - hv -= dport; - hv -= sport; - tryagain = 1; - WRITE_ENTER(&ipf_state); - goto retry_tcpudp; + if (ips_stats.iss_wild) { + if (tryagain == 0) { + hv -= dport; + hv -= sport; + } else if (tryagain == 1) { + hv = fin->fin_fi.fi_p; + /* + * If we try to pretend this is a reply to a + * multicast/broadcast packet then we need to + * exclude part of the address from the hash + * calculation. + */ + if (fin->fin_out == 0) { + hv += src.in4.s_addr; + } else { + hv += dst.in4.s_addr; + } + hv += dport; + hv += sport; + } + tryagain++; + if (tryagain <= 2) { + WRITE_ENTER(&ipf_state); + goto retry_tcpudp; + } } fin->fin_flx |= oow; break; @@ -2888,8 +2923,6 @@ ipstate_t *is; int why; { - ASSERT(rw_read_locked(&ipf_state.ipf_lk) == 0); - /* * Since we want to delete this, remove it from the state table, * where it can be found & used, first. @@ -2937,9 +2970,15 @@ int why; * entry (such as ipfstat), it'll do the deref path that'll bring * us back here to do the real delete & free. */ - is->is_ref--; - if (is->is_ref > 0) + MUTEX_ENTER(&is->is_lock); + if (is->is_ref > 1) { + is->is_ref--; + MUTEX_EXIT(&is->is_lock); return is->is_ref; + } + MUTEX_EXIT(&is->is_lock); + + is->is_ref = 0; if (is->is_tqehead[0] != NULL) { if (fr_deletetimeoutqueue(is->is_tqehead[0]) == 0) @@ -3536,7 +3575,7 @@ int flags; if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) { nstate = IPF_TCPS_TIME_WAIT; } - rval = 1; + rval = 2; break; case IPF_TCPS_LAST_ACK: /* 8 */ @@ -3560,24 +3599,14 @@ int flags; case IPF_TCPS_FIN_WAIT_2: /* 9 */ /* NOT USED */ -#if 0 - rval = 1; - if ((tcpflags & TH_OPENING) == TH_OPENING) { - nstate = IPF_TCPS_SYN_RECEIVED; - } else if (tcpflags & TH_SYN) { - nstate = IPF_TCPS_SYN_SENT; - } else if ((tcpflags & (TH_FIN|TH_ACK)) != 0) { - nstate = IPF_TCPS_TIME_WAIT; - } -#endif break; case IPF_TCPS_TIME_WAIT: /* 10 */ /* we're in 2MSL timeout now */ - rval = 2; if (ostate == IPF_TCPS_LAST_ACK) { nstate = IPF_TCPS_CLOSED; } + rval = 1; break; case IPF_TCPS_CLOSED: /* 11 */ @@ -3932,6 +3961,14 @@ ipftq_t *tqp; /* Decrement the reference counter for this state table entry and free it */ /* if there are no more things using it. */ /* */ +/* This function is only called when cleaning up after increasing is_ref by */ +/* one earlier in the 'code path' so if is_ref is 1 when entering, we do */ +/* have an orphan, otherwise not. However there is a possible race between */ +/* the entry being deleted via flushing with an ioctl call (that calls the */ +/* delete function directly) and the tail end of packet processing so we */ +/* need to grab is_lock before doing the check to synchronise the two code */ +/* paths. */ +/* */ /* When operating in userland (ipftest), we have no timers to clear a state */ /* entry. Therefore, we make a few simple tests before deleting an entry */ /* outright. We compare states on each side looking for a combination of */ @@ -3954,17 +3991,23 @@ ipstate_t **isp; is = *isp; *isp = NULL; - WRITE_ENTER(&ipf_state); - is->is_ref--; - if (is->is_ref == 0) { - is->is_ref++; /* To counter ref-- in fr_delstate() */ - fr_delstate(is, ISL_EXPIRE); + + MUTEX_ENTER(&is->is_lock); + if (is->is_ref > 1) { + is->is_ref--; + MUTEX_EXIT(&is->is_lock); #ifndef _KERNEL - } else if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || + if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { - fr_delstate(is, ISL_ORPHAN); + fr_delstate(is, ISL_ORPHAN); + } #endif + return; } + MUTEX_EXIT(&is->is_lock); + + WRITE_ENTER(&ipf_state); + fr_delstate(is, ISL_EXPIRE); RWLOCK_EXIT(&ipf_state); } @@ -4058,7 +4101,7 @@ ipfgeniter_t *itp; if (itp->igi_data == NULL) return EFAULT; - if (itp->igi_nitems == 0) + if (itp->igi_nitems < 1) return ENOSPC; if (itp->igi_type != IPFGENITER_STATE) @@ -4080,33 +4123,29 @@ ipfgeniter_t *itp; next = is->is_next; } - for (count = itp->igi_nitems; count > 0; count--) { + count = itp->igi_nitems; + for (;;) { if (next != NULL) { /* * If we find a state entry to use, bump its * reference count so that it can be used for * is_next when we come back. */ - MUTEX_ENTER(&next->is_lock); - next->is_ref++; - MUTEX_EXIT(&next->is_lock); - token->ipt_data = next; + if (count == 1) { + MUTEX_ENTER(&next->is_lock); + next->is_ref++; + MUTEX_EXIT(&next->is_lock); + token->ipt_data = next; + } } else { bzero(&zero, sizeof(zero)); next = &zero; - token->ipt_data = (void *)-1; count = 1; + token->ipt_data = NULL; } RWLOCK_EXIT(&ipf_state); /* - * If we had a prior pointer to a state entry, release it. - */ - if (is != NULL) { - fr_statederef(&is); - } - - /* * This should arguably be via fr_outobj() so that the state * structure can (if required) be massaged going out. */ @@ -4117,9 +4156,14 @@ ipfgeniter_t *itp; break; dst += sizeof(*next); + count--; + READ_ENTER(&ipf_state); - is = next; - next = is->is_next; + next = next->is_next; + } + + if (is != NULL) { + fr_statederef(&is); } return error; |