diff options
author | guido <guido@FreeBSD.org> | 2005-12-30 11:22:11 +0000 |
---|---|---|
committer | guido <guido@FreeBSD.org> | 2005-12-30 11:22:11 +0000 |
commit | 74f74123f3fdf4235d201dc8b4227d9d11d7dce5 (patch) | |
tree | 658db597994c3e9227c2101b920c16b0f10bfdfc /sys/contrib/ipfilter/netinet/ip_state.c | |
parent | 28f64fe3cea04b47311e7db06814b6b9be539040 (diff) | |
download | FreeBSD-src-74f74123f3fdf4235d201dc8b4227d9d11d7dce5.zip FreeBSD-src-74f74123f3fdf4235d201dc8b4227d9d11d7dce5.tar.gz |
Import IP Filter version 4.1.10
Diffstat (limited to 'sys/contrib/ipfilter/netinet/ip_state.c')
-rw-r--r-- | sys/contrib/ipfilter/netinet/ip_state.c | 256 |
1 files changed, 161 insertions, 95 deletions
diff --git a/sys/contrib/ipfilter/netinet/ip_state.c b/sys/contrib/ipfilter/netinet/ip_state.c index 7c1b311..b0bf742 100644 --- a/sys/contrib/ipfilter/netinet/ip_state.c +++ b/sys/contrib/ipfilter/netinet/ip_state.c @@ -1,5 +1,3 @@ -/* $FreeBSD$ */ - /* * Copyright (C) 1995-2003 by Darren Reed. * @@ -109,7 +107,7 @@ struct file; #if !defined(lint) static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; -static const char rcsid[] = "@(#)Id: ip_state.c,v 2.186.2.29 2005/03/28 10:47:54 darrenr Exp"; +static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/12/04 22:25:36 darrenr Exp $"; #endif static ipstate_t **ips_table = NULL; @@ -507,13 +505,17 @@ int mode; * means no packets match). */ case SIOCSTLCK : - fr_lock(data, &fr_state_lock); + if (!(mode & FWRITE)) { + error = EPERM; + } else { + fr_lock(data, &fr_state_lock); + } break; /* * Add an entry to the current state table. */ case SIOCSTPUT : - if (!fr_state_lock) { + if (!fr_state_lock || !(mode &FWRITE)) { error = EACCES; break; } @@ -635,6 +637,7 @@ caddr_t data; if (fr == NULL) { READ_ENTER(&ipf_state); fr_stinsert(isn, 0); + MUTEX_EXIT(&isn->is_lock); RWLOCK_EXIT(&ipf_state); return 0; } @@ -682,6 +685,7 @@ caddr_t data; } READ_ENTER(&ipf_state); fr_stinsert(isn, 0); + MUTEX_EXIT(&isn->is_lock); RWLOCK_EXIT(&ipf_state); } else { @@ -689,6 +693,7 @@ caddr_t data; for (is = ips_list; is; is = is->is_next) if (is->is_rule == fr) { fr_stinsert(isn, 0); + MUTEX_EXIT(&isn->is_lock); break; } @@ -716,6 +721,7 @@ caddr_t data; /* to pointers and adjusts running stats for the hash table as appropriate. */ /* */ /* Locking: it is assumed that some kind of lock on ipf_state is held. */ +/* Exits with is_lock initialised and held. */ /* ------------------------------------------------------------------------ */ void fr_stinsert(is, rev) ipstate_t *is; @@ -780,7 +786,6 @@ int rev; MUTEX_EXIT(&ipf_stinsert); fr_setstatequeue(is, rev); - MUTEX_EXIT(&is->is_lock); } @@ -830,6 +835,7 @@ u_int flags; * to it, then schedule an automatic flush in case we can clear out * some "dead old wood". */ + MUTEX_ENTER(&fr->fr_lock); if ((fr != NULL) && (fr->fr_statemax != 0) && (fr->fr_statecnt >= fr->fr_statemax)) { MUTEX_EXIT(&fr->fr_lock); @@ -837,6 +843,8 @@ u_int flags; fr_state_doflush = 1; return NULL; } + fr->fr_statecnt++; + MUTEX_EXIT(&fr->fr_lock); pass = (fr == NULL) ? 0 : fr->fr_flags; @@ -979,9 +987,9 @@ u_int flags; TH_SYN && (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { if (fr_tcpoptions(fin, tcp, - &is->is_tcp.ts_data[0])) - is->is_swinflags = TCP_WSCALE_SEEN| - TCP_WSCALE_FIRST; + &is->is_tcp.ts_data[0]) == -1) { + fin->fin_flx |= FI_BAD; + } } if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) { @@ -1038,16 +1046,16 @@ u_int flags; break; } if (is != NULL) - return NULL; + goto cantaddstate; if (ips_stats.iss_bucketlen[hv] >= fr_state_maxbucket) { ATOMIC_INCL(ips_stats.iss_bucketfull); - return NULL; + goto cantaddstate; } KMALLOC(is, ipstate_t *); if (is == NULL) { ATOMIC_INCL(ips_stats.iss_nomem); - return NULL; + goto cantaddstate; } bcopy((char *)&ips, (char *)is, sizeof(*is)); /* @@ -1124,8 +1132,14 @@ u_int flags; * this may change. */ is->is_v = fin->fin_v; - is->is_opt = fin->fin_optmsk; - is->is_optmsk = 0xffffffff; + is->is_opt[0] = fin->fin_optmsk; + is->is_optmsk[0] = 0xffffffff; + is->is_optmsk[1] = 0xffffffff; + if (is->is_v == 6) { + is->is_opt[0] &= ~0x8; + is->is_optmsk[0] &= ~0x8; + is->is_optmsk[1] &= ~0x8; + } is->is_sec = fin->fin_secmsk; is->is_secmsk = 0xffff; is->is_auth = fin->fin_auth; @@ -1150,13 +1164,14 @@ u_int flags; * timer on it as we'll never see an error if it fails to * connect. */ - MUTEX_ENTER(&is->is_lock); (void) fr_tcp_age(&is->is_sti, fin, ips_tqtqb, is->is_flags); MUTEX_EXIT(&is->is_lock); #ifdef IPFILTER_SCAN if ((is->is_flags & SI_CLONE) == 0) (void) ipsc_attachis(is); #endif + } else { + MUTEX_EXIT(&is->is_lock); } #ifdef IPFILTER_SYNC if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) @@ -1173,12 +1188,21 @@ u_int flags; (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); return is; + +cantaddstate: + if (fr != NULL) { + MUTEX_ENTER(&fr->fr_lock); + fr->fr_statecnt--; + MUTEX_EXIT(&fr->fr_lock); + } + return NULL; } /* ------------------------------------------------------------------------ */ /* Function: fr_tcpoptions */ -/* Returns: int - 1 == packet matches state entry, 0 == it does not */ +/* Returns: int - 1 == packet matches state entry, 0 == it does not, */ +/* -1 == packet has bad TCP options data */ /* Parameters: fin(I) - pointer to packet information */ /* tcp(I) - pointer to TCP packet header */ /* td(I) - pointer to TCP data held as part of the state */ @@ -1195,13 +1219,14 @@ tcpdata_t *td; char buf[64], *s, opt; mb_t *m = NULL; - off = fin->fin_hlen + sizeof(*tcp); - len = (TCP_OFF(tcp) << 2) - sizeof(*tcp); - if (fin->fin_plen < off + len) + len = (TCP_OFF(tcp) << 2); + if (fin->fin_dlen < len) return 0; + len -= sizeof(*tcp); + + off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff; m = fin->fin_m; - off += fin->fin_ipoff; mlen = MSGDSIZE(m) - off; if (len > mlen) { len = mlen; @@ -1239,7 +1264,10 @@ tcpdata_t *td; else if (i < 0) i = 0; td->td_winscale = i; - } + td->td_winflags |= TCP_WSCALE_SEEN| + TCP_WSCALE_FIRST; + } else + retval = -1; break; case TCPOPT_MAXSEG : /* @@ -1251,7 +1279,14 @@ tcpdata_t *td; i <<= 8; i += (int)*(s + 3); td->td_maxseg = i; - } + } else + retval = -1; + break; + case TCPOPT_SACK_PERMITTED : + if (ol == TCPOLEN_SACK_PERMITTED) + td->td_winflags |= TCP_SACK_PERMIT; + else + retval = -1; break; } } @@ -1322,24 +1357,22 @@ ipstate_t *is; is->is_s0[source] = ntohl(tcp->th_ack); is->is_s0[!source] = ntohl(tcp->th_seq) + 1; if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2)) && - tdata->td_winscale) { - if (fr_tcpoptions(fin, tcp, fdata)) { - fdata->td_winflags = TCP_WSCALE_SEEN| - TCP_WSCALE_FIRST; - } else { - if (!fdata->td_winscale) - tdata->td_winscale = 0; + (tdata->td_winflags & TCP_WSCALE_SEEN)) { + if (fr_tcpoptions(fin, tcp, fdata) == -1) + fin->fin_flx |= FI_BAD; + if (!(fdata->td_winflags & TCP_WSCALE_SEEN)) { + fdata->td_winscale = 0; + tdata->td_winscale = 0; } } if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) fr_checknewisn(fin, is); } else if (flags == TH_SYN) { is->is_s0[source] = ntohl(tcp->th_seq) + 1; - if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) - if (fr_tcpoptions(fin, tcp, tdata)) { - tdata->td_winflags = TCP_WSCALE_SEEN| - TCP_WSCALE_FIRST; - } + if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { + if (fr_tcpoptions(fin, tcp, tdata) == -1) + fin->fin_flx |= FI_BAD; + } if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) fr_checknewisn(fin, is); @@ -1410,6 +1443,7 @@ int flags; tcp_seq seq, ack, end; int ackskew, tcpflags; u_32_t win, maxwin; + int dsize, inseq; /* * Find difference between last checked packet and this packet. @@ -1421,9 +1455,28 @@ int flags; win = ntohs(tcp->th_win); else win = ntohs(tcp->th_win) << fdata->td_winscale; +#if 0 + /* + * XXX - This is a kludge is here because IPFilter doesn't track SACK + * options in TCP packets. This is not a trivial to do if one is to + * consider the performance impact of it. So instead, if the + * receiver has said SACK is ok, double the allowed window size. + * This is disabled for testing of another workaround for a problem + * with Microsoft Windows - see below. + */ + if ((tdata->td_winflags & TCP_SACK_PERMIT) != 0) + win *= 2; +#endif + + /* + * A window of 0 produces undesirable behaviour from this function. + */ if (win == 0) win = 1; + dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) + + ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0); + /* * if window scaling is present, the scaling is only allowed * for windows not in the first SYN packet. In that packet the @@ -1441,14 +1494,15 @@ int flags; fdata->td_maxwin = win; } else { fdata->td_winscale = 0; - fdata->td_winflags = 0; + fdata->td_winflags &= ~(TCP_WSCALE_FIRST| + TCP_WSCALE_SEEN); tdata->td_winscale = 0; - tdata->td_winflags = 0; + tdata->td_winflags &= ~(TCP_WSCALE_FIRST| + TCP_WSCALE_SEEN); } } - end = seq + fin->fin_dlen - (TCP_OFF(tcp) << 2) + - ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0); + end = seq + dsize; if ((fdata->td_end == 0) && (!(flags & IS_TCPFSM) || @@ -1456,7 +1510,7 @@ int flags; /* * Must be a (outgoing) SYN-ACK in reply to a SYN. */ - fdata->td_end = end; + fdata->td_end = end - 1; fdata->td_maxwin = 1; fdata->td_maxend = end + win; } @@ -1469,9 +1523,6 @@ int flags; ack = tdata->td_end; } - if (seq == end) - seq = end = fdata->td_end; - maxwin = tdata->td_maxwin; ackskew = tdata->td_end - ack; @@ -1486,16 +1537,25 @@ int flags; #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) #define SEQ_GT(a,b) ((int)((a) - (b)) > 0) - if ( -#if defined(_KERNEL) - (SEQ_GE(fdata->td_maxend, end)) && + inseq = 0; + if ((SEQ_GE(fdata->td_maxend, end)) && (SEQ_GE(seq, fdata->td_end - maxwin)) && -#endif /* XXX what about big packets */ #define MAXACKWINDOW 66000 (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) && ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { + inseq = 1; + /* + * Microsoft Windows will send the next packet to the right of the + * window if SACK is in use. + */ + } else if ((seq == fdata->td_maxend) && (ackskew == 0) && + (fdata->td_winflags & TCP_SACK_PERMIT) && + (tdata->td_winflags & TCP_SACK_PERMIT)) { + inseq = 1; + } + if (inseq) { /* if ackskew < 0 then this should be due to fragmented * packets. There is no way to know the length of the * total packet in advance. @@ -1584,8 +1644,7 @@ ipstate_t *is; clone->is_flags &= ~SI_CLONE; clone->is_flags |= SI_CLONED; fr_stinsert(clone, fin->fin_rev); - MUTEX_ENTER(&clone->is_lock); - clone->is_ref = 1; + clone->is_ref = 2; if (clone->is_p == IPPROTO_TCP) { (void) fr_tcp_age(&clone->is_sti, fin, ips_tqtqb, clone->is_flags); @@ -1770,7 +1829,7 @@ u_32_t cmask; * Match up any flags set from IP options. */ if ((cflx && (flx != (cflx & cmask))) || - ((fin->fin_optmsk & is->is_optmsk) != is->is_opt) || + ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) || ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) || ((fin->fin_auth & is->is_authmsk) != is->is_auth)) return NULL; @@ -1787,9 +1846,12 @@ u_32_t cmask; if ((flags & (SI_W_SPORT|SI_W_DPORT))) { if ((flags & SI_CLONE) != 0) { - is = fr_stclone(fin, tcp, is); - if (is == NULL) + ipstate_t *clone; + + clone = fr_stclone(fin, tcp, is); + if (clone == NULL) return NULL; + is = clone; } else { ATOMIC_DECL(ips_stats.iss_wild); } @@ -1820,8 +1882,14 @@ u_32_t cmask; ret = -1; - if (is->is_flx[out][rev] == 0) + if (is->is_flx[out][rev] == 0) { is->is_flx[out][rev] = flx; + is->is_opt[rev] = fin->fin_optmsk; + if (is->is_v == 6) { + is->is_opt[rev] &= ~0x8; + is->is_optmsk[rev] &= ~0x8; + } + } /* * Check if the interface name for this "direction" is set and if not, @@ -1867,21 +1935,16 @@ fr_info_t *fin; /* * Does it at least have the return (basic) IP header ? + * Is it an actual recognised ICMP error type? * Only a basic IP header (no options) should be with * an ICMP error header. */ if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) || - (fin->fin_plen < ICMPERR_MINPKTLEN)) + (fin->fin_plen < ICMPERR_MINPKTLEN) || + !(fin->fin_flx & FI_ICMPERR)) return NULL; ic = fin->fin_dp; type = ic->icmp_type; - /* - * If it's not an error type, then return - */ - if ((type != ICMP_UNREACH) && (type != ICMP_SOURCEQUENCH) && - (type != ICMP_REDIRECT) && (type != ICMP_TIMXCEED) && - (type != ICMP_PARAMPROB)) - return NULL; oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN); /* @@ -1944,7 +2007,7 @@ fr_info_t *fin; */ savelen = oip->ip_len; oip->ip_len = len; - oip->ip_off = htons(oip->ip_off); + oip->ip_off = ntohs(oip->ip_off); ofin.fin_flx = FI_NOCKSUM; ofin.fin_v = 4; @@ -1972,8 +2035,6 @@ fr_info_t *fin; switch (oip->ip_p) { case IPPROTO_ICMP : - icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); - /* * an ICMP error can only be generated as a result of an * ICMP query, not as the response on an ICMP error @@ -1981,15 +2042,13 @@ fr_info_t *fin; * XXX theoretically ICMP_ECHOREP and the other reply's are * ICMP query's as well, but adding them here seems strange XXX */ - if ((icmp->icmp_type != ICMP_ECHO) && - (icmp->icmp_type != ICMP_TSTAMP) && - (icmp->icmp_type != ICMP_IREQ) && - (icmp->icmp_type != ICMP_MASKREQ)) + if ((ofin.fin_flx & FI_ICMPERR) != 0) return NULL; /* * perform a lookup of the ICMP packet in the state table */ + icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); hv = (pr = oip->ip_p); src.in4 = oip->ip_src; hv += src.in4.s_addr; @@ -2008,10 +2067,6 @@ fr_info_t *fin; is = fr_matchsrcdst(&ofin, is, &src, &dst, NULL, FI_ICMPCMP); if (is != NULL) { - if ((is->is_pass & FR_NOICMPERR) != 0) { - RWLOCK_EXIT(&ipf_state); - return NULL; - } /* * i : the index of this packet (the icmp * unreachable) @@ -2774,6 +2829,11 @@ int why; if (ipstate_logging != 0 && why != 0) ipstate_log(is, why); + if (is->is_p == IPPROTO_TCP) + ips_stats.iss_fin++; + else + ips_stats.iss_expire++; + if (is->is_rule != NULL) { is->is_rule->fr_statecnt--; (void)fr_derefrule(&is->is_rule); @@ -2800,9 +2860,7 @@ void fr_timeoutstate() ipftq_t *ifq, *ifqnext; ipftqent_t *tqe, *tqn; ipstate_t *is; -#if defined(USE_SPL) && defined(_KERNEL) - int s; -#endif + SPL_INT(s); SPL_NET(s); WRITE_ENTER(&ipf_state); @@ -2872,9 +2930,7 @@ int which, proto; int delete, removed; long try, maxtick; u_long interval; -#if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL) - int s; -#endif + SPL_INT(s); removed = 0; @@ -2903,10 +2959,6 @@ int which, proto; } if (delete) { - if (is->is_p == IPPROTO_TCP) - ips_stats.iss_fin++; - else - ips_stats.iss_expire++; fr_delstate(is, ISL_FLUSH); removed++; } else @@ -3042,7 +3094,7 @@ int flags; rval = 0; dir = fin->fin_rev; tcpflags = tcp->th_flags; - dlen = fin->fin_plen - fin->fin_hlen - (TCP_OFF(tcp) << 2); + dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2); if (tcpflags & TH_RST) { if (!(tcpflags & TH_PUSH) && !dlen) @@ -3180,13 +3232,34 @@ int flags; break; case IPF_TCPS_HALF_ESTAB: /* 4 */ - if (ostate >= IPF_TCPS_HALF_ESTAB) { - if ((tcpflags & TH_ACKMASK) == TH_ACK) { + if (tcpflags & TH_FIN) { + nstate = IPF_TCPS_FIN_WAIT_1; + rval = 1; + } else if ((tcpflags & TH_ACKMASK) == TH_ACK) { + /* + * If we've picked up a connection in mid + * flight, we could be looking at a follow on + * packet from the same direction as the one + * that created this state. Recognise it but + * do not advance the entire connection's + * state. + */ + switch (ostate) + { + case IPF_TCPS_CLOSED : + case IPF_TCPS_SYN_SENT : + case IPF_TCPS_SYN_RECEIVED : + rval = 1; + break; + case IPF_TCPS_HALF_ESTAB : + case IPF_TCPS_ESTABLISHED : nstate = IPF_TCPS_ESTABLISHED; rval = 1; + break; + default : + break; } } - break; case IPF_TCPS_ESTABLISHED: /* 5 */ @@ -3316,9 +3389,6 @@ int flags; (u_long)tcp, tcpflags, (u_long)tqe, nstate, ostate); # endif -# ifdef DIAGNOSTIC - panic("invalid TCP state"); -# endif #else abort(); #endif @@ -3442,20 +3512,16 @@ fr_info_t *fin; /* * Does it at least have the return (basic) IP header ? + * Is it an actual recognised ICMP error type? * Only a basic IP header (no options) should be with * an ICMP error header. */ - if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN)) + if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) || + !(fin->fin_flx & FI_ICMPERR)) return NULL; ic6 = fin->fin_dp; type = ic6->icmp6_type; - /* - * If it's not an error type, then return - */ - if ((type != ICMP6_DST_UNREACH) && (type != ICMP6_PACKET_TOO_BIG) && - (type != ICMP6_TIME_EXCEEDED) && (type != ICMP6_PARAM_PROB)) - return NULL; oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN); if (fin->fin_plen < sizeof(*oip6)) |