diff options
Diffstat (limited to 'sys/netinet')
-rw-r--r-- | sys/netinet/if_ether.c | 31 | ||||
-rw-r--r-- | sys/netinet/in.c | 11 | ||||
-rw-r--r-- | sys/netinet/in_var.h | 2 | ||||
-rw-r--r-- | sys/netinet/ip_carp.c | 107 | ||||
-rw-r--r-- | sys/netinet/ip_divert.c | 15 | ||||
-rw-r--r-- | sys/netinet/ip_fastfwd.c | 129 | ||||
-rw-r--r-- | sys/netinet/ip_fw.h | 22 | ||||
-rw-r--r-- | sys/netinet/ip_input.c | 36 | ||||
-rw-r--r-- | sys/netinet/ip_ipsec.c | 153 | ||||
-rw-r--r-- | sys/netinet/ip_ipsec.h | 4 | ||||
-rw-r--r-- | sys/netinet/ip_output.c | 38 | ||||
-rw-r--r-- | sys/netinet/sctp_input.c | 2 | ||||
-rw-r--r-- | sys/netinet/tcp_input.c | 2 | ||||
-rw-r--r-- | sys/netinet/udp_usrreq.c | 4 |
14 files changed, 200 insertions, 356 deletions
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index 78d2f43..3a9d262 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -80,6 +80,8 @@ SYSCTL_DECL(_net_link_ether); static SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, ""); static SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, ""); +static VNET_DEFINE(int, arp_carp_mac) = 0; /* default to disabled */ + /* timer values */ static VNET_DEFINE(int, arpt_keep) = (20*60); /* once resolved, good for 20 * minutes */ @@ -98,12 +100,16 @@ VNET_PCPUSTAT_SYSUNINIT(arpstat); static VNET_DEFINE(int, arp_maxhold) = 1; +#define V_arp_carp_mac VNET(arp_carp_mac) #define V_arpt_keep VNET(arpt_keep) #define V_arpt_down VNET(arpt_down) #define V_arp_maxtries VNET(arp_maxtries) #define V_arp_proxyall VNET(arp_proxyall) #define V_arp_maxhold VNET(arp_maxhold) +SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, carp_mac, CTLFLAG_RW, + &VNET_NAME(arp_carp_mac), 0, + "Send CARP mac with replies to CARP ips"); SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, &VNET_NAME(arpt_keep), 0, "ARP entry lifetime in seconds"); @@ -909,6 +915,29 @@ reply: /* default behaviour; never reply by broadcast. */ m->m_flags &= ~(M_BCAST|M_MCAST); } +#ifdef DEV_CARP + if (V_arp_carp_mac && carp_match) { + struct ether_header *eh = (struct ether_header *) sa.sa_data; + short type = htons(ETHERTYPE_ARP); + + ah->ar_hrd = htons(ARPHRD_ETHER); + + (void)memcpy(&eh->ether_type, &type, + sizeof(eh->ether_type)); + (void)memcpy(eh->ether_dhost, ar_tha(ah), + sizeof (eh->ether_dhost)); + (void)memcpy(eh->ether_shost, enaddr, + sizeof(eh->ether_shost)); + + sa.sa_family = pseudo_AF_HDRCMPLT; + sa.sa_len = sizeof(sa); + } else { +#endif + sa.sa_family = AF_ARP; + sa.sa_len = 2; +#ifdef DEV_CARP + } +#endif (void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln); (void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln); ah->ar_op = htons(ARPOP_REPLY); @@ -916,8 +945,6 @@ reply: m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln); m->m_pkthdr.len = m->m_len; m->m_pkthdr.rcvif = NULL; - sa.sa_family = AF_ARP; - sa.sa_len = 2; m_clrprotoflags(m); /* Avoid confusing lower layers. */ (*ifp->if_output)(ifp, m, &sa, NULL); ARPSTAT_INC(txreplies); diff --git a/sys/netinet/in.c b/sys/netinet/in.c index b61b4b6..fde54be 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -315,6 +315,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, * Security checks before we get involved in any work. */ switch (cmd) { + case SIOCORDERIFADDR: case SIOCAIFADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_ADDIFADDR); @@ -374,6 +375,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, error = 0; switch (cmd) { + case SIOCORDERIFADDR: case SIOCAIFADDR: case SIOCDIFADDR: if (ifra->ifra_addr.sin_family == AF_INET) { @@ -399,10 +401,17 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, goto out; } } - if (cmd == SIOCDIFADDR && ia == NULL) { + if ((cmd == SIOCDIFADDR || cmd == SIOCORDERIFADDR) && ia == NULL) { error = EADDRNOTAVAIL; goto out; } + if (cmd == SIOCORDERIFADDR && ia != NULL) { + IF_ADDR_WLOCK(ifp); + TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); + TAILQ_INSERT_AFTER(&ifp->if_addrhead, TAILQ_FIRST(&ifp->if_addrhead), &ia->ia_ifa, ifa_link); + IF_ADDR_WUNLOCK(ifp); + goto out; + } if (ia == NULL) { ia = (struct in_ifaddr *) malloc(sizeof *ia, M_IFADDR, M_NOWAIT | diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 8657dbb..511ba26 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -452,7 +452,7 @@ int in_scrubprefix(struct in_ifaddr *, u_int); void ip_input(struct mbuf *); int in_ifadown(struct ifaddr *ifa, int); void in_ifscrub(struct ifnet *, struct in_ifaddr *, u_int); -struct mbuf *ip_fastforward(struct mbuf *); +struct mbuf *ip_tryforward(struct mbuf *); void *in_domifattach(struct ifnet *); void in_domifdetach(struct ifnet *, void *); diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index c5fea16..f85a322 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -115,7 +115,6 @@ struct carp_softc { int sc_sendad_success; #define CARP_SENDAD_MIN_SUCCESS 3 - int sc_init_counter; uint64_t sc_counter; /* authentication */ @@ -143,7 +142,7 @@ struct carp_if { struct ip6_moptions cif_im6o; #endif struct ifnet *cif_ifp; - struct mtx cif_mtx; + struct rwlock cif_mtx; }; #define CARP_INET 0 @@ -247,18 +246,19 @@ SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) -#define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ - NULL, MTX_DEF) -#define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) -#define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) -#define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) -#define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) +#define CIF_LOCK_INIT(cif) rw_init(&(cif)->cif_mtx, "carp_if") +#define CIF_LOCK_DESTROY(cif) rw_destroy(&(cif)->cif_mtx) +#define CIF_LOCK_ASSERT(cif) rw_assert(&(cif)->cif_mtx, MA_OWNED) +#define CIF_RLOCK(cif) rw_rlock(&(cif)->cif_mtx) +#define CIF_RUNLOCK(cif) rw_runlock(&(cif)->cif_mtx) +#define CIF_WLOCK(cif) rw_wlock(&(cif)->cif_mtx) +#define CIF_WUNLOCK(cif) rw_wunlock(&(cif)->cif_mtx) #define CIF_FREE(cif) do { \ CIF_LOCK_ASSERT(cif); \ if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ carp_free_if(cif); \ else \ - CIF_UNLOCK(cif); \ + CIF_WUNLOCK(cif); \ } while (0) #define CARP_LOG(...) do { \ @@ -579,7 +579,6 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) struct ifnet *ifp = m->m_pkthdr.rcvif; struct ifaddr *ifa; struct carp_softc *sc; - uint64_t tmp_counter; struct timeval sc_tv, ch_tv; /* verify that the VHID is valid on the receiving interface */ @@ -619,14 +618,20 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) goto out; } - tmp_counter = ntohl(ch->carp_counter[0]); - tmp_counter = tmp_counter<<32; - tmp_counter += ntohl(ch->carp_counter[1]); - - /* XXX Replay protection goes here */ - - sc->sc_init_counter = 0; - sc->sc_counter = tmp_counter; + if (!bcmp(&sc->sc_counter, ch->carp_counter, + sizeof(ch->carp_counter))) { + /* Do not log duplicates from non simplex interfaces */ + if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) { + CARPSTATS_INC(carps_badauth); + ifp->if_ierrors++; + CARP_UNLOCK(sc); + CARP_LOG("%s, replay or network loop detected.\n", + ifp->if_xname); + } else + CARP_UNLOCK(sc); + m_freem(m); + return; + } sc_tv.tv_sec = sc->sc_advbase; sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; @@ -700,13 +705,12 @@ carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) { struct m_tag *mtag; - if (sc->sc_init_counter) { + if (!sc->sc_counter) { /* this could also be seconds since unix epoch */ sc->sc_counter = arc4random(); sc->sc_counter = sc->sc_counter << 32; sc->sc_counter += arc4random(); - } else - sc->sc_counter++; + } ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); @@ -772,7 +776,8 @@ carp_send_ad_error(struct carp_softc *sc, int error) char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, error, sc->sc_carpdev->if_xname); - carp_demote_adj(V_carp_senderr_adj, msg); + if (V_carp_senderr_adj > 0) + carp_demote_adj(V_carp_senderr_adj, msg); } sc->sc_sendad_success = 0; } else { @@ -782,7 +787,8 @@ carp_send_ad_error(struct carp_softc *sc, int error) char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, sc->sc_carpdev->if_xname); - carp_demote_adj(-V_carp_senderr_adj, msg); + if (V_carp_senderr_adj > 0) + carp_demote_adj(-V_carp_senderr_adj, msg); sc->sc_sendad_errors = 0; } else sc->sc_sendad_errors = 0; @@ -1117,18 +1123,17 @@ carp_forus(struct ifnet *ifp, u_char *dhost) if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) return (0); - CIF_LOCK(ifp->if_carp); + CIF_RLOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { - CARP_LOCK(sc); - if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), - ETHER_ADDR_LEN)) { - CARP_UNLOCK(sc); - CIF_UNLOCK(ifp->if_carp); + //CARP_LOCK(sc); + if (sc->sc_state == MASTER && ena[5] == sc->sc_vhid) { + //CARP_UNLOCK(sc); + CIF_RUNLOCK(ifp->if_carp); return (1); } - CARP_UNLOCK(sc); + //CARP_UNLOCK(sc); } - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); return (0); } @@ -1485,9 +1490,9 @@ carp_alloc(struct ifnet *ifp) sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); + sc->sc_counter = 0; sc->sc_advbase = CARP_DFLTINTV; sc->sc_vhid = -1; /* required setting */ - sc->sc_init_counter = 1; sc->sc_state = INIT; sc->sc_ifasiz = sizeof(struct ifaddr *); @@ -1503,9 +1508,9 @@ carp_alloc(struct ifnet *ifp) #endif callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); - CIF_LOCK(cif); + CIF_WLOCK(cif); TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); - CIF_UNLOCK(cif); + CIF_WUNLOCK(cif); mtx_lock(&carp_mtx); LIST_INSERT_HEAD(&carp_list, sc, sc_next); @@ -1669,11 +1674,11 @@ carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) } if (ifp->if_carp) { - CIF_LOCK(ifp->if_carp); + CIF_RLOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == carpr.carpr_vhid) break; - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); } if (sc == NULL) { sc = carp_alloc(ifp); @@ -1747,11 +1752,11 @@ carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0); if (carpr.carpr_vhid != 0) { - CIF_LOCK(ifp->if_carp); + CIF_RLOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == carpr.carpr_vhid) break; - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); if (sc == NULL) { error = ENOENT; break; @@ -1762,12 +1767,12 @@ carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) int i, count; count = 0; - CIF_LOCK(ifp->if_carp); + CIF_RLOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) count++; if (count > carpr.carpr_count) { - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); error = EMSGSIZE; break; } @@ -1779,12 +1784,12 @@ carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) error = copyout(&carpr, ifr->ifr_data + (i * sizeof(carpr)), sizeof(carpr)); if (error) { - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); break; } i++; } - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); } break; } @@ -1833,12 +1838,12 @@ carp_attach(struct ifaddr *ifa, int vhid) return (EPROTOTYPE); } - CIF_LOCK(cif); + CIF_WLOCK(cif); IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == vhid) break; if (sc == NULL) { - CIF_UNLOCK(cif); + CIF_WUNLOCK(cif); return (ENOENT); } @@ -1846,7 +1851,7 @@ carp_attach(struct ifaddr *ifa, int vhid) if (ifa->ifa_carp->sc_vhid != vhid) carp_detach_locked(ifa); else { - CIF_UNLOCK(cif); + CIF_WUNLOCK(cif); return (0); } } @@ -1891,7 +1896,7 @@ carp_attach(struct ifaddr *ifa, int vhid) carp_sc_state(sc); CARP_UNLOCK(sc); - CIF_UNLOCK(cif); + CIF_WUNLOCK(cif); return (0); } @@ -1902,7 +1907,7 @@ carp_detach(struct ifaddr *ifa) struct ifnet *ifp = ifa->ifa_ifp; struct carp_if *cif = ifp->if_carp; - CIF_LOCK(cif); + CIF_WLOCK(cif); carp_detach_locked(ifa); CIF_FREE(cif); } @@ -1984,13 +1989,13 @@ carp_linkstate(struct ifnet *ifp) { struct carp_softc *sc; - CIF_LOCK(ifp->if_carp); + CIF_RLOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { CARP_LOCK(sc); carp_sc_state(sc); CARP_UNLOCK(sc); } - CIF_UNLOCK(ifp->if_carp); + CIF_RUNLOCK(ifp->if_carp); } static void @@ -2025,9 +2030,11 @@ carp_sc_state(struct carp_softc *sc) static void carp_demote_adj(int adj, char *reason) { + if (adj == 0) + return; atomic_add_int(&V_carp_demotion, adj); CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); - taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); + taskqueue_enqueue(taskqueue_thread, &carp_sendall_task); } static int diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index e698035..b74d60d 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -267,8 +267,7 @@ divert_packet(struct mbuf *m, int incoming) * this iface name will come along for the ride. * (see div_output for the other half of this.) */ - strlcpy(divsrc.sin_zero, m->m_pkthdr.rcvif->if_xname, - sizeof(divsrc.sin_zero)); + *((u_short *)divsrc.sin_zero) = m->m_pkthdr.rcvif->if_index; } /* Put packet on socket queue, if any */ @@ -342,7 +341,7 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, /* Loopback avoidance and state recovery */ if (sin) { - int i; + u_short idx; /* set the starting point. We provide a non-zero slot, * but a non_matching chain_id to skip that info and use @@ -350,7 +349,7 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, */ dt->slot = 1; /* dummy, chain_id is invalid */ dt->chain_id = 0; - dt->rulenum = sin->sin_port+1; /* host format ? */ + dt->rulenum = sin->sin_port; /* host format ? */ dt->rule_id = 0; /* * Find receive interface with the given name, stuffed @@ -358,10 +357,9 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, * The name is user supplied data so don't trust its size * or that it is zero terminated. */ - for (i = 0; i < sizeof(sin->sin_zero) && sin->sin_zero[i]; i++) - ; - if ( i > 0 && i < sizeof(sin->sin_zero)) - m->m_pkthdr.rcvif = ifunit(sin->sin_zero); + idx = *((u_short *)sin->sin_zero); + if ( idx > 0 ) + m->m_pkthdr.rcvif = ifnet_byindex(idx); } /* Reinject packet into the system as incoming or outgoing */ @@ -832,5 +830,4 @@ static moduledata_t ipdivertmod = { }; DECLARE_MODULE(ipdivert, ipdivertmod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); -MODULE_DEPEND(ipdivert, ipfw, 2, 2, 2); MODULE_VERSION(ipdivert, 1); diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index 0772cf0..9c3ea66 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -109,12 +109,6 @@ __FBSDID("$FreeBSD$"); #include <machine/in_cksum.h> -static VNET_DEFINE(int, ipfastforward_active); -#define V_ipfastforward_active VNET(ipfastforward_active) - -SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_RW, - &VNET_NAME(ipfastforward_active), 0, "Enable fast IP forwarding"); - static struct sockaddr_in * ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m) { @@ -159,7 +153,7 @@ ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m) * to ip_input for full processing. */ struct mbuf * -ip_fastforward(struct mbuf *m) +ip_tryforward(struct mbuf *m) { struct ip *ip; struct mbuf *m0 = NULL; @@ -167,119 +161,20 @@ ip_fastforward(struct mbuf *m) struct sockaddr_in *dst = NULL; struct ifnet *ifp; struct in_addr odest, dest; - uint16_t sum, ip_len, ip_off; + uint16_t ip_len, ip_off; int error = 0; - int hlen, mtu; + int mtu; struct m_tag *fwd_tag = NULL; /* * Are we active and forwarding packets? */ - if (!V_ipfastforward_active || !V_ipforwarding) - return m; M_ASSERTVALID(m); M_ASSERTPKTHDR(m); bzero(&ro, sizeof(ro)); - /* - * Step 1: check for packet drop conditions (and sanity checks) - */ - - /* - * Is entire packet big enough? - */ - if (m->m_pkthdr.len < sizeof(struct ip)) { - IPSTAT_INC(ips_tooshort); - goto drop; - } - - /* - * Is first mbuf large enough for ip header and is header present? - */ - if (m->m_len < sizeof (struct ip) && - (m = m_pullup(m, sizeof (struct ip))) == NULL) { - IPSTAT_INC(ips_toosmall); - return NULL; /* mbuf already free'd */ - } - - ip = mtod(m, struct ip *); - - /* - * Is it IPv4? - */ - if (ip->ip_v != IPVERSION) { - IPSTAT_INC(ips_badvers); - goto drop; - } - - /* - * Is IP header length correct and is it in first mbuf? - */ - hlen = ip->ip_hl << 2; - if (hlen < sizeof(struct ip)) { /* minimum header length */ - IPSTAT_INC(ips_badhlen); - goto drop; - } - if (hlen > m->m_len) { - if ((m = m_pullup(m, hlen)) == NULL) { - IPSTAT_INC(ips_badhlen); - return NULL; /* mbuf already free'd */ - } - ip = mtod(m, struct ip *); - } - - /* - * Checksum correct? - */ - if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) - sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); - else { - if (hlen == sizeof(struct ip)) - sum = in_cksum_hdr(ip); - else - sum = in_cksum(m, hlen); - } - if (sum) { - IPSTAT_INC(ips_badsum); - goto drop; - } - - /* - * Remember that we have checked the IP header and found it valid. - */ - m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); - - ip_len = ntohs(ip->ip_len); - - /* - * Is IP length longer than packet we have got? - */ - if (m->m_pkthdr.len < ip_len) { - IPSTAT_INC(ips_tooshort); - goto drop; - } - - /* - * Is packet longer than IP header tells us? If yes, truncate packet. - */ - if (m->m_pkthdr.len > ip_len) { - if (m->m_len == m->m_pkthdr.len) { - m->m_len = ip_len; - m->m_pkthdr.len = ip_len; - } else - m_adj(m, ip_len - m->m_pkthdr.len); - } - - /* - * Is packet from or to 127/8? - */ - if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || - (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { - IPSTAT_INC(ips_badaddr); - goto drop; - } #ifdef ALTQ /* @@ -290,12 +185,10 @@ ip_fastforward(struct mbuf *m) #endif /* - * Step 2: fallback conditions to normal ip_input path processing - */ - - /* * Only IP packets without options */ + ip = mtod(m, struct ip *); + if (ip->ip_hl != (sizeof(struct ip) >> 2)) { if (V_ip_doopts == 1) return m; @@ -496,18 +389,6 @@ passout: goto consumed; } -#ifndef ALTQ - /* - * Check if there is enough space in the interface queue - */ - if ((ifp->if_snd.ifq_len + ip_len / ifp->if_mtu + 1) >= - ifp->if_snd.ifq_maxlen) { - IPSTAT_INC(ips_odropped); - /* would send source quench here but that is depreciated */ - goto drop; - } -#endif - /* * Check if media link state of interface is not down */ diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index dae8cc0..188057d 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -65,7 +65,8 @@ /* IP_FW3 header/opcodes */ typedef struct _ip_fw3_opheader { uint16_t opcode; /* Operation opcode */ - uint16_t reserved[3]; /* Align to 64-bit boundary */ + uint16_t ctxid; + uint16_t reserved[2]; /* Align to 64-bit boundary */ } ip_fw3_opheader; @@ -74,6 +75,14 @@ typedef struct _ip_fw3_opheader { #define IP_FW_TABLE_XDEL 87 /* delete entry */ #define IP_FW_TABLE_XGETSIZE 88 /* get table size */ #define IP_FW_TABLE_XLIST 89 /* list table contents */ +#define IP_FW_TABLE_XLISTENTRY 90 /* list one table entry contents */ +#define IP_FW_TABLE_XZEROENTRY 91 /* zero one table entry stats */ +#define IP_FW_CTX_GET 92 +#define IP_FW_CTX_ADD 93 +#define IP_FW_CTX_DEL 94 +#define IP_FW_CTX_SET 95 +#define IP_FW_CTX_ADDMEMBER 96 +#define IP_FW_CTX_DELMEMBER 97 /* * The kernel representation of ipfw rules is made of a list of @@ -600,11 +609,16 @@ struct _ipfw_dyn_rule { #define IPFW_TABLE_CIDR 1 /* Table for holding IPv4/IPv6 prefixes */ #define IPFW_TABLE_INTERFACE 2 /* Table for holding interface names */ -#define IPFW_TABLE_MAXTYPE 2 /* Maximum valid number */ +#define IPFW_TABLE_MIX 3 /* Table for holding IPv4/mac entries */ +#define IPFW_TABLE_MAC 4 /* Table for holding mac entries */ +#define IPFW_TABLE_MAXTYPE 5 /* Maximum valid number */ typedef struct _ipfw_table_entry { in_addr_t addr; /* network address */ u_int32_t value; /* value */ + uint64_t mac_addr; + uint64_t bytes; + uint64_t packets; u_int16_t tbl; /* table number */ u_int8_t masklen; /* mask length */ } ipfw_table_entry; @@ -616,6 +630,10 @@ typedef struct _ipfw_table_xentry { uint16_t tbl; /* table number */ uint16_t flags; /* record flags */ uint32_t value; /* value */ + uint32_t timestamp; + uint64_t mac_addr; + uint64_t bytes; + uint64_t packets; union { /* Longest field needs to be aligned by 4-byte boundary */ struct in6_addr addr6; /* IPv6 address */ diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 2dc080f..5adc173 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -77,6 +77,8 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip_carp.h> #ifdef IPSEC #include <netinet/ip_ipsec.h> +#include <netipsec/ipsec.h> +#include <netipsec/key.h> #endif /* IPSEC */ #include <sys/socketvar.h> @@ -97,6 +99,11 @@ SYSCTL_VNET_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW, &VNET_NAME(ipforwarding), 0, "Enable IP forwarding between interfaces"); +static VNET_DEFINE(int, ipfastforward) = 1; +#define V_ipfastforward VNET(ipfastforward) +SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_RW, + &VNET_NAME(ipfastforward), 0, "Enable fast IP forwarding"); + static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */ #define V_ipsendredirects VNET(ipsendredirects) SYSCTL_VNET_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW, @@ -297,6 +304,9 @@ ip_init(void) if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) printf("%s: WARNING: unable to register pfil hook, " "error %d\n", __func__, i); + else + pfil_head_export_sysctl(&V_inet_pfil_hook, + SYSCTL_STATIC_CHILDREN(_net_inet_ip)); /* Skip initialization of globals for non-default instances. */ if (!IS_DEFAULT_VNET(curvnet)) @@ -464,12 +474,22 @@ tooshort: } else m_adj(m, ip_len - m->m_pkthdr.len); } + /* Try to forward the packet, but if we fail continue */ #ifdef IPSEC + /* For now we do not handle IPSEC in tryforward. */ + if (!key_havesp(IPSEC_DIR_INBOUND) && !key_havesp(IPSEC_DIR_OUTBOUND) && + (V_ipforwarding == 1 && V_ipfastforward == 1)) + if (ip_tryforward(m) == NULL) + return; /* * Bypass packet filtering for packets previously handled by IPsec. */ if (ip_ipsec_filtertunnel(m)) goto passin; +#else + if (V_ipforwarding == 1 && V_ipfastforward == 1) + if (ip_tryforward(m) == NULL) + return; #endif /* IPSEC */ /* @@ -499,8 +519,7 @@ tooshort: goto ours; } if (m->m_flags & M_IP_NEXTHOP) { - dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL); - if (dchg != 0) { + if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { /* * Directly ship the packet on. This allows * forwarding packets originally destined to us @@ -674,10 +693,6 @@ passin: IPSTAT_INC(ips_cantforward); m_freem(m); } else { -#ifdef IPSEC - if (ip_ipsec_fwd(m)) - goto bad; -#endif /* IPSEC */ ip_forward(m, dchg); } return; @@ -722,7 +737,7 @@ ours: * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. */ - if (ip_ipsec_input(m)) + if (ip_ipsec_input(m, ip->ip_p) != 0) goto bad; #endif /* IPSEC */ @@ -1355,6 +1370,13 @@ ip_forward(struct mbuf *m, int srcrt) m_freem(m); return; } +#ifdef IPSEC + if (ip_ipsec_fwd(m) != 0) { + IPSTAT_INC(ips_cantforward); + m_freem(m); + return; + } +#endif /* IPSEC */ #ifdef IPSTEALTH if (!V_ipstealth) { #endif diff --git a/sys/netinet/ip_ipsec.c b/sys/netinet/ip_ipsec.c index 133fa7c..4a702d7 100644 --- a/sys/netinet/ip_ipsec.c +++ b/sys/netinet/ip_ipsec.c @@ -61,15 +61,12 @@ __FBSDID("$FreeBSD$"); #include <machine/in_cksum.h> -#ifdef IPSEC #include <netipsec/ipsec.h> #include <netipsec/xform.h> #include <netipsec/key.h> -#endif /*IPSEC*/ extern struct protosw inetsw[]; -#ifdef IPSEC #ifdef IPSEC_FILTERTUNNEL static VNET_DEFINE(int, ip4_ipsec_filtertunnel) = 1; #else @@ -81,7 +78,6 @@ SYSCTL_DECL(_net_inet_ipsec); SYSCTL_VNET_INT(_net_inet_ipsec, OID_AUTO, filtertunnel, CTLFLAG_RW, &VNET_NAME(ip4_ipsec_filtertunnel), 0, "If set filter packets from an IPsec tunnel."); -#endif /* IPSEC */ /* * Check if we have to jump over firewall processing for this packet. @@ -91,7 +87,6 @@ SYSCTL_VNET_INT(_net_inet_ipsec, OID_AUTO, filtertunnel, int ip_ipsec_filtertunnel(struct mbuf *m) { -#ifdef IPSEC /* * Bypass packet filtering for packets previously handled by IPsec. @@ -99,50 +94,20 @@ ip_ipsec_filtertunnel(struct mbuf *m) if (!V_ip4_ipsec_filtertunnel && m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) return 1; -#endif return 0; } /* * Check if this packet has an active SA and needs to be dropped instead * of forwarded. - * Called from ip_input(). + * Called from ip_forward(). * 1 = drop packet, 0 = forward packet. */ int ip_ipsec_fwd(struct mbuf *m) { -#ifdef IPSEC - struct m_tag *mtag; - struct tdb_ident *tdbi; - struct secpolicy *sp; - int error; - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); - } else { - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, - IP_FORWARDING, &error); - } - if (sp == NULL) { /* NB: can happen if error */ - /*XXX error stat???*/ - DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/ - return 1; - } - - /* - * Check security policy against packet attributes. - */ - error = ipsec_in_reject(sp, m); - KEY_FREESP(&sp); - if (error) { - IPSTAT_INC(ips_cantforward); - return 1; - } -#endif /* IPSEC */ - return 0; + return (ipsec4_in_reject(m, NULL)); } /* @@ -153,51 +118,16 @@ ip_ipsec_fwd(struct mbuf *m) * 1 = drop packet, 0 = continue processing packet. */ int -ip_ipsec_input(struct mbuf *m) +ip_ipsec_input(struct mbuf *m, int nxt) { -#ifdef IPSEC - struct ip *ip = mtod(m, struct ip *); - struct m_tag *mtag; - struct tdb_ident *tdbi; - struct secpolicy *sp; - int error; /* * enforce IPsec policy checking if we are seeing last header. * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. */ - if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) { - /* - * Check if the packet has already had IPsec processing - * done. If so, then just pass it along. This tag gets - * set during AH, ESP, etc. input handling, before the - * packet is returned to the ip input queue for delivery. - */ - mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND); - } else { - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, - IP_FORWARDING, &error); - } - if (sp != NULL) { - /* - * Check security policy against packet attributes. - */ - error = ipsec_in_reject(sp, m); - KEY_FREESP(&sp); - } else { - /* XXX error stat??? */ - error = EINVAL; - DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ - return 1; - } - if (error) - return 1; - } -#endif /* IPSEC */ - return 0; + if ((inetsw[ip_protox[nxt]].pr_flags & PR_LASTHDR) != 0) + return (ipsec4_in_reject(m, NULL)); + return (0); } /* @@ -224,12 +154,9 @@ ip_ipsec_mtu(struct mbuf *m, int mtu) * -1 = packet was reinjected and stop processing packet */ int -ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error) +ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *error) { -#ifdef IPSEC - struct secpolicy *sp = NULL; - struct tdb_ident *tdbi; - struct m_tag *mtag; + struct secpolicy *sp; if (!key_havesp(IPSEC_DIR_OUTBOUND)) return 0; @@ -243,17 +170,11 @@ ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error) * AH, ESP, etc. processing), there will be a tag to bypass * the lookup and related policy checking. */ - mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); - if (mtag != NULL) { - tdbi = (struct tdb_ident *)(mtag + 1); - sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); - if (sp == NULL) - *error = -EINVAL; /* force silent drop */ - m_tag_delete(*m, mtag); - } else { - sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags, - error, inp); + if (m_tag_find(*m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) { + *error = 0; + return (0); } + sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, error, inp); /* * There are four return cases: * sp != NULL apply IPsec policy @@ -262,40 +183,6 @@ ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error) * sp == NULL, error != 0 discard packet, report error */ if (sp != NULL) { - /* Loop detection, check if ipsec processing already done */ - KASSERT(sp->req != NULL, ("ip_output: no ipsec request")); - for (mtag = m_tag_first(*m); mtag != NULL; - mtag = m_tag_next(*m, mtag)) { - if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) - continue; - if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && - mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) - continue; - /* - * Check if policy has an SA associated with it. - * This can happen when an SP has yet to acquire - * an SA; e.g. on first reference. If it occurs, - * then we let ipsec4_process_packet do its thing. - */ - if (sp->req->sav == NULL) - break; - tdbi = (struct tdb_ident *)(mtag + 1); - if (tdbi->spi == sp->req->sav->spi && - tdbi->proto == sp->req->sav->sah->saidx.proto && - bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst, - sizeof (union sockaddr_union)) == 0) { - /* - * No IPsec processing is needed, free - * reference to SP. - * - * NB: null pointer to avoid free at - * done: below. - */ - KEY_FREESP(&sp), sp = NULL; - goto done; - } - } - /* * Do delayed checksums now because we send before * this is done in the normal processing path. @@ -314,7 +201,8 @@ ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error) #endif /* NB: callee frees mbuf */ - *error = ipsec4_process_packet(*m, sp->req, *flags, 0); + *error = ipsec4_process_packet(*m, sp->req); + KEY_FREESP(&sp); if (*error == EJUSTRETURN) { /* * We had a SP with a level of 'use' and no SA. We @@ -346,22 +234,15 @@ ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error) if (*error == -EINVAL) *error = 0; goto bad; - } else { - /* No IPsec processing for this packet. */ } + /* No IPsec processing for this packet. */ } done: - if (sp != NULL) - KEY_FREESP(&sp); - return 0; + return (0); reinjected: - if (sp != NULL) - KEY_FREESP(&sp); - return -1; + return (-1); bad: if (sp != NULL) KEY_FREESP(&sp); return 1; -#endif /* IPSEC */ - return 0; } diff --git a/sys/netinet/ip_ipsec.h b/sys/netinet/ip_ipsec.h index 2870c11..f499b74 100644 --- a/sys/netinet/ip_ipsec.h +++ b/sys/netinet/ip_ipsec.h @@ -34,7 +34,7 @@ int ip_ipsec_filtertunnel(struct mbuf *); int ip_ipsec_fwd(struct mbuf *); -int ip_ipsec_input(struct mbuf *); +int ip_ipsec_input(struct mbuf *, int); int ip_ipsec_mtu(struct mbuf *, int); -int ip_ipsec_output(struct mbuf **, struct inpcb *, int *, int *); +int ip_ipsec_output(struct mbuf **, struct inpcb *, int *); #endif diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 954785d..0aee48d 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -123,7 +123,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ifnet *ifp = NULL; /* keep compiler happy */ struct mbuf *m0; int hlen = sizeof (struct ip); - int mtu; + int mtu = 0; int n; /* scratchpad */ int error = 0; struct sockaddr_in *dst; @@ -136,9 +136,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct in_addr odst; struct m_tag *fwd_tag = NULL; int have_ia_ref; -#ifdef IPSEC - int no_route_but_check_spd = 0; -#endif + int no_route_but_check = 0; + M_ASSERTPKTHDR(m); if (inp != NULL) { @@ -292,10 +291,11 @@ again: * There is no route for this packet, but it is * possible that a matching SPD entry exists. */ - no_route_but_check_spd = 1; mtu = 0; /* Silence GCC warning. */ - goto sendit; #endif + no_route_but_check = 1; + goto sendit; + IPSTAT_INC(ips_noroute); error = EHOSTUNREACH; goto bad; @@ -482,7 +482,7 @@ again: sendit: #ifdef IPSEC - switch(ip_ipsec_output(&m, inp, &flags, &error)) { + switch(ip_ipsec_output(&m, inp, &error)) { case 1: goto bad; case -1: @@ -491,28 +491,34 @@ sendit: default: break; /* Continue with packet processing. */ } - /* - * Check if there was a route for this packet; return error if not. - */ - if (no_route_but_check_spd) { - IPSTAT_INC(ips_noroute); - error = EHOSTUNREACH; - goto bad; - } /* Update variables that are affected by ipsec4_output(). */ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; #endif /* IPSEC */ /* Jump over all PFIL processing if hooks are not active. */ - if (!PFIL_HOOKED(&V_inet_pfil_hook)) + if (!PFIL_HOOKED(&V_inet_pfil_hook)) { + if (no_route_but_check) { + IPSTAT_INC(ips_noroute); + error = EHOSTUNREACH; + goto bad; + } goto passout; + } + + if (ifp == NULL) + ifp = V_loif; /* Run through list of hooks for output packets. */ odst.s_addr = ip->ip_dst.s_addr; error = pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, inp); if (error != 0 || m == NULL) goto done; + if (no_route_but_check) { + IPSTAT_INC(ips_noroute); + error = EHOSTUNREACH; + goto bad; + } ip = mtod(m, struct ip *); diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c index 05eb026..4cd9a9b 100644 --- a/sys/netinet/sctp_input.c +++ b/sys/netinet/sctp_input.c @@ -5788,7 +5788,6 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt #ifdef INET case AF_INET: if (ipsec4_in_reject(m, &inp->ip_inp.inp)) { - IPSECSTAT_INC(ips_in_polvio); SCTP_STAT_INCR(sctps_hdrops); goto out; } @@ -5797,7 +5796,6 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt #ifdef INET6 case AF_INET6: if (ipsec6_in_reject(m, &inp->ip_inp.inp)) { - IPSEC6STAT_INC(ips_in_polvio); SCTP_STAT_INCR(sctps_hdrops); goto out; } diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 32133ae..114802e 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -889,12 +889,10 @@ findpcb: #ifdef IPSEC #ifdef INET6 if (isipv6 && ipsec6_in_reject(m, inp)) { - IPSEC6STAT_INC(ips_in_polvio); goto dropunlock; } else #endif /* INET6 */ if (ipsec4_in_reject(m, inp) != 0) { - IPSECSTAT_INC(ips_in_polvio); goto dropunlock; } #endif /* IPSEC */ diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 79a2166..46bebf6 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -314,7 +314,6 @@ udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off, /* Check AH/ESP integrity. */ if (ipsec4_in_reject(n, inp)) { m_freem(n); - IPSECSTAT_INC(ips_in_polvio); return; } #ifdef IPSEC_NAT_T @@ -1561,7 +1560,8 @@ udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off) if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR); - (void) ipsec4_common_input(m, iphlen, ip->ip_p); + (void) ipsec_common_input(m, iphlen, offsetof(struct ip, ip_p), + AF_INET, ip->ip_p); return (NULL); /* NB: consumed, bypass processing. */ } #endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */ |