diff options
author | sam <sam@FreeBSD.org> | 2003-09-01 05:12:36 +0000 |
---|---|---|
committer | sam <sam@FreeBSD.org> | 2003-09-01 05:12:36 +0000 |
commit | 284da805ed783bce2fb095603a05350576b5c7ba (patch) | |
tree | bc34204168fe67cbcf81492ee0dd9525b81a8959 /sys/netinet | |
parent | f13a722652fb71f09c5943abad4fac4e82da66b4 (diff) | |
download | FreeBSD-src-284da805ed783bce2fb095603a05350576b5c7ba.zip FreeBSD-src-284da805ed783bce2fb095603a05350576b5c7ba.tar.gz |
add locking
NB: There is a known LOR on the forwarding path; this needs to be resolved
together with a similar issue in the bridge. For the moment it is
believed to be benign.
Sponsored by: FreeBSD Fondation
Diffstat (limited to 'sys/netinet')
-rw-r--r-- | sys/netinet/ip_flow.c | 190 | ||||
-rw-r--r-- | sys/netinet/ip_flow.h | 7 |
2 files changed, 123 insertions, 74 deletions
diff --git a/sys/netinet/ip_flow.c b/sys/netinet/ip_flow.c index a23f2bb..f2e23ea 100644 --- a/sys/netinet/ip_flow.c +++ b/sys/netinet/ip_flow.c @@ -38,8 +38,10 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/lock.h> #include <sys/malloc.h> #include <sys/mbuf.h> +#include <sys/mutex.h> #include <sys/protosw.h> #include <sys/socket.h> #include <sys/kernel.h> @@ -59,10 +61,31 @@ #define IPFLOW_TIMER (5 * PR_SLOWHZ) #define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */ #define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS) -static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE]; +#if IPFLOW_HASHSIZE > 255 +#error "make ipf_hash larger" +#endif +static struct ipflow_head ipflows[IPFLOW_HASHSIZE]; static int ipflow_inuse; #define IPFLOW_MAX 256 +/* + * Each flow list has a lock that guards updates to the list and to + * all entries on the list. Flow entries hold the hash index for + * finding the head of the list so the lock can be found quickly. + * + * ipflow_inuse holds a count of the number of flow entries present. + * This is used to bound the size of the table. When IPFLOW_MAX entries + * are present and an additional entry is needed one is chosen for + * replacement. We could use atomic ops for this counter but having it + * inconsistent doesn't appear to be a problem. + */ +#define IPFLOW_HEAD_LOCK(_ipfh) mtx_lock(&(_ipfh)->ipfh_mtx) +#define IPFLOW_HEAD_UNLOCK(_ipfh) mtx_unlock(&(_ipfh)->ipfh_mtx) +#define IPFLOW_LOCK(_ipf) \ + IPFLOW_HEAD_LOCK(&ipflows[(_ipf)->ipf_hash]) +#define IPFLOW_UNLOCK(_ipf) \ + IPFLOW_HEAD_UNLOCK(&ipflows[(_ipf)->ipf_hash]) + static int ipflow_active = 0; SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW, &ipflow_active, 0, "Enable flow-based IP forwarding"); @@ -70,10 +93,7 @@ SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW, static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow"); static unsigned -ipflow_hash( - struct in_addr dst, - struct in_addr src, - unsigned tos) +ipflow_hash(struct in_addr dst, struct in_addr src, unsigned tos) { unsigned hash = tos; int idx; @@ -83,28 +103,30 @@ ipflow_hash( } static struct ipflow * -ipflow_lookup( - const struct ip *ip) +ipflow_lookup(const struct ip *ip) { unsigned hash; + struct ipflow_head *head; struct ipflow *ipf; hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); + head = &ipflows[hash]; - ipf = LIST_FIRST(&ipflows[hash]); - while (ipf != NULL) { + IPFLOW_HEAD_LOCK(head); + LIST_FOREACH(ipf, &head->ipfh_head, ipf_next) { if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr && ip->ip_src.s_addr == ipf->ipf_src.s_addr - && ip->ip_tos == ipf->ipf_tos) - break; - ipf = LIST_NEXT(ipf, ipf_next); + && ip->ip_tos == ipf->ipf_tos) { + /* NB: return head locked */ + return ipf; + } } - return ipf; + IPFLOW_HEAD_UNLOCK(head); + return NULL; } int -ipflow_fastforward( - struct mbuf *m) +ipflow_fastforward(struct mbuf *m) { struct ip *ip; struct ipflow *ipf; @@ -134,14 +156,18 @@ ipflow_fastforward( * Route and interface still up? */ rt = ipf->ipf_ro.ro_rt; - if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0) + if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0) { + IPFLOW_UNLOCK(ipf); return 0; + } /* * Packet size OK? TTL? */ - if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) + if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) { + IPFLOW_UNLOCK(ipf); return 0; + } /* * Everything checks out and so we can forward this packet. @@ -170,12 +196,12 @@ ipflow_fastforward( else ipf->ipf_errors++; } + IPFLOW_UNLOCK(ipf); return 1; } - + static void -ipflow_addstats( - struct ipflow *ipf) +ipflow_addstats(struct ipflow *ipf) { ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped; @@ -183,36 +209,21 @@ ipflow_addstats( ipstat.ips_fastforward += ipf->ipf_uses; } -static void -ipflow_free( - struct ipflow *ipf) -{ - int s; - /* - * Remove the flow from the hash table (at elevated IPL). - * Once it's off the list, we can deal with it at normal - * network IPL. - */ - s = splimp(); - LIST_REMOVE(ipf, ipf_next); - splx(s); - ipflow_addstats(ipf); - RTFREE(ipf->ipf_ro.ro_rt); - ipflow_inuse--; - free(ipf, M_IPFLOW); -} - +/* + * XXX the locking here makes reaping an entry very expensive... + */ static struct ipflow * -ipflow_reap( - void) +ipflow_reap(void) { - struct ipflow *ipf, *maybe_ipf = NULL; + struct ipflow *victim = NULL; + struct ipflow *ipf; int idx; - int s; for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { - ipf = LIST_FIRST(&ipflows[idx]); - while (ipf != NULL) { + struct ipflow_head *head = &ipflows[idx]; + + IPFLOW_HEAD_LOCK(head); + LIST_FOREACH(ipf, &head->ipfh_head, ipf_next) { /* * If this no longer points to a valid route * reclaim it. @@ -224,38 +235,58 @@ ipflow_reap( * or has had the least uses in the last 1.5 * intervals. */ - if (maybe_ipf == NULL - || ipf->ipf_timer < maybe_ipf->ipf_timer - || (ipf->ipf_timer == maybe_ipf->ipf_timer + if (victim == NULL) + victim = ipf; + else if (ipf->ipf_timer < victim->ipf_timer + || (ipf->ipf_timer == victim->ipf_timer && ipf->ipf_last_uses + ipf->ipf_uses < - maybe_ipf->ipf_last_uses + - maybe_ipf->ipf_uses)) - maybe_ipf = ipf; - ipf = LIST_NEXT(ipf, ipf_next); + victim->ipf_last_uses + victim->ipf_uses)) { + if (victim->ipf_hash != ipf->ipf_hash) + IPFLOW_UNLOCK(victim); + victim = ipf; + } } + if (victim && victim->ipf_hash != idx) + IPFLOW_HEAD_UNLOCK(head); } - ipf = maybe_ipf; + ipf = victim; done: /* * Remove the entry from the flow table. */ - s = splimp(); LIST_REMOVE(ipf, ipf_next); - splx(s); + IPFLOW_UNLOCK(ipf); + ipflow_addstats(ipf); RTFREE(ipf->ipf_ro.ro_rt); return ipf; } +static void +ipflow_free(struct ipflow *ipf) +{ + /* + * Remove the flow from the hash table. + */ + LIST_REMOVE(ipf, ipf_next); + + ipflow_addstats(ipf); + RTFREE(ipf->ipf_ro.ro_rt); + ipflow_inuse--; + free(ipf, M_IPFLOW); +} + void -ipflow_slowtimo( - void) +ipflow_slowtimo(void) { struct ipflow *ipf; int idx; for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { - ipf = LIST_FIRST(&ipflows[idx]); + struct ipflow_head *head = &ipflows[idx]; + + IPFLOW_HEAD_LOCK(head); + ipf = LIST_FIRST(&head->ipfh_head); while (ipf != NULL) { struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next); if (--ipf->ipf_timer == 0) { @@ -269,18 +300,15 @@ ipflow_slowtimo( } ipf = next_ipf; } + IPFLOW_HEAD_UNLOCK(head); } } void -ipflow_create( - const struct route *ro, - struct mbuf *m) +ipflow_create(const struct route *ro, struct mbuf *m) { const struct ip *const ip = mtod(m, struct ip *); struct ipflow *ipf; - unsigned hash; - int s; /* * Don't create cache entries for ICMP messages. @@ -304,12 +332,18 @@ ipflow_create( ipflow_inuse++; } bzero((caddr_t) ipf, sizeof(*ipf)); + + ipf->ipf_hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); + ipf->ipf_dst = ip->ip_dst; + ipf->ipf_src = ip->ip_src; + ipf->ipf_tos = ip->ip_tos; + + IPFLOW_LOCK(ipf); } else { - s = splimp(); LIST_REMOVE(ipf, ipf_next); - splx(s); - ipflow_addstats(ipf); - RTFREE(ipf->ipf_ro.ro_rt); + + ipflow_addstats(ipf); /* add stats to old route */ + RTFREE(ipf->ipf_ro.ro_rt); /* clear reference */ ipf->ipf_uses = ipf->ipf_last_uses = 0; ipf->ipf_errors = ipf->ipf_dropped = 0; } @@ -319,15 +353,23 @@ ipflow_create( */ ipf->ipf_ro = *ro; ro->ro_rt->rt_refcnt++; - ipf->ipf_dst = ip->ip_dst; - ipf->ipf_src = ip->ip_src; - ipf->ipf_tos = ip->ip_tos; ipf->ipf_timer = IPFLOW_TIMER; /* * Insert into the approriate bucket of the flow table. */ - hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); - s = splimp(); - LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next); - splx(s); + LIST_INSERT_HEAD(&ipflows[ipf->ipf_hash].ipfh_head, ipf, ipf_next); + IPFLOW_UNLOCK(ipf); +} + +static void +ipflow_init(void) +{ + int idx; + + for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { + struct ipflow_head *head = &ipflows[idx]; + LIST_INIT(&head->ipfh_head); + mtx_init(&head->ipfh_mtx, "ipflow list head", NULL, MTX_DEF); + } } +SYSINIT(ipflow, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipflow_init, 0); diff --git a/sys/netinet/ip_flow.h b/sys/netinet/ip_flow.h index 4675996..7279b49 100644 --- a/sys/netinet/ip_flow.h +++ b/sys/netinet/ip_flow.h @@ -44,6 +44,8 @@ struct ipflow { struct in_addr ipf_dst; /* destination address */ struct in_addr ipf_src; /* source address */ + /* NB: this assumes the size of the list head hash table is <=256 */ + u_int8_t ipf_hash; /* index in list head table */ u_int8_t ipf_tos; /* type-of-service */ struct route ipf_ro; /* associated route entry */ u_long ipf_uses; /* number of uses in this period */ @@ -54,4 +56,9 @@ struct ipflow { u_long ipf_last_uses; /* number of uses in last period */ }; +struct ipflow_head { + LIST_HEAD(ipflowhead, ipflow) ipfh_head; + struct mtx ipfh_mtx; +}; + #endif |