summaryrefslogtreecommitdiffstats
path: root/sys/net/flowtable.c
diff options
context:
space:
mode:
authorkmacy <kmacy@FreeBSD.org>2009-08-18 20:28:58 +0000
committerkmacy <kmacy@FreeBSD.org>2009-08-18 20:28:58 +0000
commitbbd03fa206b39c464a0bdee0cbee82fb7659a405 (patch)
tree4bba016fc345bb4589b4f846bcbeba1d9b670a91 /sys/net/flowtable.c
parent55fac3f9e0bfbff28af262ca9c4e0a4db8e160de (diff)
downloadFreeBSD-src-bbd03fa206b39c464a0bdee0cbee82fb7659a405.zip
FreeBSD-src-bbd03fa206b39c464a0bdee0cbee82fb7659a405.tar.gz
- change the interface to flowtable_lookup so that we don't rely on
the mbuf for obtaining the fib index - check that a cached flow corresponds to the same fib index as the packet for which we are doing the lookup - at interface detach time flush any flows referencing stale rtentrys associated with the interface that is going away (fixes reported panics) - reduce the time between cleans so that, in case the cleaner is running at the time the eventhandler is called and the wakeup is missed, less time will elapse before the eventhandler returns - separate per-vnet initialization from global initialization (pointed out by jeli@) Reviewed by: sam@ Approved by: re@
Diffstat (limited to 'sys/net/flowtable.c')
-rw-r--r--sys/net/flowtable.c235
1 files changed, 194 insertions, 41 deletions
diff --git a/sys/net/flowtable.c b/sys/net/flowtable.c
index efdde72a..e974727 100644
--- a/sys/net/flowtable.c
+++ b/sys/net/flowtable.c
@@ -29,6 +29,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "opt_route.h"
#include "opt_mpath.h"
+#include "opt_ddb.h"
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -36,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/bitstring.h>
+#include <sys/condvar.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
@@ -66,6 +68,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp.h>
#include <libkern/jenkins.h>
+#include <ddb/ddb.h>
struct ipv4_tuple {
uint16_t ip_sport; /* source port */
@@ -94,8 +97,9 @@ union ipv6_flow {
struct flentry {
volatile uint32_t f_fhash; /* hash flowing forward */
uint16_t f_flags; /* flow flags */
- uint8_t f_pad; /* alignment */
+ uint8_t f_pad;
uint8_t f_proto; /* protocol */
+ uint32_t f_fibnum; /* fib index */
uint32_t f_uptime; /* uptime at last access */
struct flentry *f_next; /* pointer to collision entry */
volatile struct rtentry *f_rt; /* rtentry for flow */
@@ -173,6 +177,10 @@ static VNET_DEFINE(uma_zone_t, flow_ipv6_zone);
#define V_flow_ipv4_zone VNET(flow_ipv4_zone)
#define V_flow_ipv6_zone VNET(flow_ipv6_zone)
+static struct cv flowclean_cv;
+static struct mtx flowclean_lock;
+static uint32_t flowclean_cycles;
+
/*
* TODO:
* - Make flowtable stats per-cpu, aggregated at sysctl call time,
@@ -288,10 +296,10 @@ SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, nmbflows,
#ifndef RADIX_MPATH
static void
-in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fib)
+in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
{
- rtalloc_ign_fib(ro, 0, fib);
+ rtalloc_ign_fib(ro, 0, fibnum);
}
#endif
@@ -425,7 +433,7 @@ static bitstr_t *
flowtable_mask(struct flowtable *ft)
{
bitstr_t *mask;
-
+
if (ft->ft_flags & FL_PCPU)
mask = ft->ft_masks[curcpu];
else
@@ -501,7 +509,7 @@ flowtable_set_hashkey(struct flentry *fle, uint32_t *key)
static int
flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
- uint8_t proto, struct route *ro, uint16_t flags)
+ uint8_t proto, uint32_t fibnum, struct route *ro, uint16_t flags)
{
struct flentry *fle, *fletail, *newfle, **flep;
int depth;
@@ -564,6 +572,7 @@ skip:
fle->f_rt = ro->ro_rt;
fle->f_lle = ro->ro_lle;
fle->f_fhash = hash;
+ fle->f_fibnum = fibnum;
fle->f_uptime = time_uptime;
FL_ENTRY_UNLOCK(ft, hash);
return (0);
@@ -591,13 +600,13 @@ flowtable_key_equal(struct flentry *fle, uint32_t *key)
}
int
-flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro)
+flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro, uint32_t fibnum)
{
uint32_t key[9], hash;
struct flentry *fle;
uint16_t flags;
uint8_t proto = 0;
- int error = 0, fib = 0;
+ int error = 0;
struct rtentry *rt;
struct llentry *lle;
@@ -640,6 +649,7 @@ keycheck:
&& fle->f_fhash == hash
&& flowtable_key_equal(fle, key)
&& (proto == fle->f_proto)
+ && (fibnum == fle->f_fibnum)
&& (rt->rt_flags & RTF_UP)
&& (rt->rt_ifp != NULL)) {
V_flowtable_hits++;
@@ -668,10 +678,8 @@ uncached:
* of arpresolve with an rt_check variant that expected to
* receive the route locked
*/
- if (m != NULL)
- fib = M_GETFIB(m);
- ft->ft_rtalloc(ro, hash, fib);
+ ft->ft_rtalloc(ro, hash, fibnum);
if (ro->ro_rt == NULL)
error = ENETUNREACH;
else {
@@ -692,7 +700,7 @@ uncached:
ro->ro_rt = NULL;
return (ENOENT);
}
- error = flowtable_insert(ft, hash, key, proto,
+ error = flowtable_insert(ft, hash, key, proto, fibnum,
ro, flags);
if (error) {
@@ -791,35 +799,6 @@ flowtable_alloc(int nentry, int flags)
return (ft);
}
-static void
-flowtable_init(const void *unused __unused)
-{
-
- V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
- NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
- V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
- NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
- uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
- uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
- V_flowtable_ready = 1;
-}
-
-VNET_SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
- flowtable_init, NULL);
-
-#ifdef VIMAGE
-static void
-flowtable_uninit(const void *unused __unused)
-{
-
- uma_zdestroy(V_flow_ipv4_zone);
- uma_zdestroy(V_flow_ipv6_zone);
-}
-
-VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
- flowtable_uninit, NULL);
-#endif
-
/*
* The rest of the code is devoted to garbage collection of expired entries.
* It is a new additon made necessary by the switch to dynamically allocating
@@ -973,12 +952,30 @@ flowtable_cleaner(void)
}
VNET_LIST_RUNLOCK();
+ flowclean_cycles++;
/*
* The 20 second interval between cleaning checks
* is arbitrary
*/
- pause("flowcleanwait", 20*hz);
+ mtx_lock(&flowclean_lock);
+ cv_broadcast(&flowclean_cv);
+ cv_timedwait(&flowclean_cv, &flowclean_lock, 10*hz);
+ mtx_unlock(&flowclean_lock);
+ }
+}
+
+static void
+flowtable_flush(void *unused __unused)
+{
+ uint64_t start;
+
+ mtx_lock(&flowclean_lock);
+ start = flowclean_cycles;
+ while (start == flowclean_cycles) {
+ cv_broadcast(&flowclean_cv);
+ cv_wait(&flowclean_cv, &flowclean_lock);
}
+ mtx_unlock(&flowclean_lock);
}
static struct kproc_desc flow_kp = {
@@ -988,3 +985,159 @@ static struct kproc_desc flow_kp = {
};
SYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp);
+static void
+flowtable_init_vnet(const void *unused __unused)
+{
+
+ V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
+ NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
+ V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
+ NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
+ uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
+ uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
+}
+VNET_SYSINIT(flowtable_init_vnet, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE,
+ flowtable_init_vnet, NULL);
+
+static void
+flowtable_init(const void *unused __unused)
+{
+
+ cv_init(&flowclean_cv, "flowcleanwait");
+ mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
+ EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
+ EVENTHANDLER_PRI_ANY);
+ V_flowtable_ready = 1;
+}
+SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
+ flowtable_init, NULL);
+
+
+#ifdef VIMAGE
+static void
+flowtable_uninit(const void *unused __unused)
+{
+
+ uma_zdestroy(V_flow_ipv4_zone);
+ uma_zdestroy(V_flow_ipv6_zone);
+}
+
+VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
+ flowtable_uninit, NULL);
+#endif
+
+#ifdef DDB
+static bitstr_t *
+flowtable_mask_pcpu(struct flowtable *ft, int cpuid)
+{
+ bitstr_t *mask;
+
+ if (ft->ft_flags & FL_PCPU)
+ mask = ft->ft_masks[cpuid];
+ else
+ mask = ft->ft_masks[0];
+
+ return (mask);
+}
+
+static struct flentry **
+flowtable_entry_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
+{
+ struct flentry **fle;
+ int index = (hash % ft->ft_size);
+
+ if (ft->ft_flags & FL_PCPU) {
+ fle = &ft->ft_table.pcpu[cpuid][index];
+ } else {
+ fle = &ft->ft_table.global[index];
+ }
+
+ return (fle);
+}
+
+static void
+flow_show(struct flowtable *ft, struct flentry *fle)
+{
+ int idle_time;
+ int rt_valid;
+
+ idle_time = (int)(time_uptime - fle->f_uptime);
+ rt_valid = fle->f_rt != NULL;
+ db_printf("hash=0x%08x idle_time=%03d rt=%p ifp=%p",
+ fle->f_fhash, idle_time,
+ fle->f_rt, rt_valid ? fle->f_rt->rt_ifp : NULL);
+ if (rt_valid && (fle->f_rt->rt_flags & RTF_UP))
+ db_printf(" RTF_UP ");
+ if (fle->f_flags & FL_STALE)
+ db_printf(" FL_STALE ");
+ db_printf("\n");
+}
+
+static void
+flowtable_show(struct flowtable *ft, int cpuid)
+{
+ int curbit = 0;
+ struct flentry *fle, **flehead;
+ bitstr_t *mask, *tmpmask;
+
+ db_printf("cpu: %d\n", cpuid);
+ mask = flowtable_mask_pcpu(ft, cpuid);
+ tmpmask = ft->ft_tmpmask;
+ memcpy(tmpmask, mask, ft->ft_size/8);
+ /*
+ * XXX Note to self, bit_ffs operates at the byte level
+ * and thus adds gratuitous overhead
+ */
+ bit_ffs(tmpmask, ft->ft_size, &curbit);
+ while (curbit != -1) {
+ if (curbit >= ft->ft_size || curbit < -1) {
+ db_printf("warning: bad curbit value %d \n",
+ curbit);
+ break;
+ }
+
+ flehead = flowtable_entry_pcpu(ft, curbit, cpuid);
+ fle = *flehead;
+
+ while (fle != NULL) {
+ flow_show(ft, fle);
+ fle = fle->f_next;
+ continue;
+ }
+ bit_clear(tmpmask, curbit);
+ bit_ffs(tmpmask, ft->ft_size, &curbit);
+ }
+}
+
+static void
+flowtable_show_vnet(void)
+{
+ struct flowtable *ft;
+ int i;
+
+ ft = V_flow_list_head;
+ while (ft != NULL) {
+ if (ft->ft_flags & FL_PCPU) {
+ for (i = 0; i <= mp_maxid; i++) {
+ if (CPU_ABSENT(i))
+ continue;
+ flowtable_show(ft, i);
+ }
+ } else {
+ flowtable_show(ft, 0);
+ }
+ ft = ft->ft_next;
+ }
+}
+
+DB_SHOW_COMMAND(flowtables, db_show_flowtables)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ flowtable_show_vnet();
+ CURVNET_RESTORE();
+ }
+}
+#endif
OpenPOWER on IntegriCloud