summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/net/flowtable.c235
-rw-r--r--sys/net/flowtable.h2
-rw-r--r--sys/netinet/ip_output.c2
3 files changed, 196 insertions, 43 deletions
diff --git a/sys/net/flowtable.c b/sys/net/flowtable.c
index efdde72a..e974727 100644
--- a/sys/net/flowtable.c
+++ b/sys/net/flowtable.c
@@ -29,6 +29,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "opt_route.h"
#include "opt_mpath.h"
+#include "opt_ddb.h"
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -36,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/bitstring.h>
+#include <sys/condvar.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
@@ -66,6 +68,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp.h>
#include <libkern/jenkins.h>
+#include <ddb/ddb.h>
struct ipv4_tuple {
uint16_t ip_sport; /* source port */
@@ -94,8 +97,9 @@ union ipv6_flow {
struct flentry {
volatile uint32_t f_fhash; /* hash flowing forward */
uint16_t f_flags; /* flow flags */
- uint8_t f_pad; /* alignment */
+ uint8_t f_pad;
uint8_t f_proto; /* protocol */
+ uint32_t f_fibnum; /* fib index */
uint32_t f_uptime; /* uptime at last access */
struct flentry *f_next; /* pointer to collision entry */
volatile struct rtentry *f_rt; /* rtentry for flow */
@@ -173,6 +177,10 @@ static VNET_DEFINE(uma_zone_t, flow_ipv6_zone);
#define V_flow_ipv4_zone VNET(flow_ipv4_zone)
#define V_flow_ipv6_zone VNET(flow_ipv6_zone)
+static struct cv flowclean_cv;
+static struct mtx flowclean_lock;
+static uint32_t flowclean_cycles;
+
/*
* TODO:
* - Make flowtable stats per-cpu, aggregated at sysctl call time,
@@ -288,10 +296,10 @@ SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, nmbflows,
#ifndef RADIX_MPATH
static void
-in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fib)
+in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
{
- rtalloc_ign_fib(ro, 0, fib);
+ rtalloc_ign_fib(ro, 0, fibnum);
}
#endif
@@ -425,7 +433,7 @@ static bitstr_t *
flowtable_mask(struct flowtable *ft)
{
bitstr_t *mask;
-
+
if (ft->ft_flags & FL_PCPU)
mask = ft->ft_masks[curcpu];
else
@@ -501,7 +509,7 @@ flowtable_set_hashkey(struct flentry *fle, uint32_t *key)
static int
flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
- uint8_t proto, struct route *ro, uint16_t flags)
+ uint8_t proto, uint32_t fibnum, struct route *ro, uint16_t flags)
{
struct flentry *fle, *fletail, *newfle, **flep;
int depth;
@@ -564,6 +572,7 @@ skip:
fle->f_rt = ro->ro_rt;
fle->f_lle = ro->ro_lle;
fle->f_fhash = hash;
+ fle->f_fibnum = fibnum;
fle->f_uptime = time_uptime;
FL_ENTRY_UNLOCK(ft, hash);
return (0);
@@ -591,13 +600,13 @@ flowtable_key_equal(struct flentry *fle, uint32_t *key)
}
int
-flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro)
+flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro, uint32_t fibnum)
{
uint32_t key[9], hash;
struct flentry *fle;
uint16_t flags;
uint8_t proto = 0;
- int error = 0, fib = 0;
+ int error = 0;
struct rtentry *rt;
struct llentry *lle;
@@ -640,6 +649,7 @@ keycheck:
&& fle->f_fhash == hash
&& flowtable_key_equal(fle, key)
&& (proto == fle->f_proto)
+ && (fibnum == fle->f_fibnum)
&& (rt->rt_flags & RTF_UP)
&& (rt->rt_ifp != NULL)) {
V_flowtable_hits++;
@@ -668,10 +678,8 @@ uncached:
* of arpresolve with an rt_check variant that expected to
* receive the route locked
*/
- if (m != NULL)
- fib = M_GETFIB(m);
- ft->ft_rtalloc(ro, hash, fib);
+ ft->ft_rtalloc(ro, hash, fibnum);
if (ro->ro_rt == NULL)
error = ENETUNREACH;
else {
@@ -692,7 +700,7 @@ uncached:
ro->ro_rt = NULL;
return (ENOENT);
}
- error = flowtable_insert(ft, hash, key, proto,
+ error = flowtable_insert(ft, hash, key, proto, fibnum,
ro, flags);
if (error) {
@@ -791,35 +799,6 @@ flowtable_alloc(int nentry, int flags)
return (ft);
}
-static void
-flowtable_init(const void *unused __unused)
-{
-
- V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
- NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
- V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
- NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
- uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
- uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
- V_flowtable_ready = 1;
-}
-
-VNET_SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
- flowtable_init, NULL);
-
-#ifdef VIMAGE
-static void
-flowtable_uninit(const void *unused __unused)
-{
-
- uma_zdestroy(V_flow_ipv4_zone);
- uma_zdestroy(V_flow_ipv6_zone);
-}
-
-VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
- flowtable_uninit, NULL);
-#endif
-
/*
* The rest of the code is devoted to garbage collection of expired entries.
* It is a new addition made necessary by the switch to dynamically allocating
@@ -973,12 +952,30 @@ flowtable_cleaner(void)
}
VNET_LIST_RUNLOCK();
+ flowclean_cycles++;
/*
* The 20 second interval between cleaning checks
* is arbitrary
*/
- pause("flowcleanwait", 20*hz);
+ mtx_lock(&flowclean_lock);
+ cv_broadcast(&flowclean_cv);
+ cv_timedwait(&flowclean_cv, &flowclean_lock, 10*hz);
+ mtx_unlock(&flowclean_lock);
+ }
+}
+
+static void
+flowtable_flush(void *unused __unused)
+{
+ uint64_t start;
+
+ mtx_lock(&flowclean_lock);
+ start = flowclean_cycles;
+ while (start == flowclean_cycles) {
+ cv_broadcast(&flowclean_cv);
+ cv_wait(&flowclean_cv, &flowclean_lock);
}
+ mtx_unlock(&flowclean_lock);
}
static struct kproc_desc flow_kp = {
@@ -988,3 +985,159 @@ static struct kproc_desc flow_kp = {
};
SYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp);
+static void
+flowtable_init_vnet(const void *unused __unused)
+{
+
+ V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
+ NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
+ V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
+ NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
+ uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows);
+ uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
+}
+VNET_SYSINIT(flowtable_init_vnet, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE,
+ flowtable_init_vnet, NULL);
+
+static void
+flowtable_init(const void *unused __unused)
+{
+
+ cv_init(&flowclean_cv, "flowcleanwait");
+ mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
+ EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
+ EVENTHANDLER_PRI_ANY);
+ V_flowtable_ready = 1;
+}
+SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
+ flowtable_init, NULL);
+
+
+#ifdef VIMAGE
+static void
+flowtable_uninit(const void *unused __unused)
+{
+
+ uma_zdestroy(V_flow_ipv4_zone);
+ uma_zdestroy(V_flow_ipv6_zone);
+}
+
+VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
+ flowtable_uninit, NULL);
+#endif
+
+#ifdef DDB
+static bitstr_t *
+flowtable_mask_pcpu(struct flowtable *ft, int cpuid)
+{
+ bitstr_t *mask;
+
+ if (ft->ft_flags & FL_PCPU)
+ mask = ft->ft_masks[cpuid];
+ else
+ mask = ft->ft_masks[0];
+
+ return (mask);
+}
+
+static struct flentry **
+flowtable_entry_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
+{
+ struct flentry **fle;
+ int index = (hash % ft->ft_size);
+
+ if (ft->ft_flags & FL_PCPU) {
+ fle = &ft->ft_table.pcpu[cpuid][index];
+ } else {
+ fle = &ft->ft_table.global[index];
+ }
+
+ return (fle);
+}
+
+static void
+flow_show(struct flowtable *ft, struct flentry *fle)
+{
+ int idle_time;
+ int rt_valid;
+
+ idle_time = (int)(time_uptime - fle->f_uptime);
+ rt_valid = fle->f_rt != NULL;
+ db_printf("hash=0x%08x idle_time=%03d rt=%p ifp=%p",
+ fle->f_fhash, idle_time,
+ fle->f_rt, rt_valid ? fle->f_rt->rt_ifp : NULL);
+ if (rt_valid && (fle->f_rt->rt_flags & RTF_UP))
+ db_printf(" RTF_UP ");
+ if (fle->f_flags & FL_STALE)
+ db_printf(" FL_STALE ");
+ db_printf("\n");
+}
+
+static void
+flowtable_show(struct flowtable *ft, int cpuid)
+{
+ int curbit = 0;
+ struct flentry *fle, **flehead;
+ bitstr_t *mask, *tmpmask;
+
+ db_printf("cpu: %d\n", cpuid);
+ mask = flowtable_mask_pcpu(ft, cpuid);
+ tmpmask = ft->ft_tmpmask;
+ memcpy(tmpmask, mask, ft->ft_size/8);
+ /*
+ * XXX Note to self, bit_ffs operates at the byte level
+ * and thus adds gratuitous overhead
+ */
+ bit_ffs(tmpmask, ft->ft_size, &curbit);
+ while (curbit != -1) {
+ if (curbit >= ft->ft_size || curbit < -1) {
+ db_printf("warning: bad curbit value %d \n",
+ curbit);
+ break;
+ }
+
+ flehead = flowtable_entry_pcpu(ft, curbit, cpuid);
+ fle = *flehead;
+
+ while (fle != NULL) {
+ flow_show(ft, fle);
+ fle = fle->f_next;
+ continue;
+ }
+ bit_clear(tmpmask, curbit);
+ bit_ffs(tmpmask, ft->ft_size, &curbit);
+ }
+}
+
+static void
+flowtable_show_vnet(void)
+{
+ struct flowtable *ft;
+ int i;
+
+ ft = V_flow_list_head;
+ while (ft != NULL) {
+ if (ft->ft_flags & FL_PCPU) {
+ for (i = 0; i <= mp_maxid; i++) {
+ if (CPU_ABSENT(i))
+ continue;
+ flowtable_show(ft, i);
+ }
+ } else {
+ flowtable_show(ft, 0);
+ }
+ ft = ft->ft_next;
+ }
+}
+
+DB_SHOW_COMMAND(flowtables, db_show_flowtables)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ flowtable_show_vnet();
+ CURVNET_RESTORE();
+ }
+}
+#endif
diff --git a/sys/net/flowtable.h b/sys/net/flowtable.h
index 6334d8b..afc8fb7 100644
--- a/sys/net/flowtable.h
+++ b/sys/net/flowtable.h
@@ -49,7 +49,7 @@ struct flowtable *flowtable_alloc(int nentry, int flags);
*
*/
int flowtable_lookup(struct flowtable *ft, struct mbuf *m,
- struct route *ro);
+ struct route *ro, uint32_t fibnum);
#endif /* _KERNEL */
#endif
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index 33ba838..2ce94b5 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -157,7 +157,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
* longer than that long for the stability of ro_rt. The
* flow ID assignment must have happened before this point.
*/
- if (flowtable_lookup(V_ip_ft, m, ro) == 0)
+ if (flowtable_lookup(V_ip_ft, m, ro, M_GETFIB(m)) == 0)
nortfree = 1;
#endif
}
OpenPOWER on IntegriCloud