summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkmacy <kmacy@FreeBSD.org>2010-03-22 23:04:12 +0000
committerkmacy <kmacy@FreeBSD.org>2010-03-22 23:04:12 +0000
commit01cb21605be06f4d949c56cfab80ad09d2132931 (patch)
treee3278a7298633ae642d6b3590dc38d4d5f5643d1
parent122090fb7e8c9dc016b8cb0cb8904d6be5cdb272 (diff)
downloadFreeBSD-src-01cb21605be06f4d949c56cfab80ad09d2132931.zip
FreeBSD-src-01cb21605be06f4d949c56cfab80ad09d2132931.tar.gz
- boot-time size the ipv4 flowtable and the maximum number of flows
- increase flow cleaning frequency and decrease flow caching time when near the flow limit
- stop allocating new flows when within 3% of maxflows; don't start allocating again until the flow count has dropped more than 12.5% below maxflows

MFC after: 7 days
-rw-r--r--sys/net/flowtable.c106
-rw-r--r--sys/netinet/ip_input.c16
2 files changed, 96 insertions, 26 deletions
diff --git a/sys/net/flowtable.c b/sys/net/flowtable.c
index fe79c80..39b6b40 100644
--- a/sys/net/flowtable.c
+++ b/sys/net/flowtable.c
@@ -155,30 +155,33 @@ struct flowtable_stats {
uint64_t ft_frees;
uint64_t ft_hits;
uint64_t ft_lookups;
-} __aligned(128);
+} __aligned(CACHE_LINE_SIZE);
struct flowtable {
struct flowtable_stats ft_stats[MAXCPU];
int ft_size;
int ft_lock_count;
uint32_t ft_flags;
-
- uint32_t ft_udp_idle;
- uint32_t ft_fin_wait_idle;
- uint32_t ft_syn_idle;
- uint32_t ft_tcp_idle;
-
char *ft_name;
fl_lock_t *ft_lock;
fl_lock_t *ft_unlock;
fl_rtalloc_t *ft_rtalloc;
+ /*
+ * XXX need to pad out
+ */
struct mtx *ft_locks;
-
union flentryp ft_table;
bitstr_t *ft_masks[MAXCPU];
bitstr_t *ft_tmpmask;
struct flowtable *ft_next;
-} __aligned(128);
+
+ uint32_t ft_count __aligned(CACHE_LINE_SIZE);
+ uint32_t ft_udp_idle __aligned(CACHE_LINE_SIZE);
+ uint32_t ft_fin_wait_idle;
+ uint32_t ft_syn_idle;
+ uint32_t ft_tcp_idle;
+ boolean_t ft_full;
+} __aligned(CACHE_LINE_SIZE);
static struct proc *flowcleanerproc;
static VNET_DEFINE(struct flowtable *, flow_list_head);
@@ -191,9 +194,11 @@ static VNET_DEFINE(uma_zone_t, flow_ipv6_zone);
#define V_flow_ipv4_zone VNET(flow_ipv4_zone)
#define V_flow_ipv6_zone VNET(flow_ipv6_zone)
+
static struct cv flowclean_cv;
static struct mtx flowclean_lock;
static uint32_t flowclean_cycles;
+static uint32_t flowclean_freq;
#ifdef FLOWTABLE_DEBUG
#define FLDPRINTF(ft, flags, fmt, ...) \
@@ -230,7 +235,7 @@ static VNET_DEFINE(int, flowtable_syn_expire) = SYN_IDLE;
static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE;
static VNET_DEFINE(int, flowtable_fin_wait_expire) = FIN_WAIT_IDLE;
static VNET_DEFINE(int, flowtable_tcp_expire) = TCP_IDLE;
-static VNET_DEFINE(int, flowtable_nmbflows) = 4096;
+static VNET_DEFINE(int, flowtable_nmbflows);
static VNET_DEFINE(int, flowtable_ready) = 0;
#define V_flowtable_enable VNET(flowtable_enable)
@@ -905,6 +910,61 @@ flowtable_set_hashkey(struct flentry *fle, uint32_t *key)
hashkey[i] = key[i];
}
+static struct flentry *
+flow_alloc(struct flowtable *ft)
+{
+ struct flentry *newfle;
+ uma_zone_t zone;
+
+ newfle = NULL;
+ zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
+
+ newfle = uma_zalloc(zone, M_NOWAIT | M_ZERO);
+ if (newfle != NULL)
+ atomic_add_int(&ft->ft_count, 1);
+ return (newfle);
+}
+
+static void
+flow_free(struct flentry *fle, struct flowtable *ft)
+{
+ uma_zone_t zone;
+
+ zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
+ atomic_add_int(&ft->ft_count, -1);
+ uma_zfree(zone, fle);
+}
+
+static int
+flow_full(struct flowtable *ft)
+{
+ boolean_t full;
+ uint32_t count;
+
+ full = ft->ft_full;
+ count = ft->ft_count;
+
+ if (full && (count < (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 3))))
+ ft->ft_full = FALSE;
+ else if (!full && (count > (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 5))))
+ ft->ft_full = TRUE;
+
+ if (full && !ft->ft_full) {
+ flowclean_freq = 4*hz;
+ if ((ft->ft_flags & FL_HASH_ALL) == 0)
+ ft->ft_udp_idle = ft->ft_fin_wait_idle =
+ ft->ft_syn_idle = ft->ft_tcp_idle = 5;
+ cv_broadcast(&flowclean_cv);
+ } else if (!full && ft->ft_full) {
+ flowclean_freq = 20*hz;
+ if ((ft->ft_flags & FL_HASH_ALL) == 0)
+ ft->ft_udp_idle = ft->ft_fin_wait_idle =
+ ft->ft_syn_idle = ft->ft_tcp_idle = 30;
+ }
+
+ return (ft->ft_full);
+}
+
static int
flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
uint32_t fibnum, struct route *ro, uint16_t flags)
@@ -912,12 +972,10 @@ flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
struct flentry *fle, *fletail, *newfle, **flep;
struct flowtable_stats *fs = &ft->ft_stats[curcpu];
int depth;
- uma_zone_t flezone;
bitstr_t *mask;
uint8_t proto;
- flezone = (flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
- newfle = uma_zalloc(flezone, M_NOWAIT | M_ZERO);
+ newfle = flow_alloc(ft);
if (newfle == NULL)
return (ENOMEM);
@@ -948,9 +1006,8 @@ flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
* or we lost a race to insert
*/
FL_ENTRY_UNLOCK(ft, hash);
- uma_zfree((newfle->f_flags & FL_IPV6) ?
- V_flow_ipv6_zone : V_flow_ipv4_zone, newfle);
-
+ flow_free(newfle, ft);
+
if (flags & FL_OVERWRITE)
goto skip;
return (EEXIST);
@@ -1147,7 +1204,7 @@ keycheck:
}
FL_ENTRY_UNLOCK(ft, hash);
uncached:
- if (flags & FL_NOAUTO)
+ if (flags & FL_NOAUTO || flow_full(ft))
return (NULL);
fs->ft_misses++;
@@ -1325,7 +1382,7 @@ flowtable_alloc(char *name, int nentry, int flags)
*
*/
static void
-fle_free(struct flentry *fle)
+fle_free(struct flentry *fle, struct flowtable *ft)
{
struct rtentry *rt;
struct llentry *lle;
@@ -1334,8 +1391,7 @@ fle_free(struct flentry *fle)
lle = __DEVOLATILE(struct llentry *, fle->f_lle);
RTFREE(rt);
LLE_FREE(lle);
- uma_zfree((fle->f_flags & FL_IPV6) ?
- V_flow_ipv6_zone : V_flow_ipv4_zone, fle);
+ flow_free(fle, ft);
}
static void
@@ -1426,7 +1482,7 @@ flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
flefreehead = fle->f_next;
count++;
fs->ft_frees++;
- fle_free(fle);
+ fle_free(fle, ft);
}
if (V_flowtable_debug && count)
log(LOG_DEBUG, "freed %d flow entries\n", count);
@@ -1518,7 +1574,7 @@ flowtable_cleaner(void)
*/
mtx_lock(&flowclean_lock);
cv_broadcast(&flowclean_cv);
- cv_timedwait(&flowclean_cv, &flowclean_lock, 10*hz);
+ cv_timedwait(&flowclean_cv, &flowclean_lock, flowclean_freq);
mtx_unlock(&flowclean_lock);
}
}
@@ -1548,6 +1604,7 @@ static void
flowtable_init_vnet(const void *unused __unused)
{
+ V_flowtable_nmbflows = 1024 + maxusers * 64 * mp_ncpus;
V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET);
V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
@@ -1556,7 +1613,7 @@ flowtable_init_vnet(const void *unused __unused)
uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows);
V_flowtable_ready = 1;
}
-VNET_SYSINIT(flowtable_init_vnet, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE,
+VNET_SYSINIT(flowtable_init_vnet, SI_SUB_SMP, SI_ORDER_ANY,
flowtable_init_vnet, NULL);
static void
@@ -1567,8 +1624,9 @@ flowtable_init(const void *unused __unused)
mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF);
EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL,
EVENTHANDLER_PRI_ANY);
+ flowclean_freq = 20*hz;
}
-SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY,
+SYSINIT(flowtable_init, SI_SUB_SMP, SI_ORDER_MIDDLE,
flowtable_init, NULL);
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index d91bcb0..8be51fb 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -327,8 +327,20 @@ ip_init(void)
"error %d\n", __func__, i);
#ifdef FLOWTABLE
- TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size",
- &V_ip_output_flowtable_size);
+ if (TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size",
+ &V_ip_output_flowtable_size)) {
+ if (V_ip_output_flowtable_size < 256)
+ V_ip_output_flowtable_size = 256;
+ if (!powerof2(V_ip_output_flowtable_size)) {
+ printf("flowtable must be power of 2 size\n");
+ V_ip_output_flowtable_size = 2048;
+ }
+ } else {
+ /*
+ * round up to the next power of 2
+ */
+ V_ip_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1);
+ }
V_ip_ft = flowtable_alloc("ipv4", V_ip_output_flowtable_size, FL_PCPU);
#endif
OpenPOWER on IntegriCloud