summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/net/flowtable.c870
-rw-r--r--sys/net/flowtable.h35
-rw-r--r--sys/net/if_llatbl.c4
-rw-r--r--sys/net/if_llatbl.h2
-rw-r--r--sys/netinet/ip_input.c2
-rw-r--r--sys/netinet/ip_output.c22
6 files changed, 763 insertions, 172 deletions
diff --git a/sys/net/flowtable.c b/sys/net/flowtable.c
index b7ec578..2e9f045 100644
--- a/sys/net/flowtable.c
+++ b/sys/net/flowtable.c
@@ -1,6 +1,6 @@
/**************************************************************************
-Copyright (c) 2008-2009, BitGravity Inc.
+Copyright (c) 2008-2010, BitGravity Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -30,6 +30,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "opt_route.h"
#include "opt_mpath.h"
#include "opt_ddb.h"
+#include "opt_inet.h"
+#include "opt_inet6.h"
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -45,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
+#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
@@ -63,6 +66,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/sctp.h>
@@ -140,31 +146,39 @@ union flentryp {
struct flentry **pcpu[MAXCPU];
};
+struct flowtable_stats { /* event counters for one flow table; struct flowtable keeps MAXCPU of these in ft_stats[] */
+ uint64_t ft_collisions; /* bumped by flowtable_insert() before it walks an occupied bucket chain */
+ uint64_t ft_allocated; /* NOTE(review): no update site is visible in this change - confirm where it is maintained */
+ uint64_t ft_misses; /* lookups that reached the "uncached" path in flowtable_lookup() */
+ uint64_t ft_max_depth; /* deepest chain depth recorded by flowtable_insert(); reported as a maximum, not a sum */
+ uint64_t ft_free_checks; /* buckets examined by flowtable_free_stale() */
+ uint64_t ft_frees; /* entries released by flowtable_free_stale() */
+ uint64_t ft_hits; /* lookups satisfied by an existing entry */
+ uint64_t ft_lookups; /* lookups that got as far as probing the table */
+} __aligned(128); /* presumably padded so each CPU's slot sits on its own cache lines - TODO confirm */
+
struct flowtable {
+ struct flowtable_stats ft_stats[MAXCPU];
int ft_size;
int ft_lock_count;
uint32_t ft_flags;
- uint32_t ft_collisions;
- uint32_t ft_allocated;
- uint32_t ft_misses;
- uint64_t ft_hits;
uint32_t ft_udp_idle;
uint32_t ft_fin_wait_idle;
uint32_t ft_syn_idle;
uint32_t ft_tcp_idle;
+ char *ft_name;
fl_lock_t *ft_lock;
fl_lock_t *ft_unlock;
fl_rtalloc_t *ft_rtalloc;
struct mtx *ft_locks;
-
union flentryp ft_table;
bitstr_t *ft_masks[MAXCPU];
bitstr_t *ft_tmpmask;
struct flowtable *ft_next;
-};
+} __aligned(128);
static struct proc *flowcleanerproc;
static VNET_DEFINE(struct flowtable *, flow_list_head);
@@ -181,12 +195,24 @@ static struct cv flowclean_cv;
static struct mtx flowclean_lock;
static uint32_t flowclean_cycles;
+/*
+ * FLDPRINTF(ft, flags, fmt, ...) - conditional debug printf.
+ *
+ * With FLOWTABLE_DEBUG defined, prints via printf() when any bit of
+ * "flags" is set in (ft)->ft_flags; otherwise it compiles to nothing.
+ * At least one argument must follow "fmt".
+ *
+ * Both variants expand to a single statement WITHOUT a trailing
+ * semicolon, so "if (x) FLDPRINTF(...); else ..." parses correctly.
+ */
+#ifdef FLOWTABLE_DEBUG
+#define FLDPRINTF(ft, flags, fmt, ...)				\
+do {								\
+	if ((ft)->ft_flags & (flags))				\
+		printf((fmt), __VA_ARGS__);			\
+} while (0)
+#else
+#define FLDPRINTF(ft, flags, fmt, ...)	do { } while (0)
+#endif
+
+
/*
* TODO:
* - Make flowtable stats per-cpu, aggregated at sysctl call time,
* to avoid extra cache evictions caused by incrementing a shared
* counter
- * - add IPv6 support to flow lookup
* - add sysctls to resize && flush flow tables
* - Add per flowtable sysctls for statistics and configuring timeouts
* - add saturation counter to rtentry to support per-packet load-balancing
@@ -200,13 +226,6 @@ static uint32_t flowclean_cycles;
*/
VNET_DEFINE(int, flowtable_enable) = 1;
static VNET_DEFINE(int, flowtable_debug);
-static VNET_DEFINE(int, flowtable_hits);
-static VNET_DEFINE(int, flowtable_lookups);
-static VNET_DEFINE(int, flowtable_misses);
-static VNET_DEFINE(int, flowtable_frees);
-static VNET_DEFINE(int, flowtable_free_checks);
-static VNET_DEFINE(int, flowtable_max_depth);
-static VNET_DEFINE(int, flowtable_collisions);
static VNET_DEFINE(int, flowtable_syn_expire) = SYN_IDLE;
static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE;
static VNET_DEFINE(int, flowtable_fin_wait_expire) = FIN_WAIT_IDLE;
@@ -216,13 +235,6 @@ static VNET_DEFINE(int, flowtable_ready) = 0;
#define V_flowtable_enable VNET(flowtable_enable)
#define V_flowtable_debug VNET(flowtable_debug)
-#define V_flowtable_hits VNET(flowtable_hits)
-#define V_flowtable_lookups VNET(flowtable_lookups)
-#define V_flowtable_misses VNET(flowtable_misses)
-#define V_flowtable_frees VNET(flowtable_frees)
-#define V_flowtable_free_checks VNET(flowtable_free_checks)
-#define V_flowtable_max_depth VNET(flowtable_max_depth)
-#define V_flowtable_collisions VNET(flowtable_collisions)
#define V_flowtable_syn_expire VNET(flowtable_syn_expire)
#define V_flowtable_udp_expire VNET(flowtable_udp_expire)
#define V_flowtable_fin_wait_expire VNET(flowtable_fin_wait_expire)
@@ -235,20 +247,6 @@ SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, debug, CTLFLAG_RW,
&VNET_NAME(flowtable_debug), 0, "print debug info.");
SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, enable, CTLFLAG_RW,
&VNET_NAME(flowtable_enable), 0, "enable flowtable caching.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, hits, CTLFLAG_RD,
- &VNET_NAME(flowtable_hits), 0, "# flowtable hits.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, lookups, CTLFLAG_RD,
- &VNET_NAME(flowtable_lookups), 0, "# flowtable lookups.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, misses, CTLFLAG_RD,
- &VNET_NAME(flowtable_misses), 0, "#flowtable misses.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, frees, CTLFLAG_RD,
- &VNET_NAME(flowtable_frees), 0, "#flows freed.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, free_checks, CTLFLAG_RD,
- &VNET_NAME(flowtable_free_checks), 0, "#flows free checks.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, max_depth, CTLFLAG_RD,
- &VNET_NAME(flowtable_max_depth), 0, "max collision list length.");
-SYSCTL_VNET_INT(_net_inet_flowtable, OID_AUTO, collisions, CTLFLAG_RD,
- &VNET_NAME(flowtable_collisions), 0, "#flowtable collisions.");
/*
* XXX This does not end up updating timeouts at runtime
@@ -298,6 +296,77 @@ SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, nmbflows,
CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_nmbflows, "IU",
"Maximum number of flows allowed");
+
+
+/*
+ * Append "\t<field>=<value>" for one flowtable_stats counter to an sbuf.
+ * Expects a local named "fs" pointing at the struct flowtable_stats being
+ * printed.  The counters are uint64_t, so they are cast to uintmax_t to
+ * match the %ju conversion.
+ */
+#define FS_PRINT(sb, field)						\
+	sbuf_printf((sb), "\t%s=%ju", #field, (uintmax_t)fs->ft_##field)
+
+/*
+ * Format every counter in *fs on one line and write it to the console.
+ *
+ * The text is assembled in a temporary fixed-length sbuf, which is
+ * printed and then released before returning (previously the sbuf was
+ * finished but never emitted or freed).
+ */
+static void
+fs_print(struct flowtable_stats *fs)
+{
+	struct sbuf *sb;
+
+	sb = sbuf_new(NULL, NULL, 32*1024, SBUF_FIXEDLEN);
+	if (sb == NULL)
+		return;
+
+	FS_PRINT(sb, collisions);
+	FS_PRINT(sb, allocated);
+	FS_PRINT(sb, misses);
+	FS_PRINT(sb, max_depth);
+	FS_PRINT(sb, free_checks);
+	FS_PRINT(sb, frees);
+	FS_PRINT(sb, hits);
+	FS_PRINT(sb, lookups);
+	sbuf_finish(sb);
+
+	printf("%s\n", sbuf_data(sb));
+	sbuf_delete(sb);
+}
+
+/*
+ * Print the statistics of one flow table.
+ *
+ * The counters are always summed over every present CPU: the code that
+ * updates them indexes ft_stats[] by curcpu whether or not the table is
+ * FL_PCPU, so reading only slot 0 for a shared table would drop counts
+ * recorded on other CPUs.  ft_max_depth is combined as a maximum
+ * rather than a sum.
+ */
+static void
+flowtable_show_stats(struct flowtable *ft)
+{
+	struct flowtable_stats total, *cpu_fs;
+	int i;
+
+	bzero(&total, sizeof(total));
+	for (i = 0; i <= mp_maxid; i++) {
+		if (CPU_ABSENT(i))
+			continue;
+		cpu_fs = &ft->ft_stats[i];
+		total.ft_collisions += cpu_fs->ft_collisions;
+		total.ft_allocated += cpu_fs->ft_allocated;
+		total.ft_misses += cpu_fs->ft_misses;
+		total.ft_free_checks += cpu_fs->ft_free_checks;
+		total.ft_frees += cpu_fs->ft_frees;
+		total.ft_hits += cpu_fs->ft_hits;
+		total.ft_lookups += cpu_fs->ft_lookups;
+		if (cpu_fs->ft_max_depth > total.ft_max_depth)
+			total.ft_max_depth = cpu_fs->ft_max_depth;
+	}
+
+	fs_print(&total);
+}
+
+/*
+ * Handler for the "stats" node under _net_inet_flowtable.
+ *
+ * Walks the current vnet's list of flow tables and, for each one,
+ * prints its name followed by its statistics using printf().  Nothing
+ * is copied out through the sysctl request; the handler always
+ * returns 0.
+ */
+static int
+sysctl_flowtable_stats(SYSCTL_HANDLER_ARGS)
+{
+	struct flowtable *cur;
+
+	for (cur = V_flow_list_head; cur != NULL; cur = cur->ft_next) {
+		printf("name: %s\n", cur->ft_name);
+		flowtable_show_stats(cur);
+	}
+
+	return (0);
+}
+SYSCTL_VNET_PROC(_net_inet_flowtable, OID_AUTO, stats,
+ CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_flowtable_stats, "IU",
+ "flowtable statistics");
+
+
#ifndef RADIX_MPATH
static void
in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum)
@@ -342,52 +411,122 @@ flowtable_pcpu_unlock(struct flowtable *table, uint32_t hash)
#define FL_ENTRY_LOCK(table, hash) (table)->ft_lock((table), (hash))
#define FL_ENTRY_UNLOCK(table, hash) (table)->ft_unlock((table), (hash))
-#define FL_STALE (1<<8)
-#define FL_IPV6 (1<<9)
+#define FL_STALE (1<<8)
+#define FL_IPV6 (1<<9)
+#define FL_OVERWRITE (1<<10)
-static uint32_t
-ipv4_flow_lookup_hash_internal(struct mbuf *m, struct route *ro,
- uint32_t *key, uint16_t *flags, uint8_t *protop)
+void
+flow_invalidate(struct flentry *fle)
{
- uint16_t sport = 0, dport = 0;
- struct ip *ip = NULL;
- uint8_t proto = 0;
+
+ fle->f_flags |= FL_STALE;
+}
+
+/*
+ * Map an IP protocol number to the matching FL_* protocol flag.
+ * Returns FL_TCP, FL_SCTP or FL_UDP, or 0 for any other protocol.
+ */
+static __inline int
+proto_to_flags(uint8_t proto)
+{
+
+	switch (proto) {
+	case IPPROTO_TCP:
+		return (FL_TCP);
+	case IPPROTO_SCTP:
+		return (FL_SCTP);
+	case IPPROTO_UDP:
+		return (FL_UDP);
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Inverse of proto_to_flags(): recover the IP protocol number from the
+ * FL_TCP/FL_SCTP/FL_UDP bits of "flags".  Returns 0 when none of the
+ * three bits is set, or when more than one is set.
+ */
+static __inline int
+flags_to_proto(int flags)
+{
+
+	switch (flags & (FL_TCP|FL_SCTP|FL_UDP)) {
+	case FL_TCP:
+		return (IPPROTO_TCP);
+	case FL_SCTP:
+		return (IPPROTO_SCTP);
+	case FL_UDP:
+		return (IPPROTO_UDP);
+	default:
+		return (0);
+	}
+}
+
+#ifdef INET
+#ifdef FLOWTABLE_DEBUG
+/*
+ * Debug helper: print an IPv4 flow.
+ *
+ * With FL_HASH_ALL set in "flags" the protocol and both address:port
+ * pairs are printed; otherwise only the protocol and the destination
+ * address.  Ports are converted with ntohs() for display.
+ */
+static void
+ipv4_flow_print_tuple(int flags, int proto, struct sockaddr_in *ssin,
+    struct sockaddr_in *dsin)
+{
+	char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
+
+	inet_ntoa_r(dsin->sin_addr, daddr);
+	if ((flags & FL_HASH_ALL) == 0) {
+		printf("proto=%d %s\n", proto, daddr);
+		return;
+	}
+
+	inet_ntoa_r(ssin->sin_addr, saddr);
+	printf("proto=%d %s:%d->%s:%d\n",
+	    proto, saddr, ntohs(ssin->sin_port), daddr,
+	    ntohs(dsin->sin_port));
+}
+#endif
+
+static int
+ipv4_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
+ struct sockaddr_in *ssin, struct sockaddr_in *dsin, uint16_t *flags)
+{
+ struct ip *ip;
+ uint8_t proto;
int iphlen;
- uint32_t hash;
- struct sockaddr_in *sin;
struct tcphdr *th;
struct udphdr *uh;
struct sctphdr *sh;
+ uint16_t sport, dport;
- if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
- return (0);
-
- key[1] = key[0] = 0;
- sin = (struct sockaddr_in *)&ro->ro_dst;
- if (m != NULL) {
- ip = mtod(m, struct ip *);
- sin->sin_family = AF_INET;
- sin->sin_len = sizeof(*sin);
- sin->sin_addr = ip->ip_dst;
- } else
- *flags &= ~FL_HASH_PORTS;
-
- key[2] = sin->sin_addr.s_addr;
+ proto = sport = dport = 0;
+ ip = mtod(m, struct ip *);
+ dsin->sin_family = AF_INET;
+ dsin->sin_len = sizeof(*dsin);
+ dsin->sin_addr = ip->ip_dst;
+ ssin->sin_family = AF_INET;
+ ssin->sin_len = sizeof(*ssin);
+ ssin->sin_addr = ip->ip_src;
- if ((*flags & FL_HASH_PORTS) == 0)
+ proto = ip->ip_p;
+ if ((*flags & FL_HASH_ALL) == 0) {
+ FLDPRINTF(ft, FL_DEBUG_ALL, "skip port check flags=0x%x ",
+ *flags);
goto skipports;
+ }
- proto = ip->ip_p;
iphlen = ip->ip_hl << 2; /* XXX options? */
- key[1] = ip->ip_src.s_addr;
-
+
switch (proto) {
case IPPROTO_TCP:
th = (struct tcphdr *)((caddr_t)ip + iphlen);
- sport = ntohs(th->th_sport);
- dport = ntohs(th->th_dport);
- *flags |= th->th_flags;
- if (*flags & TH_RST)
+ sport = th->th_sport;
+ dport = th->th_dport;
+ if ((*flags & FL_HASH_ALL) &&
+ (th->th_flags & (TH_RST|TH_FIN)))
*flags |= FL_STALE;
break;
case IPPROTO_UDP:
@@ -401,38 +540,288 @@ ipv4_flow_lookup_hash_internal(struct mbuf *m, struct route *ro,
dport = sh->dest_port;
break;
default:
- if (*flags & FL_HASH_PORTS)
- goto noop;
+ FLDPRINTF(ft, FL_DEBUG_ALL, "proto=0x%x not supported\n", proto);
+ return (ENOTSUP);
/* no port - hence not a protocol we care about */
break;
}
- *protop = proto;
- /*
- * If this is a transmit route cache then
- * hash all flows to a given destination to
- * the same bucket
- */
- if ((*flags & FL_HASH_PORTS) == 0)
- proto = sport = dport = 0;
+skipports:
+ *flags |= proto_to_flags(proto);
+ ssin->sin_port = sport;
+ dsin->sin_port = dport;
+ return (0);
+}
- ((uint16_t *)key)[0] = sport;
- ((uint16_t *)key)[1] = dport;
+static uint32_t
+ipv4_flow_lookup_hash_internal(
+ struct sockaddr_in *ssin, struct sockaddr_in *dsin,
+ uint32_t *key, uint16_t flags)
+{
+ uint16_t sport, dport;
+ uint8_t proto;
+ int offset = 0;
-skipports:
- hash = jenkins_hashword(key, 3, V_flow_hashjitter + proto);
- if (m != NULL && (m->m_flags & M_FLOWID) == 0) {
- m->m_flags |= M_FLOWID;
- m->m_pkthdr.flowid = hash;
+ if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
+ return (0);
+ proto = flags_to_proto(flags);
+ sport = dport = key[2] = key[1] = key[0] = 0;
+ if ((ssin != NULL) && (flags & FL_HASH_ALL)) {
+ key[1] = ssin->sin_addr.s_addr;
+ sport = ssin->sin_port;
+ }
+ if (dsin != NULL) {
+ key[2] = dsin->sin_addr.s_addr;
+ dport = dsin->sin_port;
+ }
+ if (flags & FL_HASH_ALL) {
+ ((uint16_t *)key)[0] = sport;
+ ((uint16_t *)key)[1] = dport;
+ } else
+ offset = V_flow_hashjitter + proto;
+
+ return (jenkins_hashword(key, 3, offset));
+}
+
+/*
+ * Look up the flow entry for an IPv4 packet.
+ *
+ * The source and destination sockaddrs are extracted from the mbuf by
+ * ipv4_mbuf_demarshal(), starting from the table's own flags, and then
+ * handed to flowtable_lookup() together with the mbuf's FIB.  Returns
+ * NULL if the packet could not be demarshalled or no entry results.
+ */
+static struct flentry *
+flowtable_lookup_mbuf4(struct flowtable *ft, struct mbuf *m)
+{
+	struct sockaddr_storage src, dst;
+	uint16_t flags = ft->ft_flags;
+	int error;
+
+	error = ipv4_mbuf_demarshal(ft, m, (struct sockaddr_in *)&src,
+	    (struct sockaddr_in *)&dst, &flags);
+	if (error != 0)
+		return (NULL);
+
+	return (flowtable_lookup(ft, &src, &dst, M_GETFIB(m), flags));
+}
+
+/*
+ * Fill in an IPv4 struct route from a flow entry.
+ *
+ * ro_dst becomes an AF_INET sockaddr whose address is word 2 of the
+ * entry's hash key (where ipv4_flow_lookup_hash_internal() stores the
+ * destination address); ro_rt and ro_lle are taken from the entry with
+ * their volatile qualifier removed.
+ */
+void
+flow_to_route(struct flentry *fle, struct route *ro)
+{
+	struct flentry_v4 *fle4 = (struct flentry_v4 *)fle;
+	struct sockaddr_in *dst = (struct sockaddr_in *)&ro->ro_dst;
+
+	dst->sin_family = AF_INET;
+	dst->sin_len = sizeof(*dst);
+	dst->sin_addr.s_addr = fle4->fl_flow.ipf_key[2];
+
+	ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
+	ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
+}
+#endif /* INET */
+
+#ifdef INET6
+/*
+ * PULLUP_TO(len, p, T) checks that mbuf m already holds at least
+ * len + sizeof(T) bytes (compared against m->m_len).  It does NOT pull
+ * any data up: if the bytes are not there it jumps to the caller's
+ * receive_failed label; otherwise it sets p to point at offset "len"
+ * within the mbuf data.  The macro relies on a variable named m and a
+ * label named receive_failed being in scope at the point of use.
+ * WARNING: the pointer might become stale after other pullups (but we
+ * never use it this way).
+ *
+ * TCP(), SCTP() and UDP() cast such a pointer to the matching
+ * transport header type.
+ */
+#define PULLUP_TO(_len, p, T) \
+do { \
+ int x = (_len) + sizeof(T); \
+ if ((m)->m_len < x) { \
+ goto receive_failed; \
+ } \
+ p = (mtod(m, char *) + (_len)); \
+} while (0)
+
+#define TCP(p) ((struct tcphdr *)(p))
+#define SCTP(p) ((struct sctphdr *)(p))
+#define UDP(p) ((struct udphdr *)(p))
+
+static int
+ipv6_mbuf_demarshal(struct flowtable *ft, struct mbuf *m,
+ struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6, uint16_t *flags)
+{
+ struct ip6_hdr *ip6;
+ uint8_t proto;
+ int hlen;
+ uint16_t src_port, dst_port;
+ u_short offset;
+ void *ulp;
+
+ offset = hlen = src_port = dst_port = 0;
+ ulp = NULL;
+ ip6 = mtod(m, struct ip6_hdr *);
+ hlen = sizeof(struct ip6_hdr);
+ proto = ip6->ip6_nxt;
+
+ if ((*flags & FL_HASH_ALL) == 0)
+ goto skipports;
+
+ while (ulp == NULL) {
+ switch (proto) {
+ case IPPROTO_ICMPV6:
+ case IPPROTO_OSPFIGP:
+ case IPPROTO_PIM:
+ case IPPROTO_CARP:
+ case IPPROTO_ESP:
+ case IPPROTO_NONE:
+ ulp = ip6;
+ break;
+ case IPPROTO_TCP:
+ PULLUP_TO(hlen, ulp, struct tcphdr);
+ dst_port = TCP(ulp)->th_dport;
+ src_port = TCP(ulp)->th_sport;
+ if ((*flags & FL_HASH_ALL) &&
+ (TCP(ulp)->th_flags & (TH_RST|TH_FIN)))
+ *flags |= FL_STALE;
+ break;
+ case IPPROTO_SCTP:
+ PULLUP_TO(hlen, ulp, struct sctphdr);
+ src_port = SCTP(ulp)->src_port;
+ dst_port = SCTP(ulp)->dest_port;
+ break;
+ case IPPROTO_UDP:
+ PULLUP_TO(hlen, ulp, struct udphdr);
+ dst_port = UDP(ulp)->uh_dport;
+ src_port = UDP(ulp)->uh_sport;
+ break;
+ case IPPROTO_HOPOPTS: /* RFC 2460 */
+ PULLUP_TO(hlen, ulp, struct ip6_hbh);
+ hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
+ proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
+ ulp = NULL;
+ break;
+ case IPPROTO_ROUTING: /* RFC 2460 */
+ PULLUP_TO(hlen, ulp, struct ip6_rthdr);
+ hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3;
+ proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt;
+ ulp = NULL;
+ break;
+ case IPPROTO_FRAGMENT: /* RFC 2460 */
+ PULLUP_TO(hlen, ulp, struct ip6_frag);
+ hlen += sizeof (struct ip6_frag);
+ proto = ((struct ip6_frag *)ulp)->ip6f_nxt;
+ offset = ((struct ip6_frag *)ulp)->ip6f_offlg &
+ IP6F_OFF_MASK;
+ ulp = NULL;
+ break;
+ case IPPROTO_DSTOPTS: /* RFC 2460 */
+ PULLUP_TO(hlen, ulp, struct ip6_hbh);
+ hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3;
+ proto = ((struct ip6_hbh *)ulp)->ip6h_nxt;
+ ulp = NULL;
+ break;
+ case IPPROTO_AH: /* RFC 2402 */
+ PULLUP_TO(hlen, ulp, struct ip6_ext);
+ hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2;
+ proto = ((struct ip6_ext *)ulp)->ip6e_nxt;
+ ulp = NULL;
+ break;
+ default:
+ PULLUP_TO(hlen, ulp, struct ip6_ext);
+ break;
+ }
+ }
+
+ if (src_port == 0) {
+ receive_failed:
+ return (ENOTSUP);
}
- return (hash);
-noop:
- *protop = proto;
+skipports:
+ dsin6->sin6_family = AF_INET6;
+ dsin6->sin6_len = sizeof(*dsin6);
+ dsin6->sin6_port = dst_port;
+ memcpy(&dsin6->sin6_addr, &ip6->ip6_dst, sizeof(struct in6_addr));
+
+ ssin6->sin6_family = AF_INET6;
+ ssin6->sin6_len = sizeof(*ssin6);
+ ssin6->sin6_port = src_port;
+ memcpy(&ssin6->sin6_addr, &ip6->ip6_src, sizeof(struct in6_addr));
+ *flags |= proto_to_flags(proto);
+
return (0);
}
+#define zero_key(key) /* clear elements 0..8 of key, i.e. the nine words used for an IPv6 flow key */ \
+do { \
+ key[0] = 0; \
+ key[1] = 0; \
+ key[2] = 0; \
+ key[3] = 0; \
+ key[4] = 0; \
+ key[5] = 0; \
+ key[6] = 0; \
+ key[7] = 0; \
+ key[8] = 0; \
+} while (0)
+
+static uint32_t /* builds the nine-word IPv6 flow key in key[] and returns jenkins_hashword() of it */
+ipv6_flow_lookup_hash_internal(
+ struct sockaddr_in6 *ssin6, struct sockaddr_in6 *dsin6,
+ uint32_t *key, uint16_t flags)
+{
+ uint16_t sport, dport;
+ uint8_t proto;
+ int offset = 0; /* third argument to jenkins_hashword(); stays 0 when FL_HASH_ALL is set */
+
+ if ((V_flowtable_enable == 0) || (V_flowtable_ready == 0))
+ return (0); /* flowtable disabled or not ready: key[] is left unwritten on this path */
+
+ proto = flags_to_proto(flags);
+ zero_key(key);
+ sport = dport = 0;
+ if (dsin6 != NULL) {
+ memcpy(&key[1], &dsin6->sin6_addr, sizeof(struct in6_addr)); /* words 1-4: destination address */
+ dport = dsin6->sin6_port;
+ }
+ if ((ssin6 != NULL) && (flags & FL_HASH_ALL)) { /* the source only takes part when hashing the full tuple */
+ memcpy(&key[5], &ssin6->sin6_addr, sizeof(struct in6_addr)); /* words 5-8: source address */
+ sport = ssin6->sin6_port;
+ }
+ if (flags & FL_HASH_ALL) {
+ ((uint16_t *)key)[0] = sport; /* word 0 holds the two ports as stored in the sockaddrs */
+ ((uint16_t *)key)[1] = dport;
+ } else
+ offset = V_flow_hashjitter + proto; /* destination-only key: jitter and protocol are applied here instead */
+
+ return (jenkins_hashword(key, 9, offset));
+}
+
+/*
+ * Look up the flow entry for an IPv6 packet.
+ *
+ * The source and destination sockaddrs are extracted from the mbuf by
+ * ipv6_mbuf_demarshal(), starting from the table's own flags, and then
+ * handed to flowtable_lookup() together with the mbuf's FIB.  Returns
+ * NULL if the packet could not be demarshalled or no entry results.
+ */
+static struct flentry *
+flowtable_lookup_mbuf6(struct flowtable *ft, struct mbuf *m)
+{
+	struct sockaddr_storage src, dst;
+	uint16_t flags = ft->ft_flags;
+	int error;
+
+	error = ipv6_mbuf_demarshal(ft, m, (struct sockaddr_in6 *)&src,
+	    (struct sockaddr_in6 *)&dst, &flags);
+	if (error != 0)
+		return (NULL);
+
+	return (flowtable_lookup(ft, &src, &dst, M_GETFIB(m), flags));
+}
+
+/*
+ * Fill in an IPv6 struct route_in6 from a flow entry.
+ *
+ * ro_dst becomes an AF_INET6 sockaddr carrying the flow's destination
+ * address; ro_rt and ro_lle are taken from the entry with their
+ * volatile qualifier removed.
+ */
+void
+flow_to_route_in6(struct flentry *fle, struct route_in6 *ro)
+{
+	uint32_t *hashkey;
+	struct sockaddr_in6 *sin6;
+
+	sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
+
+	sin6->sin6_family = AF_INET6;
+	sin6->sin6_len = sizeof(*sin6);
+	hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
+	/*
+	 * ipv6_flow_lookup_hash_internal() stores the destination address
+	 * in key words 1-4 and the source address in words 5-8, so the
+	 * route destination must come from word 1 (word 5 is the source,
+	 * or zero for tables that do not hash the full tuple).
+	 */
+	memcpy(&sin6->sin6_addr, &hashkey[1], sizeof (struct in6_addr));
+	ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
+	ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
+}
+#endif /* INET6 */
+
static bitstr_t *
flowtable_mask(struct flowtable *ft)
{
@@ -512,14 +901,30 @@ flowtable_set_hashkey(struct flentry *fle, uint32_t *key)
hashkey[i] = key[i];
}
+
+/*
+ * Return a pointer to the hash key stored in a flow entry.
+ *
+ * Entries flagged FL_IPV6 are struct flentry_v6; all others are
+ * struct flentry_v4.  (The two casts were previously swapped.)
+ */
+static uint32_t *
+flowtable_get_hashkey(struct flentry *fle)
+{
+	uint32_t *hashkey;
+
+	if (fle->f_flags & FL_IPV6)
+		hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
+	else
+		hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
+
+	return (hashkey);
+}
+
static int
flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
- uint8_t proto, uint32_t fibnum, struct route *ro, uint16_t flags)
+ uint32_t fibnum, struct route *ro, uint16_t flags)
{
struct flentry *fle, *fletail, *newfle, **flep;
+ struct flowtable_stats *fs = &ft->ft_stats[curcpu];
int depth;
uma_zone_t flezone;
bitstr_t *mask;
+ uint8_t proto;
flezone = (flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone;
newfle = uma_zalloc(flezone, M_NOWAIT | M_ZERO);
@@ -527,7 +932,8 @@ flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
return (ENOMEM);
newfle->f_flags |= (flags & FL_IPV6);
-
+ proto = flags_to_proto(flags);
+
FL_ENTRY_LOCK(ft, hash);
mask = flowtable_mask(ft);
flep = flowtable_entry(ft, hash);
@@ -540,7 +946,7 @@ flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
}
depth = 0;
- V_flowtable_collisions++;
+ fs->ft_collisions++;
/*
* find end of list and make sure that we were not
* preempted by another thread handling this flow
@@ -554,6 +960,9 @@ flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
FL_ENTRY_UNLOCK(ft, hash);
uma_zfree((newfle->f_flags & FL_IPV6) ?
V_flow_ipv6_zone : V_flow_ipv4_zone, newfle);
+
+ if (flags & FL_OVERWRITE)
+ goto skip;
return (EEXIST);
}
/*
@@ -566,8 +975,8 @@ flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
fle = fle->f_next;
}
- if (depth > V_flowtable_max_depth)
- V_flowtable_max_depth = depth;
+ if (depth > fs->ft_max_depth)
+ fs->ft_max_depth = depth;
fletail->f_next = newfle;
fle = newfle;
skip:
@@ -583,6 +992,35 @@ skip:
return (0);
}
+/*
+ * Insert (or overwrite) a flow described by a source/destination
+ * sockaddr pair, using the route and link-layer entry already resolved
+ * in "ro".
+ *
+ * The table's own flags are OR-ed with the caller's and FL_OVERWRITE
+ * is always added.
+ *
+ * Returns EINVAL if ro lacks a route or lle, or if no usable hash could
+ * be computed (unsupported address family, or the hash routine returned
+ * 0, which it does when the flowtable is disabled or not ready);
+ * otherwise the result of flowtable_insert().
+ */
+int
+kern_flowtable_insert(struct flowtable *ft,
+    struct sockaddr_storage *ssa, struct sockaddr_storage *dsa,
+    struct route *ro, uint32_t fibnum, int flags)
+{
+	uint32_t key[9], hash;
+
+	/* Validate the caller's route before doing any work. */
+	if (ro->ro_rt == NULL || ro->ro_lle == NULL)
+		return (EINVAL);
+
+	flags = (ft->ft_flags | flags | FL_OVERWRITE);
+	hash = 0;
+	/*
+	 * The hash routines leave key[] untouched when they bail out
+	 * early, and neither runs for an unknown family; start from a
+	 * known state so key[] is never read uninitialized.
+	 */
+	bzero(key, sizeof(key));
+
+#ifdef INET
+	if (ssa->ss_family == AF_INET)
+		hash = ipv4_flow_lookup_hash_internal((struct sockaddr_in *)ssa,
+		    (struct sockaddr_in *)dsa, key, flags);
+#endif
+#ifdef INET6
+	if (ssa->ss_family == AF_INET6)
+		hash = ipv6_flow_lookup_hash_internal((struct sockaddr_in6 *)ssa,
+		    (struct sockaddr_in6 *)dsa, key, flags);
+#endif
+	/*
+	 * flowtable_lookup() treats a zero hash as "no flow", so an entry
+	 * stored under hash 0 could never be found again.
+	 */
+	if (hash == 0)
+		return (EINVAL);
+
+	FLDPRINTF(ft, FL_DEBUG,
+	    "kern_flowtable_insert: key=%x:%x:%x hash=%x fibnum=%d flags=%x\n",
+	    key[0], key[1], key[2], hash, fibnum, flags);
+	return (flowtable_insert(ft, hash, key, fibnum, ro, flags));
+}
+
static int
flowtable_key_equal(struct flentry *fle, uint32_t *key)
{
@@ -596,7 +1034,7 @@ flowtable_key_equal(struct flentry *fle, uint32_t *key)
nwords = 3;
hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
}
-
+
for (i = 0; i < nwords; i++)
if (hashkey[i] != key[i])
return (0);
@@ -604,44 +1042,86 @@ flowtable_key_equal(struct flentry *fle, uint32_t *key)
return (1);
}
-int
-flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro, uint32_t fibnum)
+/*
+ * Look up the flow entry for a packet of address family "af".
+ *
+ * Dispatches to the IPv4 or IPv6 mbuf lookup (whichever is compiled in
+ * and matches af).  When an entry is found and the mbuf does not yet
+ * carry M_FLOWID, the mbuf is tagged with the entry's hash as its
+ * flowid.  Returns the entry, or NULL if none was found or af is not
+ * handled.
+ */
+struct flentry *
+flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af)
+{
+	struct flentry *found = NULL;
+
+#ifdef INET
+	if (af == AF_INET)
+		found = flowtable_lookup_mbuf4(ft, m);
+#endif
+#ifdef INET6
+	if (af == AF_INET6)
+		found = flowtable_lookup_mbuf6(ft, m);
+#endif
+	if (found == NULL || m == NULL)
+		return (found);
+
+	if ((m->m_flags & M_FLOWID) == 0) {
+		m->m_flags |= M_FLOWID;
+		m->m_pkthdr.flowid = found->f_fhash;
+	}
+	return (found);
+}
+
+struct flentry *
+flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
+ struct sockaddr_storage *dsa, uint32_t fibnum, int flags)
{
uint32_t key[9], hash;
struct flentry *fle;
- uint16_t flags;
+ struct flowtable_stats *fs = &ft->ft_stats[curcpu];
uint8_t proto = 0;
int error = 0;
struct rtentry *rt;
struct llentry *lle;
-
- flags = ft->ft_flags;
- ro->ro_rt = NULL;
- ro->ro_lle = NULL;
-
- /*
- * The internal hash lookup is the only IPv4 specific bit
- * remaining
- *
- * XXX BZ: to add IPv6 support just add a check for the
- * address type in m and ro and an equivalent ipv6 lookup
- * function - the rest of the code should automatically
- * handle an ipv6 flow (note that m can be NULL in which
- * case ro will be set)
- */
- hash = ipv4_flow_lookup_hash_internal(m, ro, key,
- &flags, &proto);
-
+ struct route sro, *ro;
+ struct route_in6 sro6;
+
+ sro.ro_rt = sro6.ro_rt = NULL;
+ sro.ro_lle = sro6.ro_lle = NULL;
+ ro = NULL;
+ hash = 0;
+ flags |= ft->ft_flags;
+ proto = flags_to_proto(flags);
+#ifdef INET
+ if (ssa->ss_family == AF_INET) {
+ struct sockaddr_in *ssin, *dsin;
+
+ ro = &sro;
+ memcpy(&ro->ro_dst, dsa, sizeof(struct sockaddr_in));
+ dsin = (struct sockaddr_in *)dsa;
+ ssin = (struct sockaddr_in *)ssa;
+ if ((dsin->sin_addr.s_addr == ssin->sin_addr.s_addr) ||
+ (ntohl(dsin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
+ (ntohl(ssin->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
+ return (NULL);
+
+ hash = ipv4_flow_lookup_hash_internal(ssin, dsin, key, flags);
+ }
+#endif
+#ifdef INET6
+ if (ssa->ss_family == AF_INET6) {
+ struct sockaddr_in6 *ssin6, *dsin6;
+
+ ro = (struct route *)&sro6;
+ memcpy(&sro6.ro_dst, dsa,
+ sizeof(struct sockaddr_in6));
+ dsin6 = (struct sockaddr_in6 *)dsa;
+ ssin6 = (struct sockaddr_in6 *)ssa;
+
+ flags |= FL_IPV6;
+ hash = ipv6_flow_lookup_hash_internal(ssin6, dsin6, key, flags);
+ }
+#endif
/*
* Ports are zero and this isn't a transmit cache
* - thus not a protocol for which we need to keep
* state
- * FL_HASH_PORTS => key[0] != 0 for TCP || UDP || SCTP
+ * FL_HASH_ALL => key[0] != 0 for TCP || UDP || SCTP
*/
- if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_PORTS)))
- return (ENOENT);
+ if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_ALL)))
+ return (NULL);
- V_flowtable_lookups++;
+ fs->ft_lookups++;
FL_ENTRY_LOCK(ft, hash);
if ((fle = FL_ENTRY(ft, hash)) == NULL) {
FL_ENTRY_UNLOCK(ft, hash);
@@ -657,21 +1137,21 @@ keycheck:
&& (fibnum == fle->f_fibnum)
&& (rt->rt_flags & RTF_UP)
&& (rt->rt_ifp != NULL)) {
- V_flowtable_hits++;
+ fs->ft_hits++;
fle->f_uptime = time_uptime;
fle->f_flags |= flags;
- ro->ro_rt = rt;
- ro->ro_lle = lle;
FL_ENTRY_UNLOCK(ft, hash);
- return (0);
+ return (fle);
} else if (fle->f_next != NULL) {
fle = fle->f_next;
goto keycheck;
}
FL_ENTRY_UNLOCK(ft, hash);
-
uncached:
- V_flowtable_misses++;
+ if (flags & FL_NOAUTO)
+ return (NULL);
+
+ fs->ft_misses++;
/*
* This bit of code ends up locking the
* same route 3 times (just like ip_output + ether_output)
@@ -684,36 +1164,64 @@ uncached:
* receive the route locked
*/
+#ifdef INVARIANTS
+ if ((ro->ro_dst.sa_family != AF_INET) &&
+ (ro->ro_dst.sa_family != AF_INET6))
+ panic("sa_family == %d\n", ro->ro_dst.sa_family);
+#endif
+
ft->ft_rtalloc(ro, hash, fibnum);
if (ro->ro_rt == NULL)
error = ENETUNREACH;
else {
struct llentry *lle = NULL;
- struct sockaddr *l3addr;
+ struct sockaddr_storage *l3addr;
struct rtentry *rt = ro->ro_rt;
struct ifnet *ifp = rt->rt_ifp;
if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) {
RTFREE(rt);
ro->ro_rt = NULL;
- return (ENOENT);
+ return (NULL);
}
+#ifdef INET6
+ if (ssa->ss_family == AF_INET6) {
+ struct sockaddr_in6 *dsin6;
+
+ dsin6 = (struct sockaddr_in6 *)dsa;
+ if (in6_localaddr(&dsin6->sin6_addr)) {
+ RTFREE(rt);
+ ro->ro_rt = NULL;
+ return (NULL);
+ }
- if (rt->rt_flags & RTF_GATEWAY)
- l3addr = rt->rt_gateway;
- else
- l3addr = &ro->ro_dst;
- llentry_update(&lle, LLTABLE(ifp), l3addr, ifp);
+ if (rt->rt_flags & RTF_GATEWAY)
+ l3addr = (struct sockaddr_storage *)rt->rt_gateway;
+
+ else
+ l3addr = (struct sockaddr_storage *)&ro->ro_dst;
+ llentry_update(&lle, LLTABLE6(ifp), l3addr, ifp);
+ }
+#endif
+#ifdef INET
+ if (ssa->ss_family == AF_INET) {
+ if (rt->rt_flags & RTF_GATEWAY)
+ l3addr = (struct sockaddr_storage *)rt->rt_gateway;
+ else
+ l3addr = (struct sockaddr_storage *)&ro->ro_dst;
+ llentry_update(&lle, LLTABLE(ifp), l3addr, ifp);
+ }
+
+#endif
ro->ro_lle = lle;
if (lle == NULL) {
RTFREE(rt);
ro->ro_rt = NULL;
- return (ENOENT);
+ return (NULL);
}
- error = flowtable_insert(ft, hash, key, proto, fibnum,
- ro, flags);
-
+ error = flowtable_insert(ft, hash, key, fibnum, ro, flags);
+
if (error) {
RTFREE(rt);
LLE_FREE(lle);
@@ -722,7 +1230,7 @@ uncached:
}
}
- return (error);
+ return ((error) ? NULL : fle);
}
/*
@@ -731,7 +1239,7 @@ uncached:
#define calloc(count, size) malloc((count)*(size), M_DEVBUF, M_WAITOK|M_ZERO)
struct flowtable *
-flowtable_alloc(int nentry, int flags)
+flowtable_alloc(char *name, int nentry, int flags)
{
struct flowtable *ft, *fttail;
int i;
@@ -743,7 +1251,8 @@ flowtable_alloc(int nentry, int flags)
ft = malloc(sizeof(struct flowtable),
M_RTABLE, M_WAITOK | M_ZERO);
-
+
+ ft->ft_name = name;
ft->ft_flags = flags;
ft->ft_size = nentry;
#ifdef RADIX_MPATH
@@ -784,7 +1293,7 @@ flowtable_alloc(int nentry, int flags)
* just a cache - so everything is eligible for
* replacement after 5s of non-use
*/
- if (flags & FL_HASH_PORTS) {
+ if (flags & FL_HASH_ALL) {
ft->ft_udp_idle = V_flowtable_udp_expire;
ft->ft_syn_idle = V_flowtable_syn_expire;
ft->ft_fin_wait_idle = V_flowtable_fin_wait_expire;
@@ -837,7 +1346,8 @@ flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
struct flentry *fle, **flehead, *fleprev;
struct flentry *flefreehead, *flefreetail, *fletmp;
bitstr_t *mask, *tmpmask;
-
+ struct flowtable_stats *fs = &ft->ft_stats[curcpu];
+
flefreehead = flefreetail = NULL;
mask = flowtable_mask(ft);
tmpmask = ft->ft_tmpmask;
@@ -854,12 +1364,12 @@ flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
curbit);
break;
}
-
+
FL_ENTRY_LOCK(ft, curbit);
flehead = flowtable_entry(ft, curbit);
fle = fleprev = *flehead;
- V_flowtable_free_checks++;
+ fs->ft_free_checks++;
#ifdef DIAGNOSTIC
if (fle == NULL && curbit > 0) {
log(LOG_ALERT,
@@ -897,7 +1407,7 @@ flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
fleprev->f_next = fle->f_next;
fle = fleprev->f_next;
}
-
+
if (flefreehead == NULL)
flefreehead = flefreetail = fletmp;
else {
@@ -916,7 +1426,7 @@ flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
while ((fle = flefreehead) != NULL) {
flefreehead = fle->f_next;
count++;
- V_flowtable_frees++;
+ fs->ft_frees++;
fle_free(fle);
}
if (V_flowtable_debug && count)
@@ -927,6 +1437,7 @@ void
flowtable_route_flush(struct flowtable *ft, struct rtentry *rt)
{
int i;
+
if (ft->ft_flags & FL_PCPU) {
for (i = 0; i <= mp_maxid; i++) {
if (CPU_ABSENT(i))
@@ -1017,7 +1528,7 @@ static void
flowtable_flush(void *unused __unused)
{
uint64_t start;
-
+
mtx_lock(&flowclean_lock);
start = flowclean_cycles;
while (start == flowclean_cycles) {
@@ -1109,17 +1620,64 @@ static void
flow_show(struct flowtable *ft, struct flentry *fle)
{
int idle_time;
- int rt_valid;
+ int rt_valid, ifp_valid;
+ uint16_t sport, dport;
+ uint32_t *hashkey;
+ char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
+ volatile struct rtentry *rt;
+ struct ifnet *ifp = NULL;
idle_time = (int)(time_uptime - fle->f_uptime);
- rt_valid = fle->f_rt != NULL;
- db_printf("hash=0x%08x idle_time=%03d rt=%p ifp=%p",
- fle->f_fhash, idle_time,
- fle->f_rt, rt_valid ? fle->f_rt->rt_ifp : NULL);
- if (rt_valid && (fle->f_rt->rt_flags & RTF_UP))
- db_printf(" RTF_UP ");
+ rt = fle->f_rt;
+ rt_valid = rt != NULL;
+ if (rt_valid)
+ ifp = rt->rt_ifp;
+ ifp_valid = ifp != NULL;
+ hashkey = flowtable_get_hashkey(fle);
+ if (fle->f_flags & FL_IPV6)
+ goto skipaddr;
+
+ inet_ntoa_r(*(struct in_addr *) &hashkey[2], daddr);
+ if (ft->ft_flags & FL_HASH_ALL) {
+ inet_ntoa_r(*(struct in_addr *) &hashkey[1], saddr);
+ sport = ntohs(((uint16_t *)hashkey)[0]);
+ dport = ntohs(((uint16_t *)hashkey)[1]);
+ db_printf("%s:%d->%s:%d",
+ saddr, sport, daddr,
+ dport);
+ } else
+ db_printf("%s ", daddr);
+
+skipaddr:
if (fle->f_flags & FL_STALE)
db_printf(" FL_STALE ");
+ if (fle->f_flags & FL_TCP)
+ db_printf(" FL_TCP ");
+ if (fle->f_flags & FL_UDP)
+ db_printf(" FL_UDP ");
+ if (rt_valid) {
+ if (rt->rt_flags & RTF_UP)
+ db_printf(" RTF_UP ");
+ }
+ if (ifp_valid) {
+ if (ifp->if_flags & IFF_LOOPBACK)
+ db_printf(" IFF_LOOPBACK ");
+ if (ifp->if_flags & IFF_UP)
+ db_printf(" IFF_UP ");
+ if (ifp->if_flags & IFF_POINTOPOINT)
+ db_printf(" IFF_POINTOPOINT ");
+ }
+ if (fle->f_flags & FL_IPV6)
+ db_printf("\n\tkey=%08x:%08x:%08x%08x:%08x:%08x%08x:%08x:%08x",
+ hashkey[0], hashkey[1], hashkey[2],
+ hashkey[3], hashkey[4], hashkey[5],
+ hashkey[6], hashkey[7], hashkey[8]);
+ else
+ db_printf("\n\tkey=%08x:%08x:%08x ",
+ hashkey[0], hashkey[1], hashkey[2]);
+ db_printf("hash=%08x idle_time=%03d"
+ "\n\tfibnum=%02d rt=%p",
+ fle->f_fhash, idle_time, fle->f_fibnum, fle->f_rt);
db_printf("\n");
}
@@ -1130,7 +1688,8 @@ flowtable_show(struct flowtable *ft, int cpuid)
struct flentry *fle, **flehead;
bitstr_t *mask, *tmpmask;
- db_printf("cpu: %d\n", cpuid);
+ if (cpuid != -1)
+ db_printf("cpu: %d\n", cpuid);
mask = flowtable_mask_pcpu(ft, cpuid);
tmpmask = ft->ft_tmpmask;
memcpy(tmpmask, mask, ft->ft_size/8);
@@ -1167,6 +1726,7 @@ flowtable_show_vnet(void)
ft = V_flow_list_head;
while (ft != NULL) {
+ printf("name: %s\n", ft->ft_name);
if (ft->ft_flags & FL_PCPU) {
for (i = 0; i <= mp_maxid; i++) {
if (CPU_ABSENT(i))
@@ -1174,7 +1734,7 @@ flowtable_show_vnet(void)
flowtable_show(ft, i);
}
} else {
- flowtable_show(ft, 0);
+ flowtable_show(ft, -1);
}
ft = ft->ft_next;
}
diff --git a/sys/net/flowtable.h b/sys/net/flowtable.h
index 7d7abdf..6e79a3c 100644
--- a/sys/net/flowtable.h
+++ b/sys/net/flowtable.h
@@ -1,6 +1,6 @@
/**************************************************************************
-Copyright (c) 2008-2009, BitGravity Inc.
+Copyright (c) 2008-2010, BitGravity Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -34,24 +34,49 @@ $FreeBSD$
#ifdef _KERNEL
-#define FL_HASH_PORTS (1<<0) /* hash 4-tuple + protocol */
+#define FL_HASH_ALL (1<<0) /* hash 4-tuple + protocol */
#define FL_PCPU (1<<1) /* pcpu cache */
+#define FL_NOAUTO (1<<2) /* don't automatically add flentry on miss */
+
+#define FL_TCP (1<<11)
+#define FL_SCTP (1<<12)
+#define FL_UDP (1<<13)
+#define FL_DEBUG (1<<14)
+#define FL_DEBUG_ALL (1<<15)
struct flowtable;
+struct flentry;
+struct route;
+struct route_in6;
+
VNET_DECLARE(struct flowtable *, ip_ft);
#define V_ip_ft VNET(ip_ft)
-struct flowtable *flowtable_alloc(int nentry, int flags);
+VNET_DECLARE(struct flowtable *, ip6_ft);
+#define V_ip6_ft VNET(ip6_ft)
+
+struct flowtable *flowtable_alloc(char *name, int nentry, int flags);
/*
* Given a flow table, look up the L3 and L2 information and
* return it in the route.
*
*/
-int flowtable_lookup(struct flowtable *ft, struct mbuf *m,
- struct route *ro, uint32_t fibnum);
+struct flentry *flowtable_lookup_mbuf(struct flowtable *ft, struct mbuf *m, int af);
+
+struct flentry *flowtable_lookup(struct flowtable *ft, struct sockaddr_storage *ssa,
+ struct sockaddr_storage *dsa, uint32_t fibnum, int flags);
+int kern_flowtable_insert(struct flowtable *ft, struct sockaddr_storage *ssa,
+ struct sockaddr_storage *dsa, struct route *ro, uint32_t fibnum, int flags);
+
+void flow_invalidate(struct flentry *fl);
void flowtable_route_flush(struct flowtable *ft, struct rtentry *rt);
+void flow_to_route(struct flentry *fl, struct route *ro);
+
+void flow_to_route_in6(struct flentry *fl, struct route_in6 *ro);
+
+
#endif /* _KERNEL */
#endif
diff --git a/sys/net/if_llatbl.c b/sys/net/if_llatbl.c
index 5992f6d..f52f9ff 100644
--- a/sys/net/if_llatbl.c
+++ b/sys/net/if_llatbl.c
@@ -111,13 +111,13 @@ llentry_free(struct llentry *lle)
/*
* Update an llentry for address dst (equivalent to rtalloc for new-arp)
- * Caller must pass in a valid struct llentry *
+ * Caller must pass in a valid struct llentry * (or NULL)
*
* if found the llentry * is returned referenced and unlocked
*/
int
llentry_update(struct llentry **llep, struct lltable *lt,
- struct sockaddr *dst, struct ifnet *ifp)
+ struct sockaddr_storage *dst, struct ifnet *ifp)
{
struct llentry *la;
diff --git a/sys/net/if_llatbl.h b/sys/net/if_llatbl.h
index 21357eb..debb416 100644
--- a/sys/net/if_llatbl.h
+++ b/sys/net/if_llatbl.h
@@ -191,7 +191,7 @@ int lltable_sysctl_dumparp(int, struct sysctl_req *);
void llentry_free(struct llentry *);
int llentry_update(struct llentry **, struct lltable *,
- struct sockaddr *, struct ifnet *);
+ struct sockaddr_storage *, struct ifnet *);
/*
* Generic link layer address lookup function.
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 084bac0..d91bcb0 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -329,7 +329,7 @@ ip_init(void)
#ifdef FLOWTABLE
TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size",
&V_ip_output_flowtable_size);
- V_ip_ft = flowtable_alloc(V_ip_output_flowtable_size, FL_PCPU);
+ V_ip_ft = flowtable_alloc("ipv4", V_ip_output_flowtable_size, FL_PCPU);
#endif
/* Skip initialization of globals for non-default instances. */
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index e238e41..ca5de12 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -148,14 +148,20 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
bzero(ro, sizeof (*ro));
#ifdef FLOWTABLE
- /*
- * The flow table returns route entries valid for up to 30
- * seconds; we rely on the remainder of ip_output() taking no
- * longer than that long for the stability of ro_rt. The
- * flow ID assignment must have happened before this point.
- */
- if (flowtable_lookup(V_ip_ft, m, ro, M_GETFIB(m)) == 0)
- nortfree = 1;
+ {
+ struct flentry *fle;
+
+ /*
+ * The flow table returns route entries valid for up to 30
+ * seconds; we rely on the remainder of ip_output() taking no
+ * longer than that for the stability of ro_rt. The
+ * flow ID assignment must have happened before this point.
+ */
+ if ((fle = flowtable_lookup_mbuf(V_ip_ft, m, AF_INET)) != NULL) {
+ flow_to_route(fle, ro);
+ nortfree = 1;
+ }
+ }
#endif
}
OpenPOWER on IntegriCloud