summaryrefslogtreecommitdiffstats
path: root/sys/netinet
diff options
context:
space:
mode:
author: adrian <adrian@FreeBSD.org> 2014-09-09 01:45:39 +0000
committer: adrian <adrian@FreeBSD.org> 2014-09-09 01:45:39 +0000
commit: e623d51cd5c4ea0255d03a6a082071e1ae700947 (patch)
tree: 7a08d9539d5dc0c08bba7869418c535b30644538 /sys/netinet
parent: 4f769d2ecf996b927b7da791e339653a8036f4af (diff)
downloadFreeBSD-src-e623d51cd5c4ea0255d03a6a082071e1ae700947.zip
FreeBSD-src-e623d51cd5c4ea0255d03a6a082071e1ae700947.tar.gz
Add support for receiving and setting flowtype, flowid and RSS bucket
information as part of recvmsg().

This is primarily used for debugging/verification of the various
processing paths in the IP, PCB and driver layers.

Unfortunately the current implementation of the control message path
results in a ~10% or so drop in UDP frame throughput when it's used.

Differential Revision: https://reviews.freebsd.org/D527
Reviewed by: grehan
Diffstat (limited to 'sys/netinet')
-rw-r--r-- sys/netinet/in.h | 2
-rw-r--r-- sys/netinet/in_pcb.h | 2
-rw-r--r-- sys/netinet/ip_input.c | 37
-rw-r--r-- sys/netinet/ip_output.c | 18
-rw-r--r-- sys/netinet/udp_usrreq.c | 46
5 files changed, 105 insertions, 0 deletions
diff --git a/sys/netinet/in.h b/sys/netinet/in.h
index 254401f..4776278 100644
--- a/sys/netinet/in.h
+++ b/sys/netinet/in.h
@@ -492,6 +492,8 @@ __END_DECLS
#define IP_FLOWID 90 /* get flow id for the given socket/inp */
#define IP_FLOWTYPE 91 /* get flow type (M_HASHTYPE) */
#define IP_RSSBUCKETID 92 /* get RSS flowid -> bucket mapping */
+#define IP_RECVFLOWID 93 /* bool; receive IP flowid/flowtype w/ datagram */
+#define IP_RECVRSSBUCKETID 94 /* bool; receive IP RSS bucket id w/ datagram */
/*
* Defaults and limits for options
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 8c73f2d..6207ddd 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -549,6 +549,8 @@ short inp_so_options(const struct inpcb *inp);
#define INP_REUSEADDR 0x00000020 /* SO_REUSEADDR option is set */
#define INP_BINDMULTI 0x00000040 /* IP_BINDMULTI option is set */
#define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */
+#define INP_RECVFLOWID 0x00000100 /* populate recv datagram with flow info */
+#define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */
/*
* Flags passed to in_pcblookup*() functions.
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index d622e01..c16017d 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -1662,6 +1662,43 @@ makedummy:
if (*mp)
mp = &(*mp)->m_next;
}
+
+ if (inp->inp_flags2 & INP_RECVFLOWID) {
+ uint32_t flowid, flow_type;
+
+ flowid = m->m_pkthdr.flowid;
+ flow_type = M_HASHTYPE_GET(m);
+
+ /*
+ * XXX should handle the failure of one or the
+ * other - don't populate both?
+ */
+ *mp = sbcreatecontrol((caddr_t) &flowid,
+ sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ *mp = sbcreatecontrol((caddr_t) &flow_type,
+ sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+
+#ifdef RSS
+ if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
+ uint32_t flowid, flow_type;
+ uint32_t rss_bucketid;
+
+ flowid = m->m_pkthdr.flowid;
+ flow_type = M_HASHTYPE_GET(m);
+
+ if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
+ *mp = sbcreatecontrol((caddr_t) &rss_bucketid,
+ sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
+ if (*mp)
+ mp = &(*mp)->m_next;
+ }
+ }
+#endif
}
/*
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index 99d3886..b5edea0 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -1016,6 +1016,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
case IP_ONESBCAST:
case IP_DONTFRAG:
case IP_RECVTOS:
+ case IP_RECVFLOWID:
+#ifdef RSS
+ case IP_RECVRSSBUCKETID:
+#endif
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
if (error)
@@ -1094,6 +1098,9 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
case IP_BINDMULTI:
OPTSET2(INP_BINDMULTI, optval);
break;
+ case IP_RECVFLOWID:
+ OPTSET2(INP_RECVFLOWID, optval);
+ break;
#ifdef RSS
case IP_RSS_LISTEN_BUCKET:
if ((optval >= 0) &&
@@ -1104,6 +1111,9 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
error = EINVAL;
}
break;
+ case IP_RECVRSSBUCKETID:
+ OPTSET2(INP_RECVRSSBUCKETID, optval);
+ break;
#endif
}
break;
@@ -1219,8 +1229,10 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
case IP_BINDMULTI:
case IP_FLOWID:
case IP_FLOWTYPE:
+ case IP_RECVFLOWID:
#ifdef RSS
case IP_RSSBUCKETID:
+ case IP_RECVRSSBUCKETID:
#endif
switch (sopt->sopt_name) {
@@ -1290,6 +1302,9 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
case IP_FLOWTYPE:
optval = inp->inp_flowtype;
break;
+ case IP_RECVFLOWID:
+ optval = OPTBIT2(INP_RECVFLOWID);
+ break;
#ifdef RSS
case IP_RSSBUCKETID:
retval = rss_hash2bucket(inp->inp_flowid,
@@ -1300,6 +1315,9 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt)
else
error = EINVAL;
break;
+ case IP_RECVRSSBUCKETID:
+ optval = OPTBIT2(INP_RECVRSSBUCKETID);
+ break;
#endif
case IP_BINDMULTI:
optval = OPTBIT2(INP_BINDMULTI);
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index 5860c57..ecb7aec 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_rss.h"
#include <sys/param.h>
#include <sys/domain.h>
@@ -1084,6 +1085,9 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
u_char tos;
uint8_t pr;
uint16_t cscov = 0;
+ uint32_t flowid = 0;
+ int flowid_type = 0;
+ int use_flowid = 0;
/*
* udp_output() may need to temporarily bind or connect the current
@@ -1147,6 +1151,32 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
tos = *(u_char *)CMSG_DATA(cm);
break;
+ case IP_FLOWID:
+ if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
+ error = EINVAL;
+ break;
+ }
+ flowid = *(uint32_t *) CMSG_DATA(cm);
+ break;
+
+ case IP_FLOWTYPE:
+ if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
+ error = EINVAL;
+ break;
+ }
+ flowid_type = *(uint32_t *) CMSG_DATA(cm);
+ use_flowid = 1;
+ break;
+
+#ifdef RSS
+ case IP_RSSBUCKETID:
+ if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
+ error = EINVAL;
+ break;
+ }
+ /* This is just a placeholder for now */
+ break;
+#endif /* RSS */
default:
error = ENOPROTOOPT;
break;
@@ -1395,6 +1425,22 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
((struct ip *)ui)->ip_tos = tos; /* XXX */
UDPSTAT_INC(udps_opackets);
+ /*
+ * Setup flowid / RSS information for outbound socket.
+ *
+ * Once the UDP code decides to set a flowid some other way,
+ * this allows the flowid to be overridden by userland.
+ */
+ if (use_flowid) {
+ m->m_flags |= M_FLOWID;
+ m->m_pkthdr.flowid = flowid;
+ M_HASHTYPE_SET(m, flowid_type);
+ }
+
+#ifdef RSS
+ ipflags |= IP_NODEFAULTFLOWID;
+#endif /* RSS */
+
if (unlock_udbinfo == UH_WLOCKED)
INP_HASH_WUNLOCK(pcbinfo);
else if (unlock_udbinfo == UH_RLOCKED)
OpenPOWER on IntegriCloud