summaryrefslogtreecommitdiffstats
path: root/sys/net/bpfdesc.h
diff options
context:
space:
mode:
authorcsjp <csjp@FreeBSD.org>2008-03-24 13:49:17 +0000
committercsjp <csjp@FreeBSD.org>2008-03-24 13:49:17 +0000
commit310e3f93ddb4d35429a892af85c9b1cf4ef64ebe (patch)
treedb538fa445b2afbf451bd34ca79ea61b4d771a83 /sys/net/bpfdesc.h
parent98fbc814ae21210ec2e68611af7195f66c41e37c (diff)
downloadFreeBSD-src-310e3f93ddb4d35429a892af85c9b1cf4ef64ebe.zip
FreeBSD-src-310e3f93ddb4d35429a892af85c9b1cf4ef64ebe.tar.gz
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan" buffer memory to the kernel rather than using read(2) to explicitly copy data from kernel address space. The user process will issue new BPF ioctls to set the shared memory buffer mode and provide pointers to buffers and their size. The kernel then wires and maps the pages into kernel address space using sf_buf(9), which on supporting architectures will use the direct map region. The current "buffered" access mode remains the default, and support for zero-copy buffers must, for the time being, be explicitly enabled using a sysctl for the kernel to accept requests to use it. The kernel and user process synchronize use of the buffers with atomic operations, avoiding the need for system calls under load; the user process may use select()/poll()/kqueue() to manage blocking while waiting for network data if the user process is able to consume data faster than the kernel generates it. Patches to libpcap are available to allow libpcap applications to transparently take advantage of this support. Detailed information on the new API may be found in bpf(4), including specific atomic operations and memory barriers required to synchronize buffer use safely. These changes modify the base BPF implementation to (roughly) abstract the current buffer model, allowing the new shared memory model to be added, and add new monitoring statistics for netstat to print. The implementation, with the exception of some monitoring changes that break the netstat monitoring ABI for BPF, will be MFC'd. Zerocopy bpf buffers are still considered experimental and are disabled by default. To experiment with this new facility, adjust the net.bpf.zerocopy_enable sysctl variable to 1. Changes to libpcap will be made available as a patch for the time being, and further refinements to the implementation are expected. Sponsored by: Seccuris Inc. 
In collaboration with: rwatson Tested by: pwood, gallatin MFC after: 4 months [1] [1] Certain portions will probably not be MFCed, specifically things that can break the monitoring ABI.
Diffstat (limited to 'sys/net/bpfdesc.h')
-rw-r--r--sys/net/bpfdesc.h38
1 files changed, 25 insertions, 13 deletions
diff --git a/sys/net/bpfdesc.h b/sys/net/bpfdesc.h
index a46013e..ad9ab20 100644
--- a/sys/net/bpfdesc.h
+++ b/sys/net/bpfdesc.h
@@ -48,10 +48,11 @@
/*
* Descriptor associated with each open bpf file.
*/
+struct zbuf;
struct bpf_d {
LIST_ENTRY(bpf_d) bd_next; /* Linked list of descriptors */
/*
- * Buffer slots: two malloc buffers store the incoming packets.
+ * Buffer slots: two memory buffers store the incoming packets.
* The model has three slots. Sbuf is always occupied.
* sbuf (store) - Receive interrupt puts packets here.
* hbuf (hold) - When sbuf is full, put buffer here and
@@ -74,8 +75,8 @@ struct bpf_d {
#ifdef BPF_JITTER
bpf_jit_filter *bd_bfilter; /* binary filter code */
#endif
- u_long bd_rcount; /* number of packets received */
- u_long bd_dcount; /* number of packets dropped */
+ u_int64_t bd_rcount; /* number of packets received */
+ u_int64_t bd_dcount; /* number of packets dropped */
u_char bd_promisc; /* true if listening promiscuously */
u_char bd_state; /* idle, waiting, or timed out */
@@ -90,9 +91,14 @@ struct bpf_d {
struct mtx bd_mtx; /* mutex for this descriptor */
struct callout bd_callout; /* for BPF timeouts with select */
struct label *bd_label; /* MAC label for descriptor */
- u_long bd_fcount; /* number of packets which matched filter */
+ u_int64_t bd_fcount; /* number of packets which matched filter */
pid_t bd_pid; /* PID which created descriptor */
int bd_locked; /* true if descriptor is locked */
+ u_int bd_bufmode; /* Current buffer mode. */
+ u_int64_t bd_wcount; /* number of packets written */
+ u_int64_t bd_wfcount; /* number of packets that matched write filter */
+ u_int64_t bd_wdcount; /* number of packets dropped during a write */
+ u_int64_t bd_zcopy; /* number of zero copy operations */
};
/* Values for bd_state */
@@ -104,25 +110,21 @@ struct bpf_d {
#define BPFD_UNLOCK(bd) mtx_unlock(&(bd)->bd_mtx)
#define BPFD_LOCK_ASSERT(bd) mtx_assert(&(bd)->bd_mtx, MA_OWNED);
-/* Test whether a BPF is ready for read(). */
-#define bpf_ready(bd) \
- ((bd)->bd_hlen != 0 || \
- (((bd)->bd_immediate || (bd)->bd_state == BPF_TIMED_OUT) && \
- (bd)->bd_slen != 0))
-
/*
* External representation of the bpf descriptor
*/
struct xbpf_d {
+ u_int bd_structsize; /* Size of this structure. */
u_char bd_promisc;
u_char bd_immediate;
+ u_char __bd_pad[6];
int bd_hdrcmplt;
int bd_direction;
int bd_feedback;
int bd_async;
- u_long bd_rcount;
- u_long bd_dcount;
- u_long bd_fcount;
+ u_int64_t bd_rcount;
+ u_int64_t bd_dcount;
+ u_int64_t bd_fcount;
int bd_sig;
int bd_slen;
int bd_hlen;
@@ -130,6 +132,16 @@ struct xbpf_d {
pid_t bd_pid;
char bd_ifname[IFNAMSIZ];
int bd_locked;
+ u_int64_t bd_wcount;
+ u_int64_t bd_wfcount;
+ u_int64_t bd_wdcount;
+ u_int64_t bd_zcopy;
+ int bd_bufmode;
+ /*
+ * Allocate 4 64 bit unsigned integers for future expansion so we do
+ * not have to worry about breaking the ABI.
+ */
+ u_int64_t bd_spare[4];
};
#define BPFIF_LOCK(bif) mtx_lock(&(bif)->bif_mtx)
OpenPOWER on IntegriCloud