path: root/sys/net/bpf.h
author		csjp <csjp@FreeBSD.org>		2008-03-24 13:49:17 +0000
committer	csjp <csjp@FreeBSD.org>		2008-03-24 13:49:17 +0000
commit		310e3f93ddb4d35429a892af85c9b1cf4ef64ebe (patch)
tree		db538fa445b2afbf451bd34ca79ea61b4d771a83 /sys/net/bpf.h
parent		98fbc814ae21210ec2e68611af7195f66c41e37c (diff)
Introduce support for zero-copy BPF buffering, which reduces the overhead
of packet capture by allowing a user process to directly "loan" buffer
memory to the kernel rather than using read(2) to explicitly copy data
from kernel address space.

The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size.  The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region.  The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.

The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while waiting
for network data if the user process is able to consume data faster than
the kernel generates it.

Patches to libpcap are available to allow libpcap applications to
transparently take advantage of this support.  Detailed information on
the new API may be found in bpf(4), including the specific atomic
operations and memory barriers required to synchronize buffer use safely.

These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print.  The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.

Zero-copy BPF buffers are still considered experimental and are disabled
by default.  To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.  Changes to libpcap will be
made available as a patch for the time being, and further refinements to
the implementation are expected.

Sponsored by:		Seccuris Inc.
In collaboration with:	rwatson
Tested by:		pwood, gallatin
MFC after:		4 months [1]

[1] Certain portions will probably not be MFCed, specifically things
    that can break the monitoring ABI.
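As a rough illustration of the ioctl interface added by this commit, the
following sketch shows how a user process might switch a BPF descriptor
into zero-copy mode, loan two buffers to the kernel, and attach the
descriptor to an interface.  This is not the reference code from bpf(4):
the helper name zbuf_setup, the use of anonymous mmap(2) memory, and the
abbreviated error handling are assumptions made purely for illustration;
bpf(4) documents the authoritative setup sequence and buffer size and
alignment requirements.

/*
 * Hedged sketch (not the bpf(4) reference code): configure a BPF
 * descriptor for zero-copy buffering using the ioctls introduced by
 * this commit.  Buffer sizing, alignment and error handling are
 * deliberately simplified.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>

#include <net/if.h>
#include <net/bpf.h>

#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int
zbuf_setup(const char *ifname, struct bpf_zbuf *zb)
{
	struct ifreq ifr;
	u_int bufmode = BPF_BUFMODE_ZBUF;
	size_t zmax;
	int fd;

	if ((fd = open("/dev/bpf", O_RDWR)) < 0)
		err(1, "open(/dev/bpf)");

	/* Select zero-copy mode; the mode is fixed once an interface is attached. */
	if (ioctl(fd, BIOCSETBUFMODE, &bufmode) < 0)
		err(1, "BIOCSETBUFMODE");

	/* Ask the kernel for the largest zero-copy buffer size it will accept. */
	if (ioctl(fd, BIOCGETZMAX, &zmax) < 0)
		err(1, "BIOCGETZMAX");

	/* Loan two page-aligned anonymous buffers of that size to the kernel. */
	memset(zb, 0, sizeof(*zb));
	zb->bz_buflen = zmax;
	zb->bz_bufa = mmap(NULL, zmax, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_SHARED, -1, 0);
	zb->bz_bufb = mmap(NULL, zmax, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_SHARED, -1, 0);
	if (zb->bz_bufa == MAP_FAILED || zb->bz_bufb == MAP_FAILED)
		err(1, "mmap");
	if (ioctl(fd, BIOCSETZBUF, zb) < 0)
		err(1, "BIOCSETZBUF");

	/* Attach the descriptor to the interface and start capturing. */
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
	if (ioctl(fd, BIOCSETIF, &ifr) < 0)
		err(1, "BIOCSETIF");

	return (fd);
}

The process would then wait for data with select(2), poll(2) or
kqueue(2) and inspect the bpf_zbuf_header at the start of each buffer,
as sketched after the diff below.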
Diffstat (limited to 'sys/net/bpf.h')
-rw-r--r--	sys/net/bpf.h	66
1 file changed, 66 insertions(+), 0 deletions(-)
diff --git a/sys/net/bpf.h b/sys/net/bpf.h
index 91ea0f6..1d6f9db 100644
--- a/sys/net/bpf.h
+++ b/sys/net/bpf.h
@@ -92,6 +92,27 @@ struct bpf_version {
#define BPF_MAJOR_VERSION 1
#define BPF_MINOR_VERSION 1
+/*
+ * Historically, BPF has supported a single buffering model, first using mbuf
+ * clusters in kernel, and later using malloc(9) buffers in kernel. We now
+ * support multiple buffering modes, which may be queried and set using
+ * BIOCGETBUFMODE and BIOCSETBUFMODE. So as to avoid handling the complexity
+ * of changing modes while sniffing packets, the mode becomes fixed once an
+ * interface has been attached to the BPF descriptor.
+ */
+#define BPF_BUFMODE_BUFFER 1 /* Kernel buffers with read(). */
+#define BPF_BUFMODE_ZBUF 2 /* Zero-copy buffers. */
+
+/*-
+ * Struct used by BIOCSETZBUF, BIOCROTZBUF: describes up to two zero-copy
+ * buffers as used by BPF.
+ */
+struct bpf_zbuf {
+ void *bz_bufa; /* Location of 'a' zero-copy buffer. */
+ void *bz_bufb; /* Location of 'b' zero-copy buffer. */
+ size_t bz_buflen; /* Size of zero-copy buffers. */
+};
+
#define BIOCGBLEN _IOR('B',102, u_int)
#define BIOCSBLEN _IOWR('B',102, u_int)
#define BIOCSETF _IOW('B',103, struct bpf_program)
@@ -116,6 +137,11 @@ struct bpf_version {
#define BIOCLOCK _IO('B', 122)
#define BIOCSETWF _IOW('B',123, struct bpf_program)
#define BIOCFEEDBACK _IOW('B',124, u_int)
+#define BIOCGETBUFMODE _IOR('B',125, u_int)
+#define BIOCSETBUFMODE _IOW('B',126, u_int)
+#define BIOCGETZMAX _IOR('B',127, size_t)
+#define BIOCROTZBUF _IOR('B',128, struct bpf_zbuf)
+#define BIOCSETZBUF _IOW('B',129, struct bpf_zbuf)
/* Obsolete */
#define BIOCGSEESENT BIOCGDIRECTION
@@ -149,6 +175,24 @@ struct bpf_hdr {
#endif
/*
+ * When using zero-copy BPF buffers, a shared memory header is present
+ * allowing the kernel BPF implementation and user process to synchronize
+ * without using system calls. This structure defines that header. When
+ * accessing these fields, appropriate atomic operations and memory barriers
+ * are required in order not to see stale or out-of-order data; see bpf(4)
+ * for reference code to access these fields from userspace.
+ *
+ * The layout of this structure is critical, and must not be changed; it must
+ * fit in a single page on all architectures.
+ */
+struct bpf_zbuf_header {
+ volatile u_int bzh_kernel_gen; /* Kernel generation number. */
+ volatile u_int bzh_kernel_len; /* Length of data in the buffer. */
+ volatile u_int bzh_user_gen; /* User generation number. */
+ u_int _bzh_pad[5];
+};
+
+/*
* Data-link level type codes.
*/
#define DLT_NULL 0 /* BSD loopback encapsulation */
@@ -761,6 +805,27 @@ struct bpf_dltlist {
};
#ifdef _KERNEL
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_BPF);
+#endif
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_net_bpf);
+#endif
+
+/*
+ * Rotate the packet buffers in descriptor d. Move the store buffer into the
+ * hold slot, and the free buffer into the store slot. Zero the length of the
+ * new store buffer. Descriptor lock should be held.
+ */
+#define ROTATE_BUFFERS(d) do { \
+ (d)->bd_hbuf = (d)->bd_sbuf; \
+ (d)->bd_hlen = (d)->bd_slen; \
+ (d)->bd_sbuf = (d)->bd_fbuf; \
+ (d)->bd_slen = 0; \
+ (d)->bd_fbuf = NULL; \
+ bpf_bufheld(d); \
+} while (0)
+
/*
* Descriptor associated with each attached hardware interface.
*/
@@ -773,6 +838,7 @@ struct bpf_if {
struct mtx bif_mtx; /* mutex for interface */
};
+void bpf_bufheld(struct bpf_d *d);
int bpf_validate(const struct bpf_insn *, int);
void bpf_tap(struct bpf_if *, u_char *, u_int);
void bpf_mtap(struct bpf_if *, struct mbuf *);
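To make the shared-memory handshake described in the bpf_zbuf_header
comment above more concrete, here is a hedged consumer-side sketch.  The
helper names and the use of the userland atomic(9) primitives from
<machine/atomic.h> are assumptions for illustration; the authoritative
access rules, including the exact atomic operations and memory barriers
required, are those documented in bpf(4).

/*
 * Hedged sketch of the consumer side of the zero-copy protocol.  A
 * buffer is owned by user space while bzh_kernel_gen has advanced past
 * bzh_user_gen; the process reads up to bzh_kernel_len bytes of
 * bpf_hdr-framed records that follow the header, then returns the
 * buffer by catching bzh_user_gen up to bzh_kernel_gen.
 */
#include <sys/types.h>
#include <machine/atomic.h>

#include <net/bpf.h>

/* Returns nonzero if the zero-copy buffer behind 'bzh' is readable. */
static int
zbuf_owned_by_user(struct bpf_zbuf_header *bzh)
{
	/* Acquire barrier: only look at buffer contents after the flag. */
	return (atomic_load_acq_int(&bzh->bzh_kernel_gen) !=
	    bzh->bzh_user_gen);
}

/* Process the packets in a completed buffer, then hand it back. */
static void
zbuf_consume(struct bpf_zbuf_header *bzh,
    void (*handler)(const u_char *, u_int))
{
	const u_char *data = (const u_char *)(bzh + 1);	/* Data follows header. */

	/* Walk the usual bpf_hdr-framed records, as with read(2) buffers. */
	handler(data, bzh->bzh_kernel_len);

	/* Release barrier: the kernel may rewrite the buffer after this. */
	atomic_store_rel_int(&bzh->bzh_user_gen, bzh->bzh_kernel_gen);
}

A capture loop would typically block in select(2) until the descriptor
becomes readable, test both buffers with zbuf_owned_by_user(), and use
BIOCROTZBUF to force rotation of a partially filled store buffer that
has been idle too long.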