-rw-r--r--  share/man/man4/bpf.4     16
-rw-r--r--  sys/net/bpf.c            60
-rw-r--r--  sys/net/bpf_zerocopy.c   77
-rw-r--r--  sys/net/bpf_zerocopy.h    2
4 files changed, 133 insertions, 22 deletions
diff --git a/share/man/man4/bpf.4 b/share/man/man4/bpf.4
index 9116b2d..39e1399 100644
--- a/share/man/man4/bpf.4
+++ b/share/man/man4/bpf.4
@@ -259,14 +259,14 @@ may be used to sleep awaiting the availability of a completed buffer.
They will return a readable file descriptor when ownership of the next buffer
is assigned to user space.
.Pp
-In the current implementation, the kernel will assign ownership of at most
-one buffer at a time to the user process.
-The user processes must acknowledge the current buffer in order to be
-notified that the next buffer is ready for processing.
-Programs should not rely on this as an invariant, as it may change in future
-versions; in particular, they must maintain their own notion of which buffer
-is "next" so that if both buffers are owned by userspace, it can process them
-in the correct order.
+In the current implementation, the kernel may assign zero, one, or both
+buffers to the user process; however, an earlier implementation maintained
+the invariant that at most one buffer could be assigned to the user process
+at a time.
+To ensure both progress and high performance, user processes should
+acknowledge a completely processed buffer as quickly as possible, returning
+it for reuse, and should not block waiting on a second buffer while holding
+another buffer.
.Sh IOCTLS
The
.Xr ioctl 2
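The acknowledgment protocol described above is driven entirely by the
bpf_zbuf_header shared at the front of each mapped buffer. As an illustration
only, here is a minimal userspace sketch; the helper names are hypothetical,
and it assumes the bzh_kernel_gen, bzh_kernel_len, and bzh_user_gen fields
documented in bpf(4) and the atomic(9)-style helpers from <machine/atomic.h>:

#include <machine/atomic.h>
#include <net/bpf.h>

/*
 * A buffer belongs to userspace while the kernel generation number is
 * ahead of the user generation number; the kernel bumps bzh_kernel_gen
 * each time it exports a buffer.
 */
static int
zbuf_owned_by_user(struct bpf_zbuf_header *bzh)
{

	return (atomic_load_acq_int(&bzh->bzh_kernel_gen) !=
	    bzh->bzh_user_gen);
}

/*
 * Acknowledge a completely processed buffer, returning it to the kernel
 * for reuse as quickly as possible, as recommended above.
 */
static void
zbuf_ack(struct bpf_zbuf_header *bzh)
{

	atomic_store_rel_int(&bzh->bzh_user_gen, bzh->bzh_kernel_gen);
}

The acquire load pairs with the kernel's atomic_add_rel_int() on
bzh_kernel_gen in bpf_zerocopy.c below, so the length stored in
bzh_kernel_len is visible to the user process before ownership is observed.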
diff --git a/sys/net/bpf.c b/sys/net/bpf.c
index 7de49049..f95dd8d 100644
--- a/sys/net/bpf.c
+++ b/sys/net/bpf.c
@@ -218,6 +218,45 @@ bpf_canfreebuf(struct bpf_d *d)
return (0);
}
+/*
+ * Allow the buffer model to indicate that the current store buffer is
+ * immutable, regardless of the appearance of space. Return (1) if the
+ * buffer is writable, and (0) if not.
+ */
+static int
+bpf_canwritebuf(struct bpf_d *d)
+{
+
+ BPFD_LOCK_ASSERT(d);
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_ZBUF:
+ return (bpf_zerocopy_canwritebuf(d));
+ }
+ return (1);
+}
+
+/*
+ * Notify buffer model that an attempt to write to the store buffer has
+ * resulted in a dropped packet, in which case the buffer may be considered
+ * full.
+ */
+static void
+bpf_buffull(struct bpf_d *d)
+{
+
+ BPFD_LOCK_ASSERT(d);
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_ZBUF:
+ bpf_zerocopy_buffull(d);
+ break;
+ }
+}
+
+/*
+ * Notify the buffer model that a buffer has moved into the hold position.
+ */
void
bpf_bufheld(struct bpf_d *d)
{
@@ -1691,27 +1730,28 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
/*
* Round up the end of the previous packet to the next longword.
+ *
+ * Drop the packet if there's no room and no hope of room.
+ * If the packet would overflow the storage buffer or the storage
+ * buffer is considered immutable by the buffer model, try to rotate
+ * the buffers and wake up pending processes.
*/
curlen = BPF_WORDALIGN(d->bd_slen);
- if (curlen + totlen > d->bd_bufsize) {
- /*
- * This packet will overflow the storage buffer.
- * Rotate the buffers if we can, then wakeup any
- * pending reads.
- */
+ if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
if (d->bd_fbuf == NULL) {
/*
- * We haven't completed the previous read yet,
- * so drop the packet.
+ * There's no room in the store buffer, and no
+ * prospect of room, so drop the packet. Notify the
+ * buffer model.
*/
+ bpf_buffull(d);
++d->bd_dcount;
return;
}
ROTATE_BUFFERS(d);
do_wakeup = 1;
curlen = 0;
- }
- else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
+ } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
/*
* Immediate mode is set, or the read timeout has already
* expired during a select call. A packet arrived, so the
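For reference, ROTATE_BUFFERS() is the pre-existing bpf.c macro that the
rewritten branch above relies on; roughly (a paraphrase of its effect, not
the verbatim macro), it promotes the store buffer to the hold position,
pulls the free buffer into the store position, and notifies the buffer
model:

	/* Store buffer becomes the hold buffer. */
	(d)->bd_hbuf = (d)->bd_sbuf;
	(d)->bd_hlen = (d)->bd_slen;
	/* Free buffer becomes the new, empty store buffer. */
	(d)->bd_sbuf = (d)->bd_fbuf;
	(d)->bd_slen = 0;
	(d)->bd_fbuf = NULL;
	/* Tell the buffer model a buffer moved into the hold position. */
	bpf_bufheld(d);

With this change, a rotation can now also be forced by bpf_canwritebuf()
returning 0, even when the store buffer still has free space.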
diff --git a/sys/net/bpf_zerocopy.c b/sys/net/bpf_zerocopy.c
index d746bda..9ca2ba7 100644
--- a/sys/net/bpf_zerocopy.c
+++ b/sys/net/bpf_zerocopy.c
@@ -85,7 +85,7 @@ __FBSDID("$FreeBSD$");
* scatter-gather copying. One significant mitigating factor is that on
* systems with a direct memory map, we can avoid TLB misses.
*
- * At the front of the shared memor region is a bpf_zbuf_header, which
+ * At the front of the shared memory region is a bpf_zbuf_header, which
* contains shared control data to allow user space and the kernel to
* synchronize; this is included in zb_size, but not bpf_bufsize, so that BPF
* knows that the space is not available.
@@ -94,11 +94,19 @@ struct zbuf {
vm_offset_t zb_uaddr; /* User address, may be stale. */
size_t zb_size; /* Size of buffer, incl. header. */
u_int zb_numpages; /* Number of pages. */
+ int zb_flags; /* Flags on zbuf. */
struct sf_buf **zb_pages; /* Pages themselves. */
struct bpf_zbuf_header *zb_header; /* Shared header. */
};
/*
+ * When a buffer has been assigned to userspace, flag it as such, as the
+ * buffer may remain in the store position as a result of the user process
+ * not yet having acknowledged the buffer in the hold position.
+ */
+#define ZBUF_FLAG_IMMUTABLE 0x00000001 /* Set when owned by user. */
+
+/*
* Release a page we've previously wired.
*/
static void
@@ -254,6 +262,9 @@ bpf_zerocopy_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
src_bytes = (u_char *)src;
zb = (struct zbuf *)buf;
+ KASSERT((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0,
+ ("bpf_zerocopy_append_bytes: ZBUF_FLAG_IMMUTABLE"));
+
/*
* Scatter-gather copy to user pages mapped into kernel address space
* using sf_bufs: copy up to a page at a time.
@@ -303,6 +314,9 @@ bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
m = (struct mbuf *)src;
zb = (struct zbuf *)buf;
+ KASSERT((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0,
+ ("bpf_zerocopy_append_mbuf: ZBUF_FLAG_IMMUTABLE"));
+
/*
* Scatter gather both from an mbuf chain and to a user page set
* mapped into kernel address space using sf_bufs. If we're lucky,
@@ -344,9 +358,38 @@ bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
}
/*
+ * Notification from the BPF framework that a buffer in the store position is
+ * rejecting packets and may be considered full. We mark the buffer as
+ * immutable and assign to userspace so that it is immediately available for
+ * the user process to access.
+ */
+void
+bpf_zerocopy_buffull(struct bpf_d *d)
+{
+ struct zbuf *zb;
+
+ KASSERT(d->bd_bufmode == BPF_BUFMODE_ZBUF,
+ ("bpf_zerocopy_buffull: not in zbuf mode"));
+
+ zb = (struct zbuf *)d->bd_sbuf;
+ KASSERT(zb != NULL, ("bpf_zerocopy_buffull: zb == NULL"));
+
+ if ((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0) {
+ zb->zb_flags |= ZBUF_FLAG_IMMUTABLE;
+ zb->zb_header->bzh_kernel_len = d->bd_slen;
+ atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1);
+ }
+}
+
+/*
* Notification from the BPF framework that a buffer has moved into the held
* slot on a descriptor. Zero-copy BPF will update the shared page to let
- * the user process know.
+ * the user process know and flag the buffer as immutable if it hasn't
+ * already been marked immutable due to filling while it was in the store
+ * position.
+ *
+ * Note: identical logic to that in bpf_zerocopy_buffull(), except that we
+ * operate on bd_hbuf and bd_hlen.
*/
void
bpf_zerocopy_bufheld(struct bpf_d *d)
@@ -358,8 +401,12 @@ bpf_zerocopy_bufheld(struct bpf_d *d)
zb = (struct zbuf *)d->bd_hbuf;
KASSERT(zb != NULL, ("bpf_zerocopy_bufheld: zb == NULL"));
- zb->zb_header->bzh_kernel_len = d->bd_hlen;
- atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1);
+
+ if ((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0) {
+ zb->zb_flags |= ZBUF_FLAG_IMMUTABLE;
+ zb->zb_header->bzh_kernel_len = d->bd_hlen;
+ atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1);
+ }
}
/*
@@ -386,6 +433,28 @@ bpf_zerocopy_canfreebuf(struct bpf_d *d)
}
/*
+ * Query from the BPF framework as to whether or not the buffer currently in
+ * the store position can actually be written to. This may return false if
+ * the store buffer is assigned to userspace before the hold buffer is
+ * acknowledged.
+ */
+int
+bpf_zerocopy_canwritebuf(struct bpf_d *d)
+{
+ struct zbuf *zb;
+
+ KASSERT(d->bd_bufmode == BPF_BUFMODE_ZBUF,
+ ("bpf_zerocopy_canwritebuf: not in zbuf mode"));
+
+ zb = (struct zbuf *)d->bd_sbuf;
+ KASSERT(zb != NULL, ("bpf_zerocopy_canwritebuf: bd_sbuf NULL"));
+
+ if (zb->zb_flags & ZBUF_FLAG_IMMUTABLE)
+ return (0);
+ return (1);
+}
+
+/*
* Free zero copy buffers at request of descriptor.
*/
void
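bpf_zerocopy_canwritebuf() is the store-side counterpart of
bpf_zerocopy_canfreebuf(), whose body is not shown in this diff. A plausible
sketch of the generation-number comparison it performs on the hold buffer,
assuming it follows the same shared-header protocol (illustrative only, not
the verbatim function):

int
bpf_zerocopy_canfreebuf(struct bpf_d *d)
{
	struct zbuf *zb;

	KASSERT(d->bd_bufmode == BPF_BUFMODE_ZBUF,
	    ("bpf_zerocopy_canfreebuf: not in zbuf mode"));

	zb = (struct zbuf *)d->bd_hbuf;
	if (zb == NULL)
		return (0);

	/*
	 * The hold buffer may be reclaimed once the user generation number
	 * has caught up with the kernel generation number.
	 */
	if (atomic_load_acq_int(&zb->zb_header->bzh_user_gen) ==
	    zb->zb_header->bzh_kernel_gen)
		return (1);
	return (0);
}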
diff --git a/sys/net/bpf_zerocopy.h b/sys/net/bpf_zerocopy.h
index 33d1f25..92186a8 100644
--- a/sys/net/bpf_zerocopy.h
+++ b/sys/net/bpf_zerocopy.h
@@ -40,8 +40,10 @@ void bpf_zerocopy_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
void *src, u_int len);
void bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
void *src, u_int len);
+void bpf_zerocopy_buffull(struct bpf_d *);
void bpf_zerocopy_bufheld(struct bpf_d *);
int bpf_zerocopy_canfreebuf(struct bpf_d *);
+int bpf_zerocopy_canwritebuf(struct bpf_d *);
void bpf_zerocopy_free(struct bpf_d *d);
int bpf_zerocopy_ioctl_getzmax(struct thread *td, struct bpf_d *d,
size_t *i);
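For completeness, the getzmax ioctl declared above fits into the descriptor
setup sequence documented in bpf(4). The following is a hedged userspace
sketch with a hypothetical setup_zbuf() helper, assuming the BIOCSETBUFMODE,
BIOCGETZMAX, and BIOCSETZBUF ioctls and the struct bpf_zbuf fields bz_bufa,
bz_bufb, and bz_buflen; buffer sizing and alignment constraints are
simplified:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <net/bpf.h>
#include <err.h>

static void
setup_zbuf(int fd)
{
	struct bpf_zbuf bz;
	size_t zmax;
	u_int mode;

	/* Switch the descriptor into zero-copy buffer mode. */
	mode = BPF_BUFMODE_ZBUF;
	if (ioctl(fd, BIOCSETBUFMODE, &mode) < 0)
		err(1, "BIOCSETBUFMODE");

	/* Ask the kernel for the largest per-buffer size it will accept. */
	if (ioctl(fd, BIOCGETZMAX, &zmax) < 0)
		err(1, "BIOCGETZMAX");

	/* Allocate two shared buffers and register them with the kernel. */
	bz.bz_buflen = zmax;
	bz.bz_bufa = mmap(NULL, zmax, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_SHARED, -1, 0);
	bz.bz_bufb = mmap(NULL, zmax, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_SHARED, -1, 0);
	if (bz.bz_bufa == MAP_FAILED || bz.bz_bufb == MAP_FAILED)
		err(1, "mmap");
	if (ioctl(fd, BIOCSETZBUF, &bz) < 0)
		err(1, "BIOCSETZBUF");
}

Here fd is assumed to be a descriptor opened on /dev/bpf and bound to an
interface with BIOCSETIF; each registered buffer begins with the
bpf_zbuf_header used in the acknowledgment sketch earlier.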