summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorrwatson <rwatson@FreeBSD.org>2008-04-07 02:51:00 +0000
committerrwatson <rwatson@FreeBSD.org>2008-04-07 02:51:00 +0000
commit6d3db5778b32c179b5499f160873c71457fb1a56 (patch)
tree22f480af437b5929bf51d1bf686a1ec51d4fd260 /sys
parentd566093a4035dd7af0c40607a130aa52bfb4e301 (diff)
downloadFreeBSD-src-6d3db5778b32c179b5499f160873c71457fb1a56.zip
FreeBSD-src-6d3db5778b32c179b5499f160873c71457fb1a56.tar.gz
Maintain and observe a ZBUF_FLAG_IMMUTABLE flag on zero-copy BPF
buffer kernel descriptors, which is used to allow the buffer currently in the BPF "store" position to be assigned to userspace when it fills, even if userspace hasn't acknowledged the buffer in the "hold" position yet. To implement this, notify the buffer model when a buffer becomes full, and check that the store buffer is writable, not just for it being full, before trying to append new packet data. Shared memory buffers will be assigned to userspace at most once per fill, be it in the store or in the hold position. This removes the restriction that at most one shared memory buffer can be owned by userspace, reducing the chances that userspace will need to call select() after acknowledging one buffer in order to wait for the next buffer when under high load. This more fully realizes the goal of zero system calls in order to process a high-speed packet stream from BPF. Update bpf.4 to reflect that both buffers may be owned by userspace at once; caution against assuming this.
Diffstat (limited to 'sys')
-rw-r--r--sys/net/bpf.c60
-rw-r--r--sys/net/bpf_zerocopy.c77
-rw-r--r--sys/net/bpf_zerocopy.h2
3 files changed, 125 insertions, 14 deletions
diff --git a/sys/net/bpf.c b/sys/net/bpf.c
index 7de49049..f95dd8d 100644
--- a/sys/net/bpf.c
+++ b/sys/net/bpf.c
@@ -218,6 +218,45 @@ bpf_canfreebuf(struct bpf_d *d)
return (0);
}
+/*
+ * Allow the buffer model to indicate that the current store buffer is
+ * immutable, regardless of the appearance of space. Return (1) if the
+ * buffer is writable, and (0) if not.
+ */
+static int
+bpf_canwritebuf(struct bpf_d *d)
+{
+
+ BPFD_LOCK_ASSERT(d);
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_ZBUF:
+ return (bpf_zerocopy_canwritebuf(d));
+ }
+ return (1);
+}
+
+/*
+ * Notify buffer model that an attempt to write to the store buffer has
+ * resulted in a dropped packet, in which case the buffer may be considered
+ * full.
+ */
+static void
+bpf_buffull(struct bpf_d *d)
+{
+
+ BPFD_LOCK_ASSERT(d);
+
+ switch (d->bd_bufmode) {
+ case BPF_BUFMODE_ZBUF:
+ bpf_zerocopy_buffull(d);
+ break;
+ }
+}
+
+/*
+ * Notify the buffer model that a buffer has moved into the hold position.
+ */
void
bpf_bufheld(struct bpf_d *d)
{
@@ -1691,27 +1730,28 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
/*
* Round up the end of the previous packet to the next longword.
+ *
+ * Drop the packet if there's no room and no hope of room
+ * If the packet would overflow the storage buffer or the storage
+ * buffer is considered immutable by the buffer model, try to rotate
+ * the buffer and wakeup pending processes.
*/
curlen = BPF_WORDALIGN(d->bd_slen);
- if (curlen + totlen > d->bd_bufsize) {
- /*
- * This packet will overflow the storage buffer.
- * Rotate the buffers if we can, then wakeup any
- * pending reads.
- */
+ if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
if (d->bd_fbuf == NULL) {
/*
- * We haven't completed the previous read yet,
- * so drop the packet.
+ * There's no room in the store buffer, and no
+ * prospect of room, so drop the packet. Notify the
+ * buffer model.
*/
+ bpf_buffull(d);
++d->bd_dcount;
return;
}
ROTATE_BUFFERS(d);
do_wakeup = 1;
curlen = 0;
- }
- else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
+ } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
/*
* Immediate mode is set, or the read timeout has already
* expired during a select call. A packet arrived, so the
diff --git a/sys/net/bpf_zerocopy.c b/sys/net/bpf_zerocopy.c
index d746bda..9ca2ba7 100644
--- a/sys/net/bpf_zerocopy.c
+++ b/sys/net/bpf_zerocopy.c
@@ -85,7 +85,7 @@ __FBSDID("$FreeBSD$");
* scatter-gather copying. One significant mitigating factor is that on
* systems with a direct memory map, we can avoid TLB misses.
*
- * At the front of the shared memor region is a bpf_zbuf_header, which
+ * At the front of the shared memory region is a bpf_zbuf_header, which
* contains shared control data to allow user space and the kernel to
* synchronize; this is included in zb_size, but not bpf_bufsize, so that BPF
* knows that the space is not available.
@@ -94,11 +94,19 @@ struct zbuf {
vm_offset_t zb_uaddr; /* User address, may be stale. */
size_t zb_size; /* Size of buffer, incl. header. */
u_int zb_numpages; /* Number of pages. */
+ int zb_flags; /* Flags on zbuf. */
struct sf_buf **zb_pages; /* Pages themselves. */
struct bpf_zbuf_header *zb_header; /* Shared header. */
};
/*
+ * When a buffer has been assigned to userspace, flag it as such, as the
+ * buffer may remain in the store position as a result of the user process
+ * not yet having acknowledged the buffer in the hold position yet.
+ */
+#define ZBUF_FLAG_IMMUTABLE 0x00000001 /* Set when owned by user. */
+
+/*
* Release a page we've previously wired.
*/
static void
@@ -254,6 +262,9 @@ bpf_zerocopy_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
src_bytes = (u_char *)src;
zb = (struct zbuf *)buf;
+ KASSERT((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0,
+ ("bpf_zerocopy_append_bytes: ZBUF_FLAG_IMMUTABLE"));
+
/*
* Scatter-gather copy to user pages mapped into kernel address space
* using sf_bufs: copy up to a page at a time.
@@ -303,6 +314,9 @@ bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
m = (struct mbuf *)src;
zb = (struct zbuf *)buf;
+ KASSERT((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0,
+ ("bpf_zerocopy_append_mbuf: ZBUF_FLAG_IMMUTABLE"));
+
/*
* Scatter gather both from an mbuf chain and to a user page set
* mapped into kernel address space using sf_bufs. If we're lucky,
@@ -344,9 +358,38 @@ bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
}
/*
+ * Notification from the BPF framework that a buffer in the store position is
+ * rejecting packets and may be considered full. We mark the buffer as
+ * immutable and assign to userspace so that it is immediately available for
+ * the user process to access.
+ */
+void
+bpf_zerocopy_buffull(struct bpf_d *d)
+{
+ struct zbuf *zb;
+
+ KASSERT(d->bd_bufmode == BPF_BUFMODE_ZBUF,
+ ("bpf_zerocopy_buffull: not in zbuf mode"));
+
+ zb = (struct zbuf *)d->bd_sbuf;
+ KASSERT(zb != NULL, ("bpf_zerocopy_buffull: zb == NULL"));
+
+ if ((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0) {
+ zb->zb_flags |= ZBUF_FLAG_IMMUTABLE;
+ zb->zb_header->bzh_kernel_len = d->bd_slen;
+ atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1);
+ }
+}
+
+/*
* Notification from the BPF framework that a buffer has moved into the held
* slot on a descriptor. Zero-copy BPF will update the shared page to let
- * the user process know.
+ * the user process know and flag the buffer as immutable if it hasn't
+ * already been marked immutable due to filling while it was in the store
+ * position.
+ *
+ * Note: identical logic as in bpf_zerocopy_buffull(), except that we operate
+ * on bd_hbuf and bd_hlen.
*/
void
bpf_zerocopy_bufheld(struct bpf_d *d)
@@ -358,8 +401,12 @@ bpf_zerocopy_bufheld(struct bpf_d *d)
zb = (struct zbuf *)d->bd_hbuf;
KASSERT(zb != NULL, ("bpf_zerocopy_bufheld: zb == NULL"));
- zb->zb_header->bzh_kernel_len = d->bd_hlen;
- atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1);
+
+ if ((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0) {
+ zb->zb_flags |= ZBUF_FLAG_IMMUTABLE;
+ zb->zb_header->bzh_kernel_len = d->bd_hlen;
+ atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1);
+ }
}
/*
@@ -386,6 +433,28 @@ bpf_zerocopy_canfreebuf(struct bpf_d *d)
}
/*
+ * Query from the BPF framework as to whether or not the buffer current in
+ * the store position can actually be written to. This may return false if
+ * the store buffer is assigned to userspace before the hold buffer is
+ * acknowledged.
+ */
+int
+bpf_zerocopy_canwritebuf(struct bpf_d *d)
+{
+ struct zbuf *zb;
+
+ KASSERT(d->bd_bufmode == BPF_BUFMODE_ZBUF,
+ ("bpf_zerocopy_canwritebuf: not in zbuf mode"));
+
+ zb = (struct zbuf *)d->bd_sbuf;
+ KASSERT(zb != NULL, ("bpf_zerocopy_canwritebuf: bd_sbuf NULL"));
+
+ if (zb->zb_flags & ZBUF_FLAG_IMMUTABLE)
+ return (0);
+ return (1);
+}
+
+/*
* Free zero copy buffers at request of descriptor.
*/
void
diff --git a/sys/net/bpf_zerocopy.h b/sys/net/bpf_zerocopy.h
index 33d1f25..92186a8 100644
--- a/sys/net/bpf_zerocopy.h
+++ b/sys/net/bpf_zerocopy.h
@@ -40,8 +40,10 @@ void bpf_zerocopy_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
void *src, u_int len);
void bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
void *src, u_int len);
+void bpf_zerocopy_buffull(struct bpf_d *);
void bpf_zerocopy_bufheld(struct bpf_d *);
int bpf_zerocopy_canfreebuf(struct bpf_d *);
+int bpf_zerocopy_canwritebuf(struct bpf_d *);
void bpf_zerocopy_free(struct bpf_d *d);
int bpf_zerocopy_ioctl_getzmax(struct thread *td, struct bpf_d *d,
size_t *i);
OpenPOWER on IntegriCloud