diff options
-rw-r--r-- | share/man/man4/bpf.4 | 16 | ||||
-rw-r--r-- | sys/net/bpf.c | 60 | ||||
-rw-r--r-- | sys/net/bpf_zerocopy.c | 77 | ||||
-rw-r--r-- | sys/net/bpf_zerocopy.h | 2 |
4 files changed, 133 insertions, 22 deletions
diff --git a/share/man/man4/bpf.4 b/share/man/man4/bpf.4 index 9116b2d..39e1399 100644 --- a/share/man/man4/bpf.4 +++ b/share/man/man4/bpf.4 @@ -259,14 +259,14 @@ may be used to sleep awaiting the availbility of a completed buffer. They will return a readable file descriptor when ownership of the next buffer is assigned to user space. .Pp -In the current implementation, the kernel will assign ownership of at most -one buffer at a time to the user process. -The user processes must acknowledge the current buffer in order to be -notified that the next buffer is ready for processing. -Programs should not rely on this as an invariant, as it may change in future -versions; in particular, they must maintain their own notion of which buffer -is "next" so that if both buffers are owned by userspace, it can process them -in the correct order. +In the current implementation, the kernel may assign zero, one, or both +buffers to the user process; however, an earlier implementation maintained +the invariant that at most one buffer could be assigned to the user process +at a time. +In order to both ensure progress and high performance, user processes should +acknowledge a completely processed buffer as quickly as possible, returning +it for reuse, and not block waiting on a second buffer while holding another +buffer. .Sh IOCTLS The .Xr ioctl 2 diff --git a/sys/net/bpf.c b/sys/net/bpf.c index 7de49049..f95dd8d 100644 --- a/sys/net/bpf.c +++ b/sys/net/bpf.c @@ -218,6 +218,45 @@ bpf_canfreebuf(struct bpf_d *d) return (0); } +/* + * Allow the buffer model to indicate that the current store buffer is + * immutable, regardless of the appearance of space. Return (1) if the + * buffer is writable, and (0) if not. 
+ */ +static int +bpf_canwritebuf(struct bpf_d *d) +{ + + BPFD_LOCK_ASSERT(d); + + switch (d->bd_bufmode) { + case BPF_BUFMODE_ZBUF: + return (bpf_zerocopy_canwritebuf(d)); + } + return (1); +} + +/* + * Notify buffer model that an attempt to write to the store buffer has + * resulted in a dropped packet, in which case the buffer may be considered + * full. + */ +static void +bpf_buffull(struct bpf_d *d) +{ + + BPFD_LOCK_ASSERT(d); + + switch (d->bd_bufmode) { + case BPF_BUFMODE_ZBUF: + bpf_zerocopy_buffull(d); + break; + } +} + +/* + * Notify the buffer model that a buffer has moved into the hold position. + */ void bpf_bufheld(struct bpf_d *d) { @@ -1691,27 +1730,28 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, /* * Round up the end of the previous packet to the next longword. + * + * Drop the packet if there's no room and no hope of room. + * If the packet would overflow the storage buffer or the storage + * buffer is considered immutable by the buffer model, try to rotate + * the buffer and wakeup pending processes. */ curlen = BPF_WORDALIGN(d->bd_slen); - if (curlen + totlen > d->bd_bufsize) { - /* - * This packet will overflow the storage buffer. - * Rotate the buffers if we can, then wakeup any - * pending reads. - */ + if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) { if (d->bd_fbuf == NULL) { /* - * We haven't completed the previous read yet, - * so drop the packet. + * There's no room in the store buffer, and no + * prospect of room, so drop the packet. Notify the + * buffer model. */ + bpf_buffull(d); ++d->bd_dcount; return; } ROTATE_BUFFERS(d); do_wakeup = 1; curlen = 0; - } - else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) + } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) /* * Immediate mode is set, or the read timeout has already * expired during a select call. 
A packet arrived, so the diff --git a/sys/net/bpf_zerocopy.c b/sys/net/bpf_zerocopy.c index d746bda..9ca2ba7 100644 --- a/sys/net/bpf_zerocopy.c +++ b/sys/net/bpf_zerocopy.c @@ -85,7 +85,7 @@ __FBSDID("$FreeBSD$"); * scatter-gather copying. One significant mitigating factor is that on * systems with a direct memory map, we can avoid TLB misses. * - * At the front of the shared memor region is a bpf_zbuf_header, which + * At the front of the shared memory region is a bpf_zbuf_header, which * contains shared control data to allow user space and the kernel to * synchronize; this is included in zb_size, but not bpf_bufsize, so that BPF * knows that the space is not available. @@ -94,11 +94,19 @@ struct zbuf { vm_offset_t zb_uaddr; /* User address, may be stale. */ size_t zb_size; /* Size of buffer, incl. header. */ u_int zb_numpages; /* Number of pages. */ + int zb_flags; /* Flags on zbuf. */ struct sf_buf **zb_pages; /* Pages themselves. */ struct bpf_zbuf_header *zb_header; /* Shared header. */ }; /* + * When a buffer has been assigned to userspace, flag it as such, as the + * buffer may remain in the store position as a result of the user process + * not yet having acknowledged the buffer in the hold position. + */ +#define ZBUF_FLAG_IMMUTABLE 0x00000001 /* Set when owned by user. */ + +/* * Release a page we've previously wired. */ static void @@ -254,6 +262,9 @@ bpf_zerocopy_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, src_bytes = (u_char *)src; zb = (struct zbuf *)buf; + KASSERT((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0, + ("bpf_zerocopy_append_bytes: ZBUF_FLAG_IMMUTABLE")); + /* * Scatter-gather copy to user pages mapped into kernel address space * using sf_bufs: copy up to a page at a time. 
@@ -303,6 +314,9 @@ bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, m = (struct mbuf *)src; zb = (struct zbuf *)buf; + KASSERT((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0, + ("bpf_zerocopy_append_mbuf: ZBUF_FLAG_IMMUTABLE")); + /* * Scatter gather both from an mbuf chain and to a user page set * mapped into kernel address space using sf_bufs. If we're lucky, @@ -344,9 +358,38 @@ bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, } /* + * Notification from the BPF framework that a buffer in the store position is + * rejecting packets and may be considered full. We mark the buffer as + * immutable and assign to userspace so that it is immediately available for + * the user process to access. + */ +void +bpf_zerocopy_buffull(struct bpf_d *d) +{ + struct zbuf *zb; + + KASSERT(d->bd_bufmode == BPF_BUFMODE_ZBUF, + ("bpf_zerocopy_buffull: not in zbuf mode")); + + zb = (struct zbuf *)d->bd_sbuf; + KASSERT(zb != NULL, ("bpf_zerocopy_buffull: zb == NULL")); + + if ((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0) { + zb->zb_flags |= ZBUF_FLAG_IMMUTABLE; + zb->zb_header->bzh_kernel_len = d->bd_slen; + atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1); + } +} + +/* * Notification from the BPF framework that a buffer has moved into the held * slot on a descriptor. Zero-copy BPF will update the shared page to let - * the user process know. + * the user process know and flag the buffer as immutable if it hasn't + * already been marked immutable due to filling while it was in the store + * position. + * + * Note: identical logic as in bpf_zerocopy_buffull(), except that we operate + * on bd_hbuf and bd_hlen. 
*/ void bpf_zerocopy_bufheld(struct bpf_d *d) @@ -358,8 +401,12 @@ bpf_zerocopy_bufheld(struct bpf_d *d) zb = (struct zbuf *)d->bd_hbuf; KASSERT(zb != NULL, ("bpf_zerocopy_bufheld: zb == NULL")); - zb->zb_header->bzh_kernel_len = d->bd_hlen; - atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1); + + if ((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0) { + zb->zb_flags |= ZBUF_FLAG_IMMUTABLE; + zb->zb_header->bzh_kernel_len = d->bd_hlen; + atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1); + } } /* @@ -386,6 +433,28 @@ bpf_zerocopy_canfreebuf(struct bpf_d *d) } /* + * Query from the BPF framework as to whether or not the buffer currently in + * the store position can actually be written to. This may return false if + * the store buffer is assigned to userspace before the hold buffer is + * acknowledged. + */ +int +bpf_zerocopy_canwritebuf(struct bpf_d *d) +{ + struct zbuf *zb; + + KASSERT(d->bd_bufmode == BPF_BUFMODE_ZBUF, + ("bpf_zerocopy_canwritebuf: not in zbuf mode")); + + zb = (struct zbuf *)d->bd_sbuf; + KASSERT(zb != NULL, ("bpf_zerocopy_canwritebuf: bd_sbuf NULL")); + + if (zb->zb_flags & ZBUF_FLAG_IMMUTABLE) + return (0); + return (1); +} + +/* * Free zero copy buffers at request of descriptor. */ void diff --git a/sys/net/bpf_zerocopy.h b/sys/net/bpf_zerocopy.h index 33d1f25..92186a8 100644 --- a/sys/net/bpf_zerocopy.h +++ b/sys/net/bpf_zerocopy.h @@ -40,8 +40,10 @@ void bpf_zerocopy_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src, u_int len); void bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src, u_int len); +void bpf_zerocopy_buffull(struct bpf_d *); void bpf_zerocopy_bufheld(struct bpf_d *); int bpf_zerocopy_canfreebuf(struct bpf_d *); +int bpf_zerocopy_canwritebuf(struct bpf_d *); void bpf_zerocopy_free(struct bpf_d *d); int bpf_zerocopy_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i); |