| author | attilio <attilio@FreeBSD.org> | 2012-12-28 17:41:36 +0000 |
|---|---|---|
| committer | attilio <attilio@FreeBSD.org> | 2012-12-28 17:41:36 +0000 |
| commit | 5da31a993660c9f95f598228167307ae36fc3177 (patch) | |
| tree | 1d166478174f2b155e17ede7894444b8a895a7d6 /sys/sys/buf_ring.h | |
| parent | 1bd72ff3fbdfa47a3afaec309975829ddf71fc25 (diff) | |
Improve bufring impl:
- Remove unused br_prod_bufs member
- Fixup r241037: buf_ring pads the br_prod_* and br_cons_* members to
  128 bytes, assuming a fixed cache line size for all architectures.
  However, the above-mentioned revision broke that padding. Use
  explicit alignment to CACHE_LINE_SIZE on the members that start each
  newly padded section (see the layout sketch after this message).
  The padding is not important for performance in the DEBUG_BUFRING
  case, where the br_cons members are left sharing a cache line with
  br_lock.
- Fixup r244732: remove the incorrectly added membar in
  buf_ring_dequeue_sc(), where the surrounding locking should be
  enough.
- Drastically reduce the number of membars used (practically
  reverting r244732) by switching the rmb() in buf_ring_dequeue_mc()
  and the wmb() in buf_ring_enqueue() to full barriers. This, along
  with the removal of br_prod_bufs, should fix the ordering issues
  explained in the comments the change adds.
This patch is not targeted for MFC.
Sponsored by: EMC / Isilon storage division
Reviewed by: glebius
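To make the false-sharing argument in the second item concrete, here is a minimal user-space layout sketch. The field names mirror struct buf_ring, but the struct itself, the hard-coded 64-byte line size, and the GCC attribute spelling are illustrative assumptions; the kernel header uses the __aligned() macro with the per-architecture CACHE_LINE_SIZE:

```c
#include <stdint.h>

/*
 * Assumed 64-byte cache line for this sketch; the kernel header uses
 * the per-architecture CACHE_LINE_SIZE constant instead.
 */
#define	LINE_SIZE	64

struct ring_sketch {
	/* Producer cursors, written by enqueuing CPUs. */
	volatile uint32_t	prod_head;
	volatile uint32_t	prod_tail;

	/*
	 * Aligning the first consumer member starts a fresh cache
	 * line, so producer and consumer stores stop bouncing the
	 * same line between CPUs (false sharing).  The compiler
	 * inserts the padding itself, unlike hand-counted pad arrays
	 * that go stale when members are added or removed.
	 */
	volatile uint32_t	cons_head __attribute__((__aligned__(LINE_SIZE)));
	volatile uint32_t	cons_tail;

	/* The ring storage likewise starts on its own cache line. */
	void			*ring[0] __attribute__((__aligned__(LINE_SIZE)));
};
```

Letting the compiler derive the padding from an alignment request is the design point: the old fixed 128-byte _pad0[11]/_pad1[14] arrays were sized by hand and broke as soon as the member list changed, which is what the commit message reports r241037 tripped over.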
Diffstat (limited to 'sys/sys/buf_ring.h')
| mode | file | changes |
|---|---|---|
| -rw-r--r-- | sys/sys/buf_ring.h | 40 |

1 file changed, 19 insertions(+), 21 deletions(-)
```diff
diff --git a/sys/sys/buf_ring.h b/sys/sys/buf_ring.h
index 66f1607..b660d9b 100644
--- a/sys/sys/buf_ring.h
+++ b/sys/sys/buf_ring.h
@@ -47,25 +47,14 @@ struct buf_ring {
 	int			br_prod_size;
 	int			br_prod_mask;
 	uint64_t		br_drops;
-	uint64_t		br_prod_bufs;
-	/*
-	 * Pad out to next L2 cache line
-	 */
-	uint64_t		_pad0[11];
-
-	volatile uint32_t	br_cons_head;
+	volatile uint32_t	br_cons_head __aligned(CACHE_LINE_SIZE);
 	volatile uint32_t	br_cons_tail;
 	int			br_cons_size;
 	int			br_cons_mask;
-
-	/*
-	 * Pad out to next L2 cache line
-	 */
-	uint64_t		_pad1[14];
 #ifdef DEBUG_BUFRING
 	struct mtx		*br_lock;
 #endif
-	void			*br_ring[0];
+	void			*br_ring[0] __aligned(CACHE_LINE_SIZE);
 };
 
 /*
@@ -103,17 +92,21 @@ buf_ring_enqueue(struct buf_ring *br, void *buf)
 		panic("dangling value in enqueue");
 #endif
 	br->br_ring[prod_head] = buf;
-	wmb();
+
+	/*
+	 * The full memory barrier also avoids that br_prod_tail store
+	 * is reordered before the br_ring[prod_head] is full setup.
+	 */
+	mb();
 
 	/*
 	 * If there are other enqueues in progress
 	 * that preceeded us, we need to wait for them
 	 * to complete
 	 */
-	while (atomic_load_acq_32(&br->br_prod_tail) != prod_head)
+	while (br->br_prod_tail != prod_head)
 		cpu_spinwait();
-	br->br_prod_bufs++;
-	atomic_store_rel_32(&br->br_prod_tail, prod_next);
+	br->br_prod_tail = prod_next;
 	critical_exit();
 	return (0);
 }
@@ -150,17 +143,22 @@ buf_ring_dequeue_mc(struct buf_ring *br)
 #ifdef DEBUG_BUFRING
 	br->br_ring[cons_head] = NULL;
 #endif
-	rmb();
+
+	/*
+	 * The full memory barrier also avoids that br_ring[cons_read]
+	 * load is reordered after br_cons_tail is set.
+	 */
+	mb();
 
 	/*
 	 * If there are other dequeues in progress
 	 * that preceeded us, we need to wait for them
 	 * to complete
 	 */
-	while (atomic_load_acq_32(&br->br_cons_tail) != cons_head)
+	while (br->br_cons_tail != cons_head)
 		cpu_spinwait();
-	atomic_store_rel_32(&br->br_cons_tail, cons_next);
+	br->br_cons_tail = cons_next;
 
 	critical_exit();
 	return (buf);
@@ -205,7 +203,7 @@ buf_ring_dequeue_sc(struct buf_ring *br)
 		panic("inconsistent list cons_tail=%d cons_head=%d",
 		    br->br_cons_tail, cons_head);
 #endif
-	atomic_store_rel_32(&br->br_cons_tail, cons_next);
+	br->br_cons_tail = cons_next;
 	return (buf);
 }
```
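For orientation, here is a sketch of how a driver typically drives this API after the change. It is kernel-only code: the my_* names and the ring size are hypothetical, while buf_ring_alloc(), buf_ring_enqueue(), and buf_ring_dequeue_sc() are the interfaces declared in this header:

```c
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/buf_ring.h>

/* Hypothetical transmit ring; the size must be a power of two. */
#define	MY_TX_RING_SIZE	4096

static struct buf_ring *my_txr;

static int
my_tx_ring_init(void)
{
	/*
	 * The mutex argument is only consumed in the DEBUG_BUFRING
	 * case; pass NULL otherwise.
	 */
	my_txr = buf_ring_alloc(MY_TX_RING_SIZE, M_DEVBUF, M_WAITOK, NULL);
	return (my_txr == NULL ? ENOMEM : 0);
}

static int
my_tx_enqueue(struct mbuf *m)
{
	/*
	 * Safe from several CPUs at once: enqueue publishes the slot
	 * with the full barrier before advancing br_prod_tail, and
	 * returns ENOBUFS when the ring is full.
	 */
	return (buf_ring_enqueue(my_txr, m));
}

static struct mbuf *
my_tx_dequeue(void)
{
	/*
	 * Single-consumer dequeue: callers must serialize themselves
	 * (e.g. with the driver's tx lock), which is why the _sc path
	 * needs no membar of its own, per this commit.
	 */
	return (buf_ring_dequeue_sc(my_txr));
}
```

The split between the _mc and _sc consumers is what lets the commit drop the membar from buf_ring_dequeue_sc(): the external serialization the _sc contract already requires makes a barrier there redundant.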