diff options
author | Omar Sandoval <osandov@fb.com> | 2017-01-18 11:55:22 -0800 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2017-01-18 13:41:55 -0700 |
commit | 6c0ca7ae292adea09b8bdd33a524bb9326c3e989 (patch) | |
tree | bbe47f1b91edbae94b582649b7d11a4b2b2fb877 /lib/sbitmap.c | |
parent | f66227de5924ed0fde1823f5cbc4d8b8f45faaa2 (diff) | |
download | op-kernel-dev-6c0ca7ae292adea09b8bdd33a524bb9326c3e989.zip op-kernel-dev-6c0ca7ae292adea09b8bdd33a524bb9326c3e989.tar.gz |
sbitmap: fix wakeup hang after sbq resize
When we resize a struct sbitmap_queue, we update the wakeup batch size,
but we don't update the wait count in the struct sbq_wait_states. If we
resized down from a size which could use a bigger batch size, these
counts could be too large and cause us to miss necessary wakeups. To fix
this, update the wait counts when we resize (ensuring some careful
memory ordering so that it's safe w.r.t. concurrent clears).
This also fixes a theoretical issue where two threads could end up
bumping the wait count up by the batch size, which could also
potentially lead to hangs.
Reported-by: Martin Raiber <martin@urbackup.org>
Fixes: e3a2b3f931f5 ("blk-mq: allow changing of queue depth through sysfs")
Fixes: 2971c35f3588 ("blk-mq: bitmap tag: fix race on blk_mq_bitmap_tags::wake_cnt")
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'lib/sbitmap.c')
-rw-r--r-- | lib/sbitmap.c | 35 |
1 files changed, 30 insertions, 5 deletions
diff --git a/lib/sbitmap.c b/lib/sbitmap.c index df4e472..8f5c3b2 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -239,7 +239,19 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) { - sbq->wake_batch = sbq_calc_wake_batch(depth); + unsigned int wake_batch = sbq_calc_wake_batch(depth); + int i; + + if (sbq->wake_batch != wake_batch) { + WRITE_ONCE(sbq->wake_batch, wake_batch); + /* + * Pairs with the memory barrier in sbq_wake_up() to ensure that + * the batch size is updated before the wait counts. + */ + smp_mb__before_atomic(); + for (i = 0; i < SBQ_WAIT_QUEUES; i++) + atomic_set(&sbq->ws[i].wait_cnt, 1); + } sbitmap_resize(&sbq->sb, depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_resize); @@ -297,6 +309,7 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) static void sbq_wake_up(struct sbitmap_queue *sbq) { struct sbq_wait_state *ws; + unsigned int wake_batch; int wait_cnt; /* @@ -313,10 +326,22 @@ static void sbq_wake_up(struct sbitmap_queue *sbq) return; wait_cnt = atomic_dec_return(&ws->wait_cnt); - if (unlikely(wait_cnt < 0)) - wait_cnt = atomic_inc_return(&ws->wait_cnt); - if (wait_cnt == 0) { - atomic_add(sbq->wake_batch, &ws->wait_cnt); + if (wait_cnt <= 0) { + wake_batch = READ_ONCE(sbq->wake_batch); + /* + * Pairs with the memory barrier in sbitmap_queue_resize() to + * ensure that we see the batch size update before the wait + * count is reset. + */ + smp_mb__before_atomic(); + /* + * If there are concurrent callers to sbq_wake_up(), the last + * one to decrement the wait count below zero will bump it back + * up. If there is a concurrent resize, the count reset will + * either cause the cmpxchg to fail or overwrite after the + * cmpxchg. + */ + atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); sbq_index_atomic_inc(&sbq->wake_index); wake_up(&ws->wait); } |