From f3da54ba140c6427fa4a32913e1bf406f41b5dda Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 13 Sep 2007 14:26:53 +0200 Subject: Fix race with shared tag queue maps There's a race condition in blk_queue_end_tag() for shared tag maps, users include stex (promise supertrak thingy) and qla2xxx. The former at least has reported bugs in this area, not sure why we haven't seen any for the latter. It could be because the window is narrow and that other conditions in the qla2xxx code hide this. It's a real bug, though, as the stex smp users can attest. We need to ensure two things - the tag bit clearing needs to happen AFTER we cleared the tag pointer, as the tag bit clearing/setting is what protects this map. Secondly, we need to ensure that the visibility of the tag pointer and tag bit clear are ordered properly. [ I removed the SMP barriers - "test_and_clear_bit()" already implies all the required barriers. -- Linus ] Also see http://bugzilla.kernel.org/show_bug.cgi?id=7842 Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- block/ll_rw_blk.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'block/ll_rw_blk.c') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index a15845c..cd20367 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -1075,12 +1075,6 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq) */ return; - if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) { - printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", - __FUNCTION__, tag); - return; - } - list_del_init(&rq->queuelist); rq->cmd_flags &= ~REQ_QUEUED; rq->tag = -1; @@ -1090,6 +1084,13 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq) __FUNCTION__, tag); bqt->tag_index[tag] = NULL; + + if (unlikely(!test_and_clear_bit(tag, bqt->tag_map))) { + printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", + __FUNCTION__, tag); + return; + } + bqt->busy--; } -- cgit v1.1 From dd941252a81b02b5915e2db160fe02c972875846 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 14 Sep 2007 08:41:12 +0200 Subject: shared tag queue barrier comment Should add some comments for the tag barriers (they won't be so important if we can switch over to the explicit _lock bitops, but for now we should make it clear). Jens' original patch said a barrier after the test_and_clear_bit was also required. I can't see why (and it would prevent the use of the _lock bitop). Acked-by: Jens Axboe Signed-off-by: Linus Torvalds -- --- block/ll_rw_blk.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'block/ll_rw_blk.c') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index cd20367..ed39313 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -1085,6 +1085,12 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq) bqt->tag_index[tag] = NULL; + /* + * We use test_and_clear_bit's memory ordering properties here. + * The tag_map bit acts as a lock for tag_index[bit], so we need + * a barrer before clearing the bit (precisely: release semantics). + * Could use clear_bit_unlock when it is merged. + */ if (unlikely(!test_and_clear_bit(tag, bqt->tag_map))) { printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", __FUNCTION__, tag); @@ -1137,6 +1143,10 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) return 1; } while (test_and_set_bit(tag, bqt->tag_map)); + /* + * We rely on test_and_set_bit providing lock memory ordering semantics + * (could use test_and_set_bit_lock when it is merged). + */ rq->cmd_flags |= REQ_QUEUED; rq->tag = tag; -- cgit v1.1