From c1c201200a359cf3b6e2e36a4236cdca77a3cd8e Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Tue, 3 Feb 2009 07:47:29 +0100
Subject: bsg: Fix sense buffer bug in SG_IO

When submitting requests via SG_IO, which does a sync io, a
bsg_command is not allocated. So an in-Kernel sense_buffer was not
set. However when calling blk_execute_rq() with no sense buffer
one is provided from the stack. Now bsg at blk_complete_sgv4_hdr_rq()
would check if rq->sense_len and a sense was requested by sg_io_v4
the rq->sense was copy_user() back, but by now it is already mangled
stack memory.

I have fixed that by forcing a sense_buffer when calling bsg_map_hdr().
The bsg_command->sense is provided in the write/read path like before,
and on-the-stack buffer is provided when doing SG_IO.

I have also fixed a dprintk message to print rq->errors in hex because
of the scsi bit-field use of this member. For other block devices it
does not matter anyway.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/bsg.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'block')

diff --git a/block/bsg.c b/block/bsg.c
index d414bb5..0ce8806 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -244,7 +244,8 @@ bsg_validate_sgv4_hdr(struct request_queue *q, struct sg_io_v4 *hdr, int *rw)
  * map sg_io_v4 to a request.
  */
 static struct request *
-bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
+bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
+	    u8 *sense)
 {
 	struct request_queue *q = bd->queue;
 	struct request *rq, *next_rq = NULL;
@@ -306,6 +307,10 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
 		if (ret)
 			goto out;
 	}
+
+	rq->sense = sense;
+	rq->sense_len = 0;
+
 	return rq;
 out:
 	if (rq->cmd != rq->__cmd)
@@ -348,9 +353,6 @@ static void bsg_rq_end_io(struct request *rq, int uptodate)
 static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
 			    struct bsg_command *bc, struct request *rq)
 {
-	rq->sense = bc->sense;
-	rq->sense_len = 0;
-
 	/*
 	 * add bc command to busy queue and submit rq for io
 	 */
@@ -419,7 +421,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 {
 	int ret = 0;
 
-	dprintk("rq %p bio %p %u\n", rq, bio, rq->errors);
+	dprintk("rq %p bio %p 0x%x\n", rq, bio, rq->errors);
 	/*
 	 * fill in all the output members
 	 */
@@ -635,7 +637,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf,
 		/*
 		 * get a request, fill in the blanks, and add to request queue
 		 */
-		rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm);
+		rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm, bc->sense);
 		if (IS_ERR(rq)) {
 			ret = PTR_ERR(rq);
 			rq = NULL;
@@ -922,11 +924,12 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		struct request *rq;
 		struct bio *bio, *bidi_bio = NULL;
 		struct sg_io_v4 hdr;
+		u8 sense[SCSI_SENSE_BUFFERSIZE];
 
 		if (copy_from_user(&hdr, uarg, sizeof(hdr)))
 			return -EFAULT;
 
-		rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE);
+		rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE, sense);
 		if (IS_ERR(rq))
 			return PTR_ERR(rq);
 
-- 
cgit v1.1


From 93dbb393503d53cd226e5e1f0088fe8f4dbaa2b8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Mon, 16 Feb 2009 10:25:40 +0100
Subject: block: fix bad definition of BIO_RW_SYNC

We can't OR shift values, so get rid of BIO_RW_SYNC and use BIO_RW_SYNCIO
and BIO_RW_UNPLUG explicitly. This brings back the behaviour from before
213d9417fec62ef4c3675621b9364a667954d4dd.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blktrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block')

diff --git a/block/blktrace.c b/block/blktrace.c
index 39cc3bf..7cf9d1f 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -142,7 +142,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 
 	what |= ddir_act[rw & WRITE];
 	what |= MASK_TC_BIT(rw, BARRIER);
-	what |= MASK_TC_BIT(rw, SYNC);
+	what |= MASK_TC_BIT(rw, SYNCIO);
 	what |= MASK_TC_BIT(rw, AHEAD);
 	what |= MASK_TC_BIT(rw, META);
 	what |= MASK_TC_BIT(rw, DISCARD);
-- 
cgit v1.1


From 41b8c853a495438208faa5be03bbb0050859163b Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@suse.de>
Date: Wed, 18 Feb 2009 10:33:59 +0100
Subject: block: fix booting from partitioned md array

Hi Tejun,

 it looks like your commit:

   block: don't depend on consecutive minor space
   f331c0296f2a9fee0d396a70598b954062603015

 broke a particular case for booting from partitioned md/raid devices.
 That is the second time this has been broken recently.  The previous
 time was fixed by

   block: do_mounts - accept root=<non-existant partition>
   30f2f0eb4bd2c43d10a8b0d872c6e5ad8f31c9a0

 Because the data isn't available when an md device is first created
 (we add disks and set it up after creation), the initial partition
 scan finds nothing.  It is not until the device is opened that
 another partition scan happens and finds something.

 So at the point where the kernel parameter "root=/dev/md_d0p1" is
 being parsed, md_d0 exists, but md_d0p1 does not.
 However if we let blk_lookup_devt return the correct device number
 even though the device doesn't exist, then the attempt to mount it
 will successfully find the partition.

 I have tried in the past to find a way to get the partition table to
 be read as soon as the array is assembled but that proved impossible
 (at the time).  I don't remember the details, and could possibly
 revisit it.  However it would be really nice if blk_lookup_devt
 could be adjusted to again accept non existant partitions.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/genhd.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'block')

diff --git a/block/genhd.c b/block/genhd.c
index 397960c..e1eadcc 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1087,6 +1087,14 @@ dev_t blk_lookup_devt(const char *name, int partno)
 		if (strcmp(dev_name(dev), name))
 			continue;
 
+		if (partno < disk->minors) {
+			/* We need to return the right devno, even
+			 * if the partition doesn't exist yet.
+			 */
+			devt = MKDEV(MAJOR(dev->devt),
+				     MINOR(dev->devt) + partno);
+			break;
+		}
 		part = disk_get_part(disk, partno);
 		if (part) {
 			devt = part_devt(part);
-- 
cgit v1.1


From be987fdb55a4726e2fcbab7501f89276bdb57288 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Wed, 18 Feb 2009 10:30:15 +0100
Subject: block: fix deadlock in blk_abort_queue() for drivers that readd to
 timeout list

blk_abort_queue() iterates the timeout list and aborts each request on the
list, but if the driver error handling readds a request to the timeout list
during this processing, we could be looping forever. Fix this by splicing
current entries to a local list and run over that list instead.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-timeout.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'block')

diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index a095353..bbbdc4b 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -209,12 +209,19 @@ void blk_abort_queue(struct request_queue *q)
 {
 	unsigned long flags;
 	struct request *rq, *tmp;
+	LIST_HEAD(list);
 
 	spin_lock_irqsave(q->queue_lock, flags);
 
 	elv_abort_queue(q);
 
-	list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
+	/*
+	 * Splice entries to local list, to avoid deadlocking if entries
+	 * get readded to the timeout list by error handling
+	 */
+	list_splice_init(&q->timeout_list, &list);
+
+	list_for_each_entry_safe(rq, tmp, &list, timeout_list)
 		blk_abort_request(rq);
 
 	spin_unlock_irqrestore(q->queue_lock, flags);
-- 
cgit v1.1