diff options
-rw-r--r-- | block/ll_rw_blk.c | 106 | ||||
-rw-r--r-- | drivers/block/cciss.c | 72 | ||||
-rw-r--r-- | drivers/ide/ide-io.c | 42 | ||||
-rw-r--r-- | drivers/ide/ide-probe.c | 2 | ||||
-rw-r--r-- | drivers/scsi/scsi.c | 109 | ||||
-rw-r--r-- | drivers/scsi/scsi_lib.c | 36 | ||||
-rw-r--r-- | drivers/scsi/scsi_priv.h | 1 | ||||
-rw-r--r-- | include/linux/blkdev.h | 21 | ||||
-rw-r--r-- | include/linux/ide.h | 1 | ||||
-rw-r--r-- | include/linux/interrupt.h | 2 |
10 files changed, 253 insertions, 139 deletions
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index c44d6fe..8e27d0a 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -26,6 +26,8 @@ #include <linux/slab.h> #include <linux/swap.h> #include <linux/writeback.h> +#include <linux/interrupt.h> +#include <linux/cpu.h> /* * for max sense size @@ -61,13 +63,15 @@ static wait_queue_head_t congestion_wqh[2] = { /* * Controlling structure to kblockd */ -static struct workqueue_struct *kblockd_workqueue; +static struct workqueue_struct *kblockd_workqueue; unsigned long blk_max_low_pfn, blk_max_pfn; EXPORT_SYMBOL(blk_max_low_pfn); EXPORT_SYMBOL(blk_max_pfn); +static DEFINE_PER_CPU(struct list_head, blk_cpu_done); + /* Amount of time in which a process may batch requests */ #define BLK_BATCH_TIME (HZ/50UL) @@ -206,6 +210,13 @@ void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn) EXPORT_SYMBOL(blk_queue_merge_bvec); +void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn) +{ + q->softirq_done_fn = fn; +} + +EXPORT_SYMBOL(blk_queue_softirq_done); + /** * blk_queue_make_request - define an alternate make_request function for a device * @q: the request queue for the device to be affected @@ -269,6 +280,7 @@ EXPORT_SYMBOL(blk_queue_make_request); static inline void rq_init(request_queue_t *q, struct request *rq) { INIT_LIST_HEAD(&rq->queuelist); + INIT_LIST_HEAD(&rq->donelist); rq->errors = 0; rq->rq_status = RQ_ACTIVE; @@ -285,6 +297,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq) rq->sense = NULL; rq->end_io = NULL; rq->end_io_data = NULL; + rq->completion_data = NULL; } /** @@ -3262,6 +3275,87 @@ int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) EXPORT_SYMBOL(end_that_request_chunk); /* + * splice the completion data to a local structure and hand off to + * process_completion_queue() to complete the requests + */ +static void blk_done_softirq(struct softirq_action *h) +{ + struct list_head *cpu_list; + LIST_HEAD(local_list); + + local_irq_disable(); + cpu_list = &__get_cpu_var(blk_cpu_done); + list_splice_init(cpu_list, &local_list); + local_irq_enable(); + + while (!list_empty(&local_list)) { + struct request *rq = list_entry(local_list.next, struct request, donelist); + + list_del_init(&rq->donelist); + rq->q->softirq_done_fn(rq); + } +} + +#ifdef CONFIG_HOTPLUG_CPU + +static int blk_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) +{ + /* + * If a CPU goes away, splice its entries to the current CPU + * and trigger a run of the softirq + */ + if (action == CPU_DEAD) { + int cpu = (unsigned long) hcpu; + + local_irq_disable(); + list_splice_init(&per_cpu(blk_cpu_done, cpu), + &__get_cpu_var(blk_cpu_done)); + raise_softirq_irqoff(BLOCK_SOFTIRQ); + local_irq_enable(); + } + + return NOTIFY_OK; +} + + +static struct notifier_block __devinitdata blk_cpu_notifier = { + .notifier_call = blk_cpu_notify, +}; + +#endif /* CONFIG_HOTPLUG_CPU */ + +/** + * blk_complete_request - end I/O on a request + * @req: the request being processed + * + * Description: + * Ends all I/O on a request. It does not handle partial completions, + * unless the driver actually implements this in its completionc callback + * through requeueing. Theh actual completion happens out-of-order, + * through a softirq handler. The user must have registered a completion + * callback through blk_queue_softirq_done(). + **/ + +void blk_complete_request(struct request *req) +{ + struct list_head *cpu_list; + unsigned long flags; + + BUG_ON(!req->q->softirq_done_fn); + + local_irq_save(flags); + + cpu_list = &__get_cpu_var(blk_cpu_done); + list_add_tail(&req->donelist, cpu_list); + raise_softirq_irqoff(BLOCK_SOFTIRQ); + + local_irq_restore(flags); +} + +EXPORT_SYMBOL(blk_complete_request); + +/* * queue lock must be held */ void end_that_request_last(struct request *req, int uptodate) @@ -3339,6 +3433,8 @@ EXPORT_SYMBOL(kblockd_flush); int __init blk_dev_init(void) { + int i; + kblockd_workqueue = create_workqueue("kblockd"); if (!kblockd_workqueue) panic("Failed to create kblockd\n"); @@ -3352,6 +3448,14 @@ int __init blk_dev_init(void) iocontext_cachep = kmem_cache_create("blkdev_ioc", sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); + for (i = 0; i < NR_CPUS; i++) + INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); + + open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); +#ifdef CONFIG_HOTPLUG_CPU + register_cpu_notifier(&blk_cpu_notifier); +#endif + blk_max_low_pfn = max_low_pfn; blk_max_pfn = max_pfn; diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 88452c7..e4e9f25 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2178,16 +2178,48 @@ static inline void resend_cciss_cmd( ctlr_info_t *h, CommandList_struct *c) start_io(h); } + +static void cciss_softirq_done(struct request *rq) +{ + CommandList_struct *cmd = rq->completion_data; + ctlr_info_t *h = hba[cmd->ctlr]; + u64bit temp64; + int i, ddir; + + if (cmd->Request.Type.Direction == XFER_READ) + ddir = PCI_DMA_FROMDEVICE; + else + ddir = PCI_DMA_TODEVICE; + + /* command did not need to be retried */ + /* unmap the DMA mapping for all the scatter gather elements */ + for(i=0; i<cmd->Header.SGList; i++) { + temp64.val32.lower = cmd->SG[i].Addr.lower; + temp64.val32.upper = cmd->SG[i].Addr.upper; + pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir); + } + + complete_buffers(rq->bio, rq->errors); + +#ifdef CCISS_DEBUG + printk("Done with %p\n", rq); +#endif /* CCISS_DEBUG */ + + spin_lock_irq(&h->lock); + end_that_request_last(rq, rq->errors); + cmd_free(h, cmd,1); + spin_unlock_irq(&h->lock); +} + /* checks the status of the job and calls complete buffers to mark all - * buffers for the completed job. + * buffers for the completed job. Note that this function does not need + * to hold the hba/queue lock. */ static inline void complete_command( ctlr_info_t *h, CommandList_struct *cmd, int timeout) { int status = 1; - int i; int retry_cmd = 0; - u64bit temp64; if (timeout) status = 0; @@ -2295,24 +2327,10 @@ static inline void complete_command( ctlr_info_t *h, CommandList_struct *cmd, resend_cciss_cmd(h,cmd); return; } - /* command did not need to be retried */ - /* unmap the DMA mapping for all the scatter gather elements */ - for(i=0; i<cmd->Header.SGList; i++) { - temp64.val32.lower = cmd->SG[i].Addr.lower; - temp64.val32.upper = cmd->SG[i].Addr.upper; - pci_unmap_page(hba[cmd->ctlr]->pdev, - temp64.val, cmd->SG[i].Len, - (cmd->Request.Type.Direction == XFER_READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); - } - complete_buffers(cmd->rq->bio, status); - -#ifdef CCISS_DEBUG - printk("Done with %p\n", cmd->rq); -#endif /* CCISS_DEBUG */ - end_that_request_last(cmd->rq, status ? 1 : -EIO); - cmd_free(h,cmd,1); + cmd->rq->completion_data = cmd; + cmd->rq->errors = status; + blk_complete_request(cmd->rq); } /* @@ -3199,15 +3217,17 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, drv->queue = q; q->backing_dev_info.ra_pages = READ_AHEAD; - blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask); + blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask); + + /* This is a hardware imposed limit. */ + blk_queue_max_hw_segments(q, MAXSGENTRIES); - /* This is a hardware imposed limit. */ - blk_queue_max_hw_segments(q, MAXSGENTRIES); + /* This is a limit in the driver and could be eliminated. */ + blk_queue_max_phys_segments(q, MAXSGENTRIES); - /* This is a limit in the driver and could be eliminated. */ - blk_queue_max_phys_segments(q, MAXSGENTRIES); + blk_queue_max_sectors(q, 512); - blk_queue_max_sectors(q, 512); + blk_queue_softirq_done(q, cciss_softirq_done); q->queuedata = hba[i]; sprintf(disk->disk_name, "cciss/c%dd%d", i, j); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index b5dc6df..dea2d4d 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -55,9 +55,22 @@ #include <asm/io.h> #include <asm/bitops.h> +void ide_softirq_done(struct request *rq) +{ + request_queue_t *q = rq->q; + + add_disk_randomness(rq->rq_disk); + end_that_request_chunk(rq, rq->errors, rq->data_len); + + spin_lock_irq(q->queue_lock); + end_that_request_last(rq, rq->errors); + spin_unlock_irq(q->queue_lock); +} + int __ide_end_request(ide_drive_t *drive, struct request *rq, int uptodate, int nr_sectors) { + unsigned int nbytes; int ret = 1; BUG_ON(!(rq->flags & REQ_STARTED)); @@ -81,17 +94,28 @@ int __ide_end_request(ide_drive_t *drive, struct request *rq, int uptodate, HWGROUP(drive)->hwif->ide_dma_on(drive); } - if (!end_that_request_first(rq, uptodate, nr_sectors)) { - add_disk_randomness(rq->rq_disk); - - if (blk_rq_tagged(rq)) - blk_queue_end_tag(drive->queue, rq); - + /* + * For partial completions (or non fs/pc requests), use the regular + * direct completion path. + */ + nbytes = nr_sectors << 9; + if (rq_all_done(rq, nbytes)) { + rq->errors = uptodate; + rq->data_len = nbytes; blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; - end_that_request_last(rq, uptodate); + blk_complete_request(rq); ret = 0; + } else { + if (!end_that_request_first(rq, uptodate, nr_sectors)) { + add_disk_randomness(rq->rq_disk); + blkdev_dequeue_request(rq); + HWGROUP(drive)->rq = NULL; + end_that_request_last(rq, uptodate); + ret = 0; + } } + return ret; } EXPORT_SYMBOL(__ide_end_request); @@ -113,6 +137,10 @@ int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors) unsigned long flags; int ret = 1; + /* + * room for locking improvements here, the calls below don't + * need the queue lock held at all + */ spin_lock_irqsave(&ide_lock, flags); rq = HWGROUP(drive)->rq; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 02167a5..1ddaa71 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1011,6 +1011,8 @@ static int ide_init_queue(ide_drive_t *drive) blk_queue_max_hw_segments(q, max_sg_entries); blk_queue_max_phys_segments(q, max_sg_entries); + blk_queue_softirq_done(q, ide_softirq_done); + /* assign drive queue */ drive->queue = q; diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 180676d..ee5f4df 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -69,7 +69,6 @@ #include "scsi_logging.h" static void scsi_done(struct scsi_cmnd *cmd); -static int scsi_retry_command(struct scsi_cmnd *cmd); /* * Definitions and constants. @@ -752,7 +751,7 @@ static void scsi_done(struct scsi_cmnd *cmd) * isn't running --- used by scsi_times_out */ void __scsi_done(struct scsi_cmnd *cmd) { - unsigned long flags; + struct request *rq = cmd->request; /* * Set the serial numbers back to zero @@ -763,71 +762,14 @@ void __scsi_done(struct scsi_cmnd *cmd) if (cmd->result) atomic_inc(&cmd->device->ioerr_cnt); + BUG_ON(!rq); + /* - * Next, enqueue the command into the done queue. - * It is a per-CPU queue, so we just disable local interrupts - * and need no spinlock. + * The uptodate/nbytes values don't matter, as we allow partial + * completes and thus will check this in the softirq callback */ - local_irq_save(flags); - list_add_tail(&cmd->eh_entry, &__get_cpu_var(scsi_done_q)); - raise_softirq_irqoff(SCSI_SOFTIRQ); - local_irq_restore(flags); -} - -/** - * scsi_softirq - Perform post-interrupt processing of finished SCSI commands. - * - * This is the consumer of the done queue. - * - * This is called with all interrupts enabled. This should reduce - * interrupt latency, stack depth, and reentrancy of the low-level - * drivers. - */ -static void scsi_softirq(struct softirq_action *h) -{ - int disposition; - LIST_HEAD(local_q); - - local_irq_disable(); - list_splice_init(&__get_cpu_var(scsi_done_q), &local_q); - local_irq_enable(); - - while (!list_empty(&local_q)) { - struct scsi_cmnd *cmd = list_entry(local_q.next, - struct scsi_cmnd, eh_entry); - /* The longest time any command should be outstanding is the - * per command timeout multiplied by the number of retries. - * - * For a typical command, this is 2.5 minutes */ - unsigned long wait_for - = cmd->allowed * cmd->timeout_per_command; - list_del_init(&cmd->eh_entry); - - disposition = scsi_decide_disposition(cmd); - if (disposition != SUCCESS && - time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) { - sdev_printk(KERN_ERR, cmd->device, - "timing out command, waited %lus\n", - wait_for/HZ); - disposition = SUCCESS; - } - - scsi_log_completion(cmd, disposition); - switch (disposition) { - case SUCCESS: - scsi_finish_command(cmd); - break; - case NEEDS_RETRY: - scsi_retry_command(cmd); - break; - case ADD_TO_MLQUEUE: - scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); - break; - default: - if (!scsi_eh_scmd_add(cmd, 0)) - scsi_finish_command(cmd); - } - } + rq->completion_data = cmd; + blk_complete_request(rq); } /* @@ -840,7 +782,7 @@ static void scsi_softirq(struct softirq_action *h) * level drivers should not become re-entrant as a result of * this. */ -static int scsi_retry_command(struct scsi_cmnd *cmd) +int scsi_retry_command(struct scsi_cmnd *cmd) { /* * Restore the SCSI command state. @@ -1273,38 +1215,6 @@ int scsi_device_cancel(struct scsi_device *sdev, int recovery) } EXPORT_SYMBOL(scsi_device_cancel); -#ifdef CONFIG_HOTPLUG_CPU -static int scsi_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - int cpu = (unsigned long)hcpu; - - switch(action) { - case CPU_DEAD: - /* Drain scsi_done_q. */ - local_irq_disable(); - list_splice_init(&per_cpu(scsi_done_q, cpu), - &__get_cpu_var(scsi_done_q)); - raise_softirq_irqoff(SCSI_SOFTIRQ); - local_irq_enable(); - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __devinitdata scsi_cpu_nb = { - .notifier_call = scsi_cpu_notify, -}; - -#define register_scsi_cpu() register_cpu_notifier(&scsi_cpu_nb) -#define unregister_scsi_cpu() unregister_cpu_notifier(&scsi_cpu_nb) -#else -#define register_scsi_cpu() -#define unregister_scsi_cpu() -#endif /* CONFIG_HOTPLUG_CPU */ - MODULE_DESCRIPTION("SCSI core"); MODULE_LICENSE("GPL"); @@ -1338,8 +1248,6 @@ static int __init init_scsi(void) INIT_LIST_HEAD(&per_cpu(scsi_done_q, i)); devfs_mk_dir("scsi"); - open_softirq(SCSI_SOFTIRQ, scsi_softirq, NULL); - register_scsi_cpu(); printk(KERN_NOTICE "SCSI subsystem initialized\n"); return 0; @@ -1367,7 +1275,6 @@ static void __exit exit_scsi(void) devfs_remove("scsi"); scsi_exit_procfs(); scsi_exit_queue(); - unregister_scsi_cpu(); } subsys_initcall(init_scsi); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ba93d6e..00c9bf3 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1493,6 +1493,41 @@ static void scsi_kill_request(struct request *req, request_queue_t *q) __scsi_done(cmd); } +static void scsi_softirq_done(struct request *rq) +{ + struct scsi_cmnd *cmd = rq->completion_data; + unsigned long wait_for = cmd->allowed * cmd->timeout_per_command; + int disposition; + + INIT_LIST_HEAD(&cmd->eh_entry); + + disposition = scsi_decide_disposition(cmd); + if (disposition != SUCCESS && + time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) { + sdev_printk(KERN_ERR, cmd->device, + "timing out command, waited %lus\n", + wait_for/HZ); + disposition = SUCCESS; + } + + scsi_log_completion(cmd, disposition); + + switch (disposition) { + case SUCCESS: + scsi_finish_command(cmd); + break; + case NEEDS_RETRY: + scsi_retry_command(cmd); + break; + case ADD_TO_MLQUEUE: + scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); + break; + default: + if (!scsi_eh_scmd_add(cmd, 0)) + scsi_finish_command(cmd); + } +} + /* * Function: scsi_request_fn() * @@ -1667,6 +1702,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); blk_queue_segment_boundary(q, shost->dma_boundary); blk_queue_issue_flush_fn(q, scsi_issue_flush_fn); + blk_queue_softirq_done(q, scsi_softirq_done); if (!shost->use_clustering) clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index f04e7e1..14a6198 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -44,6 +44,7 @@ extern void scsi_init_cmd_from_req(struct scsi_cmnd *cmd, struct scsi_request *sreq); extern void __scsi_release_request(struct scsi_request *sreq); extern void __scsi_done(struct scsi_cmnd *cmd); +extern int scsi_retry_command(struct scsi_cmnd *cmd); #ifdef CONFIG_SCSI_LOGGING void scsi_log_send(struct scsi_cmnd *cmd); void scsi_log_completion(struct scsi_cmnd *cmd, int disposition); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 96b2339..02a585f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,9 +118,9 @@ struct request_list { * try to put the fields that are referenced together in the same cacheline */ struct request { - struct list_head queuelist; /* looking for ->queue? you must _not_ - * access it directly, use - * blkdev_dequeue_request! */ + struct list_head queuelist; + struct list_head donelist; + unsigned long flags; /* see REQ_ bits below */ /* Maintain bio traversal state for part by part I/O submission. @@ -141,6 +141,7 @@ struct request { struct bio *biotail; void *elevator_private; + void *completion_data; unsigned short ioprio; @@ -291,6 +292,7 @@ typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *); typedef void (activity_fn) (void *data, int rw); typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *); typedef void (prepare_flush_fn) (request_queue_t *, struct request *); +typedef void (softirq_done_fn)(struct request *); enum blk_queue_state { Queue_down, @@ -332,6 +334,7 @@ struct request_queue activity_fn *activity_fn; issue_flush_fn *issue_flush_fn; prepare_flush_fn *prepare_flush_fn; + softirq_done_fn *softirq_done_fn; /* * Dispatch queue sorting @@ -645,6 +648,17 @@ extern int end_that_request_first(struct request *, int, int); extern int end_that_request_chunk(struct request *, int, int); extern void end_that_request_last(struct request *, int); extern void end_request(struct request *req, int uptodate); +extern void blk_complete_request(struct request *); + +static inline int rq_all_done(struct request *rq, unsigned int nr_bytes) +{ + if (blk_fs_request(rq)) + return (nr_bytes >= (rq->hard_nr_sectors << 9)); + else if (blk_pc_request(rq)) + return nr_bytes >= rq->data_len; + + return 0; +} /* * end_that_request_first/chunk() takes an uptodate argument. we account @@ -693,6 +707,7 @@ extern void blk_queue_segment_boundary(request_queue_t *, unsigned long); extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn); extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *); extern void blk_queue_dma_alignment(request_queue_t *, int); +extern void blk_queue_softirq_done(request_queue_t *, softirq_done_fn *); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *); extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *); diff --git a/include/linux/ide.h b/include/linux/ide.h index 4dd6694..ef8d0cb 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1001,6 +1001,7 @@ extern int noautodma; extern int ide_end_request (ide_drive_t *drive, int uptodate, int nrsecs); extern int __ide_end_request (ide_drive_t *drive, struct request *rq, int uptodate, int nrsecs); +extern void ide_softirq_done(struct request *rq); /* * This is used on exit from the driver to designate the next irq handler diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index e50a95f..2c08fdc 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -112,7 +112,7 @@ enum TIMER_SOFTIRQ, NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, - SCSI_SOFTIRQ, + BLOCK_SOFTIRQ, TASKLET_SOFTIRQ }; |