summaryrefslogtreecommitdiffstats
path: root/sys/dev/isci/isci_controller.c
diff options
context:
space:
mode:
authorjimharris <jimharris@FreeBSD.org>2012-05-21 22:54:33 +0000
committerjimharris <jimharris@FreeBSD.org>2012-05-21 22:54:33 +0000
commit1acd03e35eb0ed156d3395b41f920585e773315f (patch)
treeabdafc7548dbd957f84cf3e1b096b99637169f8d /sys/dev/isci/isci_controller.c
parent8d75cfbaa616698128a0fd12926ca3c85b6bd21c (diff)
downloadFreeBSD-src-1acd03e35eb0ed156d3395b41f920585e773315f.zip
FreeBSD-src-1acd03e35eb0ed156d3395b41f920585e773315f.tar.gz
Wait until completion context unwinds before retrying CCBs that have been
queued internally. This works around an issue in the isci HAL where it cannot accept new I/O to a device after a resetting->ready state transition until the completion context has unwound. This issue was found by submitting non-tagged CCBs through the pass(4) interface to a SATA disk with an extremely small timeout value (5ms). This would trigger internal resets with I/O in the isci(4) internal queues. The small timeout value had not been intentional (and the original reporter has since changed his test to use 5sec instead), but it did uncover this corner case that would result in a hung disk. Sponsored by: Intel Reported and tested by: Ravi Pokala <rpokala at panasas dot com> Reviewed by: scottl (earlier version) MFC after: 1 week
Diffstat (limited to 'sys/dev/isci/isci_controller.c')
-rw-r--r--sys/dev/isci/isci_controller.c47
1 files changed, 47 insertions, 0 deletions
diff --git a/sys/dev/isci/isci_controller.c b/sys/dev/isci/isci_controller.c
index a4b7cfe..49785cb 100644
--- a/sys/dev/isci/isci_controller.c
+++ b/sys/dev/isci/isci_controller.c
@@ -201,6 +201,7 @@ void isci_controller_construct(struct ISCI_CONTROLLER *controller,
controller->is_started = FALSE;
controller->is_frozen = FALSE;
+ controller->release_queued_ccbs = FALSE;
controller->sim = NULL;
controller->initial_discovery_mask = 0;
@@ -431,6 +432,8 @@ int isci_controller_allocate_memory(struct ISCI_CONTROLLER *controller)
sci_fast_list_element_init(remote_device,
&remote_device->pending_device_reset_element);
TAILQ_INIT(&remote_device->queued_ccbs);
+ remote_device->release_queued_ccb = FALSE;
+ remote_device->queued_ccb_in_progress = NULL;
/*
* For the first SCI_MAX_DOMAINS device objects, do not put
@@ -694,3 +697,47 @@ void isci_action(struct cam_sim *sim, union ccb *ccb)
}
}
+/*
+ * Unfortunately, SCIL doesn't cleanly handle retry conditions.
+ * CAM_REQUEUE_REQ works only when no one is using the pass(4) interface.  So
+ * when SCIL indicates that an I/O needs to be retried (typically because of
+ * mixing tagged/non-tagged ATA commands, or running out of NCQ slots), we
+ * queue that I/O internally.  Once SCIL completes an I/O to this device, or
+ * we get a ready notification, we retry the first I/O on the queue.
+ * Unfortunately, SCIL also doesn't cleanly handle starting the new I/O within
+ * the context of the completion handler, so we need to retry these I/Os after
+ * the completion handler is done executing.
+ *
+ * isci_controller_release_queued_ccbs() performs that deferred retry.  It
+ * clears the controller-wide release_queued_ccbs flag and then visits every
+ * remote device slot.  For each device that has release_queued_ccb set and
+ * no queued CCB already in progress, it clears the per-device flag and
+ * resubmits the CCB at the head of the device's queued_ccbs list.  The CCB
+ * is only marked as in progress here; this function does not unlink it from
+ * the list.
+ *
+ * The caller must hold controller->lock (asserted below).
+ */
+void
+isci_controller_release_queued_ccbs(struct ISCI_CONTROLLER *controller)
+{
+	struct ISCI_REMOTE_DEVICE *dev;
+	struct ccb_hdr *ccb_h;
+	union ccb *ccb;
+	uint8_t *cdb;
+	int dev_idx;
+
+	KASSERT(mtx_owned(&controller->lock), ("controller lock not owned"));
+
+	controller->release_queued_ccbs = FALSE;
+	for (dev_idx = 0; dev_idx < SCI_MAX_REMOTE_DEVICES; dev_idx++) {
+		dev = controller->remote_device[dev_idx];
+
+		/* Skip empty slots and devices with nothing to release. */
+		if (dev == NULL || dev->release_queued_ccb != TRUE)
+			continue;
+
+		/*
+		 * A previously released CCB has not finished yet; leave the
+		 * per-device flag set so the request is not lost.
+		 */
+		if (dev->queued_ccb_in_progress != NULL)
+			continue;
+
+		dev->release_queued_ccb = FALSE;
+		ccb_h = TAILQ_FIRST(&dev->queued_ccbs);
+		if (ccb_h == NULL)
+			continue;
+
+		ccb = (union ccb *)ccb_h;
+
+		/*
+		 * cdb_io is a union: when CAM_CDB_POINTER is set the CDB
+		 * lives behind cdb_ptr, and cdb_bytes[0] would be a byte of
+		 * the pointer itself rather than the SCSI opcode.
+		 */
+		if ((ccb_h->flags & CAM_CDB_POINTER) != 0)
+			cdb = ccb->csio.cdb_io.cdb_ptr;
+		else
+			cdb = ccb->csio.cdb_io.cdb_bytes;
+
+		isci_log_message(1, "ISCI", "release %p %x\n", ccb_h, cdb[0]);
+
+		/* Mark in progress before resubmitting the I/O. */
+		dev->queued_ccb_in_progress = ccb;
+		isci_io_request_execute_scsi_io(ccb, controller);
+	}
+}
OpenPOWER on IntegriCloud