diff options
author | jimharris <jimharris@FreeBSD.org> | 2012-05-21 22:54:33 +0000 |
---|---|---|
committer | jimharris <jimharris@FreeBSD.org> | 2012-05-21 22:54:33 +0000 |
commit | 1acd03e35eb0ed156d3395b41f920585e773315f (patch) | |
tree | abdafc7548dbd957f84cf3e1b096b99637169f8d /sys/dev/isci/isci_io_request.c | |
parent | 8d75cfbaa616698128a0fd12926ca3c85b6bd21c (diff) | |
download | FreeBSD-src-1acd03e35eb0ed156d3395b41f920585e773315f.zip FreeBSD-src-1acd03e35eb0ed156d3395b41f920585e773315f.tar.gz |
Wait until completion context unwinds before retrying CCBs that have been
queued internally. This works around an issue in the isci HAL where it cannot
accept new I/O to a device after a resetting->ready state transition until
the completion context has unwound.
This issue was found by submitting non-tagged CCBs through the pass(4) interface
to a SATA disk with an extremely small timeout value (5ms). This would trigger
internal resets with I/O in the isci(4) internal queues.
The small timeout value had not been intentional (and the original reporter has
since changed his test to use 5sec instead), but it did uncover this corner
case that would result in a hung disk.
Sponsored by: Intel
Reported and tested by: Ravi Pokala <rpokala at panasas dot com>
Reviewed by: scottl (earlier version)
MFC after: 1 week
Diffstat (limited to 'sys/dev/isci/isci_io_request.c')
-rw-r--r-- | sys/dev/isci/isci_io_request.c | 51 |
1 files changed, 45 insertions, 6 deletions
diff --git a/sys/dev/isci/isci_io_request.c b/sys/dev/isci/isci_io_request.c index 985a2e4..67ed1da 100644 --- a/sys/dev/isci/isci_io_request.c +++ b/sys/dev/isci/isci_io_request.c @@ -223,7 +223,7 @@ isci_io_request_complete(SCI_CONTROLLER_HANDLE_T scif_controller, (struct ISCI_REQUEST *)isci_request); if (complete_ccb) { - if (ccb->ccb_h.status != CAM_REQ_CMP) { + if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { /* ccb will be completed with some type of non-success * status. So temporarily freeze the queue until the * upper layers can act on the status. The @@ -234,6 +234,26 @@ isci_io_request_complete(SCI_CONTROLLER_HANDLE_T scif_controller, xpt_freeze_devq(ccb->ccb_h.path, 1); } + if (ccb->ccb_h.status & CAM_SIM_QUEUED) { + + KASSERT(ccb == isci_remote_device->queued_ccb_in_progress, + ("multiple internally queued ccbs in flight")); + + TAILQ_REMOVE(&isci_remote_device->queued_ccbs, + &ccb->ccb_h, sim_links.tqe); + ccb->ccb_h.status &= ~CAM_SIM_QUEUED; + + /* + * This CCB that was in the queue was completed, so + * set the in_progress pointer to NULL denoting that + * we can retry another CCB from the queue. We only + * allow one CCB at a time from the queue to be + * in progress so that we can effectively maintain + * ordering. 
+ */ + isci_remote_device->queued_ccb_in_progress = NULL; + } + if (isci_remote_device->frozen_lun_mask != 0) { isci_remote_device_release_device_queue(isci_remote_device); } @@ -248,11 +268,30 @@ isci_io_request_complete(SCI_CONTROLLER_HANDLE_T scif_controller, isci_remote_device_freeze_lun_queue(isci_remote_device, ccb->ccb_h.target_lun); - isci_log_message(1, "ISCI", "queue %p %x\n", ccb, - ccb->csio.cdb_io.cdb_bytes[0]); - ccb->ccb_h.status |= CAM_SIM_QUEUED; - TAILQ_INSERT_TAIL(&isci_remote_device->queued_ccbs, - &ccb->ccb_h, sim_links.tqe); + if (ccb->ccb_h.status & CAM_SIM_QUEUED) { + + KASSERT(ccb == isci_remote_device->queued_ccb_in_progress, + ("multiple internally queued ccbs in flight")); + + /* + * Do nothing, CCB is already on the device's queue. + * We leave it on the queue, to be retried again + * next time a CCB on this device completes, or we + * get a ready notification for this device. + */ + isci_log_message(1, "ISCI", "already queued %p %x\n", + ccb, ccb->csio.cdb_io.cdb_bytes[0]); + + isci_remote_device->queued_ccb_in_progress = NULL; + + } else { + isci_log_message(1, "ISCI", "queue %p %x\n", ccb, + ccb->csio.cdb_io.cdb_bytes[0]); + ccb->ccb_h.status |= CAM_SIM_QUEUED; + + TAILQ_INSERT_TAIL(&isci_remote_device->queued_ccbs, + &ccb->ccb_h, sim_links.tqe); + } } } |