summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorscottl <scottl@FreeBSD.org>2014-07-01 06:23:48 +0000
committerscottl <scottl@FreeBSD.org>2014-07-01 06:23:48 +0000
commitc80125f5779f0cd75c6574b1247237faca74e8ed (patch)
treedc1ac9fc1b978adc2bdc229556bb619902528f5f /sys
parentc4c1c285482bb13854d4dc0b53d4c970a1237fc8 (diff)
downloadFreeBSD-src-c80125f5779f0cd75c6574b1247237faca74e8ed.zip
FreeBSD-src-c80125f5779f0cd75c6574b1247237faca74e8ed.tar.gz
Merge r268024, 268025:
Fix a case in handling ATA_PASSTHROUGH commands that have an unaligned buffer. This impacts some home-rolled SMART tools. In rare cases, a SATA drive can stop responding to commands and trigger a reset device task request from the driver. If the drive fails to respond with a signature FIS, the driver would previously get into an endless retry loop, stalling all I/O to the drive and keeping user processes stranded. Instead, fail the I/O and invalidate the device if the task management command times out. This is controllable with the tunable hw.isci.fail_on_task_timeout and the sysctl dev.isci.0.fail_on_task_timeout. The default for these is 1. Obtained from: Netflix, Inc.
Diffstat (limited to 'sys')
-rw-r--r--sys/dev/isci/isci.h1
-rw-r--r--sys/dev/isci/isci_controller.c6
-rw-r--r--sys/dev/isci/isci_sysctl.c23
-rw-r--r--sys/dev/isci/isci_task_request.c13
-rw-r--r--sys/dev/isci/scil/scic_sds_stp_request.c1
5 files changed, 41 insertions, 3 deletions
diff --git a/sys/dev/isci/isci.h b/sys/dev/isci/isci.h
index 1dc8e93..ce0426a 100644
--- a/sys/dev/isci/isci.h
+++ b/sys/dev/isci/isci.h
@@ -164,6 +164,7 @@ struct ISCI_CONTROLLER
uint32_t initial_discovery_mask;
BOOL is_frozen;
BOOL release_queued_ccbs;
+ BOOL fail_on_task_timeout;
uint8_t *remote_device_memory;
struct ISCI_MEMORY cached_controller_memory;
struct ISCI_MEMORY uncached_controller_memory;
diff --git a/sys/dev/isci/isci_controller.c b/sys/dev/isci/isci_controller.c
index 0760f34..f3ff082 100644
--- a/sys/dev/isci/isci_controller.c
+++ b/sys/dev/isci/isci_controller.c
@@ -300,6 +300,8 @@ SCI_STATUS isci_controller_initialize(struct ISCI_CONTROLLER *controller)
SCI_CONTROLLER_HANDLE_T scic_controller_handle;
char led_name[64];
unsigned long tunable;
+ uint32_t io_shortage;
+ uint32_t fail_on_timeout;
int i;
scic_controller_handle =
@@ -365,10 +367,12 @@ SCI_STATUS isci_controller_initialize(struct ISCI_CONTROLLER *controller)
* this io_shortage parameter, which will tell CAM that we have a
* large queue depth than we really do.
*/
- uint32_t io_shortage = 0;
+ io_shortage = 0;
TUNABLE_INT_FETCH("hw.isci.io_shortage", &io_shortage);
controller->sim_queue_depth += io_shortage;
+ fail_on_timeout = 1;
+ TUNABLE_INT_FETCH("hw.isci.fail_on_task_timeout", &fail_on_timeout);
/* Attach to CAM using xpt_bus_register now, then immediately freeze
* the simq. It will get released later when initial domain discovery
* is complete.
diff --git a/sys/dev/isci/isci_sysctl.c b/sys/dev/isci/isci_sysctl.c
index 4623a8a..62a10b9 100644
--- a/sys/dev/isci/isci_sysctl.c
+++ b/sys/dev/isci/isci_sysctl.c
@@ -222,6 +222,24 @@ isci_sysctl_log_frozen_lun_masks(SYSCTL_HANDLER_ARGS)
return (0);
}
+static int
+isci_sysctl_fail_on_task_timeout(SYSCTL_HANDLER_ARGS)
+{
+ struct isci_softc *isci = (struct isci_softc *)arg1;
+ int32_t fail_on_timeout = 0;
+ int error, i;
+
+ error = sysctl_handle_int(oidp, &fail_on_timeout, 0, req);
+
+ if (error || fail_on_timeout == 0)
+ return (error);
+
+ for (i = 0; i < isci->controller_count; i++)
+ isci->controllers[i].fail_on_task_timeout = fail_on_timeout;
+
+ return (0);
+}
+
void isci_sysctl_initialize(struct isci_softc *isci)
{
struct sysctl_ctx_list *sysctl_ctx = device_get_sysctl_ctx(isci->device);
@@ -259,5 +277,10 @@ void isci_sysctl_initialize(struct isci_softc *isci)
"log_frozen_lun_masks", CTLTYPE_UINT| CTLFLAG_RW, isci, 0,
isci_sysctl_log_frozen_lun_masks, "IU",
"Log frozen lun masks to kernel log");
+
+ SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+ "fail_on_task_timeout", CTLTYPE_UINT | CTLFLAG_RW, isci, 0,
+ isci_sysctl_fail_on_task_timeout, "IU",
+ "Fail a command that has encountered a task management timeout");
}
diff --git a/sys/dev/isci/isci_task_request.c b/sys/dev/isci/isci_task_request.c
index 5d7f63d..6c8be45 100644
--- a/sys/dev/isci/isci_task_request.c
+++ b/sys/dev/isci/isci_task_request.c
@@ -206,8 +206,17 @@ isci_task_request_complete(SCI_CONTROLLER_HANDLE_T scif_controller,
break;
case SCI_FAILURE_TIMEOUT:
- retry_task = TRUE;
- isci_log_message(0, "ISCI", "task timeout - retrying\n");
+ if (isci_controller->fail_on_task_timeout) {
+ retry_task = FALSE;
+ isci_log_message(0, "ISCI",
+ "task timeout - not retrying\n");
+ scif_cb_domain_device_removed(isci_controller,
+ isci_remote_device->domain, isci_remote_device);
+ } else {
+ retry_task = TRUE;
+ isci_log_message(0, "ISCI",
+ "task timeout - retrying\n");
+ }
break;
case SCI_TASK_FAILURE:
diff --git a/sys/dev/isci/scil/scic_sds_stp_request.c b/sys/dev/isci/scil/scic_sds_stp_request.c
index ed597b1..f3fe28d 100644
--- a/sys/dev/isci/scil/scic_sds_stp_request.c
+++ b/sys/dev/isci/scil/scic_sds_stp_request.c
@@ -1222,6 +1222,7 @@ SCI_STATUS scic_sds_stp_request_pio_data_in_copy_data_buffer(
length -= copy_length;
sgl_offset += copy_length;
data_offset += copy_length;
+ source_address += copy_length;
#endif
}
}
OpenPOWER on IntegriCloud