diff options
Diffstat (limited to 'sys/cam/cam_periph.c')
-rw-r--r-- | sys/cam/cam_periph.c | 918 |
1 files changed, 445 insertions, 473 deletions
diff --git a/sys/cam/cam_periph.c b/sys/cam/cam_periph.c index eba1787..ee6eace 100644 --- a/sys/cam/cam_periph.c +++ b/sys/cam/cam_periph.c @@ -62,6 +62,20 @@ static u_int camperiphunit(struct periph_driver *p_drv, static void camperiphdone(struct cam_periph *periph, union ccb *done_ccb); static void camperiphfree(struct cam_periph *periph); +static int camperiphscsistatuserror(union ccb *ccb, + cam_flags camflags, + u_int32_t sense_flags, + union ccb *save_ccb, + int *openings, + u_int32_t *relsim_flags, + u_int32_t *timeout); +static int camperiphscsisenseerror(union ccb *ccb, + cam_flags camflags, + u_int32_t sense_flags, + union ccb *save_ccb, + int *openings, + u_int32_t *relsim_flags, + u_int32_t *timeout); static int nperiph_drivers; struct periph_driver **periph_drivers; @@ -473,15 +487,23 @@ cam_periph_lock(struct cam_periph *periph, int priority) { int error; + /* + * Increment the reference count on the peripheral + * while we wait for our lock attempt to succeed + * to ensure the peripheral doesn't dissappear + * out from under us while we sleep. + */ + if (cam_periph_acquire(periph) != CAM_REQ_CMP) + return(ENXIO); + while ((periph->flags & CAM_PERIPH_LOCKED) != 0) { periph->flags |= CAM_PERIPH_LOCK_WANTED; - if ((error = tsleep(periph, priority, "caplck", 0)) != 0) + if ((error = tsleep(periph, priority, "caplck", 0)) != 0) { + cam_periph_release(periph); return error; + } } - if (cam_periph_acquire(periph) != CAM_REQ_CMP) - return(ENXIO); - periph->flags |= CAM_PERIPH_LOCKED; return 0; } @@ -891,13 +913,16 @@ cam_release_devq(struct cam_path *path, u_int32_t relsim_flags, static void camperiphdone(struct cam_periph *periph, union ccb *done_ccb) { + union ccb *saved_ccb; cam_status status; int frozen; int sense; struct scsi_start_stop_unit *scsi_cmd; u_int32_t relsim_flags, timeout; u_int32_t qfrozen_cnt; + int xpt_done_ccb; + xpt_done_ccb = FALSE; status = done_ccb->ccb_h.status; frozen = (status & CAM_DEV_QFRZN) != 0; sense = (status & CAM_AUTOSNS_VALID) != 0; @@ -905,6 +930,7 @@ camperiphdone(struct cam_periph *periph, union ccb *done_ccb) timeout = 0; relsim_flags = 0; + saved_ccb = (union ccb *)done_ccb->ccb_h.saved_ccb_ptr; /* * Unfreeze the queue once if it is already frozen.. @@ -918,15 +944,19 @@ camperiphdone(struct cam_periph *periph, union ccb *done_ccb) } switch (status) { - case CAM_REQ_CMP: - + { /* * If we have successfully taken a device from the not - * ready to ready state, re-scan the device and re-get the - * inquiry information. Many devices (mostly disks) don't - * properly report their inquiry information unless they - * are spun up. + * ready to ready state, re-scan the device and re-get + * the inquiry information. Many devices (mostly disks) + * don't properly report their inquiry information unless + * they are spun up. + * + * If we manually retrieved sense into a CCB and got + * something other than "NO SENSE" send the updated CCB + * back to the client via xpt_done() to be processed via + * the error recovery code again. */ if (done_ccb->ccb_h.func_code == XPT_SCSI_IO) { scsi_cmd = (struct scsi_start_stop_unit *) @@ -935,15 +965,35 @@ camperiphdone(struct cam_periph *periph, union ccb *done_ccb) if (scsi_cmd->opcode == START_STOP_UNIT) xpt_async(AC_INQ_CHANGED, done_ccb->ccb_h.path, NULL); + if (scsi_cmd->opcode == REQUEST_SENSE) { + u_int sense_key; + + sense_key = saved_ccb->csio.sense_data.flags; + sense_key &= SSD_KEY; + if (sense_key != SSD_KEY_NO_SENSE) { + saved_ccb->ccb_h.flags |= + CAM_AUTOSNS_VALID; + xpt_print_path(saved_ccb->ccb_h.path); + printf("Recovered Sense\n"); +#if 0 + scsi_sense_print(&saved_ccb->csio); +#endif + cam_error_print(saved_ccb, CAM_ESF_ALL, + CAM_EPF_ALL); + xpt_done_ccb = TRUE; + } + } } bcopy(done_ccb->ccb_h.saved_ccb_ptr, done_ccb, sizeof(union ccb)); periph->flags &= ~CAM_PERIPH_RECOVERY_INPROG; - xpt_action(done_ccb); + if (xpt_done_ccb == FALSE) + xpt_action(done_ccb); break; + } case CAM_SCSI_STATUS_ERROR: scsi_cmd = (struct scsi_start_stop_unit *) &done_ccb->csio.cdb_io.cdb_bytes; @@ -982,7 +1032,7 @@ camperiphdone(struct cam_periph *periph, union ccb *done_ccb) xpt_action(done_ccb); - } else if (done_ccb->ccb_h.retry_count > 0) { + } else if (done_ccb->ccb_h.retry_count > 1) { /* * In this case, the error recovery * command failed, but we've got @@ -1001,8 +1051,9 @@ camperiphdone(struct cam_periph *periph, union ccb *done_ccb) } else { /* - * Copy the original CCB back and - * send it back to the caller. + * Perform the final retry with the original + * CCB so that final error processing is + * performed by the owner of the CCB. */ bcopy(done_ccb->ccb_h.saved_ccb_ptr, done_ccb, sizeof(union ccb)); @@ -1039,6 +1090,13 @@ camperiphdone(struct cam_periph *periph, union ccb *done_ccb) } /* decrement the retry count */ + /* + * XXX This isn't appropriate in all cases. Restructure, + * so that the retry count is only decremented on an + * actual retry. Remeber that the orignal ccb had its + * retry count dropped before entering recovery, so + * doing it again is a bug. + */ if (done_ccb->ccb_h.retry_count > 0) done_ccb->ccb_h.retry_count--; @@ -1047,6 +1105,8 @@ camperiphdone(struct cam_periph *periph, union ccb *done_ccb) /*openings*/0, /*timeout*/timeout, /*getcount_only*/0); + if (xpt_done_ccb == TRUE) + (*done_ccb->ccb_h.cbfcnp)(periph, done_ccb); } /* @@ -1113,469 +1173,370 @@ cam_periph_freeze_after_event(struct cam_periph *periph, } -/* - * Generic error handler. Peripheral drivers usually filter - * out the errors that they handle in a unique mannor, then - * call this function. - */ -int -cam_periph_error(union ccb *ccb, cam_flags camflags, - u_int32_t sense_flags, union ccb *save_ccb) +static int +camperiphscsistatuserror(union ccb *ccb, cam_flags camflags, + u_int32_t sense_flags, union ccb *save_ccb, + int *openings, u_int32_t *relsim_flags, + u_int32_t *timeout) { - cam_status status; - int frozen; - int sense; - int error; - int openings; - int retry; - u_int32_t relsim_flags; - u_int32_t timeout; - - status = ccb->ccb_h.status; - frozen = (status & CAM_DEV_QFRZN) != 0; - sense = (status & CAM_AUTOSNS_VALID) != 0; - status &= CAM_STATUS_MASK; - relsim_flags = 0; + int error; - switch (status) { - case CAM_REQ_CMP: - /* decrement the number of retries */ - retry = ccb->ccb_h.retry_count > 0; - if (retry) - ccb->ccb_h.retry_count--; + switch (ccb->csio.scsi_status) { + case SCSI_STATUS_OK: + case SCSI_STATUS_COND_MET: + case SCSI_STATUS_INTERMED: + case SCSI_STATUS_INTERMED_COND_MET: error = 0; break; - case CAM_AUTOSENSE_FAIL: - case CAM_SCSI_STATUS_ERROR: + case SCSI_STATUS_CMD_TERMINATED: + case SCSI_STATUS_CHECK_COND: + error = camperiphscsisenseerror(ccb, + camflags, + sense_flags, + save_ccb, + openings, + relsim_flags, + timeout); + break; + case SCSI_STATUS_QUEUE_FULL: + { + /* no decrement */ + struct ccb_getdevstats cgds; - switch (ccb->csio.scsi_status) { - case SCSI_STATUS_OK: - case SCSI_STATUS_COND_MET: - case SCSI_STATUS_INTERMED: - case SCSI_STATUS_INTERMED_COND_MET: - error = 0; - break; - case SCSI_STATUS_CMD_TERMINATED: - case SCSI_STATUS_CHECK_COND: - if (sense != 0) { - struct scsi_sense_data *sense; - int error_code, sense_key, asc, ascq; - struct cam_periph *periph; - scsi_sense_action err_action; - struct ccb_getdev cgd; - - sense = &ccb->csio.sense_data; - scsi_extract_sense(sense, &error_code, - &sense_key, &asc, &ascq); - periph = xpt_path_periph(ccb->ccb_h.path); + /* + * First off, find out what the current + * transaction counts are. + */ + xpt_setup_ccb(&cgds.ccb_h, + ccb->ccb_h.path, + /*priority*/1); + cgds.ccb_h.func_code = XPT_GDEV_STATS; + xpt_action((union ccb *)&cgds); + + /* + * If we were the only transaction active, treat + * the QUEUE FULL as if it were a BUSY condition. + */ + if (cgds.dev_active != 0) { + int total_openings; + /* + * Reduce the number of openings to + * be 1 less than the amount it took + * to get a queue full bounded by the + * minimum allowed tag count for this + * device. + */ + total_openings = cgds.dev_active + cgds.dev_openings; + *openings = cgds.dev_active; + if (*openings < cgds.mintags) + *openings = cgds.mintags; + if (*openings < total_openings) + *relsim_flags = RELSIM_ADJUST_OPENINGS; + else { /* - * Grab the inquiry data for this device. + * Some devices report queue full for + * temporary resource shortages. For + * this reason, we allow a minimum + * tag count to be entered via a + * quirk entry to prevent the queue + * count on these devices from falling + * to a pessimisticly low value. We + * still wait for the next successful + * completion, however, before queueing + * more transactions to the device. */ - xpt_setup_ccb(&cgd.ccb_h, ccb->ccb_h.path, - /*priority*/ 1); - cgd.ccb_h.func_code = XPT_GDEV_TYPE; - xpt_action((union ccb *)&cgd); + *relsim_flags = RELSIM_RELEASE_AFTER_CMDCMPLT; + } + *timeout = 0; + error = ERESTART; + break; + } + /* FALLTHROUGH */ + } + case SCSI_STATUS_BUSY: + /* + * Restart the queue after either another + * command completes or a 1 second timeout. + */ + if (ccb->ccb_h.retry_count > 0) { + ccb->ccb_h.retry_count--; + error = ERESTART; + *relsim_flags = RELSIM_RELEASE_AFTER_TIMEOUT + | RELSIM_RELEASE_AFTER_CMDCMPLT; + *timeout = 1000; + } else { + error = EIO; + } + break; + case SCSI_STATUS_RESERV_CONFLICT: + error = EIO; + break; + default: + error = EIO; + break; + } + return (error); +} - err_action = scsi_error_action(asc, ascq, - &cgd.inq_data); +static int +camperiphscsisenseerror(union ccb *ccb, cam_flags camflags, + u_int32_t sense_flags, union ccb *save_ccb, + int *openings, u_int32_t *relsim_flags, + u_int32_t *timeout) +{ + struct cam_periph *periph; + int error; - /* - * Send a Test Unit Ready to the device. - * If the 'many' flag is set, we send 120 - * test unit ready commands, one every half - * second. Otherwise, we just send one TUR. - * We only want to do this if the retry - * count has not been exhausted. - */ - if (((err_action & SS_MASK) == SS_TUR) - && save_ccb != NULL - && ccb->ccb_h.retry_count > 0) { - - /* - * Since error recovery is already - * in progress, don't attempt to - * process this error. It is probably - * related to the error that caused - * the currently active error recovery - * action. Also, we only have - * space for one saved CCB, so if we - * had two concurrent error recovery - * actions, we would end up - * over-writing one error recovery - * CCB with another one. - */ - if (periph->flags & - CAM_PERIPH_RECOVERY_INPROG) { - error = ERESTART; - break; - } - - periph->flags |= - CAM_PERIPH_RECOVERY_INPROG; - - /* decrement the number of retries */ - if ((err_action & - SSQ_DECREMENT_COUNT) != 0) { - retry = 1; - ccb->ccb_h.retry_count--; - } - - bcopy(ccb, save_ccb, sizeof(*save_ccb)); - - /* - * We retry this one every half - * second for a minute. If the - * device hasn't become ready in a - * minute's time, it's unlikely to - * ever become ready. If the table - * doesn't specify SSQ_MANY, we can - * only try this once. Oh well. - */ - if ((err_action & SSQ_MANY) != 0) - scsi_test_unit_ready(&ccb->csio, - /*retries*/120, - camperiphdone, - MSG_SIMPLE_Q_TAG, - SSD_FULL_SIZE, - /*timeout*/5000); - else - scsi_test_unit_ready(&ccb->csio, - /*retries*/1, - camperiphdone, - MSG_SIMPLE_Q_TAG, - SSD_FULL_SIZE, - /*timeout*/5000); - - /* release the queue after .5 sec. */ - relsim_flags = - RELSIM_RELEASE_AFTER_TIMEOUT; - timeout = 500; - /* - * Drop the priority to 0 so that - * we are the first to execute. Also - * freeze the queue after this command - * is sent so that we can restore the - * old csio and have it queued in the - * proper order before we let normal - * transactions go to the drive. - */ - ccb->ccb_h.pinfo.priority = 0; - ccb->ccb_h.flags |= CAM_DEV_QFREEZE; - - /* - * Save a pointer to the original - * CCB in the new CCB. - */ - ccb->ccb_h.saved_ccb_ptr = save_ccb; - - error = ERESTART; - } - /* - * Send a start unit command to the device, - * and then retry the command. We only - * want to do this if the retry count has - * not been exhausted. If the user - * specified 0 retries, then we follow - * their request and do not retry. - */ - else if (((err_action & SS_MASK) == SS_START) - && save_ccb != NULL - && ccb->ccb_h.retry_count > 0) { - int le; - - /* - * Only one error recovery action - * at a time. See above. - */ - if (periph->flags & - CAM_PERIPH_RECOVERY_INPROG) { - error = ERESTART; - break; - } - - periph->flags |= - CAM_PERIPH_RECOVERY_INPROG; - - /* decrement the number of retries */ - retry = 1; - ccb->ccb_h.retry_count--; - - /* - * Check for removable media and - * set load/eject flag - * appropriately. - */ - if (SID_IS_REMOVABLE(&cgd.inq_data)) - le = TRUE; - else - le = FALSE; - - /* - * Attempt to start the drive up. - * - * Save the current ccb so it can - * be restored and retried once the - * drive is started up. - */ - bcopy(ccb, save_ccb, sizeof(*save_ccb)); - - scsi_start_stop(&ccb->csio, - /*retries*/1, - camperiphdone, - MSG_SIMPLE_Q_TAG, - /*start*/TRUE, - /*load/eject*/le, - /*immediate*/FALSE, - SSD_FULL_SIZE, - /*timeout*/50000); - /* - * Drop the priority to 0 so that - * we are the first to execute. Also - * freeze the queue after this command - * is sent so that we can restore the - * old csio and have it queued in the - * proper order before we let normal - * transactions go to the drive. - */ - ccb->ccb_h.pinfo.priority = 0; - ccb->ccb_h.flags |= CAM_DEV_QFREEZE; - - /* - * Save a pointer to the original - * CCB in the new CCB. - */ - ccb->ccb_h.saved_ccb_ptr = save_ccb; - - error = ERESTART; - } else if ((sense_flags & SF_RETRY_UA) != 0) { - /* - * XXX KDM this is a *horrible* - * hack. - */ - error = scsi_interpret_sense(ccb, - sense_flags, - &relsim_flags, - &openings, - &timeout, - err_action); - } + periph = xpt_path_periph(ccb->ccb_h.path); + if (periph->flags & CAM_PERIPH_RECOVERY_INPROG) { - /* - * Theoretically, this code should send a - * test unit ready to the given device, and - * if it returns and error, send a start - * unit command. Since we don't yet have - * the capability to do two-command error - * recovery, just send a start unit. - * XXX KDM fix this! - */ - else if (((err_action & SS_MASK) == SS_TURSTART) - && save_ccb != NULL - && ccb->ccb_h.retry_count > 0) { - int le; - - /* - * Only one error recovery action - * at a time. See above. - */ - if (periph->flags & - CAM_PERIPH_RECOVERY_INPROG) { - error = ERESTART; - break; - } - - periph->flags |= - CAM_PERIPH_RECOVERY_INPROG; - - /* decrement the number of retries */ - retry = 1; - ccb->ccb_h.retry_count--; - - /* - * Check for removable media and - * set load/eject flag - * appropriately. - */ - if (SID_IS_REMOVABLE(&cgd.inq_data)) - le = TRUE; - else - le = FALSE; - - /* - * Attempt to start the drive up. - * - * Save the current ccb so it can - * be restored and retried once the - * drive is started up. - */ - bcopy(ccb, save_ccb, sizeof(*save_ccb)); - - scsi_start_stop(&ccb->csio, - /*retries*/1, - camperiphdone, - MSG_SIMPLE_Q_TAG, - /*start*/TRUE, - /*load/eject*/le, - /*immediate*/FALSE, - SSD_FULL_SIZE, - /*timeout*/50000); - - /* release the queue after .5 sec. */ - relsim_flags = - RELSIM_RELEASE_AFTER_TIMEOUT; - timeout = 500; - /* - * Drop the priority to 0 so that - * we are the first to execute. Also - * freeze the queue after this command - * is sent so that we can restore the - * old csio and have it queued in the - * proper order before we let normal - * transactions go to the drive. - */ - ccb->ccb_h.pinfo.priority = 0; - ccb->ccb_h.flags |= CAM_DEV_QFREEZE; - - /* - * Save a pointer to the original - * CCB in the new CCB. - */ - ccb->ccb_h.saved_ccb_ptr = save_ccb; - - error = ERESTART; - } else { - error = scsi_interpret_sense(ccb, - sense_flags, - &relsim_flags, - &openings, - &timeout, - err_action); - } - } else if (ccb->csio.scsi_status == - SCSI_STATUS_CHECK_COND - && status != CAM_AUTOSENSE_FAIL) { - /* no point in decrementing the retry count */ - panic("cam_periph_error: scsi status of " - "CHECK COND returned but no sense " - "information is availible. " - "Controller should have returned " - "CAM_AUTOSENSE_FAILED"); - /* NOTREACHED */ - error = EIO; - } else if (ccb->ccb_h.retry_count == 0) { - /* - * XXX KDM shouldn't there be a better - * argument to return?? - */ - error = EIO; - } else { - /* decrement the number of retries */ - retry = ccb->ccb_h.retry_count > 0; - if (retry) - ccb->ccb_h.retry_count--; - /* - * If it was aborted with no - * clue as to the reason, just - * retry it again. - */ - error = ERESTART; + /* + * If error recovery is already in progress, don't attempt + * to process this error, but requeue it unconditionally + * and attempt to process it once error recovery has + * completed. This failed command is probably related to + * the error that caused the currently active error recovery + * action so our current recovery efforts should also + * address this command. Be aware that the error recovery + * code assumes that only one recovery action is in progress + * on a particular peripheral instance at any given time + * (e.g. only one saved CCB for error recovery) so it is + * imperitive that we don't violate this assumption. + */ + error = ERESTART; + } else { + scsi_sense_action err_action; + struct ccb_getdev cgd; + const char *action_string; + union ccb* print_ccb; + + /* A description of the error recovery action performed */ + action_string = NULL; + + /* + * The location of the orignal ccb + * for sense printing purposes. + */ + print_ccb = ccb; + + /* + * Grab the inquiry data for this device. + */ + xpt_setup_ccb(&cgd.ccb_h, ccb->ccb_h.path, /*priority*/ 1); + cgd.ccb_h.func_code = XPT_GDEV_TYPE; + xpt_action((union ccb *)&cgd); + + if ((ccb->ccb_h.status & CAM_AUTOSNS_VALID) != 0) + err_action = scsi_error_action(&ccb->csio, + &cgd.inq_data, + sense_flags); + else if ((ccb->ccb_h.flags & CAM_DIS_AUTOSENSE) == 0) + err_action = SS_REQSENSE; + else + err_action = SS_RETRY|SSQ_DECREMENT_COUNT|EIO; + + error = err_action & SS_ERRMASK; + + /* + * If the recovery action will consume a retry, + * make sure we actually have retries available. + */ + if ((err_action & SSQ_DECREMENT_COUNT) != 0) { + if (ccb->ccb_h.retry_count > 0) + ccb->ccb_h.retry_count--; + else { + action_string = "Retries Exhausted"; + goto sense_error_done; + } + } + + if ((err_action & SS_MASK) >= SS_START) { + /* + * Do common portions of commands that + * use recovery CCBs. + */ + if (save_ccb == NULL) { + action_string = "No recovery CCB supplied"; + goto sense_error_done; } + bcopy(ccb, save_ccb, sizeof(*save_ccb)); + print_ccb = save_ccb; + periph->flags |= CAM_PERIPH_RECOVERY_INPROG; + } + + switch (err_action & SS_MASK) { + case SS_NOP: + case SS_RETRY: + action_string = "Retrying Command"; + error = ERESTART; break; - case SCSI_STATUS_QUEUE_FULL: + case SS_FAIL: + action_string = "Unretryable error"; + break; + case SS_START: { - /* no decrement */ - struct ccb_getdevstats cgds; + int le; /* - * First off, find out what the current - * transaction counts are. + * Send a start unit command to the device, and + * then retry the command. */ - xpt_setup_ccb(&cgds.ccb_h, - ccb->ccb_h.path, - /*priority*/1); - cgds.ccb_h.func_code = XPT_GDEV_STATS; - xpt_action((union ccb *)&cgds); + action_string = "Attempting to Start Unit"; /* - * If we were the only transaction active, treat - * the QUEUE FULL as if it were a BUSY condition. + * Check for removable media and set + * load/eject flag appropriately. */ - if (cgds.dev_active != 0) { - int total_openings; - - /* - * Reduce the number of openings to - * be 1 less than the amount it took - * to get a queue full bounded by the - * minimum allowed tag count for this - * device. - */ - total_openings = - cgds.dev_active+cgds.dev_openings; - openings = cgds.dev_active; - if (openings < cgds.mintags) - openings = cgds.mintags; - if (openings < total_openings) - relsim_flags = RELSIM_ADJUST_OPENINGS; - else { - /* - * Some devices report queue full for - * temporary resource shortages. For - * this reason, we allow a minimum - * tag count to be entered via a - * quirk entry to prevent the queue - * count on these devices from falling - * to a pessimisticly low value. We - * still wait for the next successful - * completion, however, before queueing - * more transactions to the device. - */ - relsim_flags = - RELSIM_RELEASE_AFTER_CMDCMPLT; - } - timeout = 0; - error = ERESTART; - break; - } - /* FALLTHROUGH */ + if (SID_IS_REMOVABLE(&cgd.inq_data)) + le = TRUE; + else + le = FALSE; + + scsi_start_stop(&ccb->csio, + /*retries*/1, + camperiphdone, + MSG_SIMPLE_Q_TAG, + /*start*/TRUE, + /*load/eject*/le, + /*immediate*/FALSE, + SSD_FULL_SIZE, + /*timeout*/50000); + break; } - case SCSI_STATUS_BUSY: + case SS_TUR: + { /* - * Restart the queue after either another - * command completes or a 1 second timeout. - * If we have any retries left, that is. + * Send a Test Unit Ready to the device. + * If the 'many' flag is set, we send 120 + * test unit ready commands, one every half + * second. Otherwise, we just send one TUR. + * We only want to do this if the retry + * count has not been exhausted. */ - retry = ccb->ccb_h.retry_count > 0; - if (retry) { - ccb->ccb_h.retry_count--; - error = ERESTART; - relsim_flags = RELSIM_RELEASE_AFTER_TIMEOUT - | RELSIM_RELEASE_AFTER_CMDCMPLT; - timeout = 1000; + int retries; + + if ((err_action & SSQ_MANY) != 0) { + action_string = "Polling device for readiness"; + retries = 120; } else { - error = EIO; + action_string = "Testing device for readiness"; + retries = 1; } + scsi_test_unit_ready(&ccb->csio, + retries, + camperiphdone, + MSG_SIMPLE_Q_TAG, + SSD_FULL_SIZE, + /*timeout*/5000); + + /* + * Accomplish our 500ms delay by deferring + * the release of our device queue appropriately. + */ + *relsim_flags = RELSIM_RELEASE_AFTER_TIMEOUT; + *timeout = 500; break; - case SCSI_STATUS_RESERV_CONFLICT: - error = EIO; + } + case SS_REQSENSE: + { + /* + * Send a Request Sense to the device. We + * assume that we are in a contingent allegiance + * condition so we do not tag this request. + */ + scsi_request_sense(&ccb->csio, /*retries*/1, + camperiphdone, + &save_ccb->csio.sense_data, + sizeof(save_ccb->csio.sense_data), + CAM_TAG_ACTION_NONE, + /*sense_len*/SSD_FULL_SIZE, + /*timeout*/5000); break; + } default: - error = EIO; - break; + panic("Unhandled error action %x\n", err_action); + } + + if ((err_action & SS_MASK) >= SS_START) { + /* + * Drop the priority to 0 so that the recovery + * CCB is the first to execute. Freeze the queue + * after this command is sent so that we can + * restore the old csio and have it queued in + * the proper order before we release normal + * transactions to the device. + */ + ccb->ccb_h.pinfo.priority = 0; + ccb->ccb_h.flags |= CAM_DEV_QFREEZE; + ccb->ccb_h.saved_ccb_ptr = save_ccb; + error = ERESTART; } + +sense_error_done: + if ((err_action & SSQ_PRINT_SENSE) != 0 + && (ccb->ccb_h.status & CAM_AUTOSNS_VALID) != 0) { +#if 0 + scsi_sense_print(&print_ccb->csio); +#endif + cam_error_print(print_ccb, CAM_ESF_ALL, CAM_EPF_ALL); + xpt_print_path(ccb->ccb_h.path); + printf("%s\n", action_string); + } + } + return (error); +} + +/* + * Generic error handler. Peripheral drivers usually filter + * out the errors that they handle in a unique mannor, then + * call this function. + */ +int +cam_periph_error(union ccb *ccb, cam_flags camflags, + u_int32_t sense_flags, union ccb *save_ccb) +{ + const char *action_string; + cam_status status; + int frozen; + int error; + int openings; + u_int32_t relsim_flags; + u_int32_t timeout; + + action_string = NULL; + status = ccb->ccb_h.status; + frozen = (status & CAM_DEV_QFRZN) != 0; + status &= CAM_STATUS_MASK; + relsim_flags = 0; + + switch (status) { + case CAM_REQ_CMP: + error = 0; break; + case CAM_SCSI_STATUS_ERROR: + error = camperiphscsistatuserror(ccb, + camflags, + sense_flags, + save_ccb, + &openings, + &relsim_flags, + &timeout); + break; + case CAM_AUTOSENSE_FAIL: + xpt_print_path(ccb->ccb_h.path); + printf("AutoSense Failed\n"); case CAM_REQ_CMP_ERR: case CAM_CMD_TIMEOUT: case CAM_UNEXP_BUSFREE: case CAM_UNCOR_PARITY: case CAM_DATA_RUN_ERR: /* decrement the number of retries */ - retry = ccb->ccb_h.retry_count > 0; - if (retry) { + if (ccb->ccb_h.retry_count > 0) { ccb->ccb_h.retry_count--; error = ERESTART; } else { + action_string = "Retries Exausted"; error = EIO; } break; @@ -1587,46 +1548,37 @@ cam_periph_error(union ccb *ccb, cam_flags camflags, break; case CAM_SEL_TIMEOUT: { - /* - * XXX - * A single selection timeout should not be enough - * to invalidate a device. We should retry for multiple - * seconds assuming this isn't a probe. We'll probably - * need a special flag for that. - */ -#if 0 struct cam_path *newpath; + if ((camflags & CAM_RETRY_SELTO) != 0) { + if (ccb->ccb_h.retry_count > 0) { + + ccb->ccb_h.retry_count--; + error = ERESTART; + + /* + * Wait a second to give the device + * time to recover before we try again. + */ + relsim_flags = RELSIM_RELEASE_AFTER_TIMEOUT; + timeout = 1000; + break; + } + } + error = ENXIO; /* Should we do more if we can't create the path?? */ if (xpt_create_path(&newpath, xpt_path_periph(ccb->ccb_h.path), xpt_path_path_id(ccb->ccb_h.path), xpt_path_target_id(ccb->ccb_h.path), CAM_LUN_WILDCARD) != CAM_REQ_CMP) break; + /* * Let peripheral drivers know that this device has gone * away. */ xpt_async(AC_LOST_DEVICE, newpath, NULL); xpt_free_path(newpath); -#endif - if ((sense_flags & SF_RETRY_SELTO) != 0) { - retry = ccb->ccb_h.retry_count > 0; - if (retry) { - ccb->ccb_h.retry_count--; - error = ERESTART; - /* - * Wait half a second to give the device - * time to recover before we try again. - */ - relsim_flags = RELSIM_RELEASE_AFTER_TIMEOUT; - timeout = 500; - } else { - error = ENXIO; - } - } else { - error = ENXIO; - } break; } case CAM_REQ_INVALID: @@ -1634,13 +1586,22 @@ cam_periph_error(union ccb *ccb, cam_flags camflags, case CAM_DEV_NOT_THERE: case CAM_NO_HBA: case CAM_PROVIDE_FAIL: - case CAM_REQ_TOO_BIG: + case CAM_REQ_TOO_BIG: error = EINVAL; break; case CAM_SCSI_BUS_RESET: - case CAM_BDR_SENT: + case CAM_BDR_SENT: + /* + * Commands that repeatedly timeout and cause these + * kinds of error recovery actions, should return + * CAM_CMD_TIMEOUT, which allows us to safely assume + * that this command was an innocent bystander to + * these events and should be unconditionally + * retried. + */ + /* FALLTHROUGH */ case CAM_REQUEUE_REQ: - /* Unconditional requeue, dammit */ + /* Unconditional requeue */ error = ERESTART; break; case CAM_RESRC_UNAVAIL: @@ -1648,13 +1609,12 @@ cam_periph_error(union ccb *ccb, cam_flags camflags, /* timeout??? */ default: /* decrement the number of retries */ - retry = ccb->ccb_h.retry_count > 0; - if (retry) { + if (ccb->ccb_h.retry_count > 0) { ccb->ccb_h.retry_count--; error = ERESTART; } else { - /* Check the sense codes */ error = EIO; + action_string = "Retries Exhausted"; } break; } @@ -1664,18 +1624,30 @@ cam_periph_error(union ccb *ccb, cam_flags camflags, if (frozen != 0) ccb->ccb_h.status &= ~CAM_DEV_QFRZN; - if (error == ERESTART) + if (error == ERESTART) { + action_string = "Retrying Command"; xpt_action(ccb); + } - if (frozen != 0) { + if (frozen != 0) cam_release_devq(ccb->ccb_h.path, relsim_flags, openings, timeout, /*getcount_only*/0); - } } + if (error != 0 && bootverbose) { + + if (action_string == NULL) + action_string = "Unretryable Error"; + if (error != ERESTART) { + xpt_print_path(ccb->ccb_h.path); + printf("error %d\n", error); + } + xpt_print_path(ccb->ccb_h.path); + printf("%s\n", action_string); + } return (error); } |