From 7fbd764881a5f9dc81a378293b7a74227fcc04ed Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 26 Aug 2011 00:36:23 +0400 Subject: [SCSI] mpt2sas: Fix mismatch in mpt2sas_base_hard_reset_handler() mutex lock-unlock If ioc->pci_error_recovery is set, goto out in mpt2sas_base_hard_reset_handler() leads to unlock unheld ioc->reset_in_progress_mutex. The patch fixes the issue by jumping afer mutex_unlock() call. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: "Nandigama, Nagalakshmi" Signed-off-by: James Bottomley --- drivers/scsi/mpt2sas/mpt2sas_base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index 0b2c955..a78036f 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -4548,7 +4548,7 @@ mpt2sas_base_hard_reset_handler(struct MPT2SAS_ADAPTER *ioc, int sleep_flag, printk(MPT2SAS_ERR_FMT "%s: pci error recovery reset\n", ioc->name, __func__); r = 0; - goto out; + goto out_unlocked; } if (mpt2sas_fwfault_debug) @@ -4604,6 +4604,7 @@ mpt2sas_base_hard_reset_handler(struct MPT2SAS_ADAPTER *ioc, int sleep_flag, spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); mutex_unlock(&ioc->reset_in_progress_mutex); + out_unlocked: dtmprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: exit\n", ioc->name, __func__)); return r; -- cgit v1.1 From a92fa25c63a788758bd52e9123504d133210c8b7 Mon Sep 17 00:00:00 2001 From: Kleber Sacilotto de Souza Date: Mon, 16 Jan 2012 19:30:25 -0200 Subject: [SCSI] ipr: fix eeh recovery for 64-bit adapters In some scenarios, an EEH error can take a long time to be detected, since the driver issues an MMIO read only after a device reset command times out and we try to reset the adapter. This patch adds some code in ipr_cancel_op() to read a hardware register so we detect the error earlier in case the op is being aborted because of a timeout caused by a frozen adapter slot. Another problem in such scenarios is that in __ipr_eh_host_reset() we change the dump state flag from WAIT_FOR_DUMP to GET_DUMP, and the flag is later changed from GET_DUMP to READ_DUMP in ipr_reset_restore_cfg_space(). However, if when __ipr_eh_host_reset() is called by the SCSI error handling the function ipr_reset_restore_cfg_space() has already been called by the PCI EEH code, we end up with the flag in an inconsistent state. This patch also prevents this problem. Signed-off-by: Kleber Sacilotto de Souza Acked-by: Brian King Signed-off-by: James Bottomley --- drivers/scsi/ipr.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 67b169b..b538f08 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -4613,11 +4613,13 @@ static int __ipr_eh_host_reset(struct scsi_cmnd * scsi_cmd) ENTER; ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; - dev_err(&ioa_cfg->pdev->dev, - "Adapter being reset as a result of error recovery.\n"); + if (!ioa_cfg->in_reset_reload) { + dev_err(&ioa_cfg->pdev->dev, + "Adapter being reset as a result of error recovery.\n"); - if (WAIT_FOR_DUMP == ioa_cfg->sdt_state) - ioa_cfg->sdt_state = GET_DUMP; + if (WAIT_FOR_DUMP == ioa_cfg->sdt_state) + ioa_cfg->sdt_state = GET_DUMP; + } rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV); @@ -4907,7 +4909,7 @@ static int ipr_cancel_op(struct scsi_cmnd * scsi_cmd) struct ipr_ioa_cfg *ioa_cfg; struct ipr_resource_entry *res; struct ipr_cmd_pkt *cmd_pkt; - u32 ioasc; + u32 ioasc, int_reg; int op_found = 0; ENTER; @@ -4920,7 +4922,17 @@ static int ipr_cancel_op(struct scsi_cmnd * scsi_cmd) */ if (ioa_cfg->in_reset_reload || ioa_cfg->ioa_is_dead) return FAILED; - if (!res || !ipr_is_gscsi(res)) + if (!res) + return FAILED; + + /* + * If we are aborting a timed out op, chances are that the timeout was caused + * by a still not detected EEH error. In such cases, reading a register will + * trigger the EEH recovery infrastructure. + */ + int_reg = readl(ioa_cfg->regs.sense_interrupt_reg); + + if (!ipr_is_gscsi(res)) return FAILED; list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { -- cgit v1.1 From 6d7938f46f89c9773f9396c1d13b20bbc5c6d95b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 27 Jan 2012 11:17:37 -0800 Subject: [SCSI] isci: Fix NULL ptr dereference when no firmware is being loaded NULL orom ptr passed in for verification which caused page fault. We will set a default version when we don't have orom struct. Reported-by: Dan Melnic Signed-off-by: Dave Jiang Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/host.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index 1a65d65..418391b 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -1848,9 +1848,11 @@ static enum sci_status sci_oem_parameters_set(struct isci_host *ihost) if (state == SCIC_RESET || state == SCIC_INITIALIZING || state == SCIC_INITIALIZED) { + u8 oem_version = pci_info->orom ? pci_info->orom->hdr.version : + ISCI_ROM_VER_1_0; if (sci_oem_parameters_validate(&ihost->oem_parameters, - pci_info->orom->hdr.version)) + oem_version)) return SCI_FAILURE_INVALID_PARAMETER_VALUE; return SCI_SUCCESS; -- cgit v1.1 From a55aac79de0ea6fc52d35f535867b6573a5ff0f8 Mon Sep 17 00:00:00 2001 From: Arun Easi Date: Thu, 9 Feb 2012 11:14:03 -0800 Subject: [SCSI] qla2xxx: Propagate up abort failures. Signed-off-by: Arun Easi Signed-off-by: Chad Dupuis Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_os.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 4ed1e4a..b4b185c 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -877,6 +877,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) spin_unlock_irqrestore(&ha->hardware_lock, flags); if (ha->isp_ops->abort_command(sp)) { + ret = FAILED; ql_dbg(ql_dbg_taskm, vha, 0x8003, "Abort command mbx failed cmd=%p.\n", cmd); } else { -- cgit v1.1 From aa651be83dfec5587dabce0a9d471c1e2095c33e Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Thu, 9 Feb 2012 11:14:04 -0800 Subject: [SCSI] qla2xxx: Add check for null fcport references in qla2xxx_queuecommand. Signed-off-by: Giridhar Malavali Signed-off-by: Chad Dupuis Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_os.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index b4b185c..5fd89d7 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -625,6 +625,12 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) cmd->result = DID_NO_CONNECT << 16; goto qc24_fail_command; } + + if (!fcport) { + cmd->result = DID_NO_CONNECT << 16; + goto qc24_fail_command; + } + if (atomic_read(&fcport->state) != FCS_ONLINE) { if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD || atomic_read(&base_vha->loop_state) == LOOP_DEAD) { -- cgit v1.1 From d051a5aa1c234c8de01fc0a488b1a18d65246150 Mon Sep 17 00:00:00 2001 From: Andrew Vasquez Date: Thu, 9 Feb 2012 11:14:05 -0800 Subject: [SCSI] qla2xxx: Add an "is reset active" helper. Many locations within the driver would use an inconsistent set of checks to determine ISP-reset state. Consolidate the checks into this inline-helper. Signed-off-by: Andrew Vasquez Signed-off-by: Chad Dupuis Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_attr.c | 13 ++++------ drivers/scsi/qla2xxx/qla_bsg.c | 50 +++++++-------------------------------- drivers/scsi/qla2xxx/qla_dbg.c | 3 ++- drivers/scsi/qla2xxx/qla_inline.h | 13 ++++++++++ 4 files changed, 28 insertions(+), 51 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index a2f1b30..9f41b3b 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -1036,8 +1036,7 @@ qla2x00_link_state_show(struct device *dev, struct device_attribute *attr, vha->device_flags & DFLG_NO_CABLE) len = snprintf(buf, PAGE_SIZE, "Link Down\n"); else if (atomic_read(&vha->loop_state) != LOOP_READY || - test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) || - test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags)) + qla2x00_reset_active(vha)) len = snprintf(buf, PAGE_SIZE, "Unknown Link State\n"); else { len = snprintf(buf, PAGE_SIZE, "Link Up - "); @@ -1359,8 +1358,7 @@ qla2x00_thermal_temp_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "\n"); temp = frac = 0; - if (test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) || - test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags)) + if (qla2x00_reset_active(vha)) ql_log(ql_log_warn, vha, 0x707b, "ISP reset active.\n"); else if (!vha->hw->flags.eeh_busy) @@ -1379,8 +1377,7 @@ qla2x00_fw_state_show(struct device *dev, struct device_attribute *attr, int rval = QLA_FUNCTION_FAILED; uint16_t state[5]; - if (test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) || - test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags)) + if (qla2x00_reset_active(vha)) ql_log(ql_log_warn, vha, 0x707c, "ISP reset active.\n"); else if (!vha->hw->flags.eeh_busy) @@ -1693,9 +1690,7 @@ qla2x00_get_fc_host_stats(struct Scsi_Host *shost) if (IS_FWI2_CAPABLE(ha)) { rval = qla24xx_get_isp_stats(base_vha, stats, stats_dma); } else if (atomic_read(&base_vha->loop_state) == LOOP_READY && - !test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) && - !test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags) && - !ha->dpc_active) { + !qla2x00_reset_active(vha) && !ha->dpc_active) { /* Must be in a 'READY' state for statistics retrieval. */ rval = qla2x00_get_link_status(base_vha, base_vha->loop_id, stats, stats_dma); diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index b1d0f93..1682e2e 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -108,13 +108,6 @@ qla24xx_proc_fcp_prio_cfg_cmd(struct fc_bsg_job *bsg_job) goto exit_fcp_prio_cfg; } - if (test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) || - test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) || - test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { - ret = -EBUSY; - goto exit_fcp_prio_cfg; - } - /* Get the sub command */ oper = bsg_job->request->rqst_data.h_vendor.vendor_cmd[1]; @@ -646,13 +639,6 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job) dma_addr_t rsp_data_dma; uint32_t rsp_data_len; - if (test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) || - test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) || - test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { - ql_log(ql_log_warn, vha, 0x7018, "Abort active or needed.\n"); - return -EBUSY; - } - if (!vha->flags.online) { ql_log(ql_log_warn, vha, 0x7019, "Host is not online.\n"); return -EIO; @@ -874,13 +860,6 @@ qla84xx_reset(struct fc_bsg_job *bsg_job) int rval = 0; uint32_t flag; - if (test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) || - test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) || - test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { - ql_log(ql_log_warn, vha, 0x702e, "Abort active or needed.\n"); - return -EBUSY; - } - if (!IS_QLA84XX(ha)) { ql_dbg(ql_dbg_user, vha, 0x702f, "Not 84xx, exiting.\n"); return -EINVAL; @@ -922,11 +901,6 @@ qla84xx_updatefw(struct fc_bsg_job *bsg_job) uint32_t flag; uint32_t fw_ver; - if (test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) || - test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) || - test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) - return -EBUSY; - if (!IS_QLA84XX(ha)) { ql_dbg(ql_dbg_user, vha, 0x7032, "Not 84xx, exiting.\n"); @@ -1036,14 +1010,6 @@ qla84xx_mgmt_cmd(struct fc_bsg_job *bsg_job) uint32_t data_len = 0; uint32_t dma_direction = DMA_NONE; - if (test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) || - test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) || - test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { - ql_log(ql_log_warn, vha, 0x7039, - "Abort active or needed.\n"); - return -EBUSY; - } - if (!IS_QLA84XX(ha)) { ql_log(ql_log_warn, vha, 0x703a, "Not 84xx, exiting.\n"); @@ -1246,13 +1212,6 @@ qla24xx_iidma(struct fc_bsg_job *bsg_job) bsg_job->reply->reply_payload_rcv_len = 0; - if (test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) || - test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) || - test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { - ql_log(ql_log_warn, vha, 0x7045, "abort active or needed.\n"); - return -EBUSY; - } - if (!IS_IIDMA_CAPABLE(vha->hw)) { ql_log(ql_log_info, vha, 0x7046, "iiDMA not supported.\n"); return -EINVAL; @@ -1668,6 +1627,15 @@ qla24xx_bsg_request(struct fc_bsg_job *bsg_job) vha = shost_priv(host); } + if (qla2x00_reset_active(vha)) { + ql_dbg(ql_dbg_user, vha, 0x709f, + "BSG: ISP abort active/needed -- cmd=%d.\n", + bsg_job->request->msgcode); + bsg_job->reply->result = (DID_ERROR << 16); + bsg_job->job_done(bsg_job); + return -EBUSY; + } + ql_dbg(ql_dbg_user, vha, 0x7000, "Entered %s msgcode=0x%x.\n", __func__, bsg_job->request->msgcode); diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 7c54624..45cbf0b 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -19,7 +19,8 @@ * | DPC Thread | 0x401c | | * | Async Events | 0x5057 | 0x5052 | * | Timer Routines | 0x6011 | 0x600e,0x600f | - * | User Space Interactions | 0x709e | | + * | User Space Interactions | 0x709e | 0x7018,0x702e | + * | | | 0x7039,0x7045 | * | Task Management | 0x803c | 0x8025-0x8026 | * | | | 0x800b,0x8039 | * | AER/EEH | 0x900f | | diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index 9902834..7cc4f36 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -131,3 +131,16 @@ qla2x00_hba_err_chk_enabled(srb_t *sp) } return 0; } + +static inline int +qla2x00_reset_active(scsi_qla_host_t *vha) +{ + scsi_qla_host_t *base_vha = pci_get_drvdata(vha->hw->pdev); + + /* Test appropriate base-vha and vha flags. */ + return test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags) || + test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) || + test_bit(ISP_ABORT_RETRY, &base_vha->dpc_flags) || + test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) || + test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags); +} -- cgit v1.1 From 4ba988db8d60eb16b7da69f9e3705b52ac8a6540 Mon Sep 17 00:00:00 2001 From: Andrew Vasquez Date: Thu, 9 Feb 2012 11:14:06 -0800 Subject: [SCSI] qla2xxx: Clear options-flags while issuing stop-firmware mbx command. Not clearing the options flags in mbx1 could lead the firmware into interpreting old data in mbx1 through mbx8. This could lead to inadvertent DMA read/write operations to stale memory. Signed-off-by: Andrew Vasquez Signed-off-by: Chad Dupuis Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_mbx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 34344d3..8635722 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -2581,7 +2581,8 @@ qla2x00_stop_firmware(scsi_qla_host_t *vha) ql_dbg(ql_dbg_mbx, vha, 0x10a1, "Entered %s.\n", __func__); mcp->mb[0] = MBC_STOP_FIRMWARE; - mcp->out_mb = MBX_0; + mcp->mb[1] = 0; + mcp->out_mb = MBX_1|MBX_0; mcp->in_mb = MBX_0; mcp->tov = 5; mcp->flags = 0; -- cgit v1.1 From 7cb0eb1c17fa69535b6b2a80296c2f2ca300b800 Mon Sep 17 00:00:00 2001 From: Andrew Vasquez Date: Thu, 9 Feb 2012 11:14:07 -0800 Subject: [SCSI] qla2xxx: Remove errant clearing of MBX_INTERRUPT flag during CT-IOCB processing. This can cause instability in mailbox command state machine handling. Signed-off-by: Andrew Vasquez Signed-off-by: Chad Dupuis Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_isr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index e804585..349843e 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2090,7 +2090,6 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, break; case CT_IOCB_TYPE: qla24xx_els_ct_entry(vha, rsp->req, pkt, CT_IOCB_TYPE); - clear_bit(MBX_INTERRUPT, &vha->hw->mbx_cmd_flags); break; case ELS_IOCB_TYPE: qla24xx_els_ct_entry(vha, rsp->req, pkt, ELS_IOCB_TYPE); -- cgit v1.1 From 67ddda353c4e26ba23a199ae64fdf283b669469b Mon Sep 17 00:00:00 2001 From: Andrew Vasquez Date: Thu, 9 Feb 2012 11:14:08 -0800 Subject: [SCSI] qla2xxx: Correct out of bounds read of ISP2200 mailbox registers. ISP2200 adapters only have 24 mailbox registers so read only that many. Reported-by: Olatunji Ruwase Signed-off-by: Andrew Vasquez Signed-off-by: Chad Dupuis Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_def.h | 1 + drivers/scsi/qla2xxx/qla_os.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index a6a4eeb..af1003f 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -44,6 +44,7 @@ * ISP2100 HBAs. */ #define MAILBOX_REGISTER_COUNT_2100 8 +#define MAILBOX_REGISTER_COUNT_2200 24 #define MAILBOX_REGISTER_COUNT 32 #define QLA2200A_RISC_ROM_VER 4 diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5fd89d7..7e617a6 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2054,7 +2054,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) ha->nvram_data_off = ~0; ha->isp_ops = &qla2100_isp_ops; } else if (IS_QLA2200(ha)) { - ha->mbx_count = MAILBOX_REGISTER_COUNT; + ha->mbx_count = MAILBOX_REGISTER_COUNT_2200; req_length = REQUEST_ENTRY_CNT_2200; rsp_length = RESPONSE_ENTRY_CNT_2100; ha->max_loop_id = SNS_LAST_LOOP_ID_2100; -- cgit v1.1 From c7a992784240c1b16425c6a9606c9db0fc28fb0c Mon Sep 17 00:00:00 2001 From: Michael Christie Date: Thu, 9 Feb 2012 11:14:09 -0800 Subject: [SCSI] qla2xxx: Remove check for null fcport from host reset handler. Remove the check for a NULL fcport so that the host reset will run unconditionally to unwedge any commands before the device is offlined and to prevent a quick runthrough of the SCSI error handling. Signed-off-by: Michael Christie Signed-off-by: Chad Dupuis Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_os.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 7e617a6..036030c 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1131,7 +1131,6 @@ static int qla2xxx_eh_host_reset(struct scsi_cmnd *cmd) { scsi_qla_host_t *vha = shost_priv(cmd->device->host); - fc_port_t *fcport = (struct fc_port *) cmd->device->hostdata; struct qla_hw_data *ha = vha->hw; int ret = FAILED; unsigned int id, lun; @@ -1140,15 +1139,6 @@ qla2xxx_eh_host_reset(struct scsi_cmnd *cmd) id = cmd->device->id; lun = cmd->device->lun; - if (!fcport) { - return ret; - } - - ret = fc_block_scsi_eh(cmd); - if (ret != 0) - return ret; - ret = FAILED; - ql_log(ql_log_info, vha, 0x8018, "ADAPTER RESET ISSUED nexus=%ld:%d:%d.\n", vha->host_no, id, lun); -- cgit v1.1 From d33609607c5abc0b4b31d238e33f3ab075e2f96f Mon Sep 17 00:00:00 2001 From: Giridhar Malavali Date: Thu, 9 Feb 2012 11:14:10 -0800 Subject: [SCSI] qla2xxx: Complete mailbox command timedout to avoid initialization failures during next reset cycle. Complete the mailbox command timed out before initiating another abort cycle to recover so that mailbox commands issued during next reset cycle don't fail due to pending mailbox access timeout. Signed-off-by: Giridhar Malavali Signed-off-by: Chad Dupuis Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_mbx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 8635722..08f1d01 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -342,6 +342,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) set_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags); clear_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + /* Allow next mbx cmd to come in. */ + complete(&ha->mbx_cmd_comp); if (ha->isp_ops->abort_isp(vha)) { /* Failed. retry later. */ set_bit(ISP_ABORT_NEEDED, @@ -350,6 +352,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) clear_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags); ql_dbg(ql_dbg_mbx, base_vha, 0x101f, "Finished abort_isp.\n"); + goto mbx_done; } } } @@ -358,6 +361,7 @@ premature_exit: /* Allow next mbx cmd to come in. */ complete(&ha->mbx_cmd_comp); +mbx_done: if (rval) { ql_dbg(ql_dbg_mbx, base_vha, 0x1020, "**** Failed mbx[0]=%x, mb[1]=%x, mb[2]=%x, cmd=%x ****.\n", -- cgit v1.1 From 5a034bb3c33aad59769e7289716c8d1f075b3894 Mon Sep 17 00:00:00 2001 From: Shyam Sundar Date: Thu, 9 Feb 2012 11:14:11 -0800 Subject: [SCSI] qla2xxx: Remove resetting memory during device initialization for ISP82xx. With IOs running and PegHalt testing the system reboots when memory reset is performed during device initialization. Signed-off-by: Shyam Sundar Signed-off-by: Giridhar Malavali Signed-off-by: Chad Dupuis Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_nx.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 1cd46cd..bb06c20 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -1165,19 +1165,6 @@ qla82xx_pinit_from_rom(scsi_qla_host_t *vha) qla82xx_wr_32(ha, QLA82XX_ROMUSB_GLB_SW_RESET, 0xfeffffff); else qla82xx_wr_32(ha, QLA82XX_ROMUSB_GLB_SW_RESET, 0xffffffff); - - /* reset ms */ - val = qla82xx_rd_32(ha, QLA82XX_CRB_QDR_NET + 0xe4); - val |= (1 << 1); - qla82xx_wr_32(ha, QLA82XX_CRB_QDR_NET + 0xe4, val); - msleep(20); - - /* unreset ms */ - val = qla82xx_rd_32(ha, QLA82XX_CRB_QDR_NET + 0xe4); - val &= ~(1 << 1); - qla82xx_wr_32(ha, QLA82XX_CRB_QDR_NET + 0xe4, val); - msleep(20); - qla82xx_rom_unlock(ha); /* Read the signature value from the flash. -- cgit v1.1 From 2cc97965e4f6e84792b958d4ad10631274d42834 Mon Sep 17 00:00:00 2001 From: Giridhar Malavali Date: Thu, 9 Feb 2012 11:14:12 -0800 Subject: [SCSI] qla2xxx: Proper detection of firmware abort error code for ISP82xx. Signed-off-by: Giridhar Malavali Signed-off-by: Chad Dupuis Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_nx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index bb06c20..270ba31 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -3379,7 +3379,7 @@ void qla82xx_watchdog(scsi_qla_host_t *vha) QLA82XX_CRB_PEG_NET_3 + 0x3c), qla82xx_rd_32(ha, QLA82XX_CRB_PEG_NET_4 + 0x3c)); - if (LSW(MSB(halt_status)) == 0x67) + if (((halt_status & 0x1fffff00) >> 8) == 0x67) ql_log(ql_log_warn, vha, 0xb052, "Firmware aborted with " "error code 0x00006700. Device is " -- cgit v1.1 From 477e3e9ffc13e99918b916a294dcc2d306b677a5 Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Thu, 9 Feb 2012 11:14:13 -0800 Subject: [SCSI] qla2xxx: Update version number to 8.03.07.13-k. Signed-off-by: Giridhar Malavali Signed-off-by: Chad Dupuis Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index 23f33a6..29d780c 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -7,7 +7,7 @@ /* * Driver version */ -#define QLA2XXX_VERSION "8.03.07.12-k" +#define QLA2XXX_VERSION "8.03.07.13-k" #define QLA_DRIVER_MAJOR_VER 8 #define QLA_DRIVER_MINOR_VER 3 -- cgit v1.1 From 267a6ad4aefaafbde607804c60945bcf97f91c1b Mon Sep 17 00:00:00 2001 From: Huajun Li Date: Sun, 12 Feb 2012 19:59:14 +0800 Subject: [SCSI] scsi_scan: Fix 'Poison overwritten' warning caused by using freed 'shost' In do_scan_async(), calling scsi_autopm_put_host(shost) may reference freed shost, and cause Posison overwitten warning. Yes, this case can happen, for example, an USB is disconnected just when do_scan_async() thread starts to run, then scsi_host_put() called in scsi_finish_async_scan() will lead to shost be freed(because the refcount of shost->shost_gendev decreases to 1 after USB disconnects), at this point, if references shost again, system will show following warning msg. To make scsi_autopm_put_host(shost) always reference a valid shost, put it just before scsi_host_put() in function scsi_finish_async_scan(). [ 299.281565] ============================================================================= [ 299.281634] BUG kmalloc-4096 (Tainted: G I ): Poison overwritten [ 299.281682] ----------------------------------------------------------------------------- [ 299.281684] [ 299.281752] INFO: 0xffff880056c305d0-0xffff880056c305d0. First byte 0x6a instead of 0x6b [ 299.281816] INFO: Allocated in scsi_host_alloc+0x4a/0x490 age=1688 cpu=1 pid=2004 [ 299.281870] __slab_alloc+0x617/0x6c1 [ 299.281901] __kmalloc+0x28c/0x2e0 [ 299.281931] scsi_host_alloc+0x4a/0x490 [ 299.281966] usb_stor_probe1+0x5b/0xc40 [usb_storage] [ 299.282010] storage_probe+0xa4/0xe0 [usb_storage] [ 299.282062] usb_probe_interface+0x172/0x330 [usbcore] [ 299.282105] driver_probe_device+0x257/0x3b0 [ 299.282138] __driver_attach+0x103/0x110 [ 299.282171] bus_for_each_dev+0x8e/0xe0 [ 299.282201] driver_attach+0x26/0x30 [ 299.282230] bus_add_driver+0x1c4/0x430 [ 299.282260] driver_register+0xb6/0x230 [ 299.282298] usb_register_driver+0xe5/0x270 [usbcore] [ 299.282337] 0xffffffffa04ab03d [ 299.282364] do_one_initcall+0x47/0x230 [ 299.282396] sys_init_module+0xa0f/0x1fe0 [ 299.282429] INFO: Freed in scsi_host_dev_release+0x18a/0x1d0 age=85 cpu=0 pid=2008 [ 299.282482] __slab_free+0x3c/0x2a1 [ 299.282510] kfree+0x296/0x310 [ 299.282536] scsi_host_dev_release+0x18a/0x1d0 [ 299.282574] device_release+0x74/0x100 [ 299.282606] kobject_release+0xc7/0x2a0 [ 299.282637] kobject_put+0x54/0xa0 [ 299.282668] put_device+0x27/0x40 [ 299.282694] scsi_host_put+0x1d/0x30 [ 299.282723] do_scan_async+0x1fc/0x2b0 [ 299.282753] kthread+0xdf/0xf0 [ 299.282782] kernel_thread_helper+0x4/0x10 [ 299.282817] INFO: Slab 0xffffea00015b0c00 objects=7 used=7 fp=0x (null) flags=0x100000000004080 [ 299.282882] INFO: Object 0xffff880056c30000 @offset=0 fp=0x (null) [ 299.282884] ... Signed-off-by: Huajun Li Cc: stable@kernel.org Acked-by: Alan Stern Signed-off-by: James Bottomley --- drivers/scsi/scsi_scan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 89da43f..29c4c04 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1815,6 +1815,7 @@ static void scsi_finish_async_scan(struct async_scan_data *data) } spin_unlock(&async_scan_lock); + scsi_autopm_put_host(shost); scsi_host_put(shost); kfree(data); } @@ -1841,7 +1842,6 @@ static int do_scan_async(void *_data) do_scsi_scan_host(shost); scsi_finish_async_scan(data); - scsi_autopm_put_host(shost); return 0; } @@ -1869,7 +1869,7 @@ void scsi_scan_host(struct Scsi_Host *shost) p = kthread_run(do_scan_async, data, "scsi_scan_%d", shost->host_no); if (IS_ERR(p)) do_scan_async(data); - /* scsi_autopm_put_host(shost) is called in do_scan_async() */ + /* scsi_autopm_put_host(shost) is called in scsi_finish_async_scan() */ } EXPORT_SYMBOL(scsi_scan_host); -- cgit v1.1 From fea6d607e154cf96ab22254ccb48addfd43d4cb5 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 17 Feb 2012 16:25:08 -0500 Subject: [SCSI] scsi_pm: Fix bug in the SCSI power management handler This patch (as1520) fixes a bug in the SCSI layer's power management implementation. LUN scanning can be carried out asynchronously in do_scan_async(), and sd uses an asynchronous thread for the time-consuming parts of disk probing in sd_probe_async(). Currently nothing coordinates these async threads with system sleep transitions; they can and do attempt to continue scanning/probing SCSI devices even after the host adapter has been suspended. As one might expect, the outcome is not ideal. This is what the "prepare" stage of system suspend was created for. After the prepare callback has been called for a host, target, or device, drivers are not allowed to register any children underneath them. Currently the SCSI prepare callback is not implemented; this patch rectifies that omission. For SCSI hosts, the prepare routine calls scsi_complete_async_scans() to wait until async scanning is finished. It might be slightly more efficient to wait only until the host in question has been scanned, but there's currently no way to do that. Besides, during a sleep transition we will ultimately have to wait until all the host scanning has finished anyway. For SCSI devices, the prepare routine calls async_synchronize_full() to wait until sd probing is finished. The routine does nothing for SCSI targets, because asynchronous target scanning is done only as part of host scanning. Signed-off-by: Alan Stern CC: Signed-off-by: James Bottomley --- drivers/scsi/scsi_pm.c | 16 ++++++++++++++++ drivers/scsi/scsi_priv.h | 1 + 2 files changed, 17 insertions(+) diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c index bf8bf79..c467064 100644 --- a/drivers/scsi/scsi_pm.c +++ b/drivers/scsi/scsi_pm.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -92,6 +93,19 @@ static int scsi_bus_resume_common(struct device *dev) return err; } +static int scsi_bus_prepare(struct device *dev) +{ + if (scsi_is_sdev_device(dev)) { + /* sd probing uses async_schedule. Wait until it finishes. */ + async_synchronize_full(); + + } else if (scsi_is_host_device(dev)) { + /* Wait until async scanning is finished */ + scsi_complete_async_scans(); + } + return 0; +} + static int scsi_bus_suspend(struct device *dev) { return scsi_bus_suspend_common(dev, PMSG_SUSPEND); @@ -110,6 +124,7 @@ static int scsi_bus_poweroff(struct device *dev) #else /* CONFIG_PM_SLEEP */ #define scsi_bus_resume_common NULL +#define scsi_bus_prepare NULL #define scsi_bus_suspend NULL #define scsi_bus_freeze NULL #define scsi_bus_poweroff NULL @@ -218,6 +233,7 @@ void scsi_autopm_put_host(struct Scsi_Host *shost) #endif /* CONFIG_PM_RUNTIME */ const struct dev_pm_ops scsi_bus_pm_ops = { + .prepare = scsi_bus_prepare, .suspend = scsi_bus_suspend, .resume = scsi_bus_resume_common, .freeze = scsi_bus_freeze, diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 68eadd1..be4fa6d 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -109,6 +109,7 @@ extern void scsi_exit_procfs(void); #endif /* CONFIG_PROC_FS */ /* scsi_scan.c */ +extern int scsi_complete_async_scans(void); extern int scsi_scan_host_selected(struct Scsi_Host *, unsigned int, unsigned int, unsigned int, int); extern void scsi_forget_host(struct Scsi_Host *); -- cgit v1.1 From 3569e5374df66a42ab66368b8bbb075e81d4e85c Mon Sep 17 00:00:00 2001 From: "Moger, Babu" Date: Thu, 2 Feb 2012 15:21:54 +0000 Subject: [SCSI] scsi_dh_rdac: Fix for unbalanced reference count This patch fixes an unbalanced refcount issue. Elevating the lock for both kref_put and also for controller node deletion. Previously, controller deletion was protected but the not the kref_put. This was causing the other thread to pick up the controller structure which was already kref'd zero. This was causing the following WARN_ON and also sometimes panic. WARNING: at lib/kref.c:43 kref_get+0x2d/0x30() (Not tainted) Hardware name: IBM System x3655 -[7985AC1]- Modules linked in: fuse scsi_dh_rdac autofs4 nfs lockd fscache nfs_acl auth_rpcgss sunrpc 8021q garp stp llc ipv6 ib_srp(U) scsi_transport_srp scsi_tgt ib_cm(U) ib_sa(U) ib_uverbs(U) ib_umad(U) mlx4_ib(U) mlx4_core(U) ib_mthca(U) ib_mad(U) ib_core(U) dm_mirror dm_region_hash dm_log dm_round_robin dm_multipath uinput bnx2 ses enclosure sg ibmpex ibmaem ipmi_msghandler serio_raw k8temp hwmon amd64_edac_mod edac_core edac_mce_amd shpchp i2c_piix4 ext4 mbcache jbd2 sr_mod cdrom sd_mod crc_t10dif sata_svw pata_acpi ata_generic pata_serverworks aacraid radeon ttm drm_kms_helper drm i2c_algo_bit i2c_core dm_mod [last unloaded: freq_table] Pid: 13735, comm: srp_daemon Not tainted 2.6.32-71.el6.x86_64 #1 Call Trace: [] warn_slowpath_common+0x87/0xc0 [] warn_slowpath_null+0x1a/0x20 [] kref_get+0x2d/0x30 [] rdac_bus_attach+0x459/0x580 [scsi_dh_rdac] [] scsi_dh_handler_attach+0x2a/0x80 [] scsi_dh_notifier+0x9b/0xa0 [] notifier_call_chain+0x55/0x80 [] __blocking_notifier_call_chain+0x5a/0x80 [] blocking_notifier_call_chain+0x16/0x20 [] device_add+0x515/0x640 [] ? attribute_container_device_trigger+0xc4/0xe0 [] scsi_sysfs_add_sdev+0x89/0x2c0 [] scsi_probe_and_add_lun+0xea6/0xed0 [] ? scsi_alloc_target+0x292/0x2d0 [] __scsi_scan_target+0x121/0x750 [] ? sysfs_create_file+0x26/0x30 [] ? device_create_file+0x19/0x20 [] ? attribute_container_add_attrs+0x78/0x90 [] ? klist_next+0x4c/0xf0 [] ? transport_configure+0x0/0x20 [] ? attribute_container_device_trigger+0xc4/0xe0 [] scsi_scan_target+0xd0/0xe0 [] srp_create_target+0x75a/0x890 [ib_srp] [] dev_attr_store+0x20/0x30 [] sysfs_write_file+0xe5/0x170 [] vfs_write+0xb8/0x1a0 [] ? audit_syscall_entry+0x272/0x2a0 [] sys_write+0x51/0x90 [] system_call_fastpath+0x16/0x1b Signed-off-by: Babu Moger Acked-by: Mike Snitzer Signed-off-by: James Bottomley --- drivers/scsi/device_handler/scsi_dh_rdac.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 53a31c7..20c4557f 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -364,10 +364,7 @@ static void release_controller(struct kref *kref) struct rdac_controller *ctlr; ctlr = container_of(kref, struct rdac_controller, kref); - flush_workqueue(kmpath_rdacd); - spin_lock(&list_lock); list_del(&ctlr->node); - spin_unlock(&list_lock); kfree(ctlr); } @@ -376,20 +373,17 @@ static struct rdac_controller *get_controller(int index, char *array_name, { struct rdac_controller *ctlr, *tmp; - spin_lock(&list_lock); - list_for_each_entry(tmp, &ctlr_list, node) { if ((memcmp(tmp->array_id, array_id, UNIQUE_ID_LEN) == 0) && (tmp->index == index) && (tmp->host == sdev->host)) { kref_get(&tmp->kref); - spin_unlock(&list_lock); return tmp; } } ctlr = kmalloc(sizeof(*ctlr), GFP_ATOMIC); if (!ctlr) - goto done; + return NULL; /* initialize fields of controller */ memcpy(ctlr->array_id, array_id, UNIQUE_ID_LEN); @@ -405,8 +399,7 @@ static struct rdac_controller *get_controller(int index, char *array_name, INIT_WORK(&ctlr->ms_work, send_mode_select); INIT_LIST_HEAD(&ctlr->ms_head); list_add(&ctlr->node, &ctlr_list); -done: - spin_unlock(&list_lock); + return ctlr; } @@ -517,9 +510,12 @@ static int initialize_controller(struct scsi_device *sdev, index = 0; else index = 1; + + spin_lock(&list_lock); h->ctlr = get_controller(index, array_name, array_id, sdev); if (!h->ctlr) err = SCSI_DH_RES_TEMP_UNAVAIL; + spin_unlock(&list_lock); } return err; } @@ -906,7 +902,9 @@ static int rdac_bus_attach(struct scsi_device *sdev) return 0; clean_ctlr: + spin_lock(&list_lock); kref_put(&h->ctlr->kref, release_controller); + spin_unlock(&list_lock); failed: kfree(scsi_dh_data); @@ -921,14 +919,19 @@ static void rdac_bus_detach( struct scsi_device *sdev ) struct rdac_dh_data *h; unsigned long flags; - spin_lock_irqsave(sdev->request_queue->queue_lock, flags); scsi_dh_data = sdev->scsi_dh_data; + h = (struct rdac_dh_data *) scsi_dh_data->buf; + if (h->ctlr && h->ctlr->ms_queued) + flush_workqueue(kmpath_rdacd); + + spin_lock_irqsave(sdev->request_queue->queue_lock, flags); sdev->scsi_dh_data = NULL; spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags); - h = (struct rdac_dh_data *) scsi_dh_data->buf; + spin_lock(&list_lock); if (h->ctlr) kref_put(&h->ctlr->kref, release_controller); + spin_unlock(&list_lock); kfree(scsi_dh_data); module_put(THIS_MODULE); sdev_printk(KERN_NOTICE, sdev, "%s: Detached\n", RDAC_NAME); -- cgit v1.1