From d0efab26f89506387a1bde898556660e06d7eb15 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 2 Sep 2011 19:31:46 +0400 Subject: [SCSI] aacraid: reset should disable MSI interrupt scsi reset on hardware with enabled MSI interrupts generates WARNING message [11027.798722] aacraid: Host adapter abort request (0,0,0,0) [11027.798814] aacraid: Host adapter reset request. SCSI hang ? [11087.762237] aacraid: SCSI bus appears hung [11135.082543] ------------[ cut here ]------------ [11135.082646] WARNING: at drivers/pci/msi.c:658 pci_enable_msi_block+0x251/0x290() Signed-off-by: Vasily Averin Acked-by: Mark Salyzyn Cc: stable@kernel.org Signed-off-by: James Bottomley --- drivers/scsi/aacraid/commsup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/scsi') diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index e7d0d47..e5f2d7d 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1283,6 +1283,8 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced) kfree(aac->queues); aac->queues = NULL; free_irq(aac->pdev->irq, aac); + if (aac->msi) + pci_disable_msi(aac->pdev); kfree(aac->fsa_dev); aac->fsa_dev = NULL; quirks = aac_get_driver_ident(index)->quirks; -- cgit v1.1 From 24926dadc41cc566e974022b0e66231b82c6375f Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Thu, 1 Sep 2011 06:11:17 -0700 Subject: [SCSI] libsas: fix failure to revalidate domain for anything but the first expander child. In an enclosure model where there are chaining expanders to a large body of storage, it was discovered that libsas, responding to a broadcast event change, would only revalidate the domain of first child expander in the list. The issue is that the pointer value to the discovered source device was used to break out of the loop, rather than the content of the pointer. This still remains non-compliant as the revalidate domain code is supposed to loop through all child expanders, and not stop at the first one it finds that reports a change count. However, the design of this routine does not allow multiple device discoveries and that would be a more complicated set of patches reserved for another day. We are fixing the glaring bug rather than refactoring the code. Signed-off-by: Mark Salyzyn Cc: stable@kernel.org Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_expander.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index f84084b..c9e3dc0 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1721,7 +1721,7 @@ static int sas_find_bcast_dev(struct domain_device *dev, list_for_each_entry(ch, &ex->children, siblings) { if (ch->dev_type == EDGE_DEV || ch->dev_type == FANOUT_DEV) { res = sas_find_bcast_dev(ch, src_dev); - if (src_dev) + if (*src_dev) return res; } } -- cgit v1.1 From 3538a001ea7db13fa1be2966b71f69d808acff01 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 31 Aug 2011 15:02:00 -0700 Subject: [SCSI] scsi: qla4xxx needs libiscsi.o qla4xxx driver needs to be linked with libiscsi.o to fix build errors. This happens when no other drivers that use libiscsi.o are enabled. ERROR: "iscsi_conn_stop" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_conn_get_addr_param" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_session_teardown" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_host_alloc" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_conn_start" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_conn_send_pdu" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_session_get_param" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_conn_get_param" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_set_param" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_session_failure" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_complete_pdu" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_session_setup" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_conn_bind" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_conn_setup" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! ERROR: "iscsi_itt_to_task" [drivers/scsi/qla4xxx/qla4xxx.ko] undefined! Signed-off-by: Randy Dunlap Reviewed-by: Mike Christie Cc: stable@kernel.org Signed-off-by: James Bottomley --- drivers/scsi/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 3c08f53..6153a66 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -88,7 +88,7 @@ obj-$(CONFIG_SCSI_QLOGIC_FAS) += qlogicfas408.o qlogicfas.o obj-$(CONFIG_PCMCIA_QLOGIC) += qlogicfas408.o obj-$(CONFIG_SCSI_QLOGIC_1280) += qla1280.o obj-$(CONFIG_SCSI_QLA_FC) += qla2xxx/ -obj-$(CONFIG_SCSI_QLA_ISCSI) += qla4xxx/ +obj-$(CONFIG_SCSI_QLA_ISCSI) += libiscsi.o qla4xxx/ obj-$(CONFIG_SCSI_LPFC) += lpfc/ obj-$(CONFIG_SCSI_BFA_FC) += bfa/ obj-$(CONFIG_SCSI_PAS16) += pas16.o -- cgit v1.1 From 2b7fe39babcbd67458a0c4fbed5704089e0c8c1f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 23 Sep 2011 15:43:54 -0700 Subject: scsi: SCSI_ISCI needs to select SCSI_SAS_HOST_SMP, fixes build error SCSI_ISCI needs to select SCSI_SAS_HOST_SMP to ensure that all needed symbols are available to it. Fixes this build error: ERROR: "try_test_sas_gpio_gp_bit" [drivers/scsi/isci/isci.ko] undefined! Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- drivers/scsi/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/scsi') diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 8d9dae8..3878b73 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -837,6 +837,7 @@ config SCSI_ISCI # (temporary): known alpha quality driver depends on EXPERIMENTAL select SCSI_SAS_LIBSAS + select SCSI_SAS_HOST_SMP ---help--- This driver supports the 6Gb/s SAS capabilities of the storage control unit found in the Intel(R) C600 series chipset. -- cgit v1.1 From 8ec9c7fb15fd8edf6b34555f5b498033121b2173 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 23 Sep 2011 15:40:50 -0700 Subject: scsi: fix qla2xxx printk format warning sector_t can be different types, so cast it to its largest possible type. drivers/scsi/qla2xxx/qla_isr.c:1509:5: warning: format '%lx' expects type 'long unsigned int', but argument 5 has type 'sector_t' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- drivers/scsi/qla2xxx/qla_isr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 646fc52..8a7591f 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1507,8 +1507,8 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24) if (k != blocks_done) { qla_printk(KERN_WARNING, sp->fcport->vha->hw, - "unexpected tag values tag:lba=%x:%lx)\n", - e_ref_tag, lba_s); + "unexpected tag values tag:lba=%x:%llx)\n", + e_ref_tag, (unsigned long long)lba_s); return 1; } -- cgit v1.1 From e48f129c2f200dde8899f6ea5c6e7173674fc482 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 6 Sep 2011 13:59:13 -0400 Subject: [SCSI] cxgb3i: convert cdev->l2opt to use rcu to prevent NULL dereference This oops was reported recently: d:mon> e cpu 0xd: Vector: 300 (Data Access) at [c0000000fd4c7120] pc: d00000000076f194: .t3_l2t_get+0x44/0x524 [cxgb3] lr: d000000000b02108: .init_act_open+0x150/0x3d4 [cxgb3i] sp: c0000000fd4c73a0 msr: 8000000000009032 dar: 0 dsisr: 40000000 current = 0xc0000000fd640d40 paca = 0xc00000000054ff80 pid = 5085, comm = iscsid d:mon> t [c0000000fd4c7450] d000000000b02108 .init_act_open+0x150/0x3d4 [cxgb3i] [c0000000fd4c7500] d000000000e45378 .cxgbi_ep_connect+0x784/0x8e8 [libcxgbi] [c0000000fd4c7650] d000000000db33f0 .iscsi_if_rx+0x71c/0xb18 [scsi_transport_iscsi2] [c0000000fd4c7740] c000000000370c9c .netlink_data_ready+0x40/0xa4 [c0000000fd4c77c0] c00000000036f010 .netlink_sendskb+0x4c/0x9c [c0000000fd4c7850] c000000000370c18 .netlink_sendmsg+0x358/0x39c [c0000000fd4c7950] c00000000033be24 .sock_sendmsg+0x114/0x1b8 [c0000000fd4c7b50] c00000000033d208 .sys_sendmsg+0x218/0x2ac [c0000000fd4c7d70] c00000000033f55c .sys_socketcall+0x228/0x27c [c0000000fd4c7e30] c0000000000086a4 syscall_exit+0x0/0x40 --- Exception: c01 (System Call) at 00000080da560cfc The root cause was an EEH error, which sent us down the offload_close path in the cxgb3 driver, which in turn sets cdev->l2opt to NULL, without regard for upper layer driver (like the cxgbi drivers) which might have execution contexts in the middle of its use. The result is the oops above, when t3_l2t_get attempts to dereference L2DATA(cdev)->nentries in arp_hash right after the EEH error handler sets it to NULL. The fix is to prevent the setting of the NULL pointer until after there are no further users of it. The t3cdev->l2opt pointer is now converted to be an rcu pointer and the L2DATA macro is now called under the protection of the rcu_read_lock(). When the EEH error path: t3_adapter_error->offload_close->cxgb3_offload_deactivate Is exectured, setting of that l2opt pointer to NULL, is now gated on an rcu quiescence point, preventing, allowing L2DATA callers to safely check for a NULL pointer without concern that the underlying data will be freeded before the pointer is dereferenced. This has been tested by the reporter and shown to fix the reproted oops [nhorman: fix up unitinialised variable reported by Dan Carpenter] Signed-off-by: Neil Horman Reviewed-by: Karen Xie Cc: stable@kernel.org Signed-off-by: James Bottomley --- drivers/scsi/cxgbi/cxgb3i/cxgb3i.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c index bd22041..f586448 100644 --- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c +++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c @@ -913,7 +913,7 @@ static void l2t_put(struct cxgbi_sock *csk) struct t3cdev *t3dev = (struct t3cdev *)csk->cdev->lldev; if (csk->l2t) { - l2t_release(L2DATA(t3dev), csk->l2t); + l2t_release(t3dev, csk->l2t); csk->l2t = NULL; cxgbi_sock_put(csk); } -- cgit v1.1 From 96067723e46b0dd24ae7b934085ab4eff4d26a1b Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 18 Sep 2011 18:56:20 +0400 Subject: [SCSI] 3w-9xxx: fix iommu_iova leak Following reports on the list, it looks like the 3e-9xxx driver will leak dma mappings every time we get a transient queueing error back from the card. This is because it maps the sg list in the routine that sends the command, but doesn't unmap again in the transient failure path (even though the command is sent back to the block layer). Fix by unmapping before returning the status. Reported-by: Chris Boot Tested-by: Chris Boot Acked-by: Adam Radford Cc: stable@kernel.org Signed-off-by: James Bottomley --- drivers/scsi/3w-9xxx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/scsi') diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index b7bd5b0..3868ab2 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -1800,10 +1800,12 @@ static int twa_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_ switch (retval) { case SCSI_MLQUEUE_HOST_BUSY: twa_free_request_id(tw_dev, request_id); + twa_unmap_scsi_data(tw_dev, request_id); break; case 1: tw_dev->state[request_id] = TW_S_COMPLETED; twa_free_request_id(tw_dev, request_id); + twa_unmap_scsi_data(tw_dev, request_id); SCpnt->result = (DID_ERROR << 16); done(SCpnt); retval = 0; -- cgit v1.1 From 9bfacd01dc9b7519e1e6da12b01963550b9d09a2 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 22 Sep 2011 00:06:05 -0700 Subject: [SCSI] qla2xxx: Fix crash in qla2x00_abort_all_cmds() on unload I hit a crash in qla2x00_abort_all_cmds() if the qla2xxx module is unloaded right after it is loaded. I debugged this down to the abort handling improperly treating a command of type SRB_ADISC_CMD as if it had a bsg_job to complete when that command actually uses the iocb_cmd part of the union. (I guess to hit this one has to unload the module while the async FC initialization is still in progress) It seems we should only look for a bsg_job if type is SRB_ELS_CMD_RPT, SRB_ELS_CMD_HST or SRB_CT_CMD, so switch the test to make that explicit. Signed-off-by: Roland Dreier Acked-by: Chad Dupuis Cc: stable@kernel.org Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_os.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 4cace3f..1e69527 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1328,10 +1328,9 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) qla2x00_sp_compl(ha, sp); } else { ctx = sp->ctx; - if (ctx->type == SRB_LOGIN_CMD || - ctx->type == SRB_LOGOUT_CMD) { - ctx->u.iocb_cmd->free(sp); - } else { + if (ctx->type == SRB_ELS_CMD_RPT || + ctx->type == SRB_ELS_CMD_HST || + ctx->type == SRB_CT_CMD) { struct fc_bsg_job *bsg_job = ctx->u.bsg_job; if (bsg_job->request->msgcode @@ -1343,6 +1342,8 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) kfree(sp->ctx); mempool_free(sp, ha->srb_mempool); + } else { + ctx->u.iocb_cmd->free(sp); } } } -- cgit v1.1 From a73914c35b05d80f8ce78288e10056c91090b666 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Thu, 22 Sep 2011 08:32:23 -0700 Subject: [SCSI] libsas: fix panic when single phy is disabled on a wide port When a wide port is being utilized to a target, if one disables only one of the phys, we get an OS crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000238 IP: [] mutex_lock+0x21/0x50 PGD 4103f5067 PUD 41dba9067 PMD 0 Oops: 0002 [#1] SMP last sysfs file: /sys/bus/pci/slots/5/address CPU 0 Modules linked in: pm8001(U) ses enclosure fuse nfsd exportfs autofs4 ipmi_devintf ipmi_si ipmi_msghandler nfs lockd fscache nfs_acl auth_rpcgss 8021q fcoe libfcoe garp libfc scsi_transport_fc stp scsi_tgt llc sunrpc cpufreq_ondemand acpi_cpufreq freq_table ipv6 sr_mod cdrom dm_mirror dm_region_hash dm_log uinput sg i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support e1000e mlx4_ib ib_mad ib_core mlx4_en mlx4_core ext3 jbd mbcache sd_mod crc_t10dif usb_storage ata_generic pata_acpi ata_piix libsas(U) scsi_transport_sas dm_mod [last unloaded: pm8001] Modules linked in: pm8001(U) ses enclosure fuse nfsd exportfs autofs4 ipmi_devintf ipmi_si ipmi_msghandler nfs lockd fscache nfs_acl auth_rpcgss 8021q fcoe libfcoe garp libfc scsi_transport_fc stp scsi_tgt llc sunrpc cpufreq_ondemand acpi_cpufreq freq_table ipv6 sr_mod cdrom dm_mirror dm_region_hash dm_log uinput sg i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support e1000e mlx4_ib ib_mad ib_core mlx4_en mlx4_core ext3 jbd mbcache sd_mod crc_t10dif usb_storage ata_generic pata_acpi ata_piix libsas(U) scsi_transport_sas dm_mod [last unloaded: pm8001] Pid: 5146, comm: scsi_wq_5 Not tainted 2.6.32-71.29.1.el6.lustre.7.x86_64 #1 Storage Server RIP: 0010:[] [] mutex_lock+0x21/0x50 RSP: 0018:ffff8803e4e33d30 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000238 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff8803e664c800 RDI: 0000000000000238 RBP: ffff8803e4e33d40 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000238 R14: ffff88041acb7200 R15: ffff88041c51ada0 FS: 0000000000000000(0000) GS:ffff880028200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 0000000000000238 CR3: 0000000410143000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process scsi_wq_5 (pid: 5146, threadinfo ffff8803e4e32000, task ffff8803e4e294a0) Stack: ffff8803e664c800 0000000000000000 ffff8803e4e33d70 ffffffffa001f06e <0> ffff8803e4e33d60 ffff88041c51ada0 ffff88041acb7200 ffff88041bc0aa00 <0> ffff8803e4e33d90 ffffffffa0032b6c 0000000000000014 ffff88041acb7200 Call Trace: [] sas_port_delete_phy+0x2e/0xa0 [scsi_transport_sas] [] sas_unregister_devs_sas_addr+0xac/0xe0 [libsas] [] sas_ex_revalidate_domain+0x204/0x330 [libsas] [] ? sas_revalidate_domain+0x0/0x90 [libsas] [] sas_revalidate_domain+0x65/0x90 [libsas] [] worker_thread+0x170/0x2a0 [] ? autoremove_wake_function+0x0/0x40 [] ? worker_thread+0x0/0x2a0 [] kthread+0x96/0xa0 [] child_rip+0xa/0x20 [] ? kthread+0x0/0xa0 [] ? child_rip+0x0/0x20 Code: ff ff 85 c0 75 ed eb d6 66 90 55 48 89 e5 48 83 ec 10 48 89 1c 24 4c 89 64 24 08 0f 1f 44 00 00 48 89 fb e8 92 f4 ff ff 48 89 df ff 0f 79 05 e8 25 00 00 00 65 48 8b 04 25 08 cc 00 00 48 2d RIP [] mutex_lock+0x21/0x50 RSP CR2: 0000000000000238 The following patch is admittedly a band-aid, and does not solve the root cause, but it still is a good candidate for hardening as a pointer check before reference. Signed-off-by: Mark Salyzyn Tested-by: Jack Wang Cc: stable@kernel.org Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_expander.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index c9e3dc0..16ad97d 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -1769,10 +1769,12 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent, sas_disable_routing(parent, phy->attached_sas_addr); } memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE); - sas_port_delete_phy(phy->port, phy->phy); - if (phy->port->num_phys == 0) - sas_port_delete(phy->port); - phy->port = NULL; + if (phy->port) { + sas_port_delete_phy(phy->port, phy->phy); + if (phy->port->num_phys == 0) + sas_port_delete(phy->port); + phy->port = NULL; + } } static int sas_discover_bfs_by_root_level(struct domain_device *root, -- cgit v1.1