summaryrefslogtreecommitdiffstats
path: root/drivers/scsi/lpfc/lpfc_init.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/scsi/lpfc/lpfc_init.c')
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c607
1 files changed, 508 insertions, 99 deletions
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 90b8b05..cb465b2 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -33,6 +33,7 @@
#include <linux/slab.h>
#include <linux/firmware.h>
#include <linux/miscdevice.h>
+#include <linux/percpu.h>
#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
@@ -58,6 +59,9 @@ char *_dump_buf_dif;
unsigned long _dump_buf_dif_order;
spinlock_t _dump_buf_lock;
+/* Used when mapping IRQ vectors in a driver centric manner */
+uint16_t lpfc_used_cpu[LPFC_MAX_CPU];
+
static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *);
static int lpfc_post_rcv_buf(struct lpfc_hba *);
static int lpfc_sli4_queue_verify(struct lpfc_hba *);
@@ -541,13 +545,16 @@ lpfc_config_port_post(struct lpfc_hba *phba)
/* Set up ring-0 (ELS) timer */
timeout = phba->fc_ratov * 2;
- mod_timer(&vport->els_tmofunc, jiffies + HZ * timeout);
+ mod_timer(&vport->els_tmofunc,
+ jiffies + msecs_to_jiffies(1000 * timeout));
/* Set up heart beat (HB) timer */
- mod_timer(&phba->hb_tmofunc, jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
+ mod_timer(&phba->hb_tmofunc,
+ jiffies + msecs_to_jiffies(1000 * LPFC_HB_MBOX_INTERVAL));
phba->hb_outstanding = 0;
phba->last_completion_time = jiffies;
/* Set up error attention (ERATT) polling timer */
- mod_timer(&phba->eratt_poll, jiffies + HZ * LPFC_ERATT_POLL_INTERVAL);
+ mod_timer(&phba->eratt_poll,
+ jiffies + msecs_to_jiffies(1000 * LPFC_ERATT_POLL_INTERVAL));
if (phba->hba_flag & LINK_DISABLED) {
lpfc_printf_log(phba,
@@ -908,9 +915,9 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
psb->pCmd = NULL;
psb->status = IOSTAT_SUCCESS;
}
- spin_lock_irqsave(&phba->scsi_buf_list_lock, iflag);
- list_splice(&aborts, &phba->lpfc_scsi_buf_list);
- spin_unlock_irqrestore(&phba->scsi_buf_list_lock, iflag);
+ spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag);
+ list_splice(&aborts, &phba->lpfc_scsi_buf_list_put);
+ spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag);
return 0;
}
@@ -1021,7 +1028,8 @@ lpfc_hb_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
!(phba->link_state == LPFC_HBA_ERROR) &&
!(phba->pport->load_flag & FC_UNLOADING))
mod_timer(&phba->hb_tmofunc,
- jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
+ jiffies +
+ msecs_to_jiffies(1000 * LPFC_HB_MBOX_INTERVAL));
return;
}
@@ -1064,15 +1072,18 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
spin_lock_irq(&phba->pport->work_port_lock);
- if (time_after(phba->last_completion_time + LPFC_HB_MBOX_INTERVAL * HZ,
- jiffies)) {
+ if (time_after(phba->last_completion_time +
+ msecs_to_jiffies(1000 * LPFC_HB_MBOX_INTERVAL),
+ jiffies)) {
spin_unlock_irq(&phba->pport->work_port_lock);
if (!phba->hb_outstanding)
mod_timer(&phba->hb_tmofunc,
- jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
+ jiffies +
+ msecs_to_jiffies(1000 * LPFC_HB_MBOX_INTERVAL));
else
mod_timer(&phba->hb_tmofunc,
- jiffies + HZ * LPFC_HB_MBOX_TIMEOUT);
+ jiffies +
+ msecs_to_jiffies(1000 * LPFC_HB_MBOX_TIMEOUT));
return;
}
spin_unlock_irq(&phba->pport->work_port_lock);
@@ -1104,7 +1115,8 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
if (!pmboxq) {
mod_timer(&phba->hb_tmofunc,
jiffies +
- HZ * LPFC_HB_MBOX_INTERVAL);
+ msecs_to_jiffies(1000 *
+ LPFC_HB_MBOX_INTERVAL));
return;
}
@@ -1120,7 +1132,8 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
phba->mbox_mem_pool);
mod_timer(&phba->hb_tmofunc,
jiffies +
- HZ * LPFC_HB_MBOX_INTERVAL);
+ msecs_to_jiffies(1000 *
+ LPFC_HB_MBOX_INTERVAL));
return;
}
phba->skipped_hb = 0;
@@ -1136,7 +1149,8 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
phba->skipped_hb = jiffies;
mod_timer(&phba->hb_tmofunc,
- jiffies + HZ * LPFC_HB_MBOX_TIMEOUT);
+ jiffies +
+ msecs_to_jiffies(1000 * LPFC_HB_MBOX_TIMEOUT));
return;
} else {
/*
@@ -1150,7 +1164,8 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
jiffies_to_msecs(jiffies
- phba->last_completion_time));
mod_timer(&phba->hb_tmofunc,
- jiffies + HZ * LPFC_HB_MBOX_TIMEOUT);
+ jiffies +
+ msecs_to_jiffies(1000 * LPFC_HB_MBOX_TIMEOUT));
}
}
}
@@ -1191,7 +1206,7 @@ lpfc_offline_eratt(struct lpfc_hba *phba)
* This routine is called to bring a SLI4 HBA offline when HBA hardware error
* other than Port Error 6 has been detected.
**/
-static void
+void
lpfc_sli4_offline_eratt(struct lpfc_hba *phba)
{
lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT);
@@ -2633,6 +2648,7 @@ lpfc_online(struct lpfc_hba *phba)
struct lpfc_vport *vport;
struct lpfc_vport **vports;
int i;
+ bool vpis_cleared = false;
if (!phba)
return 0;
@@ -2656,6 +2672,10 @@ lpfc_online(struct lpfc_hba *phba)
lpfc_unblock_mgmt_io(phba);
return 1;
}
+ spin_lock_irq(&phba->hbalock);
+ if (!phba->sli4_hba.max_cfg_param.vpi_used)
+ vpis_cleared = true;
+ spin_unlock_irq(&phba->hbalock);
} else {
if (lpfc_sli_hba_setup(phba)) { /* Initialize SLI2/SLI3 HBA */
lpfc_unblock_mgmt_io(phba);
@@ -2672,8 +2692,13 @@ lpfc_online(struct lpfc_hba *phba)
vports[i]->fc_flag &= ~FC_OFFLINE_MODE;
if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
vports[i]->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
- if (phba->sli_rev == LPFC_SLI_REV4)
+ if (phba->sli_rev == LPFC_SLI_REV4) {
vports[i]->fc_flag |= FC_VPORT_NEEDS_INIT_VPI;
+ if ((vpis_cleared) &&
+ (vports[i]->port_type !=
+ LPFC_PHYSICAL_PORT))
+ vports[i]->vpi = 0;
+ }
spin_unlock_irq(shost->host_lock);
}
lpfc_destroy_vport_work_array(phba, vports);
@@ -2833,16 +2858,30 @@ lpfc_scsi_free(struct lpfc_hba *phba)
struct lpfc_iocbq *io, *io_next;
spin_lock_irq(&phba->hbalock);
+
/* Release all the lpfc_scsi_bufs maintained by this host. */
- spin_lock(&phba->scsi_buf_list_lock);
- list_for_each_entry_safe(sb, sb_next, &phba->lpfc_scsi_buf_list, list) {
+
+ spin_lock(&phba->scsi_buf_list_put_lock);
+ list_for_each_entry_safe(sb, sb_next, &phba->lpfc_scsi_buf_list_put,
+ list) {
list_del(&sb->list);
pci_pool_free(phba->lpfc_scsi_dma_buf_pool, sb->data,
sb->dma_handle);
kfree(sb);
phba->total_scsi_bufs--;
}
- spin_unlock(&phba->scsi_buf_list_lock);
+ spin_unlock(&phba->scsi_buf_list_put_lock);
+
+ spin_lock(&phba->scsi_buf_list_get_lock);
+ list_for_each_entry_safe(sb, sb_next, &phba->lpfc_scsi_buf_list_get,
+ list) {
+ list_del(&sb->list);
+ pci_pool_free(phba->lpfc_scsi_dma_buf_pool, sb->data,
+ sb->dma_handle);
+ kfree(sb);
+ phba->total_scsi_bufs--;
+ }
+ spin_unlock(&phba->scsi_buf_list_get_lock);
/* Release all the lpfc_iocbq entries maintained by this host. */
list_for_each_entry_safe(io, io_next, &phba->lpfc_iocb_list, list) {
@@ -2978,9 +3017,12 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba)
phba->sli4_hba.scsi_xri_cnt,
phba->sli4_hba.scsi_xri_max);
- spin_lock_irq(&phba->scsi_buf_list_lock);
- list_splice_init(&phba->lpfc_scsi_buf_list, &scsi_sgl_list);
- spin_unlock_irq(&phba->scsi_buf_list_lock);
+ spin_lock_irq(&phba->scsi_buf_list_get_lock);
+ spin_lock_irq(&phba->scsi_buf_list_put_lock);
+ list_splice_init(&phba->lpfc_scsi_buf_list_get, &scsi_sgl_list);
+ list_splice(&phba->lpfc_scsi_buf_list_put, &scsi_sgl_list);
+ spin_unlock_irq(&phba->scsi_buf_list_put_lock);
+ spin_unlock_irq(&phba->scsi_buf_list_get_lock);
if (phba->sli4_hba.scsi_xri_cnt > phba->sli4_hba.scsi_xri_max) {
/* max scsi xri shrinked below the allocated scsi buffers */
@@ -2994,9 +3036,9 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba)
psb->dma_handle);
kfree(psb);
}
- spin_lock_irq(&phba->scsi_buf_list_lock);
+ spin_lock_irq(&phba->scsi_buf_list_get_lock);
phba->sli4_hba.scsi_xri_cnt -= scsi_xri_cnt;
- spin_unlock_irq(&phba->scsi_buf_list_lock);
+ spin_unlock_irq(&phba->scsi_buf_list_get_lock);
}
/* update xris associated to remaining allocated scsi buffers */
@@ -3014,9 +3056,12 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba)
psb->cur_iocbq.sli4_lxritag = lxri;
psb->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri];
}
- spin_lock_irq(&phba->scsi_buf_list_lock);
- list_splice_init(&scsi_sgl_list, &phba->lpfc_scsi_buf_list);
- spin_unlock_irq(&phba->scsi_buf_list_lock);
+ spin_lock_irq(&phba->scsi_buf_list_get_lock);
+ spin_lock_irq(&phba->scsi_buf_list_put_lock);
+ list_splice_init(&scsi_sgl_list, &phba->lpfc_scsi_buf_list_get);
+ INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put);
+ spin_unlock_irq(&phba->scsi_buf_list_put_lock);
+ spin_unlock_irq(&phba->scsi_buf_list_get_lock);
return 0;
@@ -3197,14 +3242,15 @@ int lpfc_scan_finished(struct Scsi_Host *shost, unsigned long time)
stat = 1;
goto finished;
}
- if (time >= 30 * HZ) {
+ if (time >= msecs_to_jiffies(30 * 1000)) {
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"0461 Scanning longer than 30 "
"seconds. Continuing initialization\n");
stat = 1;
goto finished;
}
- if (time >= 15 * HZ && phba->link_state <= LPFC_LINK_DOWN) {
+ if (time >= msecs_to_jiffies(15 * 1000) &&
+ phba->link_state <= LPFC_LINK_DOWN) {
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"0465 Link down longer than 15 "
"seconds. Continuing initialization\n");
@@ -3216,7 +3262,7 @@ int lpfc_scan_finished(struct Scsi_Host *shost, unsigned long time)
goto finished;
if (vport->num_disc_nodes || vport->fc_prli_sent)
goto finished;
- if (vport->fc_map_cnt == 0 && time < 2 * HZ)
+ if (vport->fc_map_cnt == 0 && time < msecs_to_jiffies(2 * 1000))
goto finished;
if ((phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) != 0)
goto finished;
@@ -4215,7 +4261,8 @@ lpfc_sli4_async_fip_evt(struct lpfc_hba *phba,
* If there are other active VLinks present,
* re-instantiate the Vlink using FDISC.
*/
- mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ);
+ mod_timer(&ndlp->nlp_delayfunc,
+ jiffies + msecs_to_jiffies(1000));
shost = lpfc_shost_from_vport(vport);
spin_lock_irq(shost->host_lock);
ndlp->nlp_flag |= NLP_DELAY_TMO;
@@ -4707,23 +4754,52 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
return -ENOMEM;
/*
- * Since the sg_tablesize is module parameter, the sg_dma_buf_size
+ * Since lpfc_sg_seg_cnt is module parameter, the sg_dma_buf_size
* used to create the sg_dma_buf_pool must be dynamically calculated.
- * 2 segments are added since the IOCB needs a command and response bde.
*/
- phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
- sizeof(struct fcp_rsp) +
- ((phba->cfg_sg_seg_cnt + 2) * sizeof(struct ulp_bde64));
+ /* Initialize the host templates the configured values. */
+ lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+ lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+
+ /* There are going to be 2 reserved BDEs: 1 FCP cmnd + 1 FCP rsp */
if (phba->cfg_enable_bg) {
- phba->cfg_sg_seg_cnt = LPFC_MAX_SG_SEG_CNT;
- phba->cfg_sg_dma_buf_size +=
- phba->cfg_prot_sg_seg_cnt * sizeof(struct ulp_bde64);
+ /*
+ * The scsi_buf for a T10-DIF I/O will hold the FCP cmnd,
+ * the FCP rsp, and a BDE for each. Sice we have no control
+ * over how many protection data segments the SCSI Layer
+ * will hand us (ie: there could be one for every block
+ * in the IO), we just allocate enough BDEs to accomidate
+ * our max amount and we need to limit lpfc_sg_seg_cnt to
+ * minimize the risk of running out.
+ */
+ phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+ sizeof(struct fcp_rsp) +
+ (LPFC_MAX_SG_SEG_CNT * sizeof(struct ulp_bde64));
+
+ if (phba->cfg_sg_seg_cnt > LPFC_MAX_SG_SEG_CNT_DIF)
+ phba->cfg_sg_seg_cnt = LPFC_MAX_SG_SEG_CNT_DIF;
+
+ /* Total BDEs in BPL for scsi_sg_list and scsi_sg_prot_list */
+ phba->cfg_total_seg_cnt = LPFC_MAX_SG_SEG_CNT;
+ } else {
+ /*
+ * The scsi_buf for a regular I/O will hold the FCP cmnd,
+ * the FCP rsp, a BDE for each, and a BDE for up to
+ * cfg_sg_seg_cnt data segments.
+ */
+ phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+ sizeof(struct fcp_rsp) +
+ ((phba->cfg_sg_seg_cnt + 2) * sizeof(struct ulp_bde64));
+
+ /* Total BDEs in BPL for scsi_sg_list */
+ phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + 2;
}
- /* Also reinitialize the host templates with new values. */
- lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
- lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP,
+ "9088 sg_tablesize:%d dmabuf_size:%d total_bde:%d\n",
+ phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size,
+ phba->cfg_total_seg_cnt);
phba->max_vpi = LPFC_MAX_VPI;
/* This will be set to correct value after config_port mbox */
@@ -4789,13 +4865,13 @@ lpfc_sli_driver_resource_unset(struct lpfc_hba *phba)
static int
lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
{
+ struct lpfc_vector_map_info *cpup;
struct lpfc_sli *psli;
LPFC_MBOXQ_t *mboxq;
- int rc, i, hbq_count, buf_size, dma_buf_size, max_buf_size;
+ int rc, i, hbq_count, max_buf_size;
uint8_t pn_page[LPFC_MAX_SUPPORTED_PAGES] = {0};
struct lpfc_mqe *mqe;
- int longs, sli_family;
- int sges_per_segment;
+ int longs;
/* Before proceed, wait for POST done and device ready */
rc = lpfc_sli4_post_status_check(phba);
@@ -4863,11 +4939,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
phba->fc_map[1] = LPFC_FCOE_FCF_MAP1;
phba->fc_map[2] = LPFC_FCOE_FCF_MAP2;
- /* With BlockGuard we can have multiple SGEs per Data Segemnt */
- sges_per_segment = 1;
- if (phba->cfg_enable_bg)
- sges_per_segment = 2;
-
/*
* For SLI4, instead of using ring 0 (LPFC_FCP_RING) for FCP commands
* we will associate a new ring, for each FCP fastpath EQ/CQ/WQ tuple.
@@ -4878,43 +4949,71 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
sizeof(struct lpfc_sli_ring), GFP_KERNEL);
if (!phba->sli.ring)
return -ENOMEM;
+
/*
- * Since the sg_tablesize is module parameter, the sg_dma_buf_size
+ * It doesn't matter what family our adapter is in, we are
+ * limited to 2 Pages, 512 SGEs, for our SGL.
+ * There are going to be 2 reserved SGEs: 1 FCP cmnd + 1 FCP rsp
+ */
+ max_buf_size = (2 * SLI4_PAGE_SIZE);
+ if (phba->cfg_sg_seg_cnt > LPFC_MAX_SGL_SEG_CNT - 2)
+ phba->cfg_sg_seg_cnt = LPFC_MAX_SGL_SEG_CNT - 2;
+
+ /*
+ * Since lpfc_sg_seg_cnt is module parameter, the sg_dma_buf_size
* used to create the sg_dma_buf_pool must be dynamically calculated.
- * 2 segments are added since the IOCB needs a command and response bde.
- * To insure that the scsi sgl does not cross a 4k page boundary only
- * sgl sizes of must be a power of 2.
*/
- buf_size = (sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp) +
- (((phba->cfg_sg_seg_cnt * sges_per_segment) + 2) *
- sizeof(struct sli4_sge)));
-
- sli_family = bf_get(lpfc_sli_intf_sli_family, &phba->sli4_hba.sli_intf);
- max_buf_size = LPFC_SLI4_MAX_BUF_SIZE;
- switch (sli_family) {
- case LPFC_SLI_INTF_FAMILY_BE2:
- case LPFC_SLI_INTF_FAMILY_BE3:
- /* There is a single hint for BE - 2 pages per BPL. */
- if (bf_get(lpfc_sli_intf_sli_hint1, &phba->sli4_hba.sli_intf) ==
- LPFC_SLI_INTF_SLI_HINT1_1)
- max_buf_size = LPFC_SLI4_FL1_MAX_BUF_SIZE;
- break;
- case LPFC_SLI_INTF_FAMILY_LNCR_A0:
- case LPFC_SLI_INTF_FAMILY_LNCR_B0:
- default:
- break;
+
+ if (phba->cfg_enable_bg) {
+ /*
+ * The scsi_buf for a T10-DIF I/O will hold the FCP cmnd,
+ * the FCP rsp, and a SGE for each. Sice we have no control
+ * over how many protection data segments the SCSI Layer
+ * will hand us (ie: there could be one for every block
+ * in the IO), we just allocate enough SGEs to accomidate
+ * our max amount and we need to limit lpfc_sg_seg_cnt to
+ * minimize the risk of running out.
+ */
+ phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+ sizeof(struct fcp_rsp) + max_buf_size;
+
+ /* Total SGEs for scsi_sg_list and scsi_sg_prot_list */
+ phba->cfg_total_seg_cnt = LPFC_MAX_SGL_SEG_CNT;
+
+ if (phba->cfg_sg_seg_cnt > LPFC_MAX_SG_SLI4_SEG_CNT_DIF)
+ phba->cfg_sg_seg_cnt = LPFC_MAX_SG_SLI4_SEG_CNT_DIF;
+ } else {
+ /*
+ * The scsi_buf for a regular I/O will hold the FCP cmnd,
+ * the FCP rsp, a SGE for each, and a SGE for up to
+ * cfg_sg_seg_cnt data segments.
+ */
+ phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+ sizeof(struct fcp_rsp) +
+ ((phba->cfg_sg_seg_cnt + 2) * sizeof(struct sli4_sge));
+
+ /* Total SGEs for scsi_sg_list */
+ phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + 2;
+ /*
+ * NOTE: if (phba->cfg_sg_seg_cnt + 2) <= 256 we only need
+ * to post 1 page for the SGL.
+ */
}
- for (dma_buf_size = LPFC_SLI4_MIN_BUF_SIZE;
- dma_buf_size < max_buf_size && buf_size > dma_buf_size;
- dma_buf_size = dma_buf_size << 1)
- ;
- if (dma_buf_size == max_buf_size)
- phba->cfg_sg_seg_cnt = (dma_buf_size -
- sizeof(struct fcp_cmnd) - sizeof(struct fcp_rsp) -
- (2 * sizeof(struct sli4_sge))) /
- sizeof(struct sli4_sge);
- phba->cfg_sg_dma_buf_size = dma_buf_size;
+ /* Initialize the host templates with the updated values. */
+ lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+ lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+
+ if (phba->cfg_sg_dma_buf_size <= LPFC_MIN_SG_SLI4_BUF_SZ)
+ phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ;
+ else
+ phba->cfg_sg_dma_buf_size =
+ SLI4_PAGE_ALIGN(phba->cfg_sg_dma_buf_size);
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP,
+ "9087 sg_tablesize:%d dmabuf_size:%d total_sge:%d\n",
+ phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size,
+ phba->cfg_total_seg_cnt);
/* Initialize buffer queue management fields */
hbq_count = lpfc_sli_hbq_count();
@@ -5104,6 +5203,26 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
goto out_free_fcp_eq_hdl;
}
+ phba->sli4_hba.cpu_map = kzalloc((sizeof(struct lpfc_vector_map_info) *
+ phba->sli4_hba.num_present_cpu),
+ GFP_KERNEL);
+ if (!phba->sli4_hba.cpu_map) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "3327 Failed allocate memory for msi-x "
+ "interrupt vector mapping\n");
+ rc = -ENOMEM;
+ goto out_free_msix;
+ }
+ /* Initialize io channels for round robin */
+ cpup = phba->sli4_hba.cpu_map;
+ rc = 0;
+ for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+ cpup->channel_id = rc;
+ rc++;
+ if (rc >= phba->cfg_fcp_io_channel)
+ rc = 0;
+ }
+
/*
* Enable sr-iov virtual functions if supported and configured
* through the module parameter.
@@ -5123,6 +5242,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
return 0;
+out_free_msix:
+ kfree(phba->sli4_hba.msix_entries);
out_free_fcp_eq_hdl:
kfree(phba->sli4_hba.fcp_eq_hdl);
out_free_fcf_rr_bmask:
@@ -5152,6 +5273,11 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba)
{
struct lpfc_fcf_conn_entry *conn_entry, *next_conn_entry;
+ /* Free memory allocated for msi-x interrupt vector to CPU mapping */
+ kfree(phba->sli4_hba.cpu_map);
+ phba->sli4_hba.num_present_cpu = 0;
+ phba->sli4_hba.num_online_cpu = 0;
+
/* Free memory allocated for msi-x interrupt vector entries */
kfree(phba->sli4_hba.msix_entries);
@@ -5260,8 +5386,10 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
init_waitqueue_head(&phba->work_waitq);
/* Initialize the scsi buffer list used by driver for scsi IO */
- spin_lock_init(&phba->scsi_buf_list_lock);
- INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list);
+ spin_lock_init(&phba->scsi_buf_list_get_lock);
+ INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_get);
+ spin_lock_init(&phba->scsi_buf_list_put_lock);
+ INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put);
/* Initialize the fabric iocb list */
INIT_LIST_HEAD(&phba->fabric_iocb_list);
@@ -6696,6 +6824,7 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba)
int cfg_fcp_io_channel;
uint32_t cpu;
uint32_t i = 0;
+ uint32_t j = 0;
/*
@@ -6706,15 +6835,21 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba)
/* Sanity check on HBA EQ parameters */
cfg_fcp_io_channel = phba->cfg_fcp_io_channel;
- /* It doesn't make sense to have more io channels then CPUs */
- for_each_online_cpu(cpu) {
- i++;
+ /* It doesn't make sense to have more io channels then online CPUs */
+ for_each_present_cpu(cpu) {
+ if (cpu_online(cpu))
+ i++;
+ j++;
}
+ phba->sli4_hba.num_online_cpu = i;
+ phba->sli4_hba.num_present_cpu = j;
+
if (i < cfg_fcp_io_channel) {
lpfc_printf_log(phba,
KERN_ERR, LOG_INIT,
"3188 Reducing IO channels to match number of "
- "CPUs: from %d to %d\n", cfg_fcp_io_channel, i);
+ "online CPUs: from %d to %d\n",
+ cfg_fcp_io_channel, i);
cfg_fcp_io_channel = i;
}
@@ -7743,8 +7878,13 @@ lpfc_pci_function_reset(struct lpfc_hba *phba)
out:
/* Catch the not-ready port failure after a port reset. */
- if (num_resets >= MAX_IF_TYPE_2_RESETS)
+ if (num_resets >= MAX_IF_TYPE_2_RESETS) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "3317 HBA not functional: IP Reset Failed "
+ "after (%d) retries, try: "
+ "echo fw_reset > board_mode\n", num_resets);
rc = -ENODEV;
+ }
return rc;
}
@@ -8209,6 +8349,269 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba)
}
/**
+ * lpfc_find_next_cpu - Find next available CPU that matches the phys_id
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * Find next available CPU to use for IRQ to CPU affinity.
+ */
+static int
+lpfc_find_next_cpu(struct lpfc_hba *phba, uint32_t phys_id)
+{
+ struct lpfc_vector_map_info *cpup;
+ int cpu;
+
+ cpup = phba->sli4_hba.cpu_map;
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+ /* CPU must be online */
+ if (cpu_online(cpu)) {
+ if ((cpup->irq == LPFC_VECTOR_MAP_EMPTY) &&
+ (lpfc_used_cpu[cpu] == LPFC_VECTOR_MAP_EMPTY) &&
+ (cpup->phys_id == phys_id)) {
+ return cpu;
+ }
+ }
+ cpup++;
+ }
+
+ /*
+ * If we get here, we have used ALL CPUs for the specific
+ * phys_id. Now we need to clear out lpfc_used_cpu and start
+ * reusing CPUs.
+ */
+
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+ if (lpfc_used_cpu[cpu] == phys_id)
+ lpfc_used_cpu[cpu] = LPFC_VECTOR_MAP_EMPTY;
+ }
+
+ cpup = phba->sli4_hba.cpu_map;
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+ /* CPU must be online */
+ if (cpu_online(cpu)) {
+ if ((cpup->irq == LPFC_VECTOR_MAP_EMPTY) &&
+ (cpup->phys_id == phys_id)) {
+ return cpu;
+ }
+ }
+ cpup++;
+ }
+ return LPFC_VECTOR_MAP_EMPTY;
+}
+
+/**
+ * lpfc_sli4_set_affinity - Set affinity for HBA IRQ vectors
+ * @phba: pointer to lpfc hba data structure.
+ * @vectors: number of HBA vectors
+ *
+ * Affinitize MSIX IRQ vectors to CPUs. Try to equally spread vector
+ * affinization across multple physical CPUs (numa nodes).
+ * In addition, this routine will assign an IO channel for each CPU
+ * to use when issuing I/Os.
+ */
+static int
+lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors)
+{
+ int i, idx, saved_chann, used_chann, cpu, phys_id;
+ int max_phys_id, num_io_channel, first_cpu;
+ struct lpfc_vector_map_info *cpup;
+#ifdef CONFIG_X86
+ struct cpuinfo_x86 *cpuinfo;
+#endif
+ struct cpumask *mask;
+ uint8_t chann[LPFC_FCP_IO_CHAN_MAX+1];
+
+ /* If there is no mapping, just return */
+ if (!phba->cfg_fcp_cpu_map)
+ return 1;
+
+ /* Init cpu_map array */
+ memset(phba->sli4_hba.cpu_map, 0xff,
+ (sizeof(struct lpfc_vector_map_info) *
+ phba->sli4_hba.num_present_cpu));
+
+ max_phys_id = 0;
+ phys_id = 0;
+ num_io_channel = 0;
+ first_cpu = LPFC_VECTOR_MAP_EMPTY;
+
+ /* Update CPU map with physical id and core id of each CPU */
+ cpup = phba->sli4_hba.cpu_map;
+ for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) {
+#ifdef CONFIG_X86
+ cpuinfo = &cpu_data(cpu);
+ cpup->phys_id = cpuinfo->phys_proc_id;
+ cpup->core_id = cpuinfo->cpu_core_id;
+#else
+ /* No distinction between CPUs for other platforms */
+ cpup->phys_id = 0;
+ cpup->core_id = 0;
+#endif
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3328 CPU physid %d coreid %d\n",
+ cpup->phys_id, cpup->core_id);
+
+ if (cpup->phys_id > max_phys_id)
+ max_phys_id = cpup->phys_id;
+ cpup++;
+ }
+
+ /* Now associate the HBA vectors with specific CPUs */
+ for (idx = 0; idx < vectors; idx++) {
+ cpup = phba->sli4_hba.cpu_map;
+ cpu = lpfc_find_next_cpu(phba, phys_id);
+ if (cpu == LPFC_VECTOR_MAP_EMPTY) {
+
+ /* Try for all phys_id's */
+ for (i = 1; i < max_phys_id; i++) {
+ phys_id++;
+ if (phys_id > max_phys_id)
+ phys_id = 0;
+ cpu = lpfc_find_next_cpu(phba, phys_id);
+ if (cpu == LPFC_VECTOR_MAP_EMPTY)
+ continue;
+ goto found;
+ }
+
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "3329 Cannot set affinity:"
+ "Error mapping vector %d (%d)\n",
+ idx, vectors);
+ return 0;
+ }
+found:
+ cpup += cpu;
+ if (phba->cfg_fcp_cpu_map == LPFC_DRIVER_CPU_MAP)
+ lpfc_used_cpu[cpu] = phys_id;
+
+ /* Associate vector with selected CPU */
+ cpup->irq = phba->sli4_hba.msix_entries[idx].vector;
+
+ /* Associate IO channel with selected CPU */
+ cpup->channel_id = idx;
+ num_io_channel++;
+
+ if (first_cpu == LPFC_VECTOR_MAP_EMPTY)
+ first_cpu = cpu;
+
+ /* Now affinitize to the selected CPU */
+ mask = &cpup->maskbits;
+ cpumask_clear(mask);
+ cpumask_set_cpu(cpu, mask);
+ i = irq_set_affinity_hint(phba->sli4_hba.msix_entries[idx].
+ vector, mask);
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3330 Set Affinity: CPU %d channel %d "
+ "irq %d (%x)\n",
+ cpu, cpup->channel_id,
+ phba->sli4_hba.msix_entries[idx].vector, i);
+
+ /* Spread vector mapping across multple physical CPU nodes */
+ phys_id++;
+ if (phys_id > max_phys_id)
+ phys_id = 0;
+ }
+
+ /*
+ * Finally fill in the IO channel for any remaining CPUs.
+ * At this point, all IO channels have been assigned to a specific
+ * MSIx vector, mapped to a specific CPU.
+ * Base the remaining IO channel assigned, to IO channels already
+ * assigned to other CPUs on the same phys_id.
+ */
+ for (i = 0; i <= max_phys_id; i++) {
+ /*
+ * If there are no io channels already mapped to
+ * this phys_id, just round robin thru the io_channels.
+ * Setup chann[] for round robin.
+ */
+ for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++)
+ chann[idx] = idx;
+
+ saved_chann = 0;
+ used_chann = 0;
+
+ /*
+ * First build a list of IO channels already assigned
+ * to this phys_id before reassigning the same IO
+ * channels to the remaining CPUs.
+ */
+ cpup = phba->sli4_hba.cpu_map;
+ cpu = first_cpu;
+ cpup += cpu;
+ for (idx = 0; idx < phba->sli4_hba.num_present_cpu;
+ idx++) {
+ if (cpup->phys_id == i) {
+ /*
+ * Save any IO channels that are
+ * already mapped to this phys_id.
+ */
+ if (cpup->irq != LPFC_VECTOR_MAP_EMPTY) {
+ chann[saved_chann] =
+ cpup->channel_id;
+ saved_chann++;
+ goto out;
+ }
+
+ /* See if we are using round-robin */
+ if (saved_chann == 0)
+ saved_chann =
+ phba->cfg_fcp_io_channel;
+
+ /* Associate next IO channel with CPU */
+ cpup->channel_id = chann[used_chann];
+ num_io_channel++;
+ used_chann++;
+ if (used_chann == saved_chann)
+ used_chann = 0;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3331 Set IO_CHANN "
+ "CPU %d channel %d\n",
+ idx, cpup->channel_id);
+ }
+out:
+ cpu++;
+ if (cpu >= phba->sli4_hba.num_present_cpu) {
+ cpup = phba->sli4_hba.cpu_map;
+ cpu = 0;
+ } else {
+ cpup++;
+ }
+ }
+ }
+
+ if (phba->sli4_hba.num_online_cpu != phba->sli4_hba.num_present_cpu) {
+ cpup = phba->sli4_hba.cpu_map;
+ for (idx = 0; idx < phba->sli4_hba.num_present_cpu; idx++) {
+ if (cpup->channel_id == LPFC_VECTOR_MAP_EMPTY) {
+ cpup->channel_id = 0;
+ num_io_channel++;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ "3332 Assign IO_CHANN "
+ "CPU %d channel %d\n",
+ idx, cpup->channel_id);
+ }
+ cpup++;
+ }
+ }
+
+ /* Sanity check */
+ if (num_io_channel != phba->sli4_hba.num_present_cpu)
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "3333 Set affinity mismatch:"
+ "%d chann != %d cpus: %d vactors\n",
+ num_io_channel, phba->sli4_hba.num_present_cpu,
+ vectors);
+
+ phba->cfg_fcp_io_sched = LPFC_FCP_SCHED_BY_CPU;
+ return 1;
+}
+
+
+/**
* lpfc_sli4_enable_msix - Enable MSI-X interrupt mode to SLI-4 device
* @phba: pointer to lpfc hba data structure.
*
@@ -8259,9 +8662,7 @@ enable_msix_vectors:
phba->sli4_hba.msix_entries[index].vector,
phba->sli4_hba.msix_entries[index].entry);
- /*
- * Assign MSI-X vectors to interrupt handlers
- */
+ /* Assign MSI-X vectors to interrupt handlers */
for (index = 0; index < vectors; index++) {
memset(&phba->sli4_hba.handler_name[index], 0, 16);
sprintf((char *)&phba->sli4_hba.handler_name[index],
@@ -8289,6 +8690,8 @@ enable_msix_vectors:
phba->cfg_fcp_io_channel, vectors);
phba->cfg_fcp_io_channel = vectors;
}
+
+ lpfc_sli4_set_affinity(phba, vectors);
return rc;
cfg_fail_out:
@@ -9213,15 +9616,15 @@ lpfc_sli_prep_dev_for_reset(struct lpfc_hba *phba)
/* Block all SCSI devices' I/Os on the host */
lpfc_scsi_dev_block(phba);
+ /* Flush all driver's outstanding SCSI I/Os as we are to reset */
+ lpfc_sli_flush_fcp_rings(phba);
+
/* stop all timers */
lpfc_stop_hba_timers(phba);
/* Disable interrupt and pci device */
lpfc_sli_disable_intr(phba);
pci_disable_device(phba->pcidev);
-
- /* Flush all driver's outstanding SCSI I/Os as we are to reset */
- lpfc_sli_flush_fcp_rings(phba);
}
/**
@@ -9966,6 +10369,9 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
/* Block all SCSI devices' I/Os on the host */
lpfc_scsi_dev_block(phba);
+ /* Flush all driver's outstanding SCSI I/Os as we are to reset */
+ lpfc_sli_flush_fcp_rings(phba);
+
/* stop all timers */
lpfc_stop_hba_timers(phba);
@@ -9973,9 +10379,6 @@ lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
lpfc_sli4_disable_intr(phba);
lpfc_sli4_queue_destroy(phba);
pci_disable_device(phba->pcidev);
-
- /* Flush all driver's outstanding SCSI I/Os as we are to reset */
- lpfc_sli_flush_fcp_rings(phba);
}
/**
@@ -10535,6 +10938,7 @@ static struct miscdevice lpfc_mgmt_dev = {
static int __init
lpfc_init(void)
{
+ int cpu;
int error = 0;
printk(LPFC_MODULE_DESC "\n");
@@ -10561,6 +10965,11 @@ lpfc_init(void)
return -ENOMEM;
}
}
+
+ /* Initialize in case vector mapping is needed */
+ for (cpu = 0; cpu < LPFC_MAX_CPU; cpu++)
+ lpfc_used_cpu[cpu] = LPFC_VECTOR_MAP_EMPTY;
+
error = pci_register_driver(&lpfc_driver);
if (error) {
fc_release_transport(lpfc_transport_template);
OpenPOWER on IntegriCloud