author | gjb <gjb@FreeBSD.org> | 2016-04-04 23:55:32 +0000 |
---|---|---|
committer | gjb <gjb@FreeBSD.org> | 2016-04-04 23:55:32 +0000 |
commit | 1dc4c40e3b35564cb2e787ad968e6b4a9fb7eb0f (patch) | |
tree | a027fe5a27446f32854d6a07b34b5f2a992bf283 /sys/dev/cxgbe | |
parent | 3669a0dced7e344be71d234ffc3a71530ef0ae08 (diff) | |
parent | 589cedfe0cde2b49d5f47fc240de37c8bf307abd (diff) | |
MFH
Sponsored by: The FreeBSD Foundation
Diffstat (limited to 'sys/dev/cxgbe')
| mode | path | lines changed |
| --- | --- | --- |
| -rw-r--r-- | sys/dev/cxgbe/adapter.h | 22 |
| -rw-r--r-- | sys/dev/cxgbe/common/t4_hw.c | 15 |
| -rw-r--r-- | sys/dev/cxgbe/firmware/t4fw_cfg.txt | 157 |
| -rw-r--r-- | sys/dev/cxgbe/firmware/t5fw_cfg.txt | 197 |
| -rw-r--r-- | sys/dev/cxgbe/iw_cxgbe/cm.c | 271 |
| -rw-r--r-- | sys/dev/cxgbe/iw_cxgbe/cq.c | 6 |
| -rw-r--r-- | sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h | 14 |
| -rw-r--r-- | sys/dev/cxgbe/iw_cxgbe/mem.c | 34 |
| -rw-r--r-- | sys/dev/cxgbe/iw_cxgbe/qp.c | 16 |
| -rw-r--r-- | sys/dev/cxgbe/iw_cxgbe/t4.h | 3 |
| -rw-r--r-- | sys/dev/cxgbe/iw_cxgbe/user.h | 1 |
| -rw-r--r-- | sys/dev/cxgbe/offload.h | 2 |
| -rw-r--r-- | sys/dev/cxgbe/t4_main.c | 46 |
| -rw-r--r-- | sys/dev/cxgbe/t4_sge.c | 9 |
14 files changed, 554 insertions, 239 deletions
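One change worth previewing before the per-file diffs: pause-frame accounting moves out of cxgbe_get_counter() in t4_main.c and into t4_get_port_stats() in t4_hw.c, where it is applied only when the MPS_STAT_CTL register says the MAC counters include pause frames. A minimal, self-contained sketch of that gating logic follows; the flag bit values and the stats struct here are simplified stand-ins for illustration, not the driver's real definitions (a pause frame is 64 octets on the wire, which is where the `* 64` comes from):

```c
#include <stdint.h>

/* Illustrative stand-ins; the real F_COUNTPAUSESTAT{TX,RX} values
 * live in the adapter register headers. */
#define F_COUNTPAUSESTATTX (1u << 1)
#define F_COUNTPAUSESTATRX (1u << 0)

struct port_stats_sketch {
	uint64_t tx_frames, tx_octets, tx_mcast_frames, tx_pause;
	uint64_t rx_frames, rx_octets, rx_mcast_frames, rx_pause;
};

/*
 * Mirrors the new logic in t4_get_port_stats(): when MPS_STAT_CTL says
 * the MAC counters include pause frames, back them out so the per-port
 * counters report data traffic only.
 */
static void
backout_pause_frames(struct port_stats_sketch *p, uint32_t stat_ctl)
{
	if (stat_ctl & F_COUNTPAUSESTATTX) {
		p->tx_frames -= p->tx_pause;
		p->tx_octets -= p->tx_pause * 64;
		p->tx_mcast_frames -= p->tx_pause;
	}
	if (stat_ctl & F_COUNTPAUSESTATRX) {
		p->rx_frames -= p->rx_pause;
		p->rx_octets -= p->rx_pause * 64;
		p->rx_mcast_frames -= p->rx_pause;
	}
}
```

With the adjustment done once at collection time, cxgbe_get_counter() can return s->rx_frames and friends directly, which is exactly what the t4_main.c hunk below does.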
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index ee14712..2193142 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -246,12 +246,10 @@ struct vi_info {
 	int rsrv_noflowq;	/* Reserve queue 0 for non-flowid packets */
 	int nrxq;		/* # of rx queues */
 	int first_rxq;		/* index of first rx queue */
-#ifdef TCP_OFFLOAD
 	int nofldtxq;		/* # of offload tx queues */
 	int first_ofld_txq;	/* index of first offload tx queue */
 	int nofldrxq;		/* # of offload rx queues */
 	int first_ofld_rxq;	/* index of first offload rx queue */
-#endif
 	int tmr_idx;
 	int pktc_idx;
 	int qsize_rxq;
@@ -311,9 +309,7 @@ struct cluster_layout {
 struct cluster_metadata {
 	u_int refcount;
-#ifdef INVARIANTS
 	struct fl_sdesc *sd;	/* For debug only.  Could easily be stale */
-#endif
 };
 
 struct fl_sdesc {
@@ -571,7 +567,6 @@ iq_to_rxq(struct sge_iq *iq)
 }
 
-#ifdef TCP_OFFLOAD
 /* ofld_rxq: SGE ingress queue + SGE free list + miscellaneous items */
 struct sge_ofld_rxq {
 	struct sge_iq iq;	/* MUST be first */
@@ -584,7 +579,6 @@ iq_to_ofld_rxq(struct sge_iq *iq)
 	return (__containerof(iq, struct sge_ofld_rxq, iq));
 }
-#endif
 
 struct wrqe {
 	STAILQ_ENTRY(wrqe) link;
@@ -636,7 +630,6 @@ struct sge_wrq {
 } __aligned(CACHE_LINE_SIZE);
 
-#ifdef DEV_NETMAP
 struct sge_nm_rxq {
 	struct vi_info *vi;
@@ -691,19 +684,14 @@ struct sge_nm_txq {
 	bus_addr_t ba;
 	int iqidx;
 } __aligned(CACHE_LINE_SIZE);
-#endif
 
 struct sge {
 	int nrxq;	/* total # of Ethernet rx queues */
 	int ntxq;	/* total # of Ethernet tx tx queues */
-#ifdef TCP_OFFLOAD
 	int nofldrxq;	/* total # of TOE rx queues */
 	int nofldtxq;	/* total # of TOE tx queues */
-#endif
-#ifdef DEV_NETMAP
 	int nnmrxq;	/* total # of netmap rx queues */
 	int nnmtxq;	/* total # of netmap tx queues */
-#endif
 	int niq;	/* total # of ingress queues */
 	int neq;	/* total # of egress queues */
@@ -712,14 +700,10 @@ struct sge {
 	struct sge_wrq *ctrlq;	/* Control queues */
 	struct sge_txq *txq;	/* NIC tx queues */
 	struct sge_rxq *rxq;	/* NIC rx queues */
-#ifdef TCP_OFFLOAD
 	struct sge_wrq *ofld_txq;	/* TOE tx queues */
 	struct sge_ofld_rxq *ofld_rxq;	/* TOE rx queues */
-#endif
-#ifdef DEV_NETMAP
 	struct sge_nm_txq *nm_txq;	/* netmap tx queues */
 	struct sge_nm_rxq *nm_rxq;	/* netmap rx queues */
-#endif
 	uint16_t iq_start;
 	int eq_start;
@@ -778,20 +762,16 @@ struct adapter {
 	struct port_info *port[MAX_NPORTS];
 	uint8_t chan_map[MAX_NCHAN];
-#ifdef TCP_OFFLOAD
 	void *tom_softc;	/* (struct tom_data *) */
 	struct tom_tunables tt;
 	void *iwarp_softc;	/* (struct c4iw_dev *) */
 	void *iscsi_ulp_softc;	/* (struct cxgbei_data *) */
-#endif
 	struct l2t_data *l2t;	/* L2 table */
 	struct tid_info tids;
 
 	uint16_t doorbells;
-#ifdef TCP_OFFLOAD
 	int offload_map;	/* ports with IFCAP_TOE enabled */
 	int active_ulds;	/* ULDs activated on this adapter */
-#endif
 	int flags;
 	int debug_flags;
@@ -840,11 +820,9 @@ struct adapter {
 	fw_msg_handler_t fw_msg_handler[7];	/* NUM_FW6_TYPES */
 	cpl_handler_t cpl_handler[0xef];	/* NUM_CPL_CMDS */
 
-#ifdef INVARIANTS
 	const char *last_op;
 	const void *last_op_thr;
 	int last_op_flags;
-#endif
 
 	int sc_do_rxcopy;
 };
diff --git a/sys/dev/cxgbe/common/t4_hw.c b/sys/dev/cxgbe/common/t4_hw.c
index 92104c8..588f6fd 100644
--- a/sys/dev/cxgbe/common/t4_hw.c
+++ b/sys/dev/cxgbe/common/t4_hw.c
@@ -5615,6 +5615,7 @@ void t4_get_port_stats_offset(struct adapter *adap, int idx,
 void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p)
 {
 	u32 bgmap = t4_get_mps_bg_map(adap, idx);
+	u32 stat_ctl;
 
 #define GET_STAT(name) \
 	t4_read_reg64(adap, \
@@ -5622,6 +5623,8 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p)
 	    T5_PORT_REG(idx, A_MPS_PORT_STAT_##name##_L)))
 #define GET_STAT_COM(name) t4_read_reg64(adap, A_MPS_STAT_##name##_L)
 
+	stat_ctl = t4_read_reg(adap, A_MPS_STAT_CTL);
+
 	p->tx_pause = GET_STAT(TX_PORT_PAUSE);
 	p->tx_octets = GET_STAT(TX_PORT_BYTES);
 	p->tx_frames = GET_STAT(TX_PORT_FRAMES);
@@ -5646,6 +5649,12 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p)
 	p->tx_ppp6 = GET_STAT(TX_PORT_PPP6);
 	p->tx_ppp7 = GET_STAT(TX_PORT_PPP7);
 
+	if (stat_ctl & F_COUNTPAUSESTATTX) {
+		p->tx_frames -= p->tx_pause;
+		p->tx_octets -= p->tx_pause * 64;
+		p->tx_mcast_frames -= p->tx_pause;
+	}
+
 	p->rx_pause = GET_STAT(RX_PORT_PAUSE);
 	p->rx_octets = GET_STAT(RX_PORT_BYTES);
 	p->rx_frames = GET_STAT(RX_PORT_FRAMES);
@@ -5674,6 +5683,12 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p)
 	p->rx_ppp6 = GET_STAT(RX_PORT_PPP6);
 	p->rx_ppp7 = GET_STAT(RX_PORT_PPP7);
 
+	if (stat_ctl & F_COUNTPAUSESTATRX) {
+		p->rx_frames -= p->rx_pause;
+		p->rx_octets -= p->rx_pause * 64;
+		p->rx_mcast_frames -= p->rx_pause;
+	}
+
 	p->rx_ovflow0 = (bgmap & 1) ? GET_STAT_COM(RX_BG_0_MAC_DROP_FRAME) : 0;
 	p->rx_ovflow1 = (bgmap & 2) ? GET_STAT_COM(RX_BG_1_MAC_DROP_FRAME) : 0;
 	p->rx_ovflow2 = (bgmap & 4) ? GET_STAT_COM(RX_BG_2_MAC_DROP_FRAME) : 0;
diff --git a/sys/dev/cxgbe/firmware/t4fw_cfg.txt b/sys/dev/cxgbe/firmware/t4fw_cfg.txt
index 0e13122..43820a0 100644
--- a/sys/dev/cxgbe/firmware/t4fw_cfg.txt
+++ b/sys/dev/cxgbe/firmware/t4fw_cfg.txt
@@ -10,7 +10,7 @@
 [global]
 	rss_glb_config_mode = basicvirtual
-	rss_glb_config_options = tnlmapen, hashtoeplitz, tnlalllkp
+	rss_glb_config_options = tnlmapen,hashtoeplitz,tnlalllkp
 
 	sge_timer_value = 1, 5, 10, 50, 100, 200 # usecs
 
@@ -20,61 +20,82 @@
 	# disable TP_PARA_REG3.RxFragEn
 	reg[0x7d6c] = 0x00000000/0x00007000
 
-	# TP_SHIFT_CNT
-	reg[0x7dc0] = 0x62f8849
+	reg[0x7dc0] = 0x0e2f8849	# TP_SHIFT_CNT
 
 	filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe
 	filterMask = protocol, fcoe
 
-	# TP rx and tx channels (0 = auto).
+	tp_pmrx = 36, 512
+	tp_pmrx_pagesize = 64K
+
+	# TP number of RX channels (0 = auto)
 	tp_nrxch = 0
-	tp_ntxch = 0
 
-	# TP rx and tx payload memory (% of the total EDRAM + DDR3).
-	tp_pmrx = 38, 512
-	tp_pmtx = 60, 512
-	tp_pmrx_pagesize = 64K
+	tp_pmtx = 46, 512
 	tp_pmtx_pagesize = 64K
 
-	# cluster, lan, or wan.
-	tp_tcptuning = lan
+	# TP number of TX channels (0 = auto)
+	tp_ntxch = 0
 
 	# TP OFLD MTUs
 	tp_mtus = 88, 256, 512, 576, 808, 1024, 1280, 1488, 1500, 2002, 2048, 4096, 4352, 8192, 9000, 9600
 
+	# cluster, lan, or wan.
+	tp_tcptuning = lan
+
 # PFs 0-3.  These get 8 MSI/8 MSI-X vectors each.  VFs are supported by
-# these 4 PFs only.  Not used here at all.
[function "0"] - nvf = 16 - nvi = 1 - rssnvi = 0 -[function "0/*"] - nvi = 1 - rssnvi = 0 + nvf = 4 + wx_caps = all + r_caps = all + nvi = 2 + rssnvi = 2 + niqflint = 4 + nethctrl = 4 + neq = 8 + nexactf = 4 + cmask = all + pmask = 0x1 [function "1"] - nvf = 16 - nvi = 1 - rssnvi = 0 -[function "1/*"] - nvi = 1 - rssnvi = 0 + nvf = 4 + wx_caps = all + r_caps = all + nvi = 2 + rssnvi = 2 + niqflint = 4 + nethctrl = 4 + neq = 8 + nexactf = 4 + cmask = all + pmask = 0x2 [function "2"] - nvf = 16 - nvi = 1 - rssnvi = 0 -[function "2/*"] - nvi = 1 - rssnvi = 0 + nvf = 4 + wx_caps = all + r_caps = all + nvi = 2 + rssnvi = 2 + niqflint = 4 + nethctrl = 4 + neq = 8 + nexactf = 4 + cmask = all + pmask = 0x4 [function "3"] - nvf = 16 - nvi = 1 - rssnvi = 0 -[function "3/*"] - nvi = 1 - rssnvi = 0 + nvf = 4 + wx_caps = all + r_caps = all + nvi = 2 + rssnvi = 2 + niqflint = 4 + nethctrl = 4 + neq = 8 + nexactf = 4 + cmask = all + pmask = 0x8 # PF4 is the resource-rich PF that the bus/nexus driver attaches to. # It gets 32 MSI/128 MSI-X vectors. @@ -86,15 +107,19 @@ niqflint = 512 nethctrl = 1024 neq = 2048 - nexactf = 328 + nexactf = 280 cmask = all pmask = all # driver will mask off features it won't use - protocol = ofld + protocol = ofld, rddp, rdmac, iscsi_initiator_pdu, iscsi_target_pdu tp_l2t = 4096 tp_ddp = 2 + tp_ddp_iscsi = 2 + tp_stag = 2 + tp_pbl = 5 + tp_rq = 7 # TCAM has 8K cells; each region must start at a multiple of 128 cell. # Each entry in these categories takes 4 cells each. nhash will use the @@ -130,6 +155,60 @@ nexactf = 8 nfilter = 16 +# For Virtual functions, we only allow NIC functionality and we only allow +# access to one port (1 << PF). Note that because of limitations in the +# Scatter Gather Engine (SGE) hardware which checks writes to VF KDOORBELL +# and GTS registers, the number of Ingress and Egress Queues must be a power +# of 2. +# +[function "0/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 1 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x1 + +[function "1/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 1 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x2 + +[function "2/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 1 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x4 + +[function "3/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 1 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x8 + # MPS has 192K buffer space for ingress packets from the wire as well as # loopback path of the L2 switch. 
[port "0"] @@ -166,7 +245,7 @@ [fini] version = 0x1 - checksum = 0x98210e18 + checksum = 0xbec0621 # # $FreeBSD$ # diff --git a/sys/dev/cxgbe/firmware/t5fw_cfg.txt b/sys/dev/cxgbe/firmware/t5fw_cfg.txt index 4ae6c99..9e16da5 100644 --- a/sys/dev/cxgbe/firmware/t5fw_cfg.txt +++ b/sys/dev/cxgbe/firmware/t5fw_cfg.txt @@ -10,12 +10,33 @@ [global] rss_glb_config_mode = basicvirtual - rss_glb_config_options = tnlmapen, hashtoeplitz, tnlalllkp + rss_glb_config_options = tnlmapen,hashtoeplitz,tnlalllkp # PL_TIMEOUT register - pl_timeout_value = 200 # the timeout value in units of us + pl_timeout_value = 10000 # the timeout value in units of us - sge_timer_value = 1, 5, 10, 50, 100, 200 # usecs + # SGE_THROTTLE_CONTROL + bar2throttlecount = 500 # bar2throttlecount in us + + sge_timer_value = 1, 5, 10, 50, 100, 200 # SGE_TIMER_VALUE* in usecs + + reg[0x1124] = 0x00000400/0x00000400 # SGE_CONTROL2, enable VFIFO; if + # SGE_VFIFO_SIZE is not set, then + # firmware will set it up in function + # of number of egress queues used + + reg[0x1130] = 0x00d5ffeb # SGE_DBP_FETCH_THRESHOLD, fetch + # threshold set to queue depth + # minus 128-entries for FL and HP + # queues, and 0xfff for LP which + # prompts the firmware to set it up + # in function of egress queues + # used + + reg[0x113c] = 0x0002ffc0 # SGE_VFIFO_SIZE, set to 0x2ffc0 which + # prompts the firmware to set it up in + # function of number of egress queues + # used # enable TP_OUT_CONFIG.IPIDSPLITMODE reg[0x7d04] = 0x00010000/0x00010000 @@ -26,34 +47,38 @@ # enable TP_PARA_REG6.EnableCSnd reg[0x7d78] = 0x00000400/0x00000000 - # TP_SHIFT_CNT - reg[0x7dc0] = 0x62f8849 - - # TP_GLOBAL_CONFIG - reg[0x7d08] = 0x00000800/0x00000800 # set IssFromCplEnable - - # TP_PARA_REG0 - reg[0x7d60] = 0x06000000/0x07000000 # set InitCWND to 6 + reg[0x7dc0] = 0x0e2f8849 # TP_SHIFT_CNT filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe filterMask = protocol, fcoe - # TP rx and tx channels (0 = auto). + tp_pmrx = 36, 512 + tp_pmrx_pagesize = 64K + + # TP number of RX channels (0 = auto) tp_nrxch = 0 - tp_ntxch = 0 - # TP rx and tx payload memory (% of the total EDRAM + DDR3). - tp_pmrx = 38, 512 - tp_pmtx = 60, 512 - tp_pmrx_pagesize = 64K + tp_pmtx = 46, 512 tp_pmtx_pagesize = 64K - # cluster, lan, or wan. - tp_tcptuning = lan + # TP number of TX channels (0 = auto) + tp_ntxch = 0 # TP OFLD MTUs tp_mtus = 88, 256, 512, 576, 808, 1024, 1280, 1488, 1500, 2002, 2048, 4096, 4352, 8192, 9000, 9600 + # TP_GLOBAL_CONFIG + reg[0x7d08] = 0x00000800/0x00000800 # set IssFromCplEnable + + # TP_PC_CONFIG + reg[0x7d48] = 0x00000000/0x00000400 # clear EnableFLMError + + # TP_PARA_REG0 + reg[0x7d60] = 0x06000000/0x07000000 # set InitCWND to 6 + + # cluster, lan, or wan. + tp_tcptuning = lan + # MC configuration mc_mode_brc[0] = 1 # mc0 - 1: enable BRC, 0: enable RBC mc_mode_brc[1] = 1 # mc1 - 1: enable BRC, 0: enable RBC @@ -63,38 +88,58 @@ # TPT error. # PFs 0-3. These get 8 MSI/8 MSI-X vectors each. VFs are supported by -# these 4 PFs only. Not used here at all. +# these 4 PFs only. 
[function "0"] - nvf = 16 - nvi = 1 - rssnvi = 0 -[function "0/*"] - nvi = 1 - rssnvi = 0 + nvf = 4 + wx_caps = all + r_caps = all + nvi = 2 + rssnvi = 2 + niqflint = 4 + nethctrl = 4 + neq = 8 + nexactf = 4 + cmask = all + pmask = 0x1 [function "1"] - nvf = 16 - nvi = 1 - rssnvi = 0 -[function "1/*"] - nvi = 1 - rssnvi = 0 + nvf = 4 + wx_caps = all + r_caps = all + nvi = 2 + rssnvi = 2 + niqflint = 4 + nethctrl = 4 + neq = 8 + nexactf = 4 + cmask = all + pmask = 0x2 [function "2"] - nvf = 16 - nvi = 1 - rssnvi = 0 -[function "2/*"] - nvi = 1 - rssnvi = 0 + nvf = 4 + wx_caps = all + r_caps = all + nvi = 2 + rssnvi = 2 + niqflint = 4 + nethctrl = 4 + neq = 8 + nexactf = 4 + cmask = all + pmask = 0x4 [function "3"] - nvf = 16 - nvi = 1 - rssnvi = 0 -[function "3/*"] - nvi = 1 - rssnvi = 0 + nvf = 4 + wx_caps = all + r_caps = all + nvi = 2 + rssnvi = 2 + niqflint = 4 + nethctrl = 4 + neq = 8 + nexactf = 4 + cmask = all + pmask = 0x8 # PF4 is the resource-rich PF that the bus/nexus driver attaches to. # It gets 32 MSI/128 MSI-X vectors. @@ -106,15 +151,19 @@ niqflint = 512 nethctrl = 1024 neq = 2048 - nexactf = 328 + nexactf = 456 cmask = all pmask = all # driver will mask off features it won't use - protocol = ofld + protocol = ofld, rddp, rdmac, iscsi_initiator_pdu, iscsi_target_pdu, iscsi_t10dif tp_l2t = 4096 tp_ddp = 2 + tp_ddp_iscsi = 2 + tp_stag = 2 + tp_pbl = 5 + tp_rq = 7 # TCAM has 8K cells; each region must start at a multiple of 128 cell. # Each entry in these categories takes 4 cells each. nhash will use the @@ -150,6 +199,60 @@ nexactf = 8 nfilter = 16 +# For Virtual functions, we only allow NIC functionality and we only allow +# access to one port (1 << PF). Note that because of limitations in the +# Scatter Gather Engine (SGE) hardware which checks writes to VF KDOORBELL +# and GTS registers, the number of Ingress and Egress Queues must be a power +# of 2. +# +[function "0/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 1 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x1 + +[function "1/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 1 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x2 + +[function "2/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 1 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x4 + +[function "3/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 1 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x8 + # MPS has 192K buffer space for ingress packets from the wire as well as # loopback path of the L2 switch. 
[port "0"] @@ -186,7 +289,7 @@ [fini] version = 0x1 - checksum = 0x7044b7fd + checksum = 0x2d7417e5 # # $FreeBSD$ # diff --git a/sys/dev/cxgbe/iw_cxgbe/cm.c b/sys/dev/cxgbe/iw_cxgbe/cm.c index c884f5a..c2b72fa 100644 --- a/sys/dev/cxgbe/iw_cxgbe/cm.c +++ b/sys/dev/cxgbe/iw_cxgbe/cm.c @@ -80,7 +80,7 @@ static spinlock_t timeout_lock; static void process_req(struct work_struct *ctx); static void start_ep_timer(struct c4iw_ep *ep); -static void stop_ep_timer(struct c4iw_ep *ep); +static int stop_ep_timer(struct c4iw_ep *ep); static int set_tcpinfo(struct c4iw_ep *ep); static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc); static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); @@ -96,14 +96,14 @@ static void send_mpa_req(struct c4iw_ep *ep); static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen); static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen); static void close_complete_upcall(struct c4iw_ep *ep, int status); -static int abort_connection(struct c4iw_ep *ep); +static int send_abort(struct c4iw_ep *ep); static void peer_close_upcall(struct c4iw_ep *ep); static void peer_abort_upcall(struct c4iw_ep *ep); static void connect_reply_upcall(struct c4iw_ep *ep, int status); static int connect_request_upcall(struct c4iw_ep *ep); static void established_upcall(struct c4iw_ep *ep); -static void process_mpa_reply(struct c4iw_ep *ep); -static void process_mpa_request(struct c4iw_ep *ep); +static int process_mpa_reply(struct c4iw_ep *ep); +static int process_mpa_request(struct c4iw_ep *ep); static void process_peer_close(struct c4iw_ep *ep); static void process_conn_error(struct c4iw_ep *ep); static void process_close_complete(struct c4iw_ep *ep); @@ -123,11 +123,11 @@ static void release_ep_resources(struct c4iw_ep *ep); } while (0) #define STOP_EP_TIMER(ep) \ - do { \ + ({ \ CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \ __func__, __LINE__, (ep)); \ stop_ep_timer(ep); \ - } while (0) + }) #ifdef KTR static char *states[] = { @@ -147,6 +147,34 @@ static char *states[] = { }; #endif + +static void deref_cm_id(struct c4iw_ep_common *epc) +{ + epc->cm_id->rem_ref(epc->cm_id); + epc->cm_id = NULL; + set_bit(CM_ID_DEREFED, &epc->history); +} + +static void ref_cm_id(struct c4iw_ep_common *epc) +{ + set_bit(CM_ID_REFED, &epc->history); + epc->cm_id->add_ref(epc->cm_id); +} + +static void deref_qp(struct c4iw_ep *ep) +{ + c4iw_qp_rem_ref(&ep->com.qp->ibqp); + clear_bit(QP_REFERENCED, &ep->com.flags); + set_bit(QP_DEREFED, &ep->com.history); +} + +static void ref_qp(struct c4iw_ep *ep) +{ + set_bit(QP_REFERENCED, &ep->com.flags); + set_bit(QP_REFED, &ep->com.history); + c4iw_qp_add_ref(&ep->com.qp->ibqp); +} + static void process_req(struct work_struct *ctx) { @@ -304,9 +332,7 @@ process_peer_close(struct c4iw_ep *ep) disconnect = 0; STOP_EP_TIMER(ep); close_socket(&ep->com, 0); - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; + deref_cm_id(&ep->com); release = 1; break; @@ -490,6 +516,7 @@ process_close_complete(struct c4iw_ep *ep) /* The cm_id may be null if we failed to connect */ mutex_lock(&ep->com.mutex); + set_bit(CLOSE_CON_RPL, &ep->com.history); switch (ep->com.state) { @@ -580,13 +607,14 @@ static void process_data(struct c4iw_ep *ep) { struct sockaddr_in *local, *remote; + int disconnect = 0; CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__, ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv)); switch (state_read(&ep->com)) { case MPA_REQ_SENT: 
-		process_mpa_reply(ep);
+		disconnect = process_mpa_reply(ep);
 		break;
 	case MPA_REQ_WAIT:
 		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
@@ -595,7 +623,7 @@ process_data(struct c4iw_ep *ep)
 		ep->com.remote_addr = *remote;
 		free(local, M_SONAME);
 		free(remote, M_SONAME);
-		process_mpa_request(ep);
+		disconnect = process_mpa_request(ep);
 		break;
 	default:
 		if (sbused(&ep->com.so->so_rcv))
@@ -605,6 +633,9 @@ process_data(struct c4iw_ep *ep)
 			    ep->com.so->so_state, sbused(&ep->com.so->so_rcv));
 		break;
 	}
+	if (disconnect)
+		c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
+
 }
 
 static void
@@ -749,9 +780,9 @@ int db_delay_usecs = 1;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_delay_usecs, CTLFLAG_RWTUN, &db_delay_usecs, 0,
 		"Usecs to delay awaiting db fifo to drain");
 
-static int dack_mode = 1;
+static int dack_mode = 0;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RWTUN, &dack_mode, 0,
-		"Delayed ack mode (default = 1)");
+		"Delayed ack mode (default = 0)");
 
 int c4iw_max_read_depth = 8;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RWTUN, &c4iw_max_read_depth, 0,
@@ -773,9 +804,9 @@ int c4iw_debug = 1;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RWTUN, &c4iw_debug, 0,
 		"Enable debug logging (default = 0)");
 
-static int peer2peer;
+static int peer2peer = 1;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RWTUN, &peer2peer, 0,
-		"Support peer2peer ULPs (default = 0)");
+		"Support peer2peer ULPs (default = 1)");
 
 static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RWTUN, &p2p_type, 0,
@@ -827,14 +858,16 @@ start_ep_timer(struct c4iw_ep *ep)
 	add_timer(&ep->timer);
 }
 
-static void
+static int
 stop_ep_timer(struct c4iw_ep *ep)
 {
 
 	del_timer_sync(&ep->timer);
 	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
 		c4iw_put_ep(&ep->com);
+		return 0;
 	}
+	return 1;
 }
 
 static enum
@@ -900,6 +933,8 @@ void _c4iw_free_ep(struct kref *kref)
 	epc = &ep->com;
 	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
 	    __func__, epc));
+	if (test_bit(QP_REFERENCED, &ep->com.flags))
+		deref_qp(ep);
 	kfree(ep);
 }
 
@@ -1175,20 +1210,17 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status)
 
 		CTR2(KTR_IW_CXGBE, "%s:ccu1 %1", __func__, ep);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
-		ep->com.cm_id->rem_ref(ep->com.cm_id);
-		ep->com.cm_id = NULL;
-		ep->com.qp = NULL;
+		deref_cm_id(&ep->com);
 		set_bit(CLOSE_UPCALL, &ep->com.history);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep);
 }
 
-static int abort_connection(struct c4iw_ep *ep)
+static int send_abort(struct c4iw_ep *ep)
 {
 	int err;
 
 	CTR2(KTR_IW_CXGBE, "%s:abB %p", __func__, ep);
-	state_set(&ep->com, ABORTING);
 	abort_socket(ep);
 	err = close_socket(&ep->com, 0);
 	set_bit(ABORT_CONN, &ep->com.history);
@@ -1226,9 +1258,7 @@ static void peer_abort_upcall(struct c4iw_ep *ep)
 
 		CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
-		ep->com.cm_id->rem_ref(ep->com.cm_id);
-		ep->com.cm_id = NULL;
-		ep->com.qp = NULL;
+		deref_cm_id(&ep->com);
 		set_bit(ABORT_UPCALL, &ep->com.history);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep);
@@ -1282,9 +1312,7 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
 	if (status < 0) {
 
 		CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status);
-		ep->com.cm_id->rem_ref(ep->com.cm_id);
-		ep->com.cm_id = NULL;
-		ep->com.qp = NULL;
+		deref_cm_id(&ep->com);
 	}
 
 	CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep);
@@ -1353,8 +1381,19 @@ static void established_upcall(struct c4iw_ep *ep)
 }
 
-
-static void process_mpa_reply(struct c4iw_ep *ep)
+/*
+ * process_mpa_reply - process streaming mode MPA reply
+ *
+ * Returns:
+ *
+ * 0 upon success indicating a connect request was delivered to the ULP
+ * or the mpa request is incomplete but valid so far.
+ *
+ * 1 if a failure requires the caller to close the connection.
+ *
+ * 2 if a failure requires the caller to abort the connection.
+ */
+static int process_mpa_reply(struct c4iw_ep *ep)
 {
 	struct mpa_message *mpa;
 	struct mpa_v2_conn_params *mpa_v2_params;
@@ -1367,17 +1406,17 @@ static void process_mpa_reply(struct c4iw_ep *ep)
 	struct mbuf *top, *m;
 	int flags = MSG_DONTWAIT;
 	struct uio uio;
+	int disconnect = 0;
 
 	CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep);
 
 	/*
-	 * Stop mpa timer.  If it expired, then the state has
-	 * changed and we bail since ep_timeout already aborted
-	 * the connection.
+	 * Stop mpa timer.  If it expired, then
+	 * we ignore the MPA reply.  process_timeout()
+	 * will abort the connection.
 	 */
-	STOP_EP_TIMER(ep);
-	if (state_read(&ep->com) != MPA_REQ_SENT)
-		return;
+	if (STOP_EP_TIMER(ep))
+		return 0;
 
 	uio.uio_resid = 1000000;
 	uio.uio_td = ep->com.thread;
@@ -1389,7 +1428,7 @@ static void process_mpa_reply(struct c4iw_ep *ep)
 
 			CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep);
 			START_EP_TIMER(ep);
-			return;
+			return 0;
 		}
 		err = -err;
 		CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep);
@@ -1417,7 +1456,7 @@ static void process_mpa_reply(struct c4iw_ep *ep)
 		CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep,
 		    ep->mpa_pkt_len + m->m_len);
 		err = (-EINVAL);
-		goto err;
+		goto err_stop_timer;
 	}
 
 	/*
@@ -1435,8 +1474,9 @@ static void process_mpa_reply(struct c4iw_ep *ep)
 	/*
 	 * if we don't even have the mpa message, then bail.
 	 */
-	if (ep->mpa_pkt_len < sizeof(*mpa))
-		return;
+	if (ep->mpa_pkt_len < sizeof(*mpa)) {
+		return 0;
+	}
 	mpa = (struct mpa_message *) ep->mpa_pkt;
 
 	/* Validate MPA header. */
@@ -1447,14 +1487,14 @@ static void process_mpa_reply(struct c4iw_ep *ep)
 		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d, "
Local = %d, " " Received = %d\n", __func__, mpa_rev, mpa->revision); err = -EPROTO; - goto err; + goto err_stop_timer; } if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep); err = -EPROTO; - goto err; + goto err_stop_timer; } plen = ntohs(mpa->private_data_size); @@ -1466,7 +1506,7 @@ static void process_mpa_reply(struct c4iw_ep *ep) CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep); err = -EPROTO; - goto err; + goto err_stop_timer; } /* @@ -1475,8 +1515,9 @@ static void process_mpa_reply(struct c4iw_ep *ep) if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep); + STOP_EP_TIMER(ep); err = -EPROTO; - goto err; + goto err_stop_timer; } ep->plen = (u8) plen; @@ -1488,14 +1529,14 @@ static void process_mpa_reply(struct c4iw_ep *ep) if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) { CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep); - return; + return 0; } if (mpa->flags & MPA_REJECT) { CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep); err = -ECONNREFUSED; - goto err; + goto err_stop_timer; } /* @@ -1638,6 +1679,7 @@ static void process_mpa_reply(struct c4iw_ep *ep) err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); err = -ENOMEM; + disconnect = 1; goto out; } @@ -1658,19 +1700,33 @@ static void process_mpa_reply(struct c4iw_ep *ep) err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); err = -ENOMEM; + disconnect = 1; goto out; } goto out; +err_stop_timer: + STOP_EP_TIMER(ep); err: - state_set(&ep->com, ABORTING); - abort_connection(ep); + disconnect = 2; out: connect_reply_upcall(ep, err); CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep); - return; + return disconnect; } -static void +/* + * process_mpa_request - process streaming mode MPA request + * + * Returns: + * + * 0 upon success indicating a connect request was delivered to the ULP + * or the mpa request is incomplete but valid so far. + * + * 1 if a failure requires the caller to close the connection. + * + * 2 if a failure requires the caller to abort the connection. + */ +static int process_mpa_request(struct c4iw_ep *ep) { struct mpa_message *mpa; @@ -1684,7 +1740,7 @@ process_mpa_request(struct c4iw_ep *ep) CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]); if (state != MPA_REQ_WAIT) - return; + return 0; iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len]; iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len; @@ -1698,13 +1754,10 @@ process_mpa_request(struct c4iw_ep *ep) rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags); if (rc == EAGAIN) - return; - else if (rc) { -abort: - STOP_EP_TIMER(ep); - abort_connection(ep); - return; - } + return 0; + else if (rc) + goto err_stop_timer; + KASSERT(uio.uio_offset > 0, ("%s: sorecieve on so %p read no data", __func__, ep->com.so)); ep->mpa_pkt_len += uio.uio_offset; @@ -1718,7 +1771,7 @@ abort: /* Don't even have the MPA message. Wait for more data to arrive. */ if (ep->mpa_pkt_len < sizeof(*mpa)) - return; + return 0; mpa = (struct mpa_message *) ep->mpa_pkt; /* @@ -1727,24 +1780,24 @@ abort: if (mpa->revision > mpa_rev) { log(LOG_ERR, "%s: MPA version mismatch. Local = %d," " Received = %d\n", __func__, mpa_rev, mpa->revision); - goto abort; + goto err_stop_timer; } if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) - goto abort; + goto err_stop_timer; /* * Fail if there's too much private data. 
 	 */
 	plen = ntohs(mpa->private_data_size);
 	if (plen > MPA_MAX_PRIVATE_DATA)
-		goto abort;
+		goto err_stop_timer;
 
 	/*
 	 * If plen does not account for pkt size
 	 */
 	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
-		goto abort;
+		goto err_stop_timer;
 
 	ep->plen = (u8) plen;
 
@@ -1752,7 +1805,7 @@ abort:
 	 * If we don't have all the pdata yet, then bail.
 	 */
 	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
-		return;
+		return 0;
 
 	/*
 	 * If we get here we have accumulated the entire mpa
@@ -1794,7 +1847,7 @@ abort:
 	ep->mpa_attr.p2p_type = p2p_type;
 
 	if (set_tcpinfo(ep))
-		goto abort;
+		goto err_stop_timer;
 
 	CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, "
 	    "xmit_marker_enabled = %d, version = %d", __func__,
@@ -1807,12 +1860,18 @@ abort:
 	/* drive upcall */
 	mutex_lock(&ep->parent_ep->com.mutex);
 	if (ep->parent_ep->com.state != DEAD) {
-		if(connect_request_upcall(ep)) {
-			abort_connection(ep);
-		}
-	}else
-		abort_connection(ep);
+		if(connect_request_upcall(ep))
+			goto err_out;
+	}else {
+		goto err_out;
+	}
 	mutex_unlock(&ep->parent_ep->com.mutex);
+	return 0;
+
+err_stop_timer:
+	STOP_EP_TIMER(ep);
+err_out:
+	return 2;
 }
 
 /*
@@ -1825,6 +1884,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
 	int err;
 	struct c4iw_ep *ep = to_ep(cm_id);
 	CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep);
+	int disconnect = 0;
 
 	if (state_read(&ep->com) == DEAD) {
 
@@ -1838,7 +1898,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
 	if (mpa_rev == 0) {
 
 		CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep);
-		abort_connection(ep);
+		disconnect = 2;
 	}
 	else {
 
@@ -1847,6 +1907,8 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
 		err = soshutdown(ep->com.so, 3);
 	}
 	c4iw_put_ep(&ep->com);
+	if (disconnect)
+		err = c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
 	CTR2(KTR_IW_CXGBE, "%s:crc4 %p", __func__, ep);
 	return 0;
 }
@@ -1859,6 +1921,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	struct c4iw_ep *ep = to_ep(cm_id);
 	struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
 	struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
+	int abort = 0;
 
 	CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep);
 
@@ -1866,7 +1929,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
 		CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep);
 		err = -ECONNRESET;
-		goto err;
+		goto err_out;
 	}
 
 	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
@@ -1878,9 +1941,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	    (conn_param->ird > c4iw_max_read_depth)) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep);
-		abort_connection(ep);
 		err = -EINVAL;
-		goto err;
+		goto err_abort;
 	}
 
 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
@@ -1894,9 +1956,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 			ep->ord = conn_param->ord;
 			send_mpa_reject(ep, conn_param->private_data,
 			    conn_param->private_data_len);
-			abort_connection(ep);
 			err = -ENOMEM;
-			goto err;
+			goto err_abort;
 		}
 
 		if (conn_param->ird > ep->ord) {
@@ -1910,9 +1971,8 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 			} else {
 
 				CTR2(KTR_IW_CXGBE, "%s:cac7 %p", __func__, ep);
-				abort_connection(ep);
 				err = -ENOMEM;
-				goto err;
+				goto err_abort;
 			}
 		}
 
@@ -1932,9 +1992,10 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	}
 
-	cm_id->add_ref(cm_id);
 	ep->com.cm_id = cm_id;
+	ref_cm_id(&ep->com);
 	ep->com.qp = qp;
+	ref_qp(ep);
 	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
 
 	/* bind QP to EP and move to RTS */
@@ -1956,7 +2017,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	if (err) {
 
 		CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep);
-		goto err1;
+		goto err_defef_cm_id;
 	}
 	err = send_mpa_reply(ep, conn_param->private_data,
 	    conn_param->private_data_len);
@@ -1964,7 +2025,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	if (err) {
 
 		CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep);
-		goto err1;
+		goto err_defef_cm_id;
 	}
 
 	state_set(&ep->com, FPDU_MODE);
@@ -1972,11 +2033,13 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	c4iw_put_ep(&ep->com);
 	CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep);
 	return 0;
-err1:
-	ep->com.cm_id = NULL;
-	ep->com.qp = NULL;
-	cm_id->rem_ref(cm_id);
-err:
+err_defef_cm_id:
+	deref_cm_id(&ep->com);
+err_abort:
+	abort = 1;
+err_out:
+	if (abort)
+		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
 	c4iw_put_ep(&ep->com);
 	CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep);
 	return err;
@@ -2028,9 +2091,9 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		ep->ord = 1;
 	}
 
-	cm_id->add_ref(cm_id);
 	ep->com.dev = dev;
 	ep->com.cm_id = cm_id;
+	ref_cm_id(&ep->com);
 	ep->com.qp = get_qhp(dev, conn_param->qpn);
 
 	if (!ep->com.qp) {
@@ -2039,6 +2102,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		err = -EINVAL;
 		goto fail2;
 	}
+	ref_qp(ep);
 	ep->com.thread = curthread;
 	ep->com.so = cm_id->so;
@@ -2096,7 +2160,7 @@ fail3:
 	CTR2(KTR_IW_CXGBE, "%s:ccb %p", __func__, ep);
 	fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
 fail2:
-	cm_id->rem_ref(cm_id);
+	deref_cm_id(&ep->com);
 	c4iw_put_ep(&ep->com);
 out:
 	CTR2(KTR_IW_CXGBE, "%s:ccE %p", __func__, ep);
@@ -2124,8 +2188,8 @@ c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog)
 		goto failed;
 	}
 
-	cm_id->add_ref(cm_id);
 	ep->com.cm_id = cm_id;
+	ref_cm_id(&ep->com);
 	ep->com.dev = dev;
 	ep->backlog = backlog;
 	ep->com.local_addr = cm_id->local_addr;
@@ -2150,7 +2214,7 @@ c4iw_destroy_listen_ep(struct iw_cm_id *cm_id)
 	    cm_id->so, states[ep->com.state]);
 
 	state_set(&ep->com, DEAD);
-	cm_id->rem_ref(cm_id);
+	deref_cm_id(&ep->com);
 	c4iw_put_ep(&ep->com);
 
 	return;
@@ -2232,7 +2296,8 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
 
 			CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep);
 			set_bit(EP_DISC_ABORT, &ep->com.history);
-			ret = abort_connection(ep);
+			close_complete_upcall(ep, -ECONNRESET);
+			ret = send_abort(ep);
 		} else {
 
 			CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep);
@@ -2250,8 +2315,25 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
 	}
 
 	if (fatal) {
+		set_bit(EP_DISC_FAIL, &ep->com.history);
+		if (!abrupt) {
+			STOP_EP_TIMER(ep);
+			close_complete_upcall(ep, -EIO);
+		}
+		if (ep->com.qp) {
+			struct c4iw_qp_attributes attrs;
 
+			attrs.next_state = C4IW_QP_STATE_ERROR;
+			ret = c4iw_modify_qp(ep->com.dev, ep->com.qp,
+						C4IW_QP_ATTR_NEXT_STATE,
+						&attrs, 1);
+			if (ret) {
+				CTR2(KTR_IW_CXGBE, "%s:ced7 %p", __func__, ep);
+				printf("%s - qp <- error failed!\n", __func__);
+			}
+		}
 		release_ep_resources(ep);
+		ep->com.state = DEAD;
 		CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep);
@@ -2290,8 +2372,13 @@ static void ep_timeout(unsigned long arg)
 
 	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
 
-		list_add_tail(&ep->entry, &timeout_list);
-		kickit = 1;
+		/*
+		 * Only insert if it is not already on the list.
+		 */
+		if (!ep->entry.next) {
+			list_add_tail(&ep->entry, &timeout_list);
+			kickit = 1;
+		}
 	}
 	spin_unlock(&timeout_lock);
diff --git a/sys/dev/cxgbe/iw_cxgbe/cq.c b/sys/dev/cxgbe/iw_cxgbe/cq.c
index 8710e03..b40ffc7 100644
--- a/sys/dev/cxgbe/iw_cxgbe/cq.c
+++ b/sys/dev/cxgbe/iw_cxgbe/cq.c
@@ -724,7 +724,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 		default:
 			printf("Unexpected cqe_status 0x%x for QPID = 0x%0x\n",
 			       CQE_STATUS(&cqe), CQE_QPID(&cqe));
-			ret = -EINVAL;
+			wc->status = IB_WC_FATAL_ERR;
 		}
 	}
 out:
@@ -861,6 +861,7 @@ c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr,
 		if (!mm2)
 			goto err4;
 
+		memset(&uresp, 0, sizeof(uresp));
 		uresp.qid_mask = rhp->rdev.cqmask;
 		uresp.cqid = chp->cq.cqid;
 		uresp.size = chp->cq.size;
@@ -871,7 +872,8 @@ c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr,
 		uresp.gts_key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
 		spin_unlock(&ucontext->mmap_lock);
-		ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+		ret = ib_copy_to_udata(udata, &uresp,
+				       sizeof(uresp) - sizeof(uresp.reserved));
 		if (ret)
 			goto err5;
diff --git a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
index c232f70..38a8af6 100644
--- a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
+++ b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
@@ -157,7 +157,7 @@ static inline int c4iw_fatal_error(struct c4iw_rdev *rdev)
 static inline int c4iw_num_stags(struct c4iw_rdev *rdev)
 {
-	return min((int)T4_MAX_NUM_STAG, (int)(rdev->adap->vres.stag.size >> 5));
+	return (int)(rdev->adap->vres.stag.size >> 5);
 }
 
 #define C4IW_WR_TO (10*HZ)
@@ -435,6 +435,7 @@ struct c4iw_qp {
 	atomic_t refcnt;
 	wait_queue_head_t wait;
 	struct timer_list timer;
+	int sq_sig_all;
 };
 
 static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
@@ -712,7 +713,8 @@ enum c4iw_ep_flags {
 	ABORT_REQ_IN_PROGRESS	= 1,
 	RELEASE_RESOURCES	= 2,
 	CLOSE_SENT		= 3,
-	TIMEOUT			= 4
+	TIMEOUT			= 4,
+	QP_REFERENCED		= 5
 };
 
 enum c4iw_ep_history {
@@ -737,7 +739,13 @@ enum c4iw_ep_history {
 	EP_DISC_ABORT		= 18,
 	CONN_RPL_UPCALL		= 19,
 	ACT_RETRY_NOMEM		= 20,
-	ACT_RETRY_INUSE		= 21
+	ACT_RETRY_INUSE		= 21,
+	CLOSE_CON_RPL		= 22,
+	EP_DISC_FAIL		= 24,
+	QP_REFED		= 25,
+	QP_DEREFED		= 26,
+	CM_ID_REFED		= 27,
+	CM_ID_DEREFED		= 28
 };
 
 struct c4iw_ep_common {
diff --git a/sys/dev/cxgbe/iw_cxgbe/mem.c b/sys/dev/cxgbe/iw_cxgbe/mem.c
index f7c460a..e42aa1a 100644
--- a/sys/dev/cxgbe/iw_cxgbe/mem.c
+++ b/sys/dev/cxgbe/iw_cxgbe/mem.c
@@ -46,6 +46,12 @@ __FBSDID("$FreeBSD$");
 #define T4_ULPTX_MIN_IO 32
 #define C4IW_MAX_INLINE_SIZE 96
 
+static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length)
+{
+	return (is_t4(dev->rdev.adap) ||
+		is_t5(dev->rdev.adap)) &&
+		length >= 8*1024*1024*1024ULL;
+}
 static int
 write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data)
 {
@@ -144,8 +150,12 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
 	if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) {
 		stag_idx = c4iw_get_resource(&rdev->resource.tpt_table);
-		if (!stag_idx)
+		if (!stag_idx) {
+			mutex_lock(&rdev->stats.lock);
+			rdev->stats.stag.fail++;
+			mutex_unlock(&rdev->stats.lock);
 			return -ENOMEM;
+		}
 		mutex_lock(&rdev->stats.lock);
 		rdev->stats.stag.cur += 32;
 		if (rdev->stats.stag.cur > rdev->stats.stag.max)
@@ -250,9 +260,9 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
 	int ret;
 
 	ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
-			      FW_RI_STAG_NSMR, mhp->attr.perms,
+			      FW_RI_STAG_NSMR, mhp->attr.len ? mhp->attr.perms : 0,
 			      mhp->attr.mw_bind_enable, mhp->attr.zbva,
-			      mhp->attr.va_fbo, mhp->attr.len, shift - 12,
+			      mhp->attr.va_fbo, mhp->attr.len ? mhp->attr.len : -1, shift - 12,
 			      mhp->attr.pbl_size, mhp->attr.pbl_addr);
 	if (ret)
 		return ret;
@@ -381,7 +391,7 @@ int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
 	struct c4iw_dev *rhp;
 	__be64 *page_list = NULL;
 	int shift = 0;
-	u64 total_size;
+	u64 total_size = 0;
 	int npages = 0;
 	int ret;
 
@@ -416,7 +426,10 @@ int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
 		if (ret)
 			return ret;
 	}
-
+	if (mr_exceeds_hw_limits(rhp, total_size)) {
+		kfree(page_list);
+		return -EINVAL;
+	}
 	ret = reregister_mem(rhp, php, &mh, shift, npages);
 	kfree(page_list);
 	if (ret)
@@ -477,10 +490,15 @@ struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
 		if (ret)
 			goto err;
 
+		if (mr_exceeds_hw_limits(rhp, total_size)) {
+			kfree(page_list);
+			ret = -EINVAL;
+			goto err;
+		}
 		ret = alloc_pbl(mhp, npages);
 		if (ret) {
 			kfree(page_list);
-			goto err_pbl;
+			goto err;
 		}
 
 		ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr,
@@ -580,6 +598,10 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	php = to_c4iw_pd(pd);
 	rhp = php->rhp;
+
+	if (mr_exceeds_hw_limits(rhp, length))
+		return ERR_PTR(-EINVAL);
+
 	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
 	if (!mhp)
 		return ERR_PTR(-ENOMEM);
diff --git a/sys/dev/cxgbe/iw_cxgbe/qp.c b/sys/dev/cxgbe/iw_cxgbe/qp.c
index 1c0381c..3ee16b1 100644
--- a/sys/dev/cxgbe/iw_cxgbe/qp.c
+++ b/sys/dev/cxgbe/iw_cxgbe/qp.c
@@ -615,7 +615,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		fw_flags = 0;
 		if (wr->send_flags & IB_SEND_SOLICITED)
 			fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
-		if (wr->send_flags & IB_SEND_SIGNALED)
+		if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all)
 			fw_flags |= FW_RI_COMPLETION_FLAG;
 		swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
 		switch (wr->opcode) {
@@ -673,7 +673,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		}
 		swsqe->idx = qhp->wq.sq.pidx;
 		swsqe->complete = 0;
-		swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED);
+		swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
+				  qhp->sq_sig_all;
 		swsqe->wr_id = wr->wr_id;
 
 		init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
@@ -952,7 +953,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
 	flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
 	spin_unlock(&qhp->lock);
 	spin_unlock_irqrestore(&rchp->lock, flag);
-	if (flushed) {
+	if (flushed && rchp->ibcq.comp_handler) {
 		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
 		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
 		spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
@@ -966,7 +967,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
 	flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count);
 	spin_unlock(&qhp->lock);
 	spin_unlock_irqrestore(&schp->lock, flag);
-	if (flushed) {
+	if (flushed && schp->ibcq.comp_handler) {
 		spin_lock_irqsave(&schp->comp_handler_lock, flag);
 		(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
 		spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
@@ -1530,6 +1531,7 @@ c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	qhp->attr.enable_bind = 1;
 	qhp->attr.max_ord = 1;
 	qhp->attr.max_ird = 1;
+	qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
 	spin_lock_init(&qhp->lock);
 	mutex_init(&qhp->mutex);
 	init_waitqueue_head(&qhp->wait);
@@ -1702,6 +1704,12 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	memset(attr, 0, sizeof *attr);
 	memset(init_attr, 0, sizeof *init_attr);
 	attr->qp_state = to_ib_qp_state(qhp->attr.state);
+	init_attr->cap.max_send_wr = qhp->attr.sq_num_entries;
+	init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries;
+	init_attr->cap.max_send_sge = qhp->attr.sq_max_sges;
+	init_attr->cap.max_recv_sge = qhp->attr.sq_max_sges;
+	init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE;
+	init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
 	return 0;
 }
 #endif
diff --git a/sys/dev/cxgbe/iw_cxgbe/t4.h b/sys/dev/cxgbe/iw_cxgbe/t4.h
index 023c607..b0118f9 100644
--- a/sys/dev/cxgbe/iw_cxgbe/t4.h
+++ b/sys/dev/cxgbe/iw_cxgbe/t4.h
@@ -69,7 +69,6 @@
 #define T4_MAX_SQ_SIZE (T4_MAX_EQ_SIZE - 1)
 #define T4_MAX_QP_DEPTH (T4_MAX_RQ_SIZE - 1)
 #define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 1)
-#define T4_MAX_NUM_STAG (1<<15)
 #define T4_MAX_MR_SIZE (~0ULL - 1)
 #define T4_PAGESIZE_MASK 0xffff000  /* 4KB-128MB */
 #define T4_STAG_UNSET 0xffffffff
@@ -524,7 +523,7 @@ static inline void t4_swcq_consume(struct t4_cq *cq)
 static inline void t4_hwcq_consume(struct t4_cq *cq)
 {
 	cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts;
-	if (++cq->cidx_inc == (cq->size >> 4)) {
+	if (++cq->cidx_inc == (cq->size >> 4) || cq->cidx_inc == M_CIDXINC) {
 		u32 val;
 
 		val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7) |
diff --git a/sys/dev/cxgbe/iw_cxgbe/user.h b/sys/dev/cxgbe/iw_cxgbe/user.h
index 59a1f43..d42f659 100644
--- a/sys/dev/cxgbe/iw_cxgbe/user.h
+++ b/sys/dev/cxgbe/iw_cxgbe/user.h
@@ -50,6 +50,7 @@ struct c4iw_create_cq_resp {
 	__u32 cqid;
 	__u32 size;
 	__u32 qid_mask;
+	__u32 reserved; /* explicit padding (optional for i386) */
 };
 
 struct c4iw_create_qp_resp {
diff --git a/sys/dev/cxgbe/offload.h b/sys/dev/cxgbe/offload.h
index 2b5e4dc..992b4cd 100644
--- a/sys/dev/cxgbe/offload.h
+++ b/sys/dev/cxgbe/offload.h
@@ -125,7 +125,6 @@ struct t4_virt_res {	/* virtualized HW resources */
 	struct t4_range l2t;
 };
 
-#ifdef TCP_OFFLOAD
 enum {
 	ULD_TOM = 0,
 	ULD_IWARP,
@@ -152,6 +151,7 @@ struct tom_tunables {
 	int tx_align;
 };
 
+#ifdef TCP_OFFLOAD
 int t4_register_uld(struct uld_info *);
 int t4_unregister_uld(struct uld_info *);
 int t4_activate_uld(struct adapter *, int);
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 77777f8..11d4253 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -334,7 +334,8 @@ TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed);
 static int t4_linkcaps_allowed = 0;	/* No DCBX, PPP, etc. by default */
 TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed);
 
-static int t4_switchcaps_allowed = 0;
+static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
+    FW_CAPS_CONFIG_SWITCH_EGRESS;
 TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed);
 
 static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC;
@@ -343,13 +344,13 @@ TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed);
 static int t4_toecaps_allowed = -1;
 TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed);
 
-static int t4_rdmacaps_allowed = 0;
+static int t4_rdmacaps_allowed = -1;
 TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed);
 
 static int t4_tlscaps_allowed = 0;
 TUNABLE_INT("hw.cxgbe.tlscaps_allowed", &t4_tlscaps_allowed);
 
-static int t4_iscsicaps_allowed = 0;
+static int t4_iscsicaps_allowed = -1;
 TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed);
 
 static int t4_fcoecaps_allowed = 0;
@@ -1731,29 +1732,29 @@ cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
 	switch (c) {
 	case IFCOUNTER_IPACKETS:
-		return (s->rx_frames - s->rx_pause);
+		return (s->rx_frames);
 
 	case IFCOUNTER_IERRORS:
 		return (s->rx_jabber + s->rx_runt + s->rx_too_long +
 		    s->rx_fcs_err + s->rx_len_err);
 
 	case IFCOUNTER_OPACKETS:
-		return (s->tx_frames - s->tx_pause);
+		return (s->tx_frames);
 
 	case IFCOUNTER_OERRORS:
 		return (s->tx_error_frames);
 
 	case IFCOUNTER_IBYTES:
-		return (s->rx_octets - s->rx_pause * 64);
+		return (s->rx_octets);
 
 	case IFCOUNTER_OBYTES:
-		return (s->tx_octets - s->tx_pause * 64);
+		return (s->tx_octets);
 
 	case IFCOUNTER_IMCASTS:
-		return (s->rx_mcast_frames - s->rx_pause);
+		return (s->rx_mcast_frames);
 
 	case IFCOUNTER_OMCASTS:
-		return (s->tx_mcast_frames - s->tx_pause);
+		return (s->tx_mcast_frames);
 
 	case IFCOUNTER_IQDROPS:
 		return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
@@ -6287,6 +6288,9 @@ mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
 {
 	unsigned int size;
 
+	if (from == to)
+		return;
+
 	size = to - from + 1;
 	if (size == 0)
 		return;
@@ -6390,13 +6394,10 @@ sysctl_meminfo(SYSCTL_HANDLER_ARGS)
 	md++;
 
 	if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
-		if (chip_id(sc) <= CHELSIO_T5) {
-			hi = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
+		if (chip_id(sc) <= CHELSIO_T5)
 			md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
-		} else {
-			hi = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
+		else
 			md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR);
-		}
 		md->limit = 0;
 	} else {
 		md->base = 0;
@@ -9103,9 +9104,26 @@ tweak_tunables(void)
 
 	if (t4_toecaps_allowed == -1)
 		t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
+
+	if (t4_rdmacaps_allowed == -1) {
+		t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP |
+		    FW_CAPS_CONFIG_RDMA_RDMAC;
+	}
+
+	if (t4_iscsicaps_allowed == -1) {
+		t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU |
+		    FW_CAPS_CONFIG_ISCSI_TARGET_PDU |
+		    FW_CAPS_CONFIG_ISCSI_T10DIF;
+	}
 #else
 	if (t4_toecaps_allowed == -1)
 		t4_toecaps_allowed = 0;
+
+	if (t4_rdmacaps_allowed == -1)
+		t4_rdmacaps_allowed = 0;
+
+	if (t4_iscsicaps_allowed == -1)
+		t4_iscsicaps_allowed = 0;
 #endif
 
 #ifdef DEV_NETMAP
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index 33d8d48..daf3b8a 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -1397,13 +1397,8 @@ process_iql:
 #if defined(INET) || defined(INET6)
 	if (iq->flags & IQ_LRO_ENABLED) {
 		struct lro_ctrl *lro = &rxq->lro;
-		struct lro_entry *l;
 
-		while (!SLIST_EMPTY(&lro->lro_active)) {
-			l = SLIST_FIRST(&lro->lro_active);
-			SLIST_REMOVE_HEAD(&lro->lro_active, next);
-			tcp_lro_flush(lro, l);
-		}
+
+		tcp_lro_flush_all(lro);
 	}
 #endif
@@ -2343,8 +2338,8 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx)
 		} else {
 			total++;
 			remaining--;
-			n = write_txpkt_wr(txq, (void *)wr, m0, available);
 			ETHER_BPF_MTAP(ifp, m0);
+			n = write_txpkt_wr(txq, (void *)wr, m0, available);
 		}
 		MPASS(n >= 1 && n <= available && n <= SGE_MAX_WR_NDESC);
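A closing note on the cm.c changes above: stop_ep_timer() now reports whether the timeout had already fired, and STOP_EP_TIMER becomes a statement expression so callers can branch on that result. Below is a runnable toy model of that contract; the struct and flag here are hypothetical stand-ins (the real code uses del_timer_sync(), atomic test_and_set_bit(), and c4iw_put_ep()):

```c
#include <stdio.h>

#define TIMEOUT_FLAG 0x1u

struct ep_sketch {
	unsigned flags;	/* TIMEOUT_FLAG is set once, by whoever owns cleanup */
	int refs;	/* reference held by the running timer */
};

/*
 * Models the new stop_ep_timer() contract: return 0 if we stopped a
 * live timer (and dropped its reference), 1 if the timeout handler
 * already claimed it, in which case the caller must back off and let
 * process_timeout() abort the connection.
 */
static int
stop_ep_timer(struct ep_sketch *ep)
{
	if (!(ep->flags & TIMEOUT_FLAG)) {
		ep->flags |= TIMEOUT_FLAG;	/* test_and_set_bit(TIMEOUT, ...) */
		ep->refs--;			/* c4iw_put_ep() in the driver */
		return (0);
	}
	return (1);
}

int
main(void)
{
	struct ep_sketch ep = { 0, 2 };

	printf("%d\n", stop_ep_timer(&ep));	/* 0: timer stopped, ref dropped */
	printf("%d\n", stop_ep_timer(&ep));	/* 1: timeout already claimed */
	return (0);
}
```

This is why process_mpa_reply() and process_mpa_request() can simply return 0 when STOP_EP_TIMER() reports the timer already fired: endpoint cleanup then happens exactly once, on the timeout path.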