path: root/sys/dev/cxgb
author      np <np@FreeBSD.org>    2012-06-19 07:34:13 +0000
committer   np <np@FreeBSD.org>    2012-06-19 07:34:13 +0000
commit      67d5f1a727273d8e141e96c429114dff9fb06ec3 (patch)
tree        9255a545bbd49a0458ed8850371b4fe6ed2cd01f /sys/dev/cxgb
parent      27063437e23a5e5e7debf9144ee974d21b6a6774 (diff)
download    FreeBSD-src-67d5f1a727273d8e141e96c429114dff9fb06ec3.zip
            FreeBSD-src-67d5f1a727273d8e141e96c429114dff9fb06ec3.tar.gz
- Updated TOE support in the kernel.

- Stateful TCP offload drivers for Terminator 3 and 4 (T3 and T4) ASICs.
  These are available as t3_tom and t4_tom modules that augment cxgb(4)
  and cxgbe(4) respectively.  The cxgb/cxgbe drivers continue to work as
  usual with or without these extra features.

- iWARP driver for Terminator 3 ASIC (kernel verbs).  T4 iWARP is in the
  works and will follow soon.

Build-tested with make universe.

30s overview
============
What interfaces support TCP offload?  Look for TOE4 and/or TOE6 in the
capabilities of an interface:
# ifconfig -m | grep TOE

Enable/disable TCP offload on an interface (just like any other ifnet
capability):
# ifconfig cxgbe0 toe
# ifconfig cxgbe0 -toe

Which connections are offloaded?  Look for toe4 and/or toe6 in the
output of netstat and sockstat:
# netstat -np tcp | grep toe
# sockstat -46c | grep toe

Reviewed by:    bz, gnn
Sponsored by:   Chelsio Communications.
MFC after:      ~3 months (after 9.1, and after ensuring MFC is feasible)
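The commit replaces the old t3cdev client interface with an upper layer
driver (ULD) registration scheme; see the cxgb_offload.h hunk below for
struct uld_info and the t3_*_uld() prototypes. A minimal sketch of how a
module such as t3_tom might hook into it follows. The callback bodies and
the module-event glue are hypothetical; only the uld_info fields, ULD_TOM,
and the registration calls come from this patch.

    /*
     * Illustrative sketch only: an upper layer driver attaching to the
     * ULD interface added by this commit.  t3_tom_activate() would
     * allocate sc->tom_softc and set TOM_INIT_DONE; t3_tom_deactivate()
     * would undo that.  Both are placeholders here.
     */
    static int t3_tom_activate(struct adapter *sc);
    static int t3_tom_deactivate(struct adapter *sc);

    static struct uld_info tom_uld_info = {
            .uld_id = ULD_TOM,
            .activate = t3_tom_activate,
            .deactivate = t3_tom_deactivate,
    };

    static int
    t3_tom_mod_event(module_t mod, int cmd, void *arg)
    {
            switch (cmd) {
            case MOD_LOAD:
                    /* Fails with EEXIST if ULD_TOM is already registered. */
                    return (t3_register_uld(&tom_uld_info));
            case MOD_UNLOAD:
                    /* Fails with EBUSY while any adapter still uses the ULD. */
                    return (t3_unregister_uld(&tom_uld_info));
            default:
                    return (EOPNOTSUPP);
            }
    }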
Diffstat (limited to 'sys/dev/cxgb')
-rw-r--r--  sys/dev/cxgb/common/cxgb_ctl_defs.h          10
-rw-r--r--  sys/dev/cxgb/cxgb_adapter.h                  38
-rw-r--r--  sys/dev/cxgb/cxgb_main.c                    452
-rw-r--r--  sys/dev/cxgb/cxgb_offload.c                 465
-rw-r--r--  sys/dev/cxgb/cxgb_offload.h                 249
-rw-r--r--  sys/dev/cxgb/cxgb_osdep.h                    29
-rw-r--r--  sys/dev/cxgb/cxgb_sge.c                     418
-rw-r--r--  sys/dev/cxgb/sys/mvec.h                      21
-rw-r--r--  sys/dev/cxgb/t3cdev.h                        62
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c          310
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h           43
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c       401
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h        17
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c        14
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c      150
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c        55
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c      519
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h       96
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h  22
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c       94
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c 409
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h  12
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c       417
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c  13
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h       8
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h        75
-rw-r--r--  sys/dev/cxgb/ulp/toecore/cxgb_toedev.h       49
-rw-r--r--  sys/dev/cxgb/ulp/toecore/toedev.c           420
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c         4945
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c     1034
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_ddp.c             738
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_defs.h             91
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_l2t.c             590
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_l2t.h             121
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_listen.c         1293
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h          181
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_tcp.h              47
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c      97
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h      14
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_toepcb.h           69
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_tom.c            1649
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_tom.h             305
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c      140
43 files changed, 4834 insertions, 11348 deletions
diff --git a/sys/dev/cxgb/common/cxgb_ctl_defs.h b/sys/dev/cxgb/common/cxgb_ctl_defs.h
index b228a25..fe8a4f7 100644
--- a/sys/dev/cxgb/common/cxgb_ctl_defs.h
+++ b/sys/dev/cxgb/common/cxgb_ctl_defs.h
@@ -60,14 +60,12 @@ struct mtutab {
const unsigned short *mtus; /* the MTU table values */
};
-struct net_device;
-
/*
- * Structure used to request the adapter net_device owning a given MAC address.
+ * Structure used to request the ifnet that owns a given MAC address.
*/
struct iff_mac {
- struct net_device *dev; /* the net_device */
- const unsigned char *mac_addr; /* MAC address to lookup */
+ struct ifnet *dev;
+ const unsigned char *mac_addr;
u16 vlan_tag;
};
@@ -85,7 +83,7 @@ struct ddp_params {
struct adap_ports {
unsigned int nports; /* number of ports on this adapter */
- struct net_device *lldevs[MAX_NPORTS];
+ struct ifnet *lldevs[MAX_NPORTS];
};
/*
diff --git a/sys/dev/cxgb/cxgb_adapter.h b/sys/dev/cxgb/cxgb_adapter.h
index 4354b1c..928f8fe 100644
--- a/sys/dev/cxgb/cxgb_adapter.h
+++ b/sys/dev/cxgb/cxgb_adapter.h
@@ -57,7 +57,6 @@ $FreeBSD$
#include <dev/pci/pcivar.h>
#include <cxgb_osdep.h>
-#include <t3cdev.h>
#include <sys/mbufq.h>
struct adapter;
@@ -130,6 +129,7 @@ enum {
CXGB_OFLD_INIT = (1 << 7),
TP_PARITY_INIT = (1 << 8),
CXGB_BUSY = (1 << 9),
+ TOM_INIT_DONE = (1 << 10),
/* port flags */
DOOMED = (1 << 0),
@@ -179,7 +179,6 @@ struct sge_rspq {
uint32_t async_notif;
uint32_t cntxt_id;
uint32_t offload_pkts;
- uint32_t offload_bundles;
uint32_t pure_rsps;
uint32_t unhandled_irqs;
uint32_t starved;
@@ -291,6 +290,7 @@ struct sge_qset {
uint32_t txq_stopped; /* which Tx queues are stopped */
uint64_t port_stats[SGE_PSTAT_MAX];
struct port_info *port;
+ struct adapter *adap;
int idx; /* qset # */
int qs_flags;
int coalescing;
@@ -307,10 +307,13 @@ struct sge {
struct filter_info;
+typedef int (*cpl_handler_t)(struct sge_qset *, struct rsp_desc *,
+ struct mbuf *);
+
struct adapter {
+ SLIST_ENTRY(adapter) link;
device_t dev;
int flags;
- TAILQ_ENTRY(adapter) adapter_entry;
/* PCI register resources */
int regs_rid;
@@ -376,11 +379,16 @@ struct adapter {
struct port_info port[MAX_NPORTS];
device_t portdev[MAX_NPORTS];
- struct t3cdev tdev;
+#ifdef TCP_OFFLOAD
+ void *tom_softc;
+ void *iwarp_softc;
+#endif
char fw_version[64];
char port_types[MAX_NPORTS + 1];
uint32_t open_device_map;
- uint32_t registered_device_map;
+#ifdef TCP_OFFLOAD
+ int offload_map;
+#endif
struct mtx lock;
driver_intr_t *cxgb_intr;
int msi_count;
@@ -392,6 +400,11 @@ struct adapter {
char elmerlockbuf[ADAPTER_LOCK_NAME_LEN];
int timestamp;
+
+#ifdef TCP_OFFLOAD
+#define NUM_CPL_HANDLERS 0xa7
+ cpl_handler_t cpl_handler[NUM_CPL_HANDLERS] __aligned(CACHE_LINE_SIZE);
+#endif
};
struct t3_rx_mode {
@@ -502,10 +515,12 @@ void t3_os_link_changed(adapter_t *adapter, int port_id, int link_status,
int speed, int duplex, int fc, int mac_was_reset);
void t3_os_phymod_changed(struct adapter *adap, int port_id);
void t3_sge_err_intr_handler(adapter_t *adapter);
-int t3_offload_tx(struct t3cdev *, struct mbuf *);
+#ifdef TCP_OFFLOAD
+int t3_offload_tx(struct adapter *, struct mbuf *);
+#endif
void t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[]);
int t3_mgmt_tx(adapter_t *adap, struct mbuf *m);
-
+int t3_register_cpl_handler(struct adapter *, int, cpl_handler_t);
int t3_sge_alloc(struct adapter *);
int t3_sge_free(struct adapter *);
@@ -556,15 +571,9 @@ txq_to_qset(struct sge_txq *q, int qidx)
return container_of(q, struct sge_qset, txq[qidx]);
}
-static __inline struct adapter *
-tdev2adap(struct t3cdev *d)
-{
- return container_of(d, struct adapter, tdev);
-}
-
#undef container_of
-#define OFFLOAD_DEVMAP_BIT 15
+#define OFFLOAD_DEVMAP_BIT (1 << MAX_NPORTS)
static inline int offload_running(adapter_t *adapter)
{
return isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
@@ -573,4 +582,5 @@ static inline int offload_running(adapter_t *adapter)
void cxgb_tx_watchdog(void *arg);
int cxgb_transmit(struct ifnet *ifp, struct mbuf *m);
void cxgb_qflush(struct ifnet *ifp);
+void t3_iterate(void (*)(struct adapter *, void *), void *);
#endif
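t3_iterate(), declared just above, lets offload modules walk all live T3
adapters; its definition in cxgb_main.c below runs the callback with
t3_list_lock held. A hypothetical use, not part of the patch, with
count_offload() and num_offload_adapters() invented for illustration:

    /*
     * Illustrative sketch: count adapters that currently have TOE
     * enabled on at least one port.  The callback runs while
     * t3_list_lock is held, so it must not sleep.
     */
    static void
    count_offload(struct adapter *sc, void *arg)
    {
            int *n = arg;

            if (sc->offload_map != 0)
                    (*n)++;
    }

    static int
    num_offload_adapters(void)
    {
            int n = 0;

            t3_iterate(count_offload, &n);
            return (n);
    }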
diff --git a/sys/dev/cxgb/cxgb_main.c b/sys/dev/cxgb/cxgb_main.c
index fb42004..aad7abf 100644
--- a/sys/dev/cxgb/cxgb_main.c
+++ b/sys/dev/cxgb/cxgb_main.c
@@ -30,6 +30,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -107,6 +109,9 @@ static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
unsigned int, u64, u64);
static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
unsigned int, u64, u64);
+#ifdef TCP_OFFLOAD
+static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
+#endif
/* Attachment glue for the PCI controller end of the device. Each port of
* the device is attached separately, as defined later.
@@ -119,10 +124,11 @@ static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned i
unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
-static int offload_open(struct port_info *pi);
static void touch_bars(device_t dev);
-static int offload_close(struct t3cdev *tdev);
static void cxgb_update_mac_settings(struct port_info *p);
+#ifdef TCP_OFFLOAD
+static int toe_capability(struct port_info *, int);
+#endif
static device_method_t cxgb_controller_methods[] = {
DEVMETHOD(device_probe, cxgb_controller_probe),
@@ -138,8 +144,11 @@ static driver_t cxgb_controller_driver = {
sizeof(struct adapter)
};
+static int cxgbc_mod_event(module_t, int, void *);
static devclass_t cxgb_controller_devclass;
-DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
+DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
+ cxgbc_mod_event, 0);
+MODULE_VERSION(cxgbc, 1);
/*
* Attachment glue for the ports. Attachment is done directly to the
@@ -177,6 +186,14 @@ static struct cdevsw cxgb_cdevsw = {
static devclass_t cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
+MODULE_VERSION(cxgb, 1);
+
+static struct mtx t3_list_lock;
+static SLIST_HEAD(, adapter) t3_list;
+#ifdef TCP_OFFLOAD
+static struct mtx t3_uld_list_lock;
+static SLIST_HEAD(, uld_info) t3_uld_list;
+#endif
/*
* The driver uses the best interrupt scheme available on a platform in the
@@ -195,15 +212,6 @@ SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
"MSI-X, MSI, INTx selector");
/*
- * The driver enables offload as a default.
- * To disable it, use ofld_disable = 1.
- */
-static int ofld_disable = 0;
-TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
- "disable ULP offload");
-
-/*
* The driver uses an auto-queue algorithm by default.
* To disable it and force a single queue-set per port, use multiq = 0
*/
@@ -445,6 +453,25 @@ cxgb_controller_attach(device_t dev)
sc->msi_count = 0;
ai = cxgb_get_adapter_info(dev);
+ snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
+ device_get_unit(dev));
+ ADAPTER_LOCK_INIT(sc, sc->lockbuf);
+
+ snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
+ device_get_unit(dev));
+ snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
+ device_get_unit(dev));
+ snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
+ device_get_unit(dev));
+
+ MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
+ MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
+ MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
+
+ mtx_lock(&t3_list_lock);
+ SLIST_INSERT_HEAD(&t3_list, sc, link);
+ mtx_unlock(&t3_list_lock);
+
/* find the PCIe link width and set max read request to 4KB*/
if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
uint16_t lnk;
@@ -471,24 +498,10 @@ cxgb_controller_attach(device_t dev)
if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
&sc->regs_rid, RF_ACTIVE)) == NULL) {
device_printf(dev, "Cannot allocate BAR region 0\n");
- return (ENXIO);
+ error = ENXIO;
+ goto out;
}
- snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
- device_get_unit(dev));
- ADAPTER_LOCK_INIT(sc, sc->lockbuf);
-
- snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
- device_get_unit(dev));
- snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
- device_get_unit(dev));
- snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
- device_get_unit(dev));
-
- MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
- MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
- MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
-
sc->bt = rman_get_bustag(sc->regs_res);
sc->bh = rman_get_bushandle(sc->regs_res);
sc->mmio_len = rman_get_size(sc->regs_res);
@@ -604,7 +617,7 @@ cxgb_controller_attach(device_t dev)
} else {
sc->flags |= TPS_UPTODATE;
}
-
+
/*
* Create a child device for each MAC. The ethernet attachment
* will be done in these children.
@@ -636,12 +649,7 @@ cxgb_controller_attach(device_t dev)
t3_sge_init_adapter(sc);
t3_led_ready(sc);
-
- cxgb_offload_init();
- if (is_offload(sc)) {
- setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
- cxgb_adapter_ofld(sc);
- }
+
error = t3_get_fw_version(sc, &vers);
if (error)
goto out;
@@ -662,6 +670,11 @@ cxgb_controller_attach(device_t dev)
device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
t3_add_attach_sysctls(sc);
+
+#ifdef TCP_OFFLOAD
+ for (i = 0; i < NUM_CPL_HANDLERS; i++)
+ sc->cpl_handler[i] = cpl_not_handled;
+#endif
out:
if (error)
cxgb_free(sc);
@@ -775,20 +788,9 @@ cxgb_free(struct adapter *sc)
sc->tq = NULL;
}
- if (is_offload(sc)) {
- clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
- cxgb_adapter_unofld(sc);
- }
-
-#ifdef notyet
- if (sc->flags & CXGB_OFLD_INIT)
- cxgb_offload_deactivate(sc);
-#endif
free(sc->filters, M_DEVBUF);
t3_sge_free(sc);
- cxgb_offload_exit();
-
if (sc->udbs_res != NULL)
bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
sc->udbs_res);
@@ -800,6 +802,9 @@ cxgb_free(struct adapter *sc)
MTX_DESTROY(&sc->mdio_lock);
MTX_DESTROY(&sc->sge.reg_lock);
MTX_DESTROY(&sc->elmer_lock);
+ mtx_lock(&t3_list_lock);
+ SLIST_REMOVE(&t3_list, sc, adapter, link);
+ mtx_unlock(&t3_list_lock);
ADAPTER_LOCK_DEINIT(sc);
}
@@ -1017,6 +1022,10 @@ cxgb_port_attach(device_t dev)
ifp->if_qflush = cxgb_qflush;
ifp->if_capabilities = CXGB_CAP;
+#ifdef TCP_OFFLOAD
+ if (is_offload(sc))
+ ifp->if_capabilities |= IFCAP_TOE4;
+#endif
ifp->if_capenable = CXGB_CAP_ENABLE;
ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
@@ -1420,65 +1429,6 @@ setup_rss(adapter_t *adap)
cpus, rspq_map);
}
-
-/*
- * Sends an mbuf to an offload queue driver
- * after dealing with any active network taps.
- */
-static inline int
-offload_tx(struct t3cdev *tdev, struct mbuf *m)
-{
- int ret;
-
- ret = t3_offload_tx(tdev, m);
- return (ret);
-}
-
-static int
-write_smt_entry(struct adapter *adapter, int idx)
-{
- struct port_info *pi = &adapter->port[idx];
- struct cpl_smt_write_req *req;
- struct mbuf *m;
-
- if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
- return (ENOMEM);
-
- req = mtod(m, struct cpl_smt_write_req *);
- m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
-
- req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
- req->mtu_idx = NMTUS - 1; /* should be 0 but there's a T3 bug */
- req->iff = idx;
- memset(req->src_mac1, 0, sizeof(req->src_mac1));
- memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
-
- m_set_priority(m, 1);
-
- offload_tx(&adapter->tdev, m);
-
- return (0);
-}
-
-static int
-init_smt(struct adapter *adapter)
-{
- int i;
-
- for_each_port(adapter, i)
- write_smt_entry(adapter, i);
- return 0;
-}
-
-static void
-init_port_mtus(adapter_t *adapter)
-{
- unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
-
- t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
-}
-
static void
send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
int hi, int port)
@@ -1705,45 +1655,6 @@ cxgb_down(struct adapter *sc)
t3_intr_disable(sc);
}
-static int
-offload_open(struct port_info *pi)
-{
- struct adapter *sc = pi->adapter;
- struct t3cdev *tdev = &sc->tdev;
-
- setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
-
- t3_tp_set_offload_mode(sc, 1);
- tdev->lldev = pi->ifp;
- init_port_mtus(sc);
- t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
- sc->params.rev == 0 ? sc->port[0].ifp->if_mtu : 0xffff);
- init_smt(sc);
- cxgb_add_clients(tdev);
-
- return (0);
-}
-
-static int
-offload_close(struct t3cdev *tdev)
-{
- struct adapter *adapter = tdev2adap(tdev);
-
- if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
- return (0);
-
- /* Call back all registered clients */
- cxgb_remove_clients(tdev);
-
- tdev->lldev = NULL;
- cxgb_set_dummy_ops(tdev);
- t3_tp_set_offload_mode(adapter, 0);
-
- clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
-
- return (0);
-}
-
/*
* if_init for cxgb ports.
*/
@@ -1793,15 +1704,9 @@ cxgb_init_locked(struct port_info *p)
ADAPTER_UNLOCK(sc);
}
- if (sc->open_device_map == 0) {
- if ((rc = cxgb_up(sc)) != 0)
+ if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
goto done;
- if (is_offload(sc) && !ofld_disable && offload_open(p))
- log(LOG_WARNING,
- "Could not initialize offload capabilities\n");
- }
-
PORT_LOCK(p);
if (isset(&sc->open_device_map, p->port_id) &&
(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
@@ -1929,7 +1834,6 @@ cxgb_uninit_synchronized(struct port_info *pi)
DELAY(100 * 1000);
t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
-
pi->phy.ops->power_down(&pi->phy, 1);
PORT_UNLOCK(pi);
@@ -1937,9 +1841,6 @@ cxgb_uninit_synchronized(struct port_info *pi)
pi->link_config.link_ok = 0;
t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
- if ((sc->open_device_map & PORT_MASK) == 0)
- offload_close(&sc->tdev);
-
if (sc->open_device_map == 0)
cxgb_down(pi->adapter);
@@ -2081,6 +1982,15 @@ fail:
/* Safe to do this even if cxgb_up not called yet */
cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
}
+#ifdef TCP_OFFLOAD
+ if (mask & IFCAP_TOE4) {
+ int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
+
+ error = toe_capability(p, enable);
+ if (error == 0)
+ ifp->if_capenable ^= mask;
+ }
+#endif
if (mask & IFCAP_VLAN_HWTAGGING) {
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
@@ -3362,3 +3272,235 @@ set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
mk_set_tcb_field(req, tid, word, mask, val);
}
+
+void
+t3_iterate(void (*func)(struct adapter *, void *), void *arg)
+{
+ struct adapter *sc;
+
+ mtx_lock(&t3_list_lock);
+ SLIST_FOREACH(sc, &t3_list, link) {
+ /*
+ * func should not make any assumptions about what state sc is
+ * in - the only guarantee is that sc->sc_lock is a valid lock.
+ */
+ func(sc, arg);
+ }
+ mtx_unlock(&t3_list_lock);
+}
+
+#ifdef TCP_OFFLOAD
+static int
+toe_capability(struct port_info *pi, int enable)
+{
+ int rc;
+ struct adapter *sc = pi->adapter;
+
+ ADAPTER_LOCK_ASSERT_OWNED(sc);
+
+ if (!is_offload(sc))
+ return (ENODEV);
+
+ if (enable) {
+ if (!(sc->flags & FULL_INIT_DONE)) {
+ log(LOG_WARNING,
+ "You must enable a cxgb interface first\n");
+ return (EAGAIN);
+ }
+
+ if (isset(&sc->offload_map, pi->port_id))
+ return (0);
+
+ if (!(sc->flags & TOM_INIT_DONE)) {
+ rc = t3_activate_uld(sc, ULD_TOM);
+ if (rc == EAGAIN) {
+ log(LOG_WARNING,
+ "You must kldload t3_tom.ko before trying "
+ "to enable TOE on a cxgb interface.\n");
+ }
+ if (rc != 0)
+ return (rc);
+ KASSERT(sc->tom_softc != NULL,
+ ("%s: TOM activated but softc NULL", __func__));
+ KASSERT(sc->flags & TOM_INIT_DONE,
+ ("%s: TOM activated but flag not set", __func__));
+ }
+
+ setbit(&sc->offload_map, pi->port_id);
+
+ /*
+ * XXX: Temporary code to allow iWARP to be enabled when TOE is
+ * enabled on any port. Need to figure out how to enable,
+ * disable, load, and unload iWARP cleanly.
+ */
+ if (!isset(&sc->offload_map, MAX_NPORTS) &&
+ t3_activate_uld(sc, ULD_IWARP) == 0)
+ setbit(&sc->offload_map, MAX_NPORTS);
+ } else {
+ if (!isset(&sc->offload_map, pi->port_id))
+ return (0);
+
+ KASSERT(sc->flags & TOM_INIT_DONE,
+ ("%s: TOM never initialized?", __func__));
+ clrbit(&sc->offload_map, pi->port_id);
+ }
+
+ return (0);
+}
+
+/*
+ * Add an upper layer driver to the global list.
+ */
+int
+t3_register_uld(struct uld_info *ui)
+{
+ int rc = 0;
+ struct uld_info *u;
+
+ mtx_lock(&t3_uld_list_lock);
+ SLIST_FOREACH(u, &t3_uld_list, link) {
+ if (u->uld_id == ui->uld_id) {
+ rc = EEXIST;
+ goto done;
+ }
+ }
+
+ SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
+ ui->refcount = 0;
+done:
+ mtx_unlock(&t3_uld_list_lock);
+ return (rc);
+}
+
+int
+t3_unregister_uld(struct uld_info *ui)
+{
+ int rc = EINVAL;
+ struct uld_info *u;
+
+ mtx_lock(&t3_uld_list_lock);
+
+ SLIST_FOREACH(u, &t3_uld_list, link) {
+ if (u == ui) {
+ if (ui->refcount > 0) {
+ rc = EBUSY;
+ goto done;
+ }
+
+ SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
+ rc = 0;
+ goto done;
+ }
+ }
+done:
+ mtx_unlock(&t3_uld_list_lock);
+ return (rc);
+}
+
+int
+t3_activate_uld(struct adapter *sc, int id)
+{
+ int rc = EAGAIN;
+ struct uld_info *ui;
+
+ mtx_lock(&t3_uld_list_lock);
+
+ SLIST_FOREACH(ui, &t3_uld_list, link) {
+ if (ui->uld_id == id) {
+ rc = ui->activate(sc);
+ if (rc == 0)
+ ui->refcount++;
+ goto done;
+ }
+ }
+done:
+ mtx_unlock(&t3_uld_list_lock);
+
+ return (rc);
+}
+
+int
+t3_deactivate_uld(struct adapter *sc, int id)
+{
+ int rc = EINVAL;
+ struct uld_info *ui;
+
+ mtx_lock(&t3_uld_list_lock);
+
+ SLIST_FOREACH(ui, &t3_uld_list, link) {
+ if (ui->uld_id == id) {
+ rc = ui->deactivate(sc);
+ if (rc == 0)
+ ui->refcount--;
+ goto done;
+ }
+ }
+done:
+ mtx_unlock(&t3_uld_list_lock);
+
+ return (rc);
+}
+
+static int
+cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
+ struct mbuf *m)
+{
+ m_freem(m);
+ return (EDOOFUS);
+}
+
+int
+t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
+{
+ uintptr_t *loc, new;
+
+ if (opcode >= NUM_CPL_HANDLERS)
+ return (EINVAL);
+
+ new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
+ loc = (uintptr_t *) &sc->cpl_handler[opcode];
+ atomic_store_rel_ptr(loc, new);
+
+ return (0);
+}
+#endif
+
+static int
+cxgbc_mod_event(module_t mod, int cmd, void *arg)
+{
+ int rc = 0;
+
+ switch (cmd) {
+ case MOD_LOAD:
+ mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
+ SLIST_INIT(&t3_list);
+#ifdef TCP_OFFLOAD
+ mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
+ SLIST_INIT(&t3_uld_list);
+#endif
+ break;
+
+ case MOD_UNLOAD:
+#ifdef TCP_OFFLOAD
+ mtx_lock(&t3_uld_list_lock);
+ if (!SLIST_EMPTY(&t3_uld_list)) {
+ rc = EBUSY;
+ mtx_unlock(&t3_uld_list_lock);
+ break;
+ }
+ mtx_unlock(&t3_uld_list_lock);
+ mtx_destroy(&t3_uld_list_lock);
+#endif
+ mtx_lock(&t3_list_lock);
+ if (!SLIST_EMPTY(&t3_list)) {
+ rc = EBUSY;
+ mtx_unlock(&t3_list_lock);
+ break;
+ }
+ mtx_unlock(&t3_list_lock);
+ mtx_destroy(&t3_list_lock);
+ break;
+ }
+
+ return (rc);
+}
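The per-adapter CPL dispatch table added above (cpl_handler[],
cpl_not_handled(), t3_register_cpl_handler()) replaces the global table
that the deleted cxgb_offload.c maintained. A hedged sketch of how a ULD
might install a handler; the opcode choice and the handler body are
illustrative only, while the signatures match the cpl_handler_t typedef
in cxgb_adapter.h:

    /*
     * Illustrative only: a TOM-style handler for one CPL opcode.  A
     * handler owns the mbuf and must consume or free it.
     */
    static int
    do_rx_data(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
    {
            struct adapter *sc = qs->adap; /* back-pointer added by this commit */

            /* ... look up the tid, hand the payload to the socket ... */
            m_freem(m);
            return (0);
    }

    static int
    example_activate(struct adapter *sc)
    {
            /* Passing NULL instead would restore cpl_not_handled. */
            return (t3_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data));
    }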
diff --git a/sys/dev/cxgb/cxgb_offload.c b/sys/dev/cxgb/cxgb_offload.c
deleted file mode 100644
index 2ae83bd..0000000
--- a/sys/dev/cxgb/cxgb_offload.c
+++ /dev/null
@@ -1,465 +0,0 @@
-/**************************************************************************
-
-Copyright (c) 2007-2008, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-
-***************************************************************************/
-
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/bus.h>
-#include <sys/module.h>
-#include <sys/pciio.h>
-#include <sys/conf.h>
-#include <machine/bus.h>
-#include <machine/resource.h>
-#include <sys/bus_dma.h>
-#include <sys/rman.h>
-#include <sys/ioccom.h>
-#include <sys/mbuf.h>
-#include <sys/linker.h>
-#include <sys/firmware.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
-#include <sys/smp.h>
-#include <sys/sysctl.h>
-#include <sys/syslog.h>
-#include <sys/queue.h>
-#include <sys/taskqueue.h>
-#include <sys/proc.h>
-
-#include <cxgb_include.h>
-
-#include <net/route.h>
-
-#define VALIDATE_TID 0
-MALLOC_DEFINE(M_CXGB, "cxgb", "Chelsio 10 Gigabit Ethernet and services");
-
-TAILQ_HEAD(, cxgb_client) client_list;
-TAILQ_HEAD(, t3cdev) ofld_dev_list;
-
-
-static struct mtx cxgb_db_lock;
-
-
-static int inited = 0;
-
-static inline int
-offload_activated(struct t3cdev *tdev)
-{
- struct adapter *adapter = tdev2adap(tdev);
-
- return (isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT));
-}
-
-static inline void
-register_tdev(struct t3cdev *tdev)
-{
- static int unit;
-
- mtx_lock(&cxgb_db_lock);
- snprintf(tdev->name, sizeof(tdev->name), "ofld_dev%d", unit++);
- TAILQ_INSERT_TAIL(&ofld_dev_list, tdev, entry);
- mtx_unlock(&cxgb_db_lock);
-}
-
-static inline void
-unregister_tdev(struct t3cdev *tdev)
-{
- if (!inited)
- return;
-
- mtx_lock(&cxgb_db_lock);
- TAILQ_REMOVE(&ofld_dev_list, tdev, entry);
- mtx_unlock(&cxgb_db_lock);
-}
-
-#ifndef TCP_OFFLOAD_DISABLE
-/**
- * cxgb_register_client - register an offload client
- * @client: the client
- *
- * Add the client to the client list,
- * and call backs the client for each activated offload device
- */
-void
-cxgb_register_client(struct cxgb_client *client)
-{
- struct t3cdev *tdev;
-
- mtx_lock(&cxgb_db_lock);
- TAILQ_INSERT_TAIL(&client_list, client, client_entry);
-
- if (client->add) {
- TAILQ_FOREACH(tdev, &ofld_dev_list, entry) {
- if (offload_activated(tdev)) {
- client->add(tdev);
- } else
- CTR1(KTR_CXGB,
- "cxgb_register_client: %p not activated", tdev);
-
- }
- }
- mtx_unlock(&cxgb_db_lock);
-}
-
-/**
- * cxgb_unregister_client - unregister an offload client
- * @client: the client
- *
- * Remove the client to the client list,
- * and call backs the client for each activated offload device.
- */
-void
-cxgb_unregister_client(struct cxgb_client *client)
-{
- struct t3cdev *tdev;
-
- mtx_lock(&cxgb_db_lock);
- TAILQ_REMOVE(&client_list, client, client_entry);
-
- if (client->remove) {
- TAILQ_FOREACH(tdev, &ofld_dev_list, entry) {
- if (offload_activated(tdev))
- client->remove(tdev);
- }
- }
- mtx_unlock(&cxgb_db_lock);
-}
-
-/**
- * cxgb_add_clients - activate register clients for an offload device
- * @tdev: the offload device
- *
- * Call backs all registered clients once a offload device is activated
- */
-void
-cxgb_add_clients(struct t3cdev *tdev)
-{
- struct cxgb_client *client;
-
- mtx_lock(&cxgb_db_lock);
- TAILQ_FOREACH(client, &client_list, client_entry) {
- if (client->add)
- client->add(tdev);
- }
- mtx_unlock(&cxgb_db_lock);
-}
-
-/**
- * cxgb_remove_clients - activate register clients for an offload device
- * @tdev: the offload device
- *
- * Call backs all registered clients once a offload device is deactivated
- */
-void
-cxgb_remove_clients(struct t3cdev *tdev)
-{
- struct cxgb_client *client;
-
- mtx_lock(&cxgb_db_lock);
- TAILQ_FOREACH(client, &client_list, client_entry) {
- if (client->remove)
- client->remove(tdev);
- }
- mtx_unlock(&cxgb_db_lock);
-}
-#endif
-
-/**
- * cxgb_ofld_recv - process n received offload packets
- * @dev: the offload device
- * @m: an array of offload packets
- * @n: the number of offload packets
- *
- * Process an array of ingress offload packets. Each packet is forwarded
- * to any active network taps and then passed to the offload device's receive
- * method. We optimize passing packets to the receive method by passing
- * it the whole array at once except when there are active taps.
- */
-int
-cxgb_ofld_recv(struct t3cdev *dev, struct mbuf **m, int n)
-{
-
- return dev->recv(dev, m, n);
-}
-
-/*
- * Dummy handler for Rx offload packets in case we get an offload packet before
- * proper processing is setup. This complains and drops the packet as it isn't
- * normal to get offload packets at this stage.
- */
-static int
-rx_offload_blackhole(struct t3cdev *dev, struct mbuf **m, int n)
-{
- while (n--)
- m_freem(m[n]);
- return 0;
-}
-
-static void
-dummy_neigh_update(struct t3cdev *dev, struct rtentry *neigh, uint8_t *enaddr,
- struct sockaddr *sa)
-{
-}
-
-void
-cxgb_set_dummy_ops(struct t3cdev *dev)
-{
- dev->recv = rx_offload_blackhole;
- dev->arp_update = dummy_neigh_update;
-}
-
-static int
-do_smt_write_rpl(struct t3cdev *dev, struct mbuf *m)
-{
- struct cpl_smt_write_rpl *rpl = cplhdr(m);
-
- if (rpl->status != CPL_ERR_NONE)
- log(LOG_ERR,
- "Unexpected SMT_WRITE_RPL status %u for entry %u\n",
- rpl->status, GET_TID(rpl));
-
- return CPL_RET_BUF_DONE;
-}
-
-static int
-do_l2t_write_rpl(struct t3cdev *dev, struct mbuf *m)
-{
- struct cpl_l2t_write_rpl *rpl = cplhdr(m);
-
- if (rpl->status != CPL_ERR_NONE)
- log(LOG_ERR,
- "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
- rpl->status, GET_TID(rpl));
-
- return CPL_RET_BUF_DONE;
-}
-
-static int
-do_rte_write_rpl(struct t3cdev *dev, struct mbuf *m)
-{
- struct cpl_rte_write_rpl *rpl = cplhdr(m);
-
- if (rpl->status != CPL_ERR_NONE)
- log(LOG_ERR,
- "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
- rpl->status, GET_TID(rpl));
-
- return CPL_RET_BUF_DONE;
-}
-
-static int
-do_set_tcb_rpl(struct t3cdev *dev, struct mbuf *m)
-{
- struct cpl_set_tcb_rpl *rpl = cplhdr(m);
-
- if (rpl->status != CPL_ERR_NONE)
- log(LOG_ERR,
- "Unexpected SET_TCB_RPL status %u for tid %u\n",
- rpl->status, GET_TID(rpl));
- return CPL_RET_BUF_DONE;
-}
-
-static int
-do_trace(struct t3cdev *dev, struct mbuf *m)
-{
-#if 0
- struct cpl_trace_pkt *p = cplhdr(m);
-
-
- skb->protocol = 0xffff;
- skb->dev = dev->lldev;
- skb_pull(skb, sizeof(*p));
- skb->mac.raw = mtod(m, (char *));
- netif_receive_skb(skb);
-#endif
- return 0;
-}
-
-/*
- * Process a received packet with an unknown/unexpected CPL opcode.
- */
-static int
-do_bad_cpl(struct t3cdev *dev, struct mbuf *m)
-{
- log(LOG_ERR, "%s: received bad CPL command 0x%x\n", dev->name,
- 0xFF & *mtod(m, uint32_t *));
- return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG);
-}
-
-/*
- * Handlers for each CPL opcode
- */
-static cpl_handler_func cpl_handlers[256];
-
-/*
- * T3CDEV's receive method.
- */
-int
-process_rx(struct t3cdev *dev, struct mbuf **m, int n)
-{
- while (n--) {
- struct mbuf *m0 = *m++;
- unsigned int opcode = G_OPCODE(ntohl(m0->m_pkthdr.csum_data));
- int ret;
-
- DPRINTF("processing op=0x%x m=%p data=%p\n", opcode, m0, m0->m_data);
-
- ret = cpl_handlers[opcode] (dev, m0);
-
-#if VALIDATE_TID
- if (ret & CPL_RET_UNKNOWN_TID) {
- union opcode_tid *p = cplhdr(m0);
-
- log(LOG_ERR, "%s: CPL message (opcode %u) had "
- "unknown TID %u\n", dev->name, opcode,
- G_TID(ntohl(p->opcode_tid)));
- }
-#endif
- if (ret & CPL_RET_BUF_DONE)
- m_freem(m0);
- }
- return 0;
-}
-
-/*
- * Add a new handler to the CPL dispatch table. A NULL handler may be supplied
- * to unregister an existing handler.
- */
-void
-t3_register_cpl_handler(unsigned int opcode, cpl_handler_func h)
-{
- if (opcode < NUM_CPL_CMDS)
- cpl_handlers[opcode] = h ? h : do_bad_cpl;
- else
- log(LOG_ERR, "T3C: handler registration for "
- "opcode %x failed\n", opcode);
-}
-
-/*
- * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc.
- * The allocated memory is cleared.
- */
-void *
-cxgb_alloc_mem(unsigned long size)
-{
-
- return malloc(size, M_CXGB, M_ZERO|M_NOWAIT);
-}
-
-/*
- * Free memory allocated through t3_alloc_mem().
- */
-void
-cxgb_free_mem(void *addr)
-{
- free(addr, M_CXGB);
-}
-
-static __inline int
-adap2type(struct adapter *adapter)
-{
- int type = 0;
-
- switch (adapter->params.rev) {
- case T3_REV_A:
- type = T3A;
- break;
- case T3_REV_B:
- case T3_REV_B2:
- type = T3B;
- break;
- case T3_REV_C:
- type = T3C;
- break;
- }
- return type;
-}
-
-void
-cxgb_adapter_ofld(struct adapter *adapter)
-{
- struct t3cdev *tdev = &adapter->tdev;
-
- cxgb_set_dummy_ops(tdev);
- tdev->type = adap2type(adapter);
- tdev->adapter = adapter;
- register_tdev(tdev);
-
-}
-
-void
-cxgb_adapter_unofld(struct adapter *adapter)
-{
- struct t3cdev *tdev = &adapter->tdev;
-
- tdev->recv = NULL;
- tdev->arp_update = NULL;
- unregister_tdev(tdev);
-}
-
-void
-cxgb_offload_init(void)
-{
- int i;
-
- if (inited++)
- return;
-
- mtx_init(&cxgb_db_lock, "ofld db", NULL, MTX_DEF);
-
- TAILQ_INIT(&client_list);
- TAILQ_INIT(&ofld_dev_list);
-
- for (i = 0; i < 0x100; ++i)
- cpl_handlers[i] = do_bad_cpl;
-
- t3_register_cpl_handler(CPL_SMT_WRITE_RPL, do_smt_write_rpl);
- t3_register_cpl_handler(CPL_RTE_WRITE_RPL, do_rte_write_rpl);
- t3_register_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
-
- t3_register_cpl_handler(CPL_SET_TCB_RPL, do_set_tcb_rpl);
- t3_register_cpl_handler(CPL_TRACE_PKT, do_trace);
-
-}
-
-void
-cxgb_offload_exit(void)
-{
-
- if (--inited)
- return;
-
- mtx_destroy(&cxgb_db_lock);
-}
-
-MODULE_VERSION(if_cxgb, 1);
diff --git a/sys/dev/cxgb/cxgb_offload.h b/sys/dev/cxgb/cxgb_offload.h
index a8b858e..364ab43 100644
--- a/sys/dev/cxgb/cxgb_offload.h
+++ b/sys/dev/cxgb/cxgb_offload.h
@@ -1,4 +1,3 @@
-
/**************************************************************************
Copyright (c) 2007-2008, Chelsio Inc.
@@ -33,221 +32,93 @@ $FreeBSD$
#ifndef _CXGB_OFFLOAD_H
#define _CXGB_OFFLOAD_H
-#include <common/cxgb_tcb.h>
-#include <t3cdev.h>
-
-MALLOC_DECLARE(M_CXGB);
+#ifdef TCP_OFFLOAD
+enum {
+ ULD_TOM = 1,
+ ULD_IWARP = 2,
+};
struct adapter;
-struct cxgb_client;
-
-void cxgb_offload_init(void);
-void cxgb_offload_exit(void);
-
-void cxgb_adapter_ofld(struct adapter *adapter);
-void cxgb_adapter_unofld(struct adapter *adapter);
-int cxgb_offload_activate(struct adapter *adapter);
-void cxgb_offload_deactivate(struct adapter *adapter);
-int cxgb_ofld_recv(struct t3cdev *dev, struct mbuf **m, int n);
-
-void cxgb_set_dummy_ops(struct t3cdev *dev);
-
-
-/*
- * Client registration. Users of T3 driver must register themselves.
- * The T3 driver will call the add function of every client for each T3
- * adapter activated, passing up the t3cdev ptr. Each client fills out an
- * array of callback functions to process CPL messages.
- */
-
-void cxgb_register_client(struct cxgb_client *client);
-void cxgb_unregister_client(struct cxgb_client *client);
-void cxgb_add_clients(struct t3cdev *tdev);
-void cxgb_remove_clients(struct t3cdev *tdev);
-
-typedef int (*cxgb_cpl_handler_func)(struct t3cdev *dev,
- struct mbuf *m, void *ctx);
-
-struct l2t_entry;
-struct cxgb_client {
- char *name;
- void (*add) (struct t3cdev *);
- void (*remove) (struct t3cdev *);
- cxgb_cpl_handler_func *handlers;
- int (*redirect)(void *ctx, struct rtentry *old,
- struct rtentry *new,
- struct l2t_entry *l2t);
- TAILQ_ENTRY(cxgb_client) client_entry;
+struct uld_info {
+ SLIST_ENTRY(uld_info) link;
+ int refcount;
+ int uld_id;
+ int (*activate)(struct adapter *);
+ int (*deactivate)(struct adapter *);
};
-/*
- * TID allocation services.
- */
-int cxgb_alloc_atid(struct t3cdev *dev, struct cxgb_client *client,
- void *ctx);
-int cxgb_alloc_stid(struct t3cdev *dev, struct cxgb_client *client,
- void *ctx);
-void *cxgb_free_atid(struct t3cdev *dev, int atid);
-void cxgb_free_stid(struct t3cdev *dev, int stid);
-void *cxgb_get_lctx(struct t3cdev *tdev, int stid);
-void cxgb_insert_tid(struct t3cdev *dev, struct cxgb_client *client,
- void *ctx,
- unsigned int tid);
-void cxgb_queue_tid_release(struct t3cdev *dev, unsigned int tid);
-void cxgb_remove_tid(struct t3cdev *dev, void *ctx, unsigned int tid);
-
-struct toe_tid_entry {
- struct cxgb_client *client;
- void *ctx;
+struct tom_tunables {
+ int sndbuf;
+ int ddp;
+ int indsz;
+ int ddp_thres;
};
/* CPL message priority levels */
enum {
CPL_PRIORITY_DATA = 0, /* data messages */
- CPL_PRIORITY_SETUP = 1, /* connection setup messages */
- CPL_PRIORITY_TEARDOWN = 0, /* connection teardown messages */
- CPL_PRIORITY_LISTEN = 1, /* listen start/stop messages */
- CPL_PRIORITY_ACK = 1, /* RX ACK messages */
CPL_PRIORITY_CONTROL = 1 /* offload control messages */
};
-/* Flags for return value of CPL message handlers */
-enum {
- CPL_RET_BUF_DONE = 1, // buffer processing done, buffer may be freed
- CPL_RET_BAD_MSG = 2, // bad CPL message (e.g., unknown opcode)
- CPL_RET_UNKNOWN_TID = 4 // unexpected unknown TID
-};
+#define S_HDR_NDESC 0
+#define M_HDR_NDESC 0xf
+#define V_HDR_NDESC(x) ((x) << S_HDR_NDESC)
+#define G_HDR_NDESC(x) (((x) >> S_HDR_NDESC) & M_HDR_NDESC)
-typedef int (*cpl_handler_func)(struct t3cdev *dev, struct mbuf *m);
+#define S_HDR_QSET 4
+#define M_HDR_QSET 0xf
+#define V_HDR_QSET(x) ((x) << S_HDR_QSET)
+#define G_HDR_QSET(x) (((x) >> S_HDR_QSET) & M_HDR_QSET)
-/*
- * Returns a pointer to the first byte of the CPL header in an sk_buff that
- * contains a CPL message.
- */
-static inline void *cplhdr(struct mbuf *m)
-{
- return mtod(m, uint8_t *);
-}
-
-void t3_register_cpl_handler(unsigned int opcode, cpl_handler_func h);
-
-union listen_entry {
- struct toe_tid_entry toe_tid;
- union listen_entry *next;
-};
+#define S_HDR_CTRL 8
+#define V_HDR_CTRL(x) ((x) << S_HDR_CTRL)
+#define F_HDR_CTRL V_HDR_CTRL(1U)
-union active_open_entry {
- struct toe_tid_entry toe_tid;
- union active_open_entry *next;
-};
+#define S_HDR_DF 9
+#define V_HDR_DF(x) ((x) << S_HDR_DF)
+#define F_HDR_DF V_HDR_DF(1U)
-/*
- * Holds the size, base address, free list start, etc of the TID, server TID,
- * and active-open TID tables for a offload device.
- * The tables themselves are allocated dynamically.
- */
-struct tid_info {
- struct toe_tid_entry *tid_tab;
- unsigned int ntids;
- volatile unsigned int tids_in_use;
-
- union listen_entry *stid_tab;
- unsigned int nstids;
- unsigned int stid_base;
-
- union active_open_entry *atid_tab;
- unsigned int natids;
- unsigned int atid_base;
-
- /*
- * The following members are accessed R/W so we put them in their own
- * cache lines.
- *
- * XXX We could combine the atid fields above with the lock here since
- * atids are use once (unlike other tids). OTOH the above fields are
- * usually in cache due to tid_tab.
- */
- struct mtx atid_lock /* ____cacheline_aligned_in_smp */;
- union active_open_entry *afree;
- unsigned int atids_in_use;
-
- struct mtx stid_lock /*____cacheline_aligned */;
- union listen_entry *sfree;
- unsigned int stids_in_use;
-};
+#define S_HDR_SGL 10
+#define V_HDR_SGL(x) ((x) << S_HDR_SGL)
+#define F_HDR_SGL V_HDR_SGL(1U)
-struct t3c_data {
- struct t3cdev *dev;
- unsigned int tx_max_chunk; /* max payload for TX_DATA */
- unsigned int max_wrs; /* max in-flight WRs per connection */
- unsigned int nmtus;
- const unsigned short *mtus;
- struct tid_info tid_maps;
-
- struct toe_tid_entry *tid_release_list;
- struct mtx tid_release_lock;
- struct task tid_release_task;
+struct ofld_hdr
+{
+ void *sgl; /* SGL, if F_HDR_SGL set in flags */
+ int plen; /* amount of payload (in bytes) */
+ int flags;
};
/*
- * t3cdev -> toe_data accessor
- */
-#define T3C_DATA(dev) (*(struct t3c_data **)&(dev)->l4opt)
-
-/*
- * Map an ATID or STID to their entries in the corresponding TID tables.
+ * Convenience function for fixed size CPLs that fit in 1 desc.
*/
-static inline union active_open_entry *atid2entry(const struct tid_info *t,
- unsigned int atid)
+#define M_GETHDR_OFLD(qset, ctrl, cpl) \
+ m_gethdr_ofld(qset, ctrl, sizeof(*cpl), (void **)&cpl)
+static inline struct mbuf *
+m_gethdr_ofld(int qset, int ctrl, int cpllen, void **cpl)
{
- return &t->atid_tab[atid - t->atid_base];
-}
+ struct mbuf *m;
+ struct ofld_hdr *oh;
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (m == NULL)
+ return (NULL);
-static inline union listen_entry *stid2entry(const struct tid_info *t,
- unsigned int stid)
-{
- return &t->stid_tab[stid - t->stid_base];
-}
+ oh = mtod(m, struct ofld_hdr *);
+ oh->flags = V_HDR_NDESC(1) | V_HDR_QSET(qset) | V_HDR_CTRL(ctrl);
+ *cpl = (void *)(oh + 1);
+ m->m_pkthdr.len = m->m_len = sizeof(*oh) + cpllen;
-/*
- * Find the connection corresponding to a TID.
- */
-static inline struct toe_tid_entry *lookup_tid(const struct tid_info *t,
- unsigned int tid)
-{
- return tid < t->ntids ? &(t->tid_tab[tid]) : NULL;
+ return (m);
}
-/*
- * Find the connection corresponding to a server TID.
- */
-static inline struct toe_tid_entry *lookup_stid(const struct tid_info *t,
- unsigned int tid)
-{
- if (tid < t->stid_base || tid >= t->stid_base + t->nstids)
- return NULL;
- return &(stid2entry(t, tid)->toe_tid);
-}
-
-/*
- * Find the connection corresponding to an active-open TID.
- */
-static inline struct toe_tid_entry *lookup_atid(const struct tid_info *t,
- unsigned int tid)
-{
- if (tid < t->atid_base || tid >= t->atid_base + t->natids)
- return NULL;
- return &(atid2entry(t, tid)->toe_tid);
-}
+int t3_register_uld(struct uld_info *);
+int t3_unregister_uld(struct uld_info *);
+int t3_activate_uld(struct adapter *, int);
+int t3_deactivate_uld(struct adapter *, int);
+#endif /* TCP_OFFLOAD */
-void *cxgb_alloc_mem(unsigned long size);
-void cxgb_free_mem(void *addr);
-void cxgb_neigh_update(struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa);
-void cxgb_redirect(struct rtentry *old, struct rtentry *new, struct sockaddr *sa);
-int process_rx(struct t3cdev *dev, struct mbuf **m, int n);
-int attach_t3cdev(struct t3cdev *dev);
-void detach_t3cdev(struct t3cdev *dev);
+#define CXGB_UNIMPLEMENTED() \
+ panic("IMPLEMENT: %s:%s:%d", __FUNCTION__, __FILE__, __LINE__)
-#define CXGB_UNIMPLEMENTED() panic("IMPLEMENT: %s:%s:%d", __FUNCTION__, __FILE__, __LINE__)
#endif
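A possible use of the M_GETHDR_OFLD() helper and the ofld_hdr routing
flags defined above; the choice of cpl_set_tcb_field and the surrounding
function are assumptions for illustration, not taken from the patch:

    /*
     * Illustrative sketch: build a fixed-size control CPL and hand it
     * to t3_offload_tx().  ctrl=1 sets F_HDR_CTRL in the ofld_hdr, so
     * t3_offload_tx() trims the header and routes the mbuf to
     * ctrl_xmit() on the queue set selected by "qset".
     */
    static int
    send_set_tcb_field(struct adapter *sc, int qset, unsigned int tid)
    {
            struct cpl_set_tcb_field *req;
            struct mbuf *m;

            m = M_GETHDR_OFLD(qset, 1, req); /* ofld_hdr + room for the CPL */
            if (m == NULL)
                    return (ENOMEM);
            /* ... fill in *req: opcode/tid, word, mask, val ... */
            return (t3_offload_tx(sc, m));
    }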
diff --git a/sys/dev/cxgb/cxgb_osdep.h b/sys/dev/cxgb/cxgb_osdep.h
index 5dc256d..37171d9 100644
--- a/sys/dev/cxgb/cxgb_osdep.h
+++ b/sys/dev/cxgb/cxgb_osdep.h
@@ -67,27 +67,6 @@ struct t3_mbuf_hdr {
} while (0)
#endif
-#define m_get_priority(m) ((uintptr_t)(m)->m_pkthdr.rcvif)
-#define m_set_priority(m, pri) ((m)->m_pkthdr.rcvif = (struct ifnet *)((uintptr_t)pri))
-#define m_set_sgl(m, sgl) ((m)->m_pkthdr.header = (sgl))
-#define m_get_sgl(m) ((bus_dma_segment_t *)(m)->m_pkthdr.header)
-#define m_set_sgllen(m, len) ((m)->m_pkthdr.ether_vtag = len)
-#define m_get_sgllen(m) ((m)->m_pkthdr.ether_vtag)
-
-/*
- * XXX FIXME
- */
-#define m_set_toep(m, a) ((m)->m_pkthdr.header = (a))
-#define m_get_toep(m) ((m)->m_pkthdr.header)
-#define m_set_handler(m, handler) ((m)->m_pkthdr.header = (handler))
-
-#define m_set_socket(m, a) ((m)->m_pkthdr.header = (a))
-#define m_get_socket(m) ((m)->m_pkthdr.header)
-
-#define KTR_CXGB KTR_SPARE2
-
-#define MT_DONTFREE 128
-
#if __FreeBSD_version < 800054
#if defined (__GNUC__)
#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
@@ -123,13 +102,6 @@ struct t3_mbuf_hdr {
#define CXGB_TX_CLEANUP_THRESHOLD 32
-
-#ifdef DEBUG_PRINT
-#define DPRINTF printf
-#else
-#define DPRINTF(...)
-#endif
-
#define TX_MAX_SIZE (1 << 16) /* 64KB */
#define TX_MAX_SEGS 36 /* maximum supported by card */
@@ -199,7 +171,6 @@ static const int debug_flags = DBG_RX;
#define test_and_clear_bit(bit, p) atomic_cmpset_int((p), ((*(p)) | (1<<bit)), ((*(p)) & ~(1<<bit)))
#define max_t(type, a, b) (type)max((a), (b))
-#define net_device ifnet
#define cpu_to_be32 htobe32
/* Standard PHY definitions */
diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c
index 2a6dc27..456d0e8 100644
--- a/sys/dev/cxgb/cxgb_sge.c
+++ b/sys/dev/cxgb/cxgb_sge.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/socket.h>
+#include <sys/sglist.h>
#include <net/bpf.h>
#include <net/ethernet.h>
@@ -78,6 +79,10 @@ __FBSDID("$FreeBSD$");
int txq_fills = 0;
int multiq_tx_enable = 1;
+#ifdef TCP_OFFLOAD
+CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
+#endif
+
extern struct sysctl_oid_list sysctl__hw_cxgb_children;
int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
@@ -471,10 +476,17 @@ static int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
{
- m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
+ if (resp->rss_hdr.opcode == CPL_RX_DATA) {
+ const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
+ m->m_len = sizeof(*cpl) + ntohs(cpl->len);
+ } else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
+ const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
+ m->m_len = sizeof(*cpl) + ntohs(cpl->len);
+ } else
+ m->m_len = IMMED_PKT_SIZE;
m->m_ext.ext_buf = NULL;
m->m_ext.ext_type = 0;
- memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
+ memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len);
return (0);
}
@@ -703,7 +715,8 @@ refill_fl(adapter_t *sc, struct sge_fl *q, int n)
cb_arg.error = 0;
while (n--) {
/*
- * We only allocate a cluster, mbuf allocation happens after rx
+ * We allocate an uninitialized mbuf + cluster, mbuf is
+ * initialized after rx.
*/
if (q->zone == zone_pack) {
if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
@@ -1170,57 +1183,6 @@ calc_tx_descs(const struct mbuf *m, int nsegs)
return flits_to_desc(flits);
}
-static unsigned int
-busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
- struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
-{
- struct mbuf *m0;
- int err, pktlen, pass = 0;
- bus_dma_tag_t tag = txq->entry_tag;
-
-retry:
- err = 0;
- m0 = *m;
- pktlen = m0->m_pkthdr.len;
-#if defined(__i386__) || defined(__amd64__)
- if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
- goto done;
- } else
-#endif
- err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
-
- if (err == 0) {
- goto done;
- }
- if (err == EFBIG && pass == 0) {
- pass = 1;
- /* Too many segments, try to defrag */
- m0 = m_defrag(m0, M_DONTWAIT);
- if (m0 == NULL) {
- m_freem(*m);
- *m = NULL;
- return (ENOBUFS);
- }
- *m = m0;
- goto retry;
- } else if (err == ENOMEM) {
- return (err);
- } if (err) {
- if (cxgb_debug)
- printf("map failure err=%d pktlen=%d\n", err, pktlen);
- m_freem(m0);
- *m = NULL;
- return (err);
- }
-done:
-#if !defined(__i386__) && !defined(__amd64__)
- bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
-#endif
- txsd->flags |= TX_SW_DESC_MAPPED;
-
- return (0);
-}
-
/**
* make_sgl - populate a scatter/gather list for a packet
* @sgp: the SGL to populate
@@ -1328,10 +1290,10 @@ write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs
if (__predict_true(ndesc == 1)) {
set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
- V_WR_SGLSFLT(flits)) | wr_hi,
- htonl(V_WR_LEN(flits + sgl_flits) |
- V_WR_GEN(txqs->gen)) | wr_lo);
- /* XXX gen? */
+ V_WR_SGLSFLT(flits)) | wr_hi,
+ htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
+ wr_lo);
+
wr_gen2(txd, txqs->gen);
} else {
@@ -1813,34 +1775,23 @@ cxgb_qflush(struct ifnet *ifp)
* its entirety.
*/
static __inline void
-write_imm(struct tx_desc *d, struct mbuf *m,
+write_imm(struct tx_desc *d, caddr_t src,
unsigned int len, unsigned int gen)
{
- struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
+ struct work_request_hdr *from = (struct work_request_hdr *)src;
struct work_request_hdr *to = (struct work_request_hdr *)d;
uint32_t wr_hi, wr_lo;
- if (len > WR_LEN)
- panic("len too big %d\n", len);
- if (len < sizeof(*from))
- panic("len too small %d", len);
+ KASSERT(len <= WR_LEN && len >= sizeof(*from),
+ ("%s: invalid len %d", __func__, len));
memcpy(&to[1], &from[1], len - sizeof(*from));
wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
- V_WR_BCNTLFLT(len & 7));
- wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
- V_WR_LEN((len + 7) / 8));
+ V_WR_BCNTLFLT(len & 7));
+ wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
set_wr_hdr(to, wr_hi, wr_lo);
wmb();
wr_gen2(d, gen);
-
- /*
- * This check is a hack we should really fix the logic so
- * that this can't happen
- */
- if (m->m_type != MT_DONTFREE)
- m_freem(m);
-
}
/**
@@ -1908,12 +1859,6 @@ reclaim_completed_tx_imm(struct sge_txq *q)
q->cleaned += reclaim;
}
-static __inline int
-immediate(const struct mbuf *m)
-{
- return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ;
-}
-
/**
* ctrl_xmit - send a packet through an SGE control Tx queue
* @adap: the adapter
@@ -1931,11 +1876,8 @@ ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
struct sge_txq *q = &qs->txq[TXQ_CTRL];
- if (__predict_false(!immediate(m))) {
- m_freem(m);
- return 0;
- }
-
+ KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
+
wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
wrp->wrh_lo = htonl(V_WR_TID(q->token));
@@ -1950,7 +1892,7 @@ again: reclaim_completed_tx_imm(q);
}
goto again;
}
- write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
+ write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
q->in_use++;
if (++q->pidx >= q->size) {
@@ -1960,7 +1902,9 @@ again: reclaim_completed_tx_imm(q);
TXQ_UNLOCK(qs);
wmb();
t3_write_reg(adap, A_SG_KDOORBELL,
- F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
+ F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
+
+ m_free(m);
return (0);
}
@@ -1985,7 +1929,8 @@ again: reclaim_completed_tx_imm(q);
while (q->in_use < q->size &&
(m = mbufq_dequeue(&q->sendq)) != NULL) {
- write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
+ write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
+ m_free(m);
if (++q->pidx >= q->size) {
q->pidx = 0;
@@ -2239,6 +2184,7 @@ is_new_response(const struct rsp_desc *r,
/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500
+#ifdef TCP_OFFLOAD
/**
* write_ofld_wr - write an offload work request
* @adap: the adapter
@@ -2252,71 +2198,66 @@ is_new_response(const struct rsp_desc *r,
* data already carry the work request with most fields populated.
*/
static void
-write_ofld_wr(adapter_t *adap, struct mbuf *m,
- struct sge_txq *q, unsigned int pidx,
- unsigned int gen, unsigned int ndesc,
- bus_dma_segment_t *segs, unsigned int nsegs)
+write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
+ unsigned int pidx, unsigned int gen, unsigned int ndesc)
{
unsigned int sgl_flits, flits;
+ int i, idx, nsegs, wrlen;
struct work_request_hdr *from;
- struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
+ struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
struct tx_desc *d = &q->desc[pidx];
struct txq_state txqs;
-
- if (immediate(m) && nsegs == 0) {
- write_imm(d, m, m->m_len, gen);
+ struct sglist_seg *segs;
+ struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
+ struct sglist *sgl;
+
+ from = (void *)(oh + 1); /* Start of WR within mbuf */
+ wrlen = m->m_len - sizeof(*oh);
+
+ if (!(oh->flags & F_HDR_SGL)) {
+ write_imm(d, (caddr_t)from, wrlen, gen);
+
+ /*
+ * mbuf with "real" immediate tx data will be enqueue_wr'd by
+ * t3_push_frames and freed in wr_ack. Others, like those sent
+ * down by close_conn, t3_send_reset, etc. should be freed here.
+ */
+ if (!(oh->flags & F_HDR_DF))
+ m_free(m);
return;
}
- /* Only TX_DATA builds SGLs */
- from = mtod(m, struct work_request_hdr *);
- memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
+ memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
- flits = m->m_len / 8;
- sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
+ sgl = oh->sgl;
+ flits = wrlen / 8;
+ sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
- make_sgl(sgp, segs, nsegs);
- sgl_flits = sgl_len(nsegs);
+ nsegs = sgl->sg_nseg;
+ segs = sgl->sg_segs;
+ for (idx = 0, i = 0; i < nsegs; i++) {
+ KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
+ if (i && idx == 0)
+ ++sgp;
+ sgp->len[idx] = htobe32(segs[i].ss_len);
+ sgp->addr[idx] = htobe64(segs[i].ss_paddr);
+ idx ^= 1;
+ }
+ if (idx) {
+ sgp->len[idx] = 0;
+ sgp->addr[idx] = 0;
+ }
+ sgl_flits = sgl_len(nsegs);
txqs.gen = gen;
txqs.pidx = pidx;
txqs.compl = 0;
- write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
+ write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
from->wrh_hi, from->wrh_lo);
}
/**
- * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
- * @m: the packet
- *
- * Returns the number of Tx descriptors needed for the given offload
- * packet. These packets are already fully constructed.
- */
-static __inline unsigned int
-calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
-{
- unsigned int flits, cnt = 0;
- int ndescs;
-
- if (m->m_len <= WR_LEN && nsegs == 0)
- return (1); /* packet fits as immediate data */
-
- /*
- * This needs to be re-visited for TOE
- */
-
- cnt = nsegs;
-
- /* headers */
- flits = m->m_len / 8;
-
- ndescs = flits_to_desc(flits + sgl_len(cnt));
-
- return (ndescs);
-}
-
-/**
* ofld_xmit - send a packet through an offload queue
* @adap: the adapter
* @q: the Tx offload queue
@@ -2327,28 +2268,19 @@ calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
static int
ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
- int ret, nsegs;
+ int ret;
unsigned int ndesc;
unsigned int pidx, gen;
struct sge_txq *q = &qs->txq[TXQ_OFLD];
- bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
- struct tx_sw_desc *stx;
+ struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
- nsegs = m_get_sgllen(m);
- vsegs = m_get_sgl(m);
- ndesc = calc_tx_descs_ofld(m, nsegs);
- busdma_map_sgl(vsegs, segs, nsegs);
+ ndesc = G_HDR_NDESC(oh->flags);
- stx = &q->sdesc[q->pidx];
-
TXQ_LOCK(qs);
again: reclaim_completed_tx(qs, 16, TXQ_OFLD);
ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
if (__predict_false(ret)) {
if (ret == 1) {
- printf("no ofld desc avail\n");
-
- m_set_priority(m, ndesc); /* save for restart */
TXQ_UNLOCK(qs);
return (EINTR);
}
@@ -2363,16 +2295,11 @@ again: reclaim_completed_tx(qs, 16, TXQ_OFLD);
q->pidx -= q->size;
q->gen ^= 1;
}
-#ifdef T3_TRACE
- T3_TRACE5(adap->tb[q->cntxt_id & 7],
- "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
- ndesc, pidx, skb->len, skb->len - skb->data_len,
- skb_shinfo(skb)->nr_frags);
-#endif
- TXQ_UNLOCK(qs);
- write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
+ write_ofld_wr(adap, m, q, pidx, gen, ndesc);
check_ring_tx_db(adap, q, 1);
+ TXQ_UNLOCK(qs);
+
return (0);
}
@@ -2389,16 +2316,15 @@ restart_offloadq(void *data, int npending)
struct sge_qset *qs = data;
struct sge_txq *q = &qs->txq[TXQ_OFLD];
adapter_t *adap = qs->port->adapter;
- bus_dma_segment_t segs[TX_MAX_SEGS];
- struct tx_sw_desc *stx = &q->sdesc[q->pidx];
- int nsegs, cleaned;
+ int cleaned;
TXQ_LOCK(qs);
again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
while ((m = mbufq_peek(&q->sendq)) != NULL) {
unsigned int gen, pidx;
- unsigned int ndesc = m_get_priority(m);
+ struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
+ unsigned int ndesc = G_HDR_NDESC(oh->flags);
if (__predict_false(q->size - q->in_use < ndesc)) {
setbit(&qs->txq_stopped, TXQ_OFLD);
@@ -2419,9 +2345,8 @@ again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
}
(void)mbufq_dequeue(&q->sendq);
- busdma_map_mbufs(&m, q, stx, segs, &nsegs);
TXQ_UNLOCK(qs);
- write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
+ write_ofld_wr(adap, m, q, pidx, gen, ndesc);
TXQ_LOCK(qs);
}
#if USE_GTS
@@ -2435,34 +2360,7 @@ again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
}
/**
- * queue_set - return the queue set a packet should use
- * @m: the packet
- *
- * Maps a packet to the SGE queue set it should use. The desired queue
- * set is carried in bits 1-3 in the packet's priority.
- */
-static __inline int
-queue_set(const struct mbuf *m)
-{
- return m_get_priority(m) >> 1;
-}
-
-/**
- * is_ctrl_pkt - return whether an offload packet is a control packet
- * @m: the packet
- *
- * Determines whether an offload packet should use an OFLD or a CTRL
- * Tx queue. This is indicated by bit 0 in the packet's priority.
- */
-static __inline int
-is_ctrl_pkt(const struct mbuf *m)
-{
- return m_get_priority(m) & 1;
-}
-
-/**
* t3_offload_tx - send an offload packet
- * @tdev: the offload device to send to
* @m: the packet
*
* Sends an offload packet. We use the packet priority to select the
@@ -2470,77 +2368,35 @@ is_ctrl_pkt(const struct mbuf *m)
* should be sent as regular or control, bits 1-3 select the queue set.
*/
int
-t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
-{
- adapter_t *adap = tdev2adap(tdev);
- struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
-
- if (__predict_false(is_ctrl_pkt(m)))
- return ctrl_xmit(adap, qs, m);
-
- return ofld_xmit(adap, qs, m);
-}
-
-/**
- * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
- * @tdev: the offload device that will be receiving the packets
- * @q: the SGE response queue that assembled the bundle
- * @m: the partial bundle
- * @n: the number of packets in the bundle
- *
- * Delivers a (partial) bundle of Rx offload packets to an offload device.
- */
-static __inline void
-deliver_partial_bundle(struct t3cdev *tdev,
- struct sge_rspq *q,
- struct mbuf *mbufs[], int n)
+t3_offload_tx(struct adapter *sc, struct mbuf *m)
{
- if (n) {
- q->offload_bundles++;
- cxgb_ofld_recv(tdev, mbufs, n);
- }
-}
+ struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
+ struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
-static __inline int
-rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
- struct mbuf *m, struct mbuf *rx_gather[],
- unsigned int gather_idx)
-{
-
- rq->offload_pkts++;
- m->m_pkthdr.header = mtod(m, void *);
- rx_gather[gather_idx++] = m;
- if (gather_idx == RX_BUNDLE_SIZE) {
- cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
- gather_idx = 0;
- rq->offload_bundles++;
- }
- return (gather_idx);
+ if (oh->flags & F_HDR_CTRL) {
+ m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */
+ return (ctrl_xmit(sc, qs, m));
+ } else
+ return (ofld_xmit(sc, qs, m));
}
+#endif
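
Dispatch is now driven entirely by that header rather than by mbuf priority bits: F_HDR_CTRL selects the control queue (and the header is trimmed first, since control work requests carry no ofld_hdr on the wire); otherwise the packet goes to the offload queue of the qset encoded in the header. A hedged caller sketch, reusing the hypothetical mk_ofld_pkt() from the earlier sketch:

	static int
	queue_ctrl_wr(struct adapter *sc, int qset, int wr_len)
	{
		struct mbuf *m;

		m = mk_ofld_pkt(wr_len, 1 /* ndesc */, qset, 1 /* ctrl */);
		if (m == NULL)
			return (ENOMEM);
		/* build the WR at mtod(m, char *) + sizeof(struct ofld_hdr) */
		return (t3_offload_tx(sc, m));
	}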
static void
restart_tx(struct sge_qset *qs)
{
struct adapter *sc = qs->port->adapter;
-
-
+
if (isset(&qs->txq_stopped, TXQ_OFLD) &&
should_restart_tx(&qs->txq[TXQ_OFLD]) &&
test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
qs->txq[TXQ_OFLD].restarts++;
- DPRINTF("restarting TXQ_OFLD\n");
taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
}
- DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
- qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
- qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
- qs->txq[TXQ_CTRL].in_use);
-
+
if (isset(&qs->txq_stopped, TXQ_CTRL) &&
should_restart_tx(&qs->txq[TXQ_CTRL]) &&
test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
qs->txq[TXQ_CTRL].restarts++;
- DPRINTF("restarting TXQ_CTRL\n");
taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
}
}
@@ -2569,6 +2425,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
q->port = pi;
+ q->adap = sc;
if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
@@ -2630,8 +2487,10 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
q->txq[i].gen = 1;
q->txq[i].size = p->txq_size[i];
}
-
+
+#ifdef TCP_OFFLOAD
TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
+#endif
TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
@@ -2736,8 +2595,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
mtx_unlock_spin(&sc->sge.reg_lock);
t3_update_qset_coalesce(q, p);
- q->port = pi;
-
+
refill_fl(sc, &q->fl[0], q->fl[0].size);
refill_fl(sc, &q->fl[1], q->fl[1].size);
refill_rspq(sc, &q->rspq, q->rspq.size - 1);
@@ -2768,8 +2626,6 @@ t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
struct ifnet *ifp = pi->ifp;
- DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
-
if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
cpl->csum_valid && cpl->csum == 0xffff) {
m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
@@ -2967,8 +2823,6 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
int skip_lro;
struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
#endif
- struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
- int ngathered = 0;
struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
#ifdef DEBUG
static int last_holdoff = 0;
@@ -2982,10 +2836,10 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
while (__predict_true(budget_left && is_new_response(r, rspq))) {
int eth, eop = 0, ethpad = 0;
uint32_t flags = ntohl(r->flags);
- uint32_t rss_csum = *(const uint32_t *)r;
uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
+ uint8_t opcode = r->rss_hdr.opcode;
- eth = (r->rss_hdr.opcode == CPL_RX_PKT);
+ eth = (opcode == CPL_RX_PKT);
if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
struct mbuf *m;
@@ -3005,27 +2859,27 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
memcpy(mtod(m, char *), r, AN_PKT_SIZE);
m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
*mtod(m, char *) = CPL_ASYNC_NOTIF;
- rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
+ opcode = CPL_ASYNC_NOTIF;
eop = 1;
rspq->async_notif++;
goto skip;
} else if (flags & F_RSPD_IMM_DATA_VALID) {
- struct mbuf *m = NULL;
+ struct mbuf *m = m_gethdr(M_DONTWAIT, MT_DATA);
- DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
- r->rss_hdr.opcode, rspq->cidx);
- if (mh->mh_head == NULL)
- mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
- else
- m = m_gethdr(M_DONTWAIT, MT_DATA);
-
- if (mh->mh_head == NULL && m == NULL) {
+ if (m == NULL) {
no_mem:
rspq->next_holdoff = NOMEM_INTR_DELAY;
budget_left--;
break;
}
- get_imm_packet(adap, r, mh->mh_head);
+ if (mh->mh_head == NULL)
+ mh->mh_head = m;
+ else
+ mh->mh_tail->m_next = m;
+ mh->mh_tail = m;
+
+ get_imm_packet(adap, r, m);
+ mh->mh_head->m_pkthdr.len += m->m_len;
eop = 1;
rspq->imm_data++;
} else if (r->len_cq) {
@@ -3048,30 +2902,14 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
handle_rsp_cntrl_info(qs, flags);
}
- r++;
- if (__predict_false(++rspq->cidx == rspq->size)) {
- rspq->cidx = 0;
- rspq->gen ^= 1;
- r = rspq->desc;
- }
-
- if (++rspq->credits >= 64) {
- refill_rspq(adap, rspq, rspq->credits);
- rspq->credits = 0;
- }
if (!eth && eop) {
- mh->mh_head->m_pkthdr.csum_data = rss_csum;
- /*
- * XXX size mismatch
- */
- m_set_priority(mh->mh_head, rss_hash);
-
-
- ngathered = rx_offload(&adap->tdev, rspq,
- mh->mh_head, offload_mbufs, ngathered);
+ rspq->offload_pkts++;
+#ifdef TCP_OFFLOAD
+ adap->cpl_handler[opcode](qs, r, mh->mh_head);
+#else
+ m_freem(mh->mh_head);
+#endif
mh->mh_head = NULL;
- DPRINTF("received offload packet\n");
-
} else if (eth && eop) {
struct mbuf *m = mh->mh_head;
@@ -3106,13 +2944,23 @@ process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
mh->mh_head = NULL;
}
+
+ r++;
+ if (__predict_false(++rspq->cidx == rspq->size)) {
+ rspq->cidx = 0;
+ rspq->gen ^= 1;
+ r = rspq->desc;
+ }
+
+ if (++rspq->credits >= 64) {
+ refill_rspq(adap, rspq, rspq->credits);
+ rspq->credits = 0;
+ }
__refill_fl_lt(adap, &qs->fl[0], 32);
__refill_fl_lt(adap, &qs->fl[1], 32);
--budget_left;
}
- deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
-
#if defined(INET6) || defined(INET)
/* Flush LRO */
while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
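
Rx offload delivery changed shape as well: instead of gathering mbufs into RX_BUNDLE_SIZE bundles for a t3cdev, process_responses() above hands each completed message straight to the adapter's per-opcode table via adap->cpl_handler[opcode](qs, r, m), so a handler owns the mbuf and must consume or free it. The shape such a handler takes (the name and body are illustrative):

	static int
	my_cpl_handler(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
	{
		/* decode the CPL at mtod(m, ...), as terminate() below does */

		m_freem(m);	/* the handler owns the mbuf */
		return (0);
	}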
diff --git a/sys/dev/cxgb/sys/mvec.h b/sys/dev/cxgb/sys/mvec.h
index 9db27dd..dff6f03 100644
--- a/sys/dev/cxgb/sys/mvec.h
+++ b/sys/dev/cxgb/sys/mvec.h
@@ -31,15 +31,6 @@
#define _MVEC_H_
#include <machine/bus.h>
-#define M_DDP 0x200000 /* direct data placement mbuf */
-#define EXT_PHYS 10 /* physical/bus address */
-
-#define m_cur_offset m_ext.ext_size /* override to provide ddp offset */
-#define m_seq m_pkthdr.csum_data /* stored sequence */
-#define m_ddp_gl m_ext.ext_buf /* ddp list */
-#define m_ddp_flags m_pkthdr.csum_flags /* ddp flags */
-#define m_ulp_mode m_pkthdr.tso_segsz /* upper level protocol */
-
static __inline void
busdma_map_mbuf_fast(bus_dma_tag_t tag, bus_dmamap_t map,
struct mbuf *m, bus_dma_segment_t *seg)
@@ -58,17 +49,6 @@ int busdma_map_sg_collapse(bus_dma_tag_t tag, bus_dmamap_t map,
struct mbuf **m, bus_dma_segment_t *segs, int *nsegs);
void busdma_map_sg_vec(bus_dma_tag_t tag, bus_dmamap_t map,
struct mbuf *m, bus_dma_segment_t *segs, int *nsegs);
-static __inline int
-busdma_map_sgl(bus_dma_segment_t *vsegs, bus_dma_segment_t *segs, int count)
-{
- while (count--) {
- segs->ds_addr = pmap_kextract((vm_offset_t)vsegs->ds_addr);
- segs->ds_len = vsegs->ds_len;
- segs++;
- vsegs++;
- }
- return (0);
-}
static __inline void
m_freem_list(struct mbuf *m)
@@ -84,5 +64,4 @@ m_freem_list(struct mbuf *m)
}
}
-
#endif /* _MVEC_H_ */
diff --git a/sys/dev/cxgb/t3cdev.h b/sys/dev/cxgb/t3cdev.h
deleted file mode 100644
index e0004b7..0000000
--- a/sys/dev/cxgb/t3cdev.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*-
- * Copyright (c) 2007-2008, Chelsio Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-#ifndef _T3CDEV_H_
-#define _T3CDEV_H_
-
-#define T3CNAMSIZ 16
-
-/* Get the t3cdev associated with an ifnet */
-#define T3CDEV(ifp) (&(((struct port_info *)(ifp)->if_softc))->adapter->tdev)
-
-struct cxgb3_client;
-
-enum t3ctype {
- T3A = 0,
- T3B,
- T3C
-};
-
-struct t3cdev {
- char name[T3CNAMSIZ]; /* T3C device name */
- enum t3ctype type;
- TAILQ_ENTRY(t3cdev) entry; /* for list linking */
- struct ifnet *lldev; /* LL dev associated with T3C messages */
- struct adapter *adapter;
- int (*send)(struct t3cdev *dev, struct mbuf *m);
- int (*recv)(struct t3cdev *dev, struct mbuf **m, int n);
- int (*ctl)(struct t3cdev *dev, unsigned int req, void *data);
- void (*arp_update)(struct t3cdev *dev, struct rtentry *neigh, uint8_t *enaddr, struct sockaddr *sa);
- void *priv; /* driver private data */
- void *l2opt; /* optional layer 2 data */
- void *l3opt; /* optional layer 3 data */
- void *l4opt; /* optional layer 4 data */
- void *ulp; /* ulp stuff */
-};
-
-#endif /* _T3CDEV_H_ */
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c
index a4f2ff6..c8652a0 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c
@@ -29,11 +29,12 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
-#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@@ -54,20 +55,14 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/eventhandler.h>
-#if __FreeBSD_version < 800044
-#define V_ifnet ifnet
-#endif
-
-#include <net/if.h>
-#include <net/if_var.h>
-#if __FreeBSD_version >= 800056
-#include <net/vnet.h>
-#endif
-
#include <netinet/in.h>
+#include <netinet/toecore.h>
-#include <contrib/rdma/ib_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
+#ifdef TCP_OFFLOAD
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
@@ -75,26 +70,21 @@ __FBSDID("$FreeBSD$");
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>
-/*
- * XXX :-/
- *
- */
-
-#define idr_init(x)
+static int iwch_mod_load(void);
+static int iwch_mod_unload(void);
+static int iwch_activate(struct adapter *);
+static int iwch_deactivate(struct adapter *);
-cxgb_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
-
-static void open_rnic_dev(struct t3cdev *);
-static void close_rnic_dev(struct t3cdev *);
-
-static TAILQ_HEAD( ,iwch_dev) dev_list;
-static struct mtx dev_mutex;
-static eventhandler_tag event_tag;
+static struct uld_info iwch_uld_info = {
+ .uld_id = ULD_IWARP,
+ .activate = iwch_activate,
+ .deactivate = iwch_deactivate,
+};
static void
rnic_init(struct iwch_dev *rnicp)
{
- CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, rnicp);
+
idr_init(&rnicp->cqidr);
idr_init(&rnicp->qpidr);
idr_init(&rnicp->mmidr);
@@ -103,15 +93,16 @@ rnic_init(struct iwch_dev *rnicp)
rnicp->attr.vendor_id = 0x168;
rnicp->attr.vendor_part_id = 7;
rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
- rnicp->attr.max_wrs = (1UL << 24) - 1;
+ rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
- rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1;
+ rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH;
rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
- rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */
+ rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK;
+ rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
rnicp->attr.can_resize_wq = 0;
rnicp->attr.max_rdma_reads_per_qp = 8;
rnicp->attr.max_rdma_read_resources =
@@ -127,170 +118,183 @@ rnic_init(struct iwch_dev *rnicp)
rnicp->attr.zbva_support = 1;
rnicp->attr.local_invalidate_fence = 1;
rnicp->attr.cq_overflow_detection = 1;
+
return;
}
static void
-open_rnic_dev(struct t3cdev *tdev)
+rnic_uninit(struct iwch_dev *rnicp)
+{
+ idr_destroy(&rnicp->cqidr);
+ idr_destroy(&rnicp->qpidr);
+ idr_destroy(&rnicp->mmidr);
+ mtx_destroy(&rnicp->lock);
+}
+
+static int
+iwch_activate(struct adapter *sc)
{
struct iwch_dev *rnicp;
- static int vers_printed;
+ int rc;
+
+ KASSERT(!isset(&sc->offload_map, MAX_NPORTS),
+ ("%s: iWARP already activated on %s", __func__,
+ device_get_nameunit(sc->dev)));
- CTR2(KTR_IW_CXGB, "%s t3cdev %p", __FUNCTION__, tdev);
- if (!vers_printed++)
- printf("Chelsio T3 RDMA Driver - version x.xx\n");
rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
- if (!rnicp) {
- printf("Cannot allocate ib device\n");
- return;
- }
- rnicp->rdev.ulp = rnicp;
- rnicp->rdev.t3cdev_p = tdev;
+ if (rnicp == NULL)
+ return (ENOMEM);
+
+ sc->iwarp_softc = rnicp;
+ rnicp->rdev.adap = sc;
- mtx_lock(&dev_mutex);
+ cxio_hal_init(sc);
+ iwch_cm_init_cpl(sc);
- if (cxio_rdev_open(&rnicp->rdev)) {
- mtx_unlock(&dev_mutex);
+ rc = cxio_rdev_open(&rnicp->rdev);
+ if (rc != 0) {
printf("Unable to open CXIO rdev\n");
- ib_dealloc_device(&rnicp->ibdev);
- return;
+ goto err1;
}
rnic_init(rnicp);
- TAILQ_INSERT_TAIL(&dev_list, rnicp, entry);
- mtx_unlock(&dev_mutex);
-
- if (iwch_register_device(rnicp)) {
+ rc = iwch_register_device(rnicp);
+ if (rc != 0) {
printf("Unable to register device\n");
- close_rnic_dev(tdev);
+ goto err2;
}
-#ifdef notyet
- printf("Initialized device %s\n",
- pci_name(rnicp->rdev.rnic_info.pdev));
-#endif
- return;
+
+ return (0);
+
+err2:
+ rnic_uninit(rnicp);
+ cxio_rdev_close(&rnicp->rdev);
+err1:
+ cxio_hal_uninit(sc);
+ iwch_cm_term_cpl(sc);
+ sc->iwarp_softc = NULL;
+
+ return (rc);
}
-static void
-close_rnic_dev(struct t3cdev *tdev)
+static int
+iwch_deactivate(struct adapter *sc)
{
- struct iwch_dev *dev, *tmp;
- CTR2(KTR_IW_CXGB, "%s t3cdev %p", __FUNCTION__, tdev);
- mtx_lock(&dev_mutex);
-
- TAILQ_FOREACH_SAFE(dev, &dev_list, entry, tmp) {
- if (dev->rdev.t3cdev_p == tdev) {
-#ifdef notyet
- list_del(&dev->entry);
- iwch_unregister_device(dev);
- cxio_rdev_close(&dev->rdev);
- idr_destroy(&dev->cqidr);
- idr_destroy(&dev->qpidr);
- idr_destroy(&dev->mmidr);
- ib_dealloc_device(&dev->ibdev);
-#endif
- break;
- }
- }
- mtx_unlock(&dev_mutex);
+ struct iwch_dev *rnicp;
+
+ rnicp = sc->iwarp_softc;
+
+ iwch_unregister_device(rnicp);
+ rnic_uninit(rnicp);
+ cxio_rdev_close(&rnicp->rdev);
+ cxio_hal_uninit(sc);
+ iwch_cm_term_cpl(sc);
+ ib_dealloc_device(&rnicp->ibdev);
+
+ sc->iwarp_softc = NULL;
+
+ return (0);
}
-static ifaddr_event_handler_t
-ifaddr_event_handler(void *arg, struct ifnet *ifp)
+static void
+iwch_activate_all(struct adapter *sc, void *arg __unused)
{
- printf("%s if name %s \n", __FUNCTION__, ifp->if_xname);
- if (ifp->if_capabilities & IFCAP_TOE4) {
- KASSERT(T3CDEV(ifp) != NULL, ("null t3cdev ptr!"));
- if (cxio_hal_find_rdev_by_t3cdev(T3CDEV(ifp)) == NULL)
- open_rnic_dev(T3CDEV(ifp));
- }
- return 0;
+ ADAPTER_LOCK(sc);
+ if ((sc->open_device_map & sc->offload_map) != 0 &&
+ t3_activate_uld(sc, ULD_IWARP) == 0)
+ setbit(&sc->offload_map, MAX_NPORTS);
+ ADAPTER_UNLOCK(sc);
}
+static void
+iwch_deactivate_all(struct adapter *sc, void *arg __unused)
+{
+ ADAPTER_LOCK(sc);
+ if (isset(&sc->offload_map, MAX_NPORTS) &&
+ t3_deactivate_uld(sc, ULD_IWARP) == 0)
+ clrbit(&sc->offload_map, MAX_NPORTS);
+ ADAPTER_UNLOCK(sc);
+}
static int
-iwch_init_module(void)
+iwch_mod_load(void)
{
- VNET_ITERATOR_DECL(vnet_iter);
- int err;
- struct ifnet *ifp;
-
- printf("%s enter\n", __FUNCTION__);
- TAILQ_INIT(&dev_list);
- mtx_init(&dev_mutex, "iwch dev_list lock", NULL, MTX_DEF);
-
- err = cxio_hal_init();
- if (err)
- return err;
- err = iwch_cm_init();
- if (err)
- return err;
- cxio_register_ev_cb(iwch_ev_dispatch);
-
- /* Register for ifaddr events to dynamically add TOE devs */
- event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_event_handler,
- NULL, EVENTHANDLER_PRI_ANY);
-
- /* Register existing TOE interfaces by walking the ifnet chain */
- IFNET_RLOCK();
- VNET_LIST_RLOCK();
- VNET_FOREACH(vnet_iter) {
- CURVNET_SET(vnet_iter); /* XXX CURVNET_SET_QUIET() ? */
- TAILQ_FOREACH(ifp, &V_ifnet, if_link)
- (void)ifaddr_event_handler(NULL, ifp);
- CURVNET_RESTORE();
+ int rc;
+
+ rc = iwch_cm_init();
+ if (rc != 0)
+ return (rc);
+
+ rc = t3_register_uld(&iwch_uld_info);
+ if (rc != 0) {
+ iwch_cm_term();
+ return (rc);
}
- VNET_LIST_RUNLOCK();
- IFNET_RUNLOCK();
- return 0;
+
+ t3_iterate(iwch_activate_all, NULL);
+
+ return (rc);
}
-static void
-iwch_exit_module(void)
+static int
+iwch_mod_unload(void)
{
- EVENTHANDLER_DEREGISTER(ifaddr_event, event_tag);
- cxio_unregister_ev_cb(iwch_ev_dispatch);
+ t3_iterate(iwch_deactivate_all, NULL);
+
iwch_cm_term();
- cxio_hal_exit();
+
+ if (t3_unregister_uld(&iwch_uld_info) == EBUSY)
+ return (EBUSY);
+
+ return (0);
}
+#endif /* TCP_OFFLOAD */
-static int
-iwch_load(module_t mod, int cmd, void *arg)
+#undef MODULE_VERSION
+#include <sys/module.h>
+
+static int
+iwch_modevent(module_t mod, int cmd, void *arg)
{
- int err = 0;
-
- switch (cmd) {
- case MOD_LOAD:
- printf("Loading iw_cxgb.\n");
-
- iwch_init_module();
- break;
- case MOD_QUIESCE:
- break;
- case MOD_UNLOAD:
- printf("Unloading iw_cxgb.\n");
- iwch_exit_module();
- break;
- case MOD_SHUTDOWN:
- break;
- default:
- err = EOPNOTSUPP;
- break;
- }
-
- return (err);
+ int rc = 0;
+
+#ifdef TCP_OFFLOAD
+ switch (cmd) {
+ case MOD_LOAD:
+ rc = iwch_mod_load();
+ if (rc)
+ printf("iw_cxgb: Chelsio T3 RDMA Driver failed to load\n");
+ else
+ printf("iw_cxgb: Chelsio T3 RDMA Driver loaded\n");
+ break;
+
+ case MOD_UNLOAD:
+ rc = iwch_mod_unload();
+ if (rc)
+ printf("iw_cxgb: Chelsio T3 RDMA Driver failed to unload\n");
+ else
+ printf("iw_cxgb: Chelsio T3 RDMA Driver unloaded\n");
+ break;
+
+ default:
+ rc = EINVAL;
+ }
+#else
+ printf("iw_cxgb: compiled without TCP_OFFLOAD support.\n");
+ rc = EOPNOTSUPP;
+#endif
+ return (rc);
}
-static moduledata_t mod_data = {
+static moduledata_t iwch_mod_data = {
"iw_cxgb",
- iwch_load,
+ iwch_modevent,
0
};
MODULE_VERSION(iw_cxgb, 1);
-DECLARE_MODULE(iw_cxgb, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
-MODULE_DEPEND(iw_cxgb, rdma_core, 1, 1, 1);
-MODULE_DEPEND(iw_cxgb, if_cxgb, 1, 1, 1);
+DECLARE_MODULE(iw_cxgb, iwch_mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
+MODULE_DEPEND(iw_cxgb, cxgbc, 1, 1, 1);
+MODULE_DEPEND(iw_cxgb, toecore, 1, 1, 1);
MODULE_DEPEND(iw_cxgb, t3_tom, 1, 1, 1);
-
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h
index f4b2856..9fd3e0c 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h
@@ -37,6 +37,13 @@ struct iwch_cq;
struct iwch_qp;
struct iwch_mr;
+enum t3ctype {
+ T3A = 0,
+ T3B,
+ T3C
+};
+
+#define PAGE_MASK_IWARP (~(PAGE_SIZE-1))
struct iwch_rnic_attributes {
u32 vendor_id;
@@ -57,6 +64,7 @@ struct iwch_rnic_attributes {
* size (4k)^i. Phys block list mode unsupported.
*/
u32 mem_pgsizes_bitmask;
+ u64 max_mr_size;
u8 can_resize_wq;
/*
@@ -97,9 +105,9 @@ struct iwch_dev {
struct cxio_rdev rdev;
u32 device_cap_flags;
struct iwch_rnic_attributes attr;
- struct kvl cqidr;
- struct kvl qpidr;
- struct kvl mmidr;
+ struct idr cqidr;
+ struct idr qpidr;
+ struct idr mmidr;
struct mtx lock;
TAILQ_ENTRY(iwch_dev) entry;
};
@@ -113,40 +121,43 @@ static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
return container_of(ibdev, struct iwch_dev, ibdev);
}
-static inline int t3b_device(const struct iwch_dev *rhp)
+static inline int t3b_device(const struct iwch_dev *rhp __unused)
{
- return rhp->rdev.t3cdev_p->type == T3B;
+ return (0);
}
-static inline int t3a_device(const struct iwch_dev *rhp)
+static inline int t3a_device(const struct iwch_dev *rhp __unused)
{
- return rhp->rdev.t3cdev_p->type == T3A;
+ return (0);
}
static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid)
{
- return kvl_lookup(&rhp->cqidr, cqid);
+ return idr_find(&rhp->cqidr, cqid);
}
static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid)
{
- return kvl_lookup(&rhp->qpidr, qpid);
+ return idr_find(&rhp->qpidr, qpid);
}
static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid)
{
- return kvl_lookup(&rhp->mmidr, mmid);
+ return idr_find(&rhp->mmidr, mmid);
}
-static inline int insert_handle(struct iwch_dev *rhp, struct kvl *kvlp,
+static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr,
void *handle, u32 id)
{
int ret;
u32 newid;
do {
+ if (!idr_pre_get(idr, GFP_KERNEL)) {
+ return -ENOMEM;
+ }
mtx_lock(&rhp->lock);
- ret = kvl_alloc_above(kvlp, handle, id, &newid);
+ ret = idr_get_new_above(idr, handle, id, &newid);
WARN_ON(ret != 0);
WARN_ON(!ret && newid != id);
mtx_unlock(&rhp->lock);
@@ -155,14 +166,12 @@ static inline int insert_handle(struct iwch_dev *rhp, struct kvl *kvlp,
return ret;
}
-static inline void remove_handle(struct iwch_dev *rhp, struct kvl *kvlp, u32 id)
+static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id)
{
mtx_lock(&rhp->lock);
- kvl_delete(kvlp, id);
+ idr_remove(idr, id);
mtx_unlock(&rhp->lock);
}
-extern struct cxgb_client t3c_client;
-extern cxgb_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
-extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m);
+void iwch_ev_dispatch(struct iwch_dev *, struct mbuf *);
#endif
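
The kvl map gives way to the Linux-compatible idr, and insert_handle() above is the standard idr_pre_get()/idr_get_new_above() retry loop: preallocate outside the lock, retry if the preallocation was consumed. A usage sketch under assumed surrounding types (the CQ create path is illustrative):

	static int
	register_cq(struct iwch_dev *rhp, struct iwch_cq *chp, u32 cqid)
	{
		int err;

		/* maps cqid -> chp in rhp->cqidr; returns -ENOMEM on failure */
		err = insert_handle(rhp, &rhp->cqidr, chp, cqid);
		if (err)
			return (err);
		/* ... on destroy: remove_handle(rhp, &rhp->cqidr, cqid); */
		return (0);
	}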
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c
index d95e48d..9afad62 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c
@@ -29,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
-#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@@ -66,13 +68,17 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp.h>
#include <netinet/tcpip.h>
-#include <contrib/rdma/ib_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-#include <ulp/tom/cxgb_defs.h>
#include <ulp/tom/cxgb_toepcb.h>
+
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
@@ -97,46 +103,46 @@ static char *states[] = {
};
#endif
-SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");
+SYSCTL_NODE(_hw, OID_AUTO, iw_cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");
-static int ep_timeout_secs = 10;
+static int ep_timeout_secs = 60;
TUNABLE_INT("hw.iw_cxgb.ep_timeout_secs", &ep_timeout_secs);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RDTUN, &ep_timeout_secs, 0,
- "CM Endpoint operation timeout in seconds (default=10)");
+SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RW, &ep_timeout_secs, 0,
+ "CM Endpoint operation timeout in seconds (default=60)");
static int mpa_rev = 1;
TUNABLE_INT("hw.iw_cxgb.mpa_rev", &mpa_rev);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RDTUN, &mpa_rev, 0,
+SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0,
"MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)");
static int markers_enabled = 0;
TUNABLE_INT("hw.iw_cxgb.markers_enabled", &markers_enabled);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RDTUN, &markers_enabled, 0,
+SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RW, &markers_enabled, 0,
"Enable MPA MARKERS (default(0)=disabled)");
static int crc_enabled = 1;
TUNABLE_INT("hw.iw_cxgb.crc_enabled", &crc_enabled);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RDTUN, &crc_enabled, 0,
+SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RW, &crc_enabled, 0,
"Enable MPA CRC (default(1)=enabled)");
static int rcv_win = 256 * 1024;
TUNABLE_INT("hw.iw_cxgb.rcv_win", &rcv_win);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RDTUN, &rcv_win, 0,
+SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RW, &rcv_win, 0,
"TCP receive window in bytes (default=256KB)");
static int snd_win = 32 * 1024;
TUNABLE_INT("hw.iw_cxgb.snd_win", &snd_win);
-SYSCTL_INT(_hw_cxgb, OID_AUTO, snd_win, CTLFLAG_RDTUN, &snd_win, 0,
+SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, snd_win, CTLFLAG_RW, &snd_win, 0,
"TCP send window in bytes (default=32KB)");
static unsigned int nocong = 0;
TUNABLE_INT("hw.iw_cxgb.nocong", &nocong);
-SYSCTL_UINT(_hw_cxgb, OID_AUTO, nocong, CTLFLAG_RDTUN, &nocong, 0,
+SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, nocong, CTLFLAG_RW, &nocong, 0,
"Turn off congestion control (default=0)");
static unsigned int cong_flavor = 1;
TUNABLE_INT("hw.iw_cxgb.cong_flavor", &cong_flavor);
-SYSCTL_UINT(_hw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RDTUN, &cong_flavor, 0,
+SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RW, &cong_flavor, 0,
"TCP Congestion control flavor (default=1)");
static void ep_timeout(void *arg);
@@ -174,42 +180,44 @@ static void
stop_ep_timer(struct iwch_ep *ep)
{
CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
+ if (!callout_pending(&ep->timer)) {
+ CTR3(KTR_IW_CXGB, "%s timer stopped when its not running! ep %p state %u\n",
+ __func__, ep, ep->com.state);
+ return;
+ }
callout_drain(&ep->timer);
put_ep(&ep->com);
}
-static int set_tcpinfo(struct iwch_ep *ep)
+static int
+set_tcpinfo(struct iwch_ep *ep)
{
- struct tcp_info ti;
- struct sockopt sopt;
- int err;
+ struct socket *so = ep->com.so;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+ struct toepcb *toep;
+ int rc = 0;
- sopt.sopt_dir = SOPT_GET;
- sopt.sopt_level = IPPROTO_TCP;
- sopt.sopt_name = TCP_INFO;
- sopt.sopt_val = (caddr_t)&ti;
- sopt.sopt_valsize = sizeof ti;
- sopt.sopt_td = NULL;
-
- err = sogetopt(ep->com.so, &sopt);
- if (err) {
- printf("%s can't get tcpinfo\n", __FUNCTION__);
- return -err;
- }
- if (!(ti.tcpi_options & TCPI_OPT_TOE)) {
- printf("%s connection NOT OFFLOADED!\n", __FUNCTION__);
- return -EINVAL;
+ INP_WLOCK(inp);
+ tp = intotcpcb(inp);
+
+ if ((tp->t_flags & TF_TOE) == 0) {
+ rc = EINVAL;
+ printf("%s: connection NOT OFFLOADED!\n", __func__);
+ goto done;
}
+ toep = tp->t_toe;
- ep->snd_seq = ti.tcpi_snd_nxt;
- ep->rcv_seq = ti.tcpi_rcv_nxt;
- ep->emss = ti.tcpi_snd_mss - sizeof(struct tcpiphdr);
- ep->hwtid = TOEPCB(ep->com.so)->tp_tid; /* XXX */
- if (ti.tcpi_options & TCPI_OPT_TIMESTAMPS)
- ep->emss -= 12;
+ ep->hwtid = toep->tp_tid;
+ ep->snd_seq = tp->snd_nxt;
+ ep->rcv_seq = tp->rcv_nxt;
+ ep->emss = tp->t_maxseg;
if (ep->emss < 128)
ep->emss = 128;
- return 0;
+done:
+ INP_WUNLOCK(inp);
+ return (rc);
}
static enum iwch_ep_state
@@ -264,56 +272,6 @@ void __free_ep(struct iwch_ep_common *epc)
free(epc, M_DEVBUF);
}
-int
-iwch_quiesce_tid(struct iwch_ep *ep)
-{
-#ifdef notyet
- struct cpl_set_tcb_field *req;
- struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);
-
- if (m == NULL)
- return (-ENOMEM);
- req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
- req->reply = 0;
- req->cpu_idx = 0;
- req->word = htons(W_TCB_RX_QUIESCE);
- req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
- req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
-
- m_set_priority(m, CPL_PRIORITY_DATA);
- cxgb_ofld_send(ep->com.tdev, m);
-#endif
- return 0;
-}
-
-int
-iwch_resume_tid(struct iwch_ep *ep)
-{
-#ifdef notyet
- struct cpl_set_tcb_field *req;
- struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);
-
- if (m == NULL)
- return (-ENOMEM);
- req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
- req->reply = 0;
- req->cpu_idx = 0;
- req->word = htons(W_TCB_RX_QUIESCE);
- req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
- req->val = 0;
-
- m_set_priority(m, CPL_PRIORITY_DATA);
- cxgb_ofld_send(ep->com.tdev, m);
-#endif
- return 0;
-}
-
static struct rtentry *
find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
__be16 peer_port, u8 tos)
@@ -331,13 +289,16 @@ find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
}
static void
-close_socket(struct iwch_ep_common *epc)
+close_socket(struct iwch_ep_common *epc, int close)
{
CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
SOCK_LOCK(epc->so);
soupcall_clear(epc->so, SO_RCV);
SOCK_UNLOCK(epc->so);
- soshutdown(epc->so, SHUT_WR|SHUT_RD);
+ if (close)
+ soclose(epc->so);
+ else
+ soshutdown(epc->so, SHUT_WR|SHUT_RD);
epc->so = NULL;
}
@@ -500,7 +461,7 @@ abort_connection(struct iwch_ep *ep)
CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
state_set(&ep->com, ABORTING);
abort_socket(ep);
- close_socket(&ep->com);
+ close_socket(&ep->com, 0);
close_complete_upcall(ep);
state_set(&ep->com, DEAD);
put_ep(&ep->com);
@@ -582,12 +543,13 @@ connect_request_upcall(struct iwch_ep *ep)
event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
event.provider_data = ep;
event.so = ep->com.so;
- if (state_read(&ep->parent_ep->com) != DEAD)
+ if (state_read(&ep->parent_ep->com) != DEAD) {
+ get_ep(&ep->com);
ep->parent_ep->com.cm_id->event_handler(
ep->parent_ep->com.cm_id,
&event);
+ }
put_ep(&ep->parent_ep->com);
- ep->parent_ep = NULL;
}
static void
@@ -729,6 +691,7 @@ process_mpa_reply(struct iwch_ep *ep)
*/
CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
state_set(&ep->com, FPDU_MODE);
+ ep->mpa_attr.initiator = 1;
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -885,6 +848,7 @@ process_mpa_request(struct iwch_ep *ep)
* If we get here we have accumulated the entire mpa
* start reply message including private data.
*/
+ ep->mpa_attr.initiator = 0;
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -934,7 +898,6 @@ process_peer_close(struct iwch_ep *ep)
* rejects the CR.
*/
__state_set(&ep->com, CLOSING);
- get_ep(&ep->com);
break;
case MPA_REP_SENT:
__state_set(&ep->com, CLOSING);
@@ -961,7 +924,7 @@ process_peer_close(struct iwch_ep *ep)
iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
}
- close_socket(&ep->com);
+ close_socket(&ep->com, 0);
close_complete_upcall(ep);
__state_set(&ep->com, DEAD);
release = 1;
@@ -986,11 +949,10 @@ process_conn_error(struct iwch_ep *ep)
{
struct iwch_qp_attributes attrs;
int ret;
- int state;
- state = state_read(&ep->com);
- CTR5(KTR_IW_CXGB, "%s ep %p so %p so->so_error %u state %s", __FUNCTION__, ep, ep->com.so, ep->com.so->so_error, states[ep->com.state]);
- switch (state) {
+ mtx_lock(&ep->com.lock);
+ CTR3(KTR_IW_CXGB, "%s ep %p state %u", __func__, ep, ep->com.state);
+ switch (ep->com.state) {
case MPA_REQ_WAIT:
stop_ep_timer(ep);
break;
@@ -1009,7 +971,6 @@ process_conn_error(struct iwch_ep *ep)
* the reference on it until the ULP accepts or
* rejects the CR.
*/
- get_ep(&ep->com);
break;
case MORIBUND:
case CLOSING:
@@ -1031,6 +992,7 @@ process_conn_error(struct iwch_ep *ep)
case ABORTING:
break;
case DEAD:
+ mtx_unlock(&ep->com.lock);
CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__,
ep->com.so->so_error);
return;
@@ -1039,11 +1001,12 @@ process_conn_error(struct iwch_ep *ep)
break;
}
- if (state != ABORTING) {
- close_socket(&ep->com);
- state_set(&ep->com, DEAD);
+ if (ep->com.state != ABORTING) {
+ close_socket(&ep->com, 0);
+ __state_set(&ep->com, DEAD);
put_ep(&ep->com);
}
+ mtx_unlock(&ep->com.lock);
return;
}
@@ -1071,7 +1034,10 @@ process_close_complete(struct iwch_ep *ep)
IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
}
- close_socket(&ep->com);
+ if (ep->parent_ep)
+ close_socket(&ep->com, 1);
+ else
+ close_socket(&ep->com, 0);
close_complete_upcall(ep);
__state_set(&ep->com, DEAD);
release = 1;
@@ -1102,77 +1068,59 @@ process_close_complete(struct iwch_ep *ep)
* terminate() handles case (1)...
*/
static int
-terminate(struct t3cdev *tdev, struct mbuf *m, void *ctx)
+terminate(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- struct toepcb *toep = (struct toepcb *)ctx;
- struct socket *so = toeptoso(toep);
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ uint32_t hash = *((uint32_t *)r + 1);
+ unsigned int tid = ntohl(hash) >> 8 & 0xfffff;
+ struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+ struct socket *so = toep->tp_inp->inp_socket;
struct iwch_ep *ep = so->so_rcv.sb_upcallarg;
- CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
+ if (state_read(&ep->com) != FPDU_MODE)
+ goto done;
+
m_adj(m, sizeof(struct cpl_rdma_terminate));
- CTR2(KTR_IW_CXGB, "%s saving %d bytes of term msg", __FUNCTION__, m->m_len);
+
+ CTR4(KTR_IW_CXGB, "%s: tid %u, ep %p, saved %d bytes",
+ __func__, tid, ep, m->m_len);
+
m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer);
ep->com.qp->attr.terminate_msg_len = m->m_len;
ep->com.qp->attr.is_terminate_local = 0;
- return CPL_RET_BUF_DONE;
+
+done:
+ m_freem(m);
+ return (0);
}
static int
-ec_status(struct t3cdev *tdev, struct mbuf *m, void *ctx)
+ec_status(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- struct toepcb *toep = (struct toepcb *)ctx;
- struct socket *so = toeptoso(toep);
- struct cpl_rdma_ec_status *rep = cplhdr(m);
- struct iwch_ep *ep;
- struct iwch_qp_attributes attrs;
- int release = 0;
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ struct cpl_rdma_ec_status *rep = mtod(m, void *);
+ unsigned int tid = GET_TID(rep);
+ struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+ struct socket *so = toep->tp_inp->inp_socket;
+ struct iwch_ep *ep = so->so_rcv.sb_upcallarg;
- ep = so->so_rcv.sb_upcallarg;
- CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s ec_status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], rep->status);
- if (!so || !ep) {
- panic("bogosity ep %p state %d, so %p state %x\n", ep, ep ? ep->com.state : -1, so, so ? so->so_state : -1);
- }
- mtx_lock(&ep->com.lock);
- switch (ep->com.state) {
- case CLOSING:
- if (!rep->status)
- __state_set(&ep->com, MORIBUND);
- else
- __state_set(&ep->com, ABORTING);
- break;
- case MORIBUND:
- stop_ep_timer(ep);
- if (!rep->status) {
- if ((ep->com.cm_id) && (ep->com.qp)) {
- attrs.next_state = IWCH_QP_STATE_IDLE;
- iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp,
- IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- }
- close_socket(&ep->com);
- close_complete_upcall(ep);
- __state_set(&ep->com, DEAD);
- release = 1;
- }
- break;
- case DEAD:
- break;
- default:
- panic("unknown state: %d\n", ep->com.state);
- }
- mtx_unlock(&ep->com.lock);
if (rep->status) {
- log(LOG_ERR, "%s BAD CLOSE - Aborting tid %u\n",
- __FUNCTION__, ep->hwtid);
+ struct iwch_qp_attributes attrs;
+
+ CTR1(KTR_IW_CXGB, "%s BAD CLOSE - Aborting", __FUNCTION__);
+ stop_ep_timer(ep);
attrs.next_state = IWCH_QP_STATE_ERROR;
iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
+ ep->com.qp,
+ IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ abort_connection(ep);
}
- if (release)
- put_ep(&ep->com);
- return CPL_RET_BUF_DONE;
+
+ m_freem(m);
+ return (0);
}
static void
@@ -1181,24 +1129,29 @@ ep_timeout(void *arg)
struct iwch_ep *ep = (struct iwch_ep *)arg;
struct iwch_qp_attributes attrs;
int err = 0;
+ int abort = 1;
mtx_lock(&ep->com.lock);
CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
switch (ep->com.state) {
case MPA_REQ_SENT:
+ __state_set(&ep->com, ABORTING);
connect_reply_upcall(ep, -ETIMEDOUT);
break;
case MPA_REQ_WAIT:
+ __state_set(&ep->com, ABORTING);
break;
case CLOSING:
case MORIBUND:
if (ep->com.cm_id && ep->com.qp)
err = 1;
+ __state_set(&ep->com, ABORTING);
break;
default:
- panic("unknown state: %d\n", ep->com.state);
+ CTR3(KTR_IW_CXGB, "%s unexpected state ep %p state %u\n",
+ __func__, ep, ep->com.state);
+ abort = 0;
}
- __state_set(&ep->com, ABORTING);
mtx_unlock(&ep->com.lock);
if (err){
attrs.next_state = IWCH_QP_STATE_ERROR;
@@ -1206,7 +1159,8 @@ ep_timeout(void *arg)
ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
&attrs, 1);
}
- abort_connection(ep);
+ if (abort)
+ abort_connection(ep);
put_ep(&ep->com);
}
@@ -1228,6 +1182,7 @@ iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
err = send_mpa_reject(ep, pdata, pdata_len);
err = soshutdown(ep->com.so, 3);
}
+ put_ep(&ep->com);
return 0;
}
@@ -1242,8 +1197,10 @@ iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
- if (state_read(&ep->com) == DEAD)
- return (-ECONNRESET);
+ if (state_read(&ep->com) == DEAD) {
+ err = -ECONNRESET;
+ goto err;
+ }
PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
PANIC_IF(!qp);
@@ -1251,7 +1208,8 @@ iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
(conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
abort_connection(ep);
- return (-EINVAL);
+ err = -EINVAL;
+ goto err;
}
cm_id->add_ref(cm_id);
@@ -1263,11 +1221,10 @@ iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->ird = conn_param->ird;
ep->ord = conn_param->ord;
CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord);
- get_ep(&ep->com);
/* bind QP to EP and move to RTS */
attrs.mpa_attr = ep->mpa_attr;
- attrs.max_ird = ep->ord;
+ attrs.max_ird = ep->ird;
attrs.max_ord = ep->ord;
attrs.llp_stream_handle = ep;
attrs.next_state = IWCH_QP_STATE_RTS;
@@ -1283,20 +1240,21 @@ iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->com.qp, mask, &attrs, 1);
if (err)
- goto err;
+ goto err1;
err = send_mpa_reply(ep, conn_param->private_data,
conn_param->private_data_len);
if (err)
- goto err;
+ goto err1;
state_set(&ep->com, FPDU_MODE);
established_upcall(ep);
put_ep(&ep->com);
return 0;
-err:
+err1:
ep->com.cm_id = NULL;
ep->com.qp = NULL;
cm_id->rem_ref(cm_id);
+err:
put_ep(&ep->com);
return err;
}
@@ -1312,15 +1270,6 @@ static int init_sock(struct iwch_ep_common *epc)
epc->so->so_state |= SS_NBIO;
SOCK_UNLOCK(epc->so);
sopt.sopt_dir = SOPT_SET;
- sopt.sopt_level = SOL_SOCKET;
- sopt.sopt_name = SO_NO_DDP;
- sopt.sopt_val = (caddr_t)&on;
- sopt.sopt_valsize = sizeof on;
- sopt.sopt_td = NULL;
- err = sosetopt(epc->so, &sopt);
- if (err)
- printf("%s can't set SO_NO_DDP err %d\n", __FUNCTION__, err);
- sopt.sopt_dir = SOPT_SET;
sopt.sopt_level = IPPROTO_TCP;
sopt.sopt_name = TCP_NODELAY;
sopt.sopt_val = (caddr_t)&on;
@@ -1400,16 +1349,14 @@ iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (!(rt->rt_ifp->if_flags & IFCAP_TOE)) {
printf("%s - interface not TOE capable.\n", __FUNCTION__);
- goto fail3;
+ RTFREE(rt);
+ goto fail2;
}
tdev = TOEDEV(rt->rt_ifp);
if (tdev == NULL) {
printf("%s - No toedev for interface.\n", __FUNCTION__);
- goto fail3;
- }
- if (!tdev->tod_can_offload(tdev, ep->com.so)) {
- printf("%s - interface cannot offload!.\n", __FUNCTION__);
- goto fail3;
+ RTFREE(rt);
+ goto fail2;
}
RTFREE(rt);
@@ -1420,8 +1367,6 @@ iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->com.thread);
if (!err)
goto out;
-fail3:
- RTFREE(ep->dst);
fail2:
put_ep(&ep->com);
out:
@@ -1458,7 +1403,7 @@ iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->provider_data = ep;
goto out;
}
- close_socket(&ep->com);
+ close_socket(&ep->com, 0);
fail:
cm_id->rem_ref(cm_id);
put_ep(&ep->com);
@@ -1474,7 +1419,7 @@ iwch_destroy_listen(struct iw_cm_id *cm_id)
CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
state_set(&ep->com, DEAD);
- close_socket(&ep->com);
+ close_socket(&ep->com, 0);
cm_id->rem_ref(cm_id);
put_ep(&ep->com);
return 0;
@@ -1493,47 +1438,48 @@ iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags)
CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep,
ep->com.so, states[ep->com.state], abrupt);
- if (ep->com.state == DEAD) {
- CTR2(KTR_IW_CXGB, "%s already dead ep %p", __FUNCTION__, ep);
- goto out;
- }
-
- if (abrupt) {
- if (ep->com.state != ABORTING) {
- ep->com.state = ABORTING;
- close = 1;
- }
- goto out;
- }
-
switch (ep->com.state) {
case MPA_REQ_WAIT:
case MPA_REQ_SENT:
case MPA_REQ_RCVD:
case MPA_REP_SENT:
case FPDU_MODE:
- start_ep_timer(ep);
- ep->com.state = CLOSING;
close = 1;
+ if (abrupt)
+ ep->com.state = ABORTING;
+ else {
+ ep->com.state = CLOSING;
+ start_ep_timer(ep);
+ }
break;
case CLOSING:
- ep->com.state = MORIBUND;
close = 1;
+ if (abrupt) {
+ stop_ep_timer(ep);
+ ep->com.state = ABORTING;
+ } else
+ ep->com.state = MORIBUND;
break;
case MORIBUND:
case ABORTING:
+ case DEAD:
+ CTR3(KTR_IW_CXGB, "%s ignoring disconnect ep %p state %u\n",
+ __func__, ep, ep->com.state);
break;
default:
panic("unknown state: %d\n", ep->com.state);
break;
}
-out:
+
mtx_unlock(&ep->com.lock);
if (close) {
if (abrupt)
abort_connection(ep);
- else
+ else {
+ if (!ep->parent_ep)
+ __state_set(&ep->com, MORIBUND);
shutdown_socket(&ep->com);
+ }
}
return 0;
}
@@ -1587,7 +1533,7 @@ process_connected(struct iwch_ep *ep)
send_mpa_req(ep);
} else {
connect_reply_upcall(ep, -ep->com.so->so_error);
- close_socket(&ep->com);
+ close_socket(&ep->com, 0);
state_set(&ep->com, DEAD);
put_ep(&ep->com);
}
@@ -1643,10 +1589,20 @@ process_newconn(struct iwch_ep *parent_ep)
}
CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__,
inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
+ child_ep->com.tdev = parent_ep->com.tdev;
+ child_ep->com.local_addr.sin_family = parent_ep->com.local_addr.sin_family;
+ child_ep->com.local_addr.sin_port = parent_ep->com.local_addr.sin_port;
+ child_ep->com.local_addr.sin_addr.s_addr = parent_ep->com.local_addr.sin_addr.s_addr;
+ child_ep->com.local_addr.sin_len = parent_ep->com.local_addr.sin_len;
+ child_ep->com.remote_addr.sin_family = remote->sin_family;
+ child_ep->com.remote_addr.sin_port = remote->sin_port;
+ child_ep->com.remote_addr.sin_addr.s_addr = remote->sin_addr.s_addr;
+ child_ep->com.remote_addr.sin_len = remote->sin_len;
child_ep->com.so = child_so;
child_ep->com.cm_id = NULL;
child_ep->com.thread = parent_ep->com.thread;
child_ep->parent_ep = parent_ep;
+
free(remote, M_SONAME);
get_ep(&parent_ep->com);
@@ -1747,17 +1703,30 @@ iwch_cm_init(void)
}
taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq");
TASK_INIT(&iw_cxgb_task, 0, process_req, NULL);
- t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, terminate);
- t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, ec_status);
- return 0;
+ return (0);
}
void
iwch_cm_term(void)
{
- t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, NULL);
- t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, NULL);
+
taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task);
taskqueue_free(iw_cxgb_taskq);
}
+void
+iwch_cm_init_cpl(struct adapter *sc)
+{
+
+ t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate);
+ t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, ec_status);
+}
+
+void
+iwch_cm_term_cpl(struct adapter *sc)
+{
+
+ t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL);
+ t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, NULL);
+}
+#endif
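
CPL handlers are per-adapter now, so each ULP wires its opcodes up when it attaches to an adapter and tears them down symmetrically; passing NULL, as iwch_cm_term_cpl() does, unregisters the handler. A sketch of another ULP following the same pattern (my_cqe_handler is hypothetical; CPL_RDMA_CQE is a real T3 opcode):

	static void
	my_uld_init_cpl(struct adapter *sc)
	{
		t3_register_cpl_handler(sc, CPL_RDMA_CQE, my_cqe_handler);
	}

	static void
	my_uld_term_cpl(struct adapter *sc)
	{
		t3_register_cpl_handler(sc, CPL_RDMA_CQE, NULL);
	}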
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h
index 4250be3..ef76729 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h
@@ -31,8 +31,8 @@ $FreeBSD$
#ifndef _IWCH_CM_H_
#define _IWCH_CM_H_
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/iw_cm.h>
#include <sys/refcount.h>
#include <sys/condvar.h>
#include <sys/proc.h>
@@ -42,21 +42,21 @@ $FreeBSD$
#define MPA_KEY_REP "MPA ID Rep Frame"
#define MPA_MAX_PRIVATE_DATA 256
-#define MPA_REV o0 /* XXX - amso1100 uses rev 0 ! */
+#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */
#define MPA_REJECT 0x20
#define MPA_CRC 0x40
#define MPA_MARKERS 0x80
#define MPA_FLAGS_MASK 0xE0
#define put_ep(ep) { \
- CTR4(KTR_IW_CXGB, "put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__, \
+ CTR4(KTR_IW_CXGB, "put_ep (via %s:%u) ep %p refcnt %d", __FUNCTION__, __LINE__, \
ep, atomic_load_acq_int(&((ep)->refcount))); \
if (refcount_release(&((ep)->refcount))) \
__free_ep(ep); \
}
#define get_ep(ep) { \
- CTR4(KTR_IW_CXGB, "get_ep (via %s:%u) ep %p, refcnt %d\n", __FUNCTION__, __LINE__, \
+ CTR4(KTR_IW_CXGB, "get_ep (via %s:%u) ep %p, refcnt %d", __FUNCTION__, __LINE__, \
ep, atomic_load_acq_int(&((ep)->refcount))); \
refcount_acquire(&((ep)->refcount)); \
}
@@ -148,7 +148,7 @@ struct iwch_ep_common {
TAILQ_ENTRY(iwch_ep_common) entry;
struct iw_cm_id *cm_id;
struct iwch_qp *qp;
- struct t3cdev *tdev;
+ struct toedev *tdev;
enum iwch_ep_state state;
u_int refcount;
struct cv waitq;
@@ -176,7 +176,6 @@ struct iwch_ep {
u32 snd_seq;
u32 rcv_seq;
struct l2t_entry *l2t;
- struct rtentry *dst;
struct mbuf *mpa_mbuf;
struct iwch_mpa_attributes mpa_attr;
unsigned int mpa_pkt_len;
@@ -237,13 +236,13 @@ int iwch_destroy_listen(struct iw_cm_id *cm_id);
int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags);
-int iwch_quiesce_tid(struct iwch_ep *ep);
-int iwch_resume_tid(struct iwch_ep *ep);
void __free_ep(struct iwch_ep_common *ep);
void iwch_rearp(struct iwch_ep *ep);
int iwch_ep_redirect(void *ctx, struct rtentry *old, struct rtentry *new, struct l2t_entry *l2t);
int iwch_cm_init(void);
void iwch_cm_term(void);
+void iwch_cm_init_cpl(struct adapter *);
+void iwch_cm_term_cpl(struct adapter *);
#endif /* _IWCH_CM_H_ */
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c
index bb564bb..9ad6ed9 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c
@@ -30,11 +30,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
-#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@@ -59,9 +61,11 @@ __FBSDID("$FreeBSD$");
#include <sys/libkern.h>
#include <netinet/in.h>
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
@@ -261,4 +265,4 @@ int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
return npolled;
}
}
-
+#endif
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c
index ac9c742..f5f59a3 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c
@@ -30,11 +30,12 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
-#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@@ -60,11 +61,13 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
-#ifdef DEBUG
+#if defined(INVARIANTS) && defined(TCP_OFFLOAD)
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
@@ -74,75 +77,100 @@ __FBSDID("$FreeBSD$");
#include <ulp/iw_cxgb/iw_cxgb_resource.h>
#include <ulp/iw_cxgb/iw_cxgb_user.h>
+static int
+cxio_rdma_get_mem(struct cxio_rdev *rdev, struct ch_mem_range *m)
+{
+ struct adapter *sc = rdev->adap;
+ struct mc7 *mem;
+
+ if ((m->addr & 7) || (m->len & 7))
+ return (EINVAL);
+ if (m->mem_id == MEM_CM)
+ mem = &sc->cm;
+ else if (m->mem_id == MEM_PMRX)
+ mem = &sc->pmrx;
+ else if (m->mem_id == MEM_PMTX)
+ mem = &sc->pmtx;
+ else
+ return (EINVAL);
+
+ return (t3_mc7_bd_read(mem, m->addr/8, m->len/8, (u64 *)m->buf));
+}
+
void cxio_dump_tpt(struct cxio_rdev *rdev, uint32_t stag)
{
- struct ch_mem_range *m;
+ struct ch_mem_range m;
u64 *data;
+ u32 addr;
int rc;
int size = 32;
- m = kmalloc(sizeof(*m) + size, M_NOWAIT);
- if (!m) {
+ m.buf = malloc(size, M_DEVBUF, M_NOWAIT);
+ if (m.buf == NULL) {
CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
return;
}
- m->mem_id = MEM_PMRX;
- m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
- m->len = size;
- CTR3(KTR_IW_CXGB, "%s TPT addr 0x%x len %d", __FUNCTION__, m->addr, m->len);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ m.mem_id = MEM_PMRX;
+ m.addr = (stag >> 8) * 32 + rdev->rnic_info.tpt_base;
+ m.len = size;
+ CTR3(KTR_IW_CXGB, "%s TPT addr 0x%x len %d", __FUNCTION__, m.addr, m.len);
+
+ rc = cxio_rdma_get_mem(rdev, &m);
if (rc) {
CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
- free(m, M_DEVBUF);
+ free(m.buf, M_DEVBUF);
return;
}
- data = (u64 *)m->buf;
+ data = (u64 *)m.buf;
+ addr = m.addr;
while (size > 0) {
- CTR2(KTR_IW_CXGB, "TPT %08x: %016llx", m->addr, (unsigned long long) *data);
+ CTR2(KTR_IW_CXGB, "TPT %08x: %016llx", addr, (unsigned long long) *data);
size -= 8;
data++;
- m->addr += 8;
+ addr += 8;
}
- free(m, M_DEVBUF);
+ free(m.buf, M_DEVBUF);
}
void cxio_dump_pbl(struct cxio_rdev *rdev, uint32_t pbl_addr, uint32_t len, u8 shift)
{
- struct ch_mem_range *m;
+ struct ch_mem_range m;
u64 *data;
+ u32 addr;
int rc;
int size, npages;
shift += 12;
npages = (len + (1ULL << shift) - 1) >> shift;
size = npages * sizeof(u64);
-
- m = kmalloc(sizeof(*m) + size, M_NOWAIT);
- if (!m) {
+ m.buf = malloc(size, M_DEVBUF, M_NOWAIT);
+ if (m.buf == NULL) {
CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
return;
}
- m->mem_id = MEM_PMRX;
- m->addr = pbl_addr;
- m->len = size;
+ m.mem_id = MEM_PMRX;
+ m.addr = pbl_addr;
+ m.len = size;
CTR4(KTR_IW_CXGB, "%s PBL addr 0x%x len %d depth %d",
- __FUNCTION__, m->addr, m->len, npages);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ __FUNCTION__, m.addr, m.len, npages);
+
+ rc = cxio_rdma_get_mem(rdev, &m);
if (rc) {
CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
- free(m, M_DEVBUF);
+ free(m.buf, M_DEVBUF);
return;
}
- data = (u64 *)m->buf;
+ data = (u64 *)m.buf;
+ addr = m.addr;
while (size > 0) {
- CTR2(KTR_IW_CXGB, "PBL %08x: %016llx", m->addr, (unsigned long long) *data);
+ CTR2(KTR_IW_CXGB, "PBL %08x: %016llx", addr, (unsigned long long) *data);
size -= 8;
data++;
- m->addr += 8;
+ addr += 8;
}
- free(m, M_DEVBUF);
+ free(m.buf, M_DEVBUF);
}
void cxio_dump_wqe(union t3_wr *wqe)
@@ -175,70 +203,76 @@ void cxio_dump_wce(struct t3_cqe *wce)
void cxio_dump_rqt(struct cxio_rdev *rdev, uint32_t hwtid, int nents)
{
- struct ch_mem_range *m;
+ struct ch_mem_range m;
int size = nents * 64;
u64 *data;
+ u32 addr;
int rc;
- m = kmalloc(sizeof(*m) + size, M_NOWAIT);
- if (!m) {
+ m.buf = malloc(size, M_DEVBUF, M_NOWAIT);
+ if (m.buf == NULL) {
CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
return;
}
- m->mem_id = MEM_PMRX;
- m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
- m->len = size;
- CTR3(KTR_IW_CXGB, "%s RQT addr 0x%x len %d", __FUNCTION__, m->addr, m->len);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ m.mem_id = MEM_PMRX;
+ m.addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
+ m.len = size;
+ CTR3(KTR_IW_CXGB, "%s RQT addr 0x%x len %d", __FUNCTION__, m.addr, m.len);
+
+ rc = cxio_rdma_get_mem(rdev, &m);
if (rc) {
CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
- free(m, M_DEVBUF);
+ free(m.buf, M_DEVBUF);
return;
}
- data = (u64 *)m->buf;
+ data = (u64 *)m.buf;
+ addr = m.addr;
while (size > 0) {
- CTR2(KTR_IW_CXGB, "RQT %08x: %016llx", m->addr, (unsigned long long) *data);
+ CTR2(KTR_IW_CXGB, "RQT %08x: %016llx", addr, (unsigned long long) *data);
size -= 8;
data++;
- m->addr += 8;
+ addr += 8;
}
- free(m, M_DEVBUF);
+ free(m.buf, M_DEVBUF);
}
void cxio_dump_tcb(struct cxio_rdev *rdev, uint32_t hwtid)
{
- struct ch_mem_range *m;
+ struct ch_mem_range m;
int size = TCB_SIZE;
uint32_t *data;
+ uint32_t addr;
int rc;
- m = kmalloc(sizeof(*m) + size, M_NOWAIT);
- if (!m) {
+ m.buf = malloc(size, M_DEVBUF, M_NOWAIT);
+ if (m.buf == NULL) {
CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
return;
}
- m->mem_id = MEM_CM;
- m->addr = hwtid * size;
- m->len = size;
- CTR3(KTR_IW_CXGB, "%s TCB %d len %d", __FUNCTION__, m->addr, m->len);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ m.mem_id = MEM_CM;
+ m.addr = hwtid * size;
+ m.len = size;
+ CTR3(KTR_IW_CXGB, "%s TCB %d len %d", __FUNCTION__, m.addr, m.len);
+
+ rc = cxio_rdma_get_mem(rdev, &m);
if (rc) {
CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
- free(m, M_DEVBUF);
+ free(m.buf, M_DEVBUF);
return;
}
- data = (uint32_t *)m->buf;
+ data = (uint32_t *)m.buf;
+ addr = m.addr;
while (size > 0) {
printf("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n",
- m->addr,
+ addr,
*(data+2), *(data+3), *(data),*(data+1),
*(data+6), *(data+7), *(data+4), *(data+5));
size -= 32;
data += 8;
- m->addr += 32;
+ addr += 32;
}
- free(m, M_DEVBUF);
+ free(m.buf, M_DEVBUF);
}
#endif
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c
index cf0891d..65b7a2d 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c
@@ -29,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
-#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@@ -59,9 +61,11 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
@@ -81,11 +85,22 @@ post_qp_event(struct iwch_dev *rnicp, struct iwch_qp *qhp, struct iwch_cq *chp,
struct ib_event event;
struct iwch_qp_attributes attrs;
+ mtx_lock(&rnicp->lock);
+
+ if (!qhp) {
+ CTR3(KTR_IW_CXGB, "%s unaffiliated error 0x%x qpid 0x%x\n",
+ __func__, CQE_STATUS(rsp_msg->cqe),
+ CQE_QPID(rsp_msg->cqe));
+ mtx_unlock(&rnicp->lock);
+ return;
+ }
+
if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
(qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
CTR4(KTR_IW_CXGB, "%s AE received after RTS - "
"qp state %d qpid 0x%x status 0x%x", __FUNCTION__,
qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe));
+ mtx_unlock(&rnicp->lock);
return;
}
@@ -95,6 +110,15 @@ post_qp_event(struct iwch_dev *rnicp, struct iwch_qp *qhp, struct iwch_cq *chp,
CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+ mtx_unlock(&rnicp->lock);
+
+ if (qhp->attr.state == IWCH_QP_STATE_RTS) {
+ attrs.next_state = IWCH_QP_STATE_TERMINATE;
+ iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ if (send_term)
+ iwch_post_terminate(qhp, rsp_msg);
+ }
event.event = ib_event;
event.device = chp->ibcq.device;
@@ -106,25 +130,17 @@ post_qp_event(struct iwch_dev *rnicp, struct iwch_qp *qhp, struct iwch_cq *chp,
if (qhp->ibqp.event_handler)
(*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
- if (qhp->attr.state == IWCH_QP_STATE_RTS) {
- attrs.next_state = IWCH_QP_STATE_TERMINATE;
- iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- if (send_term)
- iwch_post_terminate(qhp, rsp_msg);
- }
+ (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
}
void
-iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m)
+iwch_ev_dispatch(struct iwch_dev *rnicp, struct mbuf *m)
{
- struct iwch_dev *rnicp;
struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) m->m_data;
struct iwch_cq *chp;
struct iwch_qp *qhp;
u32 cqid = RSPQ_CQID(rsp_msg);
- rnicp = (struct iwch_dev *) rdev_p->ulp;
mtx_lock(&rnicp->lock);
chp = get_chp(rnicp, cqid);
qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
@@ -136,7 +152,7 @@ iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m)
CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
CQE_WRID_LOW(rsp_msg->cqe));
mtx_unlock(&rnicp->lock);
- goto out;
+ return;
}
iwch_qp_add_ref(&qhp->ibqp);
mtx_lock(&chp->lock);
@@ -200,12 +216,6 @@ iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m)
case TPT_ERR_BOUND:
case TPT_ERR_INVALIDATE_SHARED_MR:
case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
- log(LOG_ERR, "%s - CQE Err qpid 0x%x opcode %d status 0x%x "
- "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
- CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
- CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
- CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
- (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
break;
@@ -248,6 +258,5 @@ done:
wakeup(chp);
mtx_unlock(&chp->lock);
iwch_qp_rem_ref(&qhp->ibqp);
-out:
- m_free(m);
}
+#endif
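
The reworked post_qp_event() above looks the QP up and validates its state under rnicp->lock, bails out early on unaffiliated errors, and drops the lock before calling iwch_modify_qp() and the consumer's event handler, both of which may sleep. A self-contained pthreads sketch of that lookup, pin, unlock sequence (the table, refcount and handler below are illustrative stand-ins, not the driver's types):

    #include <pthread.h>
    #include <stddef.h>

    struct obj {
            int refcnt;                         /* pins the object across callbacks */
            void (*handler)(struct obj *);
    };

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct obj *table[16];

    static void
    dispatch(int id)
    {
            struct obj *o;

            pthread_mutex_lock(&table_lock);
            o = (id >= 0 && id < 16) ? table[id] : NULL;
            if (o == NULL) {                    /* unaffiliated event: drop it */
                    pthread_mutex_unlock(&table_lock);
                    return;
            }
            o->refcnt++;                        /* keep the object alive... */
            pthread_mutex_unlock(&table_lock);  /* ...then unlock: handler may sleep */

            o->handler(o);                      /* runs without the lock held */

            pthread_mutex_lock(&table_lock);
            o->refcnt--;                        /* owner frees at refcnt == 0 */
            pthread_mutex_unlock(&table_lock);
    }

    int
    main(void)
    {
            dispatch(3);                        /* empty slot: silently dropped */
            return (0);
    }

iwch_ev_dispatch() follows the same shape, holding iwch_qp_add_ref()/iwch_qp_rem_ref() around the unlocked CQE processing.
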
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c
index cf42e38..2427263 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c
@@ -1,4 +1,3 @@
-
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
@@ -30,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
-#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@@ -47,6 +48,8 @@ __FBSDID("$FreeBSD$");
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockopt.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
@@ -59,12 +62,25 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
+#include <net/route.h>
+#include <netinet/in_systm.h>
#include <netinet/in.h>
-
-#include <contrib/rdma/ib_verbs.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp_var.h>
+#include <netinet/toecore.h>
+#include <netinet/tcp.h>
+#include <netinet/tcpip.h>
+
+#include <rdma/ib_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/tom/cxgb_l2t.h>
+#include <ulp/tom/cxgb_tom.h>
+#include <ulp/tom/cxgb_toepcb.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
@@ -72,29 +88,21 @@ __FBSDID("$FreeBSD$");
#include <ulp/iw_cxgb/iw_cxgb.h>
#include <ulp/iw_cxgb/iw_cxgb_resource.h>
-static TAILQ_HEAD( ,cxio_rdev) rdev_list;
-static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
-
-static struct cxio_rdev *
-cxio_hal_find_rdev_by_name(char *dev_name)
+/* Response queue used for RDMA events. */
+#define ASYNC_NOTIF_RSPQ 0
+static inline int
+cxio_rdma_cq_setup(struct cxio_rdev *rdev_p, unsigned id, uint64_t base_addr,
+ unsigned size, unsigned ovfl_mode, unsigned credits, unsigned credit_thres)
{
- struct cxio_rdev *rdev;
+ struct adapter *sc = rdev_p->adap;
+ int rc;
- TAILQ_FOREACH(rdev, &rdev_list, entry)
- if (!strcmp(rdev->dev_name, dev_name))
- return rdev;
- return NULL;
-}
-
-struct cxio_rdev *
-cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev)
-{
- struct cxio_rdev *rdev;
+ mtx_lock_spin(&sc->sge.reg_lock);
+ rc = -t3_sge_init_cqcntxt(sc, id, base_addr, size, ASYNC_NOTIF_RSPQ,
+ ovfl_mode, credits, credit_thres);
+ mtx_unlock_spin(&sc->sge.reg_lock);
- TAILQ_FOREACH(rdev, &rdev_list, entry)
- if (rdev->t3cdev_p == tdev)
- return rdev;
- return NULL;
+ return (rc);
}
int
@@ -104,12 +112,14 @@ cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
int ret;
struct t3_cqe *cqe;
u32 rptr;
+ struct adapter *sc = rdev_p->adap;
+
+ if (op != CQ_CREDIT_UPDATE)
+ credit = 0;
- struct rdma_cq_op setup;
- setup.id = cq->cqid;
- setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0;
- setup.op = op;
- ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup);
+ mtx_lock_spin(&sc->sge.reg_lock);
+ ret = t3_sge_cqcntxt_op(sc, cq->cqid, op, credit);
+ mtx_unlock_spin(&sc->sge.reg_lock);
if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
return (ret);
@@ -140,30 +150,26 @@ cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
DELAY(1);
if (i++ > 1000000) {
- PANIC_IF(1);
+ struct adapter *sc = rdev_p->adap;
+
log(LOG_ERR, "%s: stalled rnic\n",
- rdev_p->dev_name);
+ device_get_nameunit(sc->dev));
+ PANIC_IF(1);
return (-EIO);
}
}
- return 1;
+ return (1);
}
- return 0;
+ return (0);
}
static int
cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
{
- struct rdma_cq_setup setup;
- setup.id = cqid;
- setup.base_addr = 0; /* NULL address */
- setup.size = 0; /* disaable the CQ */
- setup.credits = 0;
- setup.credit_thres = 0;
- setup.ovfl_mode = 0;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+
+ return (cxio_rdma_cq_setup(rdev_p, cqid, 0, 0, 0, 0, 0));
}
static int
@@ -171,43 +177,38 @@ cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
{
u64 sge_cmd;
struct t3_modify_qp_wr *wqe;
- struct mbuf *m = m_gethdr(MT_DATA, M_NOWAIT);
+ struct mbuf *m;
+
+ m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, wqe);
if (m == NULL) {
CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__);
return (-ENOMEM);
}
wqe = mtod(m, struct t3_modify_qp_wr *);
- m->m_len = m->m_pkthdr.len = sizeof(*wqe);
memset(wqe, 0, sizeof(*wqe));
build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7);
wqe->flags = htobe32(MODQP_WRITE_EC);
sge_cmd = qpid << 8 | 3;
wqe->sge_cmd = htobe64(sge_cmd);
- m_set_priority(m, CPL_PRIORITY_CONTROL);
- m_set_sgl(m, NULL);
- m_set_sgllen(m, 0);
- return (cxgb_ofld_send(rdev_p->t3cdev_p, m));
+ return t3_offload_tx(rdev_p->adap, m);
}
int
-cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
{
- struct rdma_cq_setup setup;
int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
+ size += 1; /* one extra page for storing cq-in-err state */
cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
if (!cq->cqid)
return (-ENOMEM);
- cq->sw_queue = malloc(size, M_DEVBUF, M_NOWAIT|M_ZERO);
- if (!cq->sw_queue)
- return (-ENOMEM);
-#if 0
- cq->queue = dma_alloc_coherent(rdev_p->rnic_info.pdev,
- (1UL << (cq->size_log2)) *
- sizeof(struct t3_cqe),
- &(cq->dma_addr), M_NOWAIT);
-#else
- cq->queue = contigmalloc((1UL << (cq->size_log2))*sizeof(struct t3_cqe),
+ if (kernel) {
+ cq->sw_queue = malloc(size, M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (!cq->sw_queue)
+ return (-ENOMEM);
+ }
+
+ cq->queue = contigmalloc(size,
M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
if (cq->queue)
cq->dma_addr = vtophys(cq->queue);
@@ -215,35 +216,10 @@ cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
free(cq->sw_queue, M_DEVBUF);
return (-ENOMEM);
}
-#endif
-
-#ifdef notyet
- pci_unmap_addr_set(cq, mapping, cq->dma_addr);
-#endif
memset(cq->queue, 0, size);
- setup.id = cq->cqid;
- setup.base_addr = (u64) (cq->dma_addr);
- setup.size = 1UL << cq->size_log2;
- setup.credits = 65535;
- setup.credit_thres = 1;
- if (rdev_p->t3cdev_p->type != T3A)
- setup.ovfl_mode = 0;
- else
- setup.ovfl_mode = 1;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-int
-cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
-{
- struct rdma_cq_setup setup;
- setup.id = cq->cqid;
- setup.base_addr = (u64) (cq->dma_addr);
- setup.size = 1UL << cq->size_log2;
- setup.credits = setup.size;
- setup.credit_thres = setup.size; /* TBD: overflow recovery */
- setup.ovfl_mode = 1;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+ return (cxio_rdma_cq_setup(rdev_p, cq->cqid, cq->dma_addr,
+ 1UL << cq->size_log2, 0, 65535, 1));
}
static u32
@@ -325,7 +301,7 @@ cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
if (!wq->qpid)
return (-ENOMEM);
- wq->rq = malloc(depth * sizeof(u64), M_DEVBUF, M_NOWAIT|M_ZERO);
+ wq->rq = malloc(depth * sizeof(struct t3_swrq), M_DEVBUF, M_NOWAIT|M_ZERO);
if (!wq->rq)
goto err1;
@@ -336,28 +312,19 @@ cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
wq->sq = malloc(depth * sizeof(struct t3_swsq), M_DEVBUF, M_NOWAIT|M_ZERO);
if (!wq->sq)
goto err3;
-#if 0
- wq->queue = dma_alloc_coherent(rdev_p->rnic_info.pdev,
- depth * sizeof(union t3_wr),
- &(wq->dma_addr), M_NOWAIT);
-#else
wq->queue = contigmalloc(depth *sizeof(union t3_wr),
M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
if (wq->queue)
wq->dma_addr = vtophys(wq->queue);
-
-#endif
- if (!wq->queue)
+ else
goto err4;
memset(wq->queue, 0, depth * sizeof(union t3_wr));
-#ifdef notyet
- pci_unmap_addr_set(wq, mapping, wq->dma_addr);
-#endif
wq->doorbell = rdev_p->rnic_info.kdb_addr;
if (!kernel_domain)
wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
(wq->qpid << rdev_p->qpshift);
+ wq->rdev = rdev_p;
CTR4(KTR_IW_CXGB, "%s qpid 0x%x doorbell 0x%p udb 0x%llx", __FUNCTION__,
wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
return 0;
@@ -431,10 +398,11 @@ insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
cq->sw_wptr++;
}
-void
+int
cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
{
u32 ptr;
+ int flushed = 0;
CTR3(KTR_IW_CXGB, "%s wq %p cq %p", __FUNCTION__, wq, cq);
@@ -442,8 +410,11 @@ cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
CTR4(KTR_IW_CXGB, "%s rq_rptr %u rq_wptr %u skip count %u", __FUNCTION__,
wq->rq_rptr, wq->rq_wptr, count);
ptr = wq->rq_rptr + count;
- while (ptr++ != wq->rq_wptr)
+ while (ptr++ != wq->rq_wptr) {
insert_recv_cqe(wq, cq);
+ flushed++;
+ }
+ return flushed;
}
static void
@@ -468,19 +439,22 @@ insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
cq->sw_wptr++;
}
-void
+int
cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
{
__u32 ptr;
+ int flushed = 0;
struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
ptr = wq->sq_rptr + count;
- sqp += count;
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
while (ptr != wq->sq_wptr) {
insert_sq_cqe(wq, cq, sqp);
- sqp++;
ptr++;
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
+ flushed++;
}
+ return flushed;
}
/*
@@ -516,7 +490,7 @@ static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
return 0;
- if ((CQE_OPCODE(*cqe) == T3_SEND) && RQ_TYPE(*cqe) &&
+ if (CQE_OPCODE(*cqe) && RQ_TYPE(*cqe) &&
Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
return 0;
@@ -563,16 +537,8 @@ cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
static int
cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
{
- struct rdma_cq_setup setup;
- setup.id = 0;
- setup.base_addr = 0; /* NULL address */
- setup.size = 1; /* enable the CQ */
- setup.credits = 0;
-
- /* force SGE to redirect to RspQ and interrupt */
- setup.credit_thres = 0;
- setup.ovfl_mode = 1;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+
+ return (cxio_rdma_cq_setup(rdev_p, 0, 0, 1, 1, 0, 0));
}
static int
@@ -584,41 +550,28 @@ cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
struct t3_modify_qp_wr *wqe;
struct mbuf *m;
- m = m_gethdr(MT_DATA, M_NOWAIT);
+ m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, wqe);
if (m == NULL) {
CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__);
- return (-ENOMEM);
+ return (ENOMEM);
}
err = cxio_hal_init_ctrl_cq(rdev_p);
if (err) {
CTR2(KTR_IW_CXGB, "%s err %d initializing ctrl_cq", __FUNCTION__, err);
goto err;
}
-#if 0
- rdev_p->ctrl_qp.workq = dma_alloc_coherent(
- rdev_p->rnic_info.pdev,
- (1 << T3_CTRL_QP_SIZE_LOG2) *
- sizeof(union t3_wr),
- &(rdev_p->ctrl_qp.dma_addr),
- M_NOWAIT);
-#else
+
rdev_p->ctrl_qp.workq = contigmalloc((1 << T3_CTRL_QP_SIZE_LOG2)
*sizeof(union t3_wr), M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
if (rdev_p->ctrl_qp.workq)
rdev_p->ctrl_qp.dma_addr = vtophys(rdev_p->ctrl_qp.workq);
-
-#endif
-
- if (!rdev_p->ctrl_qp.workq) {
+ else {
CTR1(KTR_IW_CXGB, "%s dma_alloc_coherent failed", __FUNCTION__);
- err = -ENOMEM;
+ err = ENOMEM;
goto err;
}
-#if 0
- pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
- rdev_p->ctrl_qp.dma_addr);
-#endif
- rdev_p->ctrl_qp.doorbell = (void /*__iomem */ *)rdev_p->rnic_info.kdb_addr;
+
+ rdev_p->ctrl_qp.doorbell = rdev_p->rnic_info.kdb_addr;
memset(rdev_p->ctrl_qp.workq, 0,
(1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr));
@@ -637,10 +590,8 @@ cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
V_EC_TYPE(0) | V_EC_GEN(1) |
V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
- wqe = mtod(m, struct t3_modify_qp_wr *);
- m->m_len = m->m_pkthdr.len = sizeof(*wqe);
memset(wqe, 0, sizeof(*wqe));
- build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0,
+ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
T3_CTL_QP_TID, 7);
wqe->flags = htobe32(MODQP_WRITE_EC);
sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
@@ -650,12 +601,9 @@ cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
CTR3(KTR_IW_CXGB, "CtrlQP dma_addr 0x%llx workq %p size %d",
(unsigned long long) rdev_p->ctrl_qp.dma_addr,
rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
- m_set_priority(m, CPL_PRIORITY_CONTROL);
- m_set_sgl(m, NULL);
- m_set_sgllen(m, 0);
- return (cxgb_ofld_send(rdev_p->t3cdev_p, m));
+ return t3_offload_tx(rdev_p->adap, m);
err:
- m_free(m);
+ m_freem(m);
return err;
}
@@ -681,7 +629,7 @@ cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
*/
static int
cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
- u32 len, void *data, int completion)
+ u32 len, void *data)
{
u32 i, nr_wqe, copy_len;
u8 *copy_data;
@@ -718,7 +666,7 @@ cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
flag = 0;
if (i == (nr_wqe - 1)) {
/* last WQE */
- flag = completion ? T3_COMPLETION_FLAG : 0;
+ flag = T3_COMPLETION_FLAG;
if (len % 32)
utx_len = len / 32 + 1;
else
@@ -786,14 +734,13 @@ static int
__cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
u32 *stag, u8 stag_state, u32 pdid,
enum tpt_mem_type type, enum tpt_mem_perm perm,
- u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl,
- u32 *pbl_size, u32 *pbl_addr)
+ u32 zbva, u64 to, u32 len, u8 page_size,
+ u32 pbl_size, u32 pbl_addr)
{
int err;
struct tpt_entry tpt;
u32 stag_idx;
u32 wptr;
- int rereg = (*stag != T3_STAG_UNSET);
stag_state = stag_state > 0;
stag_idx = (*stag) >> 8;
@@ -807,30 +754,8 @@ __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
CTR5(KTR_IW_CXGB, "%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x",
__FUNCTION__, stag_state, type, pdid, stag_idx);
- if (reset_tpt_entry)
- cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
- else if (!rereg) {
- *pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3);
- if (!*pbl_addr) {
- return (-ENOMEM);
- }
- }
-
mtx_lock(&rdev_p->ctrl_qp.lock);
- /* write PBL first if any - update pbl only if pbl list exist */
- if (pbl) {
-
- CTR4(KTR_IW_CXGB, "%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d",
- __FUNCTION__, *pbl_addr, rdev_p->rnic_info.pbl_base,
- *pbl_size);
- err = cxio_hal_ctrl_qp_write_mem(rdev_p,
- (*pbl_addr >> 5),
- (*pbl_size << 3), pbl, 0);
- if (err)
- goto ret;
- }
-
/* write TPT entry */
if (reset_tpt_entry)
memset(&tpt, 0, sizeof(tpt));
@@ -845,23 +770,23 @@ __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
V_TPT_PAGE_SIZE(page_size));
tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
- htobe32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3));
+ htobe32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
tpt.len = htobe32(len);
tpt.va_hi = htobe32((u32) (to >> 32));
tpt.va_low_or_fbo = htobe32((u32) (to & 0xFFFFFFFFULL));
tpt.rsvd_bind_cnt_or_pstag = 0;
tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
- htobe32(V_TPT_PBL_SIZE((*pbl_size) >> 2));
+ htobe32(V_TPT_PBL_SIZE((pbl_size) >> 2));
}
err = cxio_hal_ctrl_qp_write_mem(rdev_p,
stag_idx +
(rdev_p->rnic_info.tpt_base >> 5),
- sizeof(tpt), &tpt, 1);
+ sizeof(tpt), &tpt);
/* release the stag index to free pool */
if (reset_tpt_entry)
cxio_hal_put_stag(rdev_p->rscp, stag_idx);
-ret:
+
wptr = rdev_p->ctrl_qp.wptr;
mtx_unlock(&rdev_p->ctrl_qp.lock);
if (!err)
@@ -872,61 +797,90 @@ ret:
return err;
}
+int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
+ u32 pbl_addr, u32 pbl_size)
+{
+ u32 wptr;
+ int err;
+
+ CTR4(KTR_IW_CXGB, "%s pbl_addr 0x%x, pbl_base 0x%x, pbl_size %d",
+ __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
+ pbl_size);
+
+ mtx_lock(&rdev_p->ctrl_qp.lock);
+ err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
+ pbl);
+ wptr = rdev_p->ctrl_qp.wptr;
+ mtx_unlock(&rdev_p->ctrl_qp.lock);
+ if (err)
+ return err;
+
+ if (cxio_wait(&rdev_p->ctrl_qp,
+ &rdev_p->ctrl_qp.lock,
+ SEQ32_GE(rdev_p->ctrl_qp.rptr, wptr)))
+ return ERESTART;
+
+ return 0;
+}
+
int
cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, __be64 *pbl, u32 *pbl_size,
- u32 *pbl_addr)
+ u8 page_size, u32 pbl_size, u32 pbl_addr)
{
*stag = T3_STAG_UNSET;
return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
- zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+ zbva, to, len, page_size, pbl_size, pbl_addr);
}
int
cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, __be64 *pbl, u32 *pbl_size,
- u32 *pbl_addr)
+ u8 page_size, u32 pbl_size, u32 pbl_addr)
{
return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
- zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+ zbva, to, len, page_size, pbl_size, pbl_addr);
}
int
cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
u32 pbl_addr)
{
- return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
- &pbl_size, &pbl_addr);
+ return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
+ pbl_size, pbl_addr);
}
int
cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
{
- u32 pbl_size = 0;
*stag = T3_STAG_UNSET;
return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
- NULL, &pbl_size, NULL);
+ 0, 0);
}
int
cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
{
- return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
- NULL, NULL);
+ return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
+ 0, 0);
}
int
-cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
+cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr,
+ struct socket *so)
{
struct t3_rdma_init_wr *wqe;
- struct mbuf *m = m_gethdr(MT_DATA, M_NOWAIT);
+ struct mbuf *m;
+ struct ofld_hdr *oh;
+ int rc;
+ struct tcpcb *tp;
+ struct inpcb *inp;
+ struct toepcb *toep;
+
+ m = M_GETHDR_OFLD(0, CPL_PRIORITY_DATA, wqe);
if (m == NULL)
return (-ENOMEM);
CTR2(KTR_IW_CXGB, "%s rdev_p %p", __FUNCTION__, rdev_p);
- wqe = mtod(m, struct t3_rdma_init_wr *);
- m->m_len = m->m_pkthdr.len = sizeof(*wqe);
wqe->wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_INIT));
wqe->wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(attr->tid) |
V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
@@ -940,36 +894,41 @@ cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
wqe->mpaattrs = attr->mpaattrs;
wqe->qpcaps = attr->qpcaps;
wqe->ulpdu_size = htobe16(attr->tcp_emss);
- wqe->flags = htobe32(attr->flags);
+ wqe->rqe_count = htobe16(attr->rqe_count);
+ wqe->flags_rtr_type = htobe16(attr->flags |
+ V_RTR_TYPE(attr->rtr_type) |
+ V_CHAN(attr->chan));
wqe->ord = htobe32(attr->ord);
wqe->ird = htobe32(attr->ird);
wqe->qp_dma_addr = htobe64(attr->qp_dma_addr);
wqe->qp_dma_size = htobe32(attr->qp_dma_size);
wqe->irs = htobe32(attr->irs);
- m_set_priority(m, 0); /* 0=>ToeQ; 1=>CtrlQ */
- m_set_sgl(m, NULL);
- m_set_sgllen(m, 0);
- return (cxgb_ofld_send(rdev_p->t3cdev_p, m));
-}
-void
-cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
-{
- cxio_ev_cb = ev_cb;
-}
-
-void
-cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
-{
- cxio_ev_cb = NULL;
+ /* XXX: bad form, fix later */
+ inp = sotoinpcb(so);
+ INP_WLOCK(inp);
+ tp = intotcpcb(inp);
+ toep = tp->t_toe;
+ oh = mtod(m, struct ofld_hdr *);
+ oh->plen = 0;
+ oh->flags |= F_HDR_DF;
+ enqueue_wr(toep, m);
+ toep->tp_wr_avail--;
+ toep->tp_wr_unacked++;
+ rc = t3_offload_tx(rdev_p->adap, m);
+ INP_WUNLOCK(inp);
+
+ return (rc);
}
static int
-cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct mbuf *m)
+cxio_hal_ev_handler(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- static int cnt;
- struct cxio_rdev *rdev_p = NULL;
+ struct adapter *sc = qs->adap;
+ struct iwch_dev *rnicp = sc->iwarp_softc;
+ struct cxio_rdev *rdev_p = &rnicp->rdev;
struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) m->m_data;
+ int qpid = CQE_QPID(rsp_msg->cqe);
CTR6(KTR_IW_CXGB, "%s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x",
__FUNCTION__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
@@ -978,80 +937,50 @@ cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct mbuf *m)
RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
RSPQ_CREDIT_THRESH(rsp_msg));
CTR4(KTR_IW_CXGB, "CQE: QPID 0x%0x type 0x%0x status 0x%0x opcode %d",
- CQE_QPID(rsp_msg->cqe),
- CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
- CQE_OPCODE(rsp_msg->cqe));
+ qpid, CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+ CQE_OPCODE(rsp_msg->cqe));
CTR3(KTR_IW_CXGB, "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
CQE_LEN(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
- rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
- if (!rdev_p) {
- CTR2(KTR_IW_CXGB, "%s called by t3cdev %p with null ulp", __FUNCTION__,
- t3cdev_p);
- return 0;
- }
- if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
+
+ switch(qpid) {
+ case T3_CTRL_QP_ID:
mtx_lock(&rdev_p->ctrl_qp.lock);
rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
wakeup(&rdev_p->ctrl_qp);
mtx_unlock(&rdev_p->ctrl_qp.lock);
- m_free(m);
- } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8)
- m_free(m);
- else if (cxio_ev_cb)
- (*cxio_ev_cb) (rdev_p, m);
- else
- m_free(m);
- cnt++;
- return 0;
+ break;
+ case 0xfff8:
+ break;
+ default:
+ iwch_ev_dispatch(rnicp, m);
+ }
+
+ m_freem(m);
+ return (0);
}
/* Caller takes care of locking if needed */
int
cxio_rdev_open(struct cxio_rdev *rdev_p)
{
- struct ifnet *ifp;
int err = 0;
+ struct rdma_info *ri = &rdev_p->rnic_info;
+ struct adapter *sc = rdev_p->adap;
- if (strlen(rdev_p->dev_name)) {
- if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) {
- return (-EBUSY);
- }
- ifp = rdev_p->ifp;
- if (ifp == NULL)
- return (-EINVAL);
- if_free(ifp);
- } else if (rdev_p->t3cdev_p) {
- if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p))
- return (-EBUSY);
- ifp = rdev_p->t3cdev_p->lldev;
- strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
- T3_MAX_DEV_NAME_LEN);
- } else {
- CTR1(KTR_IW_CXGB, "%s t3cdev_p or dev_name must be set", __FUNCTION__);
- return (-EINVAL);
- }
-
- TAILQ_INSERT_TAIL(&rdev_list, rdev_p, entry);
+ KASSERT(rdev_p->adap, ("%s: adap is NULL", __func__));
- CTR2(KTR_IW_CXGB, "%s opening rnic dev %s", __FUNCTION__, rdev_p->dev_name);
memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
- if (!rdev_p->t3cdev_p)
- rdev_p->t3cdev_p = T3CDEV(ifp);
- rdev_p->t3cdev_p->ulp = (void *) rdev_p;
- err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
- &(rdev_p->rnic_info));
- if (err) {
- log(LOG_ERR, "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __FUNCTION__, rdev_p->t3cdev_p, err);
- goto err1;
- }
- err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
- &(rdev_p->port_info));
- if (err) {
- log(LOG_ERR, "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __FUNCTION__, rdev_p->t3cdev_p, err);
- goto err1;
- }
+
+ ri->udbell_physbase = rman_get_start(sc->udbs_res);
+ ri->udbell_len = rman_get_size(sc->udbs_res);
+ ri->tpt_base = t3_read_reg(sc, A_ULPTX_TPT_LLIMIT);
+ ri->tpt_top = t3_read_reg(sc, A_ULPTX_TPT_ULIMIT);
+ ri->pbl_base = t3_read_reg(sc, A_ULPTX_PBL_LLIMIT);
+ ri->pbl_top = t3_read_reg(sc, A_ULPTX_PBL_ULIMIT);
+ ri->rqt_base = t3_read_reg(sc, A_ULPRX_RQ_LLIMIT);
+ ri->rqt_top = t3_read_reg(sc, A_ULPRX_RQ_ULIMIT);
+ ri->kdb_addr = (void *)((unsigned long)
+ rman_get_virtual(sc->regs_res) + A_SG_KDOORBELL);
/*
* qpshift is the number of bits to shift the qpid left in order
@@ -1064,8 +993,8 @@ cxio_rdev_open(struct cxio_rdev *rdev_p)
PAGE_SHIFT));
rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
- CTR4(KTR_IW_CXGB, "cxio_rdev_open rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d",
- rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
+ CTR4(KTR_IW_CXGB, "cxio_rdev_open rnic %p info: tpt_base 0x%0x tpt_top 0x%0x num stags %d",
+ rdev_p->adap, rdev_p->rnic_info.tpt_base,
rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p));
CTR4(KTR_IW_CXGB, "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x",
rdev_p->rnic_info.pbl_base,
@@ -1111,43 +1040,34 @@ err3:
err2:
cxio_hal_destroy_ctrl_qp(rdev_p);
err1:
- TAILQ_REMOVE(&rdev_list, rdev_p, entry);
return err;
}
void
cxio_rdev_close(struct cxio_rdev *rdev_p)
{
- if (rdev_p) {
- cxio_hal_pblpool_destroy(rdev_p);
- cxio_hal_rqtpool_destroy(rdev_p);
- TAILQ_REMOVE(&rdev_list, rdev_p, entry);
- rdev_p->t3cdev_p->ulp = NULL;
- cxio_hal_destroy_ctrl_qp(rdev_p);
- cxio_hal_destroy_resource(rdev_p->rscp);
- }
+ cxio_hal_pblpool_destroy(rdev_p);
+ cxio_hal_rqtpool_destroy(rdev_p);
+ cxio_hal_destroy_ctrl_qp(rdev_p);
+ cxio_hal_destroy_resource(rdev_p->rscp);
}
int
-cxio_hal_init(void)
+cxio_hal_init(struct adapter *sc)
{
- TAILQ_INIT(&rdev_list);
#ifdef needed
if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI))
- return (-ENOMEM);
+ return (ENOMEM);
#endif
- t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
- return 0;
+ t3_register_cpl_handler(sc, CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
+
+ return (0);
}
void
-cxio_hal_exit(void)
+cxio_hal_uninit(struct adapter *sc)
{
- struct cxio_rdev *rdev, *tmp;
-
- t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL);
- TAILQ_FOREACH_SAFE(rdev, &rdev_list, entry, tmp)
- cxio_rdev_close(rdev);
+ t3_register_cpl_handler(sc, CPL_ASYNC_NOTIF, NULL);
#ifdef needed
cxio_hal_destroy_rhdl_resource();
#endif
@@ -1304,11 +1224,12 @@ cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
}
/* incoming SEND with no receive posted failures */
- if ((CQE_OPCODE(*hw_cqe) == T3_SEND) && RQ_TYPE(*hw_cqe) &&
+ if (CQE_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) &&
Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
ret = -1;
goto skip_cqe;
}
+ PANIC_IF((*cqe_flushed == 0) && !SW_CQE(*hw_cqe));
goto proc_cqe;
}
@@ -1323,6 +1244,13 @@ cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
* then we complete this with TPT_ERR_MSN and mark the wq in
* error.
*/
+
+ if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
+ wq->error = 1;
+ ret = -1;
+ goto skip_cqe;
+ }
+
if (__predict_false((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
wq->error = 1;
hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
@@ -1367,13 +1295,17 @@ proc_cqe:
wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
CTR2(KTR_IW_CXGB, "%s completing sq idx %ld", __FUNCTION__,
Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
- *cookie = (wq->sq +
- Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id;
+ *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
wq->sq_rptr++;
} else {
CTR2(KTR_IW_CXGB, "%s completing rq idx %ld", __FUNCTION__,
Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
- *cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+ *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
+ if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
+ cxio_hal_pblpool_free(wq->rdev,
+ wq->rq[Q_PTR2IDX(wq->rq_rptr,
+ wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
+ PANIC_IF(Q_EMPTY(wq->rq_rptr, wq->rq_wptr));
wq->rq_rptr++;
}
@@ -1404,5 +1336,4 @@ skip_cqe:
}
return ret;
}
-
-
+#endif
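
The cxio_flush_sq() fix above is easy to misread: the old loop stepped its software-queue pointer linearly (sqp += count, then sqp++), which walks off the end of the array whenever rptr + count wraps the ring. The replacement recomputes the slot with Q_PTR2IDX on every iteration; with a power-of-two ring, the free-running 32-bit pointer is simply masked down to an index, so the walk wraps by construction. A minimal stand-alone demonstration of the idiom (SIZE_LOG2 and the macro are re-derived here, not taken from the driver's headers):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SIZE_LOG2 3                         /* 8-slot ring */
    #define Q_PTR2IDX(ptr, log2) ((ptr) & ((1U << (log2)) - 1))

    int
    main(void)
    {
            int ring[1 << SIZE_LOG2] = { 0 };
            uint32_t rptr = 6, wptr = 11;       /* 5 entries in flight, wrapping */
            uint32_t ptr;

            for (ptr = rptr; ptr != wptr; ptr++) {
                    /* Recompute the slot each step; never walk a raw pointer. */
                    ring[Q_PTR2IDX(ptr, SIZE_LOG2)] = (int)ptr;
                    printf("ptr %u -> idx %u\n", (unsigned)ptr,
                        (unsigned)Q_PTR2IDX(ptr, SIZE_LOG2));
            }
            /* Slots 6, 7, 0, 1, 2 were written; a pointer stepped with
             * plain increments would have marched past ring[7] instead. */
            assert(ring[6] == 6 && ring[0] == 8 && ring[2] == 10);
            return (0);
    }

The same masking is why cxio_flush_rq() and the CQ code can let rptr/wptr overflow freely: comparing the raw pointers still detects empty, and the mask recovers the slot.
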
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h
index 6a401e0..6b5f948 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h
@@ -45,7 +45,11 @@ $FreeBSD$
#define T3_MAX_NUM_PD (1<<15)
#define T3_MAX_PBL_SIZE 256
#define T3_MAX_RQ_SIZE 1024
+#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
+#define T3_MAX_CQ_DEPTH 65536
#define T3_MAX_NUM_STAG (1<<15)
+#define T3_MAX_MR_SIZE 0x100000000ULL
+#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
#define T3_STAG_UNSET 0xffffffff
@@ -55,12 +59,9 @@ struct cxio_hal_ctrl_qp {
u32 wptr;
u32 rptr;
struct mtx lock; /* for the wtpr, can sleep */
-#ifdef notyet
- DECLARE_PCI_UNMAP_ADDR(mapping)
-#endif
union t3_wr *workq; /* the work request queue */
bus_addr_t dma_addr; /* pci bus address of the workq */
- void /* __iomem */ *doorbell;
+ void *doorbell;
};
struct cxio_hal_resource {
@@ -85,13 +86,10 @@ struct cxio_ucontext {
};
struct cxio_rdev {
- char dev_name[T3_MAX_DEV_NAME_LEN];
- struct t3cdev *t3cdev_p;
+ struct adapter *adap;
struct rdma_info rnic_info;
- struct adap_ports port_info;
struct cxio_hal_resource *rscp;
struct cxio_hal_ctrl_qp ctrl_qp;
- void *ulp;
unsigned long qpshift;
u32 qpnr;
u32 qpmask;
@@ -139,9 +137,8 @@ int cxio_rdev_open(struct cxio_rdev *rdev);
void cxio_rdev_close(struct cxio_rdev *rdev);
int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
enum t3_cq_opcode op, u32 credit);
-int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel);
int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
-int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
@@ -149,27 +146,27 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
struct cxio_ucontext *uctx);
int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
+int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
+ u32 pbl_addr, u32 pbl_size);
int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, __be64 *pbl, u32 *pbl_size,
- u32 *pbl_addr);
+ u8 page_size, u32 pbl_size, u32 pbl_addr);
int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, __be64 *pbl, u32 *pbl_size,
- u32 *pbl_addr);
+ u8 page_size, u32 pbl_size, u32 pbl_addr);
int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
u32 pbl_addr);
int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
-int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
-void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
-void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
+int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr,
+ struct socket *so);
u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
-int cxio_hal_init(void);
+int cxio_hal_init(struct adapter *);
+void cxio_hal_uninit(struct adapter *);
void cxio_hal_exit(void);
-void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
-void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
+int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
+int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
void cxio_flush_hw_cq(struct t3_cq *cq);
@@ -178,7 +175,7 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
#define MOD "iw_cxgb: "
-#ifdef DEBUG
+#ifdef INVARIANTS
void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint32_t len, u8 shift);
void cxio_dump_wqe(union t3_wr *wqe);
@@ -187,60 +184,7 @@ void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents);
void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid);
#endif
-
- static unsigned char hiBitSetTab[] = {
- 0, 1, 2, 2, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4,
- 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7
-
-};
-
-
-static __inline
-int ilog2(unsigned long val)
-{
- unsigned long tmp;
-
- tmp = val >> 24;
- if (tmp) {
- return hiBitSetTab[tmp] + 23;
- }
- tmp = (val >> 16) & 0xff;
- if (tmp) {
- return hiBitSetTab[tmp] + 15;
- }
- tmp = (val >> 8) & 0xff;
- if (tmp) {
- return hiBitSetTab[tmp] + 7;
-
- }
- return hiBitSetTab[val & 0xff] - 1;
-}
-
#define cxfree(a) free((a), M_DEVBUF);
-#define kmalloc(a, b) malloc((a), M_DEVBUF, (b))
-#define kzalloc(a, b) malloc((a), M_DEVBUF, (b)|M_ZERO)
-
-static __inline __attribute__((const))
-unsigned long roundup_pow_of_two(unsigned long n)
-{
- return 1UL << flsl(n - 1);
-}
-
-#define PAGE_ALIGN(x) roundup2((x), PAGE_SIZE)
#include <sys/blist.h>
struct gen_pool {
@@ -259,6 +203,7 @@ gen_pool_create(daddr_t base, u_int chunk_shift, u_int len)
if (gp == NULL)
return (NULL);
+ memset(gp, 0, sizeof(struct gen_pool));
gp->gen_list = blist_create(len >> chunk_shift, M_NOWAIT);
if (gp->gen_list == NULL) {
free(gp, M_DEVBUF);
@@ -323,8 +268,7 @@ gen_pool_destroy(struct gen_pool *gp)
mtx_unlock(lockp); \
__ret; \
})
-extern struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev);
-#define KTR_IW_CXGB KTR_SPARE4
+#define KTR_IW_CXGB KTR_SPARE3
#endif
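
The header cleanup above drops a private 256-byte lookup table and hand-rolled ilog2()/roundup_pow_of_two() in favor of the system's fls-family helpers (FreeBSD's libkern provides flsl()). Both operations collapse to one-liners once "index of the highest set bit" is available; a portable sketch built on the GCC/Clang __builtin_clzl (the my_ prefix marks these as stand-ins, not the kernel's spellings):

    #include <assert.h>

    /* flsl(x): 1-based index of the highest set bit; 0 when x == 0. */
    static inline int
    my_flsl(unsigned long x)
    {
            return (x ? (int)(sizeof(x) * 8) - __builtin_clzl(x) : 0);
    }

    static inline int
    my_ilog2(unsigned long x)                   /* floor(log2(x)), x > 0 */
    {
            return (my_flsl(x) - 1);
    }

    static inline unsigned long
    my_roundup_pow_of_two(unsigned long n)      /* n > 1 */
    {
            return (1UL << my_flsl(n - 1));
    }

    int
    main(void)
    {
            assert(my_ilog2(1) == 0 && my_ilog2(1025) == 10);
            assert(my_roundup_pow_of_two(1000) == 1024);
            assert(my_roundup_pow_of_two(1024) == 1024);
            return (0);
    }
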
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h
new file mode 100644
index 0000000..7c7cd24
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ib_intfc.h
@@ -0,0 +1,22 @@
+#ifndef __IB_INTFC_H__
+#define __IB_INTFC_H__
+
+/* $FreeBSD$ */
+
+#undef prefetch
+#undef WARN_ON
+#undef max_t
+#undef udelay
+#undef le32_to_cpu
+#undef le16_to_cpu
+#undef cpu_to_le32
+#undef swab32
+#undef container_of
+
+#undef LIST_HEAD
+#define LIST_HEAD(name, type) \
+struct name { \
+ struct type *lh_first; /* first element */ \
+}
+
+#endif /* __IB_INTFC_H__ */
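
The new iw_cxgb_ib_intfc.h exists only to referee macro collisions: the rdma/ headers bring in Linux-compat definitions of prefetch, container_of, le32_to_cpu, LIST_HEAD and friends that clash with the native cxgb and queue(3) versions, so the shim #undefs the Linux spellings and restores the two-argument BSD LIST_HEAD. The collision-and-shim pattern in miniature (both "headers" below are inline stand-ins):

    #include <stdio.h>

    /* Header A (Linux style): LIST_HEAD both declares and initializes. */
    struct list_head { struct list_head *next, *prev; };
    #define LIST_HEAD(name) struct list_head name = { &name, &name }

    /* The shim: later consumers want the queue(3) form, so re-point the
     * macro.  Without the #undef, the redefinition draws a diagnostic. */
    #undef LIST_HEAD
    #define LIST_HEAD(name, type)               \
    struct name {                               \
            struct type *lh_first;              \
    }

    /* Header B (BSD style) now works as written. */
    struct entry { int v; struct entry *next; };
    LIST_HEAD(entry_list, entry);

    int
    main(void)
    {
            struct entry_list el = { NULL };
            printf("empty: %s\n", el.lh_first == NULL ? "yes" : "no");
            return (0);
    }
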
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c
index c052aa6..0ccb70f 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c
@@ -29,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
-#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@@ -59,9 +61,11 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
@@ -72,14 +76,24 @@ __FBSDID("$FreeBSD$");
#include <ulp/iw_cxgb/iw_cxgb_resource.h>
#include <ulp/iw_cxgb/iw_cxgb_user.h>
+static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
+{
+ u32 mmid;
+
+ mhp->attr.state = 1;
+ mhp->attr.stag = stag;
+ mmid = stag >> 8;
+ mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __func__, mmid, mhp);
+ return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
+}
+
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
- int shift,
- __be64 *page_list)
+ int shift)
{
u32 stag;
- u32 mmid;
-
+ int ret;
if (cxio_register_phys_mem(&rhp->rdev,
&stag, mhp->attr.pdid,
@@ -87,28 +101,24 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
mhp->attr.zbva,
mhp->attr.va_fbo,
mhp->attr.len,
- shift-12,
- page_list,
- &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+ shift - 12,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr))
return (-ENOMEM);
- mhp->attr.state = 1;
- mhp->attr.stag = stag;
- mmid = stag >> 8;
- mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- insert_handle(rhp, &rhp->mmidr, mhp, mmid);
- CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __FUNCTION__, mmid, mhp);
- return 0;
+
+ ret = iwch_finish_mem_reg(mhp, stag);
+ if (ret)
+ cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+ return ret;
}
int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
- __be64 *page_list,
int npages)
{
u32 stag;
- u32 mmid;
-
+ int ret;
/* We could support this... */
if (npages > mhp->attr.pbl_size)
@@ -121,17 +131,40 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
mhp->attr.zbva,
mhp->attr.va_fbo,
mhp->attr.len,
- shift-12,
- page_list,
- &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+ shift - 12,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr))
return (-ENOMEM);
- mhp->attr.state = 1;
- mhp->attr.stag = stag;
- mmid = stag >> 8;
- mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- insert_handle(rhp, &rhp->mmidr, mhp, mmid);
- CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __FUNCTION__, mmid, mhp);
+
+ ret = iwch_finish_mem_reg(mhp, stag);
+ if (ret)
+ cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+ return ret;
+}
+
+int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
+{
+ mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
+ npages << 3);
+
+ if (!mhp->attr.pbl_addr)
+ return -ENOMEM;
+
+ mhp->attr.pbl_size = npages;
+
return 0;
+}
+
+void iwch_free_pbl(struct iwch_mr *mhp)
+{
+ cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+ mhp->attr.pbl_size << 3);
+}
+
+int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
+{
+ return cxio_write_pbl(&mhp->rhp->rdev, pages,
+ mhp->attr.pbl_addr + (offset << 3), npages);
}
int build_phys_page_list(struct ib_phys_buf *buffer_list,
@@ -204,3 +237,4 @@ int build_phys_page_list(struct ib_phys_buf *buffer_list,
return 0;
}
+#endif
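
Registration in mem.c is now three separable steps: reserve PBL space (iwch_alloc_pbl), stream the page list into it (iwch_write_pbl), then write the TPT entry (iwch_register_mem), with iwch_finish_mem_reg() as the common tail and each failure path unwinding exactly what already succeeded. Reduced to its goto-unwind skeleton (every function below is a placeholder for the corresponding driver step, not its real signature):

    #include <errno.h>
    #include <stdlib.h>

    static int  alloc_pbl(void **p) { *p = malloc(64); return (*p ? 0 : ENOMEM); }
    static int  write_pbl(void *p)  { (void)p; return (0); }
    static int  write_tpt(void *p)  { (void)p; return (0); }
    static void free_pbl(void *p)   { free(p); }

    static int
    register_mem(void)
    {
            void *pbl;
            int err;

            err = alloc_pbl(&pbl);              /* 1: reserve adapter memory */
            if (err)
                    return (err);

            err = write_pbl(pbl);               /* 2: fill the page list */
            if (err)
                    goto err_pbl;

            err = write_tpt(pbl);               /* 3: publish the region */
            if (err)
                    goto err_pbl;

            return (0);

    err_pbl:
            free_pbl(pbl);                      /* undo only what step 1 did */
            return (err);
    }

    int
    main(void)
    {
            return (register_mem());
    }

Splitting the PBL write out of __cxio_tpt_op() is also what lets iwch_reg_user_mr() push a large page list down in chunks rather than as one oversized control-QP write.
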
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
index bc336a4..4961395 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
@@ -29,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
-#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@@ -62,9 +64,12 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
+
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
@@ -180,6 +185,8 @@ iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
struct iwch_create_cq_resp uresp;
struct iwch_create_cq_req ureq;
struct iwch_ucontext *ucontext = NULL;
+ static int warned;
+ size_t resplen;
CTR3(KTR_IW_CXGB, "%s ib_dev %p entries %d", __FUNCTION__, ibdev, entries);
rhp = to_iwch_dev(ibdev);
@@ -214,7 +221,7 @@ iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
entries = roundup_pow_of_two(entries);
chp->cq.size_log2 = ilog2(entries);
- if (cxio_create_cq(&rhp->rdev, &chp->cq)) {
+ if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) {
cxfree(chp);
return ERR_PTR(-ENOMEM);
}
@@ -222,7 +229,11 @@ iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
chp->ibcq.cqe = 1 << chp->cq.size_log2;
mtx_init(&chp->lock, "cxgb cq", NULL, MTX_DEF|MTX_DUPOK);
chp->refcnt = 1;
- insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+ if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) {
+ cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
+ cxfree(chp);
+ return ERR_PTR(-ENOMEM);
+ }
if (ucontext) {
struct iwch_mm_entry *mm;
@@ -238,15 +249,27 @@ iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
uresp.key = ucontext->key;
ucontext->key += PAGE_SIZE;
mtx_unlock(&ucontext->mmap_lock);
- if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+ mm->key = uresp.key;
+ mm->addr = vtophys(chp->cq.queue);
+ if (udata->outlen < sizeof uresp) {
+ if (!warned++)
+ CTR1(KTR_IW_CXGB, "%s Warning - "
+ "downlevel libcxgb3 (non-fatal).\n",
+ __func__);
+ mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
+ sizeof(struct t3_cqe));
+ resplen = sizeof(struct iwch_create_cq_resp_v0);
+ } else {
+ mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) *
+ sizeof(struct t3_cqe));
+ uresp.memsize = mm->len;
+ resplen = sizeof uresp;
+ }
+ if (ib_copy_to_udata(udata, &uresp, resplen)) {
cxfree(mm);
iwch_destroy_cq(&chp->ibcq);
return ERR_PTR(-EFAULT);
}
- mm->key = uresp.key;
- mm->addr = vtophys(chp->cq.queue);
- mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
- sizeof (struct t3_cqe));
insert_mmap(ucontext, mm);
}
CTR4(KTR_IW_CXGB, "created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx",
@@ -256,72 +279,11 @@ iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
}
static int
-iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+iwch_resize_cq(struct ib_cq *cq __unused, int cqe __unused,
+ struct ib_udata *udata __unused)
{
-#ifdef notyet
- struct iwch_cq *chp = to_iwch_cq(cq);
- struct t3_cq oldcq, newcq;
- int ret;
-
- CTR3(KTR_IW_CXGB, "%s ib_cq %p cqe %d", __FUNCTION__, cq, cqe);
-
- /* We don't downsize... */
- if (cqe <= cq->cqe)
- return 0;
-
- /* create new t3_cq with new size */
- cqe = roundup_pow_of_two(cqe+1);
- newcq.size_log2 = ilog2(cqe);
-
- /* Dont allow resize to less than the current wce count */
- if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) {
- return (-ENOMEM);
- }
- /* Quiesce all QPs using this CQ */
- ret = iwch_quiesce_qps(chp);
- if (ret) {
- return (ret);
- }
-
- ret = cxio_create_cq(&chp->rhp->rdev, &newcq);
- if (ret) {
- return (ret);
- }
-
- /* copy CQEs */
- memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) *
- sizeof(struct t3_cqe));
-
- /* old iwch_qp gets new t3_cq but keeps old cqid */
- oldcq = chp->cq;
- chp->cq = newcq;
- chp->cq.cqid = oldcq.cqid;
-
- /* resize new t3_cq to update the HW context */
- ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq);
- if (ret) {
- chp->cq = oldcq;
- return ret;
- }
- chp->ibcq.cqe = (1<<chp->cq.size_log2) - 1;
-
- /* destroy old t3_cq */
- oldcq.cqid = newcq.cqid;
- ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
- if (ret) {
- log(LOG_ERR, "%s - cxio_destroy_cq failed %d\n",
- __FUNCTION__, ret);
- }
-
- /* add user hooks here */
-
- /* resume qps */
- ret = iwch_resume_qps(chp);
- return ret;
-#else
return (-ENOSYS);
-#endif
}
static int
@@ -357,67 +319,12 @@ iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
return err;
}
-#ifdef notyet
static int
-iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+iwch_mmap(struct ib_ucontext *context __unused, struct vm_area_struct *vma __unused)
{
-#ifdef notyet
- int len = vma->vm_end - vma->vm_start;
- u32 key = vma->vm_pgoff << PAGE_SHIFT;
- struct cxio_rdev *rdev_p;
- int ret = 0;
- struct iwch_mm_entry *mm;
- struct iwch_ucontext *ucontext;
- u64 addr;
-
- CTR4(KTR_IW_CXGB, "%s pgoff 0x%lx key 0x%x len %d", __FUNCTION__, vma->vm_pgoff,
- key, len);
-
- if (vma->vm_start & (PAGE_SIZE-1)) {
- return (-EINVAL);
- }
-
- rdev_p = &(to_iwch_dev(context->device)->rdev);
- ucontext = to_iwch_ucontext(context);
-
- mm = remove_mmap(ucontext, key, len);
- if (!mm)
- return (-EINVAL);
- addr = mm->addr;
- cxfree(mm);
-
- if ((addr >= rdev_p->rnic_info.udbell_physbase) &&
- (addr < (rdev_p->rnic_info.udbell_physbase +
- rdev_p->rnic_info.udbell_len))) {
-
- /*
- * Map T3 DB register.
- */
- if (vma->vm_flags & VM_READ) {
- return (-EPERM);
- }
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
- vma->vm_flags &= ~VM_MAYREAD;
- ret = io_remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
- } else {
-
- /*
- * Map WQ or CQ contig dma memory...
- */
- ret = remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
- }
-
- return ret;
-#endif
- return (0);
+ return (-ENOSYS);
}
-#endif
static int iwch_deallocate_pd(struct ib_pd *pd)
{
@@ -470,7 +377,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
CTR2(KTR_IW_CXGB, "%s ib_mr %p", __FUNCTION__, ib_mr);
/* There can be no memory windows */
- if (atomic_load_acq_int(&ib_mr->usecnt))
+ if (atomic_load_acq_int(&ib_mr->usecnt.counter))
return (-EINVAL);
mhp = to_iwch_mr(ib_mr);
@@ -478,6 +385,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
mmid = mhp->attr.stag >> 8;
cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
mhp->attr.pbl_addr);
+ iwch_free_pbl(mhp);
remove_handle(rhp, &rhp->mmidr, mmid);
if (mhp->kva)
cxfree((void *) (unsigned long) mhp->kva);
@@ -511,6 +419,8 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
if (!mhp)
return ERR_PTR(-ENOMEM);
+ mhp->rhp = rhp;
+
/* First check that we have enough alignment */
if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
ret = -EINVAL;
@@ -528,7 +438,17 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
if (ret)
goto err;
- mhp->rhp = rhp;
+ ret = iwch_alloc_pbl(mhp, npages);
+ if (ret) {
+ cxfree(page_list);
+ goto err_pbl;
+ }
+
+ ret = iwch_write_pbl(mhp, page_list, npages, 0);
+ cxfree(page_list);
+ if (ret)
+ goto err;
+
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
@@ -538,15 +458,18 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
mhp->attr.len = (u32) total_size;
mhp->attr.pbl_size = npages;
- ret = iwch_register_mem(rhp, php, mhp, shift, page_list);
- cxfree(page_list);
- if (ret) {
- goto err;
- }
+ ret = iwch_register_mem(rhp, php, mhp, shift);
+ if (ret)
+ goto err_pbl;
+
return &mhp->ibmr;
+
+err_pbl:
+ iwch_free_pbl(mhp);
+
err:
cxfree(mhp);
- return ERR_PTR(-ret);
+ return ERR_PTR(ret);
}
@@ -570,7 +493,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
CTR3(KTR_IW_CXGB, "%s ib_mr %p ib_pd %p", __FUNCTION__, mr, pd);
/* There can be no memory windows */
- if (atomic_load_acq_int(&mr->usecnt))
+ if (atomic_load_acq_int(&mr->usecnt.counter))
return (-EINVAL);
mhp = to_iwch_mr(mr);
@@ -596,7 +519,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
return ret;
}
- ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
+ ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
cxfree(page_list);
if (ret) {
return ret;
@@ -640,7 +563,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mhp)
return ERR_PTR(-ENOMEM);
- mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ mhp->rhp = rhp;
+
+ mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(mhp->umem)) {
err = PTR_ERR(mhp->umem);
cxfree(mhp);
@@ -650,18 +575,22 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = ffs(mhp->umem->page_size) - 1;
n = 0;
- TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry)
+ list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
n += chunk->nents;
- pages = kmalloc(n * sizeof(u64), M_NOWAIT);
+ err = iwch_alloc_pbl(mhp, n);
+ if (err)
+ goto err;
+
+ pages = (__be64 *) kmalloc(n * sizeof(u64), M_NOWAIT);
if (!pages) {
err = -ENOMEM;
- goto err;
+ goto err_pbl;
}
i = n = 0;
-#if 0
+#ifdef notyet
TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
@@ -669,21 +598,36 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
pages[i++] = htobe64(sg_dma_address(
&chunk->page_list[j]) +
mhp->umem->page_size * k);
+ if (i == PAGE_SIZE / sizeof *pages) {
+ err = iwch_write_pbl(mhp, pages, i, n);
+ if (err)
+ goto pbl_done;
+ n += i;
+ i = 0;
+ }
}
}
#endif
- mhp->rhp = rhp;
+
+ if (i)
+ err = iwch_write_pbl(mhp, pages, i, n);
+#ifdef notyet
+pbl_done:
+#endif
+ cxfree(pages);
+ if (err)
+ goto err_pbl;
+
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
mhp->attr.perms = iwch_ib_to_tpt_access(acc);
mhp->attr.va_fbo = virt;
mhp->attr.page_size = shift - 12;
mhp->attr.len = (u32) length;
- mhp->attr.pbl_size = i;
- err = iwch_register_mem(rhp, php, mhp, shift, pages);
- cxfree(pages);
+
+ err = iwch_register_mem(rhp, php, mhp, shift);
if (err)
- goto err;
+ goto err_pbl;
if (udata && !t3a_device(rhp)) {
uresp.pbl_addr = (mhp->attr.pbl_addr -
@@ -700,6 +644,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return &mhp->ibmr;
+err_pbl:
+ iwch_free_pbl(mhp);
+
err:
ib_umem_release(mhp->umem);
cxfree(mhp);
@@ -748,7 +695,12 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
mhp->attr.type = TPT_MW;
mhp->attr.stag = stag;
mmid = (stag) >> 8;
- insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ mhp->ibmw.rkey = stag;
+ if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
+ cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
+ cxfree(mhp);
+ return ERR_PTR(-ENOMEM);
+ }
CTR4(KTR_IW_CXGB, "%s mmid 0x%x mhp %p stag 0x%x", __FUNCTION__, mmid, mhp, stag);
return &(mhp->ibmw);
}
@@ -893,7 +845,13 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
mtx_init(&qhp->lock, "cxgb qp", NULL, MTX_DEF|MTX_DUPOK);
qhp->refcnt = 1;
- insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid);
+
+ if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) {
+ cxio_destroy_qp(&rhp->rdev, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ cxfree(qhp);
+ return ERR_PTR(-ENOMEM);
+ }
if (udata) {
@@ -1023,12 +981,14 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port,
{
struct iwch_dev *dev;
struct port_info *pi;
+ struct adapter *sc;
CTR5(KTR_IW_CXGB, "%s ibdev %p, port %d, index %d, gid %p",
__FUNCTION__, ibdev, port, index, gid);
dev = to_iwch_dev(ibdev);
+ sc = dev->rdev.adap;
PANIC_IF(port == 0 || port > 2);
- pi = ((struct port_info *)dev->rdev.port_info.lldevs[port-1]->if_softc);
+ pi = &sc->port[port - 1];
memset(&(gid->raw[0]), 0, sizeof(gid->raw));
memcpy(&(gid->raw[0]), pi->hw_addr, 6);
return 0;
@@ -1037,21 +997,20 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port,
static int iwch_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
-
struct iwch_dev *dev;
+ struct adapter *sc;
+
CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev);
dev = to_iwch_dev(ibdev);
+ sc = dev->rdev.adap;
memset(props, 0, sizeof *props);
-#ifdef notyet
- memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->if_addr.ifa_addr, 6);
-#endif
+ memcpy(&props->sys_image_guid, sc->port[0].hw_addr, 6);
props->device_cap_flags = dev->device_cap_flags;
-#ifdef notyet
- props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
- props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
-#endif
- props->max_mr_size = ~0ull;
+ props->page_size_cap = dev->attr.mem_pgsizes_bitmask;
+ props->vendor_id = pci_get_vendor(sc->dev);
+ props->vendor_part_id = pci_get_device(sc->dev);
+ props->max_mr_size = dev->attr.max_mr_size;
props->max_qp = dev->attr.max_qps;
props->max_qp_wr = dev->attr.max_wrs;
props->max_sge = dev->attr.max_sge_per_wr;
@@ -1071,13 +1030,10 @@ static int iwch_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev);
+ memset(props, 0, sizeof(struct ib_port_attr));
props->max_mtu = IB_MTU_4096;
- props->lid = 0;
- props->lmc = 0;
- props->sm_lid = 0;
- props->sm_sl = 0;
+ props->active_mtu = IB_MTU_2048;
props->state = IB_PORT_ACTIVE;
- props->phys_state = 0;
props->port_cap_flags =
IB_PORT_CM_SUP |
IB_PORT_SNMP_TUNNEL_SUP |
@@ -1086,7 +1042,6 @@ static int iwch_query_port(struct ib_device *ibdev,
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
props->pkey_tbl_len = 1;
- props->qkey_viol_cntr = 0;
props->active_width = 2;
props->active_speed = 2;
props->max_msg_sz = -1;
@@ -1094,80 +1049,18 @@ static int iwch_query_port(struct ib_device *ibdev,
return 0;
}
-#ifdef notyet
-static ssize_t show_rev(struct class_device *cdev, char *buf)
-{
- struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
- ibdev.class_dev);
- CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev);
- return sprintf(buf, "%d\n", dev->rdev.t3cdev_p->type);
-}
-
-static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
-{
- struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
- ibdev.class_dev);
- struct ethtool_drvinfo info;
- struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
-
- CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev);
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
- return sprintf(buf, "%s\n", info.fw_version);
-}
-
-static ssize_t show_hca(struct class_device *cdev, char *buf)
-{
- struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
- ibdev.class_dev);
- struct ethtool_drvinfo info;
- struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
-
- CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev);
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
- return sprintf(buf, "%s\n", info.driver);
-}
-
-static ssize_t show_board(struct class_device *cdev, char *buf)
-{
- struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
- ibdev.class_dev);
- CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, dev);
-#ifdef notyet
- return sprintf(buf, "%x.%x\n", dev->rdev.rnic_info.pdev->vendor,
- dev->rdev.rnic_info.pdev->device);
-#else
- return sprintf(buf, "%x.%x\n", 0xdead, 0xbeef); /* XXX */
-#endif
-}
-
-static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
-static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-
-static struct class_device_attribute *iwch_class_attributes[] = {
- &class_device_attr_hw_rev,
- &class_device_attr_fw_ver,
- &class_device_attr_hca_type,
- &class_device_attr_board_id
-};
-#endif
-
int iwch_register_device(struct iwch_dev *dev)
{
int ret;
-#ifdef notyet
- int i;
-#endif
+ struct adapter *sc = dev->rdev.adap;
+
CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, dev);
strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
-#ifdef notyet
- memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
-#endif
+ memcpy(&dev->ibdev.node_guid, sc->port[0].hw_addr, 6);
dev->device_cap_flags =
- (IB_DEVICE_ZERO_STAG |
- IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+ (IB_DEVICE_LOCAL_DMA_LKEY |
+ IB_DEVICE_MEM_WINDOW);
dev->ibdev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -1189,9 +1082,9 @@ int iwch_register_device(struct iwch_dev *dev)
(1ull << IB_USER_VERBS_CMD_POST_RECV);
dev->ibdev.node_type = RDMA_NODE_RNIC;
memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
- dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
+ dev->ibdev.phys_port_cnt = sc->params.nports;
dev->ibdev.num_comp_vectors = 1;
- dev->ibdev.dma_device = dev->rdev.rnic_info.pdev;
+ dev->ibdev.dma_device = dev->rdev.adap->dev;
dev->ibdev.query_device = iwch_query_device;
dev->ibdev.query_port = iwch_query_port;
dev->ibdev.modify_port = iwch_modify_port;
@@ -1199,9 +1092,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.query_gid = iwch_query_gid;
dev->ibdev.alloc_ucontext = iwch_alloc_ucontext;
dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext;
-#ifdef notyet
dev->ibdev.mmap = iwch_mmap;
-#endif
dev->ibdev.alloc_pd = iwch_allocate_pd;
dev->ibdev.dealloc_pd = iwch_deallocate_pd;
dev->ibdev.create_ah = iwch_ah_create;
@@ -1229,11 +1120,13 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.req_notify_cq = iwch_arm_cq;
dev->ibdev.post_send = iwch_post_send;
dev->ibdev.post_recv = iwch_post_receive;
-
+ dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
dev->ibdev.iwcm =
- (struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs),
- M_NOWAIT);
+ kmalloc(sizeof(struct iw_cm_verbs), M_NOWAIT);
+ if (!dev->ibdev.iwcm)
+ return (ENOMEM);
+
dev->ibdev.iwcm->connect = iwch_connect;
dev->ibdev.iwcm->accept = iwch_accept_cr;
dev->ibdev.iwcm->reject = iwch_reject_cr;
@@ -1246,35 +1139,19 @@ int iwch_register_device(struct iwch_dev *dev)
ret = ib_register_device(&dev->ibdev);
if (ret)
goto bail1;
-#ifdef notyet
- for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
- ret = class_device_create_file(&dev->ibdev.class_dev,
- iwch_class_attributes[i]);
- if (ret) {
- goto bail2;
- }
- }
-#endif
- return 0;
-#ifdef notyet
-bail2:
-#endif
- ib_unregister_device(&dev->ibdev);
+
+ return (0);
+
bail1:
- return ret;
+ cxfree(dev->ibdev.iwcm);
+ return (ret);
}
void iwch_unregister_device(struct iwch_dev *dev)
{
-#ifdef notyet
- int i;
- CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, dev);
-
- for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
- class_device_remove_file(&dev->ibdev.class_dev,
- iwch_class_attributes[i]);
-#endif
ib_unregister_device(&dev->ibdev);
+ cxfree(dev->ibdev.iwcm);
return;
}
+#endif
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h
index c857ce8..2e012fd 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h
@@ -31,7 +31,7 @@ $FreeBSD$
#ifndef __IWCH_PROVIDER_H__
#define __IWCH_PROVIDER_H__
-#include <contrib/rdma/ib_verbs.h>
+#include <rdma/ib_verbs.h>
struct iwch_pd {
struct ib_pd ibpd;
@@ -116,6 +116,7 @@ enum IWCH_QP_FLAGS {
};
struct iwch_mpa_attributes {
+ u8 initiator;
u8 recv_marker_enabled;
u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */
u8 crc_enabled;
@@ -336,18 +337,17 @@ int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
int iwch_register_device(struct iwch_dev *dev);
void iwch_unregister_device(struct iwch_dev *dev);
-int iwch_quiesce_qps(struct iwch_cq *chp);
-int iwch_resume_qps(struct iwch_cq *chp);
void stop_read_rep_timer(struct iwch_qp *qhp);
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
- int shift,
- __be64 *page_list);
+ int shift);
int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp,
int shift,
- __be64 *page_list,
int npages);
+int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
+void iwch_free_pbl(struct iwch_mr *mhp);
+int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
int build_phys_page_list(struct ib_phys_buf *buffer_list,
int num_phys_buf,
u64 *iova_start,
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c
index fd51498..7734fd0 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c
@@ -1,4 +1,3 @@
-
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
@@ -30,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
-#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@@ -48,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
+#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
@@ -57,14 +59,26 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/queue.h>
+#include <net/route.h>
+#include <netinet/in_systm.h>
#include <netinet/in.h>
-
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <netinet/toecore.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcpip.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/tom/cxgb_l2t.h>
+#include <ulp/tom/cxgb_toepcb.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
@@ -75,7 +89,7 @@ __FBSDID("$FreeBSD$");
#define NO_SUPPORT -1
-static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
+static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
u8 * flit_cnt)
{
int i;
@@ -83,59 +97,46 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_SEND:
- case IB_WR_SEND_WITH_IMM:
if (wr->send_flags & IB_SEND_SOLICITED)
wqe->send.rdmaop = T3_SEND_WITH_SE;
else
wqe->send.rdmaop = T3_SEND;
wqe->send.rem_stag = 0;
break;
-#if 0 /* Not currently supported */
- case TYPE_SEND_INVALIDATE:
- case TYPE_SEND_INVALIDATE_IMMEDIATE:
- wqe->send.rdmaop = T3_SEND_WITH_INV;
- wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
- break;
- case TYPE_SEND_SE_INVALIDATE:
- wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
- wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
+ case IB_WR_SEND_WITH_IMM:
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
+ else
+ wqe->send.rdmaop = T3_SEND_WITH_INV;
+ wqe->send.rem_stag = 0;
break;
-#endif
default:
- break;
+ return -EINVAL;
}
if (wr->num_sge > T3_MAX_SGE)
return (-EINVAL);
wqe->send.reserved[0] = 0;
wqe->send.reserved[1] = 0;
wqe->send.reserved[2] = 0;
- if (wr->opcode == IB_WR_SEND_WITH_IMM) {
- plen = 4;
- wqe->send.sgl[0].stag = wr->imm_data;
- wqe->send.sgl[0].len = 0;
- wqe->send.num_sgle = 0;
- *flit_cnt = 5;
- } else {
- plen = 0;
- for (i = 0; i < wr->num_sge; i++) {
- if ((plen + wr->sg_list[i].length) < plen) {
- return (-EMSGSIZE);
- }
- plen += wr->sg_list[i].length;
- wqe->send.sgl[i].stag =
- htobe32(wr->sg_list[i].lkey);
- wqe->send.sgl[i].len =
- htobe32(wr->sg_list[i].length);
- wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr);
+ plen = 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ if ((plen + wr->sg_list[i].length) < plen) {
+ return (-EMSGSIZE);
}
- wqe->send.num_sgle = htobe32(wr->num_sge);
- *flit_cnt = 4 + ((wr->num_sge) << 1);
+ plen += wr->sg_list[i].length;
+ wqe->send.sgl[i].stag =
+ htobe32(wr->sg_list[i].lkey);
+ wqe->send.sgl[i].len =
+ htobe32(wr->sg_list[i].length);
+ wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr);
}
+ wqe->send.num_sgle = htobe32(wr->num_sge);
+ *flit_cnt = 4 + ((wr->num_sge) << 1);
wqe->send.plen = htobe32(plen);
return 0;
}
-static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
+static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
u8 *flit_cnt)
{
int i;
@@ -152,7 +153,7 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
plen = 4;
- wqe->write.sgl[0].stag = wr->imm_data;
+ wqe->write.sgl[0].stag = wr->ex.imm_data;
wqe->write.sgl[0].len = 0;
wqe->write.num_sgle = 0;
*flit_cnt = 6;
@@ -177,7 +178,7 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
return 0;
}
-static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
+static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
u8 *flit_cnt)
{
if (wr->num_sge > 1)
@@ -195,15 +196,12 @@ static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
return 0;
}
-/*
- * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
- */
static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
u32 num_sgle, u32 * pbl_addr, u8 * page_size)
{
int i;
struct iwch_mr *mhp;
- u32 offset;
+ u64 offset;
for (i = 0; i < num_sgle; i++) {
mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
@@ -235,8 +233,8 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
return (-EINVAL);
}
offset = sg_list[i].addr - mhp->attr.va_fbo;
- offset += ((u32) mhp->attr.va_fbo) %
- (1UL << (12 + mhp->attr.page_size));
+ offset += mhp->attr.va_fbo &
+ ((1UL << (12 + mhp->attr.page_size)) - 1);
pbl_addr[i] = ((mhp->attr.pbl_addr -
rhp->rdev.rnic_info.pbl_base) >> 3) +
(offset >> (12 + mhp->attr.page_size));
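
The offset fix above is twofold: offset is widened to u64 so the high bits of addr - va_fbo survive (the old form truncated va_fbo through a (u32) cast), and the modulo becomes a mask, which is valid because the HW page size is always a power of two (1UL << (12 + page_size)). A standalone check of the equivalence:

    #include <assert.h>
    #include <stdint.h>

    int
    main(void)
    {
        uint64_t va_fbo = 0x123456789abcULL;    /* arbitrary example */
        unsigned page_size = 2;                 /* HW page = 1UL << 14 */
        uint64_t pgsz = 1UL << (12 + page_size);

        /* x % 2^k == x & (2^k - 1), with no (u32) truncation */
        assert((va_fbo & (pgsz - 1)) == (va_fbo % pgsz));
        return (0);
    }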
@@ -245,26 +243,113 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
return 0;
}
-static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe,
+static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
struct ib_recv_wr *wr)
{
- int i;
- if (wr->num_sge > T3_MAX_SGE)
+ int i, err = 0;
+ u32 pbl_addr[T3_MAX_SGE];
+ u8 page_size[T3_MAX_SGE];
+
+ if (wr->num_sge > T3_MAX_SGE)
return (-EINVAL);
+
+
+ err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
+ page_size);
+ if (err)
+ return err;
+ wqe->recv.pagesz[0] = page_size[0];
+ wqe->recv.pagesz[1] = page_size[1];
+ wqe->recv.pagesz[2] = page_size[2];
+ wqe->recv.pagesz[3] = page_size[3];
wqe->recv.num_sgle = htobe32(wr->num_sge);
+
for (i = 0; i < wr->num_sge; i++) {
wqe->recv.sgl[i].stag = htobe32(wr->sg_list[i].lkey);
wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
- wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr);
+ wqe->recv.sgl[i].to = htobe64(((u32)wr->sg_list[i].addr) &
+ ((1UL << (12 + page_size[i])) - 1));
+ /* pbl_addr is the adapter's address in the PBL */
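+ /* (i.e., the offset the hardware uses to look up the page list) */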
+ wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
}
for (; i < T3_MAX_SGE; i++) {
wqe->recv.sgl[i].stag = 0;
wqe->recv.sgl[i].len = 0;
wqe->recv.sgl[i].to = 0;
+ wqe->recv.pbl_addr[i] = 0;
}
+
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2)].pbl_addr = 0;
+
return 0;
}
+static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
+ struct ib_recv_wr *wr)
+{
+ int i;
+ u32 pbl_addr;
+ u32 pbl_offset;
+
+
+ /*
+ * The T3 HW requires the PBL in the HW recv descriptor to reference
+ * a PBL entry. So we allocate the max needed PBL memory here and pass
+ * it to the uP in the recv WR. The uP will build the PBL and setup
+ * the HW recv descriptor.
+ */
+ pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
+ if (!pbl_addr)
+ return -ENOMEM;
+
+ /*
+ * Compute the 8B aligned offset.
+ */
+ pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;
+
+ wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
+
+ for (i = 0; i < wr->num_sge; i++) {
+
+ /*
+ * Use a 128MB page size. This and an imposed 128MB
+ * sge length limit allow us to require only a 2-entry HW
+ * PBL for each SGE. This restriction is acceptable since
+ * it is not possible to allocate 128MB of contiguous
+ * DMA coherent memory!
+ */
+ if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
+ return -EINVAL;
+ wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;
+
+ /*
+ * T3 restricts a recv to all zero-stag or all non-zero-stag.
+ */
+ if (wr->sg_list[i].lkey != 0)
+ return -EINVAL;
+ wqe->recv.sgl[i].stag = 0;
+ wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
+ wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr);
+ wqe->recv.pbl_addr[i] = htobe32(pbl_offset);
+ pbl_offset += 2;
+ }
+ for (; i < T3_MAX_SGE; i++) {
+ wqe->recv.pagesz[i] = 0;
+ wqe->recv.sgl[i].stag = 0;
+ wqe->recv.sgl[i].len = 0;
+ wqe->recv.sgl[i].to = 0;
+ wqe->recv.pbl_addr[i] = 0;
+ }
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
+ return 0;
+}
+
int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
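
build_zero_stag_recv() above depends on the T3_STAG0_* constants this change adds to iw_cxgb_wr.h (visible in a later hunk). A standalone check of the arithmetic behind the comment in its SGE loop, using those definitions:

    #include <assert.h>

    #define T3_MAX_SGE              4
    #define T3_STAG0_PBL_SIZE       (2 * T3_MAX_SGE << 3)
    #define T3_STAG0_MAX_PBE_LEN    (128 * 1024 * 1024)
    #define T3_STAG0_PAGE_SHIFT     15

    int
    main(void)
    {
        /* pagesz = 15 selects a 1UL << (12 + 15) = 128MB HW page */
        assert((1UL << (12 + T3_STAG0_PAGE_SHIFT)) == T3_STAG0_MAX_PBE_LEN);

        /* A <=128MB SGE straddles at most 2 such pages, i.e. 2 PBL
         * entries of 8 bytes per SGE (hence "pbl_offset += 2" above):
         * 4 SGEs -> 64 bytes of PBL allocated per recv WR. */
        assert(T3_STAG0_PBL_SIZE == 2 * T3_MAX_SGE * 8);
        return (0);
    }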
@@ -282,18 +367,19 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
mtx_lock(&qhp->lock);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
mtx_unlock(&qhp->lock);
- return (-EINVAL);
+ err = -EINVAL;
+ goto out;
}
num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
qhp->wq.sq_size_log2);
- if (num_wrs <= 0) {
+ if (num_wrs == 0) {
mtx_unlock(&qhp->lock);
- return (-ENOMEM);
+ err = -EINVAL;
+ goto out;
}
while (wr) {
if (num_wrs == 0) {
err = -ENOMEM;
- *bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -311,17 +397,17 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
t3_wr_opcode = T3_WR_SEND;
- err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
+ err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
break;
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
t3_wr_opcode = T3_WR_WRITE;
- err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
+ err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
break;
case IB_WR_RDMA_READ:
t3_wr_opcode = T3_WR_READ;
t3_wr_flags = 0; /* T3 reads are always signaled */
- err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
+ err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
if (err)
break;
sqp->read_len = wqe->read.local_len;
@@ -333,10 +419,9 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
wr->opcode);
err = -EINVAL;
}
- if (err) {
- *bad_wr = wr;
+ if (err)
break;
- }
+
wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
sqp->wr_id = wr->wr_id;
sqp->opcode = wr2opcode(t3_wr_opcode);
@@ -358,6 +443,9 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
mtx_unlock(&qhp->lock);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+out:
+ if (err)
+ *bad_wr = wr;
return err;
}
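
Both failure paths now funnel through the single out: label, so *bad_wr is assigned exactly once. Under the usual verbs contract (assumed here; this file does not restate it), a caller can use it to tell how far a chain got. A hypothetical caller sketch:

    /* On error, *bad_wr is the first WR that was NOT posted; entries
     * before it in the chain were accepted and will still complete. */
    struct ib_send_wr *bad = NULL;
    int rc = iwch_post_send(qp, wr_chain, &bad);

    if (rc != 0)
        log(LOG_ERR, "post_send stopped at wr_id %ju: %d\n",
            (uintmax_t)bad->wr_id, rc);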
@@ -374,27 +462,35 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
mtx_lock(&qhp->lock);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
mtx_unlock(&qhp->lock);
- return (-EINVAL);
+ err = -EINVAL;
+ goto out;
}
num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
qhp->wq.rq_size_log2) - 1;
if (!wr) {
mtx_unlock(&qhp->lock);
- return (-EINVAL);
+ err = -EINVAL;
+ goto out;
}
+
while (wr) {
+ if (wr->num_sge > T3_MAX_SGE) {
+ err = -EINVAL;
+ break;
+ }
+
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
wqe = (union t3_wr *) (qhp->wq.queue + idx);
- if (num_wrs)
- err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
- else
+ if (num_wrs) {
+ if (wr->sg_list[0].lkey)
+ err = build_rdma_recv(qhp, wqe, wr);
+ else
+ err = build_zero_stag_recv(qhp, wqe, wr);
+ } else
err = -ENOMEM;
- if (err) {
- *bad_wr = wr;
+ if (err)
break;
- }
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
- wr->wr_id;
+
build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, sizeof(struct t3_receive_wr) >> 3);
@@ -408,6 +504,9 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
}
mtx_unlock(&qhp->lock);
ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+out:
+ if (err)
+ *bad_wr = wr;
return err;
}
@@ -439,7 +538,7 @@ int iwch_bind_mw(struct ib_qp *qp,
}
num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
qhp->wq.sq_size_log2);
- if ((num_wrs) <= 0) {
+ if ((num_wrs) == 0) {
mtx_unlock(&qhp->lock);
return (-ENOMEM);
}
@@ -491,7 +590,7 @@ int iwch_bind_mw(struct ib_qp *qp,
return err;
}
-static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
+static void build_term_codes(struct respQ_msg_t *rsp_msg,
u8 *layer_type, u8 *ecode)
{
int status = TPT_ERR_INTERNAL_ERR;
@@ -631,15 +730,18 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
union t3_wr *wqe;
struct terminate_message *term;
struct mbuf *m;
+ struct ofld_hdr *oh;
- CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
- m = m_gethdr(MT_DATA, M_NOWAIT);
- if (!m) {
+ CTR3(KTR_IW_CXGB, "%s: tid %u, %p", __func__, qhp->ep->hwtid, rsp_msg);
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (m == NULL) {
log(LOG_ERR, "%s cannot send TERMINATE!\n", __FUNCTION__);
return (-ENOMEM);
}
- wqe = mtod(m, union t3_wr *);
- m->m_len = m->m_pkthdr.len = 40;
+ oh = mtod(m, struct ofld_hdr *);
+ m->m_pkthdr.len = m->m_len = sizeof(*oh) + 40;
+ oh->flags = V_HDR_NDESC(1) | V_HDR_CTRL(CPL_PRIORITY_DATA) | V_HDR_QSET(0);
+ wqe = (void *)(oh + 1);
memset(wqe, 0, 40);
wqe->send.rdmaop = T3_TERMINATE;
@@ -653,22 +755,17 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
wqe->send.wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(qhp->ep->hwtid));
- m_set_priority(m, CPL_PRIORITY_DATA);
- m_set_sgl(m, NULL);
- m_set_sgllen(m, 0);
- return cxgb_ofld_send(qhp->rhp->rdev.t3cdev_p, m);
+ return t3_offload_tx(qhp->rhp->rdev.adap, m);
}
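
The terminate message is now framed for the driver's offload TX path: a software-only ofld_hdr precedes the 40-byte WR in the mbuf, and m_gethdr() is called in its documented (how, type) argument order. The resulting layout, as a comment sketch:

    /*
     * mbuf built above (the ofld_hdr is consumed by the SGE code; only
     * the 40 bytes after it reach the wire):
     *
     *   mtod(m, ...): [ struct ofld_hdr ][ 40-byte terminate WR ]
     *                 ^oh                ^wqe = (void *)(oh + 1)
     */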
/*
* Assumes qhp lock is held.
*/
-static void __flush_qp(struct iwch_qp *qhp)
+static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
+ struct iwch_cq *schp)
{
- struct iwch_cq *rchp, *schp;
int count;
-
- rchp = get_chp(qhp->rhp, qhp->attr.rcq);
- schp = get_chp(qhp->rhp, qhp->attr.scq);
+ int flushed;
CTR4(KTR_IW_CXGB, "%s qhp %p rchp %p schp %p", __FUNCTION__, qhp, rchp, schp);
/* take a ref on the qhp since we must release the lock */
@@ -680,20 +777,22 @@ static void __flush_qp(struct iwch_qp *qhp)
mtx_lock(&qhp->lock);
cxio_flush_hw_cq(&rchp->cq);
cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
- cxio_flush_rq(&qhp->wq, &rchp->cq, count);
+ flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
mtx_unlock(&qhp->lock);
mtx_unlock(&rchp->lock);
- (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+ if (flushed)
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
/* locking hierarchy: cq lock first, then qp lock. */
mtx_lock(&schp->lock);
mtx_lock(&qhp->lock);
cxio_flush_hw_cq(&schp->cq);
cxio_count_scqes(&schp->cq, &qhp->wq, &count);
- cxio_flush_sq(&qhp->wq, &schp->cq, count);
+ flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
mtx_unlock(&qhp->lock);
mtx_unlock(&schp->lock);
- (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+ if (flushed)
+ (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
/* deref */
mtx_lock(&qhp->lock);
@@ -703,10 +802,23 @@ static void __flush_qp(struct iwch_qp *qhp)
static void flush_qp(struct iwch_qp *qhp)
{
- if (qhp->ibqp.uobject)
+ struct iwch_cq *rchp, *schp;
+
+ rchp = get_chp(qhp->rhp, qhp->attr.rcq);
+ schp = get_chp(qhp->rhp, qhp->attr.scq);
+
+ if (qhp->ibqp.uobject) {
cxio_set_wq_in_error(&qhp->wq);
- else
- __flush_qp(qhp);
+ cxio_set_cq_in_error(&rchp->cq);
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+ if (schp != rchp) {
+ cxio_set_cq_in_error(&schp->cq);
+ (*schp->ibcq.comp_handler)(&schp->ibcq,
+ schp->ibcq.cq_context);
+ }
+ return;
+ }
+ __flush_qp(qhp, rchp, schp);
}
@@ -715,7 +827,13 @@ static void flush_qp(struct iwch_qp *qhp)
*/
static int rqes_posted(struct iwch_qp *qhp)
{
- return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
+ union t3_wr *wqe = qhp->wq.queue;
+ u16 count = 0;
+ while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
+ count++;
+ wqe++;
+ }
+ return count;
}
static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
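
rqes_posted() now reports how many receive WRs sit at the head of the WQ rather than just whether any do; rdma_init() below passes that count to the firmware as rqe_count. A toy standalone version of the bounded walk (the cast matters in plain C: a bare count + 1 on a u16 promotes to int and can never equal zero):

    #include <stddef.h>
    #include <stdint.h>

    /* Count consecutive "receive" opcodes at the head of a queue, capped
     * so the 16-bit counter cannot wrap. */
    static uint16_t
    count_rcv_head(const uint8_t *op, size_t n)
    {
        uint16_t count = 0;

        while (count < n && op[count] == 1 /* stand-in for T3_WR_RCV */ &&
            (uint16_t)(count + 1) != 0)
            count++;
        return (count);
    }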
@@ -724,6 +842,10 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
{
struct t3_rdma_init_attr init_attr;
int ret;
+ struct socket *so = qhp->ep->com.so;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+ struct toepcb *toep;
init_attr.tid = qhp->ep->hwtid;
init_attr.qpid = qhp->wq.qpid;
@@ -737,32 +859,28 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
(qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
(qhp->attr.mpa_attr.crc_enabled << 2);
- /*
- * XXX - The IWCM doesn't quite handle getting these
- * attrs set before going into RTS. For now, just turn
- * them on always...
- */
-#if 0
- init_attr.qpcaps = qhp->attr.enableRdmaRead |
- (qhp->attr.enableRdmaWrite << 1) |
- (qhp->attr.enableBind << 2) |
- (qhp->attr.enable_stag0_fastreg << 3) |
- (qhp->attr.enable_stag0_fastreg << 4);
-#else
- init_attr.qpcaps = 0x1f;
-#endif
+ init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE |
+ uP_RI_QP_RDMA_WRITE_ENABLE |
+ uP_RI_QP_BIND_ENABLE;
+ if (!qhp->ibqp.uobject)
+ init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE;
init_attr.tcp_emss = qhp->ep->emss;
init_attr.ord = qhp->attr.max_ord;
init_attr.ird = qhp->attr.max_ird;
init_attr.qp_dma_addr = qhp->wq.dma_addr;
init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
- init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+ init_attr.rqe_count = rqes_posted(qhp);
+ init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
+ init_attr.rtr_type = 0;
+ tp = intotcpcb(inp);
+ toep = tp->t_toe;
+ init_attr.chan = toep->tp_l2t->smt_idx;
init_attr.irs = qhp->ep->rcv_seq;
CTR5(KTR_IW_CXGB, "%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
"flags 0x%x qpcaps 0x%x", __FUNCTION__,
init_attr.rq_addr, init_attr.rq_size,
init_attr.flags, init_attr.qpcaps);
- ret = cxio_rdma_init(&rhp->rdev, &init_attr);
+ ret = cxio_rdma_init(&rhp->rdev, &init_attr, qhp->ep->com.so);
CTR2(KTR_IW_CXGB, "%s ret %d", __FUNCTION__, ret);
return ret;
}
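
The hardwired qpcaps value 0x1f becomes named uP_RI_QP_* bits. Only STAG0_ENABLE's value (0x10) is visible in this diff (in the iw_cxgb_wr.h hunk further down); the other three are assumed below to be the conventional low bits. Under that assumption the new settings work out to:

    enum {  /* assumed values, except STAG0 which appears in the diff */
        uP_RI_QP_RDMA_READ_ENABLE  = 0x01,
        uP_RI_QP_RDMA_WRITE_ENABLE = 0x02,
        uP_RI_QP_BIND_ENABLE       = 0x04,
        uP_RI_QP_STAG0_ENABLE      = 0x10,
    };

    /*
     * user QP:   READ|WRITE|BIND       = 0x07
     * kernel QP: READ|WRITE|BIND|STAG0 = 0x17
     * old code:  hardwired               0x1f (everything on)
     */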
@@ -870,8 +988,8 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
abort=0;
disconnect = 1;
ep = qhp->ep;
+ get_ep(&ep->com);
}
- flush_qp(qhp);
break;
case IWCH_QP_STATE_TERMINATE:
qhp->attr.state = IWCH_QP_STATE_TERMINATE;
@@ -886,6 +1004,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
abort=1;
disconnect = 1;
ep = qhp->ep;
+ get_ep(&ep->com);
}
goto err;
break;
@@ -901,6 +1020,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
}
switch (attrs->next_state) {
case IWCH_QP_STATE_IDLE:
+ flush_qp(qhp);
qhp->attr.state = IWCH_QP_STATE_IDLE;
qhp->attr.llp_stream_handle = NULL;
put_ep(&qhp->ep->com);
@@ -908,7 +1028,6 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
wakeup(qhp);
break;
case IWCH_QP_STATE_ERROR:
- disconnect=1;
goto err;
default:
ret = -EINVAL;
@@ -960,81 +1079,29 @@ err:
out:
mtx_unlock(&qhp->lock);
- if (terminate)
+ if (terminate)
iwch_post_terminate(qhp, NULL);
+
/*
* If disconnect is 1, then we need to initiate a disconnect
* on the EP. This can be a normal close (RTS->CLOSING) or
* an abnormal close (RTS/CLOSING->ERROR).
*/
- if (disconnect)
+ if (disconnect) {
iwch_ep_disconnect(ep, abort, M_NOWAIT);
-
+ put_ep(&ep->com);
+ }
+
/*
* If free is 1, then we've disassociated the EP from the QP
* and we need to dereference the EP.
*/
- if (free)
+ if (free)
put_ep(&ep->com);
+
CTR2(KTR_IW_CXGB, "%s exit state %d", __FUNCTION__, qhp->attr.state);
return ret;
}
-
-static int quiesce_qp(struct iwch_qp *qhp)
-{
- mtx_lock(&qhp->lock);
- iwch_quiesce_tid(qhp->ep);
- qhp->flags |= QP_QUIESCED;
- mtx_unlock(&qhp->lock);
- return 0;
-}
-
-static int resume_qp(struct iwch_qp *qhp)
-{
- mtx_lock(&qhp->lock);
- iwch_resume_tid(qhp->ep);
- qhp->flags &= ~QP_QUIESCED;
- mtx_lock(&qhp->lock);
- return 0;
-}
-
-int iwch_quiesce_qps(struct iwch_cq *chp)
-{
- int i;
- struct iwch_qp *qhp;
-
- for (i=0; i < T3_MAX_NUM_QP; i++) {
- qhp = get_qhp(chp->rhp, i);
- if (!qhp)
- continue;
- if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
- quiesce_qp(qhp);
- continue;
- }
- if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
- quiesce_qp(qhp);
- }
- return 0;
-}
-
-int iwch_resume_qps(struct iwch_cq *chp)
-{
- int i;
- struct iwch_qp *qhp;
-
- for (i=0; i < T3_MAX_NUM_QP; i++) {
- qhp = get_qhp(chp->rhp, i);
- if (!qhp)
- continue;
- if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
- resume_qp(qhp);
- continue;
- }
- if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
- resume_qp(qhp);
- }
- return 0;
-}
-
+#endif
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c
index 62ffef5..a99bf66 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c
@@ -29,11 +29,13 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
-#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
@@ -59,9 +61,11 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
-#include <contrib/rdma/ib_verbs.h>
-#include <contrib/rdma/ib_umem.h>
-#include <contrib/rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/idr.h>
+#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <cxgb_include.h>
#include <ulp/iw_cxgb/iw_cxgb_wr.h>
@@ -369,3 +373,4 @@ void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p)
{
gen_pool_destroy(rdev_p->rqt_pool);
}
+#endif
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h
index 3086a63..55a1006 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h
@@ -47,10 +47,18 @@ struct iwch_create_cq_req {
uint64_t user_rptr_addr;
};
+struct iwch_create_cq_resp_v0 {
+ __u64 key;
+ __u32 cqid;
+ __u32 size_log2;
+};
+
struct iwch_create_cq_resp {
uint64_t key;
uint32_t cqid;
uint32_t size_log2;
+ __u32 memsize;
+ __u32 reserved;
};
struct iwch_create_qp_resp {
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h
index bf8f2d6..8718aa0 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h
@@ -32,6 +32,9 @@ $FreeBSD$
#define __CXIO_WR_H__
#define T3_MAX_SGE 4
#define T3_MAX_INLINE 64
+#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3)
+#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024)
+#define T3_STAG0_PAGE_SHIFT 15
#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \
@@ -272,6 +275,22 @@ enum t3_qp_caps {
uP_RI_QP_STAG0_ENABLE = 0x10
} __attribute__ ((packed));
+enum rdma_init_rtr_types {
+ RTR_READ = 1,
+ RTR_WRITE = 2,
+ RTR_SEND = 3,
+};
+
+#define S_RTR_TYPE 2
+#define M_RTR_TYPE 0x3
+#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE)
+#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
+
+#define S_CHAN 4
+#define M_CHAN 0x3
+#define V_CHAN(x) ((x) << S_CHAN)
+#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN)
+
struct t3_rdma_init_attr {
u32 tid;
u32 qpid;
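
The S_/M_/V_/G_ quartet follows the driver-wide bitfield convention: V_ shifts a value into place, G_ shifts it back out and masks. A standalone round trip, packing MPA flags (bits 0-1) next to the RTR type (bits 2-3) the way the new flags_rtr_type field of t3_rdma_init_wr presumably carries them:

    #include <assert.h>

    #define S_RTR_TYPE      2
    #define M_RTR_TYPE      0x3
    #define V_RTR_TYPE(x)   ((x) << S_RTR_TYPE)
    #define G_RTR_TYPE(x)   ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)

    int
    main(void)
    {
        unsigned w = 0x1 /* MPA_INITIATOR */ | V_RTR_TYPE(3 /* RTR_SEND */);

        assert(G_RTR_TYPE(w) == 3);     /* the type comes back out... */
        assert((w & 0x3) == 0x1);       /* ...without touching the flags */
        return (0);
    }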
@@ -287,8 +306,11 @@ struct t3_rdma_init_attr {
u32 ird;
u64 qp_dma_addr;
u32 qp_dma_size;
- u32 flags;
+ enum rdma_init_rtr_types rtr_type;
+ u16 flags;
+ u16 rqe_count;
u32 irs;
+ u32 chan;
};
struct t3_rdma_init_wr {
@@ -303,13 +325,13 @@ struct t3_rdma_init_wr {
u8 mpaattrs; /* 5 */
u8 qpcaps;
__be16 ulpdu_size;
- __be32 flags; /* bits 31-1 - reservered */
- /* bit 0 - set if RECV posted */
+ __be16 flags_rtr_type;
+ __be16 rqe_count;
__be32 ord; /* 6 */
__be32 ird;
__be64 qp_dma_addr; /* 7 */
__be32 qp_dma_size; /* 8 */
- u32 irs;
+ __be32 irs;
};
struct t3_genbit {
@@ -318,7 +340,8 @@ struct t3_genbit {
};
enum rdma_init_wr_flags {
- RECVS_POSTED = 1,
+ MPA_INITIATOR = (1<<0),
+ PRIV_QP = (1<<1),
};
union t3_wr {
@@ -531,6 +554,12 @@ struct t3_cqe {
#define CQE_STATUS(x) (G_CQE_STATUS(be32toh((x).header)))
#define CQE_OPCODE(x) (G_CQE_OPCODE(be32toh((x).header)))
+#define CQE_SEND_OPCODE(x)( \
+ (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \
+ (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \
+ (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \
+ (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV))
+
#define CQE_LEN(x) (be32toh((x).len))
/* used for RQ completion processing */
@@ -589,21 +618,23 @@ struct t3_swsq {
uint64_t wr_id;
struct t3_cqe cqe;
uint32_t sq_wptr;
- uint32_t read_len;
+ __be32 read_len;
int opcode;
int complete;
int signaled;
};
+struct t3_swrq {
+ __u64 wr_id;
+ __u32 pbl_addr;
+};
+
/*
* A T3 WQ implements both the SQ and RQ.
*/
struct t3_wq {
union t3_wr *queue; /* DMA accessable memory */
bus_addr_t dma_addr; /* DMA address for HW */
-#ifdef notyet
- DECLARE_PCI_UNMAP_ADDR(mapping) /* unmap kruft */
-#endif
u32 error; /* 1 once we go to ERROR */
u32 qpid;
u32 wptr; /* idx to next available WR slot */
@@ -613,14 +644,15 @@ struct t3_wq {
u32 sq_wptr; /* sq_wptr - sq_rptr == count of */
u32 sq_rptr; /* pending wrs */
u32 sq_size_log2; /* sq size */
- u64 *rq; /* SW RQ (holds consumer wr_ids */
+ struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids */
u32 rq_wptr; /* rq_wptr - rq_rptr == count of */
u32 rq_rptr; /* pending wrs */
- u64 *rq_oldest_wr; /* oldest wr on the SW RQ */
+ struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */
u32 rq_size_log2; /* rq size */
u32 rq_addr; /* rq adapter address */
- void /* __iomem */ *doorbell; /* kernel db */
+ void *doorbell; /* kernel db */
u64 udb; /* user db if any */
+ struct cxio_rdev *rdev;
};
struct t3_cq {
@@ -629,9 +661,6 @@ struct t3_cq {
u32 wptr;
u32 size_log2;
bus_addr_t dma_addr;
-#ifdef notyet
- DECLARE_PCI_UNMAP_ADDR(mapping)
-#endif
struct t3_cqe *queue;
struct t3_cqe *sw_queue;
u32 sw_rptr;
@@ -641,6 +670,22 @@ struct t3_cq {
#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
CQE_GENBIT(*cqe))
+struct t3_cq_status_page {
+ u32 cq_err;
+};
+
+static inline int cxio_cq_in_error(struct t3_cq *cq)
+{
+ return ((struct t3_cq_status_page *)
+ &cq->queue[1 << cq->size_log2])->cq_err;
+}
+
+static inline void cxio_set_cq_in_error(struct t3_cq *cq)
+{
+ ((struct t3_cq_status_page *)
+ &cq->queue[1 << cq->size_log2])->cq_err = 1;
+}
+
static inline void cxio_set_wq_in_error(struct t3_wq *wq)
{
wq->queue->flit[13] = 1;
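
Both new CQ inlines index one CQE past the end of the ring (queue[1 << size_log2]), which implies the CQ ring must be allocated with one spare entry to back the status page. A sketch of that sizing under that assumption; cq_alloc_size is a hypothetical helper:

    #include <stddef.h>

    /* Ring of 2^size_log2 CQEs plus one CQE-sized slot for the
     * t3_cq_status_page that cxio_set_cq_in_error() writes. */
    static size_t
    cq_alloc_size(unsigned int size_log2, size_t cqe_size)
    {
        return ((((size_t)1 << size_log2) + 1) * cqe_size);
    }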
diff --git a/sys/dev/cxgb/ulp/toecore/cxgb_toedev.h b/sys/dev/cxgb/ulp/toecore/cxgb_toedev.h
deleted file mode 100644
index 398923a..0000000
--- a/sys/dev/cxgb/ulp/toecore/cxgb_toedev.h
+++ /dev/null
@@ -1,49 +0,0 @@
-
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-$FreeBSD$
-
-***************************************************************************/
-
-#ifndef _CXGB_TOEDEV_H_
-#define _CXGB_TOEDEV_H_
-#include <netinet/toedev.h>
-
-
-/* offload type ids */
-enum {
- TOE_ID_CHELSIO_T1 = 1,
- TOE_ID_CHELSIO_T1C,
- TOE_ID_CHELSIO_T2,
- TOE_ID_CHELSIO_T3,
- TOE_ID_CHELSIO_T3B,
- TOE_ID_CHELSIO_T3C,
-}
- ;
-
-#endif
diff --git a/sys/dev/cxgb/ulp/toecore/toedev.c b/sys/dev/cxgb/ulp/toecore/toedev.c
deleted file mode 100644
index 01a7d90..0000000
--- a/sys/dev/cxgb/ulp/toecore/toedev.c
+++ /dev/null
@@ -1,420 +0,0 @@
-
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/bus.h>
-#include <sys/module.h>
-#include <sys/queue.h>
-#include <sys/mbuf.h>
-#include <sys/proc.h>
-
-#include <sys/socket.h>
-#include <sys/sockio.h>
-
-#include <net/bpf.h>
-#include <net/ethernet.h>
-#include <net/if.h>
-#include <net/route.h>
-
-
-/*
- * XXX
- */
-#include <cxgb_include.h>
-#include <ulp/toecore/cxgb_toedev.h>
-
-static struct mtx offload_db_lock;
-static TAILQ_HEAD(, toedev) offload_dev_list;
-static TAILQ_HEAD(, tom_info) offload_module_list;
-
-/*
- * Returns the entry in the given table with the given offload id, or NULL
- * if the id is not found.
- */
-static const struct offload_id *
-id_find(unsigned int id, const struct offload_id *table)
-{
- for ( ; table->id; ++table)
- if (table->id == id)
- return table;
- return NULL;
-}
-
-/*
- * Returns true if an offload device is presently attached to an offload module.
- */
-static inline int
-is_attached(const struct toedev *dev)
-{
- return dev->tod_offload_mod != NULL;
-}
-
-/*
- * Try to attach a new offload device to an existing TCP offload module that
- * can handle the device's offload id. Returns 0 if it succeeds.
- *
- * Must be called with the offload_db_lock held.
- */
-static int
-offload_attach(struct toedev *dev)
-{
- struct tom_info *t;
-
- TAILQ_FOREACH(t, &offload_module_list, entry) {
- const struct offload_id *entry;
-
- entry = id_find(dev->tod_ttid, t->ti_id_table);
- if (entry && t->ti_attach(dev, entry) == 0) {
- dev->tod_offload_mod = t;
- return 0;
- }
- }
- return (ENOPROTOOPT);
-}
-
-/**
- * register_tom - register a TCP Offload Module (TOM)
- * @t: the offload module to register
- *
- * Register a TCP Offload Module (TOM).
- */
-int
-register_tom(struct tom_info *t)
-{
- mtx_lock(&offload_db_lock);
- toedev_registration_count++;
- TAILQ_INSERT_HEAD(&offload_module_list, t, entry);
- mtx_unlock(&offload_db_lock);
- return 0;
-}
-
-/**
- * unregister_tom - unregister a TCP Offload Module (TOM)
- * @t: the offload module to register
- *
- * Unregister a TCP Offload Module (TOM). Note that this does not affect any
- * TOE devices to which the TOM is already attached.
- */
-int
-unregister_tom(struct tom_info *t)
-{
- mtx_lock(&offload_db_lock);
- TAILQ_REMOVE(&offload_module_list, t, entry);
- mtx_unlock(&offload_db_lock);
- return 0;
-}
-
-/*
- * Find an offload device by name. Must be called with offload_db_lock held.
- */
-static struct toedev *
-__find_offload_dev_by_name(const char *name)
-{
- struct toedev *dev;
-
- TAILQ_FOREACH(dev, &offload_dev_list, entry) {
- if (!strncmp(dev->tod_name, name, TOENAMSIZ))
- return dev;
- }
- return NULL;
-}
-
-/*
- * Returns true if an offload device is already registered.
- * Must be called with the offload_db_lock held.
- */
-static int
-is_registered(const struct toedev *dev)
-{
- struct toedev *d;
-
- TAILQ_FOREACH(d, &offload_dev_list, entry) {
- if (d == dev)
- return 1;
- }
- return 0;
-}
-
-/*
- * Finalize the name of an offload device by assigning values to any format
- * strings in its name.
- */
-static int
-assign_name(struct toedev *dev, const char *name, int limit)
-{
- int i;
-
- for (i = 0; i < limit; ++i) {
- char s[TOENAMSIZ];
-
- if (snprintf(s, sizeof(s), name, i) >= sizeof(s))
- return -1; /* name too long */
- if (!__find_offload_dev_by_name(s)) {
- strcpy(dev->tod_name, s);
- return 0;
- }
- }
- return -1;
-}
-
-/**
- * register_toedev - register a TOE device
- * @dev: the device
- * @name: a name template for the device
- *
- * Register a TOE device and try to attach an appropriate TCP offload module
- * to it. @name is a template that may contain at most one %d format
- * specifier.
- */
-int
-register_toedev(struct toedev *dev, const char *name)
-{
- int ret;
- const char *p;
-
- /*
- * Validate the name template. Only one %d allowed and name must be
- * a valid filename so it can appear in sysfs.
- */
- if (!name || !*name || !strcmp(name, ".") || !strcmp(name, "..") ||
- strchr(name, '/'))
- return EINVAL;
-
- p = strchr(name, '%');
- if (p && (p[1] != 'd' || strchr(p + 2, '%')))
- return EINVAL;
-
- mtx_lock(&offload_db_lock);
- if (is_registered(dev)) { /* device already registered */
- ret = EEXIST;
- goto out;
- }
-
- if ((ret = assign_name(dev, name, 32)) != 0)
- goto out;
-
- dev->tod_offload_mod = NULL;
- TAILQ_INSERT_TAIL(&offload_dev_list, dev, entry);
-out:
- mtx_unlock(&offload_db_lock);
- return ret;
-}
-
-/**
- * unregister_toedev - unregister a TOE device
- * @dev: the device
- *
- * Unregister a TOE device. The device must not be attached to an offload
- * module.
- */
-int
-unregister_toedev(struct toedev *dev)
-{
- int ret = 0;
-
- mtx_lock(&offload_db_lock);
- if (!is_registered(dev)) {
- ret = ENODEV;
- goto out;
- }
- if (is_attached(dev)) {
- ret = EBUSY;
- goto out;
- }
- TAILQ_REMOVE(&offload_dev_list, dev, entry);
-out:
- mtx_unlock(&offload_db_lock);
- return ret;
-}
-
-/**
- * activate_offload - activate an offload device
- * @dev: the device
- *
- * Activate an offload device by locating an appropriate registered offload
- * module. If no module is found the operation fails and may be retried at
- * a later time.
- */
-int
-activate_offload(struct toedev *dev)
-{
- int ret = 0;
-
- mtx_lock(&offload_db_lock);
- if (!is_registered(dev))
- ret = ENODEV;
- else if (!is_attached(dev))
- ret = offload_attach(dev);
- mtx_unlock(&offload_db_lock);
- return ret;
-}
-
-/**
- * toe_send - send a packet to a TOE device
- * @dev: the device
- * @m: the packet
- *
- * Sends an mbuf to a TOE driver after dealing with any active network taps.
- */
-int
-toe_send(struct toedev *dev, struct mbuf *m)
-{
- int r;
-
- critical_enter(); /* XXX neccessary? */
- r = dev->tod_send(dev, m);
- critical_exit();
- if (r)
- BPF_MTAP(dev->tod_lldev, m);
- return r;
-}
-
-/**
- * toe_receive_mbuf - process n received TOE packets
- * @dev: the toe device
- * @m: an array of offload packets
- * @n: the number of offload packets
- *
- * Process an array of ingress offload packets. Each packet is forwarded
- * to any active network taps and then passed to the toe device's receive
- * method. We optimize passing packets to the receive method by passing
- * it the whole array at once except when there are active taps.
- */
-int
-toe_receive_mbuf(struct toedev *dev, struct mbuf **m, int n)
-{
- if (__predict_true(!bpf_peers_present(dev->tod_lldev->if_bpf)))
- return dev->tod_recv(dev, m, n);
-
- for ( ; n; n--, m++) {
- m[0]->m_pkthdr.rcvif = dev->tod_lldev;
- BPF_MTAP(dev->tod_lldev, m[0]);
- dev->tod_recv(dev, m, 1);
- }
- return 0;
-}
-
-static inline int
-ifnet_is_offload(const struct ifnet *ifp)
-{
- return (ifp->if_flags & IFCAP_TOE);
-}
-
-void
-toe_arp_update(struct rtentry *rt)
-{
- struct ifnet *ifp = rt->rt_ifp;
-
- if (ifp && ifnet_is_offload(ifp)) {
- struct toedev *tdev = TOEDEV(ifp);
-
- if (tdev && tdev->tod_arp_update)
- tdev->tod_arp_update(tdev, rt);
- }
-}
-
-/**
- * offload_get_phys_egress - find the physical egress device
- * @root_dev: the root device anchoring the search
- * @so: the socket used to determine egress port in bonding mode
- * @context: in bonding mode, indicates a connection set up or failover
- *
- * Given a root network device it returns the physical egress device that is a
- * descendant of the root device. The root device may be either a physical
- * device, in which case it is the device returned, or a virtual device, such
- * as a VLAN or bonding device. In case of a bonding device the search
- * considers the decisions of the bonding device given its mode to locate the
- * correct egress device.
- */
-struct ifnet *
-offload_get_phys_egress(struct ifnet *root_dev, struct socket *so, int context)
-{
-
-#if 0
- while (root_dev && ifnet_is_offload(root_dev)) {
- if (root_dev->tod_priv_flags & IFF_802_1Q_VLAN)
- root_dev = VLAN_DEV_INFO(root_dev)->real_dev;
- else if (root_dev->tod_flags & IFF_MASTER)
- root_dev = toe_bond_get_slave(root_dev, sk, context);
- else
- break;
- }
-#endif
- return root_dev;
-}
-
-static int
-toecore_load(module_t mod, int cmd, void *arg)
-{
- int err = 0;
-
- switch (cmd) {
- case MOD_LOAD:
- mtx_init(&offload_db_lock, "toedev lock", NULL, MTX_DEF);
- TAILQ_INIT(&offload_dev_list);
- TAILQ_INIT(&offload_module_list);
- break;
- case MOD_QUIESCE:
- break;
- case MOD_UNLOAD:
- mtx_lock(&offload_db_lock);
- if (!TAILQ_EMPTY(&offload_dev_list) ||
- !TAILQ_EMPTY(&offload_module_list)) {
- err = EBUSY;
- mtx_unlock(&offload_db_lock);
- break;
- }
- mtx_unlock(&offload_db_lock);
- mtx_destroy(&offload_db_lock);
- break;
- case MOD_SHUTDOWN:
- break;
- default:
- err = EOPNOTSUPP;
- break;
- }
-
- return (err);
-}
-
-
-static moduledata_t mod_data= {
- "toecore",
- toecore_load,
- 0
-};
-
-MODULE_VERSION(toecore, 1);
-DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c b/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
index e019c61..16b5394 100644
--- a/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
+++ b/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
@@ -1,35 +1,35 @@
-/**************************************************************************
-
-Copyright (c) 2007-2008, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
@@ -42,22 +42,17 @@ __FBSDID("$FreeBSD$");
#include <sys/sockstate.h>
#include <sys/sockopt.h>
#include <sys/socket.h>
+#include <sys/socketvar.h>
#include <sys/sockbuf.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/protosw.h>
#include <sys/priv.h>
-
-#if __FreeBSD_version < 800044
-#define V_tcp_do_autosndbuf tcp_do_autosndbuf
-#define V_tcp_autosndbuf_max tcp_autosndbuf_max
-#define V_tcp_do_rfc1323 tcp_do_rfc1323
-#define V_tcp_do_autorcvbuf tcp_do_autorcvbuf
-#define V_tcp_autorcvbuf_max tcp_autorcvbuf_max
-#define V_tcpstat tcpstat
-#endif
+#include <sys/sglist.h>
+#include <sys/taskqueue.h>
#include <net/if.h>
+#include <net/ethernet.h>
#include <net/route.h>
#include <netinet/in.h>
@@ -65,37 +60,33 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
-
-#include <cxgb_osdep.h>
-#include <sys/mbufq.h>
-
#include <netinet/ip.h>
#include <netinet/tcp_var.h>
+#define TCPSTATES
#include <netinet/tcp_fsm.h>
-#include <netinet/tcp_offload.h>
+#include <netinet/toecore.h>
#include <netinet/tcp_seq.h>
-#include <netinet/tcp_syncache.h>
#include <netinet/tcp_timer.h>
#include <net/route.h>
-#include <t3cdev.h>
-#include <common/cxgb_firmware_exports.h>
-#include <common/cxgb_t3_cpl.h>
-#include <common/cxgb_tcb.h>
-#include <common/cxgb_ctl_defs.h>
-#include <cxgb_offload.h>
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <machine/bus.h>
-#include <sys/mvec.h>
-#include <ulp/toecore/cxgb_toedev.h>
-#include <ulp/tom/cxgb_l2t.h>
-#include <ulp/tom/cxgb_defs.h>
-#include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-#include <ulp/tom/cxgb_toepcb.h>
-#include <ulp/tom/cxgb_tcp.h>
-#include <ulp/tom/cxgb_tcp_offload.h>
+#include "cxgb_include.h"
+#include "ulp/tom/cxgb_l2t.h"
+#include "ulp/tom/cxgb_tom.h"
+#include "ulp/tom/cxgb_toepcb.h"
+
+VNET_DECLARE(int, tcp_do_autosndbuf);
+#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
+VNET_DECLARE(int, tcp_autosndbuf_inc);
+#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
+VNET_DECLARE(int, tcp_autosndbuf_max);
+#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
+VNET_DECLARE(int, tcp_do_autorcvbuf);
+#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
+VNET_DECLARE(int, tcp_autorcvbuf_inc);
+#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
+VNET_DECLARE(int, tcp_autorcvbuf_max);
+#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
+extern int always_keepalive;
/*
* For ULP connections HW may add headers, e.g., for digests, that aren't part
@@ -108,29 +99,6 @@ __FBSDID("$FreeBSD$");
*/
const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8};
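
t3_ulp_extra_len[] survives the rewrite: indexed by the low bits of a connection's ULP submode, it gives the number of HW-inserted digest bytes. The consumer is not visible in this hunk, so the wrapper below is a hypothetical sketch of the assumed usage:

    /* Hypothetical: strip HW-added digest bytes (0, 4, 4 or 8 for
     * no-CRC, header-CRC, data-CRC, both) from a CPL-reported length. */
    static inline unsigned int
    ulp_payload_len(unsigned int cpl_len, unsigned int ulp_submode)
    {
        return (cpl_len - t3_ulp_extra_len[ulp_submode & 3]);
    }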
-#ifdef notyet
-/*
- * This sk_buff holds a fake header-only TCP segment that we use whenever we
- * need to exploit SW TCP functionality that expects TCP headers, such as
- * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple
- * CPUs without locking.
- */
-static struct mbuf *tcphdr_mbuf __read_mostly;
-#endif
-
-/*
- * Size of WRs in bytes. Note that we assume all devices we are handling have
- * the same WR size.
- */
-static unsigned int wrlen __read_mostly;
-
-/*
- * The number of WRs needed for an skb depends on the number of page fragments
- * in the skb and whether it has any payload in its main body. This maps the
- * length of the gather list represented by an skb into the # of necessary WRs.
- */
-static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly;
-
/*
* Max receive window supported by HW in bytes. Only a small part of it can
* be set through option0, the rest needs to be set through RX_DATA_ACK.
@@ -144,1260 +112,759 @@ static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly;
#define MIN_RCV_WND (24 * 1024U)
#define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS)
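
MIN_RCV_WND sets the floor for the offloaded receive window, and the comment above describes its HW ceiling (a MAX_RCV_WND definition that falls outside this excerpt). A hedged sketch of how a window chooser would typically clamp between the two; the chooser itself lives elsewhere in the file:

    /* Sketch, assuming MAX_RCV_WND is defined alongside MIN_RCV_WND;
     * ulmin/ulmax are the stock libkern helpers. */
    rcv_wnd = ulmin(ulmax(so->so_rcv.sb_hiwat, MIN_RCV_WND), MAX_RCV_WND);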
-#define VALIDATE_SEQ 0
-#define VALIDATE_SOCK(so)
-#define DEBUG_WR 0
-
-#define TCP_TIMEWAIT 1
-#define TCP_CLOSE 2
-#define TCP_DROP 3
-
-static void t3_send_reset(struct toepcb *toep);
-static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status);
-static inline void free_atid(struct t3cdev *cdev, unsigned int tid);
-static void handle_syncache_event(int event, void *arg);
-
-static inline void
-SBAPPEND(struct sockbuf *sb, struct mbuf *n)
-{
- struct mbuf *m;
-
- m = sb->sb_mb;
- while (m) {
- KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
- !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
- !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
- KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
- m->m_next, m->m_nextpkt, m->m_flags));
- m = m->m_next;
- }
- m = n;
- while (m) {
- KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
- !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
- !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
- KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
- m->m_next, m->m_nextpkt, m->m_flags));
- m = m->m_next;
- }
- KASSERT(sb->sb_flags & SB_NOCOALESCE, ("NOCOALESCE not set"));
- sbappendstream_locked(sb, n);
- m = sb->sb_mb;
-
- while (m) {
- KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
- m->m_next, m->m_nextpkt, m->m_flags));
- m = m->m_next;
- }
-}
+static void t3_release_offload_resources(struct toepcb *);
+static void send_reset(struct toepcb *toep);
-static inline int
-is_t3a(const struct toedev *dev)
+/*
+ * Called after the last CPL for the toepcb has been received.
+ *
+ * The inp must be wlocked on entry and is unlocked (or maybe destroyed) by the
+ * time this function exits.
+ */
+static int
+toepcb_release(struct toepcb *toep)
{
- return (dev->tod_ttid == TOE_ID_CHELSIO_T3);
-}
+ struct inpcb *inp = toep->tp_inp;
+ struct toedev *tod = toep->tp_tod;
+ struct tom_data *td = t3_tomdata(tod);
+ int rc;
-static void
-dump_toepcb(struct toepcb *toep)
-{
- DPRINTF("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n",
- toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode,
- toep->tp_mtu_idx, toep->tp_tid);
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(!(toep->tp_flags & TP_CPL_DONE),
+ ("%s: double release?", __func__));
- DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n",
- toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked,
- toep->tp_mss_clamp, toep->tp_flags);
-}
+ CTR2(KTR_CXGB, "%s: tid %d", __func__, toep->tp_tid);
-#ifndef RTALLOC2_DEFINED
-static struct rtentry *
-rtalloc2(struct sockaddr *dst, int report, u_long ignflags)
-{
- struct rtentry *rt = NULL;
-
- if ((rt = rtalloc1(dst, report, ignflags)) != NULL)
- RT_UNLOCK(rt);
+ toep->tp_flags |= TP_CPL_DONE;
+ toep->tp_inp = NULL;
- return (rt);
-}
-#endif
+ mtx_lock(&td->toep_list_lock);
+ TAILQ_REMOVE(&td->toep_list, toep, link);
+ mtx_unlock(&td->toep_list_lock);
-/*
- * Determine whether to send a CPL message now or defer it. A message is
- * deferred if the connection is in SYN_SENT since we don't know the TID yet.
- * For connections in other states the message is sent immediately.
- * If through_l2t is set the message is subject to ARP processing, otherwise
- * it is sent directly.
- */
-static inline void
-send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t)
-{
- struct tcpcb *tp = toep->tp_tp;
-
- if (__predict_false(tp->t_state == TCPS_SYN_SENT)) {
- inp_wlock(tp->t_inpcb);
- mbufq_tail(&toep->out_of_order_queue, m); // defer
- inp_wunlock(tp->t_inpcb);
- } else if (through_l2t)
- l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); // send through L2T
- else
- cxgb_ofld_send(TOEP_T3C_DEV(toep), m); // send directly
-}
+ if (!(toep->tp_flags & TP_ATTACHED))
+ t3_release_offload_resources(toep);
-static inline unsigned int
-mkprio(unsigned int cntrl, const struct toepcb *toep)
-{
- return (cntrl);
+ rc = in_pcbrele_wlocked(inp);
+ if (!rc)
+ INP_WUNLOCK(inp);
+ return (rc);
}
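
toepcb_release() finishes with the stock inpcb reference-drop idiom: in_pcbrele_wlocked() returns nonzero when it released the final reference and destroyed the inp (its lock goes with it), and zero when the inp survives, in which case the caller still holds the write lock and must drop it. Schematically:

    /* Sketch of the idiom used above. */
    if (!in_pcbrele_wlocked(inp))
        INP_WUNLOCK(inp);   /* inp still alive; give back our lock */
    /* else: last reference dropped, inp and its lock no longer exist */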
/*
- * Populate a TID_RELEASE WR. The skb must be already propely sized.
+ * One-sided detach. The tcpcb is going away and we need to unhook the toepcb
+ * hanging off it. If the TOE driver is also done with the toepcb we'll release
+ * all offload resources.
*/
-static inline void
-mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid)
-{
- struct cpl_tid_release *req;
-
- m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep));
- m->m_pkthdr.len = m->m_len = sizeof(*req);
- req = mtod(m, struct cpl_tid_release *);
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = 0;
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
-}
-
-static inline void
-make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail)
+static void
+toepcb_detach(struct inpcb *inp)
{
- struct tcpcb *tp = so_sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
- struct tx_data_wr *req;
- struct sockbuf *snd;
-
- inp_lock_assert(tp->t_inpcb);
- snd = so_sockbuf_snd(so);
-
- req = mtod(m, struct tx_data_wr *);
- m->m_len = sizeof(*req);
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
- req->wr_lo = htonl(V_WR_TID(toep->tp_tid));
- /* len includes the length of any HW ULP additions */
- req->len = htonl(len);
- req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
- /* V_TX_ULP_SUBMODE sets both the mode and submode */
- req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) |
- V_TX_URG(/* skb_urgent(skb) */ 0 ) |
- V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) &&
- (tail ? 0 : 1))));
- req->sndseq = htonl(tp->snd_nxt);
- if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
- req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
- V_TX_CPU_IDX(toep->tp_qset));
-
- /* Sendbuffer is in units of 32KB.
- */
- if (V_tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE)
- req->param |= htonl(V_TX_SNDBUF(V_tcp_autosndbuf_max >> 15));
- else {
- req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
- }
-
- toep->tp_flags |= TP_DATASENT;
- }
-}
+ struct toepcb *toep;
+ struct tcpcb *tp;
-#define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */
+ KASSERT(inp, ("%s: inp is NULL", __func__));
+ INP_WLOCK_ASSERT(inp);
-int
-t3_push_frames(struct socket *so, int req_completion)
-{
- struct tcpcb *tp = so_sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
-
- struct mbuf *tail, *m0, *last;
- struct t3cdev *cdev;
- struct tom_data *d;
- int state, bytes, count, total_bytes;
- bus_dma_segment_t segs[TX_MAX_SEGS], *segp;
- struct sockbuf *snd;
-
- if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) {
- DPRINTF("tcp state=%d\n", tp->t_state);
- return (0);
- }
+ tp = intotcpcb(inp);
+ toep = tp->t_toe;
- state = so_state_get(so);
-
- if (state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) {
- DPRINTF("disconnecting\n");
-
- return (0);
- }
+ KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
+ KASSERT(toep->tp_flags & TP_ATTACHED, ("%s: not attached", __func__));
- inp_lock_assert(tp->t_inpcb);
+ CTR6(KTR_CXGB, "%s: %s %u, toep %p, inp %p, tp %p", __func__,
+ tp->t_state == TCPS_SYN_SENT ? "atid" : "tid", toep->tp_tid,
+ toep, inp, tp);
- snd = so_sockbuf_snd(so);
- sockbuf_lock(snd);
+ tp->t_toe = NULL;
+ tp->t_flags &= ~TF_TOE;
+ toep->tp_flags &= ~TP_ATTACHED;
- d = TOM_DATA(toep->tp_toedev);
- cdev = d->cdev;
+ if (toep->tp_flags & TP_CPL_DONE)
+ t3_release_offload_resources(toep);
+}
- last = tail = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;
+void
+t3_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
+{
- total_bytes = 0;
- DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n",
- toep->tp_wr_avail, tail, snd->sb_cc, toep->tp_m_last);
+ toepcb_detach(tp->t_inpcb);
+}
- if (last && toep->tp_m_last == last && snd->sb_sndptroff != 0) {
- KASSERT(tail, ("sbdrop error"));
- last = tail = tail->m_next;
- }
+static int
+alloc_atid(struct tid_info *t, void *ctx)
+{
+ int atid = -1;
- if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) {
- DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail);
- sockbuf_unlock(snd);
+ mtx_lock(&t->atid_lock);
+ if (t->afree) {
+ union active_open_entry *p = t->afree;
- return (0);
+ atid = (p - t->atid_tab) + t->atid_base;
+ t->afree = p->next;
+ p->ctx = ctx;
+ t->atids_in_use++;
}
-
- toep->tp_m_last = NULL;
- while (toep->tp_wr_avail && (tail != NULL)) {
- count = bytes = 0;
- segp = segs;
- if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) {
- sockbuf_unlock(snd);
- return (0);
- }
- /*
- * If the data in tail fits as in-line, then
- * make an immediate data wr.
- */
- if (tail->m_len <= IMM_LEN) {
- count = 1;
- bytes = tail->m_len;
- last = tail;
- tail = tail->m_next;
- m_set_sgl(m0, NULL);
- m_set_sgllen(m0, 0);
- make_tx_data_wr(so, m0, bytes, tail);
- m_append(m0, bytes, mtod(last, caddr_t));
- KASSERT(!m0->m_next, ("bad append"));
- } else {
- while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail)
- && (tail != NULL) && (count < TX_MAX_SEGS-1)) {
- bytes += tail->m_len;
- last = tail;
- count++;
- /*
- * technically an abuse to be using this for a VA
- * but less gross than defining my own structure
- * or calling pmap_kextract from here :-|
- */
- segp->ds_addr = (bus_addr_t)tail->m_data;
- segp->ds_len = tail->m_len;
- DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n",
- count, mbuf_wrs[count], tail->m_data, tail->m_len);
- segp++;
- tail = tail->m_next;
- }
- DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n",
- toep->tp_wr_avail, count, mbuf_wrs[count], tail);
-
- m_set_sgl(m0, segs);
- m_set_sgllen(m0, count);
- make_tx_data_wr(so, m0, bytes, tail);
- }
- m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep));
-
- if (tail) {
- snd->sb_sndptr = tail;
- toep->tp_m_last = NULL;
- } else
- toep->tp_m_last = snd->sb_sndptr = last;
-
+ mtx_unlock(&t->atid_lock);
- DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last);
+ return (atid);
+}
- snd->sb_sndptroff += bytes;
- total_bytes += bytes;
- toep->tp_write_seq += bytes;
- CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d"
- " tail=%p sndptr=%p sndptroff=%d",
- toep->tp_wr_avail, count, mbuf_wrs[count],
- tail, snd->sb_sndptr, snd->sb_sndptroff);
- if (tail)
- CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d"
- " tp_m_last=%p tailbuf=%p snd_una=0x%08x",
- total_bytes, toep->tp_m_last, tail->m_data,
- tp->snd_una);
- else
- CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d"
- " tp_m_last=%p snd_una=0x%08x",
- total_bytes, toep->tp_m_last, tp->snd_una);
+static void
+free_atid(struct tid_info *t, int atid)
+{
+ union active_open_entry *p = atid2entry(t, atid);
+ mtx_lock(&t->atid_lock);
+ p->next = t->afree;
+ t->afree = p;
+ t->atids_in_use--;
+ mtx_unlock(&t->atid_lock);
+}
-#ifdef KTR
+void
+insert_tid(struct tom_data *td, void *ctx, unsigned int tid)
{
- int i;
-
- i = 0;
- while (i < count && m_get_sgllen(m0)) {
- if ((count - i) >= 3) {
- CTR6(KTR_TOM,
- "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
- " len=%d pa=0x%zx len=%d",
- segs[i].ds_addr, segs[i].ds_len,
- segs[i + 1].ds_addr, segs[i + 1].ds_len,
- segs[i + 2].ds_addr, segs[i + 2].ds_len);
- i += 3;
- } else if ((count - i) == 2) {
- CTR4(KTR_TOM,
- "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
- " len=%d",
- segs[i].ds_addr, segs[i].ds_len,
- segs[i + 1].ds_addr, segs[i + 1].ds_len);
- i += 2;
- } else {
- CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d",
- segs[i].ds_addr, segs[i].ds_len);
- i++;
- }
-
- }
-}
-#endif
- /*
- * remember credits used
- */
- m0->m_pkthdr.csum_data = mbuf_wrs[count];
- m0->m_pkthdr.len = bytes;
- toep->tp_wr_avail -= mbuf_wrs[count];
- toep->tp_wr_unacked += mbuf_wrs[count];
-
- if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) ||
- toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
- struct work_request_hdr *wr = cplhdr(m0);
+ struct tid_info *t = &td->tid_maps;
- wr->wr_hi |= htonl(F_WR_COMPL);
- toep->tp_wr_unacked = 0;
- }
- KASSERT((m0->m_pkthdr.csum_data > 0) &&
- (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d",
- m0->m_pkthdr.csum_data));
- m0->m_type = MT_DONTFREE;
- enqueue_wr(toep, m0);
- DPRINTF("sending offload tx with %d bytes in %d segments\n",
- bytes, count);
- l2t_send(cdev, m0, toep->tp_l2t);
- }
- sockbuf_unlock(snd);
- return (total_bytes);
+ t->tid_tab[tid] = ctx;
+ atomic_add_int(&t->tids_in_use, 1);
}
-/*
- * Close a connection by sending a CPL_CLOSE_CON_REQ message. Cannot fail
- * under any circumstances. We take the easy way out and always queue the
- * message to the write_queue. We can optimize the case where the queue is
- * already empty though the optimization is probably not worth it.
- */
-static void
-close_conn(struct socket *so)
+void
+update_tid(struct tom_data *td, void *ctx, unsigned int tid)
{
- struct mbuf *m;
- struct cpl_close_con_req *req;
- struct tom_data *d;
- struct inpcb *inp = so_sotoinpcb(so);
- struct tcpcb *tp;
- struct toepcb *toep;
- unsigned int tid;
+ struct tid_info *t = &td->tid_maps;
+ t->tid_tab[tid] = ctx;
+}
- inp_wlock(inp);
- tp = so_sototcpcb(so);
- toep = tp->t_toe;
-
- if (tp->t_state != TCPS_SYN_SENT)
- t3_push_frames(so, 1);
-
- if (toep->tp_flags & TP_FIN_SENT) {
- inp_wunlock(inp);
- return;
- }
+void
+remove_tid(struct tom_data *td, unsigned int tid)
+{
+ struct tid_info *t = &td->tid_maps;
- tid = toep->tp_tid;
-
- d = TOM_DATA(toep->tp_toedev);
-
- m = m_gethdr_nofail(sizeof(*req));
- m_set_priority(m, CPL_PRIORITY_DATA);
- m_set_sgl(m, NULL);
- m_set_sgllen(m, 0);
+ t->tid_tab[tid] = NULL;
+ atomic_add_int(&t->tids_in_use, -1);
+}
- toep->tp_flags |= TP_FIN_SENT;
- req = mtod(m, struct cpl_close_con_req *);
-
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
- req->wr.wr_lo = htonl(V_WR_TID(tid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
- req->rsvd = 0;
- inp_wunlock(inp);
- /*
- * XXX - need to defer shutdown while there is still data in the queue
- *
- */
- CTR4(KTR_TOM, "%s CLOSE_CON_REQ so %p tp %p tid=%u", __FUNCTION__, so, tp, tid);
- cxgb_ofld_send(d->cdev, m);
+/* Use ctx as a next pointer in the tid release list. */
+void
+queue_tid_release(struct toedev *tod, unsigned int tid)
+{
+ struct tom_data *td = t3_tomdata(tod);
+ void **p = &td->tid_maps.tid_tab[tid];
+ struct adapter *sc = tod->tod_softc;
+ mtx_lock(&td->tid_release_lock);
+ *p = td->tid_release_list;
+ td->tid_release_list = p;
+ if (!*p)
+ taskqueue_enqueue(sc->tq, &td->tid_release_task);
+ mtx_unlock(&td->tid_release_lock);
}
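
queue_tid_release() threads the pending-release list straight through the tid table itself: the tid's void * slot in tid_tab is overwritten with the old list head, so no extra allocation is needed, and the tid is later recovered from the slot's array index. The taskqueue is kicked only when the old head (now stored in *p) was NULL, i.e. when the list was previously empty. A compilable sketch of the same intrusive list, with illustrative names:

#include <stdio.h>

#define NTIDS 8

static void *tid_tab[NTIDS];    /* ctx pointers, one per tid */
static void **release_head;     /* head of the intrusive release list */

static void
queue_release(unsigned int tid)
{
    void **p = &tid_tab[tid];

    *p = release_head;          /* the slot now holds the "next" pointer */
    release_head = p;
}

static void
process_releases(void)
{
    while (release_head != NULL) {
        void **p = release_head;
        unsigned int tid = p - tid_tab; /* the index recovers the tid */

        release_head = (void **)*p;
        tid_tab[tid] = NULL;
        printf("released tid %u\n", tid);
    }
}

int
main(void)
{
    queue_release(3);
    queue_release(5);
    process_releases();         /* prints 5, then 3 (LIFO order) */
    return (0);
}
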
/*
- * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
- * and send it along.
+ * Populate a TID_RELEASE WR.
*/
-static void
-abort_arp_failure(struct t3cdev *cdev, struct mbuf *m)
+static inline void
+mk_tid_release(struct cpl_tid_release *cpl, unsigned int tid)
{
- struct cpl_abort_req *req = cplhdr(m);
- req->cmd = CPL_ABORT_NO_RST;
- cxgb_ofld_send(cdev, m);
+ cpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
}
-/*
- * Send RX credits through an RX_DATA_ACK CPL message. If nofail is 0 we are
- * permitted to return without sending the message in case we cannot allocate
- * an sk_buff. Returns the number of credits sent.
- */
-uint32_t
-t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail)
+void
+release_tid(struct toedev *tod, unsigned int tid, int qset)
{
+ struct tom_data *td = t3_tomdata(tod);
+ struct adapter *sc = tod->tod_softc;
struct mbuf *m;
- struct cpl_rx_data_ack *req;
- struct toepcb *toep = tp->t_toe;
- struct toedev *tdev = toep->tp_toedev;
-
- m = m_gethdr_nofail(sizeof(*req));
+ struct cpl_tid_release *cpl;
+#ifdef INVARIANTS
+ struct tid_info *t = &td->tid_maps;
+#endif
+
+ KASSERT(tid >= 0 && tid < t->ntids,
+ ("%s: tid=%d, ntids=%d", __func__, tid, t->ntids));
+
+ m = M_GETHDR_OFLD(qset, CPL_PRIORITY_CONTROL, cpl);
+ if (m) {
+ mk_tid_release(cpl, tid);
+ t3_offload_tx(sc, m);
+ remove_tid(td, tid);
+ } else
+ queue_tid_release(tod, tid);
- DPRINTF("returning %u credits to HW\n", credits);
-
- req = mtod(m, struct cpl_rx_data_ack *);
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = 0;
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
- req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
- m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep));
- cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
- return (credits);
}
-/*
- * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled.
- * This is only used in DDP mode, so we take the opportunity to also set the
- * DACK mode and flush any Rx credits.
- */
void
-t3_send_rx_modulate(struct toepcb *toep)
+t3_process_tid_release_list(void *data, int pending)
{
struct mbuf *m;
- struct cpl_rx_data_ack *req;
+ struct tom_data *td = data;
+ struct adapter *sc = td->tod.tod_softc;
- m = m_gethdr_nofail(sizeof(*req));
+ mtx_lock(&td->tid_release_lock);
+ while (td->tid_release_list) {
+ void **p = td->tid_release_list;
+ unsigned int tid = p - td->tid_maps.tid_tab;
+ struct cpl_tid_release *cpl;
- req = mtod(m, struct cpl_rx_data_ack *);
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = 0;
- m->m_pkthdr.len = m->m_len = sizeof(*req);
-
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
- req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
- V_RX_DACK_MODE(1) |
- V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup));
- m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
- cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
- toep->tp_rcv_wup = toep->tp_copied_seq;
+ td->tid_release_list = (void **)*p;
+ m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, cpl); /* qs 0 here */
+ if (m == NULL)
+ break; /* XXX: who reschedules the release task? */
+ mtx_unlock(&td->tid_release_lock);
+ mk_tid_release(cpl, tid);
+ t3_offload_tx(sc, m);
+ remove_tid(td, tid);
+ mtx_lock(&td->tid_release_lock);
+ }
+ mtx_unlock(&td->tid_release_lock);
}
-/*
- * Handle receipt of an urgent pointer.
- */
static void
-handle_urg_ptr(struct socket *so, uint32_t urg_seq)
+close_conn(struct adapter *sc, struct toepcb *toep)
{
-#ifdef URGENT_DATA_SUPPORTED
- struct tcpcb *tp = so_sototcpcb(so);
+ struct mbuf *m;
+ struct cpl_close_con_req *req;
- urg_seq--; /* initially points past the urgent data, per BSD */
+ if (toep->tp_flags & TP_FIN_SENT)
+ return;
- if (tp->urg_data && !after(urg_seq, tp->urg_seq))
- return; /* duplicate pointer */
- sk_send_sigurg(sk);
- if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
- !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
- struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+ m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_DATA, req);
+ if (m == NULL)
+ CXGB_UNIMPLEMENTED();
- tp->copied_seq++;
- if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len)
- tom_eat_skb(sk, skb, 0);
- }
- tp->urg_data = TCP_URG_NOTYET;
- tp->urg_seq = urg_seq;
-#endif
-}
+ req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
+ req->wr.wrh_lo = htonl(V_WR_TID(toep->tp_tid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, toep->tp_tid));
+ req->rsvd = 0;
-/*
- * Returns true if a socket cannot accept new Rx data.
- */
-static inline int
-so_no_receive(const struct socket *so)
-{
- return (so_state_get(so) & (SS_ISDISCONNECTED|SS_ISDISCONNECTING));
+ toep->tp_flags |= TP_FIN_SENT;
+ t3_offload_tx(sc, m);
}
-/*
- * Process an urgent data notification.
- */
-static void
-rx_urg_notify(struct toepcb *toep, struct mbuf *m)
+static inline void
+make_tx_data_wr(struct socket *so, struct tx_data_wr *req, int len,
+ struct mbuf *tail)
{
- struct cpl_rx_urg_notify *hdr = cplhdr(m);
- struct socket *so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
-
- VALIDATE_SOCK(so);
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct toepcb *toep = tp->t_toe;
+ struct sockbuf *snd;
- if (!so_no_receive(so))
- handle_urg_ptr(so, ntohl(hdr->seq));
+ inp_lock_assert(tp->t_inpcb);
+ snd = so_sockbuf_snd(so);
- m_freem(m);
-}
+ req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+ req->wr.wrh_lo = htonl(V_WR_TID(toep->tp_tid));
+ /* len includes the length of any HW ULP additions */
+ req->len = htonl(len);
+ req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
+ /* V_TX_ULP_SUBMODE sets both the mode and submode */
+ req->flags = htonl(V_TX_ULP_SUBMODE(toep->tp_ulp_mode) | V_TX_URG(0) |
+ V_TX_SHOVE(!(tp->t_flags & TF_MORETOCOME) && (tail ? 0 : 1)));
+ req->sndseq = htonl(tp->snd_nxt);
+ if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
+ struct adapter *sc = toep->tp_tod->tod_softc;
+ int cpu_idx = sc->rrss_map[toep->tp_qset];
-/*
- * Handler for RX_URG_NOTIFY CPL messages.
- */
-static int
-do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
- struct toepcb *toep = (struct toepcb *)ctx;
+ req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
+ V_TX_CPU_IDX(cpu_idx));
- rx_urg_notify(toep, m);
- return (0);
-}
+ /* Sendbuffer is in units of 32KB. */
+ if (V_tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE)
+ req->param |= htonl(V_TX_SNDBUF(VNET(tcp_autosndbuf_max) >> 15));
+ else
+ req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
-static __inline int
-is_delack_mode_valid(struct toedev *dev, struct toepcb *toep)
-{
- return (toep->tp_ulp_mode ||
- (toep->tp_ulp_mode == ULP_MODE_TCPDDP &&
- dev->tod_ttid >= TOE_ID_CHELSIO_T3));
+ toep->tp_flags |= TP_DATASENT;
+ }
}
/*
- * Set of states for which we should return RX credits.
+ * TOM_XXX_DUPLICATION sgl_len, calc_tx_descs, calc_tx_descs_ofld, mbuf_wrs, etc.
+ * TOM_XXX_MOVE to some common header file.
*/
-#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)
-
/*
- * Called after some received data has been read. It returns RX credits
- * to the HW for the amount of data processed.
+ * IMM_LEN: # of bytes that can be tx'd as immediate data. There are 16 flits
+ * in a tx desc; subtract 3 for tx_data_wr (including the WR header), and 1 more
+ * for the second gen bit flit. This leaves us with 12 flits.
+ *
+ * descs_to_sgllen: # of SGL entries that can fit into the given # of tx descs.
+ * The first desc has a tx_data_wr (which includes the WR header), the rest have
+ * the WR header only. All descs have the second gen bit flit.
+ *
+ * sgllen_to_descs: # of tx descs used up by an sgl of given length. The first
+ * desc has a tx_data_wr (which includes the WR header), the rest have the WR
+ * header only. All descs have the second gen bit flit.
+ *
+ * flits_to_sgllen: # of SGL entries that can fit in the given # of flits.
+ *
*/
-void
-t3_cleanup_rbuf(struct tcpcb *tp, int copied)
+#define IMM_LEN 96
+static int descs_to_sgllen[TX_MAX_DESC + 1] = {0, 8, 17, 26, 35};
+static int sgllen_to_descs[TX_MAX_SEGS] = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, /* 0 - 9 */
+ 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, /* 10 - 19 */
+ 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, /* 20 - 29 */
+ 4, 4, 4, 4, 4, 4 /* 30 - 35 */
+};
+#if 0
+static int flits_to_sgllen[TX_DESC_FLITS + 1] = {
+ 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10
+};
+#endif
+#if SGE_NUM_GENBITS != 2
+#error "SGE_NUM_GENBITS really must be 2"
+#endif
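
The tables above fall out of simple flit arithmetic: a tx descriptor holds 16 flits of 8 bytes, the first descriptor loses 3 flits to the tx_data_wr and 1 to the gen bit, later descriptors lose only the 1-flit WR header plus the gen bit, and two SGL entries pack into three flits. A standalone check, assuming those constants, that regenerates IMM_LEN and descs_to_sgllen[]:

#include <stdio.h>

#define FLITS_PER_DESC   16 /* 8-byte flits per tx descriptor */
#define TX_DATA_WR_FLITS 3  /* tx_data_wr, WR header included */
#define WR_HDR_FLITS     1  /* WR header alone */
#define GEN_FLITS        1  /* the second gen bit flit */

/* Two SGL entries (2 x 4B len + 2 x 8B addr = 24B) fit in 3 flits. */
static int
flits_to_sgllen(int flits)
{
    return (2 * flits / 3);
}

int
main(void)
{
    int first = FLITS_PER_DESC - TX_DATA_WR_FLITS - GEN_FLITS; /* 12 */
    int rest = FLITS_PER_DESC - WR_HDR_FLITS - GEN_FLITS;      /* 14 */
    int n, sgllen;

    printf("IMM_LEN = %d\n", first * 8);    /* 12 flits * 8 = 96 */

    sgllen = flits_to_sgllen(first);        /* 8 for the first desc */
    for (n = 1; n <= 4; n++, sgllen += flits_to_sgllen(rest))
        printf("descs_to_sgllen[%d] = %d\n", n, sgllen);
    /* prints 8, 17, 26, 35, matching the table above */
    return (0);
}
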
+
+int
+t3_push_frames(struct socket *so, int req_completion)
{
+ struct tcpcb *tp = so_sototcpcb(so);
struct toepcb *toep = tp->t_toe;
- struct socket *so;
- struct toedev *dev;
- int dack_mode, must_send, read;
- u32 thres, credits, dack = 0;
- struct sockbuf *rcv;
-
- so = inp_inpcbtosocket(tp->t_inpcb);
- rcv = so_sockbuf_rcv(so);
-
- if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) ||
- (tp->t_state == TCPS_FIN_WAIT_2))) {
- if (copied) {
- sockbuf_lock(rcv);
- toep->tp_copied_seq += copied;
- sockbuf_unlock(rcv);
- }
-
- return;
- }
-
- inp_lock_assert(tp->t_inpcb);
-
- sockbuf_lock(rcv);
- if (copied)
- toep->tp_copied_seq += copied;
- else {
- read = toep->tp_enqueued_bytes - rcv->sb_cc;
- toep->tp_copied_seq += read;
- }
- credits = toep->tp_copied_seq - toep->tp_rcv_wup;
- toep->tp_enqueued_bytes = rcv->sb_cc;
- sockbuf_unlock(rcv);
-
- if (credits > rcv->sb_mbmax) {
- log(LOG_ERR, "copied_seq=%u rcv_wup=%u credits=%u\n",
- toep->tp_copied_seq, toep->tp_rcv_wup, credits);
- credits = rcv->sb_mbmax;
- }
-
-
- /*
- * XXX this won't accurately reflect credit return - we need
- * to look at the difference between the amount that has been
- * put in the recv sockbuf and what is there now
- */
-
- if (__predict_false(!credits))
- return;
-
- dev = toep->tp_toedev;
- thres = TOM_TUNABLE(dev, rx_credit_thres);
+ struct mbuf *m0, *sndptr, *m;
+ struct toedev *tod = toep->tp_tod;
+ struct adapter *sc = tod->tod_softc;
+ int bytes, ndesc, total_bytes = 0, mlen;
+ struct sockbuf *snd;
+ struct sglist *sgl;
+ struct ofld_hdr *oh;
+ caddr_t dst;
+ struct tx_data_wr *wr;
- if (__predict_false(thres == 0))
- return;
+ inp_lock_assert(tp->t_inpcb);
- if (is_delack_mode_valid(dev, toep)) {
- dack_mode = TOM_TUNABLE(dev, delack);
- if (__predict_false(dack_mode != toep->tp_delack_mode)) {
- u32 r = tp->rcv_nxt - toep->tp_delack_seq;
+ snd = so_sockbuf_snd(so);
+ SOCKBUF_LOCK(snd);
- if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp)
- dack = F_RX_DACK_CHANGE |
- V_RX_DACK_MODE(dack_mode);
- }
- } else
- dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
-
/*
- * For coalescing to work effectively ensure the receive window has
- * at least 16KB left.
+ * Autosize the send buffer.
*/
- must_send = credits + 16384 >= tp->rcv_wnd;
+ if (snd->sb_flags & SB_AUTOSIZE && VNET(tcp_do_autosndbuf)) {
+ if (snd->sb_cc >= (snd->sb_hiwat / 8 * 7) &&
+ snd->sb_cc < VNET(tcp_autosndbuf_max)) {
+ if (!sbreserve_locked(snd, min(snd->sb_hiwat +
+ VNET(tcp_autosndbuf_inc), VNET(tcp_autosndbuf_max)),
+ so, curthread))
+ snd->sb_flags &= ~SB_AUTOSIZE;
+ }
+ }
- if (must_send || credits >= thres)
- toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send);
-}
+ if (toep->tp_m_last && toep->tp_m_last == snd->sb_sndptr)
+ sndptr = toep->tp_m_last->m_next;
+ else
+ sndptr = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;
-static int
-cxgb_toe_disconnect(struct tcpcb *tp)
-{
- struct socket *so;
-
- DPRINTF("cxgb_toe_disconnect\n");
+ /* Nothing to send or no WRs available for sending data */
+ if (toep->tp_wr_avail == 0 || sndptr == NULL)
+ goto out;
- so = inp_inpcbtosocket(tp->t_inpcb);
- close_conn(so);
- return (0);
-}
+ /* Something to send and at least 1 WR available */
+ while (toep->tp_wr_avail && sndptr != NULL) {
-static int
-cxgb_toe_reset(struct tcpcb *tp)
-{
- struct toepcb *toep = tp->t_toe;
+ m0 = m_gethdr(M_NOWAIT, MT_DATA);
+ if (m0 == NULL)
+ break;
+ oh = mtod(m0, struct ofld_hdr *);
+ wr = (void *)(oh + 1);
+ dst = (void *)(wr + 1);
- t3_send_reset(toep);
+ m0->m_pkthdr.len = m0->m_len = sizeof(*oh) + sizeof(*wr);
+ oh->flags = V_HDR_CTRL(CPL_PRIORITY_DATA) | F_HDR_DF |
+ V_HDR_QSET(toep->tp_qset);
- /*
- * unhook from socket
- */
- tp->t_flags &= ~TF_TOE;
- toep->tp_tp = NULL;
- tp->t_toe = NULL;
- return (0);
-}
-
-static int
-cxgb_toe_send(struct tcpcb *tp)
-{
- struct socket *so;
-
- DPRINTF("cxgb_toe_send\n");
- dump_toepcb(tp->t_toe);
+ /*
+ * Try to construct an immediate data WR if possible. Stuff as
+ * much data into it as possible, one whole mbuf at a time.
+ */
+ mlen = sndptr->m_len;
+ ndesc = bytes = 0;
+ while (mlen <= IMM_LEN - bytes) {
+ bcopy(sndptr->m_data, dst, mlen);
+ bytes += mlen;
+ dst += mlen;
+
+ if (!(sndptr = sndptr->m_next))
+ break;
+ mlen = sndptr->m_len;
+ }
- so = inp_inpcbtosocket(tp->t_inpcb);
- t3_push_frames(so, 1);
- return (0);
-}
+ if (bytes) {
-static int
-cxgb_toe_rcvd(struct tcpcb *tp)
-{
+ /* Was able to fit 'bytes' bytes in an immediate WR */
- inp_lock_assert(tp->t_inpcb);
+ ndesc = 1;
+ make_tx_data_wr(so, wr, bytes, sndptr);
- t3_cleanup_rbuf(tp, 0);
-
- return (0);
-}
+ m0->m_len += bytes;
+ m0->m_pkthdr.len = m0->m_len;
-static void
-cxgb_toe_detach(struct tcpcb *tp)
-{
- struct toepcb *toep;
+ } else {
+ int wr_avail = min(toep->tp_wr_avail, TX_MAX_DESC);
- /*
- * XXX how do we handle teardown in the SYN_SENT state?
- *
- */
- inp_lock_assert(tp->t_inpcb);
- toep = tp->t_toe;
- toep->tp_tp = NULL;
+ /* Need to make an SGL */
- /*
- * unhook from socket
- */
- tp->t_flags &= ~TF_TOE;
- tp->t_toe = NULL;
-}
-
+ sgl = sglist_alloc(descs_to_sgllen[wr_avail], M_NOWAIT);
+ if (sgl == NULL)
+ break;
-static struct toe_usrreqs cxgb_toe_usrreqs = {
- .tu_disconnect = cxgb_toe_disconnect,
- .tu_reset = cxgb_toe_reset,
- .tu_send = cxgb_toe_send,
- .tu_rcvd = cxgb_toe_rcvd,
- .tu_detach = cxgb_toe_detach,
- .tu_detach = cxgb_toe_detach,
- .tu_syncache_event = handle_syncache_event,
-};
+ for (m = sndptr; m != NULL; m = m->m_next) {
+ if ((mlen = m->m_len) > 0) {
+ if (sglist_append(sgl, m->m_data, mlen))
+ break;
+ }
+ bytes += mlen;
+ }
+ sndptr = m;
+ if (bytes == 0) {
+ sglist_free(sgl);
+ break;
+ }
+ ndesc = sgllen_to_descs[sgl->sg_nseg];
+ oh->flags |= F_HDR_SGL;
+ oh->sgl = sgl;
+ make_tx_data_wr(so, wr, bytes, sndptr);
+ }
+ oh->flags |= V_HDR_NDESC(ndesc);
+ oh->plen = bytes;
-static void
-__set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word,
- uint64_t mask, uint64_t val, int no_reply)
-{
- struct cpl_set_tcb_field *req;
+ snd->sb_sndptr = sndptr;
+ snd->sb_sndptroff += bytes;
+ if (sndptr == NULL) {
+ snd->sb_sndptr = snd->sb_mbtail;
+ snd->sb_sndptroff -= snd->sb_mbtail->m_len;
+ toep->tp_m_last = snd->sb_mbtail;
+ } else
+ toep->tp_m_last = NULL;
- CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
- toep->tp_tid, word, mask, val);
+ total_bytes += bytes;
- req = mtod(m, struct cpl_set_tcb_field *);
- m->m_pkthdr.len = m->m_len = sizeof(*req);
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = 0;
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid));
- req->reply = V_NO_REPLY(no_reply);
- req->cpu_idx = 0;
- req->word = htons(word);
- req->mask = htobe64(mask);
- req->val = htobe64(val);
+ toep->tp_wr_avail -= ndesc;
+ toep->tp_wr_unacked += ndesc;
- m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
- send_or_defer(toep, m, 0);
-}
+ if ((req_completion && toep->tp_wr_unacked == ndesc) ||
+ toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
+ wr->wr.wrh_hi |= htonl(F_WR_COMPL);
+ toep->tp_wr_unacked = 0;
+ }
-static void
-t3_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val)
-{
- struct mbuf *m;
- struct tcpcb *tp = toep->tp_tp;
-
- if (toep == NULL)
- return;
-
- if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) {
- printf("not seting field\n");
- return;
+ enqueue_wr(toep, m0);
+ l2t_send(sc, m0, toep->tp_l2t);
}
-
- m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field));
+out:
+ SOCKBUF_UNLOCK(snd);
+
+ if (sndptr == NULL && (toep->tp_flags & TP_SEND_FIN))
+ close_conn(sc, toep);
- __set_tcb_field(toep, m, word, mask, val, 1);
+ return (total_bytes);
}
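
Note how t3_push_frames() rations completion requests: F_WR_COMPL is set on the first WR after the caller explicitly asks for a completion, or whenever half of tp_wr_max descriptors are outstanding, so credits flow back before the connection's WR budget is exhausted. A small sketch of that pacing rule, using hypothetical names:

#include <stdbool.h>
#include <stdio.h>

struct conn {
    int wr_max;     /* total WR credits for the connection */
    int wr_avail;   /* credits not yet consumed */
    int wr_unacked; /* consumed since the last completion request */
};

/* Should the WR about to be sent (costing ndesc credits) ask for one? */
static bool
want_completion(struct conn *c, int ndesc, bool req_completion)
{
    c->wr_avail -= ndesc;
    c->wr_unacked += ndesc;

    if ((req_completion && c->wr_unacked == ndesc) ||
        c->wr_unacked >= c->wr_max / 2) {
        c->wr_unacked = 0;  /* the completion will return these credits */
        return (true);
    }
    return (false);
}

int
main(void)
{
    struct conn c = { .wr_max = 16, .wr_avail = 16, .wr_unacked = 0 };
    int i;

    for (i = 0; i < 8; i++)
        printf("WR %d: compl=%d\n", i, want_completion(&c, 2, false));
    /* requests a completion on every 4th WR (8 of 16 credits unacked) */
    return (0);
}
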
-/*
- * Set one of the t_flags bits in the TCB.
- */
-static void
-set_tcb_tflag(struct toepcb *toep, unsigned int bit_pos, int val)
+static int
+send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
{
+ struct mbuf *m;
+ struct cpl_rx_data_ack *req;
+ uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
- t3_set_tcb_field(toep, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos);
-}
+ m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_CONTROL, req);
+ if (m == NULL)
+ return (0);
-/*
- * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting.
- */
-static void
-t3_set_nagle(struct toepcb *toep)
-{
- struct tcpcb *tp = toep->tp_tp;
-
- set_tcb_tflag(toep, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY));
+ req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->wr.wrh_lo = 0;
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
+ req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
+ t3_offload_tx(sc, m);
+ return (credits);
}
-/*
- * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting.
- */
void
-t3_set_keepalive(struct toepcb *toep, int on_off)
+t3_rcvd(struct toedev *tod, struct tcpcb *tp)
{
+ struct adapter *sc = tod->tod_softc;
+ struct inpcb *inp = tp->t_inpcb;
+ struct socket *so = inp->inp_socket;
+ struct sockbuf *so_rcv = &so->so_rcv;
+ struct toepcb *toep = tp->t_toe;
+ int must_send;
- set_tcb_tflag(toep, S_TF_KEEPALIVE, on_off);
-}
+ INP_WLOCK_ASSERT(inp);
-void
-t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off)
-{
- set_tcb_tflag(toep, S_TF_RCV_COALESCE_ENABLE, on_off);
-}
+ SOCKBUF_LOCK(so_rcv);
+ KASSERT(toep->tp_enqueued >= so_rcv->sb_cc,
+ ("%s: so_rcv->sb_cc > enqueued", __func__));
+ toep->tp_rx_credits += toep->tp_enqueued - so_rcv->sb_cc;
+ toep->tp_enqueued = so_rcv->sb_cc;
+ SOCKBUF_UNLOCK(so_rcv);
-void
-t3_set_dack_mss(struct toepcb *toep, int on_off)
-{
+ must_send = toep->tp_rx_credits + 16384 >= tp->rcv_wnd;
+ if (must_send || toep->tp_rx_credits >= 15 * 1024) {
+ int credits;
- set_tcb_tflag(toep, S_TF_DACK_MSS, on_off);
+ credits = send_rx_credits(sc, toep, toep->tp_rx_credits);
+ toep->tp_rx_credits -= credits;
+ tp->rcv_wnd += credits;
+ tp->rcv_adv += credits;
+ }
}
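
t3_rcvd() batches window updates instead of acking every read: credits accrue as the application drains so_rcv and are pushed to the chip only once 15KB have built up, or unconditionally when the outstanding credits leave less than 16KB of receive window (must_send), since the sender would otherwise stall. A sketch of the same thresholding, assuming simplified types:

#include <stdbool.h>
#include <stdio.h>

struct rxconn {
    unsigned long rcv_wnd;  /* receive window known to the hardware */
    int rx_credits;         /* bytes read back but not yet returned */
};

/* Called after the application has read nread bytes from the socket. */
static void
rx_credit_return(struct rxconn *c, int nread)
{
    bool must_send;

    c->rx_credits += nread;
    must_send = c->rx_credits + 16384 >= c->rcv_wnd;

    if (must_send || c->rx_credits >= 15 * 1024) {
        printf("returning %d credits\n", c->rx_credits);
        c->rcv_wnd += c->rx_credits;    /* window reopens */
        c->rx_credits = 0;
    }
}

int
main(void)
{
    struct rxconn c = { .rcv_wnd = 64 * 1024, .rx_credits = 0 };

    rx_credit_return(&c, 8 * 1024); /* below both thresholds, batched */
    rx_credit_return(&c, 8 * 1024); /* 16KB accrued, credits returned */
    return (0);
}
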
-/*
- * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting.
- */
-static void
-t3_set_tos(struct toepcb *toep)
+static int
+do_rx_urg_notify(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- int tos = inp_ip_tos_get(toep->tp_tp->t_inpcb);
-
- t3_set_tcb_field(toep, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS),
- V_TCB_TOS(tos));
-}
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ struct cpl_rx_urg_notify *hdr = mtod(m, void *);
+ unsigned int tid = GET_TID(hdr);
+ struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+ log(LOG_ERR, "%s: tid %u inp %p\n", __func__, tid, toep->tp_inp);
-/*
- * In DDP mode, TP fails to schedule a timer to push RX data to the host when
- * DDP is disabled (data is delivered to freelist). [Note that, the peer should
- * set the PSH bit in the last segment, which would trigger delivery.]
- * We work around the issue by setting a DDP buffer in a partial placed state,
- * which guarantees that TP will schedule a timer.
- */
-#define TP_DDP_TIMER_WORKAROUND_MASK\
- (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\
- ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\
- V_TCB_RX_DDP_BUF0_LEN(3)) << 32))
-#define TP_DDP_TIMER_WORKAROUND_VAL\
- (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\
- ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\
- 32))
-
-static void
-t3_enable_ddp(struct toepcb *toep, int on)
-{
- if (on) {
-
- t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1),
- V_TF_DDP_OFF(0));
- } else
- t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS,
- V_TF_DDP_OFF(1) |
- TP_DDP_TIMER_WORKAROUND_MASK,
- V_TF_DDP_OFF(1) |
- TP_DDP_TIMER_WORKAROUND_VAL);
-
+ m_freem(m);
+ return (0);
}
-void
-t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag_color)
+int
+t3_send_fin(struct toedev *tod, struct tcpcb *tp)
{
- t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_TAG + buf_idx,
- V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
- tag_color);
-}
+ struct toepcb *toep = tp->t_toe;
+ struct inpcb *inp = tp->t_inpcb;
+ struct socket *so = inp_inpcbtosocket(inp);
+#if defined(KTR)
+ unsigned int tid = toep->tp_tid;
+#endif
-void
-t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
- unsigned int len)
-{
- if (buf_idx == 0)
- t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_OFFSET,
- V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
- V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
- V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) |
- V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
- else
- t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF1_OFFSET,
- V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
- V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32),
- V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) |
- V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32));
-}
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(inp);
-static int
-t3_set_cong_control(struct socket *so, const char *name)
-{
-#ifdef CONGESTION_CONTROL_SUPPORTED
- int cong_algo;
+ CTR4(KTR_CXGB, "%s: tid %d, toep %p, flags %x", __func__, tid, toep,
+ toep->tp_flags);
- for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++)
- if (!strcmp(name, t3_cong_ops[cong_algo].name))
- break;
+ toep->tp_flags |= TP_SEND_FIN;
+ t3_push_frames(so, 1);
- if (cong_algo >= ARRAY_SIZE(t3_cong_ops))
- return -EINVAL;
-#endif
- return 0;
+ return (0);
}
int
-t3_get_tcb(struct toepcb *toep)
+t3_tod_output(struct toedev *tod, struct tcpcb *tp)
{
- struct cpl_get_tcb *req;
- struct tcpcb *tp = toep->tp_tp;
- struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
+ struct inpcb *inp = tp->t_inpcb;
+ struct socket *so = inp->inp_socket;
- if (!m)
- return (ENOMEM);
-
- inp_lock_assert(tp->t_inpcb);
- m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
- req = mtod(m, struct cpl_get_tcb *);
- m->m_pkthdr.len = m->m_len = sizeof(*req);
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = 0;
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid));
- req->cpuno = htons(toep->tp_qset);
- req->rsvd = 0;
- if (tp->t_state == TCPS_SYN_SENT)
- mbufq_tail(&toep->out_of_order_queue, m); // defer
- else
- cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
- return 0;
+ t3_push_frames(so, 1);
+ return (0);
}
-static inline void
-so_insert_tid(struct tom_data *d, struct toepcb *toep, unsigned int tid)
+/* What mtu_idx to use, given a 4-tuple and/or an MSS cap */
+int
+find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss)
{
+ unsigned short *mtus = &sc->params.mtus[0];
+ int i = 0, mss;
- toepcb_hold(toep);
-
- cxgb_insert_tid(d->cdev, d->client, toep, tid);
-}
+ KASSERT(inc != NULL || pmss > 0,
+ ("%s: at least one of inc/pmss must be specified", __func__));
-/**
- * find_best_mtu - find the entry in the MTU table closest to an MTU
- * @d: TOM state
- * @mtu: the target MTU
- *
- * Returns the index of the value in the MTU table that is closest to but
- * does not exceed the target MTU.
- */
-static unsigned int
-find_best_mtu(const struct t3c_data *d, unsigned short mtu)
-{
- int i = 0;
+ mss = inc ? tcp_mssopt(inc) : pmss;
+ if (pmss > 0 && mss > pmss)
+ mss = pmss;
- while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
+ while (i < NMTUS - 1 && mtus[i + 1] <= mss + 40)
++i;
- return (i);
-}
-static unsigned int
-select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu)
-{
- unsigned int idx;
-
-#ifdef notyet
- struct rtentry *dst = so_sotoinpcb(so)->inp_route.ro_rt;
-#endif
- if (tp) {
- tp->t_maxseg = pmtu - 40;
- if (tp->t_maxseg < td->mtus[0] - 40)
- tp->t_maxseg = td->mtus[0] - 40;
- idx = find_best_mtu(td, tp->t_maxseg + 40);
-
- tp->t_maxseg = td->mtus[idx] - 40;
- } else
- idx = find_best_mtu(td, pmtu);
-
- return (idx);
+ return (i);
}
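
find_best_mtu_idx() converts the MSS back to an MTU by adding 40 bytes (20 of IP header plus 20 of TCP header, no options) and then picks the largest entry in the adapter's sorted MTU table that still fits. A worked example against a made-up table (the real one lives in sc->params.mtus):

#include <stdio.h>

#define NMTUS 6

/* Illustrative, sorted MTU table. */
static unsigned short mtus[NMTUS] = { 576, 1024, 1280, 1492, 1500, 9000 };

static int
best_mtu_idx(int mss)
{
    int i = 0;

    /* largest mtus[i] <= mss + 40 */
    while (i < NMTUS - 1 && mtus[i + 1] <= mss + 40)
        i++;
    return (i);
}

int
main(void)
{
    /* mss 1460 -> mtu 1500 -> index 4; mss 1200 -> mtu 1024 -> index 1 */
    printf("mss 1460 -> idx %d (mtu %d)\n", best_mtu_idx(1460),
        mtus[best_mtu_idx(1460)]);
    printf("mss 1200 -> idx %d (mtu %d)\n", best_mtu_idx(1200),
        mtus[best_mtu_idx(1200)]);
    return (0);
}
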
static inline void
-free_atid(struct t3cdev *cdev, unsigned int tid)
+purge_wr_queue(struct toepcb *toep)
{
- struct toepcb *toep = cxgb_free_atid(cdev, tid);
+ struct mbuf *m;
+ struct ofld_hdr *oh;
- if (toep)
- toepcb_release(toep);
+ while ((m = mbufq_dequeue(&toep->wr_list)) != NULL) {
+ oh = mtod(m, struct ofld_hdr *);
+ if (oh->flags & F_HDR_SGL)
+ sglist_free(oh->sgl);
+ m_freem(m);
+ }
}
/*
- * Release resources held by an offload connection (TID, L2T entry, etc.)
+ * Release cxgb(4) and T3 resources held by an offload connection (TID, L2T
+ * entry, etc.)
*/
static void
t3_release_offload_resources(struct toepcb *toep)
{
- struct tcpcb *tp = toep->tp_tp;
- struct toedev *tdev = toep->tp_toedev;
- struct t3cdev *cdev;
- struct socket *so;
- unsigned int tid = toep->tp_tid;
- struct sockbuf *rcv;
-
- CTR0(KTR_TOM, "t3_release_offload_resources");
-
- if (!tdev)
- return;
-
- cdev = TOEP_T3C_DEV(toep);
- if (!cdev)
- return;
-
- toep->tp_qset = 0;
- t3_release_ddp_resources(toep);
+ struct toedev *tod = toep->tp_tod;
+ struct tom_data *td = t3_tomdata(tod);
-#ifdef CTRL_SKB_CACHE
- kfree_skb(CTRL_SKB_CACHE(tp));
- CTRL_SKB_CACHE(tp) = NULL;
-#endif
+ /*
+ * The TOM explicitly detaches its toepcb from the system's inp before
+ * it releases the offload resources.
+ */
+ if (toep->tp_inp) {
+ panic("%s: inp %p still attached to toepcb %p",
+ __func__, toep->tp_inp, toep);
+ }
- if (toep->tp_wr_avail != toep->tp_wr_max) {
+ if (toep->tp_wr_avail != toep->tp_wr_max)
purge_wr_queue(toep);
- reset_wr_list(toep);
- }
if (toep->tp_l2t) {
- l2t_release(L2DATA(cdev), toep->tp_l2t);
+ l2t_release(td->l2t, toep->tp_l2t);
toep->tp_l2t = NULL;
}
- toep->tp_tp = NULL;
- if (tp) {
- inp_lock_assert(tp->t_inpcb);
- so = inp_inpcbtosocket(tp->t_inpcb);
- rcv = so_sockbuf_rcv(so);
- /*
- * cancel any offloaded reads
- *
- */
- sockbuf_lock(rcv);
- tp->t_toe = NULL;
- tp->t_flags &= ~TF_TOE;
- if (toep->tp_ddp_state.user_ddp_pending) {
- t3_cancel_ubuf(toep, rcv);
- toep->tp_ddp_state.user_ddp_pending = 0;
- }
- so_sorwakeup_locked(so);
-
- }
-
- if (toep->tp_state == TCPS_SYN_SENT) {
- free_atid(cdev, tid);
-#ifdef notyet
- __skb_queue_purge(&tp->out_of_order_queue);
-#endif
- } else { // we have TID
- cxgb_remove_tid(cdev, toep, tid);
- toepcb_release(toep);
- }
-#if 0
- log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
-#endif
-}
-static void
-install_offload_ops(struct socket *so)
-{
- struct tcpcb *tp = so_sototcpcb(so);
+ if (toep->tp_tid >= 0)
+ release_tid(tod, toep->tp_tid, toep->tp_qset);
- KASSERT(tp->t_toe != NULL, ("toepcb not set"));
-
- t3_install_socket_ops(so);
- tp->t_flags |= TF_TOE;
- tp->t_tu = &cxgb_toe_usrreqs;
+ toepcb_free(toep);
}
/*
- * Determine the receive window scaling factor given a target max
- * receive window.
+ * Determine the receive window size for a socket.
*/
-static __inline int
-select_rcv_wscale(int space, struct vnet *vnet)
+unsigned long
+select_rcv_wnd(struct socket *so)
+{
+ unsigned long wnd;
+
+ SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+
+ wnd = sbspace(&so->so_rcv);
+ if (wnd < MIN_RCV_WND)
+ wnd = MIN_RCV_WND;
+
+ return min(wnd, MAX_RCV_WND);
+}
+
+int
+select_rcv_wscale(void)
{
int wscale = 0;
+ unsigned long space = sb_max;
if (space > MAX_RCV_WND)
space = MAX_RCV_WND;
- if (V_tcp_do_rfc1323)
- for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;
+ while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space)
+ wscale++;
return (wscale);
}
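
select_rcv_wscale() picks the smallest RFC 1323 shift that lets the 16-bit window field cover the buffer: the loop stops once TCP_MAXWIN << wscale reaches the capped sb_max. A 1MB buffer, for instance, needs wscale 5, because 65535 << 4 = 1048560 falls 16 bytes short of 1048576. A sketch under those stock definitions:

#include <stdio.h>

#define TCP_MAXWIN       65535
#define TCP_MAX_WINSHIFT 14

static int
rcv_wscale(unsigned long space)
{
    int wscale = 0;

    while (wscale < TCP_MAX_WINSHIFT &&
        ((unsigned long)TCP_MAXWIN << wscale) < space)
        wscale++;
    return (wscale);
}

int
main(void)
{
    printf("64KB -> %d\n", rcv_wscale(64UL * 1024));        /* 1 */
    printf("1MB  -> %d\n", rcv_wscale(1024UL * 1024));      /* 5 */
    printf("16MB -> %d\n", rcv_wscale(16UL * 1024 * 1024)); /* 9 */
    return (0);
}
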
+
/*
- * Determine the receive window size for a socket.
+ * Set up the socket for TCP offload.
*/
-static unsigned long
-select_rcv_wnd(struct toedev *dev, struct socket *so)
+void
+offload_socket(struct socket *so, struct toepcb *toep)
{
- struct tom_data *d = TOM_DATA(dev);
- unsigned int wnd;
- unsigned int max_rcv_wnd;
- struct sockbuf *rcv;
+ struct toedev *tod = toep->tp_tod;
+ struct tom_data *td = t3_tomdata(tod);
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp = intotcpcb(inp);
- rcv = so_sockbuf_rcv(so);
-
- if (V_tcp_do_autorcvbuf)
- wnd = V_tcp_autorcvbuf_max;
- else
- wnd = rcv->sb_hiwat;
+ INP_WLOCK_ASSERT(inp);
-
-
- /* XXX
- * For receive coalescing to work effectively we need a receive window
- * that can accomodate a coalesced segment.
- */
- if (wnd < MIN_RCV_WND)
- wnd = MIN_RCV_WND;
-
- /* PR 5138 */
- max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
- (uint32_t)d->rx_page_size * 23 :
- MAX_RCV_WND);
-
- return min(wnd, max_rcv_wnd);
-}
+ /* Update socket */
+ SOCKBUF_LOCK(&so->so_snd);
+ so_sockbuf_snd(so)->sb_flags |= SB_NOCOALESCE;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ SOCKBUF_LOCK(&so->so_rcv);
+ so_sockbuf_rcv(so)->sb_flags |= SB_NOCOALESCE;
+ SOCKBUF_UNLOCK(&so->so_rcv);
-/*
- * Assign offload parameters to some socket fields. This code is used by
- * both active and passive opens.
- */
-static inline void
-init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
- struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
-{
- struct tcpcb *tp = so_sototcpcb(so);
- struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);
- struct sockbuf *snd, *rcv;
-
-#ifdef notyet
- SOCK_LOCK_ASSERT(so);
-#endif
-
- snd = so_sockbuf_snd(so);
- rcv = so_sockbuf_rcv(so);
-
- log(LOG_INFO, "initializing offload socket\n");
- /*
- * We either need to fix push frames to work with sbcompress
- * or we need to add this
- */
- snd->sb_flags |= SB_NOCOALESCE;
- rcv->sb_flags |= SB_NOCOALESCE;
-
+ /* Update TCP PCB */
+ tp->tod = toep->tp_tod;
tp->t_toe = toep;
- toep->tp_tp = tp;
- toep->tp_toedev = dev;
-
- toep->tp_tid = tid;
- toep->tp_l2t = e;
- toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
- toep->tp_wr_unacked = 0;
- toep->tp_delack_mode = 0;
-
- toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
- /*
- * XXX broken
- *
- */
- tp->rcv_wnd = select_rcv_wnd(dev, so);
+ tp->t_flags |= TF_TOE;
- toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
- tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
- toep->tp_qset_idx = 0;
-
- reset_wr_list(toep);
- DPRINTF("initialization done\n");
-}
+ /* Install an extra hold on inp */
+ toep->tp_inp = inp;
+ toep->tp_flags |= TP_ATTACHED;
+ in_pcbref(inp);
-/*
- * The next two functions calculate the option 0 value for a socket.
- */
-static inline unsigned int
-calc_opt0h(struct socket *so, int mtu_idx)
-{
- struct tcpcb *tp = so_sototcpcb(so);
- int wscale = select_rcv_wscale(tp->rcv_wnd, so->so_vnet);
-
- return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
- V_KEEP_ALIVE((so_options_get(so) & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
- V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
+ /* Add the TOE PCB to the active list */
+ mtx_lock(&td->toep_list_lock);
+ TAILQ_INSERT_HEAD(&td->toep_list, toep, link);
+ mtx_unlock(&td->toep_list_lock);
}
-static inline unsigned int
-calc_opt0l(struct socket *so, int ulp_mode)
+/* This is _not_ the normal way to "unoffload" a socket. */
+void
+undo_offload_socket(struct socket *so)
{
- struct tcpcb *tp = so_sototcpcb(so);
- unsigned int val;
-
- val = V_TOS(INP_TOS(tp->t_inpcb)) | V_ULP_MODE(ulp_mode) |
- V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp = intotcpcb(inp);
+ struct toepcb *toep = tp->t_toe;
+ struct toedev *tod = toep->tp_tod;
+ struct tom_data *td = t3_tomdata(tod);
- DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", INP_TOS(tp->t_inpcb), tp->rcv_wnd, val);
- return (val);
-}
+ INP_WLOCK_ASSERT(inp);
-static inline unsigned int
-calc_opt2(const struct socket *so, struct toedev *dev)
-{
- int flv_valid;
+ so_sockbuf_snd(so)->sb_flags &= ~SB_NOCOALESCE;
+ so_sockbuf_rcv(so)->sb_flags &= ~SB_NOCOALESCE;
+
+ tp->tod = NULL;
+ tp->t_toe = NULL;
+ tp->t_flags &= ~TF_TOE;
- flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);
+ toep->tp_inp = NULL;
+ toep->tp_flags &= ~TP_ATTACHED;
+ if (in_pcbrele_wlocked(inp))
+ panic("%s: inp freed.", __func__);
- return (V_FLAVORS_VALID(flv_valid) |
- V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
+ mtx_lock(&td->toep_list_lock);
+ TAILQ_REMOVE(&td->toep_list, toep, link);
+ mtx_unlock(&td->toep_list_lock);
}
-#if DEBUG_WR > 1
-static int
-count_pending_wrs(const struct toepcb *toep)
+/*
+ * Socket could be a listening socket, and we may not have a toepcb at all at
+ * this time.
+ */
+uint32_t
+calc_opt0h(struct socket *so, int mtu_idx, int rscale, struct l2t_entry *e)
{
- const struct mbuf *m;
- int n = 0;
+ uint32_t opt0h = F_TCAM_BYPASS | V_WND_SCALE(rscale) |
+ V_MSS_IDX(mtu_idx);
+
+ if (so != NULL) {
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp = intotcpcb(inp);
+ int keepalive = always_keepalive ||
+ so_options_get(so) & SO_KEEPALIVE;
- wr_queue_walk(toep, m)
- n += m->m_pkthdr.csum_data;
- return (n);
+ opt0h |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0);
+ opt0h |= V_KEEP_ALIVE(keepalive != 0);
+ }
+
+ if (e != NULL)
+ opt0h |= V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx);
+
+ return (htobe32(opt0h));
}
-#endif
-#if 0
-(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
-#endif
-
-static void
-mk_act_open_req(struct socket *so, struct mbuf *m,
- unsigned int atid, const struct l2t_entry *e)
+uint32_t
+calc_opt0l(struct socket *so, int rcv_bufsize)
{
- struct cpl_act_open_req *req;
- struct inpcb *inp = so_sotoinpcb(so);
- struct tcpcb *tp = inp_inpcbtotcpcb(inp);
- struct toepcb *toep = tp->t_toe;
- struct toedev *tdev = toep->tp_toedev;
-
- m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));
-
- req = mtod(m, struct cpl_act_open_req *);
- m->m_pkthdr.len = m->m_len = sizeof(*req);
-
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = 0;
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
- inp_4tuple_get(inp, &req->local_ip, &req->local_port, &req->peer_ip, &req->peer_port);
-#if 0
- req->local_port = inp->inp_lport;
- req->peer_port = inp->inp_fport;
- memcpy(&req->local_ip, &inp->inp_laddr, 4);
- memcpy(&req->peer_ip, &inp->inp_faddr, 4);
-#endif
- req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
- V_TX_CHANNEL(e->smt_idx));
- req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
- req->params = 0;
- req->opt2 = htonl(calc_opt2(so, tdev));
-}
+ uint32_t opt0l = V_ULP_MODE(ULP_MODE_NONE) | V_RCV_BUFSIZ(rcv_bufsize);
+
+ KASSERT(rcv_bufsize <= M_RCV_BUFSIZ,
+ ("%s: rcv_bufsize (%d) is too high", __func__, rcv_bufsize));
+
+ if (so != NULL) /* optional because no one cares about IP TOS */
+ opt0l |= V_TOS(INP_TOS(sotoinpcb(so)));
+ return (htobe32(opt0l));
+}
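
calc_opt0h() and calc_opt0l() pack sub-fields into 32-bit words with Chelsio's usual macro convention: S_X is a field's shift, M_X its mask, V_X(v) places a value, and F_X is the single-bit shorthand V_X(1). A self-contained illustration with made-up field positions (the real ones are in the T3 hardware headers):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical word layout: | keepalive:1 | wscale:4 | mss_idx:6 | */
#define S_MSS_IDX       0
#define M_MSS_IDX       0x3f
#define V_MSS_IDX(x)    ((x) << S_MSS_IDX)

#define S_WND_SCALE     6
#define M_WND_SCALE     0xf
#define V_WND_SCALE(x)  ((x) << S_WND_SCALE)

#define S_KEEP_ALIVE    10
#define V_KEEP_ALIVE(x) ((x) << S_KEEP_ALIVE)
#define F_KEEP_ALIVE    V_KEEP_ALIVE(1U)

int
main(void)
{
    uint32_t opt0 = V_WND_SCALE(5) | V_MSS_IDX(4) | F_KEEP_ALIVE;

    /* Extract a field: shift down, then mask. */
    printf("opt0 = %#x, wscale = %u\n", (unsigned)opt0,
        (unsigned)((opt0 >> S_WND_SCALE) & M_WND_SCALE));
    /* prints opt0 = 0x544, wscale = 5 */
    return (0);
}
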
/*
* Convert an ACT_OPEN_RPL status to an errno.
@@ -1422,61 +889,6 @@ act_open_rpl_status_to_errno(int status)
}
}
-static void
-fail_act_open(struct toepcb *toep, int errno)
-{
- struct tcpcb *tp = toep->tp_tp;
-
- t3_release_offload_resources(toep);
- if (tp) {
- inp_wunlock(tp->t_inpcb);
- tcp_offload_drop(tp, errno);
- }
-
-#ifdef notyet
- TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
-#endif
-}
-
-/*
- * Handle active open failures.
- */
-static void
-active_open_failed(struct toepcb *toep, struct mbuf *m)
-{
- struct cpl_act_open_rpl *rpl = cplhdr(m);
- struct inpcb *inp;
-
- if (toep->tp_tp == NULL)
- goto done;
-
- inp = toep->tp_tp->t_inpcb;
-
-/*
- * Don't handle connection retry for now
- */
-#ifdef notyet
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- if (rpl->status == CPL_ERR_CONN_EXIST &&
- icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
- icsk->icsk_retransmit_timer.function = act_open_retry_timer;
- sk_reset_timer(so, &icsk->icsk_retransmit_timer,
- jiffies + HZ / 2);
- } else
-#endif
- {
- inp_wlock(inp);
- /*
- * drops the inpcb lock
- */
- fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
- }
-
- done:
- m_free(m);
-}
-
/*
* Return whether a failed active open has allocated a TID
*/
@@ -1488,1072 +900,350 @@ act_open_has_tid(int status)
}
/*
- * Process an ACT_OPEN_RPL CPL message.
+ * Active open failed.
*/
static int
-do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
- struct toepcb *toep = (struct toepcb *)ctx;
- struct cpl_act_open_rpl *rpl = cplhdr(m);
-
- if (cdev->type != T3A && act_open_has_tid(rpl->status))
- cxgb_queue_tid_release(cdev, GET_TID(rpl));
-
- active_open_failed(toep, m);
- return (0);
-}
-
-/*
- * Handle an ARP failure for an active open. XXX purge ofo queue
- *
- * XXX badly broken for crossed SYNs as the ATID is no longer valid.
- * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should
- * check SOCK_DEAD or sk->sk_sock. Or maybe generate the error here but don't
- * free the atid. Hmm.
- */
-#ifdef notyet
-static void
-act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m)
-{
- struct toepcb *toep = m_get_toep(m);
- struct tcpcb *tp = toep->tp_tp;
- struct inpcb *inp = tp->t_inpcb;
- struct socket *so;
-
- inp_wlock(inp);
- if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) {
- /*
- * drops the inpcb lock
- */
- fail_act_open(so, EHOSTUNREACH);
- printf("freeing %p\n", m);
-
- m_free(m);
- } else
- inp_wunlock(inp);
-}
-#endif
-/*
- * Send an active open request.
- */
-int
-t3_connect(struct toedev *tdev, struct socket *so,
- struct rtentry *rt, struct sockaddr *nam)
-{
- struct mbuf *m;
- struct l2t_entry *e;
- struct tom_data *d = TOM_DATA(tdev);
- struct inpcb *inp = so_sotoinpcb(so);
+do_act_open_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ struct toedev *tod = &td->tod;
+ struct cpl_act_open_rpl *rpl = mtod(m, void *);
+ unsigned int atid = G_TID(ntohl(rpl->atid));
+ struct toepcb *toep = lookup_atid(&td->tid_maps, atid);
+ struct inpcb *inp = toep->tp_inp;
struct tcpcb *tp = intotcpcb(inp);
- struct toepcb *toep; /* allocated by init_offload_socket */
-
- int atid;
+ int s = rpl->status;
- toep = toepcb_alloc();
- if (toep == NULL)
- goto out_err;
-
- if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0)
- goto out_err;
-
- e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam);
- if (!e)
- goto free_tid;
+ CTR3(KTR_CXGB, "%s: atid %u, status %u ", __func__, atid, s);
- inp_lock_assert(inp);
- m = m_gethdr(MT_DATA, M_WAITOK);
-
-#if 0
- m->m_toe.mt_toepcb = tp->t_toe;
- set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure);
-#endif
- so_lock(so);
-
- init_offload_socket(so, tdev, atid, e, rt, toep);
-
- install_offload_ops(so);
-
- mk_act_open_req(so, m, atid, e);
- so_unlock(so);
-
- soisconnecting(so);
- toep = tp->t_toe;
- m_set_toep(m, tp->t_toe);
-
- toep->tp_state = TCPS_SYN_SENT;
- l2t_send(d->cdev, (struct mbuf *)m, e);
+ free_atid(&td->tid_maps, atid);
+ toep->tp_tid = -1;
- if (toep->tp_ulp_mode)
- t3_enable_ddp(toep, 0);
- return (0);
-
-free_tid:
- printf("failing connect - free atid\n");
-
- free_atid(d->cdev, atid);
-out_err:
- printf("return ENOMEM\n");
- return (ENOMEM);
-}
+ if (act_open_has_tid(s))
+ queue_tid_release(tod, GET_TID(rpl));
-/*
- * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do
- * not send multiple ABORT_REQs for the same connection and also that we do
- * not try to send a message after the connection has closed. Returns 1 if
- * an ABORT_REQ wasn't generated after all, 0 otherwise.
- */
-static void
-t3_send_reset(struct toepcb *toep)
-{
-
- struct cpl_abort_req *req;
- unsigned int tid = toep->tp_tid;
- int mode = CPL_ABORT_SEND_RST;
- struct tcpcb *tp = toep->tp_tp;
- struct toedev *tdev = toep->tp_toedev;
- struct socket *so = NULL;
- struct mbuf *m;
- struct sockbuf *snd;
-
- if (tp) {
- inp_lock_assert(tp->t_inpcb);
- so = inp_inpcbtosocket(tp->t_inpcb);
+ if (s == CPL_ERR_TCAM_FULL || s == CPL_ERR_CONN_EXIST) {
+ INP_WLOCK(inp);
+ toe_connect_failed(tod, tp, EAGAIN);
+ toepcb_release(toep); /* unlocks inp */
+ } else {
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+ toe_connect_failed(tod, tp, act_open_rpl_status_to_errno(s));
+ toepcb_release(toep); /* unlocks inp */
+ INP_INFO_WUNLOCK(&V_tcbinfo);
}
-
- if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) ||
- tdev == NULL))
- return;
- toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN);
- snd = so_sockbuf_snd(so);
- /* Purge the send queue so we don't send anything after an abort. */
- if (so)
- sbflush(snd);
- if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev))
- mode |= CPL_ABORT_POST_CLOSE_REQ;
-
- m = m_gethdr_nofail(sizeof(*req));
- m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep));
- set_arp_failure_handler(m, abort_arp_failure);
-
- req = mtod(m, struct cpl_abort_req *);
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
- req->wr.wr_lo = htonl(V_WR_TID(tid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
- req->rsvd0 = tp ? htonl(tp->snd_nxt) : 0;
- req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
- req->cmd = mode;
- if (tp && (tp->t_state == TCPS_SYN_SENT))
- mbufq_tail(&toep->out_of_order_queue, m); // defer
- else
- l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);
+ m_freem(m);
+ return (0);
}
-static int
-t3_ip_ctloutput(struct socket *so, struct sockopt *sopt)
+/*
+ * Send an active open request.
+ *
+ * State of affairs on entry:
+ * soisconnecting (so_state |= SS_ISCONNECTING)
+ * tcbinfo not locked (this has changed - used to be WLOCKed)
+ * inp WLOCKed
+ * tp->t_state = TCPS_SYN_SENT
+ * rtalloc1, RT_UNLOCK on rt.
+ */
+int
+t3_connect(struct toedev *tod, struct socket *so,
+ struct rtentry *rt, struct sockaddr *nam)
{
- struct inpcb *inp;
- int error, optval;
-
- if (sopt->sopt_name == IP_OPTIONS)
- return (ENOPROTOOPT);
-
- if (sopt->sopt_name != IP_TOS)
- return (EOPNOTSUPP);
-
- error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
-
- if (error)
- return (error);
-
- if (optval > IPTOS_PREC_CRITIC_ECP)
- return (EINVAL);
-
- inp = so_sotoinpcb(so);
- inp_wlock(inp);
- inp_ip_tos_set(inp, optval);
-#if 0
- inp->inp_ip_tos = optval;
-#endif
- t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe);
- inp_wunlock(inp);
+ struct mbuf *m = NULL;
+ struct l2t_entry *e = NULL;
+ struct tom_data *td = t3_tomdata(tod);
+ struct adapter *sc = tod->tod_softc;
+ struct cpl_act_open_req *cpl;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp = intotcpcb(inp);
+ struct toepcb *toep;
+ int atid = -1, mtu_idx, rscale, cpu_idx, qset;
+ struct sockaddr *gw;
+ struct ifnet *ifp = rt->rt_ifp;
+ struct port_info *pi = ifp->if_softc; /* XXX wrong for VLAN etc. */
- return (0);
-}
+ INP_WLOCK_ASSERT(inp);
-static int
-t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt)
-{
- int err = 0;
- size_t copied;
-
- if (sopt->sopt_name != TCP_CONGESTION &&
- sopt->sopt_name != TCP_NODELAY)
- return (EOPNOTSUPP);
-
- if (sopt->sopt_name == TCP_CONGESTION) {
- char name[TCP_CA_NAME_MAX];
- int optlen = sopt->sopt_valsize;
- struct tcpcb *tp;
-
- if (sopt->sopt_dir == SOPT_GET) {
- KASSERT(0, ("unimplemented"));
- return (EOPNOTSUPP);
- }
+ toep = toepcb_alloc(tod);
+ if (toep == NULL)
+ goto failed;
- if (optlen < 1)
- return (EINVAL);
-
- err = copyinstr(sopt->sopt_val, name,
- min(TCP_CA_NAME_MAX - 1, optlen), &copied);
- if (err)
- return (err);
- if (copied < 1)
- return (EINVAL);
-
- tp = so_sototcpcb(so);
- /*
- * XXX I need to revisit this
- */
- if ((err = t3_set_cong_control(so, name)) == 0) {
-#ifdef CONGESTION_CONTROL_SUPPORTED
- tp->t_cong_control = strdup(name, M_CXGB);
-#endif
- } else
- return (err);
- } else {
- int optval, oldval;
- struct inpcb *inp;
- struct tcpcb *tp;
+ atid = alloc_atid(&td->tid_maps, toep);
+ if (atid < 0)
+ goto failed;
- if (sopt->sopt_dir == SOPT_GET)
- return (EOPNOTSUPP);
-
- err = sooptcopyin(sopt, &optval, sizeof optval,
- sizeof optval);
+ qset = pi->first_qset + (arc4random() % pi->nqsets);
- if (err)
- return (err);
+ m = M_GETHDR_OFLD(qset, CPL_PRIORITY_CONTROL, cpl);
+ if (m == NULL)
+ goto failed;
- inp = so_sotoinpcb(so);
- inp_wlock(inp);
- tp = inp_inpcbtotcpcb(inp);
+ gw = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam;
+ e = t3_l2t_get(pi, ifp, gw);
+ if (e == NULL)
+ goto failed;
- oldval = tp->t_flags;
- if (optval)
- tp->t_flags |= TF_NODELAY;
- else
- tp->t_flags &= ~TF_NODELAY;
- inp_wunlock(inp);
+ toep->tp_l2t = e;
+ toep->tp_tid = atid; /* used to double check response */
+ toep->tp_qset = qset;
+ SOCKBUF_LOCK(&so->so_rcv);
+ /* opt0 rcv_bufsiz initially, assumes its normal meaning later */
+ toep->tp_rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
+ SOCKBUF_UNLOCK(&so->so_rcv);
- if (oldval != tp->t_flags && (tp->t_toe != NULL))
- t3_set_nagle(tp->t_toe);
+ offload_socket(so, toep);
- }
+ /*
+ * The kernel sets request_r_scale based on sb_max whereas we need to
+ * take hardware's MAX_RCV_WND into account too. This is normally a
+ * no-op as MAX_RCV_WND is much larger than the default sb_max.
+ */
+ if (tp->t_flags & TF_REQ_SCALE)
+ rscale = tp->request_r_scale = select_rcv_wscale();
+ else
+ rscale = 0;
+ mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0);
+ cpu_idx = sc->rrss_map[qset];
+
+ cpl->wr.wrh_hi = htobe32(V_WR_OP(FW_WROPCODE_FORWARD));
+ cpl->wr.wrh_lo = 0;
+ OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
+ inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip,
+ &cpl->peer_port);
+ cpl->opt0h = calc_opt0h(so, mtu_idx, rscale, e);
+ cpl->opt0l = calc_opt0l(so, toep->tp_rx_credits);
+ cpl->params = 0;
+ cpl->opt2 = calc_opt2(cpu_idx);
+
+ CTR5(KTR_CXGB, "%s: atid %u (%s), toep %p, inp %p", __func__,
+ toep->tp_tid, tcpstates[tp->t_state], toep, inp);
+
+ if (l2t_send(sc, m, e) == 0)
+ return (0);
- return (0);
-}
+ undo_offload_socket(so);
-int
-t3_ctloutput(struct socket *so, struct sockopt *sopt)
-{
- int err;
+failed:
+ CTR5(KTR_CXGB, "%s: FAILED, atid %d, toep %p, l2te %p, mbuf %p",
+ __func__, atid, toep, e, m);
- if (sopt->sopt_level != IPPROTO_TCP)
- err = t3_ip_ctloutput(so, sopt);
- else
- err = t3_tcp_ctloutput(so, sopt);
+ if (atid >= 0)
+ free_atid(&td->tid_maps, atid);
- if (err != EOPNOTSUPP)
- return (err);
+ if (e)
+ l2t_release(td->l2t, e);
- return (tcp_ctloutput(so, sopt));
-}
+ if (toep)
+ toepcb_free(toep);
-/*
- * Returns true if we need to explicitly request RST when we receive new data
- * on an RX-closed connection.
- */
-static inline int
-need_rst_on_excess_rx(const struct toepcb *toep)
-{
- return (1);
-}
+ m_freem(m);
-/*
- * Handles Rx data that arrives in a state where the socket isn't accepting
- * new data.
- */
-static void
-handle_excess_rx(struct toepcb *toep, struct mbuf *m)
-{
-
- if (need_rst_on_excess_rx(toep) &&
- !(toep->tp_flags & TP_ABORT_SHUTDOWN))
- t3_send_reset(toep);
- m_freem(m);
+ return (ENOMEM);
}
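
The failure handling in t3_connect() is the usual kernel goto-unwind shape: resources are taken in order (toepcb, atid, qset mbuf, L2T entry) and the failed: label releases whichever were actually acquired, each release guarded by its own validity test. A stripped-down sketch of the pattern:

#include <errno.h>
#include <stdlib.h>

struct res { int x; };

static struct res *acquire(void) { return (calloc(1, sizeof(struct res))); }
static void release(struct res *r) { free(r); }

static int
connect_like(void)
{
    struct res *a = NULL, *b = NULL, *c = NULL;

    if ((a = acquire()) == NULL)
        goto failed;
    if ((b = acquire()) == NULL)
        goto failed;
    if ((c = acquire()) == NULL)
        goto failed;

    /* Success: ownership passes on (to the hardware, in the driver). */
    return (0);

failed:
    /* Guarded releases make the label reachable from any point above. */
    if (c != NULL)
        release(c);
    if (b != NULL)
        release(b);
    if (a != NULL)
        release(a);
    return (ENOMEM);
}

int
main(void)
{
    return (connect_like());
}
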
/*
- * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE)
- * by getting the DDP offset from the TCB.
+ * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do not
+ * send multiple ABORT_REQs for the same connection and also that we do not try
+ * to send a message after the connection has closed.
*/
static void
-tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
+send_reset(struct toepcb *toep)
{
- struct ddp_state *q = &toep->tp_ddp_state;
- struct ddp_buf_state *bsp;
- struct cpl_get_tcb_rpl *hdr;
- unsigned int ddp_offset;
- struct socket *so;
- struct tcpcb *tp;
- struct sockbuf *rcv;
- int state;
-
- uint64_t t;
- __be64 *tcb;
-
- tp = toep->tp_tp;
- so = inp_inpcbtosocket(tp->t_inpcb);
- inp_lock_assert(tp->t_inpcb);
- rcv = so_sockbuf_rcv(so);
- sockbuf_lock(rcv);
-
- /* Note that we only accout for CPL_GET_TCB issued by the DDP code.
- * We really need a cookie in order to dispatch the RPLs.
- */
- q->get_tcb_count--;
+ struct cpl_abort_req *req;
+ unsigned int tid = toep->tp_tid;
+ struct inpcb *inp = toep->tp_inp;
+ struct socket *so = inp->inp_socket;
+ struct tcpcb *tp = intotcpcb(inp);
+ struct toedev *tod = toep->tp_tod;
+ struct adapter *sc = tod->tod_softc;
+ struct mbuf *m;
- /* It is a possible that a previous CPL already invalidated UBUF DDP
- * and moved the cur_buf idx and hence no further processing of this
- * skb is required. However, the app might be sleeping on
- * !q->get_tcb_count and we need to wake it up.
- */
- if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
- int state = so_state_get(so);
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(inp);
- m_freem(m);
- if (__predict_true((state & SS_NOFDREF) == 0))
- so_sorwakeup_locked(so);
- else
- sockbuf_unlock(rcv);
+ CTR4(KTR_CXGB, "%s: tid %d, toep %p (%x)", __func__, tid, toep,
+ toep->tp_flags);
+ if (toep->tp_flags & TP_ABORT_SHUTDOWN)
return;
- }
- bsp = &q->buf_state[q->cur_buf];
- hdr = cplhdr(m);
- tcb = (__be64 *)(hdr + 1);
- if (q->cur_buf == 0) {
- t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
- ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
- } else {
- t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
- ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
- }
- ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
- m->m_cur_offset = bsp->cur_offset;
- bsp->cur_offset = ddp_offset;
- m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;
-
- CTR5(KTR_TOM,
- "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
- q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
- KASSERT(ddp_offset >= m->m_cur_offset,
- ("ddp_offset=%u less than cur_offset=%u",
- ddp_offset, m->m_cur_offset));
-
-#if 0
-{
- unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;
-
- t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
- ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;
-
- t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
- rcv_nxt = t >> S_TCB_RCV_NXT;
- rcv_nxt &= M_TCB_RCV_NXT;
-
- t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
- rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
- rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;
-
- T3_TRACE2(TIDTB(sk),
- "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
- ddp_flags, rcv_nxt - rx_hdr_offset);
- T3_TRACE4(TB(q),
- "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
- tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
- T3_TRACE3(TB(q),
- "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
- rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
- T3_TRACE2(TB(q),
- "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
- q->buf_state[0].flags, q->buf_state[1].flags);
+ toep->tp_flags |= (TP_ABORT_RPL_PENDING | TP_ABORT_SHUTDOWN);
-}
-#endif
- if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
- handle_excess_rx(toep, m);
- return;
- }
+ /* Purge the send queue */
+ sbflush(so_sockbuf_snd(so));
+ purge_wr_queue(toep);
-#ifdef T3_TRACE
- if ((int)m->m_pkthdr.len < 0) {
- t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
- }
-#endif
- if (bsp->flags & DDP_BF_NOCOPY) {
-#ifdef T3_TRACE
- T3_TRACE0(TB(q),
- "tcb_rpl_as_ddp_complete: CANCEL UBUF");
-
- if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
- printk("!cancel_ubuf");
- t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
- }
-#endif
- m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
- bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
- q->cur_buf ^= 1;
- } else if (bsp->flags & DDP_BF_NOFLIP) {
-
- m->m_ddp_flags = 1; /* always a kernel buffer */
-
- /* now HW buffer carries a user buffer */
- bsp->flags &= ~DDP_BF_NOFLIP;
- bsp->flags |= DDP_BF_NOCOPY;
-
- /* It is possible that the CPL_GET_TCB_RPL doesn't indicate
- * any new data in which case we're done. If in addition the
- * offset is 0, then there wasn't a completion for the kbuf
- * and we need to decrement the posted count.
- */
- if (m->m_pkthdr.len == 0) {
- if (ddp_offset == 0) {
- q->kbuf_posted--;
- bsp->flags |= DDP_BF_NODATA;
- }
- sockbuf_unlock(rcv);
- m_free(m);
- return;
- }
- } else {
- sockbuf_unlock(rcv);
+ m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_DATA, req);
+ if (m == NULL)
+ CXGB_UNIMPLEMENTED();
- /* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
- * but it got here way late and nobody cares anymore.
- */
- m_free(m);
- return;
- }
+ req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
+ req->wr.wrh_lo = htonl(V_WR_TID(tid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
+ req->rsvd0 = htonl(tp->snd_nxt);
+ req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
+ req->cmd = CPL_ABORT_SEND_RST;
- m->m_ddp_gl = (unsigned char *)bsp->gl;
- m->m_flags |= M_DDP;
- m->m_seq = tp->rcv_nxt;
- tp->rcv_nxt += m->m_pkthdr.len;
- tp->t_rcvtime = ticks;
- CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
- m->m_seq, q->cur_buf, m->m_pkthdr.len);
- if (m->m_pkthdr.len == 0) {
- q->user_ddp_pending = 0;
- m_free(m);
- } else
- SBAPPEND(rcv, m);
-
- state = so_state_get(so);
- if (__predict_true((state & SS_NOFDREF) == 0))
- so_sorwakeup_locked(so);
+ if (tp->t_state == TCPS_SYN_SENT)
+ mbufq_tail(&toep->out_of_order_queue, m); /* defer */
else
- sockbuf_unlock(rcv);
+ l2t_send(sc, m, toep->tp_l2t);
}
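
An ABORT_REQ in SYN_SENT is parked on out_of_order_queue instead of being sent, presumably because the request carries tp->snd_nxt and goes out through the L2 entry, neither of which is settled until the active open resolves. A minimal sketch of how such a deferred request could be flushed afterwards; flush_deferred_abort() is hypothetical, and mbufq_dequeue() is assumed to be part of the cxgb mbuf queue API:

	static void
	flush_deferred_abort(struct adapter *sc, struct toepcb *toep)
	{
		struct mbuf *m;

		/* Drain ABORT_REQs parked by send_reset() in SYN_SENT. */
		while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL)
			l2t_send(sc, m, toep->tp_l2t);
	}
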
-/*
- * Process a CPL_GET_TCB_RPL. These can also be generated by the DDP code,
- * in that case they are similar to DDP completions.
- */
-static int
-do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+int
+t3_send_rst(struct toedev *tod __unused, struct tcpcb *tp)
{
- struct toepcb *toep = (struct toepcb *)ctx;
-
- /* OK if socket doesn't exist */
- if (toep == NULL) {
- printf("null toep in do_get_tcb_rpl\n");
- return (CPL_RET_BUF_DONE);
- }
- inp_wlock(toep->tp_tp->t_inpcb);
- tcb_rpl_as_ddp_complete(toep, m);
- inp_wunlock(toep->tp_tp->t_inpcb);
-
+ send_reset(tp->t_toe);
return (0);
}
-static void
-handle_ddp_data(struct toepcb *toep, struct mbuf *m)
-{
- struct tcpcb *tp = toep->tp_tp;
- struct socket *so;
- struct ddp_state *q;
- struct ddp_buf_state *bsp;
- struct cpl_rx_data *hdr = cplhdr(m);
- unsigned int rcv_nxt = ntohl(hdr->seq);
- struct sockbuf *rcv;
-
- if (tp->rcv_nxt == rcv_nxt)
- return;
-
- inp_lock_assert(tp->t_inpcb);
- so = inp_inpcbtosocket(tp->t_inpcb);
- rcv = so_sockbuf_rcv(so);
- sockbuf_lock(rcv);
-
- q = &toep->tp_ddp_state;
- bsp = &q->buf_state[q->cur_buf];
- KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
- rcv_nxt, tp->rcv_nxt));
- m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
- KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
- CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
- rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);
-
-#ifdef T3_TRACE
- if ((int)m->m_pkthdr.len < 0) {
- t3_ddp_error(so, "handle_ddp_data: neg len");
- }
-#endif
- m->m_ddp_gl = (unsigned char *)bsp->gl;
- m->m_flags |= M_DDP;
- m->m_cur_offset = bsp->cur_offset;
- m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
- if (bsp->flags & DDP_BF_NOCOPY)
- bsp->flags &= ~DDP_BF_NOCOPY;
-
- m->m_seq = tp->rcv_nxt;
- tp->rcv_nxt = rcv_nxt;
- bsp->cur_offset += m->m_pkthdr.len;
- if (!(bsp->flags & DDP_BF_NOFLIP))
- q->cur_buf ^= 1;
- /*
- * For now, don't re-enable DDP after a connection fell out of DDP
- * mode.
- */
- q->ubuf_ddp_ready = 0;
- sockbuf_unlock(rcv);
-}
-
/*
- * Process new data received for a connection.
+ * Handler for RX_DATA CPL messages.
*/
-static void
-new_rx_data(struct toepcb *toep, struct mbuf *m)
-{
- struct cpl_rx_data *hdr = cplhdr(m);
- struct tcpcb *tp = toep->tp_tp;
+static int
+do_rx_data(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ struct cpl_rx_data *hdr = mtod(m, void *);
+ unsigned int tid = GET_TID(hdr);
+ struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+ struct inpcb *inp = toep->tp_inp;
+ struct tcpcb *tp;
struct socket *so;
- struct sockbuf *rcv;
- int state;
- int len = be16toh(hdr->len);
-
- inp_wlock(tp->t_inpcb);
-
- so = inp_inpcbtosocket(tp->t_inpcb);
-
- if (__predict_false(so_no_receive(so))) {
- handle_excess_rx(toep, m);
- inp_wunlock(tp->t_inpcb);
- TRACE_EXIT;
- return;
- }
-
- if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
- handle_ddp_data(toep, m);
-
- m->m_seq = ntohl(hdr->seq);
- m->m_ulp_mode = 0; /* for iSCSI */
+ struct sockbuf *so_rcv;
-#if VALIDATE_SEQ
- if (__predict_false(m->m_seq != tp->rcv_nxt)) {
- log(LOG_ERR,
- "%s: TID %u: Bad sequence number %u, expected %u\n",
- toep->tp_toedev->name, toep->tp_tid, m->m_seq,
- tp->rcv_nxt);
- m_freem(m);
- inp_wunlock(tp->t_inpcb);
- return;
- }
-#endif
+ /* Advance over CPL */
m_adj(m, sizeof(*hdr));
-#ifdef URGENT_DATA_SUPPORTED
- /*
- * We don't handle urgent data yet
- */
- if (__predict_false(hdr->urg))
- handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
- if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
- tp->urg_seq - tp->rcv_nxt < skb->len))
- tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
- tp->rcv_nxt];
-#endif
- if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
- toep->tp_delack_mode = hdr->dack_mode;
- toep->tp_delack_seq = tp->rcv_nxt;
- }
- CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
- m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);
-
- if (len < m->m_pkthdr.len)
- m->m_pkthdr.len = m->m_len = len;
-
- tp->rcv_nxt += m->m_pkthdr.len;
- tp->t_rcvtime = ticks;
- toep->tp_enqueued_bytes += m->m_pkthdr.len;
- CTR2(KTR_TOM,
- "new_rx_data: seq 0x%x len %u",
- m->m_seq, m->m_pkthdr.len);
- inp_wunlock(tp->t_inpcb);
- rcv = so_sockbuf_rcv(so);
- sockbuf_lock(rcv);
-#if 0
- if (sb_notify(rcv))
- DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, rcv->sb_flags, m->m_pkthdr.len);
-#endif
- SBAPPEND(rcv, m);
-
+ /* XXX: revisit. This comes from the T4 TOM */
+ if (__predict_false(inp == NULL)) {
+ /*
+ * do_pass_establish failed and must be attempting to abort the
+ * connection. Meanwhile, the T4 has sent us data for such a
+ * connection.
+ */
#ifdef notyet
- /*
- * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
- *
- */
- KASSERT(rcv->sb_cc < (rcv->sb_mbmax << 1),
-
- ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
- so, rcv->sb_cc, rcv->sb_mbmax));
+ KASSERT(toepcb_flag(toep, TPF_ABORT_SHUTDOWN),
+ ("%s: inp NULL and tid isn't being aborted", __func__));
#endif
-
-
- CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
- rcv->sb_cc, rcv->sb_mbcnt);
-
- state = so_state_get(so);
- if (__predict_true((state & SS_NOFDREF) == 0))
- so_sorwakeup_locked(so);
- else
- sockbuf_unlock(rcv);
-}
-
-/*
- * Handler for RX_DATA CPL messages.
- */
-static int
-do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
- struct toepcb *toep = (struct toepcb *)ctx;
-
- DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);
-
- new_rx_data(toep, m);
-
- return (0);
-}
-
-static void
-new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
-{
- struct tcpcb *tp;
- struct ddp_state *q;
- struct ddp_buf_state *bsp;
- struct cpl_rx_data_ddp *hdr;
- struct socket *so;
- unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
- int nomoredata = 0;
- unsigned int delack_mode;
- struct sockbuf *rcv;
-
- tp = toep->tp_tp;
- inp_wlock(tp->t_inpcb);
- so = inp_inpcbtosocket(tp->t_inpcb);
-
- if (__predict_false(so_no_receive(so))) {
-
- handle_excess_rx(toep, m);
- inp_wunlock(tp->t_inpcb);
- return;
+ m_freem(m);
+ return (0);
}
-
- q = &toep->tp_ddp_state;
- hdr = cplhdr(m);
- ddp_report = ntohl(hdr->u.ddp_report);
- buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
- bsp = &q->buf_state[buf_idx];
-
- CTR4(KTR_TOM,
- "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
- "hdr seq 0x%x len %u",
- tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
- ntohs(hdr->len));
- CTR3(KTR_TOM,
- "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
- G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);
-
- ddp_len = ntohs(hdr->len);
- rcv_nxt = ntohl(hdr->seq) + ddp_len;
- delack_mode = G_DDP_DACK_MODE(ddp_report);
- if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
- toep->tp_delack_mode = delack_mode;
- toep->tp_delack_seq = tp->rcv_nxt;
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
+ CTR4(KTR_CXGB, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
+ __func__, tid, m->m_pkthdr.len, inp->inp_flags);
+ INP_WUNLOCK(inp);
+ m_freem(m);
+ return (0);
}
-
- m->m_seq = tp->rcv_nxt;
- tp->rcv_nxt = rcv_nxt;
-
- tp->t_rcvtime = ticks;
- /*
- * Store the length in m->m_len. We are changing the meaning of
- * m->m_len here, we need to be very careful that nothing from now on
- * interprets ->len of this packet the usual way.
- */
- m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
- inp_wunlock(tp->t_inpcb);
- CTR3(KTR_TOM,
- "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
- m->m_len, rcv_nxt, m->m_seq);
- /*
- * Figure out where the new data was placed in the buffer and store it
- * in when. Assumes the buffer offset starts at 0, consumer needs to
- * account for page pod's pg_offset.
- */
- end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
- m->m_cur_offset = end_offset - m->m_pkthdr.len;
- rcv = so_sockbuf_rcv(so);
- sockbuf_lock(rcv);
-
- m->m_ddp_gl = (unsigned char *)bsp->gl;
- m->m_flags |= M_DDP;
- bsp->cur_offset = end_offset;
- toep->tp_enqueued_bytes += m->m_pkthdr.len;
+ if (__predict_false(hdr->dack_mode != toep->tp_delack_mode))
+ toep->tp_delack_mode = hdr->dack_mode;
- /*
- * Length is only meaningful for kbuf
- */
- if (!(bsp->flags & DDP_BF_NOCOPY))
- KASSERT(m->m_len <= bsp->gl->dgl_length,
- ("length received exceeds ddp pages: len=%d dgl_length=%d",
- m->m_len, bsp->gl->dgl_length));
-
- KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
- KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next));
- /*
- * Bit 0 of flags stores whether the DDP buffer is completed.
- * Note that other parts of the code depend on this being in bit 0.
- */
- if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
- panic("spurious ddp completion");
- } else {
- m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
- if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
- q->cur_buf ^= 1; /* flip buffers */
- }
+ tp = intotcpcb(inp);
- if (bsp->flags & DDP_BF_NOCOPY) {
- m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
- bsp->flags &= ~DDP_BF_NOCOPY;
+#ifdef INVARIANTS
+ if (__predict_false(tp->rcv_nxt != be32toh(hdr->seq))) {
+ log(LOG_ERR,
+ "%s: unexpected seq# %x for TID %u, rcv_nxt %x\n",
+ __func__, be32toh(hdr->seq), toep->tp_tid, tp->rcv_nxt);
}
-
- if (ddp_report & F_DDP_PSH)
- m->m_ddp_flags |= DDP_BF_PSH;
- if (nomoredata)
- m->m_ddp_flags |= DDP_BF_NODATA;
-
-#ifdef notyet
- skb_reset_transport_header(skb);
- tcp_hdr(skb)->fin = 0; /* changes original hdr->ddp_report */
#endif
- SBAPPEND(rcv, m);
-
- if ((so_state_get(so) & SS_NOFDREF) == 0 && ((ddp_report & F_DDP_PSH) ||
- (((m->m_ddp_flags & (DDP_BF_NOCOPY|1)) == (DDP_BF_NOCOPY|1))
- || !(m->m_ddp_flags & DDP_BF_NOCOPY))))
- so_sorwakeup_locked(so);
- else
- sockbuf_unlock(rcv);
-}
-
-#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
- F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
- F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
- F_DDP_INVALID_PPOD)
-
-/*
- * Handler for RX_DATA_DDP CPL messages.
- */
-static int
-do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
- struct toepcb *toep = ctx;
- const struct cpl_rx_data_ddp *hdr = cplhdr(m);
-
- VALIDATE_SOCK(so);
-
- if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
- log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
- GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
- return (CPL_RET_BUF_DONE);
- }
-#if 0
- skb->h.th = tcphdr_skb->h.th;
-#endif
- new_rx_data_ddp(toep, m);
- return (0);
-}
+ tp->rcv_nxt += m->m_pkthdr.len;
+ KASSERT(tp->rcv_wnd >= m->m_pkthdr.len,
+ ("%s: negative window size", __func__));
+ tp->rcv_wnd -= m->m_pkthdr.len;
+ tp->t_rcvtime = ticks;
-static void
-process_ddp_complete(struct toepcb *toep, struct mbuf *m)
-{
- struct tcpcb *tp = toep->tp_tp;
- struct socket *so;
- struct ddp_state *q;
- struct ddp_buf_state *bsp;
- struct cpl_rx_ddp_complete *hdr;
- unsigned int ddp_report, buf_idx, when, delack_mode;
- int nomoredata = 0;
- struct sockbuf *rcv;
-
- inp_wlock(tp->t_inpcb);
- so = inp_inpcbtosocket(tp->t_inpcb);
+ so = inp->inp_socket;
+ so_rcv = &so->so_rcv;
+ SOCKBUF_LOCK(so_rcv);
- if (__predict_false(so_no_receive(so))) {
- struct inpcb *inp = so_sotoinpcb(so);
+ if (__predict_false(so_rcv->sb_state & SBS_CANTRCVMORE)) {
+ CTR3(KTR_CXGB, "%s: tid %u, excess rx (%d bytes)",
+ __func__, tid, m->m_pkthdr.len);
+ SOCKBUF_UNLOCK(so_rcv);
+ INP_WUNLOCK(inp);
- handle_excess_rx(toep, m);
- inp_wunlock(inp);
- return;
- }
- q = &toep->tp_ddp_state;
- hdr = cplhdr(m);
- ddp_report = ntohl(hdr->ddp_report);
- buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
- m->m_pkthdr.csum_data = tp->rcv_nxt;
-
- rcv = so_sockbuf_rcv(so);
- sockbuf_lock(rcv);
-
- bsp = &q->buf_state[buf_idx];
- when = bsp->cur_offset;
- m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
- tp->rcv_nxt += m->m_len;
- tp->t_rcvtime = ticks;
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+ tp = tcp_drop(tp, ECONNRESET);
+ if (tp)
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
- delack_mode = G_DDP_DACK_MODE(ddp_report);
- if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
- toep->tp_delack_mode = delack_mode;
- toep->tp_delack_seq = tp->rcv_nxt;
+ m_freem(m);
+ return (0);
}
-#ifdef notyet
- skb_reset_transport_header(skb);
- tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */
-#endif
- inp_wunlock(tp->t_inpcb);
- KASSERT(m->m_len >= 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
- CTR5(KTR_TOM,
- "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
- "ddp_report 0x%x offset %u, len %u",
- tp->rcv_nxt, bsp->cur_offset, ddp_report,
- G_DDP_OFFSET(ddp_report), m->m_len);
+ /* receive buffer autosize */
+ if (so_rcv->sb_flags & SB_AUTOSIZE &&
+ V_tcp_do_autorcvbuf &&
+ so_rcv->sb_hiwat < V_tcp_autorcvbuf_max &&
+ (m->m_pkthdr.len > (sbspace(so_rcv) / 8 * 7) || tp->rcv_wnd < 32768)) {
+ unsigned int hiwat = so_rcv->sb_hiwat;
+ unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
+ V_tcp_autorcvbuf_max);
- m->m_cur_offset = bsp->cur_offset;
- bsp->cur_offset += m->m_len;
-
- if (!(bsp->flags & DDP_BF_NOFLIP)) {
- q->cur_buf ^= 1; /* flip buffers */
- if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
- nomoredata=1;
+ if (!sbreserve_locked(so_rcv, newsize, so, NULL))
+ so_rcv->sb_flags &= ~SB_AUTOSIZE;
+ else
+ toep->tp_rx_credits += newsize - hiwat;
}
-
- CTR4(KTR_TOM,
- "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
- "ddp_report %u offset %u",
- tp->rcv_nxt, bsp->cur_offset, ddp_report,
- G_DDP_OFFSET(ddp_report));
-
- m->m_ddp_gl = (unsigned char *)bsp->gl;
- m->m_flags |= M_DDP;
- m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
- if (bsp->flags & DDP_BF_NOCOPY)
- bsp->flags &= ~DDP_BF_NOCOPY;
- if (nomoredata)
- m->m_ddp_flags |= DDP_BF_NODATA;
-
- SBAPPEND(rcv, m);
- if ((so_state_get(so) & SS_NOFDREF) == 0)
- so_sorwakeup_locked(so);
- else
- sockbuf_unlock(rcv);
-}
-/*
- * Handler for RX_DDP_COMPLETE CPL messages.
- */
-static int
-do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
- struct toepcb *toep = ctx;
+ toep->tp_enqueued += m->m_pkthdr.len;
+ sbappendstream_locked(so_rcv, m);
+ sorwakeup_locked(so);
+ SOCKBUF_UNLOCK_ASSERT(so_rcv);
- VALIDATE_SOCK(so);
-#if 0
- skb->h.th = tcphdr_skb->h.th;
-#endif
- process_ddp_complete(toep, m);
+ INP_WUNLOCK(inp);
return (0);
}
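
Note that do_rx_data() only ever shrinks tp->rcv_wnd; window growth, including the autosize bump accumulated in tp_rx_credits, has to be handed back to the ASIC as RX credits or the chip eventually runs out of receive window. A sketch of what that return path could look like, built from the CPL helpers already used in this file; example_send_rx_credits() is illustrative (the commit's real credit return lives elsewhere) and the credit_dack field macros are assumed from t3_cpl.h:

	static void
	example_send_rx_credits(struct toepcb *toep, uint32_t credits)
	{
		struct adapter *sc = toep->tp_tod->tod_softc;
		struct cpl_rx_data_ack *ack;
		struct mbuf *m;

		m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_DATA, ack);
		if (m == NULL)
			return;		/* retry on a later upcall */

		ack->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
		ack->wr.wrh_lo = 0;
		OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK,
		    toep->tp_tid));
		ack->credit_dack = htonl(F_RX_DACK_CHANGE | V_RX_DACK_MODE(1) |
		    V_RX_CREDITS(credits));
		t3_offload_tx(sc, m);
	}
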
/*
- * Move a socket to TIME_WAIT state. We need to make some adjustments to the
- * socket state before calling tcp_time_wait to comply with its expectations.
- */
-static void
-enter_timewait(struct tcpcb *tp)
-{
- /*
- * Bump rcv_nxt for the peer FIN. We don't do this at the time we
- * process peer_close because we don't want to carry the peer FIN in
- * the socket's receive queue and if we increment rcv_nxt without
- * having the FIN in the receive queue we'll confuse facilities such
- * as SIOCINQ.
- */
- inp_wlock(tp->t_inpcb);
- tp->rcv_nxt++;
-
- tp->ts_recent_age = 0; /* defeat recycling */
- tp->t_srtt = 0; /* defeat tcp_update_metrics */
- inp_wunlock(tp->t_inpcb);
- tcp_offload_twstart(tp);
-}
-
-/*
- * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE. This
- * function deals with the data that may be reported along with the FIN.
- * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
- * perform normal FIN-related processing. In the latter case 1 indicates that
- * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
- * skb can be freed.
+ * Handler for PEER_CLOSE CPL messages.
*/
static int
-handle_peer_close_data(struct socket *so, struct mbuf *m)
-{
- struct tcpcb *tp = so_sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
- struct ddp_state *q;
- struct ddp_buf_state *bsp;
- struct cpl_peer_close *req = cplhdr(m);
- unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */
- struct sockbuf *rcv;
-
- if (tp->rcv_nxt == rcv_nxt) /* no data */
- return (0);
+do_peer_close(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ const struct cpl_peer_close *hdr = mtod(m, void *);
+ unsigned int tid = GET_TID(hdr);
+ struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+ struct inpcb *inp = toep->tp_inp;
+ struct tcpcb *tp;
+ struct socket *so;
- CTR0(KTR_TOM, "handle_peer_close_data");
- if (__predict_false(so_no_receive(so))) {
- handle_excess_rx(toep, m);
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+ tp = intotcpcb(inp);
- /*
- * Although we discard the data we want to process the FIN so
- * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
- * PEER_CLOSE without data. In particular this PEER_CLOSE
- * may be what will close the connection. We return 1 because
- * handle_excess_rx() already freed the packet.
- */
- return (1);
- }
+ CTR5(KTR_CXGB, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__,
+ tid, tp ? tcpstates[tp->t_state] : "no tp", toep->tp_flags, inp);
- inp_lock_assert(tp->t_inpcb);
- q = &toep->tp_ddp_state;
- rcv = so_sockbuf_rcv(so);
- sockbuf_lock(rcv);
-
- bsp = &q->buf_state[q->cur_buf];
- m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
- KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
- m->m_ddp_gl = (unsigned char *)bsp->gl;
- m->m_flags |= M_DDP;
- m->m_cur_offset = bsp->cur_offset;
- m->m_ddp_flags =
- DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
- m->m_seq = tp->rcv_nxt;
- tp->rcv_nxt = rcv_nxt;
- bsp->cur_offset += m->m_pkthdr.len;
- if (!(bsp->flags & DDP_BF_NOFLIP))
- q->cur_buf ^= 1;
-#ifdef notyet
- skb_reset_transport_header(skb);
- tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */
-#endif
- tp->t_rcvtime = ticks;
- SBAPPEND(rcv, m);
- if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
- so_sorwakeup_locked(so);
- else
- sockbuf_unlock(rcv);
+ if (toep->tp_flags & TP_ABORT_RPL_PENDING)
+ goto done;
- return (1);
-}
+ so = inp_inpcbtosocket(inp);
-/*
- * Handle a peer FIN.
- */
-static void
-do_peer_fin(struct toepcb *toep, struct mbuf *m)
-{
- struct socket *so;
- struct tcpcb *tp = toep->tp_tp;
- int keep, action;
-
- action = keep = 0;
- CTR1(KTR_TOM, "do_peer_fin state=%d", tp->t_state);
- if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
- printf("abort_pending set\n");
-
- goto out;
- }
- inp_wlock(tp->t_inpcb);
- so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
- if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
- keep = handle_peer_close_data(so, m);
- if (keep < 0) {
- inp_wunlock(tp->t_inpcb);
- return;
- }
- }
- if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
- CTR1(KTR_TOM,
- "waking up waiters for cantrcvmore on %p ", so);
- socantrcvmore(so);
+ socantrcvmore(so);
+ tp->rcv_nxt++;
- /*
- * If connection is half-synchronized
- * (ie NEEDSYN flag on) then delay ACK,
- * so it may be piggybacked when SYN is sent.
- * Otherwise, since we received a FIN then no
- * more input can be expected, send ACK now.
- */
- if (tp->t_flags & TF_NEEDSYN)
- tp->t_flags |= TF_DELACK;
- else
- tp->t_flags |= TF_ACKNOW;
- tp->rcv_nxt++;
- }
-
switch (tp->t_state) {
case TCPS_SYN_RECEIVED:
- tp->t_starttime = ticks;
- /* FALLTHROUGH */
+ tp->t_starttime = ticks;
+ /* FALLTHROUGH */
case TCPS_ESTABLISHED:
tp->t_state = TCPS_CLOSE_WAIT;
break;
@@ -2561,228 +1251,134 @@ do_peer_fin(struct toepcb *toep, struct mbuf *m)
tp->t_state = TCPS_CLOSING;
break;
case TCPS_FIN_WAIT_2:
- /*
- * If we've sent an abort_req we must have sent it too late,
- * HW will send us a reply telling us so, and this peer_close
- * is really the last message for this connection and needs to
- * be treated as an abort_rpl, i.e., transition the connection
- * to TCP_CLOSE (note that the host stack does this at the
- * time of generating the RST but we must wait for HW).
- * Otherwise we enter TIME_WAIT.
- */
- t3_release_offload_resources(toep);
- if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
- action = TCP_CLOSE;
- } else {
- action = TCP_TIMEWAIT;
- }
- break;
+ tcp_twstart(tp);
+ INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+
+ INP_WLOCK(inp);
+ toepcb_release(toep); /* no more CPLs expected */
+
+ m_freem(m);
+ return (0);
default:
- log(LOG_ERR,
- "%s: TID %u received PEER_CLOSE in bad state %d\n",
- toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state);
- }
- inp_wunlock(tp->t_inpcb);
-
- if (action == TCP_TIMEWAIT) {
- enter_timewait(tp);
- } else if (action == TCP_DROP) {
- tcp_offload_drop(tp, 0);
- } else if (action == TCP_CLOSE) {
- tcp_offload_close(tp);
+ log(LOG_ERR, "%s: TID %u received PEER_CLOSE in bad state %d\n",
+ __func__, toep->tp_tid, tp->t_state);
}
-#ifdef notyet
- /* Do not send POLL_HUP for half duplex close. */
- if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
- sk->sk_state == TCP_CLOSE)
- sk_wake_async(so, 1, POLL_HUP);
- else
- sk_wake_async(so, 1, POLL_IN);
-#endif
+done:
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
-out:
- if (!keep)
- m_free(m);
+ m_freem(m);
+ return (0);
}
/*
- * Handler for PEER_CLOSE CPL messages.
+ * Handler for CLOSE_CON_RPL CPL messages. peer ACK to our FIN received.
*/
static int
-do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
- struct toepcb *toep = (struct toepcb *)ctx;
+do_close_con_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ const struct cpl_close_con_rpl *rpl = mtod(m, void *);
+ unsigned int tid = GET_TID(rpl);
+ struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+ struct inpcb *inp = toep->tp_inp;
+ struct tcpcb *tp;
+ struct socket *so;
- VALIDATE_SOCK(so);
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+ tp = intotcpcb(inp);
- do_peer_fin(toep, m);
- return (0);
-}
+ CTR4(KTR_CXGB, "%s: tid %u (%s), toep_flags 0x%x", __func__, tid,
+ tp ? tcpstates[tp->t_state] : "no tp", toep->tp_flags);
-static void
-process_close_con_rpl(struct toepcb *toep, struct mbuf *m)
-{
- struct cpl_close_con_rpl *rpl = cplhdr(m);
- struct tcpcb *tp = toep->tp_tp;
- struct socket *so;
- int action = 0;
- struct sockbuf *rcv;
-
- inp_wlock(tp->t_inpcb);
- so = inp_inpcbtosocket(tp->t_inpcb);
-
- tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */
+ if ((toep->tp_flags & TP_ABORT_RPL_PENDING))
+ goto done;
- if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
- inp_wunlock(tp->t_inpcb);
- goto out;
- }
-
- CTR3(KTR_TOM, "process_close_con_rpl(%p) state=%d dead=%d", toep,
- tp->t_state, !!(so_state_get(so) & SS_NOFDREF));
+ so = inp_inpcbtosocket(inp);
+ tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */
switch (tp->t_state) {
- case TCPS_CLOSING: /* see FIN_WAIT2 case in do_peer_fin */
- t3_release_offload_resources(toep);
- if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
- action = TCP_CLOSE;
+ case TCPS_CLOSING:
+ tcp_twstart(tp);
+release:
+ INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
+ INP_INFO_WUNLOCK(&V_tcbinfo);
- } else {
- action = TCP_TIMEWAIT;
- }
- break;
+ INP_WLOCK(inp);
+ toepcb_release(toep); /* no more CPLs expected */
+
+ m_freem(m);
+ return (0);
case TCPS_LAST_ACK:
- /*
- * In this state we don't care about pending abort_rpl.
- * If we've sent abort_req it was post-close and was sent too
- * late, this close_con_rpl is the actual last message.
- */
- t3_release_offload_resources(toep);
- action = TCP_CLOSE;
- break;
+ if (tcp_close(tp))
+ INP_WUNLOCK(inp);
+ goto release;
+
case TCPS_FIN_WAIT_1:
- /*
- * If we can't receive any more
- * data, then closing user can proceed.
- * Starting the timer is contrary to the
- * specification, but if we don't get a FIN
- * we'll hang forever.
- *
- * XXXjl:
- * we should release the tp also, and use a
- * compressed state.
- */
- if (so)
- rcv = so_sockbuf_rcv(so);
- else
- break;
-
- if (rcv->sb_state & SBS_CANTRCVMORE) {
- int timeout;
-
- if (so)
- soisdisconnected(so);
- timeout = (tcp_fast_finwait2_recycle) ?
- tcp_finwait2_timeout : tcp_maxidle;
- tcp_timer_activate(tp, TT_2MSL, timeout);
- }
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
+ soisdisconnected(so);
tp->t_state = TCPS_FIN_WAIT_2;
- if ((so_options_get(so) & SO_LINGER) && so_linger_get(so) == 0 &&
- (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
- action = TCP_DROP;
- }
-
break;
default:
log(LOG_ERR,
- "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
- toep->tp_toedev->tod_name, toep->tp_tid,
- tp->t_state);
+ "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
+ __func__, toep->tp_tid, tp->t_state);
}
- inp_wunlock(tp->t_inpcb);
+done:
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
- if (action == TCP_TIMEWAIT) {
- enter_timewait(tp);
- } else if (action == TCP_DROP) {
- tcp_offload_drop(tp, 0);
- } else if (action == TCP_CLOSE) {
- tcp_offload_close(tp);
- }
-out:
m_freem(m);
+ return (0);
}
-/*
- * Handler for CLOSE_CON_RPL CPL messages.
- */
static int
-do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m,
- void *ctx)
+do_smt_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- struct toepcb *toep = (struct toepcb *)ctx;
+ struct cpl_smt_write_rpl *rpl = mtod(m, void *);
- process_close_con_rpl(toep, m);
+ if (rpl->status != CPL_ERR_NONE) {
+ log(LOG_ERR,
+ "Unexpected SMT_WRITE_RPL status %u for entry %u\n",
+ rpl->status, GET_TID(rpl));
+ }
+
+ m_freem(m);
return (0);
}
-/*
- * Process abort replies. We only process these messages if we anticipate
- * them as the coordination between SW and HW in this area is somewhat lacking
- * and sometimes we get ABORT_RPLs after we are done with the connection that
- * originated the ABORT_REQ.
- */
-static void
-process_abort_rpl(struct toepcb *toep, struct mbuf *m)
+static int
+do_set_tcb_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- struct tcpcb *tp = toep->tp_tp;
- struct socket *so;
- int needclose = 0;
-
-#ifdef T3_TRACE
- T3_TRACE1(TIDTB(sk),
- "process_abort_rpl: GTS rpl pending %d",
- sock_flag(sk, ABORT_RPL_PENDING));
-#endif
-
- inp_wlock(tp->t_inpcb);
- so = inp_inpcbtosocket(tp->t_inpcb);
-
- if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
- /*
- * XXX panic on tcpdrop
- */
- if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(toep->tp_toedev))
- toep->tp_flags |= TP_ABORT_RPL_RCVD;
- else {
- toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING);
- if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) ||
- !is_t3a(toep->tp_toedev)) {
- if (toep->tp_flags & TP_ABORT_REQ_RCVD)
- panic("TP_ABORT_REQ_RCVD set");
- t3_release_offload_resources(toep);
- needclose = 1;
- }
- }
- }
- inp_wunlock(tp->t_inpcb);
+ struct cpl_set_tcb_rpl *rpl = mtod(m, void *);
- if (needclose)
- tcp_offload_close(tp);
+ if (rpl->status != CPL_ERR_NONE) {
+ log(LOG_ERR, "Unexpected SET_TCB_RPL status %u for tid %u\n",
+ rpl->status, GET_TID(rpl));
+ }
- m_free(m);
+ m_freem(m);
+ return (0);
}
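
Both small handlers above follow the new dispatch convention: (sge_qset, rsp_desc, mbuf) in, 0 out, message freed by the handler. They plug into the adapter's per-opcode CPL table rather than the old t3cdev client registration; a hedged sketch of the hookup (t3_register_cpl_handler() is written from memory of the cxgb core and may not be the exact name):

	static void
	example_register_handlers(struct adapter *sc)
	{
		t3_register_cpl_handler(sc, CPL_SMT_WRITE_RPL, do_smt_write_rpl);
		t3_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl);
		/* ...likewise for RX_DATA, PEER_CLOSE, ABORT_REQ_RSS, etc. */
	}
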
/*
* Handle an ABORT_RPL_RSS CPL message.
*/
static int
-do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+do_abort_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- struct cpl_abort_rpl_rss *rpl = cplhdr(m);
- struct toepcb *toep;
-
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ const struct cpl_abort_rpl_rss *rpl = mtod(m, void *);
+ unsigned int tid = GET_TID(rpl);
+ struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+ struct inpcb *inp;
+
/*
* Ignore replies to post-close aborts indicating that the abort was
* requested too late. These connections are terminated when we get
@@ -2790,99 +1386,54 @@ do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
* arrives the TID is either no longer used or it has been recycled.
*/
if (rpl->status == CPL_ERR_ABORT_FAILED) {
-discard:
- m_free(m);
+ m_freem(m);
return (0);
}
- toep = (struct toepcb *)ctx;
-
- /*
- * Sometimes we've already closed the socket, e.g., a post-close
- * abort races with ABORT_REQ_RSS, the latter frees the socket
- * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED,
- * but FW turns the ABORT_REQ into a regular one and so we get
- * ABORT_RPL_RSS with status 0 and no socket. Only on T3A.
- */
- if (!toep)
- goto discard;
+ if (toep->tp_flags & TP_IS_A_SYNQ_ENTRY)
+ return (do_abort_rpl_synqe(qs, r, m));
- if (toep->tp_tp == NULL) {
- log(LOG_NOTICE, "removing tid for abort\n");
- cxgb_remove_tid(cdev, toep, toep->tp_tid);
- if (toep->tp_l2t)
- l2t_release(L2DATA(cdev), toep->tp_l2t);
+ CTR4(KTR_CXGB, "%s: tid %d, toep %p, status %d", __func__, tid, toep,
+ rpl->status);
- toepcb_release(toep);
- goto discard;
+ inp = toep->tp_inp;
+ INP_WLOCK(inp);
+
+ if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
+ if (!(toep->tp_flags & TP_ABORT_RPL_RCVD)) {
+ toep->tp_flags |= TP_ABORT_RPL_RCVD;
+ INP_WUNLOCK(inp);
+ } else {
+ toep->tp_flags &= ~TP_ABORT_RPL_RCVD;
+ toep->tp_flags &= ~TP_ABORT_RPL_PENDING;
+ toepcb_release(toep); /* no more CPLs expected */
+ }
}
-
- log(LOG_NOTICE, "toep=%p\n", toep);
- log(LOG_NOTICE, "tp=%p\n", toep->tp_tp);
- toepcb_hold(toep);
- process_abort_rpl(toep, m);
- toepcb_release(toep);
+ m_freem(m);
return (0);
}
/*
- * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also
- * indicate whether RST should be sent in response.
+ * Convert the status code of an ABORT_REQ into a FreeBSD error code.
*/
static int
-abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst)
+abort_status_to_errno(struct tcpcb *tp, int abort_reason)
{
- struct tcpcb *tp = so_sototcpcb(so);
-
switch (abort_reason) {
case CPL_ERR_BAD_SYN:
-#if 0
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); // fall through
-#endif
case CPL_ERR_CONN_RESET:
- // XXX need to handle SYN_RECV due to crossed SYNs
return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
case CPL_ERR_XMIT_TIMEDOUT:
case CPL_ERR_PERSIST_TIMEDOUT:
case CPL_ERR_FINWAIT2_TIMEDOUT:
case CPL_ERR_KEEPALIVE_TIMEDOUT:
-#if 0
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
-#endif
return (ETIMEDOUT);
default:
return (EIO);
}
}
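
As a worked example: a CPL_ERR_CONN_RESET that lands while the connection is in CLOSE_WAIT maps to EPIPE rather than ECONNRESET, because the peer's FIN has already been seen and the failure surfaces as a write on a half-closed socket. do_abort_req() below consumes the result directly:

	so_error_set(so, abort_status_to_errno(tp, req->status));
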
-static inline void
-set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd)
-{
- struct cpl_abort_rpl *rpl = cplhdr(m);
-
- rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
- rpl->wr.wr_lo = htonl(V_WR_TID(tid));
- m->m_len = m->m_pkthdr.len = sizeof(*rpl);
-
- OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
- rpl->cmd = cmd;
-}
-
-static void
-send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m)
-{
- struct mbuf *reply_mbuf;
- struct cpl_abort_req_rss *req = cplhdr(m);
-
- reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl));
- m_set_priority(m, CPL_PRIORITY_DATA);
- m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl);
- set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status);
- cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
- m_free(m);
-}
-
/*
* Returns whether an ABORT_REQ_RSS message is a negative advice.
*/
@@ -2893,848 +1444,175 @@ is_neg_adv_abort(unsigned int status)
status == CPL_ERR_PERSIST_NEG_ADVICE;
}
-static void
-send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status)
-{
- struct mbuf *reply_mbuf;
- struct cpl_abort_req_rss *req = cplhdr(m);
-
- reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
-
- if (!reply_mbuf) {
- /* Defer the reply. Stick rst_status into req->cmd. */
- req->status = rst_status;
- t3_defer_reply(m, tdev, send_deferred_abort_rpl);
- return;
- }
-
- m_set_priority(reply_mbuf, CPL_PRIORITY_DATA);
- set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status);
- m_free(m);
-
- /*
- * XXX need to sync with ARP as for SYN_RECV connections we can send
- * these messages while ARP is pending. For other connection states
- * it's not a problem.
- */
- cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
-}
-
-#ifdef notyet
-static void
-cleanup_syn_rcv_conn(struct socket *child, struct socket *parent)
+void
+send_abort_rpl(struct toedev *tod, int tid, int qset)
{
- CXGB_UNIMPLEMENTED();
-#ifdef notyet
- struct request_sock *req = child->sk_user_data;
-
- inet_csk_reqsk_queue_removed(parent, req);
- synq_remove(tcp_sk(child));
- __reqsk_free(req);
- child->sk_user_data = NULL;
-#endif
-}
+ struct mbuf *reply;
+ struct cpl_abort_rpl *rpl;
+ struct adapter *sc = tod->tod_softc;
+ reply = M_GETHDR_OFLD(qset, CPL_PRIORITY_DATA, rpl);
+ if (!reply)
+ CXGB_UNIMPLEMENTED();
-/*
- * Performs the actual work to abort a SYN_RECV connection.
- */
-static void
-do_abort_syn_rcv(struct socket *child, struct socket *parent)
-{
- struct tcpcb *parenttp = so_sototcpcb(parent);
- struct tcpcb *childtp = so_sototcpcb(child);
+ rpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
+ rpl->wr.wrh_lo = htonl(V_WR_TID(tid));
+ OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
+ rpl->cmd = CPL_ABORT_NO_RST;
- /*
- * If the server is still open we clean up the child connection,
- * otherwise the server already did the clean up as it was purging
- * its SYN queue and the skb was just sitting in its backlog.
- */
- if (__predict_false(parenttp->t_state == TCPS_LISTEN)) {
- cleanup_syn_rcv_conn(child, parent);
- inp_wlock(childtp->t_inpcb);
- t3_release_offload_resources(childtp->t_toe);
- inp_wunlock(childtp->t_inpcb);
- tcp_offload_close(childtp);
- }
+ t3_offload_tx(sc, reply);
}
-#endif
/*
- * Handle abort requests for a SYN_RECV connection. These need extra work
- * because the socket is on its parent's SYN queue.
+ * Handle an ABORT_REQ_RSS CPL message. If we're waiting for an ABORT_RPL we
+ * ignore this request except that we need to reply to it.
*/
static int
-abort_syn_rcv(struct socket *so, struct mbuf *m)
+do_abort_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- CXGB_UNIMPLEMENTED();
-#ifdef notyet
- struct socket *parent;
- struct toedev *tdev = toep->tp_toedev;
- struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
- struct socket *oreq = so->so_incomp;
- struct t3c_tid_entry *t3c_stid;
- struct tid_info *t;
-
- if (!oreq)
- return -1; /* somehow we are not on the SYN queue */
-
- t = &(T3C_DATA(cdev))->tid_maps;
- t3c_stid = lookup_stid(t, oreq->ts_recent);
- parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
-
- so_lock(parent);
- do_abort_syn_rcv(so, parent);
- send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
- so_unlock(parent);
-#endif
- return (0);
-}
-
-/*
- * Process abort requests. If we are waiting for an ABORT_RPL we ignore this
- * request except that we need to reply to it.
- */
-static void
-process_abort_req(struct toepcb *toep, struct mbuf *m, struct toedev *tdev)
-{
- int rst_status = CPL_ABORT_NO_RST;
- const struct cpl_abort_req_rss *req = cplhdr(m);
- struct tcpcb *tp = toep->tp_tp;
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ struct toedev *tod = &td->tod;
+ const struct cpl_abort_req_rss *req = mtod(m, void *);
+ unsigned int tid = GET_TID(req);
+ struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
+ struct inpcb *inp;
+ struct tcpcb *tp;
struct socket *so;
- int needclose = 0;
-
- inp_wlock(tp->t_inpcb);
- so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
- if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
- toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
- m_free(m);
- goto skip;
- }
-
- toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
- /*
- * Three cases to consider:
- * a) We haven't sent an abort_req; close the connection.
- * b) We have sent a post-close abort_req that will get to TP too late
- * and will generate a CPL_ERR_ABORT_FAILED reply. The reply will
- * be ignored and the connection should be closed now.
- * c) We have sent a regular abort_req that will get to TP too late.
- * That will generate an abort_rpl with status 0, wait for it.
- */
- if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
- (is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
- int error;
-
- error = abort_status_to_errno(so, req->status,
- &rst_status);
- so_error_set(so, error);
-
- if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
- so_sorwakeup(so);
- /*
- * SYN_RECV needs special processing. If abort_syn_rcv()
- * returns 0 is has taken care of the abort.
- */
- if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
- goto skip;
-
- t3_release_offload_resources(toep);
- needclose = 1;
- }
- inp_wunlock(tp->t_inpcb);
-
- if (needclose)
- tcp_offload_close(tp);
-
- send_abort_rpl(m, tdev, rst_status);
- return;
-skip:
- inp_wunlock(tp->t_inpcb);
-}
+ int qset = toep->tp_qset;
-/*
- * Handle an ABORT_REQ_RSS CPL message.
- */
-static int
-do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
- const struct cpl_abort_req_rss *req = cplhdr(m);
- struct toepcb *toep = (struct toepcb *)ctx;
-
if (is_neg_adv_abort(req->status)) {
- m_free(m);
+ CTR4(KTR_CXGB, "%s: negative advice %d for tid %u (%x)",
+ __func__, req->status, tid, toep->tp_flags);
+ m_freem(m);
return (0);
}
- log(LOG_NOTICE, "aborting tid=%d\n", toep->tp_tid);
-
- if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
- cxgb_remove_tid(cdev, toep, toep->tp_tid);
- toep->tp_flags |= TP_ABORT_REQ_RCVD;
-
- send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
- if (toep->tp_l2t)
- l2t_release(L2DATA(cdev), toep->tp_l2t);
+ if (toep->tp_flags & TP_IS_A_SYNQ_ENTRY)
+ return (do_abort_req_synqe(qs, r, m));
- /*
- * Unhook
- */
- toep->tp_tp->t_toe = NULL;
- toep->tp_tp->t_flags &= ~TF_TOE;
- toep->tp_tp = NULL;
- /*
- * XXX need to call syncache_chkrst - but we don't
- * have a way of doing that yet
- */
- toepcb_release(toep);
- log(LOG_ERR, "abort for unestablished connection :-(\n");
- return (0);
- }
- if (toep->tp_tp == NULL) {
- log(LOG_NOTICE, "disconnected toepcb\n");
- /* should be freed momentarily */
- return (0);
- }
+ inp = toep->tp_inp;
+ INP_INFO_WLOCK(&V_tcbinfo); /* for tcp_close */
+ INP_WLOCK(inp);
+ tp = intotcpcb(inp);
+ so = inp->inp_socket;
- toepcb_hold(toep);
- process_abort_req(toep, m, toep->tp_toedev);
- toepcb_release(toep);
- return (0);
-}
-#ifdef notyet
-static void
-pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
-{
- struct toedev *tdev = TOE_DEV(parent);
+ CTR6(KTR_CXGB, "%s: tid %u (%s), toep %p (%x), status %d",
+ __func__, tid, tcpstates[tp->t_state], toep, toep->tp_flags,
+ req->status);
- do_abort_syn_rcv(child, parent);
- if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
- struct cpl_pass_accept_rpl *rpl = cplhdr(m);
+ if (!(toep->tp_flags & TP_ABORT_REQ_RCVD)) {
+ toep->tp_flags |= TP_ABORT_REQ_RCVD;
+ toep->tp_flags |= TP_ABORT_SHUTDOWN;
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ m_freem(m);
+ return (0);
+ }
+ toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
- rpl->opt0h = htonl(F_TCAM_BYPASS);
- rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
- cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
- } else
- m_free(m);
-}
-#endif
-static void
-handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
-{
- CXGB_UNIMPLEMENTED();
-
-#ifdef notyet
- struct t3cdev *cdev;
- struct socket *parent;
- struct socket *oreq;
- struct t3c_tid_entry *t3c_stid;
- struct tid_info *t;
- struct tcpcb *otp, *tp = so_sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
-
/*
- * If the connection is being aborted due to the parent listening
- * socket going away there's nothing to do, the ABORT_REQ will close
- * the connection.
+ * If we'd sent a reset on this toep, we'll ignore this and clean up in
+ * the T3's reply to our reset instead.
*/
if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
- m_free(m);
- return;
+ toep->tp_flags |= TP_ABORT_RPL_SENT;
+ INP_WUNLOCK(inp);
+ } else {
+ so_error_set(so, abort_status_to_errno(tp, req->status));
+ tp = tcp_close(tp);
+ if (tp == NULL)
+ INP_WLOCK(inp); /* re-acquire */
+ toepcb_release(toep); /* no more CPLs expected */
}
+ INP_INFO_WUNLOCK(&V_tcbinfo);
- oreq = so->so_incomp;
- otp = so_sototcpcb(oreq);
-
- cdev = T3C_DEV(so);
- t = &(T3C_DATA(cdev))->tid_maps;
- t3c_stid = lookup_stid(t, otp->ts_recent);
- parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
-
- so_lock(parent);
- pass_open_abort(so, parent, m);
- so_unlock(parent);
-#endif
-}
-
-/*
- * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL. This is treated similarly
- * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
- * connection.
- */
-static void
-pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m)
-{
-
-#ifdef notyet
- TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
- BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk);
-#endif
- handle_pass_open_arp_failure(m_get_socket(m), m);
-}
-
-/*
- * Populate a reject CPL_PASS_ACCEPT_RPL WR.
- */
-static void
-mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf)
-{
- struct cpl_pass_accept_req *req = cplhdr(req_mbuf);
- struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf);
- unsigned int tid = GET_TID(req);
-
- m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP);
- rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
- rpl->peer_ip = req->peer_ip; // req->peer_ip not overwritten yet
- rpl->opt0h = htonl(F_TCAM_BYPASS);
- rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
- rpl->opt2 = 0;
- rpl->rsvd = rpl->opt2; /* workaround for HW bug */
-}
-
-/*
- * Send a deferred reject to an accept request.
- */
-static void
-reject_pass_request(struct toedev *tdev, struct mbuf *m)
-{
- struct mbuf *reply_mbuf;
-
- reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl));
- mk_pass_accept_rpl(reply_mbuf, m);
- cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
- m_free(m);
-}
-
-static void
-handle_syncache_event(int event, void *arg)
-{
- struct toepcb *toep = arg;
-
- switch (event) {
- case TOE_SC_ENTRY_PRESENT:
- /*
- * entry already exists - free toepcb
- * and l2t
- */
- printf("syncache entry present\n");
- toepcb_release(toep);
- break;
- case TOE_SC_DROP:
- /*
- * The syncache has given up on this entry
- * either it timed out, or it was evicted
- * we need to explicitly release the tid
- */
- printf("syncache entry dropped\n");
- toepcb_release(toep);
- break;
- default:
- log(LOG_ERR, "unknown syncache event %d\n", event);
- break;
- }
+ send_abort_rpl(tod, tid, qset);
+ m_freem(m);
+ return (0);
}
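
The control flow above is dense enough to deserve a reader's summary (an informal paraphrase, not commit text):

	/*
	 * do_abort_req(), in brief:
	 *   - negative advice: drop the message, connection stays up.
	 *   - synq entry: hand off to do_abort_req_synqe().
	 *   - first copy of the request: set TP_ABORT_REQ_RCVD and return;
	 *     the handler acts only on the second copy.
	 *   - our own reset still pending: mark TP_ABORT_RPL_SENT and let
	 *     do_abort_rpl() finish the cleanup.
	 *   - otherwise: set so_error, tcp_close(), release the toepcb.
	 * Both second-copy paths answer the T3 with an ABORT_RPL
	 * (CPL_ABORT_NO_RST) via send_abort_rpl().
	 */
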
static void
-syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep)
+assign_rxopt(struct tcpcb *tp, uint16_t tcpopt)
{
- struct in_conninfo inc;
- struct toeopt toeo;
- struct tcphdr th;
- struct inpcb *inp;
- int mss, wsf, sack, ts;
- uint32_t rcv_isn = ntohl(req->rcv_isn);
-
- bzero(&toeo, sizeof(struct toeopt));
- inp = so_sotoinpcb(lso);
-
- /*
- * Fill out information for entering us into the syncache
- */
- bzero(&inc, sizeof(inc));
- inc.inc_fport = th.th_sport = req->peer_port;
- inc.inc_lport = th.th_dport = req->local_port;
- th.th_seq = req->rcv_isn;
- th.th_flags = TH_SYN;
-
- toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;
-
- inc.inc_len = 0;
- inc.inc_faddr.s_addr = req->peer_ip;
- inc.inc_laddr.s_addr = req->local_ip;
-
- DPRINTF("syncache add of %d:%d %d:%d\n",
- ntohl(req->local_ip), ntohs(req->local_port),
- ntohl(req->peer_ip), ntohs(req->peer_port));
-
- mss = req->tcp_options.mss;
- wsf = req->tcp_options.wsf;
- ts = req->tcp_options.tstamp;
- sack = req->tcp_options.sack;
- toeo.to_mss = mss;
- toeo.to_wscale = wsf;
- toeo.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
- tcp_offload_syncache_add(&inc, &toeo, &th, inp, &lso, &cxgb_toe_usrreqs,
-toep);
-}
-
+ struct toepcb *toep = tp->t_toe;
+ struct adapter *sc = toep->tp_tod->tod_softc;
-/*
- * Process a CPL_PASS_ACCEPT_REQ message. Does the part that needs the socket
- * lock held. Note that the sock here is a listening socket that is not owned
- * by the TOE.
- */
-static void
-process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev,
- struct listen_ctx *lctx)
-{
- int rt_flags;
- struct l2t_entry *e;
- struct iff_mac tim;
- struct mbuf *reply_mbuf, *ddp_mbuf = NULL;
- struct cpl_pass_accept_rpl *rpl;
- struct cpl_pass_accept_req *req = cplhdr(m);
- unsigned int tid = GET_TID(req);
- struct tom_data *d = TOM_DATA(tdev);
- struct t3cdev *cdev = d->cdev;
- struct tcpcb *tp = so_sototcpcb(so);
- struct toepcb *newtoep;
- struct rtentry *dst;
- struct sockaddr_in nam;
- struct t3c_data *td = T3C_DATA(cdev);
-
- reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
- if (__predict_false(reply_mbuf == NULL)) {
- if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
- t3_defer_reply(m, tdev, reject_pass_request);
- else {
- cxgb_queue_tid_release(cdev, tid);
- m_free(m);
- }
- DPRINTF("failed to get reply_mbuf\n");
-
- goto out;
- }
+ tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(tcpopt)] - 40;
- if (tp->t_state != TCPS_LISTEN) {
- DPRINTF("socket not in listen state\n");
-
- goto reject;
+ if (G_TCPOPT_TSTAMP(tcpopt)) {
+ tp->t_flags |= TF_RCVD_TSTMP;
+ tp->t_flags |= TF_REQ_TSTMP; /* forcibly set */
+ tp->ts_recent = 0; /* XXX */
+ tp->ts_recent_age = tcp_ts_getticks();
+ tp->t_maxseg -= TCPOLEN_TSTAMP_APPA;
}
-
- tim.mac_addr = req->dst_mac;
- tim.vlan_tag = ntohs(req->vlan_tag);
- if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
- DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n");
- goto reject;
- }
-
-#ifdef notyet
- /*
- * XXX do route lookup to confirm that we're still listening on this
- * address
- */
- if (ip_route_input(skb, req->local_ip, req->peer_ip,
- G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev))
- goto reject;
- rt_flags = ((struct rtable *)skb->dst)->rt_flags &
- (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL);
- dst_release(skb->dst); // done with the input route, release it
- skb->dst = NULL;
-
- if ((rt_flags & RTF_LOCAL) == 0)
- goto reject;
-#endif
- /*
- * XXX
- */
- rt_flags = RTF_LOCAL;
- if ((rt_flags & RTF_LOCAL) == 0)
- goto reject;
-
- /*
- * Calculate values and add to syncache
- */
-
- newtoep = toepcb_alloc();
- if (newtoep == NULL)
- goto reject;
- bzero(&nam, sizeof(struct sockaddr_in));
-
- nam.sin_len = sizeof(struct sockaddr_in);
- nam.sin_family = AF_INET;
- nam.sin_addr.s_addr =req->peer_ip;
- dst = rtalloc2((struct sockaddr *)&nam, 1, 0);
-
- if (dst == NULL) {
- printf("failed to find route\n");
- goto reject;
- }
- e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev,
- (struct sockaddr *)&nam);
- if (e == NULL) {
- DPRINTF("failed to get l2t\n");
- }
- /*
- * Point to our listen socket until accept
- */
- newtoep->tp_tp = tp;
- newtoep->tp_flags = TP_SYN_RCVD;
- newtoep->tp_tid = tid;
- newtoep->tp_toedev = tdev;
- tp->rcv_wnd = select_rcv_wnd(tdev, so);
-
- cxgb_insert_tid(cdev, d->client, newtoep, tid);
- so_lock(so);
- LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
- so_unlock(so);
-
- newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
- tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
-
- if (newtoep->tp_ulp_mode) {
- ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
-
- if (ddp_mbuf == NULL)
- newtoep->tp_ulp_mode = 0;
- }
-
- CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d",
- TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
- set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
- /*
- * XXX workaround for lack of syncache drop
- */
- toepcb_hold(newtoep);
- syncache_add_accept_req(req, so, newtoep);
-
- rpl = cplhdr(reply_mbuf);
- reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
- rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- rpl->wr.wr_lo = 0;
- OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
- rpl->opt2 = htonl(calc_opt2(so, tdev));
- rpl->rsvd = rpl->opt2; /* workaround for HW bug */
- rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten
-
- rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) |
- V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx));
- rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) |
- CPL_PASS_OPEN_ACCEPT);
-
- DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status);
-
- m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep));
-
- l2t_send(cdev, reply_mbuf, e);
- m_free(m);
- if (newtoep->tp_ulp_mode) {
- __set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS,
- V_TF_DDP_OFF(1) |
- TP_DDP_TIMER_WORKAROUND_MASK,
- V_TF_DDP_OFF(1) |
- TP_DDP_TIMER_WORKAROUND_VAL, 1);
- } else
- DPRINTF("no DDP\n");
-
- return;
-reject:
- if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
- mk_pass_accept_rpl(reply_mbuf, m);
- else
- mk_tid_release(reply_mbuf, newtoep, tid);
- cxgb_ofld_send(cdev, reply_mbuf);
- m_free(m);
-out:
-#if 0
- TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
-#else
- return;
-#endif
-}
+ if (G_TCPOPT_SACK(tcpopt))
+ tp->t_flags |= TF_SACK_PERMIT;
+ else
+ tp->t_flags &= ~TF_SACK_PERMIT;
-/*
- * Handle a CPL_PASS_ACCEPT_REQ message.
- */
-static int
-do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
- struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
- struct socket *lso = listen_ctx->lso; /* XXX need an interlock against the listen socket going away */
- struct tom_data *d = listen_ctx->tom_data;
+ if (G_TCPOPT_WSCALE_OK(tcpopt))
+ tp->t_flags |= TF_RCVD_SCALE;
-#if VALIDATE_TID
- struct cpl_pass_accept_req *req = cplhdr(m);
- unsigned int tid = GET_TID(req);
- struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
-
- if (unlikely(!lsk)) {
- printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
- cdev->name,
- (unsigned long)((union listen_entry *)ctx -
- t->stid_tab));
- return CPL_RET_BUF_DONE;
- }
- if (unlikely(tid >= t->ntids)) {
- printk(KERN_ERR "%s: passive open TID %u too large\n",
- cdev->name, tid);
- return CPL_RET_BUF_DONE;
- }
- /*
- * For T3A the current user of the TID may have closed but its last
- * message(s) may have been backlogged so the TID appears to be still
- * in use. Just take the TID away, the connection can close at its
- * own leisure. For T3B this situation is a bug.
- */
- if (!valid_new_tid(t, tid) &&
- cdev->type != T3A) {
- printk(KERN_ERR "%s: passive open uses existing TID %u\n",
- cdev->name, tid);
- return CPL_RET_BUF_DONE;
+ if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE | TF_REQ_SCALE)) {
+ tp->rcv_scale = tp->request_r_scale;
+ tp->snd_scale = G_TCPOPT_SND_WSCALE(tcpopt);
}
-#endif
-
- process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
- return (0);
-}
-
-/*
- * Called when a connection is established to translate the TCP options
- * reported by HW to FreeBSD's native format.
- */
-static void
-assign_rxopt(struct socket *so, unsigned int opt)
-{
- struct tcpcb *tp = so_sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
- const struct t3c_data *td = T3C_DATA(TOEP_T3C_DEV(toep));
- inp_lock_assert(tp->t_inpcb);
-
- toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40;
- tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0;
- tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0;
- tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0;
- if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
- (TF_RCVD_SCALE|TF_REQ_SCALE))
- tp->rcv_scale = tp->request_r_scale;
}
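
assign_rxopt() is where the 16-bit option word the hardware reports at establishment turns back into FreeBSD's native TCP flags. A hypothetical debug helper showing the same G_TCPOPT_* accessors end to end (print_rxopt() is not in the tree; the "- 40" is the fixed IP + TCP header allowance used above):

	static void
	print_rxopt(struct adapter *sc, uint16_t tcpopt)
	{
		printf("mss %d tstamp %d sack %d wscale_ok %d snd_wscale %d\n",
		    sc->params.mtus[G_TCPOPT_MSS(tcpopt)] - 40,
		    G_TCPOPT_TSTAMP(tcpopt), G_TCPOPT_SACK(tcpopt),
		    G_TCPOPT_WSCALE_OK(tcpopt), G_TCPOPT_SND_WSCALE(tcpopt));
	}
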
/*
- * Completes some final bits of initialization for just established connections
- * and changes their state to TCP_ESTABLISHED.
- *
- * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
+ * The ISS and IRS are from after the exchange of SYNs and are off by 1.
*/
-static void
-make_established(struct socket *so, u32 snd_isn, unsigned int opt)
+void
+make_established(struct socket *so, uint32_t cpl_iss, uint32_t cpl_irs,
+ uint16_t cpl_tcpopt)
{
- struct tcpcb *tp = so_sototcpcb(so);
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp = intotcpcb(inp);
struct toepcb *toep = tp->t_toe;
-
- toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
- assign_rxopt(so, opt);
+ long bufsize;
+ uint32_t iss = be32toh(cpl_iss) - 1; /* true ISS */
+ uint32_t irs = be32toh(cpl_irs) - 1; /* true IRS */
+ uint16_t tcpopt = be16toh(cpl_tcpopt);
- /*
- *XXXXXXXXXXX
- *
- */
-#ifdef notyet
- so->so_proto->pr_ctloutput = t3_ctloutput;
-#endif
-
-#if 0
- inet_sk(sk)->id = tp->write_seq ^ jiffies;
-#endif
- /*
- * XXX not clear what rcv_wup maps to
- */
- /*
- * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
- * pass through opt0.
- */
- if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
- toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);
+ INP_WLOCK_ASSERT(inp);
- dump_toepcb(toep);
-
-#ifdef notyet
-/*
- * no clean interface for marking ARP up to date
- */
- dst_confirm(sk->sk_dst_cache);
-#endif
- tp->t_starttime = ticks;
tp->t_state = TCPS_ESTABLISHED;
- soisconnected(so);
-}
-
-static int
-syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
-{
-
- struct in_conninfo inc;
- struct toeopt toeo;
- struct tcphdr th;
- int mss, wsf, sack, ts;
- struct mbuf *m = NULL;
- const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
- unsigned int opt;
-
-#ifdef MAC
-#error "no MAC support"
-#endif
-
- opt = ntohs(req->tcp_opt);
-
- bzero(&toeo, sizeof(struct toeopt));
-
- /*
- * Fill out information for entering us into the syncache
- */
- bzero(&inc, sizeof(inc));
- inc.inc_fport = th.th_sport = req->peer_port;
- inc.inc_lport = th.th_dport = req->local_port;
- th.th_seq = req->rcv_isn;
- th.th_flags = TH_ACK;
-
- inc.inc_len = 0;
- inc.inc_faddr.s_addr = req->peer_ip;
- inc.inc_laddr.s_addr = req->local_ip;
-
- mss = td->mtus[G_TCPOPT_MSS(opt)] - 40;
- wsf = G_TCPOPT_WSCALE_OK(opt);
- ts = G_TCPOPT_TSTAMP(opt);
- sack = G_TCPOPT_SACK(opt);
-
- toeo.to_mss = mss;
- toeo.to_wscale = G_TCPOPT_SND_WSCALE(opt);
- toeo.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
-
- DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
- ntohl(req->local_ip), ntohs(req->local_port),
- ntohl(req->peer_ip), ntohs(req->peer_port),
- mss, wsf, ts, sack);
- return tcp_offload_syncache_expand(&inc, &toeo, &th, so, m);
-}
-
-
-/*
- * Process a CPL_PASS_ESTABLISH message. XXX a lot of the locking doesn't work
- * if we are in TCP_SYN_RECV due to crossed SYNs
- */
-static int
-do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
- struct cpl_pass_establish *req = cplhdr(m);
- struct toepcb *toep = (struct toepcb *)ctx;
- struct tcpcb *tp = toep->tp_tp;
- struct socket *so, *lso;
- struct t3c_data *td = T3C_DATA(cdev);
- struct sockbuf *snd, *rcv;
-
- // Complete socket initialization now that we have the SND_ISN
-
- struct toedev *tdev;
-
-
- tdev = toep->tp_toedev;
-
- inp_wlock(tp->t_inpcb);
-
- /*
- *
- * XXX need to add reference while we're manipulating
- */
- so = lso = inp_inpcbtosocket(tp->t_inpcb);
-
- inp_wunlock(tp->t_inpcb);
-
- so_lock(so);
- LIST_REMOVE(toep, synq_entry);
- so_unlock(so);
-
- if (!syncache_expand_establish_req(req, &so, toep)) {
- /*
- * No entry
- */
- CXGB_UNIMPLEMENTED();
- }
- if (so == NULL) {
- /*
- * Couldn't create the socket
- */
- CXGB_UNIMPLEMENTED();
- }
-
- tp = so_sototcpcb(so);
- inp_wlock(tp->t_inpcb);
-
- snd = so_sockbuf_snd(so);
- rcv = so_sockbuf_rcv(so);
+ tp->t_starttime = ticks;
+ TCPSTAT_INC(tcps_connects);
- snd->sb_flags |= SB_NOCOALESCE;
- rcv->sb_flags |= SB_NOCOALESCE;
+ CTR4(KTR_CXGB, "%s tid %u, toep %p, inp %p", tcpstates[tp->t_state],
+ toep->tp_tid, toep, inp);
- toep->tp_tp = tp;
- toep->tp_flags = 0;
- tp->t_toe = toep;
- reset_wr_list(toep);
- tp->rcv_wnd = select_rcv_wnd(tdev, so);
- tp->rcv_nxt = toep->tp_copied_seq;
- install_offload_ops(so);
-
- toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
- toep->tp_wr_unacked = 0;
- toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
- toep->tp_qset_idx = 0;
- toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);
-
- /*
- * XXX Cancel any keep alive timer
- */
-
- make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
+ tp->irs = irs;
+ tcp_rcvseqinit(tp);
+ tp->rcv_wnd = toep->tp_rx_credits << 10;
+ tp->rcv_adv += tp->rcv_wnd;
+ tp->last_ack_sent = tp->rcv_nxt;
/*
- * XXX workaround for lack of syncache drop
- */
- toepcb_release(toep);
- inp_wunlock(tp->t_inpcb);
-
- CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
- cxgb_log_tcb(cdev->adapter, toep->tp_tid);
-#ifdef notyet
- /*
- * XXX not sure how these checks map to us
- */
- if (unlikely(sk->sk_socket)) { // simultaneous opens only
- sk->sk_state_change(sk);
- sk_wake_async(so, 0, POLL_OUT);
- }
- /*
- * The state for the new connection is now up to date.
- * Next check if we should add the connection to the parent's
- * accept queue. When the parent closes it resets connections
- * on its SYN queue, so check if we are being reset. If so we
- * don't need to do anything more, the coming ABORT_RPL will
- * destroy this socket. Otherwise move the connection to the
- * accept queue.
- *
- * Note that we reset the synq before closing the server so if
- * we are not being reset the stid is still open.
+ * If we were unable to send all rx credits via opt0, save the remainder
+ * in rx_credits so that they can be handed over with the next credit
+ * update.
*/
- if (unlikely(!tp->forward_skb_hint)) { // removed from synq
- __kfree_skb(skb);
- goto unlock;
- }
-#endif
- m_free(m);
-
- return (0);
+ SOCKBUF_LOCK(&so->so_rcv);
+ bufsize = select_rcv_wnd(so);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ toep->tp_rx_credits = bufsize - tp->rcv_wnd;
+
+ tp->iss = iss;
+ tcp_sendseqinit(tp);
+ tp->snd_una = iss + 1;
+ tp->snd_nxt = iss + 1;
+ tp->snd_max = iss + 1;
+
+ assign_rxopt(tp, tcpopt);
+ soisconnected(so);
}
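
make_established() trusts the CPL to report the post-SYN sequence numbers,
i.e. true ISN + 1 in network byte order.  Here is a self-contained sketch
of just that arithmetic plus the deferred rx-credit computation described
in the comment above; all the numbers are made up for illustration.

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		/* Pretend the CPL reported snd_isn = 1001, already byte-swapped. */
		uint32_t cpl_iss = 1001;
		uint32_t iss = cpl_iss - 1;	/* true ISS */
		uint32_t snd_una = iss + 1;	/* the SYN consumed one seq number */

		/* Credits that didn't fit in opt0 ride along with a later
		 * credit update. */
		long bufsize = 256 * 1024;	/* hypothetical so_rcv size */
		long rcv_wnd = 208 * 1024;	/* window handed to hw via opt0 */
		long rx_credits = bufsize - rcv_wnd;

		printf("iss %u snd_una %u deferred rx credits %ld\n",
		    iss, snd_una, rx_credits);
		return (0);
	}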
/*
@@ -3745,129 +1623,80 @@ static void
fixup_and_send_ofo(struct toepcb *toep)
{
struct mbuf *m;
- struct toedev *tdev = toep->tp_toedev;
- struct tcpcb *tp = toep->tp_tp;
+ struct toedev *tod = toep->tp_tod;
+ struct adapter *sc = tod->tod_softc;
+ struct inpcb *inp = toep->tp_inp;
unsigned int tid = toep->tp_tid;
- log(LOG_NOTICE, "fixup_and_send_ofo\n");
-
- inp_lock_assert(tp->t_inpcb);
+ inp_lock_assert(inp);
+
while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
+ struct ofld_hdr *oh = mtod(m, void *);
/*
* A variety of messages can be waiting but the fields we'll
* be touching are common to all so any message type will do.
*/
- struct cpl_close_con_req *p = cplhdr(m);
+ struct cpl_close_con_req *p = (void *)(oh + 1);
- p->wr.wr_lo = htonl(V_WR_TID(tid));
+ p->wr.wrh_lo = htonl(V_WR_TID(tid));
OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
- cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
- }
-}
-
-/*
- * Updates socket state from an active establish CPL message. Runs with the
- * socket lock held.
- */
-static void
-socket_act_establish(struct socket *so, struct mbuf *m)
-{
- struct cpl_act_establish *req = cplhdr(m);
- u32 rcv_isn = ntohl(req->rcv_isn); /* real RCV_ISN + 1 */
- struct tcpcb *tp = so_sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
-
- if (__predict_false(tp->t_state != TCPS_SYN_SENT))
- log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
- toep->tp_tid, tp->t_state);
-
- tp->ts_recent_age = ticks;
- tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn;
- toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs;
-
- make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
-
- /*
- * Now that we finally have a TID send any CPL messages that we had to
- * defer for lack of a TID.
- */
- if (mbufq_len(&toep->out_of_order_queue))
- fixup_and_send_ofo(toep);
-
- if (__predict_false(so_state_get(so) & SS_NOFDREF)) {
- /*
- * XXX does this even make sense?
- */
- so_sorwakeup(so);
+ t3_offload_tx(sc, m);
}
- m_free(m);
-#ifdef notyet
-/*
- * XXX assume no write requests permitted while socket connection is
- * incomplete
- */
- /*
- * Currently the send queue must be empty at this point because the
- * socket layer does not send anything before a connection is
-	 * established.  To be future proof, though, we handle the possibility
- * that there are pending buffers to send (either TX_DATA or
- * CLOSE_CON_REQ). First we need to adjust the sequence number of the
- * buffers according to the just learned write_seq, and then we send
- * them on their way.
- */
- fixup_pending_writeq_buffers(sk);
- if (t3_push_frames(so, 1))
- sk->sk_write_space(sk);
-#endif
-
- toep->tp_state = tp->t_state;
- KMOD_TCPSTAT_INC(tcps_connects);
-
}
/*
* Process a CPL_ACT_ESTABLISH message.
*/
static int
-do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+do_act_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- struct cpl_act_establish *req = cplhdr(m);
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ struct cpl_act_establish *req = mtod(m, void *);
unsigned int tid = GET_TID(req);
unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
- struct toepcb *toep = (struct toepcb *)ctx;
- struct tcpcb *tp = toep->tp_tp;
+ struct toepcb *toep = lookup_atid(&td->tid_maps, atid);
+ struct inpcb *inp = toep->tp_inp;
+ struct tcpcb *tp;
struct socket *so;
- struct toedev *tdev;
- struct tom_data *d;
-
- if (tp == NULL) {
- free_atid(cdev, atid);
- return (0);
+
+ CTR3(KTR_CXGB, "%s: atid %u, tid %u", __func__, atid, tid);
+
+ free_atid(&td->tid_maps, atid);
+
+ INP_WLOCK(inp);
+ tp = intotcpcb(inp);
+
+ KASSERT(toep->tp_qset == qs->idx,
+ ("%s qset mismatch %d %d", __func__, toep->tp_qset, qs->idx));
+ KASSERT(toep->tp_tid == atid,
+ ("%s atid mismatch %d %d", __func__, toep->tp_tid, atid));
+
+ toep->tp_tid = tid;
+ insert_tid(td, toep, tid);
+
+ if (inp->inp_flags & INP_DROPPED) {
+ /* socket closed by the kernel before hw told us it connected */
+ send_reset(toep);
+ goto done;
}
- inp_wlock(tp->t_inpcb);
- /*
- * XXX
- */
- so = inp_inpcbtosocket(tp->t_inpcb);
- tdev = toep->tp_toedev; /* blow up here if link was down */
- d = TOM_DATA(tdev);
+ KASSERT(tp->t_state == TCPS_SYN_SENT,
+ ("TID %u expected TCPS_SYN_SENT, found %d.", tid, tp->t_state));
+
+ so = inp->inp_socket;
+ make_established(so, req->snd_isn, req->rcv_isn, req->tcp_opt);
/*
- * It's OK if the TID is currently in use, the owning socket may have
- * backlogged its last CPL message(s). Just take it away.
+	 * Now that we finally have a TID, send any CPL messages that we had to
+ * defer for lack of a TID.
*/
- toep->tp_tid = tid;
- toep->tp_tp = tp;
- so_insert_tid(d, toep, tid);
- free_atid(cdev, atid);
- toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
-
- socket_act_establish(so, m);
- inp_wunlock(tp->t_inpcb);
- CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid);
- cxgb_log_tcb(cdev->adapter, toep->tp_tid);
+ if (mbufq_len(&toep->out_of_order_queue))
+ fixup_and_send_ofo(toep);
+done:
+ INP_WUNLOCK(inp);
+ m_freem(m);
return (0);
}
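
The handler above retires the connection's provisional atid and files the
toepcb under the permanent hardware-assigned tid.  A toy sketch of that
handoff follows, with plain arrays standing in for the driver's tid_maps;
the lookup/free/insert helpers named in the comments are hypothetical
stand-ins for the real ones.

	#include <stddef.h>
	#include <stdio.h>

	#define NATIDS 4
	#define NTIDS  8

	struct conn { unsigned int tid; };

	static struct conn *atid_tab[NATIDS];	/* provisional, driver-chosen */
	static struct conn *tid_tab[NTIDS];	/* permanent, hardware-chosen */

	int
	main(void)
	{
		static struct conn c = { .tid = 2 };	/* born under atid 2 */
		unsigned int atid = 2, tid = 5;	/* tid arrives in ACT_ESTABLISH */

		atid_tab[atid] = &c;

		/* The do_act_establish() handoff. */
		struct conn *toep = atid_tab[atid];	/* lookup_atid() */
		atid_tab[atid] = NULL;			/* free_atid() */
		toep->tid = tid;
		tid_tab[tid] = toep;			/* insert_tid() */

		printf("connection now filed under tid %u\n", tid_tab[tid]->tid);
		return (0);
	}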
@@ -3878,97 +1707,66 @@ do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
static void
wr_ack(struct toepcb *toep, struct mbuf *m)
{
- struct tcpcb *tp = toep->tp_tp;
- struct cpl_wr_ack *hdr = cplhdr(m);
+ struct inpcb *inp = toep->tp_inp;
+ struct tcpcb *tp;
+ struct cpl_wr_ack *hdr = mtod(m, void *);
struct socket *so;
unsigned int credits = ntohs(hdr->credits);
u32 snd_una = ntohl(hdr->snd_una);
int bytes = 0;
struct sockbuf *snd;
-
- CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits);
+ struct mbuf *p;
+ struct ofld_hdr *oh;
- inp_wlock(tp->t_inpcb);
- so = inp_inpcbtosocket(tp->t_inpcb);
+ inp_wlock(inp);
+ tp = intotcpcb(inp);
+ so = inp->inp_socket;
toep->tp_wr_avail += credits;
if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail)
toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail;
while (credits) {
- struct mbuf *p = peek_wr(toep);
-
+ p = peek_wr(toep);
+
if (__predict_false(!p)) {
+ CTR5(KTR_CXGB, "%s: %u extra WR_ACK credits, "
+ "tid %u, state %u, wr_avail %u", __func__, credits,
+ toep->tp_tid, tp->t_state, toep->tp_wr_avail);
+
log(LOG_ERR, "%u WR_ACK credits for TID %u with "
"nothing pending, state %u wr_avail=%u\n",
credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail);
break;
}
- CTR2(KTR_TOM,
- "wr_ack: p->credits=%d p->bytes=%d",
- p->m_pkthdr.csum_data, p->m_pkthdr.len);
- KASSERT(p->m_pkthdr.csum_data != 0,
- ("empty request still on list"));
-
- if (__predict_false(credits < p->m_pkthdr.csum_data)) {
-
-#if DEBUG_WR > 1
- struct tx_data_wr *w = cplhdr(p);
- log(LOG_ERR,
- "TID %u got %u WR credits, need %u, len %u, "
- "main body %u, frags %u, seq # %u, ACK una %u,"
- " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n",
- toep->tp_tid, credits, p->csum, p->len,
- p->len - p->data_len, skb_shinfo(p)->nr_frags,
- ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt),
- toep->tp_wr_avail, count_pending_wrs(tp) - credits);
-#endif
- p->m_pkthdr.csum_data -= credits;
- break;
- } else {
- dequeue_wr(toep);
- credits -= p->m_pkthdr.csum_data;
- bytes += p->m_pkthdr.len;
- CTR3(KTR_TOM,
- "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d",
- p->m_pkthdr.len, credits, p->m_pkthdr.csum_data);
-
- m_free(p);
- }
- }
-#if DEBUG_WR
- check_wr_invariants(tp);
-#endif
+ oh = mtod(p, struct ofld_hdr *);
- if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
-#if VALIDATE_SEQ
- struct tom_data *d = TOM_DATA(TOE_DEV(so));
+ KASSERT(credits >= G_HDR_NDESC(oh->flags),
+ ("%s: partial credits? %d %d", __func__, credits,
+ G_HDR_NDESC(oh->flags)));
-		log(LOG_ERR, "%s: unexpected sequence # %u in WR_ACK "
- "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una,
- toep->tp_tid, tp->snd_una);
-#endif
- goto out_free;
+ dequeue_wr(toep);
+ credits -= G_HDR_NDESC(oh->flags);
+ bytes += oh->plen;
+
+ if (oh->flags & F_HDR_SGL)
+ sglist_free(oh->sgl);
+ m_freem(p);
}
+ if (__predict_false(SEQ_LT(snd_una, tp->snd_una)))
+ goto out_free;
+
if (tp->snd_una != snd_una) {
tp->snd_una = snd_una;
- tp->ts_recent_age = ticks;
-#ifdef notyet
- /*
- * Keep ARP entry "minty fresh"
- */
- dst_confirm(sk->sk_dst_cache);
-#endif
+ tp->ts_recent_age = tcp_ts_getticks();
if (tp->snd_una == tp->snd_nxt)
toep->tp_flags &= ~TP_TX_WAIT_IDLE;
}
snd = so_sockbuf_snd(so);
if (bytes) {
- CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes);
- snd = so_sockbuf_snd(so);
- sockbuf_lock(snd);
+ SOCKBUF_LOCK(snd);
sbdrop_locked(snd, bytes);
so_sowwakeup_locked(so);
}
@@ -3978,142 +1776,25 @@ wr_ack(struct toepcb *toep, struct mbuf *m)
out_free:
inp_wunlock(tp->t_inpcb);
- m_free(m);
+ m_freem(m);
}
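
wr_ack() retires pending work requests in FIFO order, each consuming the
descriptor count recorded when the request was queued, and only whole
requests may be acked (the KASSERT above rules out partial credits).  A
self-contained sketch of that accounting, with the ofld_hdr NDESC encoding
simplified to a plain field:

	#include <stdio.h>

	struct wr { unsigned int ndesc; unsigned int plen; };

	int
	main(void)
	{
		struct wr pending[3] = { {1, 100}, {2, 1460}, {1, 40} };
		unsigned int head = 0, n = 3;
		unsigned int credits = 3;	/* from the CPL_WR_ACK message */
		unsigned int bytes = 0;

		while (credits > 0 && head < n) {
			struct wr *p = &pending[head];

			if (credits < p->ndesc)	/* partial credit: can't happen */
				break;
			credits -= p->ndesc;
			bytes += p->plen;	/* may now be dropped from so_snd */
			head++;			/* dequeue_wr() */
		}
		printf("retired %u WRs, %u bytes acked\n", head, bytes);
		return (0);
	}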
/*
* Handler for TX_DATA_ACK CPL messages.
*/
static int
-do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx)
-{
- struct toepcb *toep = (struct toepcb *)ctx;
-
- VALIDATE_SOCK(so);
-
- wr_ack(toep, m);
- return 0;
-}
-
-/*
- * Handler for TRACE_PKT CPL messages. Just sink these packets.
- */
-static int
-do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx)
-{
- m_freem(m);
- return 0;
-}
-
-/*
- * Reset a connection that is on a listener's SYN queue or accept queue,
- * i.e., one that has not had a struct socket associated with it.
- * Must be called from process context.
- *
- * Modeled after code in inet_csk_listen_stop().
- */
-static void
-t3_reset_listen_child(struct socket *child)
-{
- struct tcpcb *tp = so_sototcpcb(child);
-
- t3_send_reset(tp->t_toe);
-}
-
-
-static void
-t3_child_disconnect(struct socket *so, void *arg)
-{
- struct tcpcb *tp = so_sototcpcb(so);
-
- if (tp->t_flags & TF_TOE) {
- inp_wlock(tp->t_inpcb);
- t3_reset_listen_child(so);
- inp_wunlock(tp->t_inpcb);
- }
-}
-
-/*
- * Disconnect offloaded established but not yet accepted connections sitting
- * on a server's accept_queue. We just send an ABORT_REQ at this point and
- * finish off the disconnect later as we may need to wait for the ABORT_RPL.
- */
-void
-t3_disconnect_acceptq(struct socket *listen_so)
-{
-
- so_lock(listen_so);
- so_listeners_apply_all(listen_so, t3_child_disconnect, NULL);
- so_unlock(listen_so);
-}
-
-/*
- * Reset offloaded connections sitting on a server's syn queue. As above
- * we send ABORT_REQ and finish off when we get ABORT_RPL.
- */
-
-void
-t3_reset_synq(struct listen_ctx *lctx)
+do_wr_ack(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- struct toepcb *toep;
-
- so_lock(lctx->lso);
- while (!LIST_EMPTY(&lctx->synq_head)) {
- toep = LIST_FIRST(&lctx->synq_head);
- LIST_REMOVE(toep, synq_entry);
- toep->tp_tp = NULL;
- t3_send_reset(toep);
- cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid);
- toepcb_release(toep);
- }
- so_unlock(lctx->lso);
-}
-
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ struct cpl_wr_ack *hdr = mtod(m, void *);
+ unsigned int tid = GET_TID(hdr);
+ struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
-int
-t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
- unsigned int nppods, unsigned int tag, unsigned int maxoff,
- unsigned int pg_off, unsigned int color)
-{
- unsigned int i, j, pidx;
- struct pagepod *p;
- struct mbuf *m;
- struct ulp_mem_io *req;
- unsigned int tid = toep->tp_tid;
- const struct tom_data *td = TOM_DATA(toep->tp_toedev);
- unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit;
+ /* XXX bad race */
+ if (toep)
+ wr_ack(toep, m);
- CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)",
- gl, nppods, tag, maxoff, pg_off, color);
-
- for (i = 0; i < nppods; ++i) {
- m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE);
- m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
- req = mtod(m, struct ulp_mem_io *);
- m->m_pkthdr.len = m->m_len = sizeof(*req) + PPOD_SIZE;
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
- req->wr.wr_lo = 0;
- req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) |
- V_ULPTX_CMD(ULP_MEM_WRITE));
- req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) |
- V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1));
-
- p = (struct pagepod *)(req + 1);
- if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) {
- p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
- p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) |
- V_PPOD_COLOR(color));
- p->pp_max_offset = htonl(maxoff);
- p->pp_page_offset = htonl(pg_off);
- p->pp_rsvd = 0;
- for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx)
- p->pp_addr[j] = pidx < gl->dgl_nelem ?
- htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0;
- } else
- p->pp_vld_tid = 0; /* mark sentinel page pods invalid */
- send_or_defer(toep, m, 0);
- ppod_addr += PPOD_SIZE;
- }
return (0);
}
@@ -4153,10 +1834,7 @@ mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
unsigned int word, uint64_t mask, uint64_t val)
{
struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
-
- CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
- tid, word, mask, val);
-
+
txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
@@ -4167,294 +1845,19 @@ mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
req->val = htobe64(val);
}
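
A SET_TCB_FIELD request updates only the bits selected by mask within one
64-bit TCB word; the hardware presumably computes word = (word & ~mask) |
(val & mask).  A sketch of that read-modify-write semantic (the TCB word
layout itself is firmware-defined and not modeled here):

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t
	set_tcb_field(uint64_t word, uint64_t mask, uint64_t val)
	{
		/* Replace only the masked bits, leave the rest untouched. */
		return ((word & ~mask) | (val & mask));
	}

	int
	main(void)
	{
		uint64_t word = 0x00000000deadbeefULL;
		uint64_t mask = 0x00000000ffff0000ULL;	/* bits 16..31 only */
		uint64_t val  = 0x0000000012340000ULL;

		/* Prints 0x000000001234beef. */
		printf("0x%016llx\n",
		    (unsigned long long)set_tcb_field(word, mask, val));
		return (0);
	}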
-/*
- * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
- */
-static void
-mk_rx_data_ack_ulp(struct toepcb *toep, struct cpl_rx_data_ack *ack,
- unsigned int tid, unsigned int credits)
-{
- struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;
-
- txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
- txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
- OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
- ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
- V_RX_DACK_MODE(TOM_TUNABLE(toep->tp_toedev, delack)) |
- V_RX_CREDITS(credits));
-}
-
void
-t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
-{
- unsigned int wrlen;
- struct mbuf *m;
- struct work_request_hdr *wr;
- struct cpl_barrier *lock;
- struct cpl_set_tcb_field *req;
- struct cpl_get_tcb *getreq;
- struct ddp_state *p = &toep->tp_ddp_state;
-
-#if 0
- SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
-#endif
- wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
- sizeof(*getreq);
- m = m_gethdr_nofail(wrlen);
- m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
- wr = mtod(m, struct work_request_hdr *);
- bzero(wr, wrlen);
-
- wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
- m->m_pkthdr.len = m->m_len = wrlen;
-
- lock = (struct cpl_barrier *)(wr + 1);
- mk_cpl_barrier_ulp(lock);
-
- req = (struct cpl_set_tcb_field *)(lock + 1);
-
- CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);
-
-	/* Hmmm, not sure if this is actually a good thing: reactivating
-	 * the other buffer might be an issue if it has been completed
-	 * already. However, that is unlikely, since the fact that the UBUF
-	 * is not completed indicates that there is no outstanding data.
-	 */
- if (bufidx == 0)
- mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
- V_TF_DDP_ACTIVE_BUF(1) |
- V_TF_DDP_BUF0_VALID(1),
- V_TF_DDP_ACTIVE_BUF(1));
- else
- mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
- V_TF_DDP_ACTIVE_BUF(1) |
- V_TF_DDP_BUF1_VALID(1), 0);
-
- getreq = (struct cpl_get_tcb *)(req + 1);
- mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
-
- mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));
-
-	/* Keep track of the number of outstanding CPL_GET_TCB requests
- */
- p->get_tcb_count++;
-
-#ifdef T3_TRACE
- T3_TRACE1(TIDTB(so),
- "t3_cancel_ddpbuf: bufidx %u", bufidx);
-#endif
- cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
+t3_init_cpl_io(struct adapter *sc)
+{
+ t3_register_cpl_handler(sc, CPL_ACT_ESTABLISH, do_act_establish);
+ t3_register_cpl_handler(sc, CPL_ACT_OPEN_RPL, do_act_open_rpl);
+ t3_register_cpl_handler(sc, CPL_RX_URG_NOTIFY, do_rx_urg_notify);
+ t3_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data);
+ t3_register_cpl_handler(sc, CPL_TX_DMA_ACK, do_wr_ack);
+ t3_register_cpl_handler(sc, CPL_PEER_CLOSE, do_peer_close);
+ t3_register_cpl_handler(sc, CPL_ABORT_REQ_RSS, do_abort_req);
+ t3_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl);
+ t3_register_cpl_handler(sc, CPL_CLOSE_CON_RPL, do_close_con_rpl);
+ t3_register_cpl_handler(sc, CPL_SMT_WRITE_RPL, do_smt_write_rpl);
+ t3_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl);
}
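
Note that the new t3_init_cpl_io() no longer fills a global t3tom table;
handlers are registered per adapter.  Below is a minimal sketch of the
dispatch pattern behind t3_register_cpl_handler(), using a hypothetical
opcode value and a simplified message type.

	#include <stddef.h>
	#include <stdio.h>

	#define NUM_CPL_CMDS 256

	struct msg { unsigned char opcode; };

	typedef int (*cpl_handler_t)(struct msg *);

	static cpl_handler_t cpl_handlers[NUM_CPL_CMDS];

	static int
	handle_act_establish(struct msg *m)
	{
		printf("ACT_ESTABLISH (opcode %u)\n", m->opcode);
		return (0);
	}

	static void
	register_cpl_handler(unsigned int opcode, cpl_handler_t h)
	{
		cpl_handlers[opcode] = h;	/* one slot per CPL opcode */
	}

	int
	main(void)
	{
		struct msg m = { .opcode = 23 };	/* hypothetical value */

		register_cpl_handler(23, handle_act_establish);
		if (cpl_handlers[m.opcode] != NULL)
			cpl_handlers[m.opcode](&m);	/* rx path dispatch */
		return (0);
	}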
-
-/**
- * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
- * @sk: the socket associated with the buffers
- * @bufidx: index of HW DDP buffer (0 or 1)
- * @tag0: new tag for HW buffer 0
- * @tag1: new tag for HW buffer 1
- * @len: new length for HW buf @bufidx
- *
- * Sends a compound WR to overlay a new DDP buffer on top of an existing
- * buffer by changing the buffer tag and length and setting the valid and
- * active flag accordingly. The caller must ensure the new buffer is at
- * least as big as the existing one. Since we typically reprogram both HW
- * buffers this function sets both tags for convenience. Read the TCB to
- * determine how made data was written into the buffer before the overlay
- * determine how much data was written into the buffer before the overlay
- */
-void
-t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0,
- unsigned int tag1, unsigned int len)
-{
- unsigned int wrlen;
- struct mbuf *m;
- struct work_request_hdr *wr;
- struct cpl_get_tcb *getreq;
- struct cpl_set_tcb_field *req;
- struct ddp_state *p = &toep->tp_ddp_state;
-
- CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)",
- bufidx, tag0, tag1, len);
-#if 0
- SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
-#endif
- wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq);
- m = m_gethdr_nofail(wrlen);
- m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
- wr = mtod(m, struct work_request_hdr *);
- m->m_pkthdr.len = m->m_len = wrlen;
- bzero(wr, wrlen);
-
-
- /* Set the ATOMIC flag to make sure that TP processes the following
- * CPLs in an atomic manner and no wire segments can be interleaved.
- */
- wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
- req = (struct cpl_set_tcb_field *)(wr + 1);
- mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG,
- V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) |
- V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32,
- V_TCB_RX_DDP_BUF0_TAG(tag0) |
- V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32);
- req++;
- if (bufidx == 0) {
- mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN,
- V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
- V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
- req++;
- mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
- V_TF_DDP_PUSH_DISABLE_0(1) |
- V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
- V_TF_DDP_PUSH_DISABLE_0(0) |
- V_TF_DDP_BUF0_VALID(1));
- } else {
- mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN,
- V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN),
- V_TCB_RX_DDP_BUF1_LEN((uint64_t)len));
- req++;
- mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
- V_TF_DDP_PUSH_DISABLE_1(1) |
- V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
- V_TF_DDP_PUSH_DISABLE_1(0) |
- V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1));
- }
-
- getreq = (struct cpl_get_tcb *)(req + 1);
- mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
-
-	/* Keep track of the number of outstanding CPL_GET_TCB requests
- */
- p->get_tcb_count++;
-
-#ifdef T3_TRACE
- T3_TRACE4(TIDTB(sk),
- "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u "
- "len %d",
- bufidx, tag0, tag1, len);
-#endif
- cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
-}
-
-/*
- * Sends a compound WR containing all the CPL messages needed to program the
- * two HW DDP buffers, namely optionally setting up the length and offset of
- * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK.
- */
-void
-t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0,
- unsigned int len1, unsigned int offset1,
- uint64_t ddp_flags, uint64_t flag_mask, int modulate)
-{
- unsigned int wrlen;
- struct mbuf *m;
- struct work_request_hdr *wr;
- struct cpl_set_tcb_field *req;
-
- CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ",
- len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff);
-
-#if 0
- SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
-#endif
- wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
- (len1 ? sizeof(*req) : 0) +
- (modulate ? sizeof(struct cpl_rx_data_ack) : 0);
- m = m_gethdr_nofail(wrlen);
- m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
- wr = mtod(m, struct work_request_hdr *);
- bzero(wr, wrlen);
-
- wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
- m->m_pkthdr.len = m->m_len = wrlen;
-
- req = (struct cpl_set_tcb_field *)(wr + 1);
- if (len0) { /* program buffer 0 offset and length */
- mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET,
- V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
- V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
- V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) |
- V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0));
- req++;
- }
- if (len1) { /* program buffer 1 offset and length */
- mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET,
- V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
- V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32,
- V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) |
- V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32);
- req++;
- }
-
- mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask,
- ddp_flags);
-
- if (modulate) {
- mk_rx_data_ack_ulp(toep,
- (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid,
- toep->tp_copied_seq - toep->tp_rcv_wup);
- toep->tp_rcv_wup = toep->tp_copied_seq;
- }
-
-#ifdef T3_TRACE
- T3_TRACE5(TIDTB(sk),
- "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x "
- "modulate %d",
- len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff,
- modulate);
#endif
-
- cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
-}
-
-void
-t3_init_wr_tab(unsigned int wr_len)
-{
- int i;
-
- if (mbuf_wrs[1]) /* already initialized */
- return;
-
- for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) {
- int sgl_len = (3 * i) / 2 + (i & 1);
-
- sgl_len += 3;
- mbuf_wrs[i] = sgl_len <= wr_len ?
- 1 : 1 + (sgl_len - 2) / (wr_len - 1);
- }
-
- wrlen = wr_len * 8;
-}
-
-int
-t3_init_cpl_io(void)
-{
-#ifdef notyet
- tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
- if (!tcphdr_skb) {
- log(LOG_ERR,
- "Chelsio TCP offload: can't allocate sk_buff\n");
- return -1;
- }
- skb_put(tcphdr_skb, sizeof(struct tcphdr));
- tcphdr_skb->h.raw = tcphdr_skb->data;
- memset(tcphdr_skb->data, 0, tcphdr_skb->len);
-#endif
-
- t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
- t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
- t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack);
- t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data);
- t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
- t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
- t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
- t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
- t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
- t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
- t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp);
- t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
- t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify);
- t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt);
- t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);
- return (0);
-}
-
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c b/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
deleted file mode 100644
index bb0015f..0000000
--- a/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
+++ /dev/null
@@ -1,1034 +0,0 @@
-/**************************************************************************
-
-Copyright (c) 2007-2008, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/types.h>
-#include <sys/fcntl.h>
-#include <sys/kernel.h>
-#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/mbuf.h>
-#include <sys/condvar.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/smp.h>
-#include <sys/sockstate.h>
-#include <sys/sockopt.h>
-#include <sys/socket.h>
-#include <sys/sockbuf.h>
-#include <sys/syslog.h>
-#include <sys/uio.h>
-#include <sys/file.h>
-
-#include <machine/bus.h>
-#include <machine/cpu.h>
-
-#include <net/if.h>
-#include <net/route.h>
-
-#include <netinet/in.h>
-#include <netinet/in_pcb.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-
-#include <cxgb_osdep.h>
-#include <sys/mbufq.h>
-#include <ulp/tom/cxgb_tcp_offload.h>
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_fsm.h>
-#include <netinet/tcp_offload.h>
-#include <net/route.h>
-
-#include <t3cdev.h>
-#include <common/cxgb_firmware_exports.h>
-#include <common/cxgb_t3_cpl.h>
-#include <common/cxgb_tcb.h>
-#include <common/cxgb_ctl_defs.h>
-#include <cxgb_offload.h>
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_map.h>
-#include <vm/vm_extern.h>
-#include <vm/pmap.h>
-
-#include <sys/mvec.h>
-#include <ulp/toecore/cxgb_toedev.h>
-#include <ulp/tom/cxgb_defs.h>
-#include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-#include <ulp/tom/cxgb_toepcb.h>
-#include <ulp/tom/cxgb_tcp.h>
-
-
-static int (*pru_sosend)(struct socket *so, struct sockaddr *addr,
- struct uio *uio, struct mbuf *top, struct mbuf *control,
- int flags, struct thread *td);
-
-static int (*pru_soreceive)(struct socket *so, struct sockaddr **paddr,
- struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
- int *flagsp);
-
-#define TMP_IOV_MAX 16
-#ifndef PG_FRAME
-#define PG_FRAME ~PAGE_MASK
-#endif
-#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
-
-void
-t3_init_socket_ops(void)
-{
- struct protosw *prp;
-
- prp = pffindtype(AF_INET, SOCK_STREAM);
- pru_sosend = prp->pr_usrreqs->pru_sosend;
- pru_soreceive = prp->pr_usrreqs->pru_soreceive;
-}
-
-struct cxgb_dma_info {
- size_t cdi_mapped;
- int cdi_nsegs;
- bus_dma_segment_t *cdi_segs;
-
-};
-
-static void
-cxgb_dma_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
- bus_size_t mapsize, int error)
-{
- struct cxgb_dma_info *cdi = arg;
-
- cdi->cdi_mapped = mapsize;
- cdi->cdi_nsegs = nsegs;
- cdi->cdi_segs = segs;
-}
-
-static void
-iov_adj(struct iovec **iov, int *iovcnt, size_t count)
-{
- struct iovec *iovtmp;
- int iovcnttmp;
- caddr_t ptmp;
-
- if (count > 0) {
- iovtmp = *iov;
- iovcnttmp = *iovcnt;
- while (count > 0) {
- if (count < iovtmp->iov_len) {
- ptmp = iovtmp->iov_base;
- ptmp += count;
- iovtmp->iov_base = ptmp;
- iovtmp->iov_len -= count;
- break;
- } else
- count -= iovtmp->iov_len;
- iovtmp++;
- iovcnttmp--;
- }
- *iov = iovtmp;
- *iovcnt = iovcnttmp;
- } else if (count < 0) {
- iovtmp = &(*iov)[*iovcnt - 1];
- iovcnttmp = *iovcnt;
- while (count < 0) {
- if (-count < iovtmp->iov_len) {
- iovtmp->iov_len += count;
- break;
- } else
- count += iovtmp->iov_len;
- iovtmp--;
- iovcnttmp--;
- }
- *iovcnt = iovcnttmp;
- }
-}
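
iov_adj() above walks the iovec array forward when count is positive
(skipping bytes already consumed) and trims the tail when it is negative
(dropping bytes that could not be held).  A runnable illustration of the
forward case, reimplemented compactly as a hypothetical iov_advance():

	#include <stdio.h>
	#include <sys/uio.h>	/* struct iovec */

	static void
	iov_advance(struct iovec **iov, int *iovcnt, size_t count)
	{
		while (count > 0) {
			if (count < (*iov)->iov_len) {
				(*iov)->iov_base = (char *)(*iov)->iov_base + count;
				(*iov)->iov_len -= count;
				break;
			}
			count -= (*iov)->iov_len;
			(*iov)++;
			(*iovcnt)--;
		}
	}

	int
	main(void)
	{
		char a[10], b[10];
		struct iovec vec[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
		struct iovec *iov = vec;
		int iovcnt = 2;

		/* Skip all of a[] and the first 5 bytes of b[]. */
		iov_advance(&iov, &iovcnt, 15);
		printf("iovcnt %d, first segment len %zu\n",
		    iovcnt, iov->iov_len);
		return (0);
	}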
-
-static void
-cxgb_zero_copy_free(void *cl, void *arg)
-{
- struct mbuf_vec *mv;
- struct mbuf *m = (struct mbuf *)cl;
-
- mv = mtomv(m);
-	/*
-	 * These are physical addresses; don't try to free them here.  The
-	 * pages should be unheld separately from sbdrop.
-	 */
- mv->mv_count = 0;
- m_free_iovec(m, m->m_type);
-}
-
-
-static int
-cxgb_hold_iovec_pages(struct uio *uio, vm_page_t *m, int *held, vm_prot_t prot)
-{
- struct iovec *iov = uio->uio_iov;
- int iovcnt = uio->uio_iovcnt;
- int err, i, count, totcount, maxcount, totbytes, npages, curbytes;
- uint64_t start, end;
- vm_page_t *mp;
- vm_map_t map;
-
- map = &uio->uio_td->td_proc->p_vmspace->vm_map;
- totbytes = totcount = 0;
- maxcount = *held;
-
- mp = m;
- for (totcount = i = 0; (i < iovcnt) && (totcount < maxcount); i++, iov++) {
- count = maxcount - totcount;
-
- start = (uintptr_t)iov->iov_base;
- end = (uintptr_t)((caddr_t)iov->iov_base + iov->iov_len);
- start &= PG_FRAME;
- end += PAGE_MASK;
- end &= PG_FRAME;
- npages = (end - start) >> PAGE_SHIFT;
-
- count = min(count, npages);
-
- /* The following return value is not used. XXX */
- err = vm_fault_quick_hold_pages(map,
- (vm_offset_t)iov->iov_base, iov->iov_len, prot, mp, count);
- mp += count;
- totcount += count;
- curbytes = iov->iov_len;
- if (count != npages)
- curbytes = count*PAGE_SIZE - (((uintptr_t)iov->iov_base)&PAGE_MASK);
- totbytes += curbytes;
- }
- uio->uio_resid -= totbytes;
-
- return (0);
-}
-
-/*
- * Returns whether a connection should enable DDP. This happens when all of
- * the following conditions are met:
- * - the connection's ULP mode is DDP
- * - DDP is not already enabled
- * - the last receive was above the DDP threshold
- * - receive buffers are in user space
- * - receive side isn't shutdown (handled by caller)
- * - the connection's receive window is big enough so that sizable buffers
- * can be posted without closing the window in the middle of DDP (checked
- * when the connection is offloaded)
- */
-static int
-so_should_ddp(const struct toepcb *toep, int last_recv_len)
-{
-
- DPRINTF("ulp_mode=%d last_recv_len=%d ddp_thresh=%d rcv_wnd=%ld ddp_copy_limit=%d\n",
- toep->tp_ulp_mode, last_recv_len, TOM_TUNABLE(toep->tp_toedev, ddp_thres),
- toep->tp_tp->rcv_wnd, (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + DDP_RSVD_WIN));
-
- return toep->tp_ulp_mode == ULP_MODE_TCPDDP && (toep->tp_ddp_state.kbuf[0] == NULL) &&
- last_recv_len > TOM_TUNABLE(toep->tp_toedev, ddp_thres) &&
- toep->tp_tp->rcv_wnd >
- (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + DDP_RSVD_WIN);
-}
-
-static inline int
-is_ddp(const struct mbuf *m)
-{
- return ((m->m_flags & M_DDP) != 0);
-}
-
-static inline int
-is_ddp_psh(const struct mbuf *m)
-{
- return ((is_ddp(m) && (m->m_pkthdr.csum_flags & DDP_BF_PSH)) != 0);
-}
-
-static int
-m_uiomove(const struct mbuf *m, int offset, int len, struct uio *uio)
-{
- int curlen, startlen, resid_init, err = 0;
- caddr_t buf;
-
- DPRINTF("m_uiomove(m=%p, offset=%d, len=%d, ...)\n",
- m, offset, len);
-
- startlen = len;
- resid_init = uio->uio_resid;
- while (m && len) {
- buf = mtod(m, caddr_t);
- curlen = m->m_len;
- if (offset && (offset < curlen)) {
- curlen -= offset;
- buf += offset;
- offset = 0;
- } else if (offset) {
- offset -= curlen;
- m = m->m_next;
- continue;
- }
- err = uiomove(buf, min(len, curlen), uio);
- if (err) {
- printf("uiomove returned %d\n", err);
- return (err);
- }
-
- len -= min(len, curlen);
- m = m->m_next;
- }
- DPRINTF("copied %d bytes - resid_init=%d uio_resid=%d\n",
- startlen - len, resid_init, uio->uio_resid);
- return (err);
-}
-
-/*
- * Copy data from an sk_buff to an iovec. Deals with RX_DATA, which carry the
- * data in the sk_buff body, and with RX_DATA_DDP, which place the data in a
- * DDP buffer.
- */
-static inline int
-copy_data(const struct mbuf *m, int offset, int len, struct uio *uio)
-{
- struct iovec *to = uio->uio_iov;
- int err;
-
- if (__predict_true(!is_ddp(m))) /* RX_DATA */
- return m_uiomove(m, offset, len, uio);
- if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */
- to->iov_len -= len;
- to->iov_base = ((caddr_t)to->iov_base) + len;
- uio->uio_iov = to;
- uio->uio_resid -= len;
- return (0);
- }
- err = t3_ddp_copy(m, offset, uio, len); /* kernel DDP */
- return (err);
-}
-
-static void
-cxgb_wait_dma_completion(struct toepcb *toep)
-{
- struct rwlock *lock;
-
- lock = &toep->tp_tp->t_inpcb->inp_lock;
- inp_wlock(toep->tp_tp->t_inpcb);
- cv_wait_unlock(&toep->tp_cv, lock);
-}
-
-static int
-cxgb_vm_page_to_miov(struct toepcb *toep, struct uio *uio, struct mbuf **m)
-{
- int i, seg_count, err, type;
- struct mbuf *m0;
- struct cxgb_dma_info cdi;
- struct mbuf_vec *mv;
- struct mbuf_iovec *mi;
- bus_dma_segment_t *segs;
-
- err = bus_dmamap_load_uio(toep->tp_tx_dmat, toep->tp_dmamap, uio,
- cxgb_dma_callback, &cdi, 0);
-
- if (err)
- return (err);
- seg_count = cdi.cdi_nsegs;
- if ((m0 = mcl_alloc(seg_count, &type)) == NULL) {
- bus_dmamap_unload(toep->tp_tx_dmat, toep->tp_dmamap);
- return (ENOMEM);
- }
- segs = cdi.cdi_segs;
- m0->m_type = type;
- m0->m_flags = (M_EXT|M_NOFREE);
- m0->m_ext.ext_type = EXT_EXTREF;
- m0->m_ext.ext_free = cxgb_zero_copy_free;
-#if __FreeBSD_version >= 800016
- m0->m_ext.ext_arg1 = NULL; /* XXX: probably wrong /phk */
- m0->m_ext.ext_arg2 = NULL;
-#else
- m0->m_ext.ext_args = NULL;
-#endif
-
- mv = mtomv(m0);
- mv->mv_count = seg_count;
- mv->mv_first = 0;
- for (i = 0, mi = mv->mv_vec; i < seg_count; mi++, segs++, i++)
- mi_collapse_sge(mi, segs);
-
- *m = m0;
-
-	/*
-	 * This appears to be a no-op at the moment, as busdma is all or
-	 * nothing; we need to make sure the tag values are large enough.
-	 */
- if (cdi.cdi_mapped < uio->uio_resid) {
- uio->uio_resid -= cdi.cdi_mapped;
- } else
- uio->uio_resid = 0;
-
- return (0);
-}
-
-static int
-t3_sosend(struct socket *so, struct uio *uio)
-{
- int rv, count, hold_resid, sent, iovcnt;
- struct iovec iovtmp[TMP_IOV_MAX], *iovtmpp, *iov;
- struct tcpcb *tp = so_sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
- struct mbuf *m;
- struct uio uiotmp;
- struct sockbuf *snd;
-
- /*
- * Events requiring iteration:
- * - number of pages exceeds max hold pages for process or system
- * - number of pages exceeds maximum sg entries for a single WR
- *
- * We're limited to holding 128 pages at once - and we're limited to
- * 34 SG entries per work request, but each SG entry can be any number
- * of contiguous pages
- *
- */
-
- uiotmp = *uio;
- iovcnt = uio->uio_iovcnt;
- iov = uio->uio_iov;
- sent = 0;
- snd = so_sockbuf_snd(so);
-sendmore:
- /*
- * Make sure we don't exceed the socket buffer
- */
- count = min(toep->tp_page_count, (sockbuf_sbspace(snd) >> PAGE_SHIFT) + 2*PAGE_SIZE);
- rv = cxgb_hold_iovec_pages(&uiotmp, toep->tp_pages, &count, VM_PROT_READ);
- hold_resid = uiotmp.uio_resid;
- if (rv)
- return (rv);
-
- /*
- * Bump past sent and shave off the unheld amount
- */
- if (hold_resid > 0) {
- iovtmpp = iovtmp;
- memcpy(iovtmp, iov, iovcnt*sizeof(*iov));
- if (sent)
- iov_adj(&iovtmpp, &iovcnt, sent);
- iov_adj(&iovtmpp, &iovcnt, -hold_resid);
- uiotmp.uio_iov = iovtmpp;
- uiotmp.uio_iovcnt = iovcnt;
-
- }
- uiotmp.uio_resid = uio->uio_resid - hold_resid;
-
- /*
- * Push off all held pages
- *
- */
- while (uiotmp.uio_resid > 0) {
- rv = cxgb_vm_page_to_miov(toep, &uiotmp, &m);
- if (rv) {
- vm_page_unhold_pages(toep->tp_pages, count);
- return (rv);
- }
- uio->uio_resid -= m->m_pkthdr.len;
- sent += m->m_pkthdr.len;
- sbappend(snd, m);
- t3_push_frames(so, TRUE);
- iov_adj(&uiotmp.uio_iov, &iovcnt, uiotmp.uio_resid);
- }
-
- /*
- * Wait for pending I/O to be DMA'd to the card
- *
- */
- cxgb_wait_dma_completion(toep);
- vm_page_unhold_pages(toep->tp_pages, count);
- /*
-	 * If there is more data to send, adjust the local copy of iov
-	 * to point to the start.
- */
- if (hold_resid) {
- iovtmpp = iovtmp;
- memcpy(iovtmp, iov, iovcnt*sizeof(*iov));
- iov_adj(&iovtmpp, &iovcnt, sent);
- uiotmp = *uio;
- uiotmp.uio_iov = iovtmpp;
- uiotmp.uio_iovcnt = iovcnt;
- goto sendmore;
- }
-
- return (0);
-}
-
-static int
-cxgb_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
- struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
-{
- struct tcpcb *tp = so_sototcpcb(so);
- struct toedev *tdev;
- int zcopy_thres, zcopy_enabled, rv;
-
- /*
- * In order to use DMA direct from userspace the following
- * conditions must be met:
- * - the connection is currently offloaded
- * - ddp is enabled
- * - the number of bytes to be transferred exceeds the threshold
- * - the number of bytes currently in flight won't exceed the in-flight
- * threshold XXX TODO
- * - vm_fault_quick_hold_pages succeeds
- * - blocking socket XXX for now
- *
- */
- if (tp && tp->t_flags & TF_TOE) {
- struct toepcb *toep = tp->t_toe;
-
- tdev = toep->tp_toedev;
- zcopy_thres = TOM_TUNABLE(tdev, zcopy_sosend_partial_thres);
- zcopy_enabled = TOM_TUNABLE(tdev, zcopy_sosend_enabled);
-
- if (uio && (uio->uio_resid > zcopy_thres) &&
- (uio->uio_iovcnt < TMP_IOV_MAX) && ((so_state_get(so) & SS_NBIO) == 0)
- && zcopy_enabled) {
- rv = t3_sosend(so, uio);
- if (rv != EAGAIN)
- return (rv);
- }
- }
- return pru_sosend(so, addr, uio, top, control, flags, td);
-}
-
-/*
- * Following replacement or removal of the first mbuf on the first mbuf chain
- * of a socket buffer, push necessary state changes back into the socket
- * buffer so that other consumers see the values consistently. 'nextrecord'
- * is the caller's locally stored value of the original value of
- * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes.
- * NOTE: 'nextrecord' may be NULL.
- */
-static __inline void
-sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
-{
- sockbuf_lock_assert(sb);
- /*
- * First, update for the new value of nextrecord. If necessary, make
- * it the first record.
- */
- if (sb->sb_mb != NULL)
- sb->sb_mb->m_nextpkt = nextrecord;
- else
- sb->sb_mb = nextrecord;
-
- /*
- * Now update any dependent socket buffer fields to reflect the new
- * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the
- * addition of a second clause that takes care of the case where
- * sb_mb has been updated, but remains the last record.
- */
- if (sb->sb_mb == NULL) {
- sb->sb_mbtail = NULL;
- sb->sb_lastrecord = NULL;
- } else if (sb->sb_mb->m_nextpkt == NULL)
- sb->sb_lastrecord = sb->sb_mb;
-}
-
-#define IS_NONBLOCKING(so) (so_state_get(so) & SS_NBIO)
-
-static int
-t3_soreceive(struct socket *so, int *flagsp, struct uio *uio)
-{
- struct tcpcb *tp = so_sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
- struct mbuf *m;
- uint32_t offset;
- int err, flags, avail, len, copied, copied_unacked;
- int target; /* Read at least this many bytes */
- int user_ddp_ok;
- struct ddp_state *p;
- struct inpcb *inp = so_sotoinpcb(so);
- int socket_state, socket_error;
- struct sockbuf *rcv;
-
- avail = offset = copied = copied_unacked = 0;
- flags = flagsp ? (*flagsp &~ MSG_EOR) : 0;
- rcv = so_sockbuf_rcv(so);
-
- err = sblock(rcv, SBLOCKWAIT(flags));
- p = &toep->tp_ddp_state;
-
- if (err)
- return (err);
-
- rcv = so_sockbuf_rcv(so);
- sockbuf_lock(rcv);
- if ((tp->t_flags & TF_TOE) == 0) {
- sockbuf_unlock(rcv);
- err = EAGAIN;
- goto done_unlocked;
- }
-
- p->user_ddp_pending = 0;
-restart:
- if ((tp->t_flags & TF_TOE) == 0) {
- sockbuf_unlock(rcv);
- err = EAGAIN;
- goto done_unlocked;
- }
-
- len = uio->uio_resid;
- m = rcv->sb_mb;
- target = (flags & MSG_WAITALL) ? len : rcv->sb_lowat;
- user_ddp_ok = p->ubuf_ddp_ready;
- p->cancel_ubuf = 0;
-
- if (len == 0)
- goto done;
- if (m)
- goto got_mbuf;
-
- /* empty receive queue */
- if (copied >= target && (rcv->sb_mb == NULL) &&
- !p->user_ddp_pending)
- goto done;
-
- socket_state = so_state_get(so);
- socket_error = so_error_get(so);
- rcv = so_sockbuf_rcv(so);
-
- if (copied) {
- if (socket_error || tp->t_state == TCPS_CLOSED ||
- (socket_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)))
- goto done;
- } else {
- if (socket_state & SS_NOFDREF)
- goto done;
- if (socket_error) {
- err = socket_error;
- socket_error = 0;
- goto done;
- }
- if (rcv->sb_state & SBS_CANTRCVMORE)
- goto done;
- if (socket_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED))
- goto done;
- if (tp->t_state == TCPS_CLOSED) {
- err = ENOTCONN;
- goto done;
- }
- }
- if (rcv->sb_mb && !p->user_ddp_pending) {
- sockbuf_unlock(rcv);
- inp_wlock(inp);
- t3_cleanup_rbuf(tp, copied_unacked);
- inp_wunlock(inp);
- sockbuf_lock(rcv);
- copied_unacked = 0;
- goto restart;
- }
- if (p->kbuf[0] && user_ddp_ok && !p->user_ddp_pending &&
- uio->uio_iov->iov_len > p->kbuf[0]->dgl_length &&
- p->ubuf_ddp_ready) {
- p->user_ddp_pending =
- !t3_overlay_ubuf(toep, rcv, uio,
- IS_NONBLOCKING(so), flags, 1, 1);
- if (p->user_ddp_pending) {
- p->kbuf_posted++;
- user_ddp_ok = 0;
- }
- }
- if (p->kbuf[0] && (p->kbuf_posted == 0)) {
- t3_post_kbuf(toep, 1, IS_NONBLOCKING(so));
- p->kbuf_posted++;
- }
- if (p->user_ddp_pending) {
- /* One shot at DDP if we already have enough data */
- if (copied >= target)
- user_ddp_ok = 0;
-
- if (rcv->sb_state & SBS_CANTRCVMORE)
- goto done;
- CTR0(KTR_TOM, "ddp pending -- waiting");
- if ((err = sbwait(rcv)) != 0)
- goto done;
-//for timers to work await_ddp_completion(sk, flags, &timeo);
- } else if (copied >= target)
- goto done;
- else {
- if (copied_unacked) {
- int i = 0;
-
- sockbuf_unlock(rcv);
- inp_wlock(inp);
- t3_cleanup_rbuf(tp, copied_unacked);
- inp_wunlock(inp);
- copied_unacked = 0;
- if (mp_ncpus > 1)
- while (i++ < 200 && rcv->sb_mb == NULL)
- cpu_spinwait();
- sockbuf_lock(rcv);
- }
- if (rcv->sb_mb)
- goto restart;
-
- if (rcv->sb_state & SBS_CANTRCVMORE)
- goto done;
-
- CTR0(KTR_TOM, "no buffers -- waiting");
-
- if ((err = sbwait(rcv)) != 0)
- goto done;
- }
- goto restart;
-got_mbuf:
- /*
- * Adjust the mbuf seqno if it has already been partially processed by
- * soreceive_generic
- */
- if (m->m_pkthdr.len != m->m_len) {
- m->m_seq += m->m_pkthdr.len - m->m_len;
- m->m_pkthdr.len = m->m_len;
- }
-
- CTR6(KTR_TOM, "t3_soreceive: ddp_flags=0x%x m_len=%u resid=%u "
- "m_seq=0x%08x c_seq=0x%08x c_unack=%u",
- (is_ddp(m) ? m->m_ddp_flags : 0), m->m_pkthdr.len, len,
- m->m_seq, toep->tp_copied_seq, copied_unacked);
- KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || !(m->m_flags & M_EXT),
- ("unexpected type M_EXT=%d ext_type=%d m_len=%d m_pktlen=%d\n", !!(m->m_flags & M_EXT),
- m->m_ext.ext_type, m->m_len, m->m_pkthdr.len));
- KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p"
- " m_flags=0x%x m->m_len=%d", m->m_next, m->m_nextpkt, m->m_flags, m->m_len));
- if (m->m_pkthdr.len == 0) {
- if ((m->m_ddp_flags & DDP_BF_NOCOPY) == 0)
- panic("empty mbuf and NOCOPY not set\n");
- CTR0(KTR_TOM, "ddp done notification");
- p->user_ddp_pending = 0;
- sbdroprecord_locked(rcv);
- goto done;
- }
-
- KASSERT((int32_t)(toep->tp_copied_seq + copied_unacked - m->m_seq) >= 0,
- ("offset will go negative: offset=%d copied_seq=0x%08x copied_unacked=%d m_seq=0x%08x",
- offset, toep->tp_copied_seq, copied_unacked, m->m_seq));
- offset = toep->tp_copied_seq + copied_unacked - m->m_seq;
-
- if (offset >= m->m_pkthdr.len)
- panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x "
- "seq 0x%x pktlen %d ddp flags 0x%x", offset,
- toep->tp_copied_seq + copied_unacked, m->m_seq,
- m->m_pkthdr.len, m->m_ddp_flags);
-
- avail = m->m_pkthdr.len - offset;
- if (len < avail) {
- if (is_ddp(m) && (m->m_ddp_flags & DDP_BF_NOCOPY))
- panic("bad state in t3_soreceive len=%d avail=%d offset=%d\n", len, avail, offset);
- avail = len;
- rcv->sb_flags |= SB_IN_TOE;
- } else if (p->kbuf_posted == 0 && p->user_ddp_pending == 0)
- rcv->sb_flags &= ~SB_IN_TOE;
-
-#ifdef URGENT_DATA_SUPPORTED
- /*
- * Check if the data we are preparing to copy contains urgent
- * data. Either stop short of urgent data or skip it if it's
- * first and we are not delivering urgent data inline.
- */
- if (__predict_false(toep->tp_urg_data)) {
- uint32_t urg_offset = tp->rcv_up - tp->copied_seq + copied_unacked;
-
- if (urg_offset < avail) {
- if (urg_offset) {
- /* stop short of the urgent data */
- avail = urg_offset;
- } else if ((so_options_get(so) & SO_OOBINLINE) == 0) {
- /* First byte is urgent, skip */
- toep->tp_copied_seq++;
- offset++;
- avail--;
- if (!avail)
- goto skip_copy;
- }
- }
- }
-#endif
- if (is_ddp_psh(m) || offset || (rcv->sb_mb && !is_ddp(m))) {
- user_ddp_ok = 0;
-#ifdef T3_TRACE
- T3_TRACE0(TIDTB(so), "t3_sosend: PSH");
-#endif
- }
-
- if (user_ddp_ok && !p->user_ddp_pending &&
- uio->uio_iov->iov_len > p->kbuf[0]->dgl_length &&
- p->ubuf_ddp_ready) {
- p->user_ddp_pending =
- !t3_overlay_ubuf(toep, rcv, uio,
- IS_NONBLOCKING(so), flags, 1, 1);
- if (p->user_ddp_pending) {
- p->kbuf_posted++;
- user_ddp_ok = 0;
- }
- DPRINTF("user_ddp_pending=%d\n", p->user_ddp_pending);
- } else
- DPRINTF("user_ddp_ok=%d user_ddp_pending=%d iov_len=%ld dgl_length=%d ubuf_ddp_ready=%d ulp_mode=%d is_ddp(m)=%d flags=0x%x ubuf=%p kbuf_posted=%d\n",
- user_ddp_ok, p->user_ddp_pending, uio->uio_iov->iov_len, p->kbuf[0] ? p->kbuf[0]->dgl_length : 0,
- p->ubuf_ddp_ready, toep->tp_ulp_mode, !!is_ddp(m), m->m_ddp_flags, p->ubuf, p->kbuf_posted);
-
- /*
- * If MSG_TRUNC is specified the data is discarded.
- * XXX need to check pr_atomic
- */
- KASSERT(avail > 0, ("avail=%d resid=%d offset=%d", avail, uio->uio_resid, offset));
- if (__predict_true(!(flags & MSG_TRUNC))) {
- int resid = uio->uio_resid;
-
- sockbuf_unlock(rcv);
- if ((err = copy_data(m, offset, avail, uio))) {
- if (err)
- err = EFAULT;
- goto done_unlocked;
- }
-
- sockbuf_lock(rcv);
- if (avail != (resid - uio->uio_resid))
- printf("didn't copy all bytes :-/ avail=%d offset=%d pktlen=%d resid=%d uio_resid=%d copied=%d copied_unacked=%d is_ddp(m)=%d\n",
- avail, offset, m->m_pkthdr.len, resid, uio->uio_resid, copied, copied_unacked, is_ddp(m));
-
- if ((tp->t_flags & TF_TOE) == 0) {
- sockbuf_unlock(rcv);
- err = EAGAIN;
- goto done_unlocked;
- }
- }
-
- copied += avail;
- copied_unacked += avail;
- len -= avail;
-
-#ifdef URGENT_DATA_SUPPORTED
-skip_copy:
- if (tp->urg_data && after(tp->copied_seq + copied_unacked, tp->urg_seq))
- tp->urg_data = 0;
-#endif
- /*
- * If the buffer is fully consumed free it. If it's a DDP
- * buffer also handle any events it indicates.
- */
- if (avail + offset >= m->m_pkthdr.len) {
- unsigned int fl = m->m_ddp_flags;
- int exitnow, got_psh = 0, nomoredata = 0;
- int count;
- struct mbuf *nextrecord;
-
- if (p->kbuf[0] != NULL && is_ddp(m) && (fl & 1)) {
- if (is_ddp_psh(m) && p->user_ddp_pending)
- got_psh = 1;
-
- if (fl & DDP_BF_NOCOPY)
- p->user_ddp_pending = 0;
- else if ((fl & DDP_BF_NODATA) && IS_NONBLOCKING(so)) {
- p->kbuf_posted--;
- nomoredata = 1;
- } else {
- p->kbuf_posted--;
- p->ubuf_ddp_ready = 1;
- }
- }
-
- nextrecord = m->m_nextpkt;
- count = m->m_pkthdr.len;
- while (count > 0) {
- count -= m->m_len;
- KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
- CTR2(KTR_TOM, "freeing mbuf m_len = %d pktlen = %d", m->m_len, m->m_pkthdr.len);
- sbfree(rcv, m);
- rcv->sb_mb = m_free(m);
- m = rcv->sb_mb;
- }
- sockbuf_pushsync(rcv, nextrecord);
-#if 0
- sbdrop_locked(rcv, m->m_pkthdr.len);
-#endif
- exitnow = got_psh || nomoredata;
- if (copied >= target && (rcv->sb_mb == NULL) && exitnow)
- goto done;
- if (copied_unacked > (rcv->sb_hiwat >> 2)) {
- sockbuf_unlock(rcv);
- inp_wlock(inp);
- t3_cleanup_rbuf(tp, copied_unacked);
- inp_wunlock(inp);
- copied_unacked = 0;
- sockbuf_lock(rcv);
- }
- }
- if (len > 0)
- goto restart;
-
- done:
- if ((tp->t_flags & TF_TOE) == 0) {
- sockbuf_unlock(rcv);
- err = EAGAIN;
- goto done_unlocked;
- }
- /*
-	 * If we can still receive, decide what to do in preparation for the
- * next receive. Note that RCV_SHUTDOWN is set if the connection
- * transitioned to CLOSE but not if it was in that state to begin with.
- */
- if (__predict_true((so_state_get(so) & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) == 0)) {
- if (p->user_ddp_pending) {
- user_ddp_ok = 0;
- t3_cancel_ubuf(toep, rcv);
- if (rcv->sb_mb) {
- if (copied < 0)
- copied = 0;
- if (len > 0)
- goto restart;
- }
- p->user_ddp_pending = 0;
- }
- if ((p->kbuf[0] != NULL) && (p->kbuf_posted == 0)) {
-#ifdef T3_TRACE
- T3_TRACE0(TIDTB(so),
- "chelsio_recvmsg: about to exit, repost kbuf");
-#endif
-
- t3_post_kbuf(toep, 1, IS_NONBLOCKING(so));
- p->kbuf_posted++;
- } else if (so_should_ddp(toep, copied) && uio->uio_iovcnt == 1) {
- CTR1(KTR_TOM ,"entering ddp on tid=%u", toep->tp_tid);
- if (!t3_enter_ddp(toep, TOM_TUNABLE(toep->tp_toedev,
- ddp_copy_limit), 0, IS_NONBLOCKING(so))) {
- rcv->sb_flags |= SB_IN_TOE;
- p->kbuf_posted = 1;
- }
-
- }
- }
-#ifdef T3_TRACE
- T3_TRACE5(TIDTB(so),
- "chelsio_recvmsg <-: copied %d len %d buffers_freed %d "
- "kbuf_posted %d user_ddp_pending %u",
- copied, len, buffers_freed, p ? p->kbuf_posted : -1,
- p->user_ddp_pending);
-#endif
- sockbuf_unlock(rcv);
-done_unlocked:
- if (copied_unacked && (tp->t_flags & TF_TOE)) {
- inp_wlock(inp);
- t3_cleanup_rbuf(tp, copied_unacked);
- inp_wunlock(inp);
- }
- sbunlock(rcv);
-
- return (err);
-}
-
-static int
-cxgb_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
- struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
-{
- struct toedev *tdev;
- int rv, zcopy_thres, zcopy_enabled, flags;
- struct tcpcb *tp = so_sototcpcb(so);
- struct sockbuf *rcv = so_sockbuf_rcv(so);
-
- flags = flagsp ? *flagsp &~ MSG_EOR : 0;
-
- /*
- * In order to use DMA direct from userspace the following
- * conditions must be met:
- * - the connection is currently offloaded
- * - ddp is enabled
- * - the number of bytes to be transferred exceeds the threshold
- * - the number of bytes currently in flight won't exceed the in-flight
- * threshold XXX TODO
- * - vm_fault_quick_hold_pages succeeds
- * - blocking socket XXX for now
- * - iovcnt is 1
- *
- */
- if (tp && (tp->t_flags & TF_TOE) && uio && ((flags & (MSG_OOB|MSG_PEEK|MSG_DONTWAIT)) == 0)
- && (uio->uio_iovcnt == 1) && (mp0 == NULL) &&
- ((rcv->sb_flags & SB_IN_TOE) || (uio->uio_iovcnt == 1))) {
- struct toepcb *toep = tp->t_toe;
-
- tdev = toep->tp_toedev;
- zcopy_thres = TOM_TUNABLE(tdev, ddp_thres);
- zcopy_enabled = TOM_TUNABLE(tdev, ddp);
- if ((rcv->sb_flags & SB_IN_TOE) ||((uio->uio_resid > zcopy_thres) &&
- (uio->uio_iovcnt == 1) && zcopy_enabled)) {
- CTR4(KTR_TOM, "cxgb_soreceive: sb_flags=0x%x t_flags=0x%x flags=0x%x uio_resid=%d",
- rcv->sb_flags, tp->t_flags, flags, uio->uio_resid);
- rv = t3_soreceive(so, flagsp, uio);
- if (rv != EAGAIN)
- return (rv);
- else
- printf("returned EAGAIN\n");
- }
- } else if (tp && (tp->t_flags & TF_TOE) && uio && mp0 == NULL) {
- struct sockbuf *rcv = so_sockbuf_rcv(so);
-
- log(LOG_INFO, "skipping t3_soreceive flags=0x%x iovcnt=%d sb_state=0x%x\n",
- flags, uio->uio_iovcnt, rcv->sb_state);
- }
-
- return pru_soreceive(so, psa, uio, mp0, controlp, flagsp);
-}
-
-struct protosw cxgb_protosw;
-struct pr_usrreqs cxgb_tcp_usrreqs;
-
-void
-t3_install_socket_ops(struct socket *so)
-{
- static int copied = 0;
- struct pr_usrreqs *pru;
- struct protosw *psw;
-
- if (copied == 0) {
- psw = so_protosw_get(so);
- pru = psw->pr_usrreqs;
-
- bcopy(psw, &cxgb_protosw, sizeof(*psw));
- bcopy(pru, &cxgb_tcp_usrreqs, sizeof(*pru));
-
- cxgb_protosw.pr_ctloutput = t3_ctloutput;
- cxgb_protosw.pr_usrreqs = &cxgb_tcp_usrreqs;
- cxgb_tcp_usrreqs.pru_sosend = cxgb_sosend;
- cxgb_tcp_usrreqs.pru_soreceive = cxgb_soreceive;
- }
- so_protosw_set(so, &cxgb_protosw);
-
-#if 0
- so->so_proto->pr_usrreqs->pru_sosend = cxgb_sosend;
- so->so_proto->pr_usrreqs->pru_soreceive = cxgb_soreceive;
-#endif
-}
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_ddp.c b/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
deleted file mode 100644
index fe3b075..0000000
--- a/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
+++ /dev/null
@@ -1,738 +0,0 @@
-/**************************************************************************
-
-Copyright (c) 2007-2008, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/types.h>
-#include <sys/fcntl.h>
-#include <sys/kernel.h>
-#include <sys/ktr.h>
-#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/mbuf.h>
-#include <sys/condvar.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/sockstate.h>
-#include <sys/sockopt.h>
-#include <sys/socket.h>
-#include <sys/sockbuf.h>
-#include <sys/syslog.h>
-#include <sys/uio.h>
-
-#include <machine/bus.h>
-
-#include <net/if.h>
-#include <net/route.h>
-
-#include <netinet/in.h>
-#include <netinet/in_pcb.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-
-
-#include <cxgb_osdep.h>
-#include <sys/mbufq.h>
-
-#include <ulp/tom/cxgb_tcp_offload.h>
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_fsm.h>
-#include <netinet/tcp_offload.h>
-#include <net/route.h>
-
-#include <t3cdev.h>
-#include <common/cxgb_firmware_exports.h>
-#include <common/cxgb_t3_cpl.h>
-#include <common/cxgb_tcb.h>
-#include <common/cxgb_ctl_defs.h>
-#include <cxgb_offload.h>
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_map.h>
-#include <vm/vm_extern.h>
-#include <vm/pmap.h>
-
-#include <sys/mvec.h>
-#include <ulp/toecore/cxgb_toedev.h>
-#include <ulp/tom/cxgb_defs.h>
-#include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-#include <ulp/tom/cxgb_toepcb.h>
-#include <ulp/tom/cxgb_tcp.h>
-
-
-#define MAX_SCHEDULE_TIMEOUT 300
-
-/*
- * Return the number of page pods needed to accommodate a given number of pages.
- */
-static inline unsigned int
-pages2ppods(unsigned int pages)
-{
- return (pages + PPOD_PAGES - 1) / PPOD_PAGES + NUM_SENTINEL_PPODS;
-}
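
For reference, a minimal userland sketch of the arithmetic above. PPOD_PAGES and
NUM_SENTINEL_PPODS are defined in cxgb_t3_ddp.h; the values below are assumed
for illustration only, so the exact numbers may differ from the driver's.

#include <assert.h>

#define PPOD_PAGES		4	/* pages mapped per page pod (assumed) */
#define NUM_SENTINEL_PPODS	0	/* trailing sentinel pods (assumed) */

static unsigned int
pages2ppods_demo(unsigned int pages)
{
	return (pages + PPOD_PAGES - 1) / PPOD_PAGES + NUM_SENTINEL_PPODS;
}

int
main(void)
{
	assert(pages2ppods_demo(1) == 1);	/* a partial pod still costs one */
	assert(pages2ppods_demo(16) == 4);	/* 64KB buffer with 4KB pages */
	return (0);
}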
-
-/**
- * t3_pin_pages - pin a user memory range and prepare it for DDP
- * @addr - the starting address
- * @len - the length of the range
- * @newgl - contains the pages and physical addresses of the pinned range
- * @gl - an existing gather list, may be %NULL
- *
- * Pins the pages in the user-space memory range [addr, addr + len) and
- * maps them for DMA. Returns a gather list with the pinned pages and
- * their physical addresses. If @gl is non-NULL the pages it describes
- * are compared against the pages for [addr, addr + len), and if the
- * existing gather list already covers the range a new list is not
- * allocated. Returns 0 on success, or an errno. On success, if a new
- * gather list was allocated it is returned in @newgl.
- */
-static int
-t3_pin_pages(bus_dma_tag_t tag, bus_dmamap_t dmamap, vm_offset_t addr,
- size_t len, struct ddp_gather_list **newgl,
- const struct ddp_gather_list *gl)
-{
- int i = 0, err;
- size_t pg_off;
- unsigned int npages;
- struct ddp_gather_list *p;
- vm_map_t map;
-
- pg_off = addr & PAGE_MASK;
- npages = (pg_off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- p = malloc(sizeof(struct ddp_gather_list) + npages * sizeof(vm_page_t *),
- M_DEVBUF, M_NOWAIT|M_ZERO);
- if (p == NULL)
- return (ENOMEM);
-
- map = &curthread->td_proc->p_vmspace->vm_map;
- if (vm_fault_quick_hold_pages(map, addr, len, VM_PROT_READ |
- VM_PROT_WRITE, p->dgl_pages, npages) < 0) {
- err = EFAULT;
- goto free_gl;
- }
-
- if (gl && gl->dgl_offset == pg_off && gl->dgl_nelem >= npages &&
- gl->dgl_length >= len) {
- for (i = 0; i < npages; i++)
- if (p->dgl_pages[i] != gl->dgl_pages[i])
- goto different_gl;
- err = 0;
- goto unpin;
- }
-
-different_gl:
- p->dgl_length = len;
- p->dgl_offset = pg_off;
- p->dgl_nelem = npages;
-#ifdef NEED_BUSDMA
- p->phys_addr[0] = pci_map_page(pdev, p->pages[0], pg_off,
- PAGE_SIZE - pg_off,
- PCI_DMA_FROMDEVICE) - pg_off;
- for (i = 1; i < npages; ++i)
- p->phys_addr[i] = pci_map_page(pdev, p->pages[i], 0, PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
-#endif
- *newgl = p;
- return (0);
-unpin:
- vm_page_unhold_pages(p->dgl_pages, npages);
-
-free_gl:
-
- free(p, M_DEVBUF);
- *newgl = NULL;
- return (err);
-}
-
-static void
-unmap_ddp_gl(const struct ddp_gather_list *gl)
-{
-#ifdef NEED_BUSDMA
- int i;
-
- if (!gl->nelem)
- return;
-
- pci_unmap_page(pdev, gl->phys_addr[0] + gl->offset,
- PAGE_SIZE - gl->offset, PCI_DMA_FROMDEVICE);
- for (i = 1; i < gl->nelem; ++i)
- pci_unmap_page(pdev, gl->phys_addr[i], PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
-
-#endif
-}
-
-static void
-ddp_gl_free_pages(struct ddp_gather_list *gl, int dirty)
-{
- /*
- * XXX mark pages as dirty before unholding
- */
- vm_page_unhold_pages(gl->dgl_pages, gl->dgl_nelem);
-}
-
-void
-t3_free_ddp_gl(struct ddp_gather_list *gl)
-{
- unmap_ddp_gl(gl);
- ddp_gl_free_pages(gl, 0);
- free(gl, M_DEVBUF);
-}
-
-/* Max number of page pods for a buffer; enough for a 1MB buffer at 4KB page size */
-#define MAX_PPODS 64U
-
-/*
- * Allocate page pods for DDP buffer 1 (the user buffer) and set up the tag in
- * the TCB. We allocate page pods in multiples of PPOD_CLUSTER_SIZE. First we
- * try to allocate enough page pods to accommodate the whole buffer, subject to
- * the MAX_PPODS limit. If that fails we try to allocate PPOD_CLUSTER_SIZE page
- * pods before failing entirely.
- */
-static int
-alloc_buf1_ppods(struct toepcb *toep, struct ddp_state *p,
- unsigned long addr, unsigned int len)
-{
- int err, tag, npages, nppods;
- struct tom_data *d = TOM_DATA(toep->tp_toedev);
-
-#if 0
- SOCKBUF_LOCK_ASSERT(&so->so_rcv);
-#endif
- npages = ((addr & PAGE_MASK) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- nppods = min(pages2ppods(npages), MAX_PPODS);
- nppods = roundup2(nppods, PPOD_CLUSTER_SIZE);
- err = t3_alloc_ppods(d, nppods, &tag);
- if (err && nppods > PPOD_CLUSTER_SIZE) {
- nppods = PPOD_CLUSTER_SIZE;
- err = t3_alloc_ppods(d, nppods, &tag);
- }
- if (err)
- return (ENOMEM);
-
- p->ubuf_nppods = nppods;
- p->ubuf_tag = tag;
-#if NUM_DDP_KBUF == 1
- t3_set_ddp_tag(toep, 1, tag << 6);
-#endif
- return (0);
-}
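
To make the sizing above concrete, a hedged walk-through with assumed
constants (PPOD_PAGES, NUM_SENTINEL_PPODS and PPOD_CLUSTER_SIZE live in
cxgb_t3_ddp.h and the values here are assumed; only MAX_PPODS is defined in
this file):

#include <assert.h>

#define PPOD_PAGES		4	/* assumed */
#define NUM_SENTINEL_PPODS	0	/* assumed */
#define MAX_PPODS		64U	/* from this file */
#define PPOD_CLUSTER_SIZE	16U	/* assumed */
#define roundup2(x, y)	(((x) + ((y) - 1)) & (~((y) - 1)))	/* as in sys/param.h */

int
main(void)
{
	unsigned int npages = 100;	/* e.g. a page-aligned 400KB buffer */
	unsigned int nppods;

	nppods = (npages + PPOD_PAGES - 1) / PPOD_PAGES + NUM_SENTINEL_PPODS;
	if (nppods > MAX_PPODS)		/* min(pages2ppods(npages), MAX_PPODS) */
		nppods = MAX_PPODS;
	nppods = roundup2(nppods, PPOD_CLUSTER_SIZE);
	assert(nppods == 32);		/* 25 pods, rounded up to a cluster multiple */
	return (0);
}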
-
-/*
- * Starting offset for the user DDP buffer. A non-0 value ensures a DDP flush
- * won't block indefinitely if there's nothing to place (which should be rare).
- */
-#define UBUF_OFFSET 1
-
-static __inline unsigned long
-select_ddp_flags(const struct toepcb *toep, int buf_idx,
- int nonblock, int rcv_flags)
-{
- if (buf_idx == 1) {
- if (__predict_false(rcv_flags & MSG_WAITALL))
- return V_TF_DDP_PSH_NO_INVALIDATE0(1) |
- V_TF_DDP_PSH_NO_INVALIDATE1(1) |
- V_TF_DDP_PUSH_DISABLE_1(1);
- if (nonblock)
- return V_TF_DDP_BUF1_FLUSH(1);
-
- return V_TF_DDP_BUF1_FLUSH(!TOM_TUNABLE(toep->tp_toedev,
- ddp_push_wait));
- }
-
- if (__predict_false(rcv_flags & MSG_WAITALL))
- return V_TF_DDP_PSH_NO_INVALIDATE0(1) |
- V_TF_DDP_PSH_NO_INVALIDATE1(1) |
- V_TF_DDP_PUSH_DISABLE_0(1);
- if (nonblock)
- return V_TF_DDP_BUF0_FLUSH(1);
-
- return V_TF_DDP_BUF0_FLUSH(!TOM_TUNABLE(toep->tp_toedev, ddp_push_wait));
-}
-
-/*
- * Reposts the kernel DDP buffer after it has previously become full and been
- * invalidated. We just need to reset the offset and adjust the DDP flags.
- * Conveniently, we can set the flags and the offset with a single message.
- * Note that this function does not set the buffer length. Conveniently, our
- * kernel buffer is of fixed size. If the length needs to be changed that must
- * be done separately.
- */
-static void
-t3_repost_kbuf(struct toepcb *toep, unsigned int bufidx, int modulate,
- int activate, int nonblock)
-{
- struct ddp_state *p = &toep->tp_ddp_state;
- unsigned long flags;
-
-#if 0
- SOCKBUF_LOCK_ASSERT(&so->so_rcv);
-#endif
- p->buf_state[bufidx].cur_offset = p->kbuf[bufidx]->dgl_offset;
- p->buf_state[bufidx].flags = p->kbuf_noinval ? DDP_BF_NOINVAL : 0;
- p->buf_state[bufidx].gl = p->kbuf[bufidx];
- p->cur_buf = bufidx;
- p->kbuf_idx = bufidx;
-
- flags = select_ddp_flags(toep, bufidx, nonblock, 0);
- if (!bufidx)
- t3_setup_ddpbufs(toep, 0, 0, 0, 0, flags |
- V_TF_DDP_PSH_NO_INVALIDATE0(p->kbuf_noinval) |
- V_TF_DDP_PSH_NO_INVALIDATE1(p->kbuf_noinval) |
- V_TF_DDP_BUF0_VALID(1),
- V_TF_DDP_BUF0_FLUSH(1) |
- V_TF_DDP_PSH_NO_INVALIDATE0(1) |
- V_TF_DDP_PSH_NO_INVALIDATE1(1) | V_TF_DDP_OFF(1) |
- V_TF_DDP_BUF0_VALID(1) |
- V_TF_DDP_ACTIVE_BUF(activate), modulate);
- else
- t3_setup_ddpbufs(toep, 0, 0, 0, 0, flags |
- V_TF_DDP_PSH_NO_INVALIDATE0(p->kbuf_noinval) |
- V_TF_DDP_PSH_NO_INVALIDATE1(p->kbuf_noinval) |
- V_TF_DDP_BUF1_VALID(1) |
- V_TF_DDP_ACTIVE_BUF(activate),
- V_TF_DDP_BUF1_FLUSH(1) |
- V_TF_DDP_PSH_NO_INVALIDATE0(1) |
- V_TF_DDP_PSH_NO_INVALIDATE1(1) | V_TF_DDP_OFF(1) |
- V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
- modulate);
-
-}
-
-/**
- * setup_uio_ppods - setup HW page pods for a user iovec
- * @toep: the TOE PCB for the connection
- * @uio: the uio
- * @oft: additional bytes to map before the start of the buffer
- * @length: on success, the length of the mapped buffer is returned here
- *
- * Pins a user iovec and sets up HW page pods for DDP into it. We allocate
- * page pods for user buffers on the first call per socket. Afterwards we
- * limit the buffer length to whatever the existing page pods can accommodate.
- * Returns 0 with the mapped length stored in @length, or an error code.
- *
- * The current implementation handles iovecs with only one entry.
- */
-static int
-setup_uio_ppods(struct toepcb *toep, const struct uio *uio, int oft, int *length)
-{
- int err;
- unsigned int len;
- struct ddp_gather_list *gl = NULL;
- struct ddp_state *p = &toep->tp_ddp_state;
- struct iovec *iov = uio->uio_iov;
- vm_offset_t addr = (vm_offset_t)iov->iov_base - oft;
-
-#ifdef notyet
- SOCKBUF_LOCK_ASSERT(&so->so_rcv);
-#endif
- if (__predict_false(p->ubuf_nppods == 0)) {
- err = alloc_buf1_ppods(toep, p, addr, iov->iov_len + oft);
- if (err)
- return (err);
- }
-
- len = (p->ubuf_nppods - NUM_SENTINEL_PPODS) * PPOD_PAGES * PAGE_SIZE;
- len -= addr & PAGE_MASK;
- if (len > M_TCB_RX_DDP_BUF0_LEN)
- len = M_TCB_RX_DDP_BUF0_LEN;
- len = min(len, toep->tp_tp->rcv_wnd - 32768);
- len = min(len, iov->iov_len + oft);
-
- if (len <= p->kbuf[0]->dgl_length) {
- printf("length too short\n");
- return (EINVAL);
- }
-
- err = t3_pin_pages(toep->tp_rx_dmat, toep->tp_dmamap, addr, len, &gl, p->ubuf);
- if (err)
- return (err);
- if (gl) {
- if (p->ubuf)
- t3_free_ddp_gl(p->ubuf);
- p->ubuf = gl;
- t3_setup_ppods(toep, gl, pages2ppods(gl->dgl_nelem), p->ubuf_tag, len,
- gl->dgl_offset, 0);
- }
- *length = len;
- return (0);
-}
-
-/*
- * Cancel a pending user DDP buffer and wait until the hardware is no longer
- * using it. Called with the receive sockbuf locked.
- */
-void
-t3_cancel_ubuf(struct toepcb *toep, struct sockbuf *rcv)
-{
- struct ddp_state *p = &toep->tp_ddp_state;
- int ubuf_pending = t3_ddp_ubuf_pending(toep);
- int err = 0, count = 0;
-
- if (p->ubuf == NULL)
- return;
-
- sockbuf_lock_assert(rcv);
-
- p->cancel_ubuf = 1;
- while (ubuf_pending && !(rcv->sb_state & SBS_CANTRCVMORE)) {
- CTR3(KTR_TOM,
- "t3_cancel_ubuf: flags0 0x%x flags1 0x%x get_tcb_count %d",
- p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY),
- p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY),
- p->get_tcb_count);
- if (p->get_tcb_count == 0)
- t3_cancel_ddpbuf(toep, p->cur_buf);
- else
- CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d rcv=%p SBS_CANTRCVMORE=%d",
- err, p->get_tcb_count, rcv->sb_timeo, rcv,
- !!(rcv->sb_state & SBS_CANTRCVMORE));
-
- while (p->get_tcb_count && !(rcv->sb_state & SBS_CANTRCVMORE)) {
- if (count & 0xfffffff)
- CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d rcv=%p count=%d",
- err, p->get_tcb_count, rcv->sb_timeo, rcv, count);
- count++;
- err = sbwait(rcv);
- }
- ubuf_pending = t3_ddp_ubuf_pending(toep);
- }
- p->cancel_ubuf = 0;
- p->user_ddp_pending = 0;
-
-}
-
-#define OVERLAY_MASK (V_TF_DDP_PSH_NO_INVALIDATE0(1) | \
- V_TF_DDP_PSH_NO_INVALIDATE1(1) | \
- V_TF_DDP_BUF1_FLUSH(1) | \
- V_TF_DDP_BUF0_FLUSH(1) | \
- V_TF_DDP_PUSH_DISABLE_1(1) | \
- V_TF_DDP_PUSH_DISABLE_0(1) | \
- V_TF_DDP_INDICATE_OUT(1))
-
-/*
- * Post a user buffer as an overlay on top of the current kernel buffer.
- */
-int
-t3_overlay_ubuf(struct toepcb *toep, struct sockbuf *rcv,
- const struct uio *uio, int nonblock, int rcv_flags,
- int modulate, int post_kbuf)
-{
- int err, len, ubuf_idx;
- unsigned long flags;
- struct ddp_state *p = &toep->tp_ddp_state;
-
- if (p->kbuf[0] == NULL) {
- return (EINVAL);
- }
- sockbuf_unlock(rcv);
- err = setup_uio_ppods(toep, uio, 0, &len);
- sockbuf_lock(rcv);
- if (err)
- return (err);
-
- if ((rcv->sb_state & SBS_CANTRCVMORE) ||
- (toep->tp_tp->t_flags & TF_TOE) == 0)
- return (EINVAL);
-
- ubuf_idx = p->kbuf_idx;
- p->buf_state[ubuf_idx].flags = DDP_BF_NOFLIP;
- /* Use existing offset */
- /* Don't need to update .gl, user buffer isn't copied. */
- p->cur_buf = ubuf_idx;
-
- flags = select_ddp_flags(toep, ubuf_idx, nonblock, rcv_flags);
-
- if (post_kbuf) {
- struct ddp_buf_state *dbs = &p->buf_state[ubuf_idx ^ 1];
-
- dbs->cur_offset = 0;
- dbs->flags = 0;
- dbs->gl = p->kbuf[ubuf_idx ^ 1];
- p->kbuf_idx ^= 1;
- flags |= p->kbuf_idx ?
- V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_PUSH_DISABLE_1(0) :
- V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_PUSH_DISABLE_0(0);
- }
-
- if (ubuf_idx == 0) {
- t3_overlay_ddpbuf(toep, 0, p->ubuf_tag << 6, p->kbuf_tag[1] << 6,
- len);
- t3_setup_ddpbufs(toep, 0, 0, p->kbuf[1]->dgl_length, 0,
- flags,
- OVERLAY_MASK | flags, 1);
- } else {
- t3_overlay_ddpbuf(toep, 1, p->kbuf_tag[0] << 6, p->ubuf_tag << 6,
- len);
- t3_setup_ddpbufs(toep, p->kbuf[0]->dgl_length, 0, 0, 0,
- flags,
- OVERLAY_MASK | flags, 1);
- }
-#ifdef T3_TRACE
- T3_TRACE5(TIDTB(so),
- "t3_overlay_ubuf: tag %u flags 0x%x mask 0x%x ubuf_idx %d "
- " kbuf_idx %d",
- p->ubuf_tag, flags, OVERLAY_MASK, ubuf_idx, p->kbuf_idx);
-#endif
- CTR3(KTR_TOM,
- "t3_overlay_ubuf: tag %u flags 0x%x mask 0x%x",
- p->ubuf_tag, flags, OVERLAY_MASK);
- CTR3(KTR_TOM,
- "t3_overlay_ubuf: ubuf_idx %d kbuf_idx %d post_kbuf %d",
- ubuf_idx, p->kbuf_idx, post_kbuf);
-
- return (0);
-}
-
-/*
- * Clean up DDP state that needs to survive until socket close time, such as the
- * DDP buffers. The buffers are already unmapped at this point as unmapping
- * needs the PCI device and a socket may close long after the device is removed.
- */
-void
-t3_cleanup_ddp(struct toepcb *toep)
-{
- struct ddp_state *p = &toep->tp_ddp_state;
- int idx;
-
- for (idx = 0; idx < NUM_DDP_KBUF; idx++)
- if (p->kbuf[idx]) {
- ddp_gl_free_pages(p->kbuf[idx], 0);
- free(p->kbuf[idx], M_DEVBUF);
- }
- if (p->ubuf) {
- ddp_gl_free_pages(p->ubuf, 0);
- free(p->ubuf, M_DEVBUF);
- p->ubuf = NULL;
- }
- toep->tp_ulp_mode = 0;
-}
-
-/*
- * This is a companion to t3_cleanup_ddp() and releases the HW resources
- * associated with a connection's DDP state, such as the page pods.
- * It's called when HW is done with a connection. The rest of the state
- * remains available until both HW and the app are done with the connection.
- */
-void
-t3_release_ddp_resources(struct toepcb *toep)
-{
- struct ddp_state *p = &toep->tp_ddp_state;
- struct tom_data *d = TOM_DATA(toep->tp_toedev);
- int idx;
-
- for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
- t3_free_ppods(d, p->kbuf_tag[idx],
- p->kbuf_nppods[idx]);
- unmap_ddp_gl(p->kbuf[idx]);
- }
-
- if (p->ubuf_nppods) {
- t3_free_ppods(d, p->ubuf_tag, p->ubuf_nppods);
- p->ubuf_nppods = 0;
- }
- if (p->ubuf)
- unmap_ddp_gl(p->ubuf);
-
-}
-
-void
-t3_post_kbuf(struct toepcb *toep, int modulate, int nonblock)
-{
- struct ddp_state *p = &toep->tp_ddp_state;
-
- t3_set_ddp_tag(toep, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6);
- t3_set_ddp_buf(toep, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length);
- t3_repost_kbuf(toep, p->cur_buf, modulate, 1, nonblock);
-#ifdef T3_TRACE
- T3_TRACE1(TIDTB(so),
- "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
-#endif
- CTR1(KTR_TOM,
- "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
-}
-
-/*
- * Prepare a socket for DDP. Must be called when the socket is known to be
- * open.
- */
-int
-t3_enter_ddp(struct toepcb *toep, unsigned int kbuf_size, unsigned int waitall, int nonblock)
-{
- int i, err = ENOMEM;
- static vm_pindex_t color;
- unsigned int nppods, kbuf_pages, idx = 0;
- struct ddp_state *p = &toep->tp_ddp_state;
- struct tom_data *d = TOM_DATA(toep->tp_toedev);
-
-
- if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN)
- return (EINVAL);
-
-#ifdef notyet
- SOCKBUF_LOCK_ASSERT(&so->so_rcv);
-#endif
- kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- nppods = pages2ppods(kbuf_pages);
-
- p->kbuf_noinval = !!waitall;
- p->kbuf_tag[NUM_DDP_KBUF - 1] = -1;
- for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
- p->kbuf[idx] =
- malloc(sizeof (struct ddp_gather_list) + kbuf_pages *
- sizeof(vm_page_t *), M_DEVBUF, M_NOWAIT|M_ZERO);
- if (p->kbuf[idx] == NULL)
- goto err;
- err = t3_alloc_ppods(d, nppods, &p->kbuf_tag[idx]);
- if (err) {
- printf("t3_alloc_ppods failed err=%d\n", err);
- goto err;
- }
-
- p->kbuf_nppods[idx] = nppods;
- p->kbuf[idx]->dgl_length = kbuf_size;
- p->kbuf[idx]->dgl_offset = 0;
- p->kbuf[idx]->dgl_nelem = kbuf_pages;
-
- for (i = 0; i < kbuf_pages; ++i) {
- p->kbuf[idx]->dgl_pages[i] = vm_page_alloc(NULL, color,
- VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED |
- VM_ALLOC_ZERO);
- if (p->kbuf[idx]->dgl_pages[i] == NULL) {
- p->kbuf[idx]->dgl_nelem = i;
- printf("failed to allocate kbuf pages\n");
- goto err;
- }
- }
-#ifdef NEED_BUSDMA
- /*
- * XXX we'll need this for VT-d or any platform with an iommu :-/
- *
- */
- for (i = 0; i < kbuf_pages; ++i)
- p->kbuf[idx]->phys_addr[i] =
- pci_map_page(p->pdev, p->kbuf[idx]->pages[i],
- 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
-#endif
- t3_setup_ppods(toep, p->kbuf[idx], nppods, p->kbuf_tag[idx],
- p->kbuf[idx]->dgl_length, 0, 0);
- }
- cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid);
-
- t3_set_ddp_tag(toep, 0, p->kbuf_tag[0] << 6);
- t3_set_ddp_buf(toep, 0, 0, p->kbuf[0]->dgl_length);
- t3_repost_kbuf(toep, 0, 0, 1, nonblock);
-
- t3_set_rcv_coalesce_enable(toep,
- TOM_TUNABLE(toep->tp_toedev, ddp_rcvcoalesce));
- t3_set_dack_mss(toep, TOM_TUNABLE(toep->tp_toedev, delack)>>1);
-
-#ifdef T3_TRACE
- T3_TRACE4(TIDTB(so),
- "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
- kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
-#endif
- CTR4(KTR_TOM,
- "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
- kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
- cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid);
- return (0);
-
-err:
- t3_release_ddp_resources(toep);
- t3_cleanup_ddp(toep);
- return (err);
-}
-
-int
-t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len)
-{
- int resid_init, err;
- struct ddp_gather_list *gl = (struct ddp_gather_list *)m->m_ddp_gl;
-
- resid_init = uio->uio_resid;
-
- if (!gl->dgl_pages)
- panic("pages not set\n");
-
- CTR4(KTR_TOM, "t3_ddp_copy: offset=%d dgl_offset=%d cur_offset=%d len=%d",
- offset, gl->dgl_offset, m->m_cur_offset, len);
- offset += gl->dgl_offset + m->m_cur_offset;
- KASSERT(len <= gl->dgl_length,
- ("len=%d > dgl_length=%d in ddp_copy\n", len, gl->dgl_length));
-
-
- err = uiomove_fromphys(gl->dgl_pages, offset, len, uio);
- return (err);
-}
-
-
-/*
- * Allocate a contiguous range of n page pods. On success returns 0 and
- * stores the starting tag in *ptag; on failure returns an errno.
- */
-int
-t3_alloc_ppods(struct tom_data *td, unsigned int n, int *ptag)
-{
- unsigned int i, j;
-
- if (__predict_false(!td->ppod_map)) {
- printf("ppod_map not set\n");
- return (EINVAL);
- }
-
- mtx_lock(&td->ppod_map_lock);
-	for (i = 0; i + n <= td->nppods; ) {
-
- for (j = 0; j < n; ++j) /* scan ppod_map[i..i+n-1] */
- if (td->ppod_map[i + j]) {
- i = i + j + 1;
- goto next;
- }
- memset(&td->ppod_map[i], 1, n); /* allocate range */
- mtx_unlock(&td->ppod_map_lock);
- CTR2(KTR_TOM,
- "t3_alloc_ppods: n=%u tag=%u", n, i);
- *ptag = i;
- return (0);
- next: ;
- }
- mtx_unlock(&td->ppod_map_lock);
-	return (ENOMEM);	/* no contiguous run of n free pods */
-}
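
The routine above is a first-fit scan over a byte map, one byte per pod. A
small userland model of the same idea (illustrative only; unlike the driver
code it uses no lock):

#include <assert.h>
#include <string.h>

#define NPPODS	32

static unsigned char ppod_map[NPPODS];	/* 0 = free, 1 = allocated */

static int
alloc_ppods_demo(unsigned int n, int *ptag)
{
	unsigned int i, j;

	for (i = 0; i + n <= NPPODS; ) {
		for (j = 0; j < n; ++j)		/* scan ppod_map[i..i+n-1] */
			if (ppod_map[i + j]) {
				i = i + j + 1;	/* skip past the busy pod */
				goto next;
			}
		memset(&ppod_map[i], 1, n);	/* claim the range */
		*ptag = i;
		return (0);
next:		;
	}
	return (-1);				/* no contiguous run of n free pods */
}

int
main(void)
{
	int tag;

	memset(ppod_map, 1, 4);			/* pods 0-3 are busy */
	assert(alloc_ppods_demo(4, &tag) == 0 && tag == 4);
	assert(alloc_ppods_demo(29, &tag) == -1);
	return (0);
}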
-
-void
-t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n)
-{
-	/* No need to take ppod_map_lock here */
- memset(&td->ppod_map[tag], 0, n);
-}
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_defs.h b/sys/dev/cxgb/ulp/tom/cxgb_defs.h
deleted file mode 100644
index 758f024..0000000
--- a/sys/dev/cxgb/ulp/tom/cxgb_defs.h
+++ /dev/null
@@ -1,91 +0,0 @@
-
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-
-$FreeBSD$
-
-***************************************************************************/
-#ifndef CXGB_DEFS_H_
-#define CXGB_DEFS_H_
-
-#define VALIDATE_TID 0
-
-#define TOEPCB(so) ((struct toepcb *)(sototcpcb((so))->t_toe))
-#define TOE_DEV(so) (TOEPCB((so))->tp_toedev)
-#define toeptoso(toep) ((toep)->tp_tp->t_inpcb->inp_socket)
-#define sototoep(so) (sototcpcb((so))->t_toe)
-
-#define TRACE_ENTER printf("%s:%s entered\n", __FUNCTION__, __FILE__)
-#define TRACE_EXIT printf("%s:%s:%d exited\n", __FUNCTION__, __FILE__, __LINE__)
-
-#define KTR_TOM KTR_SPARE2
-#define KTR_TCB KTR_SPARE3
-
-struct toepcb;
-struct listen_ctx;
-
-void cxgb_log_tcb(struct adapter *sc, unsigned int tid);
-typedef void (*defer_handler_t)(struct toedev *dev, struct mbuf *m);
-
-void t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h);
-void t3_listen_start(struct toedev *dev, struct socket *so, struct t3cdev *cdev);
-void t3_listen_stop(struct toedev *dev, struct socket *so, struct t3cdev *cdev);
-int t3_push_frames(struct socket *so, int req_completion);
-int t3_connect(struct toedev *tdev, struct socket *so, struct rtentry *rt,
- struct sockaddr *nam);
-void t3_init_listen_cpl_handlers(void);
-int t3_init_cpl_io(void);
-void t3_init_wr_tab(unsigned int wr_len);
-uint32_t t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail);
-void t3_send_rx_modulate(struct toepcb *toep);
-void t3_cleanup_rbuf(struct tcpcb *tp, int copied);
-
-void t3_init_socket_ops(void);
-void t3_install_socket_ops(struct socket *so);
-
-
-void t3_disconnect_acceptq(struct socket *listen_so);
-void t3_reset_synq(struct listen_ctx *ctx);
-void t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler);
-
-struct toepcb *toepcb_alloc(void);
-void toepcb_hold(struct toepcb *);
-void toepcb_release(struct toepcb *);
-void toepcb_init(struct toepcb *);
-
-void t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off);
-void t3_set_dack_mss(struct toepcb *toep, int on);
-void t3_set_keepalive(struct toepcb *toep, int on_off);
-void t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag);
-void t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
- unsigned int len);
-int t3_get_tcb(struct toepcb *toep);
-
-int t3_ctloutput(struct socket *so, struct sockopt *sopt);
-
-#endif
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_l2t.c b/sys/dev/cxgb/ulp/tom/cxgb_l2t.c
index 2484923..4352f33 100644
--- a/sys/dev/cxgb/ulp/tom/cxgb_l2t.c
+++ b/sys/dev/cxgb/ulp/tom/cxgb_l2t.c
@@ -1,76 +1,61 @@
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#if __FreeBSD_version > 700000
-#include <sys/rwlock.h>
-#endif
-
#include <sys/socket.h>
#include <net/if.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
-#include <net/if_dl.h>
-#include <net/route.h>
#include <netinet/in.h>
-#include <netinet/if_ether.h>
+#include <netinet/toecore.h>
-#include <cxgb_include.h>
-#include <ulp/tom/cxgb_l2t.h>
+#include "cxgb_include.h"
+#include "ulp/tom/cxgb_tom.h"
+#include "ulp/tom/cxgb_l2t.h"
-#define VLAN_NONE 0xfff
-#define SDL(s) ((struct sockaddr_dl *)s)
-#define RT_ENADDR(sa) ((u_char *)LLADDR(SDL((sa))))
-#define rt_expire rt_rmx.rmx_expire
-
-struct llinfo_arp {
- struct callout la_timer;
- struct rtentry *la_rt;
- struct mbuf *la_hold; /* last packet until resolved/timeout */
- u_short la_preempt; /* countdown for pre-expiry arps */
- u_short la_asked; /* # requests sent */
-};
+#define VLAN_NONE 0xfff
+#define SA(x) ((struct sockaddr *)(x))
+#define SIN(x) ((struct sockaddr_in *)(x))
+#define SINADDR(x) (SIN(x)->sin_addr.s_addr)
/*
* Module locking notes: There is a RW lock protecting the L2 table as a
- * whole plus a spinlock per L2T entry. Entry lookups and allocations happen
+ * whole plus a mutex per L2T entry. Entry lookups and allocations happen
* under the protection of the table lock, individual entry changes happen
- * while holding that entry's spinlock. The table lock nests outside the
+ * while holding that entry's mutex. The table lock nests outside the
* entry locks. Allocations of new entries take the table lock as writers so
* no other lookups can happen while allocating new entries. Entry updates
* take the table lock as readers so multiple entries can be updated in
@@ -78,72 +63,60 @@ struct llinfo_arp {
* and therefore can happen in parallel with entry allocation but no entry
* can change state or increment its ref count during allocation as both of
* these perform lookups.
+ *
+ * When acquiring multiple locks, the order is llentry -> L2 table -> L2 entry.
*/
static inline unsigned int
-vlan_prio(const struct l2t_entry *e)
-{
- return e->vlan >> 13;
-}
-
-static inline unsigned int
arp_hash(u32 key, int ifindex, const struct l2t_data *d)
{
return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
}
-static inline void
-neigh_replace(struct l2t_entry *e, struct llentry *neigh)
-{
- LLE_WLOCK(neigh);
- LLE_ADDREF(neigh);
- LLE_WUNLOCK(neigh);
-
- if (e->neigh)
- LLE_FREE(e->neigh);
- e->neigh = neigh;
-}
-
/*
- * Set up an L2T entry and send any packets waiting in the arp queue. The
- * supplied mbuf is used for the CPL_L2T_WRITE_REQ. Must be called with the
- * entry locked.
+ * Set up an L2T entry and send any packets waiting in the arp queue. Must be
+ * called with the entry locked.
*/
static int
-setup_l2e_send_pending(struct t3cdev *dev, struct mbuf *m,
- struct l2t_entry *e)
+setup_l2e_send_pending(struct adapter *sc, struct l2t_entry *e)
{
+ struct mbuf *m;
struct cpl_l2t_write_req *req;
+ struct port_info *pi = &sc->port[e->smt_idx]; /* smt_idx is port_id */
+
+ mtx_assert(&e->lock, MA_OWNED);
- if (!m) {
- if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
- return (ENOMEM);
+ m = M_GETHDR_OFLD(pi->first_qset, CPL_PRIORITY_CONTROL, req);
+ if (m == NULL) {
+ log(LOG_ERR, "%s: no mbuf, can't setup L2 entry at index %d\n",
+ __func__, e->idx);
+ return (ENOMEM);
}
- /*
- * XXX MH_ALIGN
- */
- req = mtod(m, struct cpl_l2t_write_req *);
- m->m_pkthdr.len = m->m_len = sizeof(*req);
-
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+
+ req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
- V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
- V_L2T_W_PRIO(vlan_prio(e)));
-
+ V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
+ V_L2T_W_PRIO(EVL_PRIOFTAG(e->vlan)));
memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
- m_set_priority(m, CPL_PRIORITY_CONTROL);
- cxgb_ofld_send(dev, m);
+
+ t3_offload_tx(sc, m);
+
+ /*
+ * XXX: We used pi->first_qset to send the L2T_WRITE_REQ. If any mbuf
+ * on the arpq is going out via another queue set associated with the
+ * port then it has a bad race with the L2T_WRITE_REQ. Ideally we
+	 * should wait for the reply to the write before draining the arpq.
+ */
while (e->arpq_head) {
m = e->arpq_head;
e->arpq_head = m->m_next;
m->m_next = NULL;
- cxgb_ofld_send(dev, m);
+ t3_offload_tx(sc, m);
}
e->arpq_tail = NULL;
- e->state = L2T_STATE_VALID;
- return 0;
+ return (0);
}
/*
@@ -153,6 +126,8 @@ setup_l2e_send_pending(struct t3cdev *dev, struct mbuf *m,
static inline void
arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
{
+ mtx_assert(&e->lock, MA_OWNED);
+
m->m_next = NULL;
if (e->arpq_head)
e->arpq_tail->m_next = m;
@@ -161,113 +136,149 @@ arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
e->arpq_tail = m;
}
-int
-t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m, struct l2t_entry *e)
+static void
+resolution_failed_mbuf(struct mbuf *m)
{
- struct llentry *lle = e->neigh;
- struct sockaddr_in sin;
+	log(LOG_ERR, "%s: leaked mbuf %p, CPL at %p\n",
+	    __func__, m, mtod(m, void *));
+}
- bzero(&sin, sizeof(struct sockaddr_in));
- sin.sin_family = AF_INET;
- sin.sin_len = sizeof(struct sockaddr_in);
- sin.sin_addr.s_addr = e->addr;
+static void
+resolution_failed(struct l2t_entry *e)
+{
+ struct mbuf *m;
- CTR2(KTR_CXGB, "send slow on rt=%p eaddr=0x%08x\n", rt, e->addr);
-again:
- switch (e->state) {
- case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
- arpresolve(rt->rt_ifp, rt, NULL,
- (struct sockaddr *)&sin, e->dmac, &lle);
- mtx_lock(&e->lock);
- if (e->state == L2T_STATE_STALE)
- e->state = L2T_STATE_VALID;
- mtx_unlock(&e->lock);
- case L2T_STATE_VALID: /* fast-path, send the packet on */
- return cxgb_ofld_send(dev, m);
- case L2T_STATE_RESOLVING:
- mtx_lock(&e->lock);
- if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
- mtx_unlock(&e->lock);
- goto again;
- }
- arpq_enqueue(e, m);
- mtx_unlock(&e->lock);
+ mtx_assert(&e->lock, MA_OWNED);
+
+ while (e->arpq_head) {
+ m = e->arpq_head;
+ e->arpq_head = m->m_next;
+ m->m_next = NULL;
+ resolution_failed_mbuf(m);
+ }
+ e->arpq_tail = NULL;
+}
+
+static void
+update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr,
+ uint16_t vtag)
+{
+
+ mtx_assert(&e->lock, MA_OWNED);
+
+ /*
+	 * The entry may be in active use (e->refcnt > 0) or not. We update
+ * it even when it's not as this simplifies the case where we decide to
+ * reuse the entry later.
+ */
+
+ if (lladdr == NULL &&
+ (e->state == L2T_STATE_RESOLVING || e->state == L2T_STATE_FAILED)) {
/*
- * Only the first packet added to the arpq should kick off
- * resolution. However, because the m_gethdr below can fail,
- * we allow each packet added to the arpq to retry resolution
- * as a way of recovering from transient memory exhaustion.
- * A better way would be to use a work request to retry L2T
- * entries when there's no memory.
+ * Never got a valid L2 address for this one. Just mark it as
+ * failed instead of removing it from the hash (for which we'd
+ * need to wlock the table).
*/
- if (arpresolve(rt->rt_ifp, rt, NULL,
- (struct sockaddr *)&sin, e->dmac, &lle) == 0) {
- CTR6(KTR_CXGB, "mac=%x:%x:%x:%x:%x:%x\n",
- e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
-
- if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
- return (ENOMEM);
+ e->state = L2T_STATE_FAILED;
+ resolution_failed(e);
+ return;
- mtx_lock(&e->lock);
- if (e->arpq_head)
- setup_l2e_send_pending(dev, m, e);
- else
- m_freem(m);
- mtx_unlock(&e->lock);
+ } else if (lladdr == NULL) {
+
+ /* Valid or already-stale entry was deleted (or expired) */
+
+ KASSERT(e->state == L2T_STATE_VALID ||
+ e->state == L2T_STATE_STALE,
+ ("%s: lladdr NULL, state %d", __func__, e->state));
+
+ e->state = L2T_STATE_STALE;
+
+ } else {
+
+ if (e->state == L2T_STATE_RESOLVING ||
+ e->state == L2T_STATE_FAILED ||
+ memcmp(e->dmac, lladdr, ETHER_ADDR_LEN)) {
+
+ /* unresolved -> resolved; or dmac changed */
+
+ memcpy(e->dmac, lladdr, ETHER_ADDR_LEN);
+ e->vlan = vtag;
+ setup_l2e_send_pending(sc, e);
}
+ e->state = L2T_STATE_VALID;
}
- return 0;
}
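
Summarizing the transitions update_entry() drives (derived from the code
above):

/*
 *	lladdr == NULL, RESOLVING/FAILED       -> FAILED, arpq dropped
 *	lladdr == NULL, VALID/STALE            -> STALE
 *	lladdr != NULL, unresolved or new dmac -> program L2T, drain arpq, VALID
 *	lladdr != NULL, dmac unchanged         -> VALID
 */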
-void
-t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e)
+static int
+resolve_entry(struct adapter *sc, struct l2t_entry *e)
{
- struct mbuf *m0;
- struct sockaddr_in sin;
+ struct tom_data *td = sc->tom_softc;
+ struct toedev *tod = &td->tod;
+ struct sockaddr_in sin = {0};
+ uint8_t dmac[ETHER_ADDR_LEN];
+ uint16_t vtag = EVL_VLID_MASK;
+ int rc;
+
sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
- sin.sin_addr.s_addr = e->addr;
- struct llentry *lle;
-
- if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
- return;
+ SINADDR(&sin) = e->addr;
+
+ rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
+ if (rc == EWOULDBLOCK)
+ return (rc);
+
+ mtx_lock(&e->lock);
+ update_entry(sc, e, rc == 0 ? dmac : NULL, vtag);
+ mtx_unlock(&e->lock);
+
+ return (rc);
+}
+
+int
+t3_l2t_send_slow(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
+{
- rt = e->neigh;
again:
switch (e->state) {
case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
- arpresolve(rt->rt_ifp, rt, NULL,
- (struct sockaddr *)&sin, e->dmac, &lle);
- mtx_lock(&e->lock);
- if (e->state == L2T_STATE_STALE) {
- e->state = L2T_STATE_VALID;
- }
- mtx_unlock(&e->lock);
- return;
+
+ if (resolve_entry(sc, e) != EWOULDBLOCK)
+ goto again; /* entry updated, re-examine state */
+
+ /* Fall through */
+
case L2T_STATE_VALID: /* fast-path, send the packet on */
- return;
+
+ return (t3_offload_tx(sc, m));
+
case L2T_STATE_RESOLVING:
mtx_lock(&e->lock);
- if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
+ if (e->state != L2T_STATE_RESOLVING) {
mtx_unlock(&e->lock);
goto again;
}
+ arpq_enqueue(e, m);
mtx_unlock(&e->lock);
-
- /*
- * Only the first packet added to the arpq should kick off
- * resolution. However, because the alloc_skb below can fail,
- * we allow each packet added to the arpq to retry resolution
- * as a way of recovering from transient memory exhaustion.
- * A better way would be to use a work request to retry L2T
- * entries when there's no memory.
- */
- arpresolve(rt->rt_ifp, rt, NULL,
- (struct sockaddr *)&sin, e->dmac, &lle);
+ if (resolve_entry(sc, e) == EWOULDBLOCK)
+ break;
+
+ mtx_lock(&e->lock);
+ if (e->state == L2T_STATE_VALID && e->arpq_head)
+ setup_l2e_send_pending(sc, e);
+ if (e->state == L2T_STATE_FAILED)
+ resolution_failed(e);
+ mtx_unlock(&e->lock);
+ break;
+
+ case L2T_STATE_FAILED:
+ resolution_failed_mbuf(m);
+ return (EHOSTUNREACH);
}
- return;
+
+ return (0);
}
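
The resulting send path in summary (a sketch of the switch above):

/*
 *	VALID     -> transmit immediately.
 *	STALE     -> revalidate via resolve_entry(), then re-examine the state.
 *	RESOLVING -> queue the mbuf on the arpq; if resolution completes
 *	             synchronously, drain (or fail) the queue right away.
 *	FAILED    -> drop the mbuf and return EHOSTUNREACH.
 */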
+
/*
* Allocate a free L2T entry. Must be called with l2t_data.lock held.
*/
@@ -276,15 +287,19 @@ alloc_l2e(struct l2t_data *d)
{
struct l2t_entry *end, *e, **p;
+ rw_assert(&d->lock, RA_WLOCKED);
+
if (!atomic_load_acq_int(&d->nfree))
- return NULL;
+ return (NULL);
/* there's definitely a free entry */
- for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e)
+ for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e) {
if (atomic_load_acq_int(&e->refcnt) == 0)
goto found;
+ }
- for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e) ;
+ for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e)
+ continue;
found:
d->rover = e + 1;
atomic_add_int(&d->nfree, -1);
@@ -294,90 +309,37 @@ found:
* presently in the hash table. We need to remove it.
*/
if (e->state != L2T_STATE_UNUSED) {
- int hash = arp_hash(e->addr, e->ifindex, d);
+ int hash = arp_hash(e->addr, e->ifp->if_index, d);
- for (p = &d->l2tab[hash].first; *p; p = &(*p)->next)
+ for (p = &d->l2tab[hash].first; *p; p = &(*p)->next) {
if (*p == e) {
*p = e->next;
break;
}
+ }
e->state = L2T_STATE_UNUSED;
}
-
- return e;
-}
-/*
- * Called when an L2T entry has no more users. The entry is left in the hash
- * table since it is likely to be reused but we also bump nfree to indicate
- * that the entry can be reallocated for a different neighbor. We also drop
- * the existing neighbor reference in case the neighbor is going away and is
- * waiting on our reference.
- *
- * Because entries can be reallocated to other neighbors once their ref count
- * drops to 0 we need to take the entry's lock to avoid races with a new
- * incarnation.
- */
-void
-t3_l2e_free(struct l2t_data *d, struct l2t_entry *e)
-{
- struct llentry *lle;
-
- mtx_lock(&e->lock);
- if (atomic_load_acq_int(&e->refcnt) == 0) { /* hasn't been recycled */
- lle = e->neigh;
- e->neigh = NULL;
- }
-
- mtx_unlock(&e->lock);
- atomic_add_int(&d->nfree, 1);
- if (lle)
- LLE_FREE(lle);
-}
-
-
-/*
- * Update an L2T entry that was previously used for the same next hop as neigh.
- * Must be called with softirqs disabled.
- */
-static inline void
-reuse_entry(struct l2t_entry *e, struct llentry *neigh)
-{
-
- mtx_lock(&e->lock); /* avoid race with t3_l2t_free */
- if (neigh != e->neigh)
- neigh_replace(e, neigh);
-
- if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), sizeof(e->dmac)) ||
- (neigh->rt_expire > time_uptime))
- e->state = L2T_STATE_RESOLVING;
- else if (la->la_hold == NULL)
- e->state = L2T_STATE_VALID;
- else
- e->state = L2T_STATE_STALE;
- mtx_unlock(&e->lock);
+ return (e);
}
struct l2t_entry *
-t3_l2t_get(struct t3cdev *dev, struct llentry *neigh, struct ifnet *ifp,
- struct sockaddr *sa)
+t3_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
+ struct tom_data *td = pi->adapter->tom_softc;
struct l2t_entry *e;
- struct l2t_data *d = L2DATA(dev);
- u32 addr = ((struct sockaddr_in *)sa)->sin_addr.s_addr;
- int ifidx = ifp->if_index;
- int hash = arp_hash(addr, ifidx, d);
- unsigned int smt_idx = ((struct port_info *)ifp->if_softc)->port_id;
+ struct l2t_data *d = td->l2t;
+ uint32_t addr = SINADDR(sa);
+ int hash = arp_hash(addr, ifp->if_index, d);
+ unsigned int smt_idx = pi->port_id;
rw_wlock(&d->lock);
- for (e = d->l2tab[hash].first; e; e = e->next)
- if (e->addr == addr && e->ifindex == ifidx &&
- e->smt_idx == smt_idx) {
+ for (e = d->l2tab[hash].first; e; e = e->next) {
+ if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) {
l2t_hold(d, e);
- if (atomic_load_acq_int(&e->refcnt) == 1)
- reuse_entry(e, neigh);
goto done;
}
+ }
/* Need to allocate a new entry */
e = alloc_l2e(d);
@@ -385,116 +347,59 @@ t3_l2t_get(struct t3cdev *dev, struct llentry *neigh, struct ifnet *ifp,
mtx_lock(&e->lock); /* avoid race with t3_l2t_free */
e->next = d->l2tab[hash].first;
d->l2tab[hash].first = e;
- rw_wunlock(&d->lock);
-
+
e->state = L2T_STATE_RESOLVING;
e->addr = addr;
- e->ifindex = ifidx;
+ e->ifp = ifp;
e->smt_idx = smt_idx;
atomic_store_rel_int(&e->refcnt, 1);
- e->neigh = NULL;
-
-
- neigh_replace(e, neigh);
-#ifdef notyet
- /*
- * XXX need to add accessor function for vlan tag
- */
- if (neigh->rt_ifp->if_vlantrunk)
- e->vlan = VLAN_DEV_INFO(neigh->dev)->vlan_id;
- else
-#endif
- e->vlan = VLAN_NONE;
- mtx_unlock(&e->lock);
- return (e);
+ KASSERT(ifp->if_vlantrunk == NULL, ("TOE+VLAN unimplemented."));
+ e->vlan = VLAN_NONE;
+
+ mtx_unlock(&e->lock);
}
-
+
done:
rw_wunlock(&d->lock);
- return e;
-}
-
-/*
- * Called when address resolution fails for an L2T entry to handle packets
- * on the arpq head. If a packet specifies a failure handler it is invoked,
- * otherwise the packets is sent to the TOE.
- *
- * XXX: maybe we should abandon the latter behavior and just require a failure
- * handler.
- */
-static void
-handle_failed_resolution(struct t3cdev *dev, struct mbuf *arpq)
-{
-
- while (arpq) {
- struct mbuf *m = arpq;
-#ifdef notyet
- struct l2t_mbuf_cb *cb = L2T_MBUF_CB(m);
-#endif
- arpq = m->m_next;
- m->m_next = NULL;
-#ifdef notyet
- if (cb->arp_failure_handler)
- cb->arp_failure_handler(dev, m);
- else
-#endif
- cxgb_ofld_send(dev, m);
- }
+ return (e);
}
void
-t3_l2t_update(struct t3cdev *dev, struct llentry *neigh,
- uint8_t *enaddr, struct sockaddr *sa)
+t3_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
+ uint8_t *lladdr, uint16_t vtag)
{
+ struct tom_data *td = t3_tomdata(tod);
+ struct adapter *sc = tod->tod_softc;
struct l2t_entry *e;
- struct mbuf *arpq = NULL;
- struct l2t_data *d = L2DATA(dev);
- u32 addr = *(u32 *) &((struct sockaddr_in *)sa)->sin_addr;
- int hash = arp_hash(addr, ifidx, d);
- struct llinfo_arp *la;
+ struct l2t_data *d = td->l2t;
+ u32 addr = *(u32 *) &SIN(sa)->sin_addr;
+ int hash = arp_hash(addr, ifp->if_index, d);
rw_rlock(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next)
- if (e->addr == addr) {
+ if (e->addr == addr && e->ifp == ifp) {
mtx_lock(&e->lock);
goto found;
}
rw_runlock(&d->lock);
- CTR1(KTR_CXGB, "t3_l2t_update: addr=0x%08x not found", addr);
+
+ /*
+ * This is of no interest to us. We've never had an offloaded
+ * connection to this destination, and we aren't attempting one right
+ * now.
+ */
return;
found:
- printf("found 0x%08x\n", addr);
-
rw_runlock(&d->lock);
- memcpy(e->dmac, enaddr, ETHER_ADDR_LEN);
- printf("mac=%x:%x:%x:%x:%x:%x\n",
- e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
-
- if (atomic_load_acq_int(&e->refcnt)) {
- if (neigh != e->neigh)
- neigh_replace(e, neigh);
-
- la = (struct llinfo_arp *)neigh->rt_llinfo;
- if (e->state == L2T_STATE_RESOLVING) {
-
- if (la->la_asked >= 5 /* arp_maxtries */) {
- arpq = e->arpq_head;
- e->arpq_head = e->arpq_tail = NULL;
- } else
- setup_l2e_send_pending(dev, NULL, e);
- } else {
- e->state = L2T_STATE_VALID;
- if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), 6))
- setup_l2e_send_pending(dev, NULL, e);
- }
- }
- mtx_unlock(&e->lock);
- if (arpq)
- handle_failed_resolution(dev, arpq);
+ KASSERT(e->state != L2T_STATE_UNUSED,
+ ("%s: unused entry in the hash.", __func__));
+
+ update_entry(sc, e, lladdr, vtag);
+ mtx_unlock(&e->lock);
}
struct l2t_data *
@@ -503,9 +408,9 @@ t3_init_l2t(unsigned int l2t_capacity)
struct l2t_data *d;
int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);
- d = cxgb_alloc_mem(size);
+ d = malloc(size, M_CXGB, M_NOWAIT | M_ZERO);
if (!d)
- return NULL;
+ return (NULL);
d->nentries = l2t_capacity;
d->rover = &d->l2tab[1]; /* entry 0 is not used */
@@ -515,10 +420,10 @@ t3_init_l2t(unsigned int l2t_capacity)
for (i = 0; i < l2t_capacity; ++i) {
d->l2tab[i].idx = i;
d->l2tab[i].state = L2T_STATE_UNUSED;
- mtx_init(&d->l2tab[i].lock, "L2TAB", NULL, MTX_DEF);
+ mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
}
- return d;
+ return (d);
}
void
@@ -530,5 +435,26 @@ t3_free_l2t(struct l2t_data *d)
for (i = 0; i < d->nentries; ++i)
mtx_destroy(&d->l2tab[i].lock);
- cxgb_free_mem(d);
+ free(d, M_CXGB);
}
+
+static int
+do_l2t_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+ struct cpl_l2t_write_rpl *rpl = mtod(m, void *);
+
+ if (rpl->status != CPL_ERR_NONE)
+ log(LOG_ERR,
+ "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
+ rpl->status, GET_TID(rpl));
+
+ m_freem(m);
+ return (0);
+}
+
+void
+t3_init_l2t_cpl_handlers(struct adapter *sc)
+{
+ t3_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl);
+}
+#endif
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_l2t.h b/sys/dev/cxgb/ulp/tom/cxgb_l2t.h
index 308ba66..d3ddf9d 100644
--- a/sys/dev/cxgb/ulp/tom/cxgb_l2t.h
+++ b/sys/dev/cxgb/ulp/tom/cxgb_l2t.h
@@ -1,6 +1,6 @@
/**************************************************************************
-Copyright (c) 2007-2008, Chelsio Inc.
+Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -31,26 +31,19 @@ $FreeBSD$
#ifndef _CHELSIO_L2T_H
#define _CHELSIO_L2T_H
-#include <ulp/toecore/cxgb_toedev.h>
#include <sys/lock.h>
-
-#if __FreeBSD_version > 700000
#include <sys/rwlock.h>
-#else
-#define rwlock mtx
-#define rw_wlock(x) mtx_lock((x))
-#define rw_wunlock(x) mtx_unlock((x))
-#define rw_rlock(x) mtx_lock((x))
-#define rw_runlock(x) mtx_unlock((x))
-#define rw_init(x, str) mtx_init((x), (str), NULL, MTX_DEF)
-#define rw_destroy(x) mtx_destroy((x))
-#endif
enum {
- L2T_STATE_VALID, /* entry is up to date */
- L2T_STATE_STALE, /* entry may be used but needs revalidation */
- L2T_STATE_RESOLVING, /* entry needs address resolution */
- L2T_STATE_UNUSED /* entry not in use */
+ L2T_SIZE = 2048
+};
+
+enum {
+ L2T_STATE_VALID, /* entry is up to date */
+ L2T_STATE_STALE, /* entry may be used but needs revalidation */
+ L2T_STATE_RESOLVING, /* entry needs address resolution */
+ L2T_STATE_FAILED, /* failed to resolve */
+ L2T_STATE_UNUSED /* entry not in use */
};
/*
@@ -64,18 +57,17 @@ enum {
struct l2t_entry {
uint16_t state; /* entry state */
uint16_t idx; /* entry index */
- uint32_t addr; /* dest IP address */
- int ifindex; /* neighbor's net_device's ifindex */
+ uint32_t addr; /* nexthop IP address */
+ struct ifnet *ifp; /* outgoing interface */
uint16_t smt_idx; /* SMT index */
 	uint16_t vlan;	/* VLAN TCI (id: bits 0-11, prio: bits 13-15) */
- struct llentry *neigh; /* associated neighbour */
struct l2t_entry *first; /* start of hash chain */
struct l2t_entry *next; /* next l2t_entry on chain */
struct mbuf *arpq_head; /* queue of packets awaiting resolution */
struct mbuf *arpq_tail;
struct mtx lock;
volatile uint32_t refcnt; /* entry reference count */
- uint8_t dmac[6]; /* neighbour's MAC address */
+ uint8_t dmac[ETHER_ADDR_LEN]; /* nexthop's MAC address */
};
struct l2t_data {
@@ -86,76 +78,37 @@ struct l2t_data {
struct l2t_entry l2tab[0];
};
-typedef void (*arp_failure_handler_func)(struct t3cdev *dev,
- struct mbuf *m);
-
-typedef void (*opaque_arp_failure_handler_func)(void *dev,
- struct mbuf *m);
-
-/*
- * Callback stored in an skb to handle address resolution failure.
- */
-struct l2t_mbuf_cb {
- arp_failure_handler_func arp_failure_handler;
-};
-
-/*
- * XXX
- */
-#define L2T_MBUF_CB(skb) ((struct l2t_mbuf_cb *)(skb)->cb)
-
-
-static __inline void set_arp_failure_handler(struct mbuf *m,
- arp_failure_handler_func hnd)
+void t3_l2e_free(struct l2t_data *, struct l2t_entry *e);
+void t3_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
+ uint8_t *lladdr, uint16_t vtag);
+struct l2t_entry *t3_l2t_get(struct port_info *, struct ifnet *,
+ struct sockaddr *);
+int t3_l2t_send_slow(struct adapter *, struct mbuf *, struct l2t_entry *);
+struct l2t_data *t3_init_l2t(unsigned int);
+void t3_free_l2t(struct l2t_data *);
+void t3_init_l2t_cpl_handlers(struct adapter *);
+
+static inline int
+l2t_send(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
{
- m->m_pkthdr.header = (opaque_arp_failure_handler_func)hnd;
-
+ if (__predict_true(e->state == L2T_STATE_VALID))
+ return t3_offload_tx(sc, m);
+ else
+ return t3_l2t_send_slow(sc, m, e);
}
-/*
- * Getting to the L2 data from an offload device.
- */
-#define L2DATA(dev) ((dev)->l2opt)
-
-void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e);
-void t3_l2t_update(struct t3cdev *dev, struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa);
-struct l2t_entry *t3_l2t_get(struct t3cdev *dev, struct rtentry *neigh,
- struct ifnet *ifp, struct sockaddr *sa);
-int t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m,
- struct l2t_entry *e);
-void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e);
-struct l2t_data *t3_init_l2t(unsigned int l2t_capacity);
-void t3_free_l2t(struct l2t_data *d);
-
-#ifdef CONFIG_PROC_FS
-int t3_l2t_proc_setup(struct proc_dir_entry *dir, struct l2t_data *d);
-void t3_l2t_proc_free(struct proc_dir_entry *dir);
-#else
-#define l2t_proc_setup(dir, d) 0
-#define l2t_proc_free(dir)
-#endif
-
-int cxgb_ofld_send(struct t3cdev *dev, struct mbuf *m);
-
-static inline int l2t_send(struct t3cdev *dev, struct mbuf *m,
- struct l2t_entry *e)
-{
- if (__predict_true(e->state == L2T_STATE_VALID)) {
- return cxgb_ofld_send(dev, (struct mbuf *)m);
- }
- return t3_l2t_send_slow(dev, (struct mbuf *)m, e);
-}
-
-static inline void l2t_release(struct l2t_data *d, struct l2t_entry *e)
+static inline void
+l2t_release(struct l2t_data *d, struct l2t_entry *e)
{
- if (atomic_fetchadd_int(&e->refcnt, -1) == 1)
- t3_l2e_free(d, e);
+ if (atomic_fetchadd_int(&e->refcnt, -1) == 1) /* 1 -> 0 transition */
+ atomic_add_int(&d->nfree, 1);
}
-static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e)
+static inline void
+l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{
- if (atomic_fetchadd_int(&e->refcnt, 1) == 1) /* 0 -> 1 transition */
- atomic_add_int(&d->nfree, 1);
+ if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */
+ atomic_add_int(&d->nfree, -1);
}
#endif
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_listen.c b/sys/dev/cxgb/ulp/tom/cxgb_listen.c
index 5dc2d9f..c80abf0 100644
--- a/sys/dev/cxgb/ulp/tom/cxgb_listen.c
+++ b/sys/dev/cxgb/ulp/tom/cxgb_listen.c
@@ -1,343 +1,1140 @@
-/**************************************************************************
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
+#include "opt_inet.h"
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
+#ifdef TCP_OFFLOAD
+#include <sys/param.h>
+#include <sys/refcount.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/tcp_var.h>
+#define TCPSTATES
+#include <netinet/tcp_fsm.h>
+#include <netinet/toecore.h>
- 2. Neither the name of the Chelsio Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
+#include "cxgb_include.h"
+#include "ulp/tom/cxgb_tom.h"
+#include "ulp/tom/cxgb_l2t.h"
+#include "ulp/tom/cxgb_toepcb.h"
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
+static void t3_send_reset_synqe(struct toedev *, struct synq_entry *);
-***************************************************************************/
+static int
+alloc_stid(struct tid_info *t, void *ctx)
+{
+ int stid = -1;
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+ mtx_lock(&t->stid_lock);
+ if (t->sfree) {
+ union listen_entry *p = t->sfree;
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/fcntl.h>
-#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/mbuf.h>
-#include <sys/mutex.h>
+ stid = (p - t->stid_tab) + t->stid_base;
+ t->sfree = p->next;
+ p->ctx = ctx;
+ t->stids_in_use++;
+ }
+ mtx_unlock(&t->stid_lock);
+ return (stid);
+}
-#include <sys/sockopt.h>
-#include <sys/sockstate.h>
-#include <sys/sockbuf.h>
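+/*
+ * Return an stid to the free list.
+ */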
+static void
+free_stid(struct tid_info *t, int stid)
+{
+ union listen_entry *p = stid2entry(t, stid);
-#include <sys/socket.h>
-#include <sys/syslog.h>
+ mtx_lock(&t->stid_lock);
+ p->next = t->sfree;
+ t->sfree = p;
+ t->stids_in_use--;
+ mtx_unlock(&t->stid_lock);
+}
-#include <net/if.h>
-#include <net/route.h>
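+/*
+ * Allocate a listen context for the given inp.  This reserves an stid for it,
+ * takes a reference on the inp, and initializes the refcount to 1.
+ */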
+static struct listen_ctx *
+alloc_lctx(struct tom_data *td, struct inpcb *inp, int qset)
+{
+ struct listen_ctx *lctx;
-#include <netinet/in.h>
-#include <netinet/in_pcb.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
+ INP_WLOCK_ASSERT(inp);
+ lctx = malloc(sizeof(struct listen_ctx), M_CXGB, M_NOWAIT | M_ZERO);
+ if (lctx == NULL)
+ return (NULL);
-#include <cxgb_osdep.h>
-#include <sys/mbufq.h>
+ lctx->stid = alloc_stid(&td->tid_maps, lctx);
+ if (lctx->stid < 0) {
+ free(lctx, M_CXGB);
+ return (NULL);
+ }
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_fsm.h>
+ lctx->inp = inp;
+ in_pcbref(inp);
-#include <netinet/tcp_offload.h>
-#include <net/route.h>
+ lctx->qset = qset;
+ refcount_init(&lctx->refcnt, 1);
+ TAILQ_INIT(&lctx->synq);
+
+ return (lctx);
+}
+
+/* Don't call this directly, use release_lctx instead */
+static int
+free_lctx(struct tom_data *td, struct listen_ctx *lctx)
+{
+ struct inpcb *inp = lctx->inp;
-#include <t3cdev.h>
-#include <common/cxgb_firmware_exports.h>
-#include <common/cxgb_t3_cpl.h>
-#include <common/cxgb_tcb.h>
-#include <common/cxgb_ctl_defs.h>
-#include <cxgb_offload.h>
-#include <ulp/toecore/cxgb_toedev.h>
-#include <ulp/tom/cxgb_l2t.h>
-#include <ulp/tom/cxgb_defs.h>
-#include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-#include <ulp/tom/cxgb_toepcb.h>
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(lctx->refcnt == 0,
+ ("%s: refcnt %d", __func__, lctx->refcnt));
+ KASSERT(TAILQ_EMPTY(&lctx->synq),
+ ("%s: synq not empty.", __func__));
+ KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));
+ CTR4(KTR_CXGB, "%s: stid %u, lctx %p, inp %p",
+ __func__, lctx->stid, lctx, lctx->inp);
-static struct listen_info *listen_hash_add(struct tom_data *d, struct socket *so, unsigned int stid);
-static int listen_hash_del(struct tom_data *d, struct socket *so);
+ free_stid(&td->tid_maps, lctx->stid);
+ free(lctx, M_CXGB);
+
+	return (in_pcbrele_wlocked(inp));
+}
+
+static void
+hold_lctx(struct listen_ctx *lctx)
+{
+
+ refcount_acquire(&lctx->refcnt);
+}
+
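+/*
+ * Hash an inp pointer into a bucket index for the listen hash table.
+ */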
+static inline uint32_t
+listen_hashfn(void *key, u_long mask)
+{
+
+ return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask);
+}
+
+/*
+ * Add a listen_ctx entry to the listen hash table.
+ */
+static void
+listen_hash_add(struct tom_data *td, struct listen_ctx *lctx)
+{
+ int bucket = listen_hashfn(lctx->inp, td->listen_mask);
+
+ mtx_lock(&td->lctx_hash_lock);
+ LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
+ td->lctx_count++;
+ mtx_unlock(&td->lctx_hash_lock);
+}
+
+/*
+ * Look for the listening socket's context entry in the hash and return it.
+ */
+static struct listen_ctx *
+listen_hash_find(struct tom_data *td, struct inpcb *inp)
+{
+ int bucket = listen_hashfn(inp, td->listen_mask);
+ struct listen_ctx *lctx;
+
+ mtx_lock(&td->lctx_hash_lock);
+ LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
+ if (lctx->inp == inp)
+ break;
+ }
+ mtx_unlock(&td->lctx_hash_lock);
+
+ return (lctx);
+}
+
+/*
+ * Removes the listen_ctx structure for inp from the hash and returns it.
+ */
+static struct listen_ctx *
+listen_hash_del(struct tom_data *td, struct inpcb *inp)
+{
+ int bucket = listen_hashfn(inp, td->listen_mask);
+ struct listen_ctx *lctx, *l;
+
+ mtx_lock(&td->lctx_hash_lock);
+ LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
+ if (lctx->inp == inp) {
+ LIST_REMOVE(lctx, link);
+ td->lctx_count--;
+ break;
+ }
+ }
+ mtx_unlock(&td->lctx_hash_lock);
+
+ return (lctx);
+}
+
+/*
+ * Releases a hold on the lctx. Must be called with the listening socket's inp
+ * locked. The inp may be freed by this function and it returns NULL to
+ * indicate this.
+ */
+static struct inpcb *
+release_lctx(struct tom_data *td, struct listen_ctx *lctx)
+{
+ struct inpcb *inp = lctx->inp;
+ int inp_freed = 0;
+
+ INP_WLOCK_ASSERT(inp);
+ if (refcount_release(&lctx->refcnt))
+ inp_freed = free_lctx(td, lctx);
+
+ return (inp_freed ? NULL : inp);
+}
+
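+/*
+ * Program a hardware listener for the listen context by sending a
+ * CPL_PASS_OPEN_REQ to the chip.  The reply is handled by do_pass_open_rpl.
+ */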
+static int
+create_server(struct adapter *sc, struct listen_ctx *lctx)
+{
+ struct mbuf *m;
+ struct cpl_pass_open_req *req;
+ struct inpcb *inp = lctx->inp;
+
+ m = M_GETHDR_OFLD(lctx->qset, CPL_PRIORITY_CONTROL, req);
+ if (m == NULL)
+ return (ENOMEM);
+
+ req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
+ req->local_port = inp->inp_lport;
+ memcpy(&req->local_ip, &inp->inp_laddr, 4);
+ req->peer_port = 0;
+ req->peer_ip = 0;
+ req->peer_netmask = 0;
+ req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
+ req->opt0l = htonl(V_RCV_BUFSIZ(16));
+ req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
+
+ t3_offload_tx(sc, m);
+
+ return (0);
+}
+
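+/*
+ * Shut a hardware listener down by sending a CPL_CLOSE_LISTSRV_REQ.  The stid
+ * is released only when the CPL_CLOSE_LISTSRV_RPL arrives.
+ */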
+static int
+destroy_server(struct adapter *sc, struct listen_ctx *lctx)
+{
+ struct mbuf *m;
+ struct cpl_close_listserv_req *req;
+
+ m = M_GETHDR_OFLD(lctx->qset, CPL_PRIORITY_CONTROL, req);
+ if (m == NULL)
+ return (ENOMEM);
+
+ req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
+ lctx->stid));
+ req->cpu_idx = 0;
+
+ t3_offload_tx(sc, m);
+
+ return (0);
+}
/*
* Process a CPL_CLOSE_LISTSRV_RPL message. If the status is good we release
* the STID.
*/
static int
-do_close_server_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+do_close_server_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- struct cpl_close_listserv_rpl *rpl = cplhdr(m);
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ struct cpl_close_listserv_rpl *rpl = mtod(m, void *);
unsigned int stid = GET_TID(rpl);
+ struct listen_ctx *lctx = lookup_stid(&td->tid_maps, stid);
+ struct inpcb *inp = lctx->inp;
- if (rpl->status != CPL_ERR_NONE)
- log(LOG_ERR, "Unexpected CLOSE_LISTSRV_RPL status %u for "
- "STID %u\n", rpl->status, stid);
- else {
- struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
+ CTR3(KTR_CXGB, "%s: stid %u, status %u", __func__, stid, rpl->status);
- cxgb_free_stid(cdev, stid);
- free(listen_ctx, M_CXGB);
+ if (rpl->status != CPL_ERR_NONE) {
+		log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
+		    __func__, rpl->status, stid);
+ } else {
+ INP_WLOCK(inp);
+ KASSERT(listen_hash_del(td, lctx->inp) == NULL,
+ ("%s: inp %p still in listen hash", __func__, inp));
+ if (release_lctx(td, lctx) != NULL)
+ INP_WUNLOCK(inp);
}
- return (CPL_RET_BUF_DONE);
+ m_freem(m);
+ return (0);
}
/*
- * Process a CPL_PASS_OPEN_RPL message. Remove the socket from the listen hash
- * table and free the STID if there was any error, otherwise nothing to do.
+ * Process a CPL_PASS_OPEN_RPL message. Remove the lctx from the listen hash
+ * table and free it if there was any error, otherwise nothing to do.
*/
static int
-do_pass_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+do_pass_open_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- struct cpl_pass_open_rpl *rpl = cplhdr(m);
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ struct cpl_pass_open_rpl *rpl = mtod(m, void *);
+ int stid = GET_TID(rpl);
+ struct listen_ctx *lctx;
+ struct inpcb *inp;
+
+ /*
+	 * We also get these replies when setting up HW filters.  Just throw
+ * those away.
+ */
+ if (stid >= td->tid_maps.stid_base + td->tid_maps.nstids)
+ goto done;
+
+ lctx = lookup_stid(&td->tid_maps, stid);
+ inp = lctx->inp;
+
+ INP_WLOCK(inp);
+
+ CTR4(KTR_CXGB, "%s: stid %u, status %u, flags 0x%x",
+ __func__, stid, rpl->status, lctx->flags);
+
+ lctx->flags &= ~LCTX_RPL_PENDING;
if (rpl->status != CPL_ERR_NONE) {
- int stid = GET_TID(rpl);
- struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
- struct tom_data *d = listen_ctx->tom_data;
- struct socket *lso = listen_ctx->lso;
-
-#if VALIDATE_TID
- if (!lso)
- return (CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE);
-#endif
- /*
- * Note: It is safe to unconditionally call listen_hash_del()
- * at this point without risking unhashing a reincarnation of
- * an already closed socket (i.e., there is no listen, close,
- * listen, free the sock for the second listen while processing
- * a message for the first race) because we are still holding
- * a reference on the socket. It is possible that the unhash
- * will fail because the socket is already closed, but we can't
- * unhash the wrong socket because it is impossible for the
- * socket to which this message refers to have reincarnated.
- */
- listen_hash_del(d, lso);
- cxgb_free_stid(cdev, stid);
-#ifdef notyet
- /*
- * XXX need to unreference the inpcb
- * but we have no way of knowing that other TOMs aren't referencing it
- */
- sock_put(lso);
+ log(LOG_ERR, "%s: %s: hw listen (stid %d) failed: %d\n",
+ __func__, device_get_nameunit(sc->dev), stid, rpl->status);
+ }
+
+#ifdef INVARIANTS
+ /*
+ * If the inp has been dropped (listening socket closed) then
+ * listen_stop must have run and taken the inp out of the hash.
+ */
+ if (inp->inp_flags & INP_DROPPED) {
+ KASSERT(listen_hash_del(td, inp) == NULL,
+ ("%s: inp %p still in listen hash", __func__, inp));
+ }
#endif
- free(listen_ctx, M_CXGB);
+
+	if ((inp->inp_flags & INP_DROPPED) && rpl->status != CPL_ERR_NONE) {
+ if (release_lctx(td, lctx) != NULL)
+ INP_WUNLOCK(inp);
+ goto done;
+ }
+
+ /*
+ * Listening socket stopped listening earlier and now the chip tells us
+ * it has started the hardware listener. Stop it; the lctx will be
+ * released in do_close_server_rpl.
+ */
+ if (inp->inp_flags & INP_DROPPED) {
+ destroy_server(sc, lctx);
+ INP_WUNLOCK(inp);
+ goto done;
+ }
+
+ /*
+ * Failed to start hardware listener. Take inp out of the hash and
+ * release our reference on it. An error message has been logged
+ * already.
+ */
+ if (rpl->status != CPL_ERR_NONE) {
+ listen_hash_del(td, inp);
+ if (release_lctx(td, lctx) != NULL)
+ INP_WUNLOCK(inp);
+ goto done;
}
- return CPL_RET_BUF_DONE;
+
+ /* hardware listener open for business */
+
+ INP_WUNLOCK(inp);
+done:
+ m_freem(m);
+ return (0);
}
-void
-t3_init_listen_cpl_handlers(void)
+static void
+pass_accept_req_to_protohdrs(const struct cpl_pass_accept_req *cpl,
+ struct in_conninfo *inc, struct tcphdr *th, struct tcpopt *to)
{
- t3tom_register_cpl_handler(CPL_PASS_OPEN_RPL, do_pass_open_rpl);
- t3tom_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
+ const struct tcp_options *t3opt = &cpl->tcp_options;
+
+ bzero(inc, sizeof(*inc));
+ inc->inc_faddr.s_addr = cpl->peer_ip;
+ inc->inc_laddr.s_addr = cpl->local_ip;
+ inc->inc_fport = cpl->peer_port;
+ inc->inc_lport = cpl->local_port;
+
+ bzero(th, sizeof(*th));
+ th->th_sport = cpl->peer_port;
+ th->th_dport = cpl->local_port;
+ th->th_seq = be32toh(cpl->rcv_isn); /* as in tcp_fields_to_host */
+ th->th_flags = TH_SYN;
+
+ bzero(to, sizeof(*to));
+ if (t3opt->mss) {
+ to->to_flags |= TOF_MSS;
+ to->to_mss = be16toh(t3opt->mss);
+ }
+ if (t3opt->wsf) {
+ to->to_flags |= TOF_SCALE;
+ to->to_wscale = t3opt->wsf;
+ }
+ if (t3opt->tstamp)
+ to->to_flags |= TOF_TS;
+ if (t3opt->sack)
+ to->to_flags |= TOF_SACKPERM;
}
-static inline int
-listen_hashfn(const struct socket *so)
+static inline void
+hold_synqe(struct synq_entry *synqe)
{
- return ((unsigned long)so >> 10) & (LISTEN_INFO_HASH_SIZE - 1);
+
+ refcount_acquire(&synqe->refcnt);
+}
+
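+/*
+ * The synq entry is embedded in the trailing space of the mbuf that carried
+ * the CPL (see mbuf_to_synq_entry below), so dropping the last reference
+ * frees the entry along with its mbuf.
+ */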
+static inline void
+release_synqe(struct synq_entry *synqe)
+{
+
+ if (refcount_release(&synqe->refcnt))
+ m_freem(synqe->m);
}
/*
- * Create and add a listen_info entry to the listen hash table. This and the
- * listen hash table functions below cannot be called from softirqs.
+ * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to
+ * store some state temporarily. There will be enough room in the mbuf's
+ * trailing space as the CPL is not that large.
+ *
+ * XXX: bad hack.
*/
-static struct listen_info *
-listen_hash_add(struct tom_data *d, struct socket *so, unsigned int stid)
+static struct synq_entry *
+mbuf_to_synq_entry(struct mbuf *m)
{
- struct listen_info *p;
-
- p = malloc(sizeof(*p), M_CXGB, M_NOWAIT|M_ZERO);
- if (p) {
- int bucket = listen_hashfn(so);
-
- p->so = so; /* just a key, no need to take a reference */
- p->stid = stid;
- mtx_lock(&d->listen_lock);
- p->next = d->listen_hash_tab[bucket];
- d->listen_hash_tab[bucket] = p;
- mtx_unlock(&d->listen_lock);
+	int len = roundup(sizeof(struct synq_entry), 8);
+ uint8_t *buf;
+ int buflen;
+
+ if (__predict_false(M_TRAILINGSPACE(m) < len)) {
+ panic("%s: no room for synq_entry (%td, %d)\n", __func__,
+ M_TRAILINGSPACE(m), len);
}
- return p;
+
+ if (m->m_flags & M_EXT) {
+ buf = m->m_ext.ext_buf;
+ buflen = m->m_ext.ext_size;
+ } else if (m->m_flags & M_PKTHDR) {
+ buf = &m->m_pktdat[0];
+ buflen = MHLEN;
+ } else {
+ buf = &m->m_dat[0];
+ buflen = MLEN;
+ }
+
+ return ((void *)(buf + buflen - len));
}
+#ifdef KTR
+#define REJECT_PASS_ACCEPT() do { \
+ reject_reason = __LINE__; \
+ goto reject; \
+} while (0)
+#else
+#define REJECT_PASS_ACCEPT() do { goto reject; } while (0)
+#endif
+
/*
- * Given a pointer to a listening socket return its server TID by consulting
- * the socket->stid map. Returns -1 if the socket is not in the map.
+ * The context associated with a tid entry via insert_tid could be a synq_entry
+ * or a toepcb. The only way CPL handlers can tell is via a bit in these flags.
+ */
+CTASSERT(offsetof(struct toepcb, tp_flags) == offsetof(struct synq_entry, flags));
+
+/*
+ * Handle a CPL_PASS_ACCEPT_REQ message.
*/
static int
-listen_hash_find(struct tom_data *d, struct socket *so)
+do_pass_accept_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- int stid = -1, bucket = listen_hashfn(so);
- struct listen_info *p;
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ struct toedev *tod = &td->tod;
+ const struct cpl_pass_accept_req *req = mtod(m, void *);
+ unsigned int stid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
+ unsigned int tid = GET_TID(req);
+ struct listen_ctx *lctx = lookup_stid(&td->tid_maps, stid);
+ struct l2t_entry *e = NULL;
+ struct sockaddr_in nam;
+ struct rtentry *rt;
+ struct inpcb *inp;
+ struct socket *so;
+ struct port_info *pi;
+ struct ifnet *ifp;
+ struct in_conninfo inc;
+ struct tcphdr th;
+ struct tcpopt to;
+ struct synq_entry *synqe = NULL;
+ int i;
+#ifdef KTR
+ int reject_reason;
+#endif
- mtx_lock(&d->listen_lock);
- for (p = d->listen_hash_tab[bucket]; p; p = p->next)
- if (p->so == so) {
- stid = p->stid;
- break;
+ CTR4(KTR_CXGB, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
+ lctx);
+
+ pass_accept_req_to_protohdrs(req, &inc, &th, &to);
+
+ /*
+ * Don't offload if the interface that received the SYN doesn't have
+ * IFCAP_TOE enabled.
+ */
+ pi = NULL;
+ for_each_port(sc, i) {
+ if (memcmp(sc->port[i].hw_addr, req->dst_mac, ETHER_ADDR_LEN))
+ continue;
+ pi = &sc->port[i];
+ break;
+ }
+ if (pi == NULL)
+ REJECT_PASS_ACCEPT();
+ ifp = pi->ifp;
+ if ((ifp->if_capenable & IFCAP_TOE4) == 0)
+ REJECT_PASS_ACCEPT();
+
+ /*
+ * Don't offload if the outgoing interface for the route back to the
+ * peer is not the same as the interface that received the SYN.
+ */
+ bzero(&nam, sizeof(nam));
+ nam.sin_len = sizeof(nam);
+ nam.sin_family = AF_INET;
+ nam.sin_addr = inc.inc_faddr;
+ rt = rtalloc1((struct sockaddr *)&nam, 0, 0);
+ if (rt == NULL)
+ REJECT_PASS_ACCEPT();
+ else {
+ struct sockaddr *nexthop;
+
+ RT_UNLOCK(rt);
+ nexthop = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway :
+ (struct sockaddr *)&nam;
+ if (rt->rt_ifp == ifp)
+ e = t3_l2t_get(pi, rt->rt_ifp, nexthop);
+ RTFREE(rt);
+ if (e == NULL)
+ REJECT_PASS_ACCEPT(); /* no l2te, or ifp mismatch */
+ }
+
+ INP_INFO_WLOCK(&V_tcbinfo);
+
+ /* Don't offload if the 4-tuple is already in use */
+ if (toe_4tuple_check(&inc, &th, ifp) != 0) {
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ REJECT_PASS_ACCEPT();
+ }
+
+ inp = lctx->inp; /* listening socket (not owned by the TOE) */
+ INP_WLOCK(inp);
+ if (__predict_false(inp->inp_flags & INP_DROPPED)) {
+ /*
+ * The listening socket has closed. The reply from the TOE to
+ * our CPL_CLOSE_LISTSRV_REQ will ultimately release all
+ * resources tied to this listen context.
+ */
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ REJECT_PASS_ACCEPT();
+ }
+ so = inp->inp_socket;
+
+ /* Reuse the mbuf that delivered the CPL to us */
+ synqe = mbuf_to_synq_entry(m);
+ synqe->flags = TP_IS_A_SYNQ_ENTRY;
+ synqe->m = m;
+ synqe->lctx = lctx;
+ synqe->tid = tid;
+ synqe->e = e;
+ synqe->opt0h = calc_opt0h(so, 0, 0, e);
+ synqe->qset = pi->first_qset + (arc4random() % pi->nqsets);
+ SOCKBUF_LOCK(&so->so_rcv);
+ synqe->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ refcount_init(&synqe->refcnt, 1);
+ atomic_store_rel_int(&synqe->reply, RPL_OK);
+
+ insert_tid(td, synqe, tid);
+ TAILQ_INSERT_TAIL(&lctx->synq, synqe, link);
+ hold_synqe(synqe);
+ hold_lctx(lctx);
+
+ /* syncache_add releases both pcbinfo and pcb locks */
+ toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
+ INP_UNLOCK_ASSERT(inp);
+ INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
+
+ /*
+ * If we replied during syncache_add (reply is RPL_DONE), good.
+	 * Otherwise (reply is still RPL_OK) it is no longer ok to reply, so
+	 * we change it to RPL_DONT here.  The mbuf will stick around as long
+	 * as the entry is in the syncache.  The kernel is free to retry
+	 * syncache_respond, but we'll ignore those attempts because reply is
+	 * now RPL_DONT.
+ */
+ if (atomic_cmpset_int(&synqe->reply, RPL_OK, RPL_DONT)) {
+
+ INP_WLOCK(inp);
+ if (__predict_false(inp->inp_flags & INP_DROPPED)) {
+ /* listener closed. synqe must have been aborted. */
+ KASSERT(synqe->flags & TP_ABORT_SHUTDOWN,
+ ("%s: listener %p closed but synqe %p not aborted",
+ __func__, inp, synqe));
+
+ CTR5(KTR_CXGB,
+ "%s: stid %u, tid %u, lctx %p, synqe %p, ABORTED",
+ __func__, stid, tid, lctx, synqe);
+ INP_WUNLOCK(inp);
+ release_synqe(synqe);
+ return (__LINE__);
}
- mtx_unlock(&d->listen_lock);
- return stid;
+
+ KASSERT(!(synqe->flags & TP_ABORT_SHUTDOWN),
+ ("%s: synqe %p aborted, but listener %p not dropped.",
+ __func__, synqe, inp));
+
+ TAILQ_REMOVE(&lctx->synq, synqe, link);
+ release_synqe(synqe); /* removed from synq list */
+ inp = release_lctx(td, lctx);
+ if (inp)
+ INP_WUNLOCK(inp);
+
+ release_synqe(synqe); /* about to exit function */
+ REJECT_PASS_ACCEPT();
+ }
+
+ KASSERT(synqe->reply == RPL_DONE,
+ ("%s: reply %d", __func__, synqe->reply));
+
+ CTR3(KTR_CXGB, "%s: stid %u, tid %u, OK", __func__, stid, tid);
+ release_synqe(synqe);
+ return (0);
+
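+	/*
+	 * Rejection path: free the mbuf (unless it now backs a synq entry),
+	 * release the L2 entry if we acquired one, and hand the tid back to
+	 * the chip via a deferred tid release.
+	 */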
+reject:
+ CTR4(KTR_CXGB, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
+ reject_reason);
+
+ if (synqe == NULL)
+ m_freem(m);
+ if (e)
+ l2t_release(td->l2t, e);
+ queue_tid_release(tod, tid);
+
+ return (0);
+}
+
+static void
+pass_establish_to_protohdrs(const struct cpl_pass_establish *cpl,
+ struct in_conninfo *inc, struct tcphdr *th, struct tcpopt *to)
+{
+ uint16_t tcp_opt = be16toh(cpl->tcp_opt);
+
+ bzero(inc, sizeof(*inc));
+ inc->inc_faddr.s_addr = cpl->peer_ip;
+ inc->inc_laddr.s_addr = cpl->local_ip;
+ inc->inc_fport = cpl->peer_port;
+ inc->inc_lport = cpl->local_port;
+
+ bzero(th, sizeof(*th));
+ th->th_sport = cpl->peer_port;
+ th->th_dport = cpl->local_port;
+ th->th_flags = TH_ACK;
+ th->th_seq = be32toh(cpl->rcv_isn); /* as in tcp_fields_to_host */
+ th->th_ack = be32toh(cpl->snd_isn); /* ditto */
+
+ bzero(to, sizeof(*to));
+ if (G_TCPOPT_TSTAMP(tcp_opt))
+ to->to_flags |= TOF_TS;
}
/*
- * Delete the listen_info structure for a listening socket. Returns the server
- * TID for the socket if it is present in the socket->stid map, or -1.
+ * Process a CPL_PASS_ESTABLISH message. The T3 has already established a
+ * connection and we need to do the software side setup.
*/
static int
-listen_hash_del(struct tom_data *d, struct socket *so)
+do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
- int bucket, stid = -1;
- struct listen_info *p, **prev;
-
- bucket = listen_hashfn(so);
- prev = &d->listen_hash_tab[bucket];
-
- mtx_lock(&d->listen_lock);
- for (p = *prev; p; prev = &p->next, p = p->next)
- if (p->so == so) {
- stid = p->stid;
- *prev = p->next;
- free(p, M_CXGB);
- break;
- }
- mtx_unlock(&d->listen_lock);
-
- return (stid);
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ struct cpl_pass_establish *cpl = mtod(m, void *);
+ struct toedev *tod = &td->tod;
+ unsigned int tid = GET_TID(cpl);
+ struct synq_entry *synqe = lookup_tid(&td->tid_maps, tid);
+ struct toepcb *toep;
+ struct socket *so;
+ struct listen_ctx *lctx = synqe->lctx;
+ struct inpcb *inp = lctx->inp;
+ struct tcpopt to;
+ struct tcphdr th;
+ struct in_conninfo inc;
+#ifdef KTR
+ int stid = G_PASS_OPEN_TID(ntohl(cpl->tos_tid));
+#endif
+
+ CTR5(KTR_CXGB, "%s: stid %u, tid %u, lctx %p, inp_flags 0x%x",
+ __func__, stid, tid, lctx, inp->inp_flags);
+
+ KASSERT(qs->idx == synqe->qset,
+ ("%s qset mismatch %d %d", __func__, qs->idx, synqe->qset));
+
+ INP_INFO_WLOCK(&V_tcbinfo); /* for syncache_expand */
+ INP_WLOCK(inp);
+
+ if (__predict_false(inp->inp_flags & INP_DROPPED)) {
+ /*
+ * The listening socket has closed. The TOM must have aborted
+ * all the embryonic connections (including this one) that were
+ * on the lctx's synq. do_abort_rpl for the tid is responsible
+ * for cleaning up.
+ */
+ KASSERT(synqe->flags & TP_ABORT_SHUTDOWN,
+ ("%s: listen socket dropped but tid %u not aborted.",
+ __func__, tid));
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ m_freem(m);
+ return (0);
+ }
+
+ pass_establish_to_protohdrs(cpl, &inc, &th, &to);
+
+ /* Lie in order to pass the checks in syncache_expand */
+ to.to_tsecr = synqe->ts;
+ th.th_ack = synqe->iss + 1;
+
+ toep = toepcb_alloc(tod);
+ if (toep == NULL) {
+reset:
+ t3_send_reset_synqe(tod, synqe);
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ m_freem(m);
+ return (0);
+ }
+ toep->tp_qset = qs->idx;
+ toep->tp_l2t = synqe->e;
+ toep->tp_tid = tid;
+ toep->tp_rx_credits = synqe->rx_credits;
+
+ synqe->toep = toep;
+ synqe->cpl = cpl;
+
+ so = inp->inp_socket;
+ if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) {
+ toepcb_free(toep);
+ goto reset;
+ }
+
+ /* Remove the synq entry and release its reference on the lctx */
+ TAILQ_REMOVE(&lctx->synq, synqe, link);
+ inp = release_lctx(td, lctx);
+ if (inp)
+ INP_WUNLOCK(inp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ release_synqe(synqe);
+
+ m_freem(m);
+ return (0);
+}
+
+void
+t3_init_listen_cpl_handlers(struct adapter *sc)
+{
+ t3_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl);
+ t3_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
+ t3_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
+ t3_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish);
}
/*
* Start a listening server by sending a passive open request to HW.
+ *
+ * We can't take the adapter lock here, so accesses to sc->flags,
+ * sc->open_device_map, sc->offload_map, and if_capenable are all race prone.
*/
-void
-t3_listen_start(struct toedev *dev, struct socket *so, struct t3cdev *cdev)
+int
+t3_listen_start(struct toedev *tod, struct tcpcb *tp)
{
- int stid;
- struct mbuf *m;
- struct cpl_pass_open_req *req;
- struct tom_data *d = TOM_DATA(dev);
- struct inpcb *inp = so_sotoinpcb(so);
- struct listen_ctx *ctx;
+ struct tom_data *td = t3_tomdata(tod);
+ struct adapter *sc = tod->tod_softc;
+ struct port_info *pi;
+ struct inpcb *inp = tp->t_inpcb;
+ struct listen_ctx *lctx;
+ int i;
- if (!TOM_TUNABLE(dev, activated))
- return;
+ INP_WLOCK_ASSERT(inp);
- if (listen_hash_find(d, so) != -1)
- return;
-
- CTR1(KTR_TOM, "start listen on port %u", ntohs(inp->inp_lport));
- ctx = malloc(sizeof(*ctx), M_CXGB, M_NOWAIT|M_ZERO);
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ return (0);
- if (!ctx)
- return;
+#ifdef notyet
+ ADAPTER_LOCK(sc);
+ if (IS_BUSY(sc)) {
+ log(LOG_ERR, "%s: listen request ignored, %s is busy",
+ __func__, device_get_nameunit(sc->dev));
+ goto done;
+ }
- ctx->tom_data = d;
- ctx->lso = so;
- ctx->ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) ? ULP_MODE_TCPDDP : 0;
- LIST_INIT(&ctx->synq_head);
-
- stid = cxgb_alloc_stid(d->cdev, d->client, ctx);
- if (stid < 0)
- goto free_ctx;
+ KASSERT(sc->flags & TOM_INIT_DONE,
+ ("%s: TOM not initialized", __func__));
+#endif
- m = m_gethdr(M_NOWAIT, MT_DATA);
- if (m == NULL)
- goto free_stid;
- m->m_pkthdr.len = m->m_len = sizeof(*req);
-
- if (!listen_hash_add(d, so, stid))
- goto free_all;
-
- req = mtod(m, struct cpl_pass_open_req *);
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
- req->local_port = inp->inp_lport;
- memcpy(&req->local_ip, &inp->inp_laddr, 4);
- req->peer_port = 0;
- req->peer_ip = 0;
- req->peer_netmask = 0;
- req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
- req->opt0l = htonl(V_RCV_BUFSIZ(16));
- req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
+ if ((sc->open_device_map & sc->offload_map) == 0)
+ goto done; /* no port that's UP with IFCAP_TOE enabled */
+
+ /*
+ * Find a running port with IFCAP_TOE4. We'll use the first such port's
+ * queues to send the passive open and receive the reply to it.
+ *
+	 * XXX: need a way to mark a port in use by offload.  if_cxgbe should
+ * then reject any attempt to bring down such a port (and maybe reject
+ * attempts to disable IFCAP_TOE on that port too?).
+ */
+ for_each_port(sc, i) {
+ if (isset(&sc->open_device_map, i) &&
+		    (sc->port[i].ifp->if_capenable & IFCAP_TOE4))
+ break;
+ }
+ KASSERT(i < sc->params.nports,
+ ("%s: no running port with TOE capability enabled.", __func__));
+ pi = &sc->port[i];
+
+ if (listen_hash_find(td, inp) != NULL)
+ goto done; /* already setup */
+
+ lctx = alloc_lctx(td, inp, pi->first_qset);
+ if (lctx == NULL) {
+ log(LOG_ERR,
+ "%s: listen request ignored, %s couldn't allocate lctx\n",
+ __func__, device_get_nameunit(sc->dev));
+ goto done;
+ }
+ listen_hash_add(td, lctx);
- m_set_priority(m, CPL_PRIORITY_LISTEN);
- cxgb_ofld_send(cdev, m);
- return;
-
-free_all:
- m_free(m);
-free_stid:
- cxgb_free_stid(cdev, stid);
-#if 0
- sock_put(sk);
-#endif
-free_ctx:
- free(ctx, M_CXGB);
+ CTR5(KTR_CXGB, "%s: stid %u (%s), lctx %p, inp %p", __func__,
+ lctx->stid, tcpstates[tp->t_state], lctx, inp);
+
+ if (create_server(sc, lctx) != 0) {
+ log(LOG_ERR, "%s: %s failed to create hw listener.\n", __func__,
+ device_get_nameunit(sc->dev));
+ (void) listen_hash_del(td, inp);
+ inp = release_lctx(td, lctx);
+ /* can't be freed, host stack has a reference */
+ KASSERT(inp != NULL, ("%s: inp freed", __func__));
+ goto done;
+ }
+ lctx->flags |= LCTX_RPL_PENDING;
+done:
+#ifdef notyet
+ ADAPTER_UNLOCK(sc);
+#endif
+ return (0);
}
/*
* Stop a listening server by sending a close_listsvr request to HW.
* The server TID is freed when we get the reply.
*/
-void
-t3_listen_stop(struct toedev *dev, struct socket *so, struct t3cdev *cdev)
+int
+t3_listen_stop(struct toedev *tod, struct tcpcb *tp)
{
- struct mbuf *m;
- struct cpl_close_listserv_req *req;
struct listen_ctx *lctx;
- int stid = listen_hash_del(TOM_DATA(dev), so);
-
- if (stid < 0)
- return;
+ struct adapter *sc = tod->tod_softc;
+ struct tom_data *td = t3_tomdata(tod);
+ struct inpcb *inp = tp->t_inpcb;
+ struct synq_entry *synqe;
+
+ INP_WLOCK_ASSERT(inp);
+
+ lctx = listen_hash_del(td, inp);
+ if (lctx == NULL)
+ return (ENOENT); /* no hardware listener for this inp */
+
+ CTR4(KTR_CXGB, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
+ lctx, lctx->flags);
- lctx = cxgb_get_lctx(cdev, stid);
/*
- * Do this early so embryonic connections are marked as being aborted
- * while the stid is still open. This ensures pass_establish messages
- * that arrive while we are closing the server will be able to locate
- * the listening socket.
+ * If the reply to the PASS_OPEN is still pending we'll wait for it to
+ * arrive and clean up when it does.
*/
- t3_reset_synq(lctx);
+ if (lctx->flags & LCTX_RPL_PENDING) {
+ KASSERT(TAILQ_EMPTY(&lctx->synq),
+ ("%s: synq not empty.", __func__));
+ return (EINPROGRESS);
+ }
- /* Send the close ASAP to stop further passive opens */
- m = m_gethdr(M_NOWAIT, MT_DATA);
- if (m == NULL) {
- /*
- * XXX allocate from lowmem cache
- */
+ /*
+ * The host stack will abort all the connections on the listening
+	 * socket's so_comp.  It doesn't know about the connections on the
+	 * synq, so we need to take care of those ourselves.
+ */
+ TAILQ_FOREACH(synqe, &lctx->synq, link) {
+ KASSERT(synqe->lctx == lctx, ("%s: synq corrupt", __func__));
+ t3_send_reset_synqe(tod, synqe);
}
- m->m_pkthdr.len = m->m_len = sizeof(*req);
- req = mtod(m, struct cpl_close_listserv_req *);
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, stid));
- req->cpu_idx = 0;
- m_set_priority(m, CPL_PRIORITY_LISTEN);
- cxgb_ofld_send(cdev, m);
+ destroy_server(sc, lctx);
+ return (0);
+}
+
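+/*
+ * toecore callbacks, invoked when an offloaded entry is added to or removed
+ * from the syncache, that keep the synq entry's refcount in step.
+ */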
+void
+t3_syncache_added(struct toedev *tod __unused, void *arg)
+{
+ struct synq_entry *synqe = arg;
+
+ hold_synqe(synqe);
+}
+
+void
+t3_syncache_removed(struct toedev *tod __unused, void *arg)
+{
+ struct synq_entry *synqe = arg;
+
+ release_synqe(synqe);
+}
+
+/* XXX */
+extern void tcp_dooptions(struct tcpopt *, u_char *, int, int);
+
+int
+t3_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
+{
+ struct adapter *sc = tod->tod_softc;
+ struct synq_entry *synqe = arg;
+ struct l2t_entry *e = synqe->e;
+ struct ip *ip = mtod(m, struct ip *);
+ struct tcphdr *th = (void *)(ip + 1);
+ struct cpl_pass_accept_rpl *rpl;
+ struct mbuf *r;
+ struct listen_ctx *lctx = synqe->lctx;
+ struct tcpopt to;
+ int mtu_idx, cpu_idx;
+
+ /*
+ * The first time we run it's during the call to syncache_add. That's
+ * the only one we care about.
+ */
+ if (atomic_cmpset_int(&synqe->reply, RPL_OK, RPL_DONE) == 0)
+ goto done; /* reply to the CPL only if it's ok to do so */
+
+ r = M_GETHDR_OFLD(lctx->qset, CPL_PRIORITY_CONTROL, rpl);
+ if (r == NULL)
+ goto done;
+
+ /*
+ * Use only the provided mbuf (with ip and tcp headers) and what's in
+ * synqe. Avoid looking at the listening socket (lctx->inp) here.
+ *
+ * XXX: if the incoming SYN had the TCP timestamp option but the kernel
+ * decides it doesn't want to use TCP timestamps we have no way of
+ * relaying this info to the chip on a per-tid basis (all we have is a
+ * global knob).
+ */
+ bzero(&to, sizeof(to));
+ tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th),
+ TO_SYN);
+
+ /* stash them for later */
+ synqe->iss = be32toh(th->th_seq);
+ synqe->ts = to.to_tsval;
+
+ mtu_idx = find_best_mtu_idx(sc, NULL, to.to_mss);
+ cpu_idx = sc->rrss_map[synqe->qset];
+
+ rpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ rpl->wr.wrh_lo = 0;
+ OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, synqe->tid));
+ rpl->opt2 = calc_opt2(cpu_idx);
+ rpl->rsvd = rpl->opt2; /* workaround for HW bug */
+ rpl->peer_ip = ip->ip_dst.s_addr;
+ rpl->opt0h = synqe->opt0h |
+ calc_opt0h(NULL, mtu_idx, to.to_wscale, NULL);
+ rpl->opt0l_status = htobe32(CPL_PASS_OPEN_ACCEPT) |
+ calc_opt0l(NULL, synqe->rx_credits);
+
+ l2t_send(sc, r, e);
+done:
+ m_freem(m);
+ return (0);
+}
- t3_disconnect_acceptq(so);
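+/*
+ * Process an ABORT_REQ_RSS CPL for a connection that is still on the synq.
+ * The chip delivers this message (and the abort reply) twice; the
+ * TP_ABORT_REQ_RCVD toggling below pairs up the two copies.
+ */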
+int
+do_abort_req_synqe(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ struct toedev *tod = &td->tod;
+ const struct cpl_abort_req_rss *req = mtod(m, void *);
+ unsigned int tid = GET_TID(req);
+ struct synq_entry *synqe = lookup_tid(&td->tid_maps, tid);
+ struct listen_ctx *lctx = synqe->lctx;
+ struct inpcb *inp = lctx->inp;
+
+ KASSERT(synqe->flags & TP_IS_A_SYNQ_ENTRY,
+ ("%s: !SYNQ_ENTRY", __func__));
+
+ CTR6(KTR_CXGB, "%s: tid %u, synqe %p (%x), lctx %p, status %d",
+ __func__, tid, synqe, synqe->flags, synqe->lctx, req->status);
+
+ INP_WLOCK(inp);
+
+ if (!(synqe->flags & TP_ABORT_REQ_RCVD)) {
+ synqe->flags |= TP_ABORT_REQ_RCVD;
+ synqe->flags |= TP_ABORT_SHUTDOWN;
+ INP_WUNLOCK(inp);
+ m_freem(m);
+ return (0);
+ }
+ synqe->flags &= ~TP_ABORT_REQ_RCVD;
+
+ /*
+ * If we'd sent a reset on this synqe, we'll ignore this and clean up in
+ * the T3's reply to our reset instead.
+ */
+ if (synqe->flags & TP_ABORT_RPL_PENDING) {
+ synqe->flags |= TP_ABORT_RPL_SENT;
+ INP_WUNLOCK(inp);
+ } else {
+ TAILQ_REMOVE(&lctx->synq, synqe, link);
+ inp = release_lctx(td, lctx);
+ if (inp)
+ INP_WUNLOCK(inp);
+ release_tid(tod, tid, qs->idx);
+ l2t_release(td->l2t, synqe->e);
+ release_synqe(synqe);
+ }
+
+ send_abort_rpl(tod, tid, qs->idx);
+ m_freem(m);
+ return (0);
}
+
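+/*
+ * Process an ABORT_RPL_RSS CPL for a synq entry.  Teardown happens on the
+ * second of the two copies of the reply, and only if we had sent the
+ * ABORT_REQ ourselves (TP_ABORT_RPL_PENDING is set).
+ */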
+int
+do_abort_rpl_synqe(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
+{
+ struct adapter *sc = qs->adap;
+ struct tom_data *td = sc->tom_softc;
+ struct toedev *tod = &td->tod;
+ const struct cpl_abort_rpl_rss *rpl = mtod(m, void *);
+ unsigned int tid = GET_TID(rpl);
+ struct synq_entry *synqe = lookup_tid(&td->tid_maps, tid);
+ struct listen_ctx *lctx = synqe->lctx;
+ struct inpcb *inp = lctx->inp;
+
+	CTR4(KTR_CXGB, "%s: tid %u, synqe %p, status %d", __func__, tid,
+	    synqe, rpl->status);
+
+ INP_WLOCK(inp);
+
+ if (synqe->flags & TP_ABORT_RPL_PENDING) {
+ if (!(synqe->flags & TP_ABORT_RPL_RCVD)) {
+ synqe->flags |= TP_ABORT_RPL_RCVD;
+ INP_WUNLOCK(inp);
+ } else {
+ synqe->flags &= ~TP_ABORT_RPL_RCVD;
+			synqe->flags &= ~TP_ABORT_RPL_PENDING;
+
+ TAILQ_REMOVE(&lctx->synq, synqe, link);
+ inp = release_lctx(td, lctx);
+ if (inp)
+ INP_WUNLOCK(inp);
+ release_tid(tod, tid, qs->idx);
+ l2t_release(td->l2t, synqe->e);
+ release_synqe(synqe);
+ }
+ }
+
+ m_freem(m);
+ return (0);
+}
+
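+/*
+ * Send an ABORT_REQ to reset an embryonic connection.  TP_ABORT_SHUTDOWN
+ * marks the synq entry dead; final cleanup is left to do_abort_rpl_synqe
+ * when the chip's reply arrives.
+ */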
+static void
+t3_send_reset_synqe(struct toedev *tod, struct synq_entry *synqe)
+{
+ struct cpl_abort_req *req;
+ unsigned int tid = synqe->tid;
+ struct adapter *sc = tod->tod_softc;
+ struct mbuf *m;
+#ifdef INVARIANTS
+ struct listen_ctx *lctx = synqe->lctx;
+ struct inpcb *inp = lctx->inp;
+#endif
+
+ INP_WLOCK_ASSERT(inp);
+
+ CTR4(KTR_CXGB, "%s: tid %d, synqe %p (%x)", __func__, tid, synqe,
+ synqe->flags);
+
+ if (synqe->flags & TP_ABORT_SHUTDOWN)
+ return;
+
+ synqe->flags |= (TP_ABORT_RPL_PENDING | TP_ABORT_SHUTDOWN);
+
+ m = M_GETHDR_OFLD(synqe->qset, CPL_PRIORITY_DATA, req);
+ if (m == NULL)
+ CXGB_UNIMPLEMENTED();
+
+ req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
+ req->wr.wrh_lo = htonl(V_WR_TID(tid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
+ req->rsvd0 = 0;
+ req->rsvd1 = !(synqe->flags & TP_DATASENT);
+ req->cmd = CPL_ABORT_SEND_RST;
+
+ l2t_send(sc, m, synqe->e);
+}
+
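+/*
+ * Called by toecore once the kernel has accepted a connection expanded from
+ * one of our synq entries.  Attach the toepcb to the new socket and promote
+ * the tid from synq entry to full connection.
+ */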
+void
+t3_offload_socket(struct toedev *tod, void *arg, struct socket *so)
+{
+ struct adapter *sc = tod->tod_softc;
+ struct tom_data *td = sc->tom_softc;
+ struct synq_entry *synqe = arg;
+#ifdef INVARIANTS
+ struct inpcb *inp = sotoinpcb(so);
+#endif
+ struct cpl_pass_establish *cpl = synqe->cpl;
+ struct toepcb *toep = synqe->toep;
+
+ INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
+ INP_WLOCK_ASSERT(inp);
+
+ offload_socket(so, toep);
+ make_established(so, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
+ update_tid(td, toep, synqe->tid);
+}
+#endif
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h b/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h
deleted file mode 100644
index d6f9804..0000000
--- a/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-
-$FreeBSD$
-
-***************************************************************************/
-
-#ifndef T3_DDP_H
-#define T3_DDP_H
-
-/* Should be 1 or 2 indicating single or double kernel buffers. */
-#define NUM_DDP_KBUF 2
-
-/* min receive window for a connection to be considered for DDP */
-#define MIN_DDP_RCV_WIN (48 << 10)
-
-/* amount of Rx window not available to DDP to avoid window exhaustion */
-#define DDP_RSVD_WIN (16 << 10)
-
-/* # of sentinel invalid page pods at the end of a group of valid page pods */
-#define NUM_SENTINEL_PPODS 0
-
-/* # of pages a pagepod can hold without needing another pagepod */
-#define PPOD_PAGES 4
-
-/* page pods are allocated in groups of this size (must be power of 2) */
-#define PPOD_CLUSTER_SIZE 16
-
-/* for each TID we reserve this many page pods up front */
-#define RSVD_PPODS_PER_TID 1
-
-struct pagepod {
- uint32_t pp_vld_tid;
- uint32_t pp_pgsz_tag_color;
- uint32_t pp_max_offset;
- uint32_t pp_page_offset;
- uint64_t pp_rsvd;
- uint64_t pp_addr[5];
-};
-
-#define PPOD_SIZE sizeof(struct pagepod)
-
-#define S_PPOD_TID 0
-#define M_PPOD_TID 0xFFFFFF
-#define V_PPOD_TID(x) ((x) << S_PPOD_TID)
-
-#define S_PPOD_VALID 24
-#define V_PPOD_VALID(x) ((x) << S_PPOD_VALID)
-#define F_PPOD_VALID V_PPOD_VALID(1U)
-
-#define S_PPOD_COLOR 0
-#define M_PPOD_COLOR 0x3F
-#define V_PPOD_COLOR(x) ((x) << S_PPOD_COLOR)
-
-#define S_PPOD_TAG 6
-#define M_PPOD_TAG 0xFFFFFF
-#define V_PPOD_TAG(x) ((x) << S_PPOD_TAG)
-
-#define S_PPOD_PGSZ 30
-#define M_PPOD_PGSZ 0x3
-#define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
-
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <machine/bus.h>
-
-/* DDP gather lists can specify an offset only for the first page. */
-struct ddp_gather_list {
- unsigned int dgl_length;
- unsigned int dgl_offset;
- unsigned int dgl_nelem;
- vm_page_t dgl_pages[0];
-};
-
-struct ddp_buf_state {
- unsigned int cur_offset; /* offset of latest DDP notification */
- unsigned int flags;
- struct ddp_gather_list *gl;
-};
-
-struct ddp_state {
- struct ddp_buf_state buf_state[2]; /* per buffer state */
- int cur_buf;
- unsigned short kbuf_noinval;
- unsigned short kbuf_idx; /* which HW buffer is used for kbuf */
- struct ddp_gather_list *ubuf;
- int user_ddp_pending;
- unsigned int ubuf_nppods; /* # of page pods for buffer 1 */
- unsigned int ubuf_tag;
- unsigned int ubuf_ddp_ready;
- int cancel_ubuf;
- int get_tcb_count;
- unsigned int kbuf_posted;
- unsigned int kbuf_nppods[NUM_DDP_KBUF];
- unsigned int kbuf_tag[NUM_DDP_KBUF];
- struct ddp_gather_list *kbuf[NUM_DDP_KBUF]; /* kernel buffer for DDP prefetch */
-};
-
-/* buf_state flags */
-enum {
- DDP_BF_NOINVAL = 1 << 0, /* buffer is set to NO_INVALIDATE */
- DDP_BF_NOCOPY = 1 << 1, /* DDP to final dest, no copy needed */
- DDP_BF_NOFLIP = 1 << 2, /* buffer flips after GET_TCB_RPL */
- DDP_BF_PSH = 1 << 3, /* set in skb->flags if the a DDP was
- completed with a segment having the
- PSH flag set */
- DDP_BF_NODATA = 1 << 4, /* buffer completed before filling */
-};
-
-#include <ulp/tom/cxgb_toepcb.h>
-struct sockbuf;
-
-/*
- * Returns 1 if a UBUF DMA buffer might be active.
- */
-static inline int
-t3_ddp_ubuf_pending(struct toepcb *toep)
-{
- struct ddp_state *p = &toep->tp_ddp_state;
-
- /* When the TOM_TUNABLE(ddp) is enabled, we're always in ULP_MODE DDP,
- * but DDP_STATE() is only valid if the connection actually enabled
- * DDP.
- */
- if (p->kbuf[0] == NULL)
- return (0);
-
- return (p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY)) ||
- (p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY));
-}
-
-int t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
- unsigned int nppods, unsigned int tag, unsigned int maxoff,
- unsigned int pg_off, unsigned int color);
-int t3_alloc_ppods(struct tom_data *td, unsigned int n, int *tag);
-void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n);
-void t3_free_ddp_gl(struct ddp_gather_list *gl);
-int t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len);
-//void t3_repost_kbuf(struct socket *so, int modulate, int activate);
-void t3_post_kbuf(struct toepcb *toep, int modulate, int nonblock);
-int t3_post_ubuf(struct toepcb *toep, const struct uio *uio, int nonblock,
- int rcv_flags, int modulate, int post_kbuf);
-void t3_cancel_ubuf(struct toepcb *toep, struct sockbuf *rcv);
-int t3_overlay_ubuf(struct toepcb *toep, struct sockbuf *rcv,
- const struct uio *uio, int nonblock,
- int rcv_flags, int modulate, int post_kbuf);
-int t3_enter_ddp(struct toepcb *toep, unsigned int kbuf_size, unsigned int waitall, int nonblock);
-void t3_cleanup_ddp(struct toepcb *toep);
-void t3_release_ddp_resources(struct toepcb *toep);
-void t3_cancel_ddpbuf(struct toepcb *, unsigned int bufidx);
-void t3_overlay_ddpbuf(struct toepcb *, unsigned int bufidx, unsigned int tag0,
- unsigned int tag1, unsigned int len);
-void t3_setup_ddpbufs(struct toepcb *, unsigned int len0, unsigned int offset0,
- unsigned int len1, unsigned int offset1,
- uint64_t ddp_flags, uint64_t flag_mask, int modulate);
-#endif /* T3_DDP_H */
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tcp.h b/sys/dev/cxgb/ulp/tom/cxgb_tcp.h
deleted file mode 100644
index 3042ef0..0000000
--- a/sys/dev/cxgb/ulp/tom/cxgb_tcp.h
+++ /dev/null
@@ -1,47 +0,0 @@
-
-/*-
- * Copyright (c) 2007, Chelsio Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-#ifndef CXGB_TCP_H_
-#define CXGB_TCP_H_
-#ifdef TCP_USRREQS_OVERLOAD
-struct tcpcb *cxgb_tcp_drop(struct tcpcb *tp, int errno);
-#else
-#define cxgb_tcp_drop tcp_drop
-#endif
-void cxgb_tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip);
-struct tcpcb *cxgb_tcp_close(struct tcpcb *tp);
-
-extern struct pr_usrreqs cxgb_tcp_usrreqs;
-#ifdef INET6
-extern struct pr_usrreqs cxgb_tcp6_usrreqs;
-#endif
-
-#include <sys/sysctl.h>
-SYSCTL_DECL(_net_inet_tcp_cxgb);
-#endif /* CXGB_TCP_H_ */
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c b/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c
deleted file mode 100644
index 6cb6107..0000000
--- a/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*-
- * Copyright (c) 2007, Chelsio Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-/*
- * grab bag of accessor routines that will either be moved to netinet
- * or removed
- */
-
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/types.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
-#include <sys/mbuf.h>
-#include <sys/sockopt.h>
-#include <sys/sockbuf.h>
-
-#include <sys/socket.h>
-
-#include <net/if.h>
-#include <net/if_types.h>
-#include <net/if_var.h>
-
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_pcb.h>
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_offload.h>
-#include <netinet/tcp_syncache.h>
-#include <netinet/toedev.h>
-
-#include <ulp/tom/cxgb_tcp_offload.h>
-
-
-/*
- * This file contains code as a short-term staging area before it is moved in
- * to sys/netinet/tcp_offload.c
- */
-
-void
-sockbuf_lock(struct sockbuf *sb)
-{
-
- SOCKBUF_LOCK(sb);
-}
-
-void
-sockbuf_lock_assert(struct sockbuf *sb)
-{
-
- SOCKBUF_LOCK_ASSERT(sb);
-}
-
-void
-sockbuf_unlock(struct sockbuf *sb)
-{
-
- SOCKBUF_UNLOCK(sb);
-}
-
-int
-sockbuf_sbspace(struct sockbuf *sb)
-{
-
- return (sbspace(sb));
-}
-
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h b/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h
deleted file mode 100644
index 2b516d7..0000000
--- a/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* $FreeBSD$ */
-
-#ifndef CXGB_TCP_OFFLOAD_H_
-#define CXGB_TCP_OFFLOAD_H_
-
-struct sockbuf;
-
-void sockbuf_lock(struct sockbuf *);
-void sockbuf_lock_assert(struct sockbuf *);
-void sockbuf_unlock(struct sockbuf *);
-int sockbuf_sbspace(struct sockbuf *);
-
-
-#endif /* CXGB_TCP_OFFLOAD_H_ */
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h b/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h
index 1b28e96..d0046c8 100644
--- a/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h
+++ b/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2007-2008, Chelsio Inc.
+ * Copyright (c) 2007-2009, Chelsio Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,88 +32,63 @@
#include <sys/condvar.h>
#include <sys/mbufq.h>
+#define TP_DATASENT (1 << 0)
+#define TP_TX_WAIT_IDLE (1 << 1)
+#define TP_FIN_SENT (1 << 2)
+#define TP_ABORT_RPL_PENDING (1 << 3)
+#define TP_ABORT_SHUTDOWN (1 << 4)
+#define TP_ABORT_RPL_RCVD (1 << 5)
+#define TP_ABORT_REQ_RCVD (1 << 6)
+#define TP_ATTACHED (1 << 7)
+#define TP_CPL_DONE (1 << 8)
+#define TP_IS_A_SYNQ_ENTRY (1 << 9)
+#define TP_ABORT_RPL_SENT (1 << 10)
+#define TP_SEND_FIN (1 << 11)
+
struct toepcb {
- struct toedev *tp_toedev;
+ TAILQ_ENTRY(toepcb) link; /* toep_list */
+ int tp_flags;
+ struct toedev *tp_tod;
struct l2t_entry *tp_l2t;
- unsigned int tp_tid;
+ int tp_tid;
int tp_wr_max;
int tp_wr_avail;
int tp_wr_unacked;
int tp_delack_mode;
- int tp_mtu_idx;
int tp_ulp_mode;
- int tp_qset_idx;
- int tp_mss_clamp;
int tp_qset;
- int tp_flags;
- int tp_enqueued_bytes;
- int tp_page_count;
- int tp_state;
-
- tcp_seq tp_iss;
- tcp_seq tp_delack_seq;
- tcp_seq tp_rcv_wup;
- tcp_seq tp_copied_seq;
- uint64_t tp_write_seq;
+ int tp_enqueued;
+ int tp_rx_credits;
- volatile int tp_refcount;
- vm_page_t *tp_pages;
-
- struct tcpcb *tp_tp;
- struct mbuf *tp_m_last;
- bus_dma_tag_t tp_tx_dmat;
- bus_dma_tag_t tp_rx_dmat;
- bus_dmamap_t tp_dmamap;
+ struct inpcb *tp_inp;
+ struct mbuf *tp_m_last;
- LIST_ENTRY(toepcb) synq_entry;
struct mbuf_head wr_list;
struct mbuf_head out_of_order_queue;
- struct ddp_state tp_ddp_state;
- struct cv tp_cv;
-
};
static inline void
reset_wr_list(struct toepcb *toep)
{
-
mbufq_init(&toep->wr_list);
}
static inline void
-purge_wr_queue(struct toepcb *toep)
-{
- struct mbuf *m;
-
- while ((m = mbufq_dequeue(&toep->wr_list)) != NULL)
- m_freem(m);
-}
-
-static inline void
enqueue_wr(struct toepcb *toep, struct mbuf *m)
{
-
mbufq_tail(&toep->wr_list, m);
}
static inline struct mbuf *
peek_wr(const struct toepcb *toep)
{
-
return (mbufq_peek(&toep->wr_list));
}
static inline struct mbuf *
dequeue_wr(struct toepcb *toep)
{
-
return (mbufq_dequeue(&toep->wr_list));
}
-#define wr_queue_walk(toep, m) \
- for (m = peek_wr(toep); m; m = m->m_nextpkt)
-
-
-
#endif
-
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tom.c b/sys/dev/cxgb/ulp/tom/cxgb_tom.c
index 1328044..8f0dd25 100644
--- a/sys/dev/cxgb/ulp/tom/cxgb_tom.c
+++ b/sys/dev/cxgb/ulp/tom/cxgb_tom.c
@@ -1,261 +1,106 @@
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ * Written by: Navdeep Parhar <np@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
#include <sys/param.h>
-#include <sys/systm.h>
+#include <sys/types.h>
#include <sys/kernel.h>
-#include <sys/fcntl.h>
-#include <sys/ktr.h>
-#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/eventhandler.h>
-#include <sys/mbuf.h>
+#include <sys/queue.h>
+#include <sys/malloc.h>
#include <sys/module.h>
-#include <sys/condvar.h>
-#include <sys/mutex.h>
#include <sys/socket.h>
-#include <sys/sockopt.h>
-#include <sys/sockstate.h>
-#include <sys/sockbuf.h>
-#include <sys/syslog.h>
#include <sys/taskqueue.h>
-
-#include <net/if.h>
-#include <net/route.h>
-
#include <netinet/in.h>
-#include <netinet/in_pcb.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-
-#include <cxgb_osdep.h>
-#include <sys/mbufq.h>
-
-#include <netinet/in_pcb.h>
-
-#include <ulp/tom/cxgb_tcp_offload.h>
#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_offload.h>
-#include <netinet/tcp_fsm.h>
-
-#include <cxgb_include.h>
-
-#include <net/if_vlan_var.h>
-#include <net/route.h>
-
-#include <t3cdev.h>
-#include <common/cxgb_firmware_exports.h>
-#include <common/cxgb_tcb.h>
-#include <cxgb_include.h>
-#include <common/cxgb_ctl_defs.h>
-#include <common/cxgb_t3_cpl.h>
-#include <cxgb_offload.h>
-#include <ulp/toecore/cxgb_toedev.h>
-#include <ulp/tom/cxgb_l2t.h>
-#include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_defs.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-#include <ulp/tom/cxgb_toepcb.h>
-#include <ulp/tom/cxgb_tcp.h>
-
-
-TAILQ_HEAD(, adapter) adapter_list;
-static struct rwlock adapter_list_lock;
-
-static TAILQ_HEAD(, tom_data) cxgb_list;
-static struct mtx cxgb_list_lock;
-static const unsigned int MAX_ATIDS = 64 * 1024;
-static const unsigned int ATID_BASE = 0x100000;
-
-static int t3_toe_attach(struct toedev *dev, const struct offload_id *entry);
-static void cxgb_register_listeners(void);
-static void t3c_tom_add(struct t3cdev *cdev);
-
-/*
- * Handlers for each CPL opcode
- */
-static cxgb_cpl_handler_func tom_cpl_handlers[256];
-
-
-static eventhandler_tag listen_tag;
-
-static struct offload_id t3_toe_id_tab[] = {
- { TOE_ID_CHELSIO_T3, 0 },
- { TOE_ID_CHELSIO_T3B, 0 },
- { TOE_ID_CHELSIO_T3C, 0 },
- { 0 }
+#include <netinet/toecore.h>
+
+#ifdef TCP_OFFLOAD
+#include "cxgb_include.h"
+#include "ulp/tom/cxgb_tom.h"
+#include "ulp/tom/cxgb_l2t.h"
+#include "ulp/tom/cxgb_toepcb.h"
+
+MALLOC_DEFINE(M_CXGB, "cxgb", "Chelsio T3 Offload services");
+
+/* Module ops */
+static int t3_tom_mod_load(void);
+static int t3_tom_mod_unload(void);
+static int t3_tom_modevent(module_t, int, void *);
+
+/* ULD ops and helpers */
+static int t3_tom_activate(struct adapter *);
+static int t3_tom_deactivate(struct adapter *);
+
+static int alloc_tid_tabs(struct tid_info *, u_int, u_int, u_int, u_int, u_int);
+static void free_tid_tabs(struct tid_info *);
+static int write_smt_entry(struct adapter *, int);
+static void free_tom_data(struct tom_data *);
+
+static struct uld_info tom_uld_info = {
+ .uld_id = ULD_TOM,
+ .activate = t3_tom_activate,
+ .deactivate = t3_tom_deactivate,
};
-static struct tom_info t3_tom_info = {
- .ti_attach = t3_toe_attach,
- .ti_id_table = t3_toe_id_tab,
- .ti_name = "Chelsio-T3"
-};
-
-struct cxgb_client t3c_tom_client = {
- .name = "tom_cxgb3",
- .add = t3c_tom_add,
- .remove = NULL,
- .handlers = tom_cpl_handlers,
- .redirect = NULL
-};
-
-void
-cxgb_log_tcb(struct adapter *sc, unsigned int tid)
-{
-
- char buf[TCB_SIZE];
- uint64_t *tcb = (uint64_t *)buf;
- int i, error;
- struct mc7 *mem = &sc->cm;
-
- error = t3_mc7_bd_read(mem, tid*TCB_SIZE/8, TCB_SIZE/8, tcb);
- if (error)
- printf("cxgb_tcb_log failed\n");
-
-
- CTR1(KTR_CXGB, "TCB tid=%u", tid);
- for (i = 0; i < TCB_SIZE / 32; i++) {
-
- CTR5(KTR_CXGB, "%1d: %08x %08x %08x %08x",
- i, (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32),
- (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32));
-
- tcb += 2;
- CTR4(KTR_CXGB, " %08x %08x %08x %08x",
- (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32),
- (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32));
- tcb += 2;
- }
-}
-
-/*
- * Add an skb to the deferred skb queue for processing from process context.
- */
-void
-t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler)
-{
- struct tom_data *td = TOM_DATA(dev);
-
- m_set_handler(m, handler);
- mtx_lock(&td->deferq.lock);
-
- mbufq_tail(&td->deferq, m);
- if (mbufq_len(&td->deferq) == 1)
- taskqueue_enqueue(td->tq, &td->deferq_task);
- mtx_lock(&td->deferq.lock);
-}
-
struct toepcb *
-toepcb_alloc(void)
+toepcb_alloc(struct toedev *tod)
{
struct toepcb *toep;
-
- toep = malloc(sizeof(struct toepcb), M_CXGB, M_NOWAIT|M_ZERO);
-
+
+ toep = malloc(sizeof(struct toepcb), M_CXGB, M_NOWAIT | M_ZERO);
if (toep == NULL)
return (NULL);
- toepcb_init(toep);
- return (toep);
-}
+ toep->tp_tod = tod;
+ toep->tp_wr_max = toep->tp_wr_avail = 15;
+ toep->tp_wr_unacked = 0;
+ toep->tp_delack_mode = 0;
-void
-toepcb_init(struct toepcb *toep)
-{
- toep->tp_refcount = 1;
- cv_init(&toep->tp_cv, "toep cv");
-}
-
-void
-toepcb_hold(struct toepcb *toep)
-{
- atomic_add_acq_int(&toep->tp_refcount, 1);
+ return (toep);
}
void
-toepcb_release(struct toepcb *toep)
+toepcb_free(struct toepcb *toep)
{
- if (toep->tp_refcount == 1) {
- free(toep, M_CXGB);
- return;
- }
- atomic_add_acq_int(&toep->tp_refcount, -1);
-}
-
-
-/*
- * Add a T3 offload device to the list of devices we are managing.
- */
-static void
-t3cdev_add(struct tom_data *t)
-{
- mtx_lock(&cxgb_list_lock);
- TAILQ_INSERT_TAIL(&cxgb_list, t, entry);
- mtx_unlock(&cxgb_list_lock);
-}
-
-static inline int
-cdev2type(struct t3cdev *cdev)
-{
- int type = 0;
-
- switch (cdev->type) {
- case T3A:
- type = TOE_ID_CHELSIO_T3;
- break;
- case T3B:
- type = TOE_ID_CHELSIO_T3B;
- break;
- case T3C:
- type = TOE_ID_CHELSIO_T3C;
- break;
- }
- return (type);
+ free(toep, M_CXGB);
}
-/*
- * Allocate and initialize the TID tables. Returns 0 on success.
- */
static int
-init_tid_tabs(struct tid_info *t, unsigned int ntids,
- unsigned int natids, unsigned int nstids,
- unsigned int atid_base, unsigned int stid_base)
+alloc_tid_tabs(struct tid_info *t, u_int ntids, u_int natids, u_int nstids,
+ u_int atid_base, u_int stid_base)
{
unsigned long size = ntids * sizeof(*t->tid_tab) +
natids * sizeof(*t->atid_tab) + nstids * sizeof(*t->stid_tab);
- t->tid_tab = cxgb_alloc_mem(size);
+ t->tid_tab = malloc(size, M_CXGB, M_NOWAIT | M_ZERO);
if (!t->tid_tab)
return (ENOMEM);
@@ -270,8 +115,8 @@ init_tid_tabs(struct tid_info *t, unsigned int ntids,
t->afree = NULL;
t->stids_in_use = t->atids_in_use = 0;
t->tids_in_use = 0;
- mtx_init(&t->stid_lock, "stid", NULL, MTX_DUPOK|MTX_DEF);
- mtx_init(&t->atid_lock, "atid", NULL, MTX_DUPOK|MTX_DEF);
+ mtx_init(&t->stid_lock, "stid", NULL, MTX_DEF);
+ mtx_init(&t->atid_lock, "atid", NULL, MTX_DEF);
/*
* Setup the free lists for stid_tab and atid_tab.
@@ -286,1240 +131,266 @@ init_tid_tabs(struct tid_info *t, unsigned int ntids,
t->atid_tab[natids - 1].next = &t->atid_tab[natids];
t->afree = t->atid_tab;
}
- return 0;
+ return (0);
}
static void
-free_tid_maps(struct tid_info *t)
+free_tid_tabs(struct tid_info *t)
{
- mtx_destroy(&t->stid_lock);
- mtx_destroy(&t->atid_lock);
- cxgb_free_mem(t->tid_tab);
+ if (mtx_initialized(&t->stid_lock))
+ mtx_destroy(&t->stid_lock);
+ if (mtx_initialized(&t->atid_lock))
+ mtx_destroy(&t->atid_lock);
+ free(t->tid_tab, M_CXGB);
}
-static inline void
-add_adapter(adapter_t *adap)
-{
- rw_wlock(&adapter_list_lock);
- TAILQ_INSERT_TAIL(&adapter_list, adap, adapter_entry);
- rw_wunlock(&adapter_list_lock);
-}
-
-static inline void
-remove_adapter(adapter_t *adap)
-{
- rw_wlock(&adapter_list_lock);
- TAILQ_REMOVE(&adapter_list, adap, adapter_entry);
- rw_wunlock(&adapter_list_lock);
-}
-
-/*
- * Populate a TID_RELEASE WR. The mbuf must be already propely sized.
- */
-static inline void
-mk_tid_release(struct mbuf *m, unsigned int tid)
-{
- struct cpl_tid_release *req;
-
- m_set_priority(m, CPL_PRIORITY_SETUP);
- req = mtod(m, struct cpl_tid_release *);
- m->m_pkthdr.len = m->m_len = sizeof(*req);
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
-}
-
-static void
-t3_process_tid_release_list(void *data, int pending)
+static int
+write_smt_entry(struct adapter *sc, int idx)
{
+ struct port_info *pi = &sc->port[idx];
+ struct cpl_smt_write_req *req;
struct mbuf *m;
- struct t3cdev *tdev = data;
- struct t3c_data *td = T3C_DATA (tdev);
-
- mtx_lock(&td->tid_release_lock);
- while (td->tid_release_list) {
- struct toe_tid_entry *p = td->tid_release_list;
-
- td->tid_release_list = (struct toe_tid_entry *)p->ctx;
- mtx_unlock(&td->tid_release_lock);
- m = m_get(M_WAIT, MT_DATA);
- mk_tid_release(m, p - td->tid_maps.tid_tab);
- cxgb_ofld_send(tdev, m);
- p->ctx = NULL;
- mtx_lock(&td->tid_release_lock);
- }
- mtx_unlock(&td->tid_release_lock);
-}
-int
-cxgb_offload_activate(struct adapter *adapter)
-{
- struct t3cdev *dev = &adapter->tdev;
- int natids, err;
- struct t3c_data *t;
- struct tid_range stid_range, tid_range;
- struct mtutab mtutab;
- unsigned int l2t_capacity;
-
- t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);
- if (!t)
+ m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, req);
+ if (m == NULL) {
+ log(LOG_ERR, "%s: no mbuf, can't write SMT entry for %d\n",
+ __func__, idx);
return (ENOMEM);
- dev->adapter = adapter;
-
- err = (EOPNOTSUPP);
- if (dev->ctl(dev, GET_TX_MAX_CHUNK, &t->tx_max_chunk) < 0 ||
- dev->ctl(dev, GET_MAX_OUTSTANDING_WR, &t->max_wrs) < 0 ||
- dev->ctl(dev, GET_L2T_CAPACITY, &l2t_capacity) < 0 ||
- dev->ctl(dev, GET_MTUS, &mtutab) < 0 ||
- dev->ctl(dev, GET_TID_RANGE, &tid_range) < 0 ||
- dev->ctl(dev, GET_STID_RANGE, &stid_range) < 0) {
- device_printf(adapter->dev, "%s: dev->ctl check failed\n", __FUNCTION__);
- goto out_free;
}
-
- err = (ENOMEM);
- L2DATA(dev) = t3_init_l2t(l2t_capacity);
- if (!L2DATA(dev)) {
- device_printf(adapter->dev, "%s: t3_init_l2t failed\n", __FUNCTION__);
- goto out_free;
- }
- natids = min(tid_range.num / 2, MAX_ATIDS);
- err = init_tid_tabs(&t->tid_maps, tid_range.num, natids,
- stid_range.num, ATID_BASE, stid_range.base);
- if (err) {
- device_printf(adapter->dev, "%s: init_tid_tabs failed\n", __FUNCTION__);
- goto out_free_l2t;
- }
-
- t->mtus = mtutab.mtus;
- t->nmtus = mtutab.size;
-
- TASK_INIT(&t->tid_release_task, 0 /* XXX? */, t3_process_tid_release_list, dev);
- mtx_init(&t->tid_release_lock, "tid release", NULL, MTX_DUPOK|MTX_DEF);
- t->dev = dev;
-
- T3C_DATA (dev) = t;
- dev->recv = process_rx;
- dev->arp_update = t3_l2t_update;
- /* Register netevent handler once */
- if (TAILQ_EMPTY(&adapter_list)) {
-#if defined(CONFIG_CHELSIO_T3_MODULE)
- if (prepare_arp_with_t3core())
- log(LOG_ERR, "Unable to set offload capabilities\n");
-#endif
- }
- CTR1(KTR_CXGB, "adding adapter %p", adapter);
- add_adapter(adapter);
- device_printf(adapter->dev, "offload started\n");
- adapter->flags |= CXGB_OFLD_INIT;
- return (0);
-
-out_free_l2t:
- t3_free_l2t(L2DATA(dev));
- L2DATA(dev) = NULL;
-out_free:
- free(t, M_CXGB);
- return (err);
-}
-
-void
-cxgb_offload_deactivate(struct adapter *adapter)
-{
- struct t3cdev *tdev = &adapter->tdev;
- struct t3c_data *t = T3C_DATA(tdev);
-
- printf("removing adapter %p\n", adapter);
- remove_adapter(adapter);
- if (TAILQ_EMPTY(&adapter_list)) {
-#if defined(CONFIG_CHELSIO_T3_MODULE)
- restore_arp_sans_t3core();
-#endif
- }
- free_tid_maps(&t->tid_maps);
- T3C_DATA(tdev) = NULL;
- t3_free_l2t(L2DATA(tdev));
- L2DATA(tdev) = NULL;
- mtx_destroy(&t->tid_release_lock);
- free(t, M_CXGB);
-}
-
-/*
- * Sends an sk_buff to a T3C driver after dealing with any active network taps.
- */
-int
-cxgb_ofld_send(struct t3cdev *dev, struct mbuf *m)
-{
- int r;
-
- r = dev->send(dev, m);
- return r;
-}
-
-static struct ifnet *
-get_iff_from_mac(adapter_t *adapter, const uint8_t *mac, unsigned int vlan)
-{
- int i;
-
- for_each_port(adapter, i) {
-#ifdef notyet
- const struct vlan_group *grp;
-#endif
- const struct port_info *p = &adapter->port[i];
- struct ifnet *ifp = p->ifp;
-
- if (!memcmp(p->hw_addr, mac, ETHER_ADDR_LEN)) {
-#ifdef notyet
-
- if (vlan && vlan != EVL_VLID_MASK) {
- grp = p->vlan_grp;
- dev = grp ? grp->vlan_devices[vlan] : NULL;
- } else
- while (dev->master)
- dev = dev->master;
-#endif
- return (ifp);
- }
- }
- return (NULL);
-}
-static inline void
-failover_fixup(adapter_t *adapter, int port)
-{
- if (adapter->params.rev == 0) {
- struct ifnet *ifp = adapter->port[port].ifp;
- struct cmac *mac = &adapter->port[port].mac;
- if (!(ifp->if_flags & IFF_UP)) {
- /* Failover triggered by the interface ifdown */
- t3_write_reg(adapter, A_XGM_TX_CTRL + mac->offset,
- F_TXEN);
- t3_read_reg(adapter, A_XGM_TX_CTRL + mac->offset);
- } else {
- /* Failover triggered by the interface link down */
- t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset, 0);
- t3_read_reg(adapter, A_XGM_RX_CTRL + mac->offset);
- t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset,
- F_RXEN);
- }
- }
-}
+ req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
+ req->mtu_idx = NMTUS - 1; /* should be 0 but there's a T3 bug */
+ req->iff = idx;
+ memset(req->src_mac1, 0, sizeof(req->src_mac1));
+ memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
-static int
-cxgb_ulp_iscsi_ctl(adapter_t *adapter, unsigned int req, void *data)
-{
- int ret = 0;
- struct ulp_iscsi_info *uiip = data;
-
- switch (req) {
- case ULP_ISCSI_GET_PARAMS:
- uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT);
- uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT);
- uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK);
- /*
- * On tx, the iscsi pdu has to be <= tx page size and has to
- * fit into the Tx PM FIFO.
- */
- uiip->max_txsz = min(adapter->params.tp.tx_pg_size,
- t3_read_reg(adapter, A_PM1_TX_CFG) >> 17);
- /* on rx, the iscsi pdu has to be < rx page size and the
- whole pdu + cpl headers has to fit into one sge buffer */
- /* also check the max rx data length programmed in TP */
- uiip->max_rxsz = min(uiip->max_rxsz,
- ((t3_read_reg(adapter, A_TP_PARA_REG2))
- >> S_MAXRXDATA) & M_MAXRXDATA);
- break;
- case ULP_ISCSI_SET_PARAMS:
- t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask);
- break;
- default:
- ret = (EOPNOTSUPP);
- }
- return ret;
-}
-
-/* Response queue used for RDMA events. */
-#define ASYNC_NOTIF_RSPQ 0
-
-static int
-cxgb_rdma_ctl(adapter_t *adapter, unsigned int req, void *data)
-{
- int ret = 0;
-
- switch (req) {
- case RDMA_GET_PARAMS: {
- struct rdma_info *req = data;
-
- req->udbell_physbase = rman_get_start(adapter->udbs_res);
- req->udbell_len = rman_get_size(adapter->udbs_res);
- req->tpt_base = t3_read_reg(adapter, A_ULPTX_TPT_LLIMIT);
- req->tpt_top = t3_read_reg(adapter, A_ULPTX_TPT_ULIMIT);
- req->pbl_base = t3_read_reg(adapter, A_ULPTX_PBL_LLIMIT);
- req->pbl_top = t3_read_reg(adapter, A_ULPTX_PBL_ULIMIT);
- req->rqt_base = t3_read_reg(adapter, A_ULPRX_RQ_LLIMIT);
- req->rqt_top = t3_read_reg(adapter, A_ULPRX_RQ_ULIMIT);
- req->kdb_addr = (void *)((unsigned long)rman_get_virtual(adapter->regs_res) + A_SG_KDOORBELL); break;
- }
- case RDMA_CQ_OP: {
- struct rdma_cq_op *req = data;
-
- /* may be called in any context */
- mtx_lock_spin(&adapter->sge.reg_lock);
- ret = t3_sge_cqcntxt_op(adapter, req->id, req->op,
- req->credits);
- mtx_unlock_spin(&adapter->sge.reg_lock);
- break;
- }
- case RDMA_GET_MEM: {
- struct ch_mem_range *t = data;
- struct mc7 *mem;
-
- if ((t->addr & 7) || (t->len & 7))
- return (EINVAL);
- if (t->mem_id == MEM_CM)
- mem = &adapter->cm;
- else if (t->mem_id == MEM_PMRX)
- mem = &adapter->pmrx;
- else if (t->mem_id == MEM_PMTX)
- mem = &adapter->pmtx;
- else
- return (EINVAL);
-
- ret = t3_mc7_bd_read(mem, t->addr/8, t->len/8, (u64 *)t->buf);
- if (ret)
- return (ret);
- break;
- }
- case RDMA_CQ_SETUP: {
- struct rdma_cq_setup *req = data;
-
- mtx_lock_spin(&adapter->sge.reg_lock);
- ret = t3_sge_init_cqcntxt(adapter, req->id, req->base_addr,
- req->size, ASYNC_NOTIF_RSPQ,
- req->ovfl_mode, req->credits,
- req->credit_thres);
- mtx_unlock_spin(&adapter->sge.reg_lock);
- break;
- }
- case RDMA_CQ_DISABLE:
- mtx_lock_spin(&adapter->sge.reg_lock);
- ret = t3_sge_disable_cqcntxt(adapter, *(unsigned int *)data);
- mtx_unlock_spin(&adapter->sge.reg_lock);
- break;
- case RDMA_CTRL_QP_SETUP: {
- struct rdma_ctrlqp_setup *req = data;
-
- mtx_lock_spin(&adapter->sge.reg_lock);
- ret = t3_sge_init_ecntxt(adapter, FW_RI_SGEEC_START, 0,
- SGE_CNTXT_RDMA, ASYNC_NOTIF_RSPQ,
- req->base_addr, req->size,
- FW_RI_TID_START, 1, 0);
- mtx_unlock_spin(&adapter->sge.reg_lock);
- break;
- }
- default:
- ret = EOPNOTSUPP;
- }
- return (ret);
-}
+ t3_offload_tx(sc, m);
-static int
-cxgb_offload_ctl(struct t3cdev *tdev, unsigned int req, void *data)
-{
- struct adapter *adapter = tdev2adap(tdev);
- struct tid_range *tid;
- struct mtutab *mtup;
- struct iff_mac *iffmacp;
- struct ddp_params *ddpp;
- struct adap_ports *ports;
- struct ofld_page_info *rx_page_info;
- struct tp_params *tp = &adapter->params.tp;
- int port;
-
- switch (req) {
- case GET_MAX_OUTSTANDING_WR:
- *(unsigned int *)data = FW_WR_NUM;
- break;
- case GET_WR_LEN:
- *(unsigned int *)data = WR_FLITS;
- break;
- case GET_TX_MAX_CHUNK:
- *(unsigned int *)data = 1 << 20; /* 1MB */
- break;
- case GET_TID_RANGE:
- tid = data;
- tid->num = t3_mc5_size(&adapter->mc5) -
- adapter->params.mc5.nroutes -
- adapter->params.mc5.nfilters -
- adapter->params.mc5.nservers;
- tid->base = 0;
- break;
- case GET_STID_RANGE:
- tid = data;
- tid->num = adapter->params.mc5.nservers;
- tid->base = t3_mc5_size(&adapter->mc5) - tid->num -
- adapter->params.mc5.nfilters -
- adapter->params.mc5.nroutes;
- break;
- case GET_L2T_CAPACITY:
- *(unsigned int *)data = 2048;
- break;
- case GET_MTUS:
- mtup = data;
- mtup->size = NMTUS;
- mtup->mtus = adapter->params.mtus;
- break;
- case GET_IFF_FROM_MAC:
- iffmacp = data;
- iffmacp->dev = get_iff_from_mac(adapter, iffmacp->mac_addr,
- iffmacp->vlan_tag & EVL_VLID_MASK);
- break;
- case GET_DDP_PARAMS:
- ddpp = data;
- ddpp->llimit = t3_read_reg(adapter, A_ULPRX_TDDP_LLIMIT);
- ddpp->ulimit = t3_read_reg(adapter, A_ULPRX_TDDP_ULIMIT);
- ddpp->tag_mask = t3_read_reg(adapter, A_ULPRX_TDDP_TAGMASK);
- break;
- case GET_PORTS:
- ports = data;
- ports->nports = adapter->params.nports;
- for_each_port(adapter, port)
- ports->lldevs[port] = adapter->port[port].ifp;
- break;
- case FAILOVER:
- port = *(int *)data;
- t3_port_failover(adapter, port);
- failover_fixup(adapter, port);
- break;
- case FAILOVER_DONE:
- port = *(int *)data;
- t3_failover_done(adapter, port);
- break;
- case FAILOVER_CLEAR:
- t3_failover_clear(adapter);
- break;
- case GET_RX_PAGE_INFO:
- rx_page_info = data;
- rx_page_info->page_size = tp->rx_pg_size;
- rx_page_info->num = tp->rx_num_pgs;
- break;
- case ULP_ISCSI_GET_PARAMS:
- case ULP_ISCSI_SET_PARAMS:
- if (!offload_running(adapter))
- return (EAGAIN);
- return cxgb_ulp_iscsi_ctl(adapter, req, data);
- case RDMA_GET_PARAMS:
- case RDMA_CQ_OP:
- case RDMA_CQ_SETUP:
- case RDMA_CQ_DISABLE:
- case RDMA_CTRL_QP_SETUP:
- case RDMA_GET_MEM:
- if (!offload_running(adapter))
- return (EAGAIN);
- return cxgb_rdma_ctl(adapter, req, data);
- default:
- return (EOPNOTSUPP);
- }
- return 0;
+ return (0);
}
-/*
- * Allocate a TOM data structure,
- * initialize its cpl_handlers
- * and register it as a T3C client
- */
static void
-t3c_tom_add(struct t3cdev *cdev)
+free_tom_data(struct tom_data *td)
{
- int i;
- unsigned int wr_len;
- struct tom_data *t;
- struct toedev *tdev;
- struct adap_ports *port_info;
-
- t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);
- if (t == NULL)
- return;
-
- cdev->send = t3_offload_tx;
- cdev->ctl = cxgb_offload_ctl;
-
- if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0)
- goto out_free_tom;
-
- port_info = malloc(sizeof(*port_info), M_CXGB, M_NOWAIT|M_ZERO);
- if (!port_info)
- goto out_free_tom;
-
- if (cdev->ctl(cdev, GET_PORTS, port_info) < 0)
- goto out_free_all;
-
- t3_init_wr_tab(wr_len);
- t->cdev = cdev;
- t->client = &t3c_tom_client;
-
- /* Register TCP offload device */
- tdev = &t->tdev;
- tdev->tod_ttid = cdev2type(cdev);
- tdev->tod_lldev = cdev->lldev;
-
- if (register_toedev(tdev, "toe%d")) {
- printf("unable to register offload device");
- goto out_free_all;
- }
- TOM_DATA(tdev) = t;
+ KASSERT(TAILQ_EMPTY(&td->toep_list),
+ ("%s: toep_list not empty", __func__));
- for (i = 0; i < port_info->nports; i++) {
- struct ifnet *ifp = port_info->lldevs[i];
- TOEDEV(ifp) = tdev;
-
- CTR1(KTR_TOM, "enabling toe on %p", ifp);
- ifp->if_capabilities |= IFCAP_TOE4;
- ifp->if_capenable |= IFCAP_TOE4;
- }
- t->ports = port_info;
+ if (td->listen_mask != 0)
+ hashdestroy(td->listen_hash, M_CXGB, td->listen_mask);
- /* Add device to the list of offload devices */
- t3cdev_add(t);
-
- /* Activate TCP offload device */
- cxgb_offload_activate(TOM_DATA(tdev)->cdev->adapter);
-
- activate_offload(tdev);
- cxgb_register_listeners();
- return;
-
-out_free_all:
- printf("out_free_all fail\n");
- free(port_info, M_CXGB);
-out_free_tom:
- printf("out_free_tom fail\n");
- free(t, M_CXGB);
- return;
-}
-
-
-
-static int
-do_act_open_rpl(struct t3cdev *dev, struct mbuf *m)
-{
- struct cpl_act_open_rpl *rpl = cplhdr(m);
- unsigned int atid = G_TID(ntohl(rpl->atid));
- struct toe_tid_entry *toe_tid;
-
- toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid);
- if (toe_tid->ctx && toe_tid->client && toe_tid->client->handlers &&
- toe_tid->client->handlers[CPL_ACT_OPEN_RPL]) {
- return toe_tid->client->handlers[CPL_ACT_OPEN_RPL] (dev, m,
- toe_tid->ctx);
- } else {
- log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
- dev->name, CPL_ACT_OPEN_RPL);
- return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
- }
-}
-
-static int
-do_stid_rpl(struct t3cdev *dev, struct mbuf *m)
-{
- union opcode_tid *p = cplhdr(m);
- unsigned int stid = G_TID(ntohl(p->opcode_tid));
- struct toe_tid_entry *toe_tid;
-
- toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid);
- if (toe_tid->ctx && toe_tid->client->handlers &&
- toe_tid->client->handlers[p->opcode]) {
- return toe_tid->client->handlers[p->opcode] (dev, m, toe_tid->ctx);
- } else {
- log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
- dev->name, p->opcode);
- return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
- }
-}
-
-static int
-do_hwtid_rpl(struct t3cdev *dev, struct mbuf *m)
-{
- union opcode_tid *p = cplhdr(m);
- unsigned int hwtid;
- struct toe_tid_entry *toe_tid;
-
- DPRINTF("do_hwtid_rpl opcode=0x%x\n", p->opcode);
- hwtid = G_TID(ntohl(p->opcode_tid));
-
- toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
- if (toe_tid->ctx && toe_tid->client->handlers &&
- toe_tid->client->handlers[p->opcode]) {
- return toe_tid->client->handlers[p->opcode]
- (dev, m, toe_tid->ctx);
- } else {
- log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
- dev->name, p->opcode);
- return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
- }
-}
-
-static int
-do_cr(struct t3cdev *dev, struct mbuf *m)
-{
- struct cpl_pass_accept_req *req = cplhdr(m);
- unsigned int stid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
- struct toe_tid_entry *toe_tid;
-
- toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid);
- if (toe_tid->ctx && toe_tid->client->handlers &&
- toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]) {
- return toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]
- (dev, m, toe_tid->ctx);
- } else {
- log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
- dev->name, CPL_PASS_ACCEPT_REQ);
- return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
- }
-}
-
-static int
-do_abort_req_rss(struct t3cdev *dev, struct mbuf *m)
-{
- union opcode_tid *p = cplhdr(m);
- unsigned int hwtid = G_TID(ntohl(p->opcode_tid));
- struct toe_tid_entry *toe_tid;
-
- toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
- if (toe_tid->ctx && toe_tid->client->handlers &&
- toe_tid->client->handlers[p->opcode]) {
- return toe_tid->client->handlers[p->opcode]
- (dev, m, toe_tid->ctx);
- } else {
- struct cpl_abort_req_rss *req = cplhdr(m);
- struct cpl_abort_rpl *rpl;
-
- struct mbuf *m = m_get(M_NOWAIT, MT_DATA);
- if (!m) {
- log(LOG_NOTICE, "do_abort_req_rss: couldn't get mbuf!\n");
- goto out;
- }
-
- m_set_priority(m, CPL_PRIORITY_DATA);
- rpl = cplhdr(m);
- rpl->wr.wr_hi =
- htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
- rpl->wr.wr_lo = htonl(V_WR_TID(GET_TID(req)));
- OPCODE_TID(rpl) =
- htonl(MK_OPCODE_TID(CPL_ABORT_RPL, GET_TID(req)));
- rpl->cmd = req->status;
- cxgb_ofld_send(dev, m);
- out:
- return (CPL_RET_BUF_DONE);
- }
-}
-
-static int
-do_act_establish(struct t3cdev *dev, struct mbuf *m)
-{
- struct cpl_act_establish *req;
- unsigned int atid;
- struct toe_tid_entry *toe_tid;
-
- req = cplhdr(m);
- atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
- toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid);
- if (toe_tid && toe_tid->ctx && toe_tid->client->handlers &&
- toe_tid->client->handlers[CPL_ACT_ESTABLISH]) {
-
- return toe_tid->client->handlers[CPL_ACT_ESTABLISH]
- (dev, m, toe_tid->ctx);
- } else {
-
- log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
- dev->name, CPL_ACT_ESTABLISH);
- return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
- }
-}
-
-
-static int
-do_term(struct t3cdev *dev, struct mbuf *m)
-{
- unsigned int hwtid = ntohl(m_get_priority(m)) >> 8 & 0xfffff;
- unsigned int opcode = G_OPCODE(ntohl(m->m_pkthdr.csum_data));
- struct toe_tid_entry *toe_tid;
-
- toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
- if (toe_tid && toe_tid->ctx && toe_tid->client->handlers &&
- toe_tid->client->handlers[opcode]) {
- return toe_tid->client->handlers[opcode](dev, m, toe_tid->ctx);
- } else {
- log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
- dev->name, opcode);
- return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
- }
- return (0);
-}
-
-/*
- * Process a received packet with an unknown/unexpected CPL opcode.
- */
-static int
-do_bad_cpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
-{
- log(LOG_ERR, "%s: received bad CPL command %u\n", cdev->name,
- 0xFF & *mtod(m, unsigned int *));
- return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG);
-}
-
-/*
- * Add a new handler to the CPL dispatch table. A NULL handler may be supplied
- * to unregister an existing handler.
- */
-void
-t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h)
-{
- if (opcode < UCHAR_MAX)
- tom_cpl_handlers[opcode] = h ? h : do_bad_cpl;
- else
- log(LOG_ERR, "Chelsio T3 TOM: handler registration for "
- "opcode %u failed\n", opcode);
+ if (mtx_initialized(&td->toep_list_lock))
+ mtx_destroy(&td->toep_list_lock);
+ if (mtx_initialized(&td->lctx_hash_lock))
+ mtx_destroy(&td->lctx_hash_lock);
+ if (mtx_initialized(&td->tid_release_lock))
+ mtx_destroy(&td->tid_release_lock);
+ if (td->l2t)
+ t3_free_l2t(td->l2t);
+ free_tid_tabs(&td->tid_maps);
+ free(td, M_CXGB);
}
/*
- * Make a preliminary determination if a connection can be offloaded. It's OK
- * to fail the offload later if we say we can offload here. For now this
- * always accepts the offload request unless there are IP options.
+ * Ground control to Major TOM
+ * Commencing countdown, engines on
*/
static int
-can_offload(struct toedev *dev, struct socket *so)
-{
- struct tom_data *tomd = TOM_DATA(dev);
- struct t3cdev *cdev = T3CDEV(dev->tod_lldev);
- struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
-
- return so_sotoinpcb(so)->inp_depend4.inp4_options == NULL &&
- tomd->conf.activated &&
- (tomd->conf.max_conn < 0 ||
- atomic_load_acq_int(&t->tids_in_use) + t->atids_in_use < tomd->conf.max_conn);
-}
-
-static int
-tom_ctl(struct toedev *dev, unsigned int req, void *data)
-{
- struct tom_data *t = TOM_DATA(dev);
- struct t3cdev *cdev = t->cdev;
-
- if (cdev->ctl)
- return cdev->ctl(cdev, req, data);
-
- return (EOPNOTSUPP);
-}
-
-/*
- * Free an active-open TID.
- */
-void *
-cxgb_free_atid(struct t3cdev *tdev, int atid)
-{
- struct tid_info *t = &(T3C_DATA(tdev))->tid_maps;
- union active_open_entry *p = atid2entry(t, atid);
- void *ctx = p->toe_tid.ctx;
-
- mtx_lock(&t->atid_lock);
- p->next = t->afree;
- t->afree = p;
- t->atids_in_use--;
- mtx_unlock(&t->atid_lock);
-
- return ctx;
-}
-
-/*
- * Free a server TID and return it to the free pool.
- */
-void
-cxgb_free_stid(struct t3cdev *tdev, int stid)
+t3_tom_activate(struct adapter *sc)
{
- struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
- union listen_entry *p = stid2entry(t, stid);
-
- mtx_lock(&t->stid_lock);
- p->next = t->sfree;
- t->sfree = p;
- t->stids_in_use--;
- mtx_unlock(&t->stid_lock);
-}
-
-/*
- * Free a server TID and return it to the free pool.
- */
-void *
-cxgb_get_lctx(struct t3cdev *tdev, int stid)
-{
- struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
- union listen_entry *p = stid2entry(t, stid);
+ struct tom_data *td;
+ struct toedev *tod;
+ int i, rc = 0;
+ struct mc5_params *mc5 = &sc->params.mc5;
+ u_int ntids, natids, mtus;
- return (p->toe_tid.ctx);
-}
-
-void
-cxgb_insert_tid(struct t3cdev *tdev, struct cxgb_client *client,
- void *ctx, unsigned int tid)
-{
- struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
-
- t->tid_tab[tid].client = client;
- t->tid_tab[tid].ctx = ctx;
- atomic_add_int(&t->tids_in_use, 1);
-}
-
-/* use ctx as a next pointer in the tid release list */
-void
-cxgb_queue_tid_release(struct t3cdev *tdev, unsigned int tid)
-{
- struct t3c_data *td = T3C_DATA (tdev);
- struct toe_tid_entry *p = &td->tid_maps.tid_tab[tid];
-
- CTR0(KTR_TOM, "queuing tid release\n");
-
- mtx_lock(&td->tid_release_lock);
- p->ctx = td->tid_release_list;
- td->tid_release_list = p;
-
- if (!p->ctx)
- taskqueue_enqueue(tdev->adapter->tq, &td->tid_release_task);
-
- mtx_unlock(&td->tid_release_lock);
-}
-
-/*
- * Remove a tid from the TID table. A client may defer processing its last
- * CPL message if it is locked at the time it arrives, and while the message
- * sits in the client's backlog the TID may be reused for another connection.
- * To handle this we atomically switch the TID association if it still points
- * to the original client context.
- */
-void
-cxgb_remove_tid(struct t3cdev *tdev, void *ctx, unsigned int tid)
-{
- struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
-
- if (tid >= t->ntids)
- panic("tid=%d >= t->ntids=%d", tid, t->ntids);
-
- if (tdev->type == T3A)
- atomic_cmpset_ptr((uintptr_t *)&t->tid_tab[tid].ctx, (long)NULL, (long)ctx);
- else {
- struct mbuf *m;
-
- m = m_get(M_NOWAIT, MT_DATA);
- if (__predict_true(m != NULL)) {
- mk_tid_release(m, tid);
- CTR1(KTR_CXGB, "releasing tid=%u", tid);
-
- cxgb_ofld_send(tdev, m);
- t->tid_tab[tid].ctx = NULL;
- } else
- cxgb_queue_tid_release(tdev, tid);
- }
- atomic_add_int(&t->tids_in_use, -1);
-}
+ ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */
-int
-cxgb_alloc_atid(struct t3cdev *tdev, struct cxgb_client *client,
- void *ctx)
-{
- int atid = -1;
- struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
-
- mtx_lock(&t->atid_lock);
- if (t->afree) {
- union active_open_entry *p = t->afree;
-
- atid = (p - t->atid_tab) + t->atid_base;
- t->afree = p->next;
- p->toe_tid.ctx = ctx;
- p->toe_tid.client = client;
- t->atids_in_use++;
- }
- mtx_unlock(&t->atid_lock);
- return atid;
-}
+ /* per-adapter softc for TOM */
+ td = malloc(sizeof(*td), M_CXGB, M_ZERO | M_NOWAIT);
+ if (td == NULL)
+ return (ENOMEM);
-int
-cxgb_alloc_stid(struct t3cdev *tdev, struct cxgb_client *client,
- void *ctx)
-{
- int stid = -1;
- struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
-
- mtx_lock(&t->stid_lock);
- if (t->sfree) {
- union listen_entry *p = t->sfree;
-
- stid = (p - t->stid_tab) + t->stid_base;
- t->sfree = p->next;
- p->toe_tid.ctx = ctx;
- p->toe_tid.client = client;
- t->stids_in_use++;
- }
- mtx_unlock(&t->stid_lock);
- return stid;
+ /* List of TOE PCBs and associated lock */
+ mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF);
+ TAILQ_INIT(&td->toep_list);
+
+ /* Listen context */
+ mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF);
+ td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGB,
+ &td->listen_mask, HASH_NOWAIT);
+
+ /* TID release task */
+ TASK_INIT(&td->tid_release_task, 0, t3_process_tid_release_list, td);
+ mtx_init(&td->tid_release_lock, "tid release", NULL, MTX_DEF);
+
+ /* L2 table */
+ td->l2t = t3_init_l2t(L2T_SIZE);
+ if (td->l2t == NULL) {
+ rc = ENOMEM;
+ goto done;
+ }
+
+ /* TID tables */
+ ntids = t3_mc5_size(&sc->mc5) - mc5->nroutes - mc5->nfilters -
+ mc5->nservers;
+ natids = min(ntids / 2, 64 * 1024);
+ rc = alloc_tid_tabs(&td->tid_maps, ntids, natids, mc5->nservers,
+ 0x100000 /* ATID_BASE */, ntids);
+ if (rc != 0)
+ goto done;
+
+ /* CPL handlers */
+ t3_init_listen_cpl_handlers(sc);
+ t3_init_l2t_cpl_handlers(sc);
+ t3_init_cpl_io(sc);
+
+ /* toedev ops */
+ tod = &td->tod;
+ init_toedev(tod);
+ tod->tod_softc = sc;
+ tod->tod_connect = t3_connect;
+ tod->tod_listen_start = t3_listen_start;
+ tod->tod_listen_stop = t3_listen_stop;
+ tod->tod_rcvd = t3_rcvd;
+ tod->tod_output = t3_tod_output;
+ tod->tod_send_rst = t3_send_rst;
+ tod->tod_send_fin = t3_send_fin;
+ tod->tod_pcb_detach = t3_pcb_detach;
+ tod->tod_l2_update = t3_l2_update;
+ tod->tod_syncache_added = t3_syncache_added;
+ tod->tod_syncache_removed = t3_syncache_removed;
+ tod->tod_syncache_respond = t3_syncache_respond;
+ tod->tod_offload_socket = t3_offload_socket;
+
+ /* port MTUs */
+ mtus = sc->port[0].ifp->if_mtu;
+ if (sc->params.nports > 1)
+ mtus |= sc->port[1].ifp->if_mtu << 16;
+ t3_write_reg(sc, A_TP_MTU_PORT_TABLE, mtus);
+ t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
+ sc->params.rev == 0 ? sc->port[0].ifp->if_mtu : 0xffff);
+
+ /* SMT entry for each port */
+ for_each_port(sc, i) {
+ write_smt_entry(sc, i);
+ TOEDEV(sc->port[i].ifp) = &td->tod;
+ }
+
+ /* Switch TP to offload mode */
+ t3_tp_set_offload_mode(sc, 1);
+
+ sc->tom_softc = td;
+ sc->flags |= TOM_INIT_DONE;
+ register_toedev(tod);
+
+done:
+ if (rc != 0)
+ free_tom_data(td);
+
+ return (rc);
}
-
static int
-is_offloading(struct ifnet *ifp)
-{
- struct adapter *adapter;
- int port;
-
- rw_rlock(&adapter_list_lock);
- TAILQ_FOREACH(adapter, &adapter_list, adapter_entry) {
- for_each_port(adapter, port) {
- if (ifp == adapter->port[port].ifp) {
- rw_runlock(&adapter_list_lock);
- return 1;
- }
- }
- }
- rw_runlock(&adapter_list_lock);
- return 0;
-}
-
-
-static void
-cxgb_arp_update_event(void *unused, struct rtentry *rt0,
- uint8_t *enaddr, struct sockaddr *sa)
-{
-
- if (!is_offloading(rt0->rt_ifp))
- return;
-
- RT_ADDREF(rt0);
- RT_UNLOCK(rt0);
- cxgb_neigh_update(rt0, enaddr, sa);
- RT_LOCK(rt0);
- RT_REMREF(rt0);
-}
-
-static void
-cxgb_redirect_event(void *unused, int event, struct rtentry *rt0,
- struct rtentry *rt1, struct sockaddr *sa)
-{
- /*
- * ignore events on non-offloaded interfaces
- */
- if (!is_offloading(rt0->rt_ifp))
- return;
-
- /*
- * Cannot redirect to non-offload device.
- */
- if (!is_offloading(rt1->rt_ifp)) {
- log(LOG_WARNING, "%s: Redirect to non-offload"
- "device ignored.\n", __FUNCTION__);
- return;
- }
-
- /*
- * avoid LORs by dropping the route lock but keeping a reference
- *
- */
- RT_ADDREF(rt0);
- RT_UNLOCK(rt0);
- RT_ADDREF(rt1);
- RT_UNLOCK(rt1);
-
- cxgb_redirect(rt0, rt1, sa);
- cxgb_neigh_update(rt1, NULL, sa);
-
- RT_LOCK(rt0);
- RT_REMREF(rt0);
- RT_LOCK(rt1);
- RT_REMREF(rt1);
-}
-
-void
-cxgb_neigh_update(struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa)
+t3_tom_deactivate(struct adapter *sc)
{
+ int rc = 0;
+ struct tom_data *td = sc->tom_softc;
- if (rt->rt_ifp && is_offloading(rt->rt_ifp) && (rt->rt_ifp->if_flags & IFCAP_TOE)) {
- struct t3cdev *tdev = T3CDEV(rt->rt_ifp);
+ ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */
- PANIC_IF(!tdev);
- t3_l2t_update(tdev, rt, enaddr, sa);
- }
-}
-
-static void
-set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e)
-{
- struct mbuf *m;
- struct cpl_set_tcb_field *req;
+ if (td == NULL)
+ return (0); /* XXX. KASSERT? */
- m = m_gethdr(M_NOWAIT, MT_DATA);
- if (!m) {
- log(LOG_ERR, "%s: cannot allocate mbuf!\n", __FUNCTION__);
- return;
- }
-
- m_set_priority(m, CPL_PRIORITY_CONTROL);
- req = mtod(m, struct cpl_set_tcb_field *);
- m->m_pkthdr.len = m->m_len = sizeof(*req);
-
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
- req->reply = 0;
- req->cpu_idx = 0;
- req->word = htons(W_TCB_L2T_IX);
- req->mask = htobe64(V_TCB_L2T_IX(M_TCB_L2T_IX));
- req->val = htobe64(V_TCB_L2T_IX(e->idx));
- tdev->send(tdev, m);
-}
+ if (sc->offload_map != 0)
+ return (EBUSY); /* at least one port has IFCAP_TOE enabled */
-void
-cxgb_redirect(struct rtentry *old, struct rtentry *new, struct sockaddr *sa)
-{
- struct ifnet *olddev, *newdev;
- struct tid_info *ti;
- struct t3cdev *tdev;
- u32 tid;
- int update_tcb;
- struct l2t_entry *e;
- struct toe_tid_entry *te;
-
- olddev = old->rt_ifp;
- newdev = new->rt_ifp;
- if (!is_offloading(olddev))
- return;
- if (!is_offloading(newdev)) {
- log(LOG_WARNING, "%s: Redirect to non-offload"
- "device ignored.\n", __FUNCTION__);
- return;
- }
- tdev = T3CDEV(olddev);
- PANIC_IF(!tdev);
- if (tdev != T3CDEV(newdev)) {
- log(LOG_WARNING, "%s: Redirect to different "
- "offload device ignored.\n", __FUNCTION__);
- return;
- }
+ mtx_lock(&td->toep_list_lock);
+ if (!TAILQ_EMPTY(&td->toep_list))
+ rc = EBUSY;
+ mtx_unlock(&td->toep_list_lock);
- /* Add new L2T entry */
- e = t3_l2t_get(tdev, new, new->rt_ifp, sa);
- if (!e) {
- log(LOG_ERR, "%s: couldn't allocate new l2t entry!\n",
- __FUNCTION__);
- return;
- }
+ mtx_lock(&td->lctx_hash_lock);
+ if (td->lctx_count > 0)
+ rc = EBUSY;
+ mtx_unlock(&td->lctx_hash_lock);
- /* Walk tid table and notify clients of dst change. */
- ti = &(T3C_DATA (tdev))->tid_maps;
- for (tid=0; tid < ti->ntids; tid++) {
- te = lookup_tid(ti, tid);
- PANIC_IF(!te);
- if (te->ctx && te->client && te->client->redirect) {
- update_tcb = te->client->redirect(te->ctx, old, new,
- e);
- if (update_tcb) {
- l2t_hold(L2DATA(tdev), e);
- set_l2t_ix(tdev, tid, e);
- }
- }
+ if (rc == 0) {
+ unregister_toedev(&td->tod);
+ t3_tp_set_offload_mode(sc, 0);
+ free_tom_data(td);
+ sc->tom_softc = NULL;
+ sc->flags &= ~TOM_INIT_DONE;
}
- l2t_release(L2DATA(tdev), e);
-}
-/*
- * Initialize the CPL dispatch table.
- */
-static void
-init_cpl_handlers(void)
-{
- int i;
-
- for (i = 0; i < 256; ++i)
- tom_cpl_handlers[i] = do_bad_cpl;
-
- t3_init_listen_cpl_handlers();
+ return (rc);
}
static int
-t3_toe_attach(struct toedev *dev, const struct offload_id *entry)
-{
- struct tom_data *t = TOM_DATA(dev);
- struct t3cdev *cdev = t->cdev;
- struct ddp_params ddp;
- struct ofld_page_info rx_page_info;
- int err;
-
- t3_init_tunables(t);
- mtx_init(&t->listen_lock, "tom data listeners", NULL, MTX_DEF);
- CTR2(KTR_TOM, "t3_toe_attach dev=%p entry=%p", dev, entry);
-
- dev->tod_can_offload = can_offload;
- dev->tod_connect = t3_connect;
- dev->tod_ctl = tom_ctl;
-#if 0
- dev->tod_failover = t3_failover;
-#endif
- err = cdev->ctl(cdev, GET_DDP_PARAMS, &ddp);
- if (err)
- return err;
-
- err = cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info);
- if (err)
- return err;
-
- t->ddp_llimit = ddp.llimit;
- t->ddp_ulimit = ddp.ulimit;
- t->pdev = ddp.pdev;
- t->rx_page_size = rx_page_info.page_size;
- /* OK if this fails, we just can't do DDP */
- t->nppods = (ddp.ulimit + 1 - ddp.llimit) / PPOD_SIZE;
- t->ppod_map = malloc(t->nppods, M_DEVBUF, M_NOWAIT|M_ZERO);
-
- mtx_init(&t->ppod_map_lock, "ppod map", NULL, MTX_DEF);
-
-
- t3_sysctl_register(cdev->adapter, &t->conf);
- return (0);
-}
-
-static void
-cxgb_toe_listen_start(void *unused, struct tcpcb *tp)
-{
- struct socket *so = inp_inpcbtosocket(tp->t_inpcb);
- struct tom_data *p;
-
- mtx_lock(&cxgb_list_lock);
- TAILQ_FOREACH(p, &cxgb_list, entry) {
- t3_listen_start(&p->tdev, so, p->cdev);
- }
- mtx_unlock(&cxgb_list_lock);
-}
-
-static void
-cxgb_toe_listen_stop(void *unused, struct tcpcb *tp)
+t3_tom_mod_load(void)
{
- struct socket *so = inp_inpcbtosocket(tp->t_inpcb);
- struct tom_data *p;
-
- mtx_lock(&cxgb_list_lock);
- TAILQ_FOREACH(p, &cxgb_list, entry) {
- if (tp->t_state == TCPS_LISTEN)
- t3_listen_stop(&p->tdev, so, p->cdev);
- }
- mtx_unlock(&cxgb_list_lock);
-}
+ int rc;
-static void
-cxgb_toe_listen_start_handler(struct inpcb *inp, void *arg)
-{
- struct tcpcb *tp = intotcpcb(inp);
+ rc = t3_register_uld(&tom_uld_info);
+ if (rc != 0)
+ t3_tom_mod_unload();
- if (tp->t_state == TCPS_LISTEN)
- cxgb_toe_listen_start(NULL, tp);
+ return (rc);
}
static void
-cxgb_register_listeners(void)
+tom_uninit(struct adapter *sc, void *arg __unused)
{
-
- inp_apply_all(cxgb_toe_listen_start_handler, NULL);
+ /* Try to free resources (works only if no port has IFCAP_TOE) */
+ ADAPTER_LOCK(sc);
+ if (sc->flags & TOM_INIT_DONE)
+ t3_deactivate_uld(sc, ULD_TOM);
+ ADAPTER_UNLOCK(sc);
}
static int
-t3_tom_init(void)
+t3_tom_mod_unload(void)
{
- init_cpl_handlers();
- if (t3_init_cpl_io() < 0) {
- log(LOG_ERR,
- "Unable to initialize cpl io ops\n");
- return -1;
- }
- t3_init_socket_ops();
+ t3_iterate(tom_uninit, NULL);
- /* Register with the TOE device layer. */
+ if (t3_unregister_uld(&tom_uld_info) == EBUSY)
+ return (EBUSY);
- if (register_tom(&t3_tom_info) != 0) {
- log(LOG_ERR,
- "Unable to register Chelsio T3 TCP offload module.\n");
- return -1;
- }
-
- rw_init(&adapter_list_lock, "ofld adap list");
- TAILQ_INIT(&adapter_list);
- EVENTHANDLER_REGISTER(route_arp_update_event, cxgb_arp_update_event,
- NULL, EVENTHANDLER_PRI_ANY);
- EVENTHANDLER_REGISTER(route_redirect_event, cxgb_redirect_event,
- NULL, EVENTHANDLER_PRI_ANY);
-
- mtx_init(&cxgb_list_lock, "cxgb tom list", NULL, MTX_DEF);
- listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
- cxgb_toe_listen_start, NULL, EVENTHANDLER_PRI_ANY);
- listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
- cxgb_toe_listen_stop, NULL, EVENTHANDLER_PRI_ANY);
- TAILQ_INIT(&cxgb_list);
-
-
-
- t3_register_cpl_handler(CPL_PASS_OPEN_RPL, do_stid_rpl);
- t3_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_stid_rpl);
- t3_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_cr);
- t3_register_cpl_handler(CPL_PASS_ESTABLISH, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_ABORT_RPL_RSS, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_ABORT_RPL, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_RX_URG_NOTIFY, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_RX_DATA, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_TX_DATA_ACK, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_TX_DMA_ACK, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
- t3_register_cpl_handler(CPL_PEER_CLOSE, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_CLOSE_CON_RPL, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req_rss);
- t3_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
- t3_register_cpl_handler(CPL_RDMA_TERMINATE, do_term);
- t3_register_cpl_handler(CPL_RDMA_EC_STATUS, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_RX_DATA_DDP, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_ISCSI_HDR, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_GET_TCB_RPL, do_hwtid_rpl);
- t3_register_cpl_handler(CPL_SET_TCB_RPL, do_hwtid_rpl);
-
- /* Register to offloading devices */
- cxgb_register_client(&t3c_tom_client);
-
return (0);
}
+#endif /* ifdef TCP_OFFLOAD */
static int
-t3_tom_load(module_t mod, int cmd, void *arg)
+t3_tom_modevent(module_t mod, int cmd, void *arg)
{
- int err = 0;
+ int rc = 0;
+#ifdef TCP_OFFLOAD
switch (cmd) {
case MOD_LOAD:
- t3_tom_init();
- break;
- case MOD_QUIESCE:
+ rc = t3_tom_mod_load();
break;
+
case MOD_UNLOAD:
- printf("uhm, ... unloading isn't really supported for toe\n");
- break;
- case MOD_SHUTDOWN:
+ rc = t3_tom_mod_unload();
break;
+
default:
- err = EOPNOTSUPP;
- break;
+ rc = EINVAL;
}
-
- return (err);
+#else
+ rc = EOPNOTSUPP;
+#endif
+ return (rc);
}
-static moduledata_t mod_data= {
+static moduledata_t t3_tom_moddata = {
"t3_tom",
- t3_tom_load,
+ t3_tom_modevent,
0
};
+
MODULE_VERSION(t3_tom, 1);
MODULE_DEPEND(t3_tom, toecore, 1, 1, 1);
-MODULE_DEPEND(t3_tom, if_cxgb, 1, 1, 1);
-DECLARE_MODULE(t3_tom, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
-
+MODULE_DEPEND(t3_tom, cxgbc, 1, 1, 1);
+DECLARE_MODULE(t3_tom, t3_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY);
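The module glue above is the stock kern_module(9) pattern: a modevent handler dispatching MOD_LOAD/MOD_UNLOAD, a moduledata_t naming the module, and DECLARE_MODULE plus MODULE_DEPEND to wire it into the loader behind toecore and the cxgbc base driver. A minimal sketch of the same skeleton, under hypothetical "foo_tom" names, for readers new to the idiom:

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/module.h>

/* Hypothetical module "foo_tom"; mirrors the t3_tom glue above. */
static int
foo_tom_modevent(module_t mod, int cmd, void *arg)
{
	int rc = 0;

	switch (cmd) {
	case MOD_LOAD:
		/* register with the base driver / ULD framework here */
		break;
	case MOD_UNLOAD:
		/* return EBUSY here while connections still exist */
		break;
	default:
		rc = EINVAL;
	}

	return (rc);
}

static moduledata_t foo_tom_moddata = {
	"foo_tom",		/* module name */
	foo_tom_modevent,	/* event handler */
	0			/* extra data */
};

DECLARE_MODULE(foo_tom, foo_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY);
MODULE_VERSION(foo_tom, 1);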
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tom.h b/sys/dev/cxgb/ulp/tom/cxgb_tom.h
index 2f3201d..5cc29a8 100644
--- a/sys/dev/cxgb/ulp/tom/cxgb_tom.h
+++ b/sys/dev/cxgb/ulp/tom/cxgb_tom.h
@@ -1,7 +1,6 @@
-
/**************************************************************************
-Copyright (c) 2007, Chelsio Inc.
+Copyright (c) 2007, 2009 Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -33,128 +32,248 @@ $FreeBSD$
#ifndef CXGB_TOM_H_
#define CXGB_TOM_H_
#include <sys/protosw.h>
-#include <netinet/toedev.h>
+#include <netinet/toecore.h>
-#define LISTEN_INFO_HASH_SIZE 32
+MALLOC_DECLARE(M_CXGB);
-struct listen_info {
- struct listen_info *next; /* Link to next entry */
- struct socket *so; /* The listening socket */
- unsigned int stid; /* The server TID */
-};
+#define KTR_CXGB KTR_SPARE3
+#define LISTEN_HASH_SIZE 32
/*
- * TOM tunable parameters. They can be manipulated through sysctl(2) or /proc.
+ * Holds the size, base address, free list start, etc. of the TID, server TID,
+ * and active-open TID tables for an offload device.
+ * The tables themselves are allocated dynamically.
*/
-struct tom_tunables {
- int max_host_sndbuf; // max host RAM consumed by a sndbuf
- int tx_hold_thres; // push/pull threshold for non-full TX sk_buffs
- int max_wrs; // max # of outstanding WRs per connection
- int rx_credit_thres; // min # of RX credits needed for RX_DATA_ACK
- int cong_alg; // Congestion control algorithm
- int mss; // max TX_DATA WR payload size
- int delack; // delayed ACK control
- int max_conn; // maximum number of offloaded connections
- int soft_backlog_limit; // whether the listen backlog limit is soft
- int ddp; // whether to put new connections in DDP mode
- int ddp_thres; // min recvmsg size before activating DDP
- int ddp_copy_limit; // capacity of kernel DDP buffer
- int ddp_push_wait; // whether blocking DDP waits for PSH flag
- int ddp_rcvcoalesce; // whether receive coalescing is enabled
- int zcopy_sosend_enabled; // < is never zcopied
- int zcopy_sosend_partial_thres; // < is never zcopied
- int zcopy_sosend_partial_copy; // bytes copied in partial zcopy
- int zcopy_sosend_thres;// >= are mostly zcopied
- int zcopy_sosend_copy; // bytes coped in zcopied
- int zcopy_sosend_ret_pending_dma;// pot. return while pending DMA
- int activated; // TOE engine activation state
+struct tid_info {
+ void **tid_tab;
+ unsigned int ntids;
+ volatile unsigned int tids_in_use;
+
+ union listen_entry *stid_tab;
+ unsigned int nstids;
+ unsigned int stid_base;
+
+ union active_open_entry *atid_tab;
+ unsigned int natids;
+ unsigned int atid_base;
+
+ /*
+ * The following members are accessed R/W so we put them in their own
+ * cache lines. TOM_XXX: actually do what is said here.
+ *
+ * XXX We could combine the atid fields above with the lock here since
+ * atids are used once (unlike other tids). OTOH the above fields are
+ * usually in cache due to tid_tab.
+ */
+ struct mtx atid_lock;
+ union active_open_entry *afree;
+ unsigned int atids_in_use;
+
+ struct mtx stid_lock;
+ union listen_entry *sfree;
+ unsigned int stids_in_use;
};
struct tom_data {
- TAILQ_ENTRY(tom_data) entry;
-
- struct t3cdev *cdev;
- struct pci_dev *pdev;
- struct toedev tdev;
+ struct toedev tod;
+
+ /*
+ * toepcb's associated with this TOE device are either on the
+ * toep list or in the synq of a listening socket in lctx hash.
+ */
+ struct mtx toep_list_lock;
+ TAILQ_HEAD(, toepcb) toep_list;
- struct cxgb_client *client;
- struct tom_tunables conf;
- struct tom_sysctl_table *sysctl;
+ struct l2t_data *l2t;
+ struct tid_info tid_maps;
/*
- * The next three locks listen_lock, deferq.lock, and tid_release_lock
- * are used rarely so we let them potentially share a cacheline.
+ * The next two locks, lctx_hash_lock and tid_release_lock, are used rarely
+ * so we let them potentially share a cacheline.
*/
- struct listen_info *listen_hash_tab[LISTEN_INFO_HASH_SIZE];
- struct mtx listen_lock;
+ LIST_HEAD(, listen_ctx) *listen_hash;
+ u_long listen_mask;
+ int lctx_count; /* # of lctx in the hash table */
+ struct mtx lctx_hash_lock;
- struct mbuf_head deferq;
- struct task deferq_task;
-
- struct socket **tid_release_list;
+ void **tid_release_list;
struct mtx tid_release_lock;
struct task tid_release_task;
+};
- volatile int tx_dma_pending;
-
- unsigned int ddp_llimit;
- unsigned int ddp_ulimit;
-
- unsigned int rx_page_size;
+struct synq_entry {
+ TAILQ_ENTRY(synq_entry) link; /* listen_ctx's synq link */
+ int flags; /* same as toepcb's tp_flags */
+ int tid;
+ struct mbuf *m; /* backpointer to containing mbuf */
+ struct listen_ctx *lctx; /* backpointer to listen ctx */
+ struct cpl_pass_establish *cpl;
+ struct toepcb *toep;
+ struct l2t_entry *e;
+ uint32_t iss;
+ uint32_t ts;
+ uint32_t opt0h;
+ uint32_t qset;
+ int rx_credits;
+ volatile u_int refcnt;
- u8 *ppod_map;
- unsigned int nppods;
- struct mtx ppod_map_lock;
-
- struct adap_ports *ports;
- struct taskqueue *tq;
+#define RPL_OK 0 /* ok to reply */
+#define RPL_DONE 1 /* replied already */
+#define RPL_DONT 2 /* don't reply */
+ volatile u_int reply; /* see above. */
};
+#define LCTX_RPL_PENDING 1 /* waiting for CPL_PASS_OPEN_RPL */
struct listen_ctx {
- struct socket *lso;
- struct tom_data *tom_data;
- int ulp_mode;
- LIST_HEAD(, toepcb) synq_head;
-
+ LIST_ENTRY(listen_ctx) link; /* listen hash linkage */
+ volatile int refcnt;
+ int stid;
+ int flags;
+ struct inpcb *inp; /* listening socket's inp */
+ int qset;
+ TAILQ_HEAD(, synq_entry) synq;
};
-#define TOM_DATA(dev) (*(struct tom_data **)&(dev)->tod_l4opt)
-#define T3C_DEV(sk) ((TOM_DATA(TOE_DEV(sk)))->cdev)
-#define TOEP_T3C_DEV(toep) (TOM_DATA(toep->tp_toedev)->cdev)
-#define TOM_TUNABLE(dev, param) (TOM_DATA(dev)->conf.param)
+void t3_process_tid_release_list(void *data, int pending);
+
+static inline struct tom_data *
+t3_tomdata(struct toedev *tod)
+{
+ return (member2struct(tom_data, tod, tod));
+}
+
+union listen_entry {
+ void *ctx;
+ union listen_entry *next;
+};
-#define TP_DATASENT (1 << 0)
-#define TP_TX_WAIT_IDLE (1 << 1)
-#define TP_FIN_SENT (1 << 2)
-#define TP_ABORT_RPL_PENDING (1 << 3)
-#define TP_ABORT_SHUTDOWN (1 << 4)
-#define TP_ABORT_RPL_RCVD (1 << 5)
-#define TP_ABORT_REQ_RCVD (1 << 6)
-#define TP_CLOSE_CON_REQUESTED (1 << 7)
-#define TP_SYN_RCVD (1 << 8)
-#define TP_ESTABLISHED (1 << 9)
+union active_open_entry {
+ void *ctx;
+ union active_open_entry *next;
+};
-void t3_init_tunables(struct tom_data *t);
+/*
+ * Map an ATID or STID to its entry in the corresponding TID table.
+ */
+static inline union active_open_entry *atid2entry(const struct tid_info *t,
+ unsigned int atid)
+{
+ return (&t->atid_tab[atid - t->atid_base]);
+}
-void t3_sysctl_register(struct adapter *sc, const struct tom_tunables *p);
-static __inline struct mbuf *
-m_gethdr_nofail(int len)
+static inline union listen_entry *stid2entry(const struct tid_info *t,
+ unsigned int stid)
{
- struct mbuf *m;
-
- m = m_gethdr(M_NOWAIT, MT_DATA);
- if (m == NULL) {
- panic("implement lowmem cache\n");
- }
-
- KASSERT(len < MHLEN, ("requested header size too large for mbuf"));
- m->m_pkthdr.len = m->m_len = len;
- return (m);
+ return (&t->stid_tab[stid - t->stid_base]);
}
+/*
+ * Find the connection corresponding to a TID.
+ */
+static inline void *lookup_tid(const struct tid_info *t, unsigned int tid)
+{
+ void *p;
+
+ if (tid >= t->ntids)
+ return (NULL);
+
+ p = t->tid_tab[tid];
+ if (p < (void *)t->tid_tab || p >= (void *)&t->atid_tab[t->natids])
+ return (p);
+
+ return (NULL);
+}
+
+/*
+ * Find the connection corresponding to a server TID.
+ */
+static inline void *lookup_stid(const struct tid_info *t, unsigned int tid)
+{
+ void *p;
+
+ if (tid < t->stid_base || tid >= t->stid_base + t->nstids)
+ return (NULL);
+
+ p = stid2entry(t, tid)->ctx;
+ if (p < (void *)t->tid_tab || p >= (void *)&t->atid_tab[t->natids])
+ return (p);
+
+ return (NULL);
+}
+
+/*
+ * Find the connection corresponding to an active-open TID.
+ */
+static inline void *lookup_atid(const struct tid_info *t, unsigned int tid)
+{
+ void *p;
+
+ if (tid < t->atid_base || tid >= t->atid_base + t->natids)
+ return (NULL);
+
+ p = atid2entry(t, tid)->ctx;
+ if (p < (void *)t->tid_tab || p >= (void *)&t->atid_tab[t->natids])
+ return (p);
+
+ return (NULL);
+}
+
+static inline uint32_t
+calc_opt2(int cpu_idx)
+{
+ uint32_t opt2 = F_CPU_INDEX_VALID | V_CPU_INDEX(cpu_idx);
+
+ /* 3 = highspeed CC algorithm */
+ opt2 |= V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(3) |
+ V_PACING_FLAVOR(1);
+
+ /* coalesce and push bit semantics */
+ opt2 |= F_RX_COALESCE_VALID | V_RX_COALESCE(3);
+
+ return (htobe32(opt2));
+}
+
+/* cxgb_tom.c */
+struct toepcb *toepcb_alloc(struct toedev *);
+void toepcb_free(struct toepcb *);
+
+/* cxgb_cpl_io.c */
+void t3_init_cpl_io(struct adapter *);
+int t3_push_frames(struct socket *, int);
+int t3_connect(struct toedev *, struct socket *, struct rtentry *,
+ struct sockaddr *);
+int t3_tod_output(struct toedev *, struct tcpcb *);
+int t3_send_rst(struct toedev *, struct tcpcb *);
+int t3_send_fin(struct toedev *, struct tcpcb *);
+void insert_tid(struct tom_data *, void *, unsigned int);
+void update_tid(struct tom_data *, void *, unsigned int);
+void remove_tid(struct tom_data *, unsigned int);
+uint32_t calc_opt0h(struct socket *, int, int, struct l2t_entry *);
+uint32_t calc_opt0l(struct socket *, int);
+void queue_tid_release(struct toedev *, unsigned int);
+void offload_socket(struct socket *, struct toepcb *);
+void undo_offload_socket(struct socket *);
+int select_rcv_wscale(void);
+unsigned long select_rcv_wnd(struct socket *);
+int find_best_mtu_idx(struct adapter *, struct in_conninfo *, int);
+void make_established(struct socket *, uint32_t, uint32_t, uint16_t);
+void t3_rcvd(struct toedev *, struct tcpcb *);
+void t3_pcb_detach(struct toedev *, struct tcpcb *);
+void send_abort_rpl(struct toedev *, int, int);
+void release_tid(struct toedev *, unsigned int, int);
+/* cxgb_listen.c */
+void t3_init_listen_cpl_handlers(struct adapter *);
+int t3_listen_start(struct toedev *, struct tcpcb *);
+int t3_listen_stop(struct toedev *, struct tcpcb *);
+void t3_syncache_added(struct toedev *, void *);
+void t3_syncache_removed(struct toedev *, void *);
+int t3_syncache_respond(struct toedev *, void *, struct mbuf *);
+int do_abort_req_synqe(struct sge_qset *, struct rsp_desc *, struct mbuf *);
+int do_abort_rpl_synqe(struct sge_qset *, struct rsp_desc *, struct mbuf *);
+void t3_offload_socket(struct toedev *, void *, struct socket *);
#endif
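The three lookup helpers above lean on a property set up by alloc_tid_tabs() earlier in this commit: tid_tab, atid_tab, and stid_tab are carved out of one contiguous allocation, so a stored pointer that lands inside that region can only be a free-list "next" link, while anything pointing elsewhere is a live caller context. The allocation side of the trick, sketched after the cxgb_alloc_stid() this commit removes (illustrative only; assumes the declarations above are in scope):

static int
alloc_stid(struct tid_info *t, void *ctx)
{
	int stid = -1;

	mtx_lock(&t->stid_lock);
	if (t->sfree != NULL) {
		union listen_entry *p = t->sfree;

		stid = (p - t->stid_tab) + t->stid_base;
		t->sfree = p->next;
		p->ctx = ctx;	/* no longer points into the tables: in use */
		t->stids_in_use++;
	}
	mtx_unlock(&t->stid_lock);

	return (stid);
}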
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c b/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c
deleted file mode 100644
index 926b445..0000000
--- a/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/**************************************************************************
-
-Copyright (c) 2007, Chelsio Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Neither the name of the Chelsio Corporation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/fcntl.h>
-#include <sys/limits.h>
-#include <sys/lock.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <sys/mutex.h>
-
-#include <sys/sockopt.h>
-#include <sys/sockstate.h>
-#include <sys/sockbuf.h>
-#include <sys/socket.h>
-#include <sys/sysctl.h>
-
-#include <sys/syslog.h>
-
-#include <net/if.h>
-#include <net/route.h>
-
-#include <netinet/in.h>
-#include <netinet/in_pcb.h>
-#include <netinet/in_systm.h>
-#include <netinet/in_var.h>
-
-#include <cxgb_osdep.h>
-#include <sys/mbufq.h>
-
-#include <netinet/tcp.h>
-#include <netinet/tcp_var.h>
-#include <netinet/tcp_fsm.h>
-#include <net/route.h>
-
-#include <t3cdev.h>
-#include <common/cxgb_firmware_exports.h>
-#include <common/cxgb_tcb.h>
-#include <common/cxgb_ctl_defs.h>
-#include <common/cxgb_t3_cpl.h>
-#include <cxgb_offload.h>
-#include <cxgb_include.h>
-#include <ulp/toecore/cxgb_toedev.h>
-#include <ulp/tom/cxgb_tom.h>
-#include <ulp/tom/cxgb_defs.h>
-#include <ulp/tom/cxgb_t3_ddp.h>
-
-/* Avoid clutter in the hw.* space, keep all toe tunables within hw.cxgb */
-SYSCTL_DECL(_hw_cxgb);
-static SYSCTL_NODE(_hw_cxgb, OID_AUTO, toe, CTLFLAG_RD, 0, "TOE parameters");
-
-static struct tom_tunables default_tunable_vals = {
- .max_host_sndbuf = 32 * 1024,
- .tx_hold_thres = 0,
- .max_wrs = 15,
- .rx_credit_thres = 15 * 1024,
- .cong_alg = -1,
- .mss = 16384,
- .delack = 1,
- .max_conn = -1,
- .soft_backlog_limit = 0,
- .ddp = 1,
- .ddp_thres = 14 * 4096,
- .ddp_copy_limit = 13 * 4096,
- .ddp_push_wait = 1,
- .ddp_rcvcoalesce = 0,
- .zcopy_sosend_enabled = 0,
- .zcopy_sosend_partial_thres = 40960,
- .zcopy_sosend_partial_copy = 4096 * 3,
- .zcopy_sosend_thres = 128 * 1024,
- .zcopy_sosend_copy = 4096 * 2,
- .zcopy_sosend_ret_pending_dma = 1,
- .activated = 1,
-};
-
-static int activated = 1;
-TUNABLE_INT("hw.cxgb.toe.activated", &activated);
-SYSCTL_UINT(_hw_cxgb_toe, OID_AUTO, activated, CTLFLAG_RDTUN, &activated, 0,
- "enable TOE at init time");
-
-static int ddp = 1;
-TUNABLE_INT("hw.cxgb.toe.ddp", &ddp);
-SYSCTL_UINT(_hw_cxgb_toe, OID_AUTO, ddp, CTLFLAG_RDTUN, &ddp, 0, "enable DDP");
-
-void
-t3_init_tunables(struct tom_data *t)
-{
- t->conf = default_tunable_vals;
-
- /* Adjust tunables */
- t->conf.activated = activated;
- t->conf.ddp = ddp;
-
- /* Now apply device specific fixups. */
- t->conf.mss = T3C_DATA(t->cdev)->tx_max_chunk;
- t->conf.max_wrs = T3C_DATA(t->cdev)->max_wrs;
-}
-
-void
-t3_sysctl_register(struct adapter *sc, const struct tom_tunables *p)
-{
- struct sysctl_ctx_list *ctx;
- struct sysctl_oid_list *children;
-
- ctx = device_get_sysctl_ctx(sc->dev);
- children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
-
-}
-
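The tunables file is deleted outright rather than ported; the defaults that still matter survive as constants in the rewrite (toepcb_alloc() earlier in this commit seeds tp_wr_max with the same 15 that max_wrs defaulted to above). Were a knob still wanted, it would hang off the existing hw.cxgb sysctl node along these lines (purely hypothetical; the commit adds no such sysctl):

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

SYSCTL_DECL(_hw_cxgb);

/* Hypothetical replacement knob; name and flags are illustrative. */
static int t3_tom_max_wrs = 15;	/* same default as tp_wr_max above */
SYSCTL_INT(_hw_cxgb, OID_AUTO, tom_max_wrs, CTLFLAG_RW, &t3_tom_max_wrs, 0,
    "Max outstanding work requests per offloaded connection");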