author:		np <np@FreeBSD.org>	2015-06-06 09:28:40 +0000
committer:	np <np@FreeBSD.org>	2015-06-06 09:28:40 +0000
commit:		0a68383238bfcad36a92d5dc886b5157d48d8d98
tree:		a7525a032926b9b6bb4956da8398985bdaf1b038 /sys/dev/cxgbe/adapter.h
parent:		988e96c82ac62dd0892f582b123c9f759c677d64
MFC r276480, r276485, r276498, r277225, r277226, r277227, r277230,
r277637, and r283149 (by emaste@).
r276485 is the real change here; the rest deal with the fallout of
mp_ring's reliance on 64b atomics.
Use the incorrectly spelled 'eigth' from struct pkthdr in this branch
instead of MFC'ing r261733, which would have renamed the field of a
public structure in a -STABLE branch.
---
r276480:
Temporarily unplug cxgbe(4) from !amd64 builds.
r276485:
cxgbe(4): major tx rework.
a) Front load as much work as possible in if_transmit, before any driver
lock or software queue has to get involved.
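As a rough sketch of this front loading (parse_pkt() is the function declared
at the bottom of the adapter.h diff below; the queue selection helper, the
ring enqueue call, and the error handling are illustrative assumptions, not
the driver's actual code):

	/*
	 * Hypothetical if_transmit path: analyze and fix up the mbuf with
	 * no locks held, then hand it to the software ring.  parse_pkt()
	 * is assumed to free the chain on failure and may replace it
	 * (e.g. by defragmenting) on success.
	 */
	static int
	cxgbe_transmit_sketch(struct ifnet *ifp, struct mbuf *m)
	{
		struct sge_txq *txq;
		int rc;

		rc = parse_pkt(&m);	/* all the lockless, front-loaded work */
		if (__predict_false(rc != 0))
			return (rc);

		txq = pick_txq(ifp, m);	/* illustrative queue selection */
		return (mpr_enqueue_one(txq->r, m));	/* see b) below */
	}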
b) Replace buf_ring with a brand new mp_ring (multiproducer ring). This
is specifically for the tx multiqueue model where one of the if_transmit
producer threads becomes the consumer and other producers carry on as
usual. mp_ring is implemented as standalone code and it should be
possible to use it in any driver with tx multiqueue. It also has:
- the ability to enqueue/dequeue multiple items. This might become
significant if packet batching is ever implemented.
- an abdication mechanism to allow a thread to give up writing tx
descriptors and have another if_transmit thread take over. A thread
that's writing tx descriptors can end up doing so for an unbounded
time period if a) there are other if_transmit threads continuously
feeding the software queue, and b) the chip keeps up with whatever the
thread is throwing at it.
- accurate statistics about interesting events, even though the stats come
at the expense of additional branches/conditional code.
The NIC txq lock is uncontested on the fast path at this point. I've
left it there for synchronization with the control events (interface
up/down, modload/unload).
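To make the consumer hand-off concrete, here is a minimal sketch of the
single-word-of-state idea behind such a ring.  This is not the actual
mp_ring code (its source is not part of this diff); the field packing, the
names, and the drain_sketch() helper are assumptions.  Packing both indices
and a busy flag into one word is also what creates the reliance on 64-bit
atomics mentioned at the top:

	#include <sys/types.h>
	#include <sys/errno.h>
	#include <machine/atomic.h>

	/*
	 * Illustrative state layout: free-running 16-bit producer/consumer
	 * indices plus a "consumer present" flag, updated with 64-bit CAS.
	 */
	#define	MPR_PIDX(s)	((uint16_t)(s))
	#define	MPR_CIDX(s)	((uint16_t)((s) >> 16))
	#define	MPR_BUSY	(1ULL << 32)

	struct mp_ring_sketch {
		volatile uint64_t state;
		int size;		/* power of 2 */
		void *items[];
	};

	static void drain_sketch(struct mp_ring_sketch *);	/* writes tx desc */

	static int
	mpr_enqueue_one(struct mp_ring_sketch *r, void *item)
	{
		uint64_t new, old;

		do {
			old = r->state;
			if ((uint16_t)(MPR_PIDX(old) - MPR_CIDX(old)) >= r->size)
				return (ENOBUFS);	/* ring full */
			new = (old & ~(uint64_t)0xffff) |
			    (uint16_t)(MPR_PIDX(old) + 1);
			new |= MPR_BUSY;
		} while (!atomic_cmpset_64(&r->state, old, new));

		/*
		 * The real ring publishes slots with separate producer
		 * head/tail indices so the consumer never reads a slot
		 * before it is written; that subtlety is glossed over here.
		 */
		r->items[MPR_PIDX(old) & (r->size - 1)] = item;

		if ((old & MPR_BUSY) == 0)
			drain_sketch(r);  /* this producer became the consumer */
		return (0);
	}

In a scheme like this, abdication is just the consumer clearing MPR_BUSY (or
setting an explicit "abdicating" flag) while items remain, so the next
producer's CAS sees an idle ring and takes over the drain.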
c) Add support for the "type 1" coalescing work request in the normal NIC tx
path. This work request is optimized for frames with a single item in
the DMA gather list. These are very common when forwarding packets.
Note that netmap tx in cxgbe already uses these "type 1" work requests.
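(As a purely illustrative aid, the dispatch between the two coalesced
flavors might look like the check below; the helper and its arguments are
invented, but the type0/type1 split matches the new txpkts0/txpkts1 counters
in the diff.)

	/*
	 * Sketch: a coalesced WR can use the compact "type 1" encoding only
	 * when every frame in the batch has a one-entry DMA gather list,
	 * which is the common case when forwarding.
	 */
	static inline int
	txpkts_type_sketch(const int *nsegs, int nframes)
	{
		int i;

		for (i = 0; i < nframes; i++) {
			if (nsegs[i] != 1)
				return (0);	/* fall back to "type 0" */
		}
		return (1);
	}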
d) Do not request automatic cidx updates every 32 descriptors. Instead,
request updates via bits in individual work requests (still every 32
descriptors approximately). Also, request an automatic final update
when the queue idles after activity. This means NIC tx reclaim is still
performed lazily but it will catch up quickly as soon as the queue
idles. This seems to be the best middle ground and I'll probably do
something similar for netmap tx as well.
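A sketch of what the per-WR update requests might look like, using the
equeqidx field added to struct sge_eq in this diff; F_FW_WR_EQUEQ and
struct fw_eth_tx_pkt_wr come from the shared firmware interface headers,
the 32-descriptor threshold is from the text above, and the helper itself
is hypothetical:

	static inline void
	maybe_request_cidx_update(struct sge_eq *eq, struct fw_eth_tx_pkt_wr *wr)
	{

		/* Ask for a cidx update via this WR every ~32 descriptors. */
		if ((uint16_t)(eq->pidx - eq->equeqidx) >= 32) {
			wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ);
			eq->equeqidx = eq->pidx;
		}
	}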
e) Implement a faster tx path for WRQs (used by TOE tx and control
queues, _not_ by the normal NIC tx). Allow work requests to be written
directly to the hardware descriptor ring if room is available. I will
convert t4_tom and iw_cxgbe modules to this faster style gradually.
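The API for this shows up at the bottom of the adapter.h diff as
start_wrq_wr()/commit_wrq_wr().  The calling pattern is presumably along
these lines (the meaning of the int argument as a length in 16-byte units,
the failure handling, and build_wr() are assumptions):

	struct wrq_cookie cookie;
	void *w;

	/* Reserve space directly in the hardware descriptor ring. */
	w = start_wrq_wr(wrq, howmany(wr_len, 16), &cookie);
	if (w == NULL)
		return (ENOMEM);		/* illustrative fallback */
	build_wr(w, wr_len);			/* write the WR in place */
	commit_wrq_wr(wrq, w, &cookie);		/* make it visible to hw */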
r276498:
cxgbe(4): remove buf_ring specific restriction on the txq size.
r277225:
Make cxgbe(4) buildable with the gcc in base.
r277226:
Allow cxgbe(4) to be built on i386. Driver attach will succeed only on
a subset of i386 systems.
r277227:
Plug cxgbe(4) back into !powerpc && !arm builds, instead of building it
on amd64 only.
r277230:
Build cxgbe(4) on powerpc64 too.
r277637:
Make sure the compiler flag to get cxgbe(4) to compile with gcc is used
only when gcc is being used. This is what r277225 should have been.
Diffstat (limited to 'sys/dev/cxgbe/adapter.h')
-rw-r--r--	sys/dev/cxgbe/adapter.h | 116
1 file changed, 56 insertions, 60 deletions
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index fc914fe..df15fcc 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -146,7 +146,8 @@ enum {
 	CL_METADATA_SIZE = CACHE_LINE_SIZE,
 
 	SGE_MAX_WR_NDESC = SGE_MAX_WR_LEN / EQ_ESIZE, /* max WR size in desc */
-	TX_SGL_SEGS = 36,
+	TX_SGL_SEGS = 39,
+	TX_SGL_SEGS_TSO = 38,
 	TX_WR_FLITS = SGE_MAX_WR_LEN / 8
 };
 
@@ -266,6 +267,7 @@ struct port_info {
 	struct timeval last_refreshed;
 	struct port_stats stats;
+	u_int tx_parse_error;
 
 	eventhandler_tag vlan_c;
 
@@ -301,23 +303,9 @@ struct tx_desc {
 	__be64 flit[8];
 };
 
-struct tx_map {
-	struct mbuf *m;
-	bus_dmamap_t map;
-};
-
-/* DMA maps used for tx */
-struct tx_maps {
-	struct tx_map *maps;
-	uint32_t map_total;	/* # of DMA maps */
-	uint32_t map_pidx;	/* next map to be used */
-	uint32_t map_cidx;	/* reclaimed up to this index */
-	uint32_t map_avail;	/* # of available maps */
-};
-
 struct tx_sdesc {
+	struct mbuf *m;		/* m_nextpkt linked chain of frames */
 	uint8_t desc_used;	/* # of hardware descriptors used by the WR */
-	uint8_t credits;	/* NIC txq: # of frames sent out in the WR */
 };
 
@@ -371,16 +359,12 @@ struct sge_iq {
 enum {
 	EQ_CTRL		= 1,
 	EQ_ETH		= 2,
-#ifdef TCP_OFFLOAD
 	EQ_OFLD		= 3,
-#endif
 
 	/* eq flags */
-	EQ_TYPEMASK	= 7,		/* 3 lsbits hold the type */
-	EQ_ALLOCATED	= (1 << 3),	/* firmware resources allocated */
-	EQ_DOOMED	= (1 << 4),	/* about to be destroyed */
-	EQ_CRFLUSHED	= (1 << 5),	/* expecting an update from SGE */
-	EQ_STALLED	= (1 << 6),	/* out of hw descriptors or dmamaps */
+	EQ_TYPEMASK	= 0x3,		/* 2 lsbits hold the type (see above) */
+	EQ_ALLOCATED	= (1 << 2),	/* firmware resources allocated */
+	EQ_ENABLED	= (1 << 3),	/* open for business */
 };
 
 /* Listed in order of preference. Update t4_sysctls too if you change these */
@@ -395,32 +379,25 @@ enum {DOORBELL_UDB, DOORBELL_WCWR, DOORBELL_UDBWC, DOORBELL_KDB};
 struct sge_eq {
 	unsigned int flags;	/* MUST be first */
 	unsigned int cntxt_id;	/* SGE context id for the eq */
-	bus_dma_tag_t desc_tag;
-	bus_dmamap_t desc_map;
-	char lockname[16];
 	struct mtx eq_lock;
 
 	struct tx_desc *desc;	/* KVA of descriptor ring */
-	bus_addr_t ba;		/* bus address of descriptor ring */
-	struct sge_qstat *spg;	/* status page, for convenience */
 	uint16_t doorbells;
 	volatile uint32_t *udb;	/* KVA of doorbell (lies within BAR2) */
 	u_int udb_qid;		/* relative qid within the doorbell page */
-	uint16_t cap;		/* max # of desc, for convenience */
-	uint16_t avail;		/* available descriptors, for convenience */
-	uint16_t qsize;		/* size (# of entries) of the queue */
+	uint16_t sidx;		/* index of the entry with the status page */
 	uint16_t cidx;		/* consumer idx (desc idx) */
 	uint16_t pidx;		/* producer idx (desc idx) */
-	uint16_t pending;	/* # of descriptors used since last doorbell */
+	uint16_t equeqidx;	/* EQUEQ last requested at this pidx */
+	uint16_t dbidx;		/* pidx of the most recent doorbell */
 	uint16_t iqid;		/* iq that gets egr_update for the eq */
 	uint8_t tx_chan;	/* tx channel used by the eq */
-	struct task tx_task;
-	struct callout tx_callout;
-
-	/* stats */
+	volatile u_int equiq;	/* EQUIQ outstanding */
 
-	uint32_t egr_update;	/* # of SGE_EGR_UPDATE notifications for eq */
-	uint32_t unstalled;	/* recovered from stall */
+	bus_dma_tag_t desc_tag;
+	bus_dmamap_t desc_map;
+	bus_addr_t ba;		/* bus address of descriptor ring */
+	char lockname[16];
 };
 
 struct sw_zone_info {
@@ -492,18 +469,19 @@ struct sge_fl {
 	struct cluster_layout cll_alt;	/* alternate refill zone, layout */
 };
 
+struct mp_ring;
+
 /* txq: SGE egress queue + what's needed for Ethernet NIC */
 struct sge_txq {
 	struct sge_eq eq;	/* MUST be first */
 
 	struct ifnet *ifp;	/* the interface this txq belongs to */
-	bus_dma_tag_t tx_tag;	/* tag for transmit buffers */
-	struct buf_ring *br;	/* tx buffer ring */
+	struct mp_ring *r;	/* tx software ring */
 	struct tx_sdesc *sdesc;	/* KVA of software descriptor ring */
-	struct mbuf *m;		/* held up due to temporary resource shortage */
-
-	struct tx_maps txmaps;
+	struct sglist *gl;
+	__be32 cpl_ctrl0;	/* for convenience */
 
+	struct task tx_reclaim_task;
 	/* stats for common events first */
 	uint64_t txcsum;	/* # of times hardware assisted with checksum */
@@ -512,13 +490,12 @@ struct sge_txq {
 	uint64_t imm_wrs;	/* # of work requests with immediate data */
 	uint64_t sgl_wrs;	/* # of work requests with direct SGL */
 	uint64_t txpkt_wrs;	/* # of txpkt work requests (not coalesced) */
-	uint64_t txpkts_wrs;	/* # of coalesced tx work requests */
-	uint64_t txpkts_pkts;	/* # of frames in coalesced tx work requests */
+	uint64_t txpkts0_wrs;	/* # of type0 coalesced tx work requests */
+	uint64_t txpkts1_wrs;	/* # of type1 coalesced tx work requests */
+	uint64_t txpkts0_pkts;	/* # of frames in type0 coalesced tx WRs */
+	uint64_t txpkts1_pkts;	/* # of frames in type1 coalesced tx WRs */
 
 	/* stats for not-that-common events */
-
-	uint32_t no_dmamap;	/* no DMA map to load the mbuf */
-	uint32_t no_desc;	/* out of hardware descriptors */
 } __aligned(CACHE_LINE_SIZE);
 
 /* rxq: SGE ingress queue + SGE free list + miscellaneous items */
@@ -567,7 +544,13 @@ struct wrqe {
 	STAILQ_ENTRY(wrqe) link;
 	struct sge_wrq *wrq;
 	int wr_len;
-	uint64_t wr[] __aligned(16);
+	char wr[] __aligned(16);
+};
+
+struct wrq_cookie {
+	TAILQ_ENTRY(wrq_cookie) link;
+	int ndesc;
+	int pidx;
 };
 
 /*
@@ -578,17 +561,32 @@
 struct sge_wrq {
 	struct sge_eq eq;	/* MUST be first */
 
 	struct adapter *adapter;
+	struct task wrq_tx_task;
+
+	/* Tx desc reserved but WR not "committed" yet. */
+	TAILQ_HEAD(wrq_incomplete_wrs , wrq_cookie) incomplete_wrs;
 
-	/* List of WRs held up due to lack of tx descriptors */
+	/* List of WRs ready to go out as soon as descriptors are available. */
 	STAILQ_HEAD(, wrqe) wr_list;
+	u_int nwr_pending;
+	u_int ndesc_needed;
 
 	/* stats for common events first */
-	uint64_t tx_wrs;	/* # of tx work requests */
+	uint64_t tx_wrs_direct;	/* # of WRs written directly to desc ring. */
+	uint64_t tx_wrs_ss;	/* # of WRs copied from scratch space. */
+	uint64_t tx_wrs_copied;	/* # of WRs queued and copied to desc ring. */
 
 	/* stats for not-that-common events */
-	uint32_t no_desc;	/* out of hardware descriptors */
+
+	/*
+	 * Scratch space for work requests that wrap around after reaching the
+	 * status page, and some infomation about the last WR that used it.
+	 */
+	uint16_t ss_pidx;
+	uint16_t ss_len;
+	uint8_t ss[SGE_MAX_WR_LEN];
+
 } __aligned(CACHE_LINE_SIZE);
 
@@ -737,7 +735,7 @@ struct adapter {
 	struct sge sge;
 	int lro_timeout;
 
-	struct taskqueue *tq[NCHAN];	/* taskqueues that flush data out */
+	struct taskqueue *tq[NCHAN];	/* General purpose taskqueues */
 	struct port_info *port[MAX_NPORTS];
 	uint8_t chan_map[NCHAN];
 
@@ -970,12 +968,11 @@ static inline int
 tx_resume_threshold(struct sge_eq *eq)
 {
-	return (eq->qsize / 4);
+	/* not quite the same as qsize / 4, but this will do. */
+	return (eq->sidx / 4);
 }
 
 /* t4_main.c */
-void t4_tx_task(void *, int);
-void t4_tx_callout(void *);
 int t4_os_find_pci_capability(struct adapter *, int);
 int t4_os_pci_save_state(struct adapter *);
 int t4_os_pci_restore_state(struct adapter *);
@@ -1016,16 +1013,15 @@ int t4_setup_adapter_queues(struct adapter *);
 int t4_teardown_adapter_queues(struct adapter *);
 int t4_setup_port_queues(struct port_info *);
 int t4_teardown_port_queues(struct port_info *);
-int t4_alloc_tx_maps(struct tx_maps *, bus_dma_tag_t, int, int);
-void t4_free_tx_maps(struct tx_maps *, bus_dma_tag_t);
 void t4_intr_all(void *);
 void t4_intr(void *);
 void t4_intr_err(void *);
 void t4_intr_evt(void *);
 void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *);
-int t4_eth_tx(struct ifnet *, struct sge_txq *, struct mbuf *);
 void t4_update_fl_bufsize(struct ifnet *);
-int can_resume_tx(struct sge_eq *);
+int parse_pkt(struct mbuf **);
+void *start_wrq_wr(struct sge_wrq *, int, struct wrq_cookie *);
+void commit_wrq_wr(struct sge_wrq *, void *, struct wrq_cookie *);
 int tnl_cong(struct port_info *);
 
 /* t4_tracer.c */
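One non-obvious piece of the reworked struct sge_wrq above is the ss[]
scratch space: a work request reserved near the end of the ring may wrap
past the entry that holds the status page, so it is staged in ss[] and
copied out in two pieces.  A sketch under those assumptions (field
semantics are inferred from the comments in the diff; ss_len is taken to
be in bytes and sidx to be the wrap point):

	static void
	copy_wr_from_scratch_sketch(struct sge_wrq *wrq)
	{
		struct sge_eq *eq = &wrq->eq;
		int n = EQ_ESIZE * (eq->sidx - wrq->ss_pidx);	/* bytes before wrap */

		/* First chunk up to the status page, the rest at desc[0]. */
		memcpy(&eq->desc[wrq->ss_pidx], wrq->ss, n);
		memcpy(&eq->desc[0], &wrq->ss[n], wrq->ss_len - n);
	}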