From 8199d3a79c224bbe5943fa08684e1f93a17881b0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 30 Mar 2005 13:34:31 -0800 Subject: [PATCH] A new 10GB Ethernet Driver by Chelsio Communications A Linux driver for the Chelsio 10Gb Ethernet Network Controller by Chelsio (http://www.chelsio.com). This driver supports the Chelsio N210 NIC and is backward compatible with the Chelsio N110 model 10Gb NICs. It supports AMD64, EM64T and x86 systems. Signed-off-by: Tina Yang Signed-off-by: Scott Bardone Signed-off-by: Christoph Lameter Adrian said: - my3126.c is unused (because t1_my3126_ops isn't used anywhere) - what are the EXTRA_CFLAGS in drivers/net/chelsio/Makefile for? - $(cxgb-y) in drivers/net/chelsio/Makefile seems to be unneeded - completely unused global functions: - espi.c: t1_espi_get_intr_counts - sge.c: t1_sge_get_intr_counts - the following functions can be made static: - sge.c: t1_espi_workaround - sge.c: t1_sge_tx - subr.c: __t1_tpi_read - subr.c: __t1_tpi_write - subr.c: t1_wait_op_done shemminger said: The performance recommendations in cxgb.txt are common to all fast devices, and should be in one file rather than just for this device. I would rather see ip-sysctl.txt updated or a new file on tuning recommendations started. Some of them have consequences that aren't documented well. For example, turning off TCP timestamps risks data corruption from sequence wrap. A new driver shouldn't need so many #ifdef's unless you want to put it on older vendor versions of 2.4 Some accessor and wrapper functions like: t1_pci_read_config_4 adapter_name t1_malloc are just annoying noise. Why have useless dead code like: /* Interrupt handler */ +static int pm3393_interrupt_handler(struct cmac *cmac) +{ + u32 master_intr_status; +/* + 1. Read master interrupt register. + 2. Read BLOCK's interrupt status registers. + 3. Handle BLOCK interrupts. +*/ Jeff said: step 1: kill all the OS wrappers. 
And do you really need hooks for multiple MACs, when only one MAC is really supported? Typically these hooks are at a higher level anyway -- struct net_device. From: Christoph Lameter Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton diff -puN /dev/null Documentation/networking/cxgb.txt --- drivers/net/chelsio/sge.c | 1451 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1451 insertions(+) create mode 100644 drivers/net/chelsio/sge.c (limited to 'drivers/net/chelsio/sge.c') diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c new file mode 100644 index 0000000..bcf8b1e --- /dev/null +++ b/drivers/net/chelsio/sge.c @@ -0,0 +1,1451 @@ +/***************************************************************************** + * * + * File: sge.c * + * $Revision: 1.13 $ * + * $Date: 2005/03/23 07:41:27 $ * + * Description: * + * DMA engine. * + * part of the Chelsio 10Gb Ethernet Driver. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. 
* + * * + * Maintainers: maintainers@chelsio.com * + * * + * Authors: Dimitrios Michailidis * + * Tina Yang * + * Felix Marti * + * Scott Bardone * + * Kurt Ottaway * + * Frank DiMambro * + * * + * History: * + * * + ****************************************************************************/ + +#include "common.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cpl5_cmd.h" +#include "sge.h" +#include "regs.h" +#include "espi.h" + +#include + +#define SGE_CMDQ_N 2 +#define SGE_FREELQ_N 2 +#define SGE_CMDQ0_E_N 512 +#define SGE_CMDQ1_E_N 128 +#define SGE_FREEL_SIZE 4096 +#define SGE_JUMBO_FREEL_SIZE 512 +#define SGE_FREEL_REFILL_THRESH 16 +#define SGE_RESPQ_E_N 1024 +#define SGE_INTR_BUCKETSIZE 100 +#define SGE_INTR_LATBUCKETS 5 +#define SGE_INTR_MAXBUCKETS 11 +#define SGE_INTRTIMER0 1 +#define SGE_INTRTIMER1 50 +#define SGE_INTRTIMER_NRES 10000 +#define SGE_RX_COPY_THRESHOLD 256 +#define SGE_RX_SM_BUF_SIZE 1536 + +#define SGE_RESPQ_REPLENISH_THRES ((3 * SGE_RESPQ_E_N) / 4) + +#define SGE_RX_OFFSET 2 +#ifndef NET_IP_ALIGN +# define NET_IP_ALIGN SGE_RX_OFFSET +#endif + +/* + * Memory Mapped HW Command, Freelist and Response Queue Descriptors + */ +#if defined(__BIG_ENDIAN_BITFIELD) +struct cmdQ_e { + u32 AddrLow; + u32 GenerationBit : 1; + u32 BufferLength : 31; + u32 RespQueueSelector : 4; + u32 ResponseTokens : 12; + u32 CmdId : 8; + u32 Reserved : 3; + u32 TokenValid : 1; + u32 Eop : 1; + u32 Sop : 1; + u32 DataValid : 1; + u32 GenerationBit2 : 1; + u32 AddrHigh; +}; + +struct freelQ_e { + u32 AddrLow; + u32 GenerationBit : 1; + u32 BufferLength : 31; + u32 Reserved : 31; + u32 GenerationBit2 : 1; + u32 AddrHigh; +}; + +struct respQ_e { + u32 Qsleeping : 4; + u32 Cmdq1CreditReturn : 5; + u32 Cmdq1DmaComplete : 5; + u32 Cmdq0CreditReturn : 5; + u32 Cmdq0DmaComplete : 5; + u32 FreelistQid : 2; + u32 CreditValid : 1; + u32 DataValid : 1; + u32 Offload : 1; + u32 Eop : 1; 
+ u32 Sop : 1; + u32 GenerationBit : 1; + u32 BufferLength; +}; + +#elif defined(__LITTLE_ENDIAN_BITFIELD) +struct cmdQ_e { + u32 BufferLength : 31; + u32 GenerationBit : 1; + u32 AddrLow; + u32 AddrHigh; + u32 GenerationBit2 : 1; + u32 DataValid : 1; + u32 Sop : 1; + u32 Eop : 1; + u32 TokenValid : 1; + u32 Reserved : 3; + u32 CmdId : 8; + u32 ResponseTokens : 12; + u32 RespQueueSelector : 4; +}; + +struct freelQ_e { + u32 BufferLength : 31; + u32 GenerationBit : 1; + u32 AddrLow; + u32 AddrHigh; + u32 GenerationBit2 : 1; + u32 Reserved : 31; +}; + +struct respQ_e { + u32 BufferLength; + u32 GenerationBit : 1; + u32 Sop : 1; + u32 Eop : 1; + u32 Offload : 1; + u32 DataValid : 1; + u32 CreditValid : 1; + u32 FreelistQid : 2; + u32 Cmdq0DmaComplete : 5; + u32 Cmdq0CreditReturn : 5; + u32 Cmdq1DmaComplete : 5; + u32 Cmdq1CreditReturn : 5; + u32 Qsleeping : 4; +} ; +#endif + +/* + * SW Context Command and Freelist Queue Descriptors + */ +struct cmdQ_ce { + struct sk_buff *skb; + DECLARE_PCI_UNMAP_ADDR(dma_addr); + DECLARE_PCI_UNMAP_LEN(dma_len); + unsigned int single; +}; + +struct freelQ_ce { + struct sk_buff *skb; + DECLARE_PCI_UNMAP_ADDR(dma_addr); + DECLARE_PCI_UNMAP_LEN(dma_len); +}; + +/* + * SW Command, Freelist and Response Queue + */ +struct cmdQ { + atomic_t asleep; /* HW DMA Fetch status */ + atomic_t credits; /* # available descriptors for TX */ + atomic_t pio_pidx; /* Variable updated on Doorbell */ + u16 entries_n; /* # descriptors for TX */ + u16 pidx; /* producer index (SW) */ + u16 cidx; /* consumer index (HW) */ + u8 genbit; /* current generation (=valid) bit */ + struct cmdQ_e *entries; /* HW command descriptor Q */ + struct cmdQ_ce *centries; /* SW command context descriptor Q */ + spinlock_t Qlock; /* Lock to protect cmdQ enqueuing */ + dma_addr_t dma_addr; /* DMA addr HW command descriptor Q */ +}; + +struct freelQ { + unsigned int credits; /* # of available RX buffers */ + unsigned int entries_n; /* free list capacity */ + u16 pidx; /* producer 
index (SW) */ + u16 cidx; /* consumer index (HW) */ + u16 rx_buffer_size; /* Buffer size on this free list */ + u16 dma_offset; /* DMA offset to align IP headers */ + u8 genbit; /* current generation (=valid) bit */ + struct freelQ_e *entries; /* HW freelist descriptor Q */ + struct freelQ_ce *centries; /* SW freelist context descriptor Q */ + dma_addr_t dma_addr; /* DMA addr HW freelist descriptor Q */ +}; + +struct respQ { + u16 credits; /* # of available respQ descriptors */ + u16 credits_pend; /* # of not yet returned descriptors */ + u16 entries_n; /* # of response Q descriptors */ + u16 pidx; /* producer index (HW) */ + u16 cidx; /* consumer index (SW) */ + u8 genbit; /* current generation(=valid) bit */ + struct respQ_e *entries; /* HW response descriptor Q */ + dma_addr_t dma_addr; /* DMA addr HW response descriptor Q */ +}; + +/* + * Main SGE data structure + * + * Interrupts are handled by a single CPU and it is likely that on a MP system + * the application is migrated to another CPU. In that scenario, we try to + * separate the RX(in irq context) and TX state in order to decrease memory + * contention. + */ +struct sge { + struct adapter *adapter; /* adapter backpointer */ + struct freelQ freelQ[SGE_FREELQ_N]; /* freelist Q(s) */ + struct respQ respQ; /* response Q instantiation */ + unsigned int rx_pkt_pad; /* RX padding for L2 packets */ + unsigned int jumbo_fl; /* jumbo freelist Q index */ + u32 intrtimer[SGE_INTR_MAXBUCKETS]; /* ! 
*/ + u32 currIndex; /* current index into intrtimer[] */ + u32 intrtimer_nres; /* no resource interrupt timer value */ + u32 sge_control; /* shadow content of sge control reg */ + struct sge_intr_counts intr_cnt; + struct timer_list ptimer; + struct sk_buff *pskb; + u32 ptimeout; + struct cmdQ cmdQ[SGE_CMDQ_N] ____cacheline_aligned; /* command Q(s)*/ +}; + +static unsigned int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, + unsigned int qid); + +/* + * PIO to indicate that memory mapped Q contains valid descriptor(s). + */ +static inline void doorbell_pio(struct sge *sge, u32 val) +{ + wmb(); + t1_write_reg_4(sge->adapter, A_SG_DOORBELL, val); +} + +/* + * Disables the DMA engine. + */ +void t1_sge_stop(struct sge *sge) +{ + t1_write_reg_4(sge->adapter, A_SG_CONTROL, 0); + t1_read_reg_4(sge->adapter, A_SG_CONTROL); /* flush write */ + if (is_T2(sge->adapter)) + del_timer_sync(&sge->ptimer); +} + +static u8 ch_mac_addr[ETH_ALEN] = {0x0, 0x7, 0x43, 0x0, 0x0, 0x0}; +static void t1_espi_workaround(void *data) +{ + struct adapter *adapter = (struct adapter *)data; + struct sge *sge = adapter->sge; + + if (netif_running(adapter->port[0].dev) && + atomic_read(&sge->cmdQ[0].asleep)) { + + u32 seop = t1_espi_get_mon(adapter, 0x930, 0); + + if ((seop & 0xfff0fff) == 0xfff && sge->pskb) { + struct sk_buff *skb = sge->pskb; + if (!skb->cb[0]) { + memcpy(skb->data+sizeof(struct cpl_tx_pkt), ch_mac_addr, ETH_ALEN); + memcpy(skb->data+skb->len-10, ch_mac_addr, ETH_ALEN); + + skb->cb[0] = 0xff; + } + t1_sge_tx(skb, adapter,0); + } + } + mod_timer(&adapter->sge->ptimer, jiffies + sge->ptimeout); +} + +/* + * Enables the DMA engine. 
+ */ +void t1_sge_start(struct sge *sge) +{ + t1_write_reg_4(sge->adapter, A_SG_CONTROL, sge->sge_control); + t1_read_reg_4(sge->adapter, A_SG_CONTROL); /* flush write */ + if (is_T2(sge->adapter)) { + init_timer(&sge->ptimer); + sge->ptimer.function = (void *)&t1_espi_workaround; + sge->ptimer.data = (unsigned long)sge->adapter; + sge->ptimer.expires = jiffies + sge->ptimeout; + add_timer(&sge->ptimer); + } +} + +/* + * Creates a t1_sge structure and returns suggested resource parameters. + */ +struct sge * __devinit t1_sge_create(struct adapter *adapter, + struct sge_params *p) +{ + struct sge *sge = kmalloc(sizeof(*sge), GFP_KERNEL); + + if (!sge) + return NULL; + memset(sge, 0, sizeof(*sge)); + + if (is_T2(adapter)) + sge->ptimeout = 1; /* finest allowed */ + + sge->adapter = adapter; + sge->rx_pkt_pad = t1_is_T1B(adapter) ? 0 : SGE_RX_OFFSET; + sge->jumbo_fl = t1_is_T1B(adapter) ? 1 : 0; + + p->cmdQ_size[0] = SGE_CMDQ0_E_N; + p->cmdQ_size[1] = SGE_CMDQ1_E_N; + p->freelQ_size[!sge->jumbo_fl] = SGE_FREEL_SIZE; + p->freelQ_size[sge->jumbo_fl] = SGE_JUMBO_FREEL_SIZE; + p->rx_coalesce_usecs = SGE_INTRTIMER1; + p->last_rx_coalesce_raw = SGE_INTRTIMER1 * + (board_info(sge->adapter)->clock_core / 1000000); + p->default_rx_coalesce_usecs = SGE_INTRTIMER1; + p->coalesce_enable = 0; /* Turn off adaptive algorithm by default */ + p->sample_interval_usecs = 0; + return sge; +} + +/* + * Frees all RX buffers on the freelist Q. The caller must make sure that + * the SGE is turned off before calling this function. + */ +static void free_freelQ_buffers(struct pci_dev *pdev, struct freelQ *Q) +{ + unsigned int cidx = Q->cidx, credits = Q->credits; + + while (credits--) { + struct freelQ_ce *ce = &Q->centries[cidx]; + + pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); + dev_kfree_skb(ce->skb); + ce->skb = NULL; + if (++cidx == Q->entries_n) + cidx = 0; + } +} + +/* + * Free RX free list and response queue resources. 
+ */ +static void free_rx_resources(struct sge *sge) +{ + struct pci_dev *pdev = sge->adapter->pdev; + unsigned int size, i; + + if (sge->respQ.entries) { + size = sizeof(struct respQ_e) * sge->respQ.entries_n; + pci_free_consistent(pdev, size, sge->respQ.entries, + sge->respQ.dma_addr); + } + + for (i = 0; i < SGE_FREELQ_N; i++) { + struct freelQ *Q = &sge->freelQ[i]; + + if (Q->centries) { + free_freelQ_buffers(pdev, Q); + kfree(Q->centries); + } + if (Q->entries) { + size = sizeof(struct freelQ_e) * Q->entries_n; + pci_free_consistent(pdev, size, Q->entries, + Q->dma_addr); + } + } +} + +/* + * Allocates basic RX resources, consisting of memory mapped freelist Qs and a + * response Q. + */ +static int alloc_rx_resources(struct sge *sge, struct sge_params *p) +{ + struct pci_dev *pdev = sge->adapter->pdev; + unsigned int size, i; + + for (i = 0; i < SGE_FREELQ_N; i++) { + struct freelQ *Q = &sge->freelQ[i]; + + Q->genbit = 1; + Q->entries_n = p->freelQ_size[i]; + Q->dma_offset = SGE_RX_OFFSET - sge->rx_pkt_pad; + size = sizeof(struct freelQ_e) * Q->entries_n; + Q->entries = (struct freelQ_e *) + pci_alloc_consistent(pdev, size, &Q->dma_addr); + if (!Q->entries) + goto err_no_mem; + memset(Q->entries, 0, size); + Q->centries = kcalloc(Q->entries_n, sizeof(struct freelQ_ce), + GFP_KERNEL); + if (!Q->centries) + goto err_no_mem; + } + + /* + * Calculate the buffer sizes for the two free lists. FL0 accommodates + * regular sized Ethernet frames, FL1 is sized not to exceed 16K, + * including all the sk_buff overhead. + * + * Note: For T2 FL0 and FL1 are reversed. 
+ */ + sge->freelQ[!sge->jumbo_fl].rx_buffer_size = SGE_RX_SM_BUF_SIZE + + sizeof(struct cpl_rx_data) + + sge->freelQ[!sge->jumbo_fl].dma_offset; + sge->freelQ[sge->jumbo_fl].rx_buffer_size = (16 * 1024) - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + sge->respQ.genbit = 1; + sge->respQ.entries_n = SGE_RESPQ_E_N; + sge->respQ.credits = SGE_RESPQ_E_N; + size = sizeof(struct respQ_e) * sge->respQ.entries_n; + sge->respQ.entries = (struct respQ_e *) + pci_alloc_consistent(pdev, size, &sge->respQ.dma_addr); + if (!sge->respQ.entries) + goto err_no_mem; + memset(sge->respQ.entries, 0, size); + return 0; + +err_no_mem: + free_rx_resources(sge); + return -ENOMEM; +} + +/* + * Frees 'credits_pend' TX buffers and returns the credits to Q->credits. + * + * The adaptive algorithm receives the total size of the buffers freed + * accumulated in @*totpayload. No initialization of this argument here. + * + */ +static void free_cmdQ_buffers(struct sge *sge, struct cmdQ *Q, + unsigned int credits_pend, unsigned int *totpayload) +{ + struct pci_dev *pdev = sge->adapter->pdev; + struct sk_buff *skb; + struct cmdQ_ce *ce, *cq = Q->centries; + unsigned int entries_n = Q->entries_n, cidx = Q->cidx, + i = credits_pend; + + + ce = &cq[cidx]; + while (i--) { + if (ce->single) + pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_TODEVICE); + else + pci_unmap_page(pdev, pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_TODEVICE); + if (totpayload) + *totpayload += pci_unmap_len(ce, dma_len); + + skb = ce->skb; + if (skb) + dev_kfree_skb_irq(skb); + + ce++; + if (++cidx == entries_n) { + cidx = 0; + ce = cq; + } + } + + Q->cidx = cidx; + atomic_add(credits_pend, &Q->credits); +} + +/* + * Free TX resources. + * + * Assumes that SGE is stopped and all interrupts are disabled. 
+ */ +static void free_tx_resources(struct sge *sge) +{ + struct pci_dev *pdev = sge->adapter->pdev; + unsigned int size, i; + + for (i = 0; i < SGE_CMDQ_N; i++) { + struct cmdQ *Q = &sge->cmdQ[i]; + + if (Q->centries) { + unsigned int pending = Q->entries_n - + atomic_read(&Q->credits); + + if (pending) + free_cmdQ_buffers(sge, Q, pending, NULL); + kfree(Q->centries); + } + if (Q->entries) { + size = sizeof(struct cmdQ_e) * Q->entries_n; + pci_free_consistent(pdev, size, Q->entries, + Q->dma_addr); + } + } +} + +/* + * Allocates basic TX resources, consisting of memory mapped command Qs. + */ +static int alloc_tx_resources(struct sge *sge, struct sge_params *p) +{ + struct pci_dev *pdev = sge->adapter->pdev; + unsigned int size, i; + + for (i = 0; i < SGE_CMDQ_N; i++) { + struct cmdQ *Q = &sge->cmdQ[i]; + + Q->genbit = 1; + Q->entries_n = p->cmdQ_size[i]; + atomic_set(&Q->credits, Q->entries_n); + atomic_set(&Q->asleep, 1); + spin_lock_init(&Q->Qlock); + size = sizeof(struct cmdQ_e) * Q->entries_n; + Q->entries = (struct cmdQ_e *) + pci_alloc_consistent(pdev, size, &Q->dma_addr); + if (!Q->entries) + goto err_no_mem; + memset(Q->entries, 0, size); + Q->centries = kcalloc(Q->entries_n, sizeof(struct cmdQ_ce), + GFP_KERNEL); + if (!Q->centries) + goto err_no_mem; + } + + return 0; + +err_no_mem: + free_tx_resources(sge); + return -ENOMEM; +} + +static inline void setup_ring_params(struct adapter *adapter, u64 addr, + u32 size, int base_reg_lo, + int base_reg_hi, int size_reg) +{ + t1_write_reg_4(adapter, base_reg_lo, (u32)addr); + t1_write_reg_4(adapter, base_reg_hi, addr >> 32); + t1_write_reg_4(adapter, size_reg, size); +} + +/* + * Enable/disable VLAN acceleration. 
+ */ +void t1_set_vlan_accel(struct adapter *adapter, int on_off) +{ + struct sge *sge = adapter->sge; + + sge->sge_control &= ~F_VLAN_XTRACT; + if (on_off) + sge->sge_control |= F_VLAN_XTRACT; + if (adapter->open_device_map) { + t1_write_reg_4(adapter, A_SG_CONTROL, sge->sge_control); + t1_read_reg_4(adapter, A_SG_CONTROL); /* flush */ + } +} + +/* + * Sets the interrupt latency timer when the adaptive Rx coalescing + * is turned off. Do nothing when it is turned on again. + * + * This routine relies on the fact that the caller has already set + * the adaptive policy in adapter->sge_params before calling it. +*/ +int t1_sge_set_coalesce_params(struct sge *sge, struct sge_params *p) +{ + if (!p->coalesce_enable) { + u32 newTimer = p->rx_coalesce_usecs * + (board_info(sge->adapter)->clock_core / 1000000); + + t1_write_reg_4(sge->adapter, A_SG_INTRTIMER, newTimer); + } + return 0; +} + +/* + * Programs the various SGE registers. However, the engine is not yet enabled, + * but sge->sge_control is setup and ready to go. + */ +static void configure_sge(struct sge *sge, struct sge_params *p) +{ + struct adapter *ap = sge->adapter; + int i; + + t1_write_reg_4(ap, A_SG_CONTROL, 0); + setup_ring_params(ap, sge->cmdQ[0].dma_addr, sge->cmdQ[0].entries_n, + A_SG_CMD0BASELWR, A_SG_CMD0BASEUPR, A_SG_CMD0SIZE); + setup_ring_params(ap, sge->cmdQ[1].dma_addr, sge->cmdQ[1].entries_n, + A_SG_CMD1BASELWR, A_SG_CMD1BASEUPR, A_SG_CMD1SIZE); + setup_ring_params(ap, sge->freelQ[0].dma_addr, + sge->freelQ[0].entries_n, A_SG_FL0BASELWR, + A_SG_FL0BASEUPR, A_SG_FL0SIZE); + setup_ring_params(ap, sge->freelQ[1].dma_addr, + sge->freelQ[1].entries_n, A_SG_FL1BASELWR, + A_SG_FL1BASEUPR, A_SG_FL1SIZE); + + /* The threshold comparison uses <. 
*/ + t1_write_reg_4(ap, A_SG_FLTHRESHOLD, SGE_RX_SM_BUF_SIZE + 1); + + setup_ring_params(ap, sge->respQ.dma_addr, sge->respQ.entries_n, + A_SG_RSPBASELWR, A_SG_RSPBASEUPR, A_SG_RSPSIZE); + t1_write_reg_4(ap, A_SG_RSPQUEUECREDIT, (u32)sge->respQ.entries_n); + + sge->sge_control = F_CMDQ0_ENABLE | F_CMDQ1_ENABLE | F_FL0_ENABLE | + F_FL1_ENABLE | F_CPL_ENABLE | F_RESPONSE_QUEUE_ENABLE | + V_CMDQ_PRIORITY(2) | F_DISABLE_CMDQ1_GTS | F_ISCSI_COALESCE | + V_RX_PKT_OFFSET(sge->rx_pkt_pad); + +#if defined(__BIG_ENDIAN_BITFIELD) + sge->sge_control |= F_ENABLE_BIG_ENDIAN; +#endif + + /* + * Initialize the SGE Interrupt Timer array: + * intrtimer[0] = (SGE_INTRTIMER0) usec + * intrtimer[1..4] = (SGE_INTRTIMER0 + 2 * i) usec + * intrtimer[5..9] = ((i - 3) * 6) usec + * intrtimer[10] = (SGE_INTRTIMER1) usec + * intrtimer_nres = (SGE_INTRTIMER_NRES) usec + */ + sge->intrtimer[0] = board_info(sge->adapter)->clock_core / 1000000; + for (i = 1; i < SGE_INTR_LATBUCKETS; ++i) { + sge->intrtimer[i] = SGE_INTRTIMER0 + (2 * i); + sge->intrtimer[i] *= sge->intrtimer[0]; + } + for (i = SGE_INTR_LATBUCKETS; i < SGE_INTR_MAXBUCKETS - 1; ++i) { + sge->intrtimer[i] = (i - 3) * 6; + sge->intrtimer[i] *= sge->intrtimer[0]; + } + sge->intrtimer[SGE_INTR_MAXBUCKETS - 1] = + sge->intrtimer[0] * SGE_INTRTIMER1; + /* Initialize resource timer */ + sge->intrtimer_nres = sge->intrtimer[0] * SGE_INTRTIMER_NRES; + /* Finally finish initialization of intrtimer[0] */ + sge->intrtimer[0] *= SGE_INTRTIMER0; + /* Initialize for a throughput oriented workload */ + sge->currIndex = SGE_INTR_MAXBUCKETS - 1; + + if (p->coalesce_enable) + t1_write_reg_4(ap, A_SG_INTRTIMER, + sge->intrtimer[sge->currIndex]); + else + t1_sge_set_coalesce_params(sge, p); +} + +/* + * Return the payload capacity of the jumbo free-list buffers. + */ +static inline unsigned int jumbo_payload_capacity(const struct sge *sge) +{ + return sge->freelQ[sge->jumbo_fl].rx_buffer_size - + sizeof(struct cpl_rx_data) - SGE_RX_OFFSET + sge->rx_pkt_pad; +} + +/* + * Allocates both RX and TX resources and configures the SGE. However, + * the hardware is not enabled yet. 
+ */ +int t1_sge_configure(struct sge *sge, struct sge_params *p) +{ + if (alloc_rx_resources(sge, p)) + return -ENOMEM; + if (alloc_tx_resources(sge, p)) { + free_rx_resources(sge); + return -ENOMEM; + } + configure_sge(sge, p); + + /* + * Now that we have sized the free lists calculate the payload + * capacity of the large buffers. Other parts of the driver use + * this to set the max offload coalescing size so that RX packets + * do not overflow our large buffers. + */ + p->large_buf_capacity = jumbo_payload_capacity(sge); + return 0; +} + +/* + * Frees all SGE related resources and the sge structure itself + */ +void t1_sge_destroy(struct sge *sge) +{ + if (sge->pskb) + dev_kfree_skb(sge->pskb); + free_tx_resources(sge); + free_rx_resources(sge); + kfree(sge); +} + +/* + * Allocates new RX buffers on the freelist Q (and tracks them on the freelist + * context Q) until the Q is full or alloc_skb fails. + * + * It is possible that the generation bits already match, indicating that the + * buffer is already valid and nothing needs to be done. This happens when we + * copied a received buffer into a new sk_buff during the interrupt processing. + * + * If the SGE doesn't automatically align packets properly (!sge->rx_pkt_pad), + * we specify a RX_OFFSET in order to make sure that the IP header is 4B + * aligned. 
+ */ +static void refill_free_list(struct sge *sge, struct freelQ *Q) +{ + struct pci_dev *pdev = sge->adapter->pdev; + struct freelQ_ce *ce = &Q->centries[Q->pidx]; + struct freelQ_e *e = &Q->entries[Q->pidx]; + unsigned int dma_len = Q->rx_buffer_size - Q->dma_offset; + + + while (Q->credits < Q->entries_n) { + if (e->GenerationBit != Q->genbit) { + struct sk_buff *skb; + dma_addr_t mapping; + + skb = alloc_skb(Q->rx_buffer_size, GFP_ATOMIC); + if (!skb) + break; + if (Q->dma_offset) + skb_reserve(skb, Q->dma_offset); + mapping = pci_map_single(pdev, skb->data, dma_len, + PCI_DMA_FROMDEVICE); + ce->skb = skb; + pci_unmap_addr_set(ce, dma_addr, mapping); + pci_unmap_len_set(ce, dma_len, dma_len); + e->AddrLow = (u32)mapping; + e->AddrHigh = (u64)mapping >> 32; + e->BufferLength = dma_len; + e->GenerationBit = e->GenerationBit2 = Q->genbit; + } + + e++; + ce++; + if (++Q->pidx == Q->entries_n) { + Q->pidx = 0; + Q->genbit ^= 1; + ce = Q->centries; + e = Q->entries; + } + Q->credits++; + } + +} + +/* + * Calls refill_free_list for both freelist Qs. If we cannot + * fill at least 1/4 of both Qs, we go into 'few interrupt mode' in order + * to give the system time to free up resources. 
+ */ +static void freelQs_empty(struct sge *sge) +{ + u32 irq_reg = t1_read_reg_4(sge->adapter, A_SG_INT_ENABLE); + u32 irqholdoff_reg; + + refill_free_list(sge, &sge->freelQ[0]); + refill_free_list(sge, &sge->freelQ[1]); + + if (sge->freelQ[0].credits > (sge->freelQ[0].entries_n >> 2) && + sge->freelQ[1].credits > (sge->freelQ[1].entries_n >> 2)) { + irq_reg |= F_FL_EXHAUSTED; + irqholdoff_reg = sge->intrtimer[sge->currIndex]; + } else { + /* Clear the F_FL_EXHAUSTED interrupts for now */ + irq_reg &= ~F_FL_EXHAUSTED; + irqholdoff_reg = sge->intrtimer_nres; + } + t1_write_reg_4(sge->adapter, A_SG_INTRTIMER, irqholdoff_reg); + t1_write_reg_4(sge->adapter, A_SG_INT_ENABLE, irq_reg); + + /* We reenable the Qs to force a freelist GTS interrupt later */ + doorbell_pio(sge, F_FL0_ENABLE | F_FL1_ENABLE); +} + +#define SGE_PL_INTR_MASK (F_PL_INTR_SGE_ERR | F_PL_INTR_SGE_DATA) +#define SGE_INT_FATAL (F_RESPQ_OVERFLOW | F_PACKET_TOO_BIG | F_PACKET_MISMATCH) +#define SGE_INT_ENABLE (F_RESPQ_EXHAUSTED | F_RESPQ_OVERFLOW | \ + F_FL_EXHAUSTED | F_PACKET_TOO_BIG | F_PACKET_MISMATCH) + +/* + * Disable SGE Interrupts + */ +void t1_sge_intr_disable(struct sge *sge) +{ + u32 val = t1_read_reg_4(sge->adapter, A_PL_ENABLE); + + t1_write_reg_4(sge->adapter, A_PL_ENABLE, val & ~SGE_PL_INTR_MASK); + t1_write_reg_4(sge->adapter, A_SG_INT_ENABLE, 0); +} + +/* + * Enable SGE interrupts. + */ +void t1_sge_intr_enable(struct sge *sge) +{ + u32 en = SGE_INT_ENABLE; + u32 val = t1_read_reg_4(sge->adapter, A_PL_ENABLE); + + if (sge->adapter->flags & TSO_CAPABLE) + en &= ~F_PACKET_TOO_BIG; + t1_write_reg_4(sge->adapter, A_SG_INT_ENABLE, en); + t1_write_reg_4(sge->adapter, A_PL_ENABLE, val | SGE_PL_INTR_MASK); +} + +/* + * Clear SGE interrupts. 
+ */ +void t1_sge_intr_clear(struct sge *sge) +{ + t1_write_reg_4(sge->adapter, A_PL_CAUSE, SGE_PL_INTR_MASK); + t1_write_reg_4(sge->adapter, A_SG_INT_CAUSE, 0xffffffff); +} + +/* + * SGE 'Error' interrupt handler + */ +int t1_sge_intr_error_handler(struct sge *sge) +{ + struct adapter *adapter = sge->adapter; + u32 cause = t1_read_reg_4(adapter, A_SG_INT_CAUSE); + + if (adapter->flags & TSO_CAPABLE) + cause &= ~F_PACKET_TOO_BIG; + if (cause & F_RESPQ_EXHAUSTED) + sge->intr_cnt.respQ_empty++; + if (cause & F_RESPQ_OVERFLOW) { + sge->intr_cnt.respQ_overflow++; + CH_ALERT("%s: SGE response queue overflow\n", + adapter->name); + } + if (cause & F_FL_EXHAUSTED) { + sge->intr_cnt.freelistQ_empty++; + freelQs_empty(sge); + } + if (cause & F_PACKET_TOO_BIG) { + sge->intr_cnt.pkt_too_big++; + CH_ALERT("%s: SGE max packet size exceeded\n", + adapter->name); + } + if (cause & F_PACKET_MISMATCH) { + sge->intr_cnt.pkt_mismatch++; + CH_ALERT("%s: SGE packet mismatch\n", adapter->name); + } + if (cause & SGE_INT_FATAL) + t1_fatal_err(adapter); + + t1_write_reg_4(adapter, A_SG_INT_CAUSE, cause); + return 0; +} + +/* + * The following code is copied from 2.6, where the skb_pull is doing the + * right thing and only pulls ETH_HLEN. + * + * Determine the packet's protocol ID. The rule here is that we + * assume 802.3 if the type field is short enough to be a length. + * This is normal practice and works for any 'now in use' protocol. + */ +static unsigned short sge_eth_type_trans(struct sk_buff *skb, + struct net_device *dev) +{ + struct ethhdr *eth; + unsigned char *rawp; + + skb->mac.raw = skb->data; + skb_pull(skb, ETH_HLEN); + eth = (struct ethhdr *)skb->mac.raw; + + if (*eth->h_dest&1) { + if(memcmp(eth->h_dest, dev->broadcast, ETH_ALEN) == 0) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } + + /* + * This ALLMULTI check should be redundant by 1.4 + * so don't forget to remove it. + * + * Seems, you forgot to remove it. 
All silly devices + * seems to set IFF_PROMISC. + */ + + else if (1 /*dev->flags&IFF_PROMISC*/) + { + if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN)) + skb->pkt_type=PACKET_OTHERHOST; + } + + if (ntohs(eth->h_proto) >= 1536) + return eth->h_proto; + + rawp = skb->data; + + /* + * This is a magic hack to spot IPX packets. Older Novell breaks + * the protocol design and runs IPX over 802.3 without an 802.2 LLC + * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This + * won't work for fault tolerant netware but does for the rest. + */ + if (*(unsigned short *)rawp == 0xFFFF) + return htons(ETH_P_802_3); + + /* + * Real 802.2 LLC + */ + return htons(ETH_P_802_2); +} + +/* + * Prepare the received buffer and pass it up the stack. If it is small enough + * and allocation doesn't fail, we use a new sk_buff and copy the content. + */ +static unsigned int t1_sge_rx(struct sge *sge, struct freelQ *Q, + unsigned int len, unsigned int offload) +{ + struct sk_buff *skb; + struct adapter *adapter = sge->adapter; + struct freelQ_ce *ce = &Q->centries[Q->cidx]; + + if (len <= SGE_RX_COPY_THRESHOLD && + (skb = alloc_skb(len + NET_IP_ALIGN, GFP_ATOMIC))) { + struct freelQ_e *e; + char *src = ce->skb->data; + + pci_dma_sync_single_for_cpu(adapter->pdev, + pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); + if (!offload) { + skb_reserve(skb, NET_IP_ALIGN); + src += sge->rx_pkt_pad; + } + memcpy(skb->data, src, len); + + /* Reuse the entry. 
*/ + e = &Q->entries[Q->cidx]; + e->GenerationBit ^= 1; + e->GenerationBit2 ^= 1; + } else { + pci_unmap_single(adapter->pdev, pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); + skb = ce->skb; + if (!offload && sge->rx_pkt_pad) + __skb_pull(skb, sge->rx_pkt_pad); + } + + skb_put(skb, len); + + + if (unlikely(offload)) { + { + printk(KERN_ERR + "%s: unexpected offloaded packet, cmd %u\n", + adapter->name, *skb->data); + dev_kfree_skb_any(skb); + } + } else { + struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)skb->data; + + skb_pull(skb, sizeof(*p)); + skb->dev = adapter->port[p->iff].dev; + skb->dev->last_rx = jiffies; + skb->protocol = sge_eth_type_trans(skb, skb->dev); + if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff && + skb->protocol == htons(ETH_P_IP) && + (skb->data[9] == IPPROTO_TCP || + skb->data[9] == IPPROTO_UDP)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb->ip_summed = CHECKSUM_NONE; + if (adapter->vlan_grp && p->vlan_valid) + vlan_hwaccel_rx(skb, adapter->vlan_grp, + ntohs(p->vlan)); + else + netif_rx(skb); + } + + if (++Q->cidx == Q->entries_n) + Q->cidx = 0; + + if (unlikely(--Q->credits < Q->entries_n - SGE_FREEL_REFILL_THRESH)) + refill_free_list(sge, Q); + return 1; +} + + +/* + * Adaptive interrupt timer logic to keep the CPU utilization to + * manageable levels. Basically, as the Average Packet Size (APS) + * gets higher, the interrupt latency setting gets longer. Every + * SGE_INTR_BUCKETSIZE (of 100B) causes a bump of 2usec to the + * base value of SGE_INTRTIMER0. At large values of payload the + * latency hits the ceiling value of SGE_INTRTIMER1 stored at + * index SGE_INTR_MAXBUCKETS-1 in sge->intrtimer[]. + * + * sge->currIndex caches the last index to save unneeded PIOs. 
+ */ +static inline void update_intr_timer(struct sge *sge, unsigned int avg_payload) +{ + unsigned int newIndex; + + newIndex = avg_payload / SGE_INTR_BUCKETSIZE; + if (newIndex > SGE_INTR_MAXBUCKETS - 1) { + newIndex = SGE_INTR_MAXBUCKETS - 1; + } + /* Save a PIO with this check....maybe */ + if (newIndex != sge->currIndex) { + t1_write_reg_4(sge->adapter, A_SG_INTRTIMER, + sge->intrtimer[newIndex]); + sge->currIndex = newIndex; + sge->adapter->params.sge.last_rx_coalesce_raw = + sge->intrtimer[newIndex]; + } +} + +/* + * Returns true if command queue q_num has enough available descriptors that + * we can resume Tx operation after temporarily disabling its packet queue. + */ +static inline int enough_free_Tx_descs(struct sge *sge, int q_num) +{ + return atomic_read(&sge->cmdQ[q_num].credits) > + (sge->cmdQ[q_num].entries_n >> 2); +} + +/* + * Main interrupt handler, optimized assuming that we took a 'DATA' + * interrupt. + * + * 1. Clear the interrupt + * 2. Loop while we find valid descriptors and process them; accumulate + * information that can be processed after the loop + * 3. Tell the SGE at which index we stopped processing descriptors + * 4. Bookkeeping; free TX buffers, ring doorbell if there are any + * outstanding TX buffers waiting, replenish RX buffers, potentially + * reenable upper layers if they were turned off due to lack of TX + * resources which are available again. + * 5. If we took an interrupt, but no valid respQ descriptors was found we + * let the slow_intr_handler run and do error handling. 
+ */ +irqreturn_t t1_interrupt(int irq, void *cookie, struct pt_regs *regs) +{ + struct net_device *netdev; + struct adapter *adapter = cookie; + struct sge *sge = adapter->sge; + struct respQ *Q = &sge->respQ; + unsigned int credits = Q->credits, flags = 0, ret = 0; + unsigned int tot_rxpayload = 0, tot_txpayload = 0, n_rx = 0, n_tx = 0; + unsigned int credits_pend[SGE_CMDQ_N] = { 0, 0 }; + + struct respQ_e *e = &Q->entries[Q->cidx]; + prefetch(e); + + t1_write_reg_4(adapter, A_PL_CAUSE, F_PL_INTR_SGE_DATA); + + + while (e->GenerationBit == Q->genbit) { + if (--credits < SGE_RESPQ_REPLENISH_THRES) { + u32 n = Q->entries_n - credits - 1; + + t1_write_reg_4(adapter, A_SG_RSPQUEUECREDIT, n); + credits += n; + } + if (likely(e->DataValid)) { + if (!e->Sop || !e->Eop) + BUG(); + t1_sge_rx(sge, &sge->freelQ[e->FreelistQid], + e->BufferLength, e->Offload); + tot_rxpayload += e->BufferLength; + ++n_rx; + } + flags |= e->Qsleeping; + credits_pend[0] += e->Cmdq0CreditReturn; + credits_pend[1] += e->Cmdq1CreditReturn; + +#ifdef CONFIG_SMP + /* + * If enough cmdQ0 buffers have finished DMAing free them so + * anyone that may be waiting for their release can continue. + * We do this only on MP systems to allow other CPUs to proceed + * promptly. UP systems can wait for the free_cmdQ_buffers() + * calls after this loop as the sole CPU is currently busy in + * this loop. 
+ */ + if (unlikely(credits_pend[0] > SGE_FREEL_REFILL_THRESH)) { + free_cmdQ_buffers(sge, &sge->cmdQ[0], credits_pend[0], + &tot_txpayload); + n_tx += credits_pend[0]; + credits_pend[0] = 0; + } +#endif + ret++; + e++; + if (unlikely(++Q->cidx == Q->entries_n)) { + Q->cidx = 0; + Q->genbit ^= 1; + e = Q->entries; + } + } + + Q->credits = credits; + t1_write_reg_4(adapter, A_SG_SLEEPING, Q->cidx); + + if (credits_pend[0]) + free_cmdQ_buffers(sge, &sge->cmdQ[0], credits_pend[0], &tot_txpayload); + if (credits_pend[1]) + free_cmdQ_buffers(sge, &sge->cmdQ[1], credits_pend[1], &tot_txpayload); + + /* Do any coalescing and interrupt latency timer adjustments */ + if (adapter->params.sge.coalesce_enable) { + unsigned int avg_txpayload = 0, avg_rxpayload = 0; + + n_tx += credits_pend[0] + credits_pend[1]; + + /* + * Choose larger avg. payload size to increase + * throughput and reduce [CPU util., intr/s.] + * + * Throughput behavior favored in mixed-mode. + */ + if (n_tx) + avg_txpayload = tot_txpayload/n_tx; + if (n_rx) + avg_rxpayload = tot_rxpayload/n_rx; + + if (n_tx && avg_txpayload > avg_rxpayload){ + update_intr_timer(sge, avg_txpayload); + } else if (n_rx) { + update_intr_timer(sge, avg_rxpayload); + } + } + + if (flags & F_CMDQ0_ENABLE) { + struct cmdQ *cmdQ = &sge->cmdQ[0]; + + atomic_set(&cmdQ->asleep, 1); + if (atomic_read(&cmdQ->pio_pidx) != cmdQ->pidx) { + doorbell_pio(sge, F_CMDQ0_ENABLE); + atomic_set(&cmdQ->pio_pidx, cmdQ->pidx); + } + } + if (unlikely(flags & (F_FL0_ENABLE | F_FL1_ENABLE))) + freelQs_empty(sge); + + netdev = adapter->port[0].dev; + if (unlikely(netif_queue_stopped(netdev) && netif_carrier_ok(netdev) && + enough_free_Tx_descs(sge, 0) && + enough_free_Tx_descs(sge, 1))) { + netif_wake_queue(netdev); + } + if (unlikely(!ret)) + ret = t1_slow_intr_handler(adapter); + + return IRQ_RETVAL(ret != 0); +} + +/* + * Enqueues the sk_buff onto the cmdQ[qid] and has hardware fetch it. 
+ * + * The code figures out how many entries the sk_buff will require in the + * cmdQ and updates the cmdQ data structure with the state once the enqueue + * has complete. Then, it doesn't access the global structure anymore, but + * uses the corresponding fields on the stack. In conjuction with a spinlock + * around that code, we can make the function reentrant without holding the + * lock when we actually enqueue (which might be expensive, especially on + * architectures with IO MMUs). + */ +static unsigned int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, + unsigned int qid) +{ + struct sge *sge = adapter->sge; + struct cmdQ *Q = &sge->cmdQ[qid]; + struct cmdQ_e *e; + struct cmdQ_ce *ce; + dma_addr_t mapping; + unsigned int credits, pidx, genbit; + + unsigned int count = 1 + skb_shinfo(skb)->nr_frags; + + /* + * Coming from the timer + */ + if ((skb == sge->pskb)) { + /* + * Quit if any cmdQ activities + */ + if (!spin_trylock(&Q->Qlock)) + return 0; + if (atomic_read(&Q->credits) != Q->entries_n) { + spin_unlock(&Q->Qlock); + return 0; + } + } + else + spin_lock(&Q->Qlock); + + genbit = Q->genbit; + pidx = Q->pidx; + credits = atomic_read(&Q->credits); + + credits -= count; + atomic_sub(count, &Q->credits); + Q->pidx += count; + if (Q->pidx >= Q->entries_n) { + Q->pidx -= Q->entries_n; + Q->genbit ^= 1; + } + + if (unlikely(credits < (MAX_SKB_FRAGS + 1))) { + sge->intr_cnt.cmdQ_full[qid]++; + netif_stop_queue(adapter->port[0].dev); + } + spin_unlock(&Q->Qlock); + + mapping = pci_map_single(adapter->pdev, skb->data, + skb->len - skb->data_len, PCI_DMA_TODEVICE); + ce = &Q->centries[pidx]; + ce->skb = NULL; + pci_unmap_addr_set(ce, dma_addr, mapping); + pci_unmap_len_set(ce, dma_len, skb->len - skb->data_len); + ce->single = 1; + + e = &Q->entries[pidx]; + e->Sop = 1; + e->DataValid = 1; + e->BufferLength = skb->len - skb->data_len; + e->AddrHigh = (u64)mapping >> 32; + e->AddrLow = (u32)mapping; + + if (--count > 0) { + unsigned int i; + + e->Eop = 0; 
+ wmb(); + e->GenerationBit = e->GenerationBit2 = genbit; + + for (i = 0; i < count; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + ce++; e++; + if (++pidx == Q->entries_n) { + pidx = 0; + genbit ^= 1; + ce = Q->centries; + e = Q->entries; + } + + mapping = pci_map_page(adapter->pdev, frag->page, + frag->page_offset, + frag->size, + PCI_DMA_TODEVICE); + ce->skb = NULL; + pci_unmap_addr_set(ce, dma_addr, mapping); + pci_unmap_len_set(ce, dma_len, frag->size); + ce->single = 0; + + e->Sop = 0; + e->DataValid = 1; + e->BufferLength = frag->size; + e->AddrHigh = (u64)mapping >> 32; + e->AddrLow = (u32)mapping; + + if (i < count - 1) { + e->Eop = 0; + wmb(); + e->GenerationBit = e->GenerationBit2 = genbit; + } + } + } + + if (skb != sge->pskb) + ce->skb = skb; + e->Eop = 1; + wmb(); + e->GenerationBit = e->GenerationBit2 = genbit; + + /* + * We always ring the doorbell for cmdQ1. For cmdQ0, we only ring + * the doorbell if the Q is asleep. There is a natural race, where + * the hardware is going to sleep just after we checked, however, + * then the interrupt handler will detect the outstanding TX packet + * and ring the doorbell for us. + */ + if (qid) { + doorbell_pio(sge, F_CMDQ1_ENABLE); + } else if (atomic_read(&Q->asleep)) { + atomic_set(&Q->asleep, 0); + doorbell_pio(sge, F_CMDQ0_ENABLE); + atomic_set(&Q->pio_pidx, Q->pidx); + } + return 0; +} + +#define MK_ETH_TYPE_MSS(type, mss) (((mss) & 0x3FFF) | ((type) << 14)) + +/* + * Adds the CPL header to the sk_buff and passes it to t1_sge_tx. + */ +int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct adapter *adapter = dev->priv; + struct cpl_tx_pkt *cpl; + struct ethhdr *eth; + size_t max_len; + + /* + * We are using a non-standard hard_header_len and some kernel + * components, such as pktgen, do not handle it right. Complain + * when this happens but try to fix things up. 
+ */ + if (unlikely(skb_headroom(skb) < dev->hard_header_len - ETH_HLEN)) { + struct sk_buff *orig_skb = skb; + + if (net_ratelimit()) + printk(KERN_ERR + "%s: Tx packet has inadequate headroom\n", + dev->name); + skb = skb_realloc_headroom(skb, sizeof(struct cpl_tx_pkt_lso)); + dev_kfree_skb_any(orig_skb); + if (!skb) + return -ENOMEM; + } + + if (skb_shinfo(skb)->tso_size) { + int eth_type; + struct cpl_tx_pkt_lso *hdr; + + eth_type = skb->nh.raw - skb->data == ETH_HLEN ? + CPL_ETH_II : CPL_ETH_II_VLAN; + + hdr = (struct cpl_tx_pkt_lso *)skb_push(skb, sizeof(*hdr)); + hdr->opcode = CPL_TX_PKT_LSO; + hdr->ip_csum_dis = hdr->l4_csum_dis = 0; + hdr->ip_hdr_words = skb->nh.iph->ihl; + hdr->tcp_hdr_words = skb->h.th->doff; + hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type, + skb_shinfo(skb)->tso_size)); + hdr->len = htonl(skb->len - sizeof(*hdr)); + cpl = (struct cpl_tx_pkt *)hdr; + } else + { + /* + * An Ethernet packet must have at least space for + * the DIX Ethernet header and be no greater than + * the device set MTU. Otherwise trash the packet. + */ + if (skb->len < ETH_HLEN) + goto t1_start_xmit_fail2; + eth = (struct ethhdr *)skb->data; + if (eth->h_proto == htons(ETH_P_8021Q)) + max_len = dev->mtu + VLAN_ETH_HLEN; + else + max_len = dev->mtu + ETH_HLEN; + if (skb->len > max_len) + goto t1_start_xmit_fail2; + + if (!(adapter->flags & UDP_CSUM_CAPABLE) && + skb->ip_summed == CHECKSUM_HW && + skb->nh.iph->protocol == IPPROTO_UDP && + skb_checksum_help(skb, 0)) + goto t1_start_xmit_fail3; + + + if (!adapter->sge->pskb) { + if (skb->protocol == htons(ETH_P_ARP) && + skb->nh.arph->ar_op == htons(ARPOP_REQUEST)) + adapter->sge->pskb = skb; + } + cpl = (struct cpl_tx_pkt *)skb_push(skb, sizeof(*cpl)); + cpl->opcode = CPL_TX_PKT; + cpl->ip_csum_dis = 1; /* SW calculates IP csum */ + cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_HW ? 
0 : 1; + /* the length field isn't used so don't bother setting it */ + } + cpl->iff = dev->if_port; + +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + if (adapter->vlan_grp && vlan_tx_tag_present(skb)) { + cpl->vlan_valid = 1; + cpl->vlan = htons(vlan_tx_tag_get(skb)); + } else +#endif + cpl->vlan_valid = 0; + + dev->trans_start = jiffies; + return t1_sge_tx(skb, adapter, 0); + +t1_start_xmit_fail3: + printk(KERN_INFO "%s: Unable to complete checksum\n", dev->name); + goto t1_start_xmit_fail1; + +t1_start_xmit_fail2: + printk(KERN_INFO "%s: Invalid packet length %d, dropping\n", + dev->name, skb->len); + +t1_start_xmit_fail1: + dev_kfree_skb_any(skb); + return 0; +} + +void t1_sge_set_ptimeout(adapter_t *adapter, u32 val) +{ + struct sge *sge = adapter->sge; + + if (is_T2(adapter)) + sge->ptimeout = max((u32)((HZ * val) / 1000), (u32)1); +} + +u32 t1_sge_get_ptimeout(adapter_t *adapter) +{ + struct sge *sge = adapter->sge; + + return (is_T2(adapter) ? ((sge->ptimeout * 1000) / HZ) : 0); +} + -- cgit v1.1