Diffstat (limited to 'sys/dev/mxge/if_mxge.c')
-rw-r--r--  sys/dev/mxge/if_mxge.c  2489
1 files changed, 2489 insertions, 0 deletions
diff --git a/sys/dev/mxge/if_mxge.c b/sys/dev/mxge/if_mxge.c
new file mode 100644
index 0000000..96a063f
--- /dev/null
+++ b/sys/dev/mxge/if_mxge.c
@@ -0,0 +1,2489 @@
+/*******************************************************************************
+
+Copyright (c) 2006, Myricom Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Myricom Inc, nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/linker.h>
+#include <sys/firmware.h>
+#include <sys/endian.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/memrange.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/sx.h>
+
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <net/ethernet.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+
+#include <net/bpf.h>
+
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
+#include <net/zlib.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+
+#include <machine/clock.h> /* for DELAY */
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/bus.h>
+#include <sys/rman.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include <vm/vm.h> /* for pmap_mapdev() */
+#include <vm/pmap.h>
+
+#include <dev/myri10ge/myri10ge_mcp.h>
+#include <dev/myri10ge/mcp_gen_header.h>
+#include <dev/myri10ge/if_myri10ge_var.h>
+
+/* tunable params */
+static int myri10ge_nvidia_ecrc_enable = 1;
+static int myri10ge_max_intr_slots = 128;
+static int myri10ge_intr_coal_delay = 30;
+static int myri10ge_skip_pio_read = 0;
+static int myri10ge_flow_control = 1;
+static char *myri10ge_fw_unaligned = "myri10ge_ethp_z8e";
+static char *myri10ge_fw_aligned = "myri10ge_eth_z8e";
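+/*
+ * Illustration only: the defaults above can be overridden at boot through
+ * loader(8) tunables, which myri10ge_fetch_tunables() reads near the end of
+ * this file.  The values below are hypothetical examples, not recommendations:
+ *
+ *   hw.myri10ge.flow_control_enabled="0"
+ *   hw.myri10ge.intr_coal_delay="25"
+ *   hw.myri10ge.nvidia_ecrc_enable="0"
+ *   hw.myri10ge.skip_pio_read="1"
+ */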
+
+static int myri10ge_probe(device_t dev);
+static int myri10ge_attach(device_t dev);
+static int myri10ge_detach(device_t dev);
+static int myri10ge_shutdown(device_t dev);
+static void myri10ge_intr(void *arg);
+
+static device_method_t myri10ge_methods[] =
+{
+ /* Device interface */
+ DEVMETHOD(device_probe, myri10ge_probe),
+ DEVMETHOD(device_attach, myri10ge_attach),
+ DEVMETHOD(device_detach, myri10ge_detach),
+ DEVMETHOD(device_shutdown, myri10ge_shutdown),
+ {0, 0}
+};
+
+static driver_t myri10ge_driver =
+{
+ "myri10ge",
+ myri10ge_methods,
+ sizeof(myri10ge_softc_t),
+};
+
+static devclass_t myri10ge_devclass;
+
+/* Declare ourselves to be a child of the PCI bus.*/
+DRIVER_MODULE(myri10ge, pci, myri10ge_driver, myri10ge_devclass, 0, 0);
+MODULE_DEPEND(myri10ge, firmware, 1, 1, 1);
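+/*
+ * Note (illustrative only): myri10ge_load_firmware_helper() fetches the
+ * image named by sc->fw_name ("myri10ge_eth_z8e" or "myri10ge_ethp_z8e")
+ * through the firmware(9) interface (firmware_get()), so a firmware module
+ * registering an image under that exact name is assumed to be present or
+ * auto-loadable before the interface is brought up.
+ */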
+
+static int
+myri10ge_probe(device_t dev)
+{
+ if ((pci_get_vendor(dev) == MYRI10GE_PCI_VENDOR_MYRICOM) &&
+ (pci_get_device(dev) == MYRI10GE_PCI_DEVICE_Z8E)) {
+ device_set_desc(dev, "Myri10G-PCIE-8A");
+ return 0;
+ }
+ return ENXIO;
+}
+
+static void
+myri10ge_enable_wc(myri10ge_softc_t *sc)
+{
+ struct mem_range_desc mrdesc;
+ vm_paddr_t pa;
+ vm_offset_t len;
+ int err, action;
+
+ pa = rman_get_start(sc->mem_res);
+ len = rman_get_size(sc->mem_res);
+ mrdesc.mr_base = pa;
+ mrdesc.mr_len = len;
+ mrdesc.mr_flags = MDF_WRITECOMBINE;
+ action = MEMRANGE_SET_UPDATE;
+ strcpy((char *)&mrdesc.mr_owner, "myri10ge");
+ err = mem_range_attr_set(&mrdesc, &action);
+ if (err != 0) {
+ device_printf(sc->dev,
+ "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n",
+ (unsigned long)pa, (unsigned long)len, err);
+ } else {
+ sc->wc = 1;
+ }
+}
+
+
+/* callback to get our DMA address */
+static void
+myri10ge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
+ int error)
+{
+ if (error == 0) {
+ *(bus_addr_t *) arg = segs->ds_addr;
+ }
+}
+
+static int
+myri10ge_dma_alloc(myri10ge_softc_t *sc, myri10ge_dma_t *dma, size_t bytes,
+ bus_size_t alignment)
+{
+ int err;
+ device_t dev = sc->dev;
+
+ /* allocate DMAable memory tags */
+ err = bus_dma_tag_create(sc->parent_dmat, /* parent */
+ alignment, /* alignment */
+ 4096, /* boundary */
+ BUS_SPACE_MAXADDR, /* low */
+ BUS_SPACE_MAXADDR, /* high */
+ NULL, NULL, /* filter */
+ bytes, /* maxsize */
+ 1, /* num segs */
+ 4096, /* maxsegsize */
+ BUS_DMA_COHERENT, /* flags */
+ NULL, NULL, /* lock */
+ &dma->dmat); /* tag */
+ if (err != 0) {
+ device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
+ return err;
+ }
+
+ /* allocate DMAable memory & map */
+ err = bus_dmamem_alloc(dma->dmat, &dma->addr,
+ (BUS_DMA_WAITOK | BUS_DMA_COHERENT
+ | BUS_DMA_ZERO), &dma->map);
+ if (err != 0) {
+ device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
+ goto abort_with_dmat;
+ }
+
+ /* load the memory */
+ err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
+ myri10ge_dmamap_callback,
+ (void *)&dma->bus_addr, 0);
+ if (err != 0) {
+ device_printf(dev, "couldn't load map (err = %d)\n", err);
+ goto abort_with_mem;
+ }
+ return 0;
+
+abort_with_mem:
+ bus_dmamem_free(dma->dmat, dma->addr, dma->map);
+abort_with_dmat:
+ (void)bus_dma_tag_destroy(dma->dmat);
+ return err;
+}
+
+
+static void
+myri10ge_dma_free(myri10ge_dma_t *dma)
+{
+ bus_dmamap_unload(dma->dmat, dma->map);
+ bus_dmamem_free(dma->dmat, dma->addr, dma->map);
+ (void)bus_dma_tag_destroy(dma->dmat);
+}
+
+/*
+ * The eeprom strings on the lanaiX have the format
+ * SN=x\0
+ * MAC=x:x:x:x:x:x\0
+ * PC=text\0
+ */
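+/*
+ * Illustration only: with hypothetical values, the string block parsed
+ * below might look like
+ *
+ *   "SN=123456\0MAC=00:60:dd:47:ab:cd\0PC=sample-product-code\0\0"
+ *
+ * Parsing stops at the first empty string or at the end of the
+ * MYRI10GE_EEPROM_STRINGS_SIZE buffer, whichever comes first.
+ */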
+
+static int
+myri10ge_parse_strings(myri10ge_softc_t *sc)
+{
+#define MYRI10GE_NEXT_STRING(p) while(ptr < limit && *ptr++)
+
+ char *ptr, *limit;
+ int i, found_mac;
+
+ ptr = sc->eeprom_strings;
+ limit = sc->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE;
+ found_mac = 0;
+ while (ptr < limit && *ptr != '\0') {
+ if (memcmp(ptr, "MAC=", 4) == 0) {
+ ptr+=4;
+ sc->mac_addr_string = ptr;
+ for (i = 0; i < 6; i++) {
+ if ((ptr + 2) > limit)
+ goto abort;
+ sc->mac_addr[i] = strtoul(ptr, NULL, 16);
+ found_mac = 1;
+ ptr += 3;
+ }
+ } else if (memcmp(ptr, "PC=", 3) == 0) {
+ sc->product_code_string = ptr;
+ }
+ MYRI10GE_NEXT_STRING(ptr);
+ }
+
+ if (found_mac)
+ return 0;
+
+ abort:
+ device_printf(sc->dev, "failed to parse eeprom_strings\n");
+
+ return ENXIO;
+}
+
+#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
+static int
+myri10ge_enable_nvidia_ecrc(myri10ge_softc_t *sc, device_t pdev)
+{
+ uint32_t val;
+ unsigned long off;
+ char *va, *cfgptr;
+ uint16_t vendor_id, device_id;
+ uintptr_t bus, slot, func, ivend, idev;
+ uint32_t *ptr32;
+
+ /* XXXX
+ Test below is commented because it is believed that doing
+ config read/write beyond 0xff will access the config space
+ for the next larger function. Uncomment this and remove
+ the hacky pmap_mapdev() way of accessing config space when
+ FreeBSD grows support for extended pcie config space access
+ */
+#if 0
+ /* See if we can, by some miracle, access the extended
+ config space */
+ val = pci_read_config(pdev, 0x178, 4);
+ if (val != 0xffffffff) {
+ val |= 0x40;
+ pci_write_config(pdev, 0x178, val, 4);
+ return 0;
+ }
+#endif
+ /* Rather than using normal pci config space writes, we must
+ * map the Nvidia config space ourselves. This is because on
+ * opteron/nvidia class machine the 0xe000000 mapping is
+ * handled by the nvidia chipset, that means the internal PCI
+ * device (the on-chip northbridge), or the amd-8131 bridge
+ * and things behind them are not visible by this method.
+ */
+
+ BUS_READ_IVAR(device_get_parent(pdev), pdev,
+ PCI_IVAR_BUS, &bus);
+ BUS_READ_IVAR(device_get_parent(pdev), pdev,
+ PCI_IVAR_SLOT, &slot);
+ BUS_READ_IVAR(device_get_parent(pdev), pdev,
+ PCI_IVAR_FUNCTION, &func);
+ BUS_READ_IVAR(device_get_parent(pdev), pdev,
+ PCI_IVAR_VENDOR, &ivend);
+ BUS_READ_IVAR(device_get_parent(pdev), pdev,
+ PCI_IVAR_DEVICE, &idev);
+
+ off = 0xe0000000UL
+ + 0x00100000UL * (unsigned long)bus
+ + 0x00001000UL * (unsigned long)(func
+ + 8 * slot);
+
+ /* map it into the kernel */
+ va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
+
+
+ if (va == NULL) {
+ device_printf(sc->dev, "pmap_mapdev failed\n");
+ return EIO;
+ }
+ /* get a pointer to the config space mapped into the kernel */
+ cfgptr = va + (off & PAGE_MASK);
+
+ /* make sure that we can really access it */
+ vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
+ device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
+ if (! (vendor_id == ivend && device_id == idev)) {
+ device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
+ vendor_id, device_id);
+ pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
+ return EIO;
+ }
+
+ ptr32 = (uint32_t*)(cfgptr + 0x178);
+ val = *ptr32;
+
+ if (val == 0xffffffff) {
+ device_printf(sc->dev, "extended mapping failed\n");
+ pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
+ return EIO;
+ }
+ *ptr32 = val | 0x40;
+ pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
+ device_printf(sc->dev,
+ "Enabled ECRC on upstream Nvidia bridge at %d:%d:%d\n",
+ (int)bus, (int)slot, (int)func);
+ return 0;
+}
+#else
+static int
+myri10ge_enable_nvidia_ecrc(myri10ge_softc_t *sc, device_t pdev)
+{
+ device_printf(sc->dev,
+ "Nforce 4 chipset on non-x86/amd64!?!?!\n");
+ return ENXIO;
+}
+#endif
+/*
+ * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
+ * when the PCI-E Completion packets are aligned on an 8-byte
+ * boundary. Some PCI-E chip sets always align Completion packets; on
+ * the ones that do not, the alignment can be enforced by enabling
+ * ECRC generation (if supported).
+ *
+ * When PCI-E Completion packets are not aligned, it is actually more
+ * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
+ *
+ * If the driver can neither enable ECRC nor verify that it has
+ * already been enabled, then it must use a firmware image which works
+ * around unaligned completion packets (ethp_z8e.dat), and it should
+ * also ensure that it never gives the device a Read-DMA which is
+ * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is
+ * enabled, then the driver should use the aligned (eth_z8e.dat)
+ * firmware image, and set tx.boundary to 4KB.
+ */
+
+static void
+myri10ge_select_firmware(myri10ge_softc_t *sc)
+{
+ int err, aligned = 0;
+ device_t pdev;
+ uint16_t pvend, pdid;
+
+ pdev = device_get_parent(device_get_parent(sc->dev));
+ if (pdev == NULL) {
+ device_printf(sc->dev, "could not find parent?\n");
+ goto abort;
+ }
+ pvend = pci_read_config(pdev, PCIR_VENDOR, 2);
+ pdid = pci_read_config(pdev, PCIR_DEVICE, 2);
+
+ /* see if we can enable ECRC on an upstream
+ Nvidia bridge */
+ if (myri10ge_nvidia_ecrc_enable &&
+ (pvend == 0x10de && pdid == 0x005d)) {
+ err = myri10ge_enable_nvidia_ecrc(sc, pdev);
+ if (err == 0) {
+ aligned = 1;
+ device_printf(sc->dev,
+ "Assuming aligned completions (ECRC)\n");
+ }
+ }
+ /* see if the upstream bridge is known to
+ provide aligned completions */
+ if (/* HT2000 */ (pvend == 0x1166 && pdid == 0x0132) ||
+ /* Ontario */ (pvend == 0x10b5 && pdid == 0x8532)) {
+ aligned = 1;
+ device_printf(sc->dev,
+ "Assuming aligned completions (0x%x:0x%x)\n",
+ pvend, pdid);
+ }
+
+abort:
+ if (aligned) {
+ sc->fw_name = myri10ge_fw_aligned;
+ sc->tx.boundary = 4096;
+ } else {
+ sc->fw_name = myri10ge_fw_unaligned;
+ sc->tx.boundary = 2048;
+ }
+}
+
+union qualhack
+{
+ const char *ro_char;
+ char *rw_char;
+};
+
+
+static int
+myri10ge_load_firmware_helper(myri10ge_softc_t *sc, uint32_t *limit)
+{
+ struct firmware *fw;
+ const mcp_gen_header_t *hdr;
+ unsigned hdr_offset;
+ const char *fw_data;
+ union qualhack hack;
+ int status;
+
+
+ fw = firmware_get(sc->fw_name);
+
+ if (fw == NULL) {
+ device_printf(sc->dev, "Could not find firmware image %s\n",
+ sc->fw_name);
+ return ENOENT;
+ }
+ if (fw->datasize > *limit ||
+ fw->datasize < MCP_HEADER_PTR_OFFSET + 4) {
+ device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n",
+ sc->fw_name, (int)fw->datasize, (int) *limit);
+ status = ENOSPC;
+ goto abort_with_fw;
+ }
+ *limit = fw->datasize;
+
+ /* check id */
+ fw_data = (const char *)fw->data;
+ hdr_offset = htobe32(*(const uint32_t *)
+ (fw_data + MCP_HEADER_PTR_OFFSET));
+ if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->datasize) {
+ device_printf(sc->dev, "Bad firmware file\n");
+ status = EIO;
+ goto abort_with_fw;
+ }
+ hdr = (const void*)(fw_data + hdr_offset);
+ if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
+ device_printf(sc->dev, "Bad firmware type: 0x%x\n",
+ be32toh(hdr->mcp_type));
+ status = EIO;
+ goto abort_with_fw;
+ }
+
+ /* save firmware version for sysctl */
+ strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
+ device_printf(sc->dev, "firmware id: %s\n", hdr->version);
+
+ hack.ro_char = fw_data;
+ /* Copy the inflated firmware to NIC SRAM. */
+ myri10ge_pio_copy(&sc->sram[MYRI10GE_FW_OFFSET],
+ hack.rw_char, *limit);
+
+ status = 0;
+abort_with_fw:
+ firmware_put(fw, FIRMWARE_UNLOAD);
+ return status;
+}
+
+/*
+ * Enable or disable periodic RDMAs from the host to make certain
+ * chipsets resend dropped PCIe messages
+ */
+
+static void
+myri10ge_dummy_rdma(myri10ge_softc_t *sc, int enable)
+{
+ char buf_bytes[72];
+ volatile uint32_t *confirm;
+ volatile char *submit;
+ uint32_t *buf, dma_low, dma_high;
+ int i;
+
+ buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
+
+ /* clear confirmation addr */
+ confirm = (volatile uint32_t *)sc->cmd;
+ *confirm = 0;
+ mb();
+
+ /* send an rdma command to the PCIe engine, and wait for the
+ response in the confirmation address. The firmware should
+ write a -1 there to indicate it is alive and well
+ */
+
+ dma_low = MYRI10GE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
+ dma_high = MYRI10GE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
+ buf[0] = htobe32(dma_high); /* confirm addr MSW */
+ buf[1] = htobe32(dma_low); /* confirm addr LSW */
+ buf[2] = htobe32(0xffffffff); /* confirm data */
+ dma_low = MYRI10GE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
+ dma_high = MYRI10GE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
+ buf[3] = htobe32(dma_high); /* dummy addr MSW */
+ buf[4] = htobe32(dma_low); /* dummy addr LSW */
+ buf[5] = htobe32(enable); /* enable? */
+
+
+ submit = (volatile char *)(sc->sram + 0xfc01c0);
+
+ myri10ge_pio_copy(submit, buf, 64);
+ mb();
+ DELAY(1000);
+ mb();
+ i = 0;
+ while (*confirm != 0xffffffff && i < 20) {
+ DELAY(1000);
+ i++;
+ }
+ if (*confirm != 0xffffffff) {
+ device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
+ (enable ? "enable" : "disable"), confirm,
+ *confirm);
+ }
+ return;
+}
+
+static int
+myri10ge_send_cmd(myri10ge_softc_t *sc, uint32_t cmd,
+ myri10ge_cmd_t *data)
+{
+ mcp_cmd_t *buf;
+ char buf_bytes[sizeof(*buf) + 8];
+ volatile mcp_cmd_response_t *response = sc->cmd;
+ volatile char *cmd_addr = sc->sram + MYRI10GE_MCP_CMD_OFFSET;
+ uint32_t dma_low, dma_high;
+ int sleep_total = 0;
+
+ /* ensure buf is aligned to 8 bytes */
+ buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
+
+ buf->data0 = htobe32(data->data0);
+ buf->data1 = htobe32(data->data1);
+ buf->data2 = htobe32(data->data2);
+ buf->cmd = htobe32(cmd);
+ dma_low = MYRI10GE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
+ dma_high = MYRI10GE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
+
+ buf->response_addr.low = htobe32(dma_low);
+ buf->response_addr.high = htobe32(dma_high);
+ mtx_lock(&sc->cmd_lock);
+ response->result = 0xffffffff;
+ mb();
+ myri10ge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
+
+ /* wait up to 2 seconds */
+ for (sleep_total = 0; sleep_total < (2 * 1000); sleep_total += 10) {
+ bus_dmamap_sync(sc->cmd_dma.dmat,
+ sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
+ mb();
+ if (response->result != 0xffffffff) {
+ if (response->result == 0) {
+ data->data0 = be32toh(response->data);
+ mtx_unlock(&sc->cmd_lock);
+ return 0;
+ } else {
+ device_printf(sc->dev,
+ "myri10ge: command %d "
+ "failed, result = %d\n",
+ cmd, be32toh(response->result));
+ mtx_unlock(&sc->cmd_lock);
+ return ENXIO;
+ }
+ }
+ DELAY(1000 * 10);
+ }
+ mtx_unlock(&sc->cmd_lock);
+ device_printf(sc->dev, "myri10ge: command %d timed out, "
+ "result = %d\n",
+ cmd, be32toh(response->result));
+ return EAGAIN;
+}
+
+
+static int
+myri10ge_load_firmware(myri10ge_softc_t *sc)
+{
+ volatile uint32_t *confirm;
+ volatile char *submit;
+ char buf_bytes[72];
+ uint32_t *buf, size, dma_low, dma_high;
+ int status, i;
+
+ buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
+
+ size = sc->sram_size;
+ status = myri10ge_load_firmware_helper(sc, &size);
+ if (status) {
+ device_printf(sc->dev, "firmware loading failed\n");
+ return status;
+ }
+ /* clear confirmation addr */
+ confirm = (volatile uint32_t *)sc->cmd;
+ *confirm = 0;
+ mb();
+ /* send a reload command to the bootstrap MCP, and wait for the
+ response in the confirmation address. The firmware should
+ write a -1 there to indicate it is alive and well
+ */
+
+ dma_low = MYRI10GE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
+ dma_high = MYRI10GE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
+
+ buf[0] = htobe32(dma_high); /* confirm addr MSW */
+ buf[1] = htobe32(dma_low); /* confirm addr LSW */
+ buf[2] = htobe32(0xffffffff); /* confirm data */
+
+ /* FIX: All newest firmware should un-protect the bottom of
+ the sram before handoff. However, the very first interfaces
+ do not. Therefore the handoff copy must skip the first 8 bytes
+ */
+ /* where the code starts*/
+ buf[3] = htobe32(MYRI10GE_FW_OFFSET + 8);
+ buf[4] = htobe32(size - 8); /* length of code */
+ buf[5] = htobe32(8); /* where to copy to */
+ buf[6] = htobe32(0); /* where to jump to */
+
+ submit = (volatile char *)(sc->sram + 0xfc0000);
+ myri10ge_pio_copy(submit, buf, 64);
+ mb();
+ DELAY(1000);
+ mb();
+ i = 0;
+ while (*confirm != 0xffffffff && i < 20) {
+ DELAY(1000*10);
+ i++;
+ bus_dmamap_sync(sc->cmd_dma.dmat,
+ sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
+ }
+ if (*confirm != 0xffffffff) {
+ device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
+ confirm, *confirm);
+
+ return ENXIO;
+ }
+ myri10ge_dummy_rdma(sc, 1);
+ return 0;
+}
+
+static int
+myri10ge_update_mac_address(myri10ge_softc_t *sc)
+{
+ myri10ge_cmd_t cmd;
+ uint8_t *addr = sc->mac_addr;
+ int status;
+
+
+ cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
+ | (addr[2] << 8) | addr[3]);
+
+ cmd.data1 = ((addr[4] << 8) | (addr[5]));
+
+ status = myri10ge_send_cmd(sc, MYRI10GE_MCP_SET_MAC_ADDRESS, &cmd);
+ return status;
+}
+
+static int
+myri10ge_change_pause(myri10ge_softc_t *sc, int pause)
+{
+ myri10ge_cmd_t cmd;
+ int status;
+
+ if (pause)
+ status = myri10ge_send_cmd(sc,
+ MYRI10GE_MCP_ENABLE_FLOW_CONTROL,
+ &cmd);
+ else
+ status = myri10ge_send_cmd(sc,
+ MYRI10GE_MCP_DISABLE_FLOW_CONTROL,
+ &cmd);
+
+ if (status) {
+ device_printf(sc->dev, "Failed to set flow control mode\n");
+ return ENXIO;
+ }
+ sc->pause = pause;
+ return 0;
+}
+
+static void
+myri10ge_change_promisc(myri10ge_softc_t *sc, int promisc)
+{
+ myri10ge_cmd_t cmd;
+ int status;
+
+ if (promisc)
+ status = myri10ge_send_cmd(sc,
+ MYRI10GE_MCP_ENABLE_PROMISC,
+ &cmd);
+ else
+ status = myri10ge_send_cmd(sc,
+ MYRI10GE_MCP_DISABLE_PROMISC,
+ &cmd);
+
+ if (status) {
+ device_printf(sc->dev, "Failed to set promisc mode\n");
+ }
+}
+
+static int
+myri10ge_reset(myri10ge_softc_t *sc)
+{
+
+ myri10ge_cmd_t cmd;
+ int status, i;
+
+ /* try to send a reset command to the card to see if it
+ is alive */
+ memset(&cmd, 0, sizeof (cmd));
+ status = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_RESET, &cmd);
+ if (status != 0) {
+ device_printf(sc->dev, "failed reset\n");
+ return ENXIO;
+ }
+
+ /* Now exchange information about interrupts */
+
+ cmd.data0 = (uint32_t)
+ (myri10ge_max_intr_slots * sizeof (*sc->intr.q[0]));
+ status = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_SET_INTRQ_SIZE, &cmd);
+ for (i = 0; (status == 0) && (i < MYRI10GE_NUM_INTRQS); i++) {
+ cmd.data0 = MYRI10GE_LOWPART_TO_U32(sc->intr.dma[i].bus_addr);
+ cmd.data1 = MYRI10GE_HIGHPART_TO_U32(sc->intr.dma[i].bus_addr);
+ status |=
+ myri10ge_send_cmd(sc, (i +
+ MYRI10GE_MCP_CMD_SET_INTRQ0_DMA),
+ &cmd);
+ }
+
+ cmd.data0 = sc->intr_coal_delay = myri10ge_intr_coal_delay;
+ status |= myri10ge_send_cmd(sc,
+ MYRI10GE_MCP_CMD_SET_INTR_COAL_DELAY, &cmd);
+
+ if (sc->msi_enabled) {
+ status |= myri10ge_send_cmd
+ (sc, MYRI10GE_MCP_CMD_GET_IRQ_ACK_OFFSET, &cmd);
+ } else {
+ status |= myri10ge_send_cmd
+ (sc, MYRI10GE_MCP_CMD_GET_IRQ_ACK_DEASSERT_OFFSET,
+ &cmd);
+ }
+ if (status != 0) {
+ device_printf(sc->dev, "failed set interrupt parameters\n");
+ return status;
+ }
+ sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
+
+ /* reset mcp/driver shared state back to 0 */
+ sc->intr.seqnum = 0;
+ sc->intr.intrq = 0;
+ sc->intr.slot = 0;
+ sc->tx.req = 0;
+ sc->tx.done = 0;
+ sc->rx_big.cnt = 0;
+ sc->rx_small.cnt = 0;
+ sc->rdma_tags_available = 15;
+ status = myri10ge_update_mac_address(sc);
+ myri10ge_change_promisc(sc, 0);
+ myri10ge_change_pause(sc, sc->pause);
+ return status;
+}
+
+static int
+myri10ge_change_intr_coal(SYSCTL_HANDLER_ARGS)
+{
+ myri10ge_cmd_t cmd;
+ myri10ge_softc_t *sc;
+ unsigned int intr_coal_delay;
+ int err;
+
+ sc = arg1;
+ intr_coal_delay = sc->intr_coal_delay;
+ err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
+ if (err != 0) {
+ return err;
+ }
+ if (intr_coal_delay == sc->intr_coal_delay)
+ return 0;
+
+ if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
+ return EINVAL;
+
+ sx_xlock(&sc->driver_lock);
+ cmd.data0 = intr_coal_delay;
+ err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_SET_INTR_COAL_DELAY,
+ &cmd);
+ if (err == 0) {
+ sc->intr_coal_delay = intr_coal_delay;
+ }
+ sx_xunlock(&sc->driver_lock);
+ return err;
+}
+
+static int
+myri10ge_change_flow_control(SYSCTL_HANDLER_ARGS)
+{
+ myri10ge_softc_t *sc;
+ unsigned int enabled;
+ int err;
+
+ sc = arg1;
+ enabled = sc->pause;
+ err = sysctl_handle_int(oidp, &enabled, arg2, req);
+ if (err != 0) {
+ return err;
+ }
+ if (enabled == sc->pause)
+ return 0;
+
+ sx_xlock(&sc->driver_lock);
+ err = myri10ge_change_pause(sc, enabled);
+ sx_xunlock(&sc->driver_lock);
+ return err;
+}
+
+static int
+myri10ge_handle_be32(SYSCTL_HANDLER_ARGS)
+{
+ int err;
+
+ if (arg1 == NULL)
+ return EFAULT;
+ arg2 = be32toh(*(int *)arg1);
+ arg1 = NULL;
+ err = sysctl_handle_int(oidp, arg1, arg2, req);
+
+ return err;
+}
+
+static void
+myri10ge_add_sysctls(myri10ge_softc_t *sc)
+{
+ struct sysctl_ctx_list *ctx;
+ struct sysctl_oid_list *children;
+ mcp_stats_t *fw;
+
+ ctx = device_get_sysctl_ctx(sc->dev);
+ children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
+ fw = sc->fw_stats;
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+ "intr_coal_delay",
+ CTLTYPE_INT|CTLFLAG_RW, sc,
+ 0, myri10ge_change_intr_coal,
+ "I", "interrupt coalescing delay in usecs");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+ "flow_control_enabled",
+ CTLTYPE_INT|CTLFLAG_RW, sc,
+ 0, myri10ge_change_flow_control,
+ "I", "flow control enabled");
+
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "skip_pio_read",
+ CTLFLAG_RW, &myri10ge_skip_pio_read,
+ 0, "Skip pio read in interrupt handler");
+
+ /* stats block from firmware is in network byte order.
+ Need to swap it */
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+ "link_up",
+ CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
+ 0, myri10ge_handle_be32,
+ "I", "link up");
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+ "rdma_tags_available",
+ CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
+ 0, myri10ge_handle_be32,
+ "I", "rdma_tags_available");
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+ "dropped_link_overflow",
+ CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
+ 0, myri10ge_handle_be32,
+ "I", "dropped_link_overflow");
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+ "dropped_link_error_or_filtered",
+ CTLTYPE_INT|CTLFLAG_RD,
+ &fw->dropped_link_error_or_filtered,
+ 0, myri10ge_handle_be32,
+ "I", "dropped_link_error_or_filtered");
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+ "dropped_runt",
+ CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
+ 0, myri10ge_handle_be32,
+ "I", "dropped_runt");
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+ "dropped_overrun",
+ CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
+ 0, myri10ge_handle_be32,
+ "I", "dropped_overrun");
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+ "dropped_no_small_buffer",
+ CTLTYPE_INT|CTLFLAG_RD,
+ &fw->dropped_no_small_buffer,
+ 0, myri10ge_handle_be32,
+ "I", "dropped_no_small_buffer");
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+ "dropped_no_big_buffer",
+ CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
+ 0, myri10ge_handle_be32,
+ "I", "dropped_no_big_buffer");
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+ "dropped_interrupt_busy",
+ CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_interrupt_busy,
+ 0, myri10ge_handle_be32,
+ "I", "dropped_interrupt_busy");
+
+ /* host counters exported for debugging */
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "tx_req",
+ CTLFLAG_RD, &sc->tx.req,
+ 0, "tx_req");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "tx_done",
+ CTLFLAG_RD, &sc->tx.done,
+ 0, "tx_done");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "rx_small_cnt",
+ CTLFLAG_RD, &sc->rx_small.cnt,
+ 0, "rx_small_cnt");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "rx_big_cnt",
+ CTLFLAG_RD, &sc->rx_big.cnt,
+ 0, "rx_big_cnt");
+
+}
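+
+/*
+ * Illustration only: the nodes registered above live under the device's
+ * sysctl tree, so for a hypothetical first instance they could be read or
+ * tuned from userland roughly as follows (unit number and value are
+ * examples):
+ *
+ *   sysctl dev.myri10ge.0.intr_coal_delay=25
+ *   sysctl dev.myri10ge.0.dropped_no_big_buffer
+ */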
+
+/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
+ backwards one at a time and handle ring wraps */
+
+static inline void
+myri10ge_submit_req_backwards(myri10ge_tx_buf_t *tx,
+ mcp_kreq_ether_send_t *src, int cnt)
+{
+ int idx, starting_slot;
+ starting_slot = tx->req;
+ while (cnt > 1) {
+ cnt--;
+ idx = (starting_slot + cnt) & tx->mask;
+ myri10ge_pio_copy(&tx->lanai[idx],
+ &src[cnt], sizeof(*src));
+ mb();
+ }
+}
+
+/*
+ * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
+ * at most 32 bytes at a time, so as to avoid involving the software
+ * pio handler in the nic. We re-write the first segment's flags
+ * to mark them valid only after writing the entire chain
+ */
+
+static inline void
+myri10ge_submit_req(myri10ge_tx_buf_t *tx, mcp_kreq_ether_send_t *src,
+ int cnt)
+{
+ int idx, i;
+ uint32_t *src_ints;
+ volatile uint32_t *dst_ints;
+ mcp_kreq_ether_send_t *srcp;
+ volatile mcp_kreq_ether_send_t *dstp, *dst;
+
+
+ idx = tx->req & tx->mask;
+
+ src->flags &= ~(htobe16(MYRI10GE_MCP_ETHER_FLAGS_VALID));
+ mb();
+ dst = dstp = &tx->lanai[idx];
+ srcp = src;
+
+ if ((idx + cnt) < tx->mask) {
+ for (i = 0; i < (cnt - 1); i += 2) {
+ myri10ge_pio_copy(dstp, srcp, 2 * sizeof(*src));
+ mb(); /* force write every 32 bytes */
+ srcp += 2;
+ dstp += 2;
+ }
+ } else {
+ /* submit all but the first request, and ensure
+ that it is submitted below */
+ myri10ge_submit_req_backwards(tx, src, cnt);
+ i = 0;
+ }
+ if (i < cnt) {
+ /* submit the first request */
+ myri10ge_pio_copy(dstp, srcp, sizeof(*src));
+ mb(); /* barrier before setting valid flag */
+ }
+
+ /* re-write the last 32-bits with the valid flags */
+ src->flags |= htobe16(MYRI10GE_MCP_ETHER_FLAGS_VALID);
+ src_ints = (uint32_t *)src;
+ src_ints+=3;
+ dst_ints = (volatile uint32_t *)dst;
+ dst_ints+=3;
+ *dst_ints = *src_ints;
+ tx->req += cnt;
+ mb();
+}
+
+static inline void
+myri10ge_submit_req_wc(myri10ge_tx_buf_t *tx,
+ mcp_kreq_ether_send_t *src, int cnt)
+{
+ tx->req += cnt;
+ mb();
+ while (cnt >= 4) {
+ myri10ge_pio_copy((volatile char *)tx->wc_fifo, src, 64);
+ mb();
+ src += 4;
+ cnt -= 4;
+ }
+ if (cnt > 0) {
+ /* pad it to 64 bytes. The src is 64 bytes bigger than it
+ needs to be so that we don't overrun it */
+ myri10ge_pio_copy(tx->wc_fifo + (cnt<<18), src, 64);
+ mb();
+ }
+}
+
+static void
+myri10ge_encap(myri10ge_softc_t *sc, struct mbuf *m)
+{
+ mcp_kreq_ether_send_t *req;
+ bus_dma_segment_t seg_list[MYRI10GE_MCP_ETHER_MAX_SEND_DESC];
+ bus_dma_segment_t *seg;
+ struct mbuf *m_tmp;
+ struct ifnet *ifp;
+ myri10ge_tx_buf_t *tx;
+ struct ether_header *eh;
+ struct ip *ip;
+ int cnt, cum_len, err, i, idx;
+ uint16_t flags, pseudo_hdr_offset;
+ uint8_t cksum_offset;
+
+
+
+ ifp = sc->ifp;
+ tx = &sc->tx;
+
+ /* (try to) map the frame for DMA */
+ idx = tx->req & tx->mask;
+ err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
+ m, seg_list, &cnt,
+ BUS_DMA_NOWAIT);
+ if (err == EFBIG) {
+ /* Too many segments in the chain. Try
+ to defrag */
+ m_tmp = m_defrag(m, M_NOWAIT);
+ if (m_tmp == NULL) {
+ goto drop;
+ }
+ m = m_tmp;
+ err = bus_dmamap_load_mbuf_sg(tx->dmat,
+ tx->info[idx].map,
+ m, seg_list, &cnt,
+ BUS_DMA_NOWAIT);
+ }
+ if (err != 0) {
+ device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d\n",
+ err);
+ goto drop;
+ }
+ bus_dmamap_sync(tx->dmat, tx->info[idx].map,
+ BUS_DMASYNC_PREWRITE);
+
+ req = tx->req_list;
+ cksum_offset = 0;
+ flags = htobe16(MYRI10GE_MCP_ETHER_FLAGS_VALID |
+ MYRI10GE_MCP_ETHER_FLAGS_NOT_LAST);
+
+ /* checksum offloading? */
+ if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
+ eh = mtod(m, struct ether_header *);
+ ip = (struct ip *) (eh + 1);
+ cksum_offset = sizeof(*eh) + (ip->ip_hl << 2);
+ pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
+ req->pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
+ req->cksum_offset = cksum_offset;
+ flags |= htobe16(MYRI10GE_MCP_ETHER_FLAGS_CKSUM);
+ }
+ if (m->m_pkthdr.len < 512)
+ req->flags = htobe16(MYRI10GE_MCP_ETHER_FLAGS_FIRST |
+ MYRI10GE_MCP_ETHER_FLAGS_SMALL);
+ else
+ req->flags = htobe16(MYRI10GE_MCP_ETHER_FLAGS_FIRST);
+
+ /* convert segments into a request list */
+ cum_len = 0;
+ seg = seg_list;
+ for (i = 0; i < cnt; i++) {
+ req->addr_low =
+ htobe32(MYRI10GE_LOWPART_TO_U32(seg->ds_addr));
+ req->addr_high =
+ htobe32(MYRI10GE_HIGHPART_TO_U32(seg->ds_addr));
+ req->length = htobe16(seg->ds_len);
+ req->cksum_offset = cksum_offset;
+ if (cksum_offset > seg->ds_len)
+ cksum_offset -= seg->ds_len;
+ else
+ cksum_offset = 0;
+ req->flags |= flags | ((cum_len & 1) *
+ htobe16(MYRI10GE_MCP_ETHER_FLAGS_ALIGN_ODD));
+ cum_len += seg->ds_len;
+ seg++;
+ req++;
+ req->flags = 0;
+ }
+ req--;
+ /* pad runts to 60 bytes */
+ if (cum_len < 60) {
+ req++;
+ req->addr_low =
+ htobe32(MYRI10GE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
+ req->addr_high =
+ htobe32(MYRI10GE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
+ req->length = htobe16(60 - cum_len);
+ req->cksum_offset = cksum_offset;
+ req->flags |= flags | ((cum_len & 1) *
+ htobe16(MYRI10GE_MCP_ETHER_FLAGS_ALIGN_ODD));
+ cnt++;
+ }
+ req->flags &= ~(htobe16(MYRI10GE_MCP_ETHER_FLAGS_NOT_LAST));
+ tx->info[idx].m = m;
+ if (tx->wc_fifo == NULL)
+ myri10ge_submit_req(tx, tx->req_list, cnt);
+ else
+ myri10ge_submit_req_wc(tx, tx->req_list, cnt);
+ return;
+
+drop:
+ m_freem(m);
+ ifp->if_oerrors++;
+ return;
+}
+
+
+static void
+myri10ge_start_locked(myri10ge_softc_t *sc)
+{
+ int avail;
+ struct mbuf *m;
+ struct ifnet *ifp;
+
+
+ ifp = sc->ifp;
+ while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
+ /* dequeue the packet */
+ IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
+
+ /* let BPF see it */
+ BPF_MTAP(ifp, m);
+
+ /* give it to the nic */
+ myri10ge_encap(sc, m);
+
+ /* leave an extra slot to keep the ring from wrapping */
+ avail = sc->tx.mask - (sc->tx.req - sc->tx.done);
+ if (avail < MYRI10GE_MCP_ETHER_MAX_SEND_DESC) {
+ sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ return;
+ }
+ }
+}
+
+static void
+myri10ge_start(struct ifnet *ifp)
+{
+ myri10ge_softc_t *sc = ifp->if_softc;
+
+
+ mtx_lock(&sc->tx_lock);
+ myri10ge_start_locked(sc);
+ mtx_unlock(&sc->tx_lock);
+}
+
+static int
+myri10ge_get_buf_small(myri10ge_softc_t *sc, bus_dmamap_t map, int idx)
+{
+ bus_dma_segment_t seg;
+ struct mbuf *m;
+ myri10ge_rx_buf_t *rx = &sc->rx_small;
+ int cnt, err;
+
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL) {
+ rx->alloc_fail++;
+ err = ENOBUFS;
+ goto done;
+ }
+ m->m_len = MHLEN;
+ err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
+ &seg, &cnt, BUS_DMA_NOWAIT);
+ if (err != 0) {
+ m_free(m);
+ goto done;
+ }
+ rx->info[idx].m = m;
+ rx->shadow[idx].addr_low =
+ htobe32(MYRI10GE_LOWPART_TO_U32(seg.ds_addr));
+ rx->shadow[idx].addr_high =
+ htobe32(MYRI10GE_HIGHPART_TO_U32(seg.ds_addr));
+
+done:
+ if ((idx & 7) == 7) {
+ myri10ge_pio_copy(&rx->lanai[idx - 7],
+ &rx->shadow[idx - 7],
+ 8 * sizeof (*rx->lanai));
+ mb();
+ }
+ return err;
+}
+
+static int
+myri10ge_get_buf_big(myri10ge_softc_t *sc, bus_dmamap_t map, int idx)
+{
+ bus_dma_segment_t seg;
+ struct mbuf *m;
+ myri10ge_rx_buf_t *rx = &sc->rx_big;
+ int cnt, err;
+
+ m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, sc->big_bytes);
+ if (m == NULL) {
+ rx->alloc_fail++;
+ err = ENOBUFS;
+ goto done;
+ }
+ m->m_len = sc->big_bytes;
+ err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
+ &seg, &cnt, BUS_DMA_NOWAIT);
+ if (err != 0) {
+ m_free(m);
+ goto done;
+ }
+ rx->info[idx].m = m;
+ rx->shadow[idx].addr_low =
+ htobe32(MYRI10GE_LOWPART_TO_U32(seg.ds_addr));
+ rx->shadow[idx].addr_high =
+ htobe32(MYRI10GE_HIGHPART_TO_U32(seg.ds_addr));
+
+done:
+ if ((idx & 7) == 7) {
+ myri10ge_pio_copy(&rx->lanai[idx - 7],
+ &rx->shadow[idx - 7],
+ 8 * sizeof (*rx->lanai));
+ mb();
+ }
+ return err;
+}
+
+static inline void
+myri10ge_rx_done_big(myri10ge_softc_t *sc, int len, int csum, int flags)
+{
+ struct ifnet *ifp;
+ struct mbuf *m = 0; /* -Wuninitialized */
+ struct mbuf *m_prev = 0; /* -Wuninitialized */
+ struct mbuf *m_head = 0;
+ bus_dmamap_t old_map;
+ myri10ge_rx_buf_t *rx;
+ int idx;
+
+
+ rx = &sc->rx_big;
+ ifp = sc->ifp;
+ while (len > 0) {
+ idx = rx->cnt & rx->mask;
+ rx->cnt++;
+ /* save a pointer to the received mbuf */
+ m = rx->info[idx].m;
+ /* try to replace the received mbuf */
+ if (myri10ge_get_buf_big(sc, rx->extra_map, idx)) {
+ goto drop;
+ }
+ /* unmap the received buffer */
+ old_map = rx->info[idx].map;
+ bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
+ bus_dmamap_unload(rx->dmat, old_map);
+
+ /* swap the bus_dmamap_t's */
+ rx->info[idx].map = rx->extra_map;
+ rx->extra_map = old_map;
+
+ /* chain multiple segments together */
+ if (!m_head) {
+ m_head = m;
+ /* mcp implicitly skips 1st 2 bytes so that
+ * packet is properly aligned */
+ m->m_data += MYRI10GE_MCP_ETHER_PAD;
+ m->m_pkthdr.len = len;
+ m->m_len = sc->big_bytes - MYRI10GE_MCP_ETHER_PAD;
+ } else {
+ m->m_len = sc->big_bytes;
+ m->m_flags &= ~M_PKTHDR;
+ m_prev->m_next = m;
+ }
+ len -= m->m_len;
+ m_prev = m;
+ }
+
+ /* trim trailing garbage from the last mbuf in the chain. If
+ * there is any garbage, len will be negative */
+ m->m_len += len;
+
+ /* if the checksum is valid, mark it in the mbuf header */
+ if (sc->csum_flag & flags) {
+ m_head->m_pkthdr.csum_data = csum;
+ m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID;
+ }
+
+ /* pass the frame up the stack */
+ m_head->m_pkthdr.rcvif = ifp;
+ ifp->if_ipackets++;
+ (*ifp->if_input)(ifp, m_head);
+ return;
+
+drop:
+ /* drop the frame -- the old mbuf(s) are re-cycled by running
+ every slot through the allocator */
+ if (m_head) {
+ len -= sc->big_bytes;
+ m_freem(m_head);
+ } else {
+ len -= (sc->big_bytes + MYRI10GE_MCP_ETHER_PAD);
+ }
+ while ((int)len > 0) {
+ idx = rx->cnt & rx->mask;
+ rx->cnt++;
+ m = rx->info[idx].m;
+ if (0 == (myri10ge_get_buf_big(sc, rx->extra_map, idx))) {
+ m_freem(m);
+ /* unmap the received buffer */
+ old_map = rx->info[idx].map;
+ bus_dmamap_sync(rx->dmat, old_map,
+ BUS_DMASYNC_POSTREAD);
+ bus_dmamap_unload(rx->dmat, old_map);
+
+ /* swap the bus_dmamap_t's */
+ rx->info[idx].map = rx->extra_map;
+ rx->extra_map = old_map;
+ }
+ len -= sc->big_bytes;
+ }
+
+ ifp->if_ierrors++;
+
+}
+
+
+static inline void
+myri10ge_rx_done_small(myri10ge_softc_t *sc, uint32_t len,
+ uint32_t csum, uint32_t flags)
+{
+ struct ifnet *ifp;
+ struct mbuf *m;
+ myri10ge_rx_buf_t *rx;
+ bus_dmamap_t old_map;
+ int idx;
+
+ ifp = sc->ifp;
+ rx = &sc->rx_small;
+ idx = rx->cnt & rx->mask;
+ rx->cnt++;
+ /* save a pointer to the received mbuf */
+ m = rx->info[idx].m;
+ /* try to replace the received mbuf */
+ if (myri10ge_get_buf_small(sc, rx->extra_map, idx)) {
+ /* drop the frame -- the old mbuf is re-cycled */
+ ifp->if_ierrors++;
+ return;
+ }
+
+ /* unmap the received buffer */
+ old_map = rx->info[idx].map;
+ bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
+ bus_dmamap_unload(rx->dmat, old_map);
+
+ /* swap the bus_dmamap_t's */
+ rx->info[idx].map = rx->extra_map;
+ rx->extra_map = old_map;
+
+ /* mcp implicitly skips 1st 2 bytes so that packet is properly
+ * aligned */
+ m->m_data += MYRI10GE_MCP_ETHER_PAD;
+
+ /* if the checksum is valid, mark it in the mbuf header */
+ if (sc->csum_flag & flags) {
+ m->m_pkthdr.csum_data = csum;
+ m->m_pkthdr.csum_flags = CSUM_DATA_VALID;
+ }
+
+ /* pass the frame up the stack */
+ m->m_pkthdr.rcvif = ifp;
+ m->m_len = m->m_pkthdr.len = len;
+ ifp->if_ipackets++;
+ (*ifp->if_input)(ifp, m);
+}
+
+static inline void
+myri10ge_tx_done(myri10ge_softc_t *sc, uint32_t mcp_idx)
+{
+ struct ifnet *ifp;
+ myri10ge_tx_buf_t *tx;
+ struct mbuf *m;
+ bus_dmamap_t map;
+ int idx;
+
+ tx = &sc->tx;
+ ifp = sc->ifp;
+ while (tx->done != mcp_idx) {
+ idx = tx->done & tx->mask;
+ tx->done++;
+ m = tx->info[idx].m;
+ /* mbuf and DMA map only attached to the first
+ segment per-mbuf */
+ if (m != NULL) {
+ ifp->if_opackets++;
+ tx->info[idx].m = NULL;
+ map = tx->info[idx].map;
+ bus_dmamap_unload(tx->dmat, map);
+ m_freem(m);
+ }
+ }
+
+ /* If we have space, clear IFF_OACTIVE to tell the stack that
+ it's OK to send packets */
+
+ if (ifp->if_drv_flags & IFF_DRV_OACTIVE &&
+ tx->req - tx->done < (tx->mask + 1)/4) {
+ mtx_lock(&sc->tx_lock);
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ myri10ge_start_locked(sc);
+ mtx_unlock(&sc->tx_lock);
+ }
+}
+
+static void
+myri10ge_dump_interrupt_queues(myri10ge_softc_t *sc, int maxslot)
+{
+ int intrq, slot, type;
+ static int call_cnt = 0;
+
+ /* only do it a few times to avoid filling the message buffer */
+ if (call_cnt > 10)
+ return;
+
+ call_cnt++;
+
+ device_printf(sc->dev, "--------- Dumping interrupt queue state ----- \n");
+ device_printf(sc->dev, "currently expecting interrupts on queue %d\n",
+ sc->intr.intrq);
+ device_printf(sc->dev, " q slot status \n");
+ device_printf(sc->dev, "--- ---- -------- \n");
+ for (intrq = 0; intrq < 2; intrq++) {
+ for (slot = 0; slot <= maxslot; slot++) {
+ type = sc->intr.q[intrq][slot].type;
+#if 0
+ if (type == 0 && slot != 0)
+ continue;
+#endif
+ device_printf(sc->dev, "[%d]:[%d]: type = 0x%x\n", intrq, slot,
+ type);
+ device_printf(sc->dev, "[%d]:[%d]: flag = 0x%x\n", intrq, slot,
+ sc->intr.q[intrq][slot].flag);
+ device_printf(sc->dev, "[%d]:[%d]: index = 0x%x\n", intrq, slot,
+ be16toh(sc->intr.q[intrq][slot].index));
+ device_printf(sc->dev, "[%d]:[%d]: seqnum = 0x%x\n", intrq, slot,
+ (unsigned int)be32toh(sc->intr.q[intrq][slot].seqnum));
+ device_printf(sc->dev, "[%d]:[%d]: data0 = 0x%x\n", intrq, slot,
+ (unsigned int)be32toh(sc->intr.q[intrq][slot].data0));
+ device_printf(sc->dev, "[%d]:[%d]: data1 = 0x%x\n", intrq, slot,
+ (unsigned int)be32toh(sc->intr.q[intrq][slot].data1));
+
+ }
+ }
+
+}
+
+static inline void
+myri10ge_claim_irq(myri10ge_softc_t *sc)
+{
+ volatile uint32_t dontcare;
+
+
+ *sc->irq_claim = 0;
+ mb();
+
+ /* do a PIO read to ensure that PIO write to claim the irq has
+ hit the nic before we exit the interrupt handler */
+ if (!myri10ge_skip_pio_read) {
+ dontcare = *(volatile uint32_t *)sc->sram;
+ mb();
+ }
+}
+
+static void
+myri10ge_intr(void *arg)
+{
+ myri10ge_softc_t *sc = arg;
+ int intrq, claimed, flags, count, length, ip_csum;
+ uint32_t raw, slot;
+ uint8_t type;
+
+
+ intrq = sc->intr.intrq;
+ claimed = 0;
+ bus_dmamap_sync(sc->intr.dma[intrq].dmat,
+ sc->intr.dma[intrq].map, BUS_DMASYNC_POSTREAD);
+ if (sc->msi_enabled) {
+ /* We know we can immediately claim the interrupt */
+ myri10ge_claim_irq(sc);
+ claimed = 1;
+ } else {
+ /* Check to see if we have the last event in the queue
+ ready. If so, ack it as early as possible. This
+ allows more time to get the interrupt line
+ de-asserted prior to the EOI and reduces the chance
+ of seeing a spurious irq caused by the interrupt
+ line remaining high after EOI */
+
+ slot = be16toh(sc->intr.q[intrq][0].index) - 1;
+ if (slot < myri10ge_max_intr_slots &&
+ sc->intr.q[intrq][slot].type != 0 &&
+ sc->intr.q[intrq][slot].flag != 0) {
+ myri10ge_claim_irq(sc);
+ claimed = 1;
+ }
+ }
+
+ /* walk each slot in the current queue, processing events until
+ we reach an event with a zero type */
+ for (slot = sc->intr.slot; slot < myri10ge_max_intr_slots; slot++) {
+ type = sc->intr.q[intrq][slot].type;
+
+ /* check for partially completed DMA of events when
+ using non-MSI interrupts */
+ if (__predict_false(!claimed)) {
+ mb();
+ /* see if there is something in the queue */
+ if (type == 0) {
+ /* save the current slot for the next
+ * time we (re-)enter this routine */
+ if (sc->intr.slot == slot) {
+ sc->intr.spurious++;
+ }
+ sc->intr.slot = slot;
+ return;
+ }
+ }
+ if (__predict_false(htobe32(sc->intr.q[intrq][slot].seqnum) !=
+ sc->intr.seqnum++)) {
+ device_printf(sc->dev, "Bad interrupt!\n");
+ device_printf(sc->dev,
+ "bad irq seqno "
+ "(got 0x%x, expected 0x%x)\n",
+ (unsigned int)htobe32(sc->intr.q[intrq][slot].seqnum),
+ sc->intr.seqnum);
+ device_printf(sc->dev, "intrq = %d, slot = %d\n",
+ intrq, slot);
+ myri10ge_dump_interrupt_queues(sc, slot);
+ device_printf(sc->dev,
+ "Disabling further interrupt handling\n");
+ bus_teardown_intr(sc->dev, sc->irq_res,
+ sc->ih);
+ sc->ih = NULL;
+ return;
+ }
+
+ switch (type) {
+ case MYRI10GE_MCP_INTR_ETHER_SEND_DONE:
+ myri10ge_tx_done(sc, be32toh(sc->intr.q[intrq][slot].data0));
+
+ if (__predict_true(sc->intr.q[intrq][slot].data1 == 0))
+ break;
+
+ /* check the link state. Don't bother to
+ * byteswap, since it can just be 0 or 1 */
+ if (sc->link_state != sc->fw_stats->link_up) {
+ sc->link_state = sc->fw_stats->link_up;
+ if (sc->link_state) {
+ if_link_state_change(sc->ifp,
+ LINK_STATE_UP);
+ device_printf(sc->dev,
+ "link up\n");
+ } else {
+ if_link_state_change(sc->ifp,
+ LINK_STATE_DOWN);
+ device_printf(sc->dev,
+ "link down\n");
+ }
+ }
+ if (sc->rdma_tags_available !=
+ be32toh(sc->fw_stats->rdma_tags_available)) {
+ sc->rdma_tags_available =
+ be32toh(sc->fw_stats->rdma_tags_available);
+ device_printf(sc->dev, "RDMA timed out!"
+ " %d tags left\n",
+ sc->rdma_tags_available);
+ }
+
+ break;
+
+
+ case MYRI10GE_MCP_INTR_ETHER_RECV_SMALL:
+ raw = be32toh(sc->intr.q[intrq][slot].data0);
+ count = 0xff & raw;
+ flags = raw >> 8;
+ raw = be32toh(sc->intr.q[intrq][slot].data1);
+ ip_csum = raw >> 16;
+ length = 0xffff & raw;
+ myri10ge_rx_done_small(sc, length, ip_csum,
+ flags);
+ break;
+
+ case MYRI10GE_MCP_INTR_ETHER_RECV_BIG:
+ raw = be32toh(sc->intr.q[intrq][slot].data0);
+ count = 0xff & raw;
+ flags = raw >> 8;
+ raw = be32toh(sc->intr.q[intrq][slot].data1);
+ ip_csum = raw >> 16;
+ length = 0xffff & raw;
+ myri10ge_rx_done_big(sc, length, ip_csum,
+ flags);
+
+ break;
+
+ case MYRI10GE_MCP_INTR_LINK_CHANGE:
+ /* not yet implemented in firmware */
+ break;
+
+ case MYRI10GE_MCP_INTR_ETHER_DOWN:
+ sc->down_cnt++;
+ wakeup(&sc->down_cnt);
+ break;
+
+ default:
+ device_printf(sc->dev, "Unknown interrupt type %d\n",
+ type);
+ }
+ sc->intr.q[intrq][slot].type = 0;
+ if (sc->intr.q[intrq][slot].flag != 0) {
+ if (!claimed) {
+ myri10ge_claim_irq(sc);
+ }
+ sc->intr.slot = 0;
+ sc->intr.q[intrq][slot].flag = 0;
+ sc->intr.intrq = ((intrq + 1) & 1);
+ return;
+ }
+ }
+
+ /* we should never be here unless we're on a shared irq and we have
+ not finished setting up the device */
+ return;
+}
+
+static void
+myri10ge_watchdog(struct ifnet *ifp)
+{
+ printf("%s called\n", __FUNCTION__);
+}
+
+static void
+myri10ge_init(void *arg)
+{
+}
+
+
+
+static void
+myri10ge_free_mbufs(myri10ge_softc_t *sc)
+{
+ int i;
+
+ for (i = 0; i <= sc->rx_small.mask; i++) {
+ if (sc->rx_small.info[i].m == NULL)
+ continue;
+ bus_dmamap_unload(sc->rx_small.dmat,
+ sc->rx_small.info[i].map);
+ m_freem(sc->rx_small.info[i].m);
+ sc->rx_small.info[i].m = NULL;
+ }
+
+ for (i = 0; i <= sc->rx_big.mask; i++) {
+ if (sc->rx_big.info[i].m == NULL)
+ continue;
+ bus_dmamap_unload(sc->rx_big.dmat,
+ sc->rx_big.info[i].map);
+ m_freem(sc->rx_big.info[i].m);
+ sc->rx_big.info[i].m = NULL;
+ }
+
+ for (i = 0; i <= sc->tx.mask; i++) {
+ if (sc->tx.info[i].m == NULL)
+ continue;
+ bus_dmamap_unload(sc->tx.dmat,
+ sc->tx.info[i].map);
+ m_freem(sc->tx.info[i].m);
+ sc->tx.info[i].m = NULL;
+ }
+}
+
+static void
+myri10ge_free_rings(myri10ge_softc_t *sc)
+{
+ int i;
+
+ if (sc->tx.req_bytes != NULL) {
+ free(sc->tx.req_bytes, M_DEVBUF);
+ }
+ if (sc->rx_small.shadow != NULL)
+ free(sc->rx_small.shadow, M_DEVBUF);
+ if (sc->rx_big.shadow != NULL)
+ free(sc->rx_big.shadow, M_DEVBUF);
+ if (sc->tx.info != NULL) {
+ for (i = 0; i <= sc->tx.mask; i++) {
+ if (sc->tx.info[i].map != NULL)
+ bus_dmamap_destroy(sc->tx.dmat,
+ sc->tx.info[i].map);
+ }
+ free(sc->tx.info, M_DEVBUF);
+ }
+ if (sc->rx_small.info != NULL) {
+ for (i = 0; i <= sc->rx_small.mask; i++) {
+ if (sc->rx_small.info[i].map != NULL)
+ bus_dmamap_destroy(sc->rx_small.dmat,
+ sc->rx_small.info[i].map);
+ }
+ free(sc->rx_small.info, M_DEVBUF);
+ }
+ if (sc->rx_big.info != NULL) {
+ for (i = 0; i <= sc->rx_big.mask; i++) {
+ if (sc->rx_big.info[i].map != NULL)
+ bus_dmamap_destroy(sc->rx_big.dmat,
+ sc->rx_big.info[i].map);
+ }
+ free(sc->rx_big.info, M_DEVBUF);
+ }
+ if (sc->rx_big.extra_map != NULL)
+ bus_dmamap_destroy(sc->rx_big.dmat,
+ sc->rx_big.extra_map);
+ if (sc->rx_small.extra_map != NULL)
+ bus_dmamap_destroy(sc->rx_small.dmat,
+ sc->rx_small.extra_map);
+ if (sc->tx.dmat != NULL)
+ bus_dma_tag_destroy(sc->tx.dmat);
+ if (sc->rx_small.dmat != NULL)
+ bus_dma_tag_destroy(sc->rx_small.dmat);
+ if (sc->rx_big.dmat != NULL)
+ bus_dma_tag_destroy(sc->rx_big.dmat);
+}
+
+static int
+myri10ge_alloc_rings(myri10ge_softc_t *sc)
+{
+ myri10ge_cmd_t cmd;
+ int tx_ring_size, rx_ring_size;
+ int tx_ring_entries, rx_ring_entries;
+ int i, err;
+ unsigned long bytes;
+
+ /* get ring sizes */
+ err = myri10ge_send_cmd(sc,
+ MYRI10GE_MCP_CMD_GET_SEND_RING_SIZE,
+ &cmd);
+ tx_ring_size = cmd.data0;
+ err |= myri10ge_send_cmd(sc,
+ MYRI10GE_MCP_CMD_GET_RX_RING_SIZE,
+ &cmd);
+ if (err != 0) {
+ device_printf(sc->dev, "Cannot determine ring sizes\n");
+ goto abort_with_nothing;
+ }
+
+ rx_ring_size = cmd.data0;
+
+ tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
+ rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t);
+ sc->ifp->if_snd.ifq_maxlen = tx_ring_entries - 1;
+ sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
+
+ sc->tx.mask = tx_ring_entries - 1;
+ sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1;
+
+ err = ENOMEM;
+
+ /* allocate the tx request copy block */
+ bytes = 8 +
+ sizeof (*sc->tx.req_list) * (MYRI10GE_MCP_ETHER_MAX_SEND_DESC + 4);
+ sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
+ if (sc->tx.req_bytes == NULL)
+ goto abort_with_nothing;
+ /* ensure req_list entries are aligned to 8 bytes */
+ sc->tx.req_list = (mcp_kreq_ether_send_t *)
+ ((unsigned long)(sc->tx.req_bytes + 7) & ~7UL);
+
+ /* allocate the rx shadow rings */
+ bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow);
+ sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
+ if (sc->rx_small.shadow == NULL)
+ goto abort_with_alloc;
+
+ bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow);
+ sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
+ if (sc->rx_big.shadow == NULL)
+ goto abort_with_alloc;
+
+ /* allocate the host info rings */
+ bytes = tx_ring_entries * sizeof (*sc->tx.info);
+ sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
+ if (sc->tx.info == NULL)
+ goto abort_with_alloc;
+
+ bytes = rx_ring_entries * sizeof (*sc->rx_small.info);
+ sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
+ if (sc->rx_small.info == NULL)
+ goto abort_with_alloc;
+
+ bytes = rx_ring_entries * sizeof (*sc->rx_big.info);
+ sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
+ if (sc->rx_big.info == NULL)
+ goto abort_with_alloc;
+
+ /* allocate the busdma resources */
+ err = bus_dma_tag_create(sc->parent_dmat, /* parent */
+ 1, /* alignment */
+ sc->tx.boundary, /* boundary */
+ BUS_SPACE_MAXADDR, /* low */
+ BUS_SPACE_MAXADDR, /* high */
+ NULL, NULL, /* filter */
+ MYRI10GE_MAX_ETHER_MTU,/* maxsize */
+ MYRI10GE_MCP_ETHER_MAX_SEND_DESC,/* num segs */
+ sc->tx.boundary, /* maxsegsize */
+ BUS_DMA_ALLOCNOW, /* flags */
+ NULL, NULL, /* lock */
+ &sc->tx.dmat); /* tag */
+
+ if (err != 0) {
+ device_printf(sc->dev, "Err %d allocating tx dmat\n",
+ err);
+ goto abort_with_alloc;
+ }
+
+ err = bus_dma_tag_create(sc->parent_dmat, /* parent */
+ 1, /* alignment */
+ 4096, /* boundary */
+ BUS_SPACE_MAXADDR, /* low */
+ BUS_SPACE_MAXADDR, /* high */
+ NULL, NULL, /* filter */
+ MHLEN, /* maxsize */
+ 1, /* num segs */
+ MHLEN, /* maxsegsize */
+ BUS_DMA_ALLOCNOW, /* flags */
+ NULL, NULL, /* lock */
+ &sc->rx_small.dmat); /* tag */
+ if (err != 0) {
+ device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
+ err);
+ goto abort_with_alloc;
+ }
+
+ err = bus_dma_tag_create(sc->parent_dmat, /* parent */
+ 1, /* alignment */
+ 4096, /* boundary */
+ BUS_SPACE_MAXADDR, /* low */
+ BUS_SPACE_MAXADDR, /* high */
+ NULL, NULL, /* filter */
+ 4096, /* maxsize */
+ 1, /* num segs */
+ 4096, /* maxsegsize */
+ BUS_DMA_ALLOCNOW, /* flags */
+ NULL, NULL, /* lock */
+ &sc->rx_big.dmat); /* tag */
+ if (err != 0) {
+ device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
+ err);
+ goto abort_with_alloc;
+ }
+
+ /* now use these tags to setup dmamaps for each slot
+ in each ring */
+ for (i = 0; i <= sc->tx.mask; i++) {
+ err = bus_dmamap_create(sc->tx.dmat, 0,
+ &sc->tx.info[i].map);
+ if (err != 0) {
+ device_printf(sc->dev, "Err %d tx dmamap\n",
+ err);
+ goto abort_with_alloc;
+ }
+ }
+ for (i = 0; i <= sc->rx_small.mask; i++) {
+ err = bus_dmamap_create(sc->rx_small.dmat, 0,
+ &sc->rx_small.info[i].map);
+ if (err != 0) {
+ device_printf(sc->dev, "Err %d rx_small dmamap\n",
+ err);
+ goto abort_with_alloc;
+ }
+ }
+ err = bus_dmamap_create(sc->rx_small.dmat, 0,
+ &sc->rx_small.extra_map);
+ if (err != 0) {
+ device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
+ err);
+ goto abort_with_alloc;
+ }
+
+ for (i = 0; i <= sc->rx_big.mask; i++) {
+ err = bus_dmamap_create(sc->rx_big.dmat, 0,
+ &sc->rx_big.info[i].map);
+ if (err != 0) {
+ device_printf(sc->dev, "Err %d rx_big dmamap\n",
+ err);
+ goto abort_with_alloc;
+ }
+ }
+ err = bus_dmamap_create(sc->rx_big.dmat, 0,
+ &sc->rx_big.extra_map);
+ if (err != 0) {
+ device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
+ err);
+ goto abort_with_alloc;
+ }
+ return 0;
+
+abort_with_alloc:
+ myri10ge_free_rings(sc);
+
+abort_with_nothing:
+ return err;
+}
+
+static int
+myri10ge_open(myri10ge_softc_t *sc)
+{
+ myri10ge_cmd_t cmd;
+ int i, err;
+ bus_dmamap_t map;
+
+
+ err = myri10ge_reset(sc);
+ if (err != 0) {
+ device_printf(sc->dev, "failed to reset\n");
+ return EIO;
+ }
+
+ if (MCLBYTES >=
+ sc->ifp->if_mtu + ETHER_HDR_LEN + MYRI10GE_MCP_ETHER_PAD)
+ sc->big_bytes = MCLBYTES;
+ else
+ sc->big_bytes = MJUMPAGESIZE;
+
+ err = myri10ge_alloc_rings(sc);
+ if (err != 0) {
+ device_printf(sc->dev, "failed to allocate rings\n");
+ return err;
+ }
+
+ err = bus_setup_intr(sc->dev, sc->irq_res,
+ INTR_TYPE_NET | INTR_MPSAFE,
+ myri10ge_intr, sc, &sc->ih);
+ if (err != 0) {
+ goto abort_with_rings;
+ }
+
+ /* get the lanai pointers to the send and receive rings */
+
+ err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_GET_SEND_OFFSET, &cmd);
+ sc->tx.lanai =
+ (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
+ err |= myri10ge_send_cmd(sc,
+ MYRI10GE_MCP_CMD_GET_SMALL_RX_OFFSET, &cmd);
+ sc->rx_small.lanai =
+ (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
+ err |= myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_GET_BIG_RX_OFFSET, &cmd);
+ sc->rx_big.lanai =
+ (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
+
+ if (err != 0) {
+ device_printf(sc->dev,
+ "failed to get ring sizes or locations\n");
+ err = EIO;
+ goto abort_with_irq;
+ }
+
+ if (sc->wc) {
+ sc->tx.wc_fifo = sc->sram + 0x200000;
+ sc->rx_small.wc_fifo = sc->sram + 0x300000;
+ sc->rx_big.wc_fifo = sc->sram + 0x340000;
+ } else {
+ sc->tx.wc_fifo = 0;
+ sc->rx_small.wc_fifo = 0;
+ sc->rx_big.wc_fifo = 0;
+ }
+
+
+ /* stock receive rings */
+ for (i = 0; i <= sc->rx_small.mask; i++) {
+ map = sc->rx_small.info[i].map;
+ err = myri10ge_get_buf_small(sc, map, i);
+ if (err) {
+ device_printf(sc->dev, "alloced %d/%d smalls\n",
+ i, sc->rx_small.mask + 1);
+ goto abort;
+ }
+ }
+ for (i = 0; i <= sc->rx_big.mask; i++) {
+ map = sc->rx_big.info[i].map;
+ err = myri10ge_get_buf_big(sc, map, i);
+ if (err) {
+ device_printf(sc->dev, "alloced %d/%d bigs\n",
+ i, sc->rx_big.mask + 1);
+ goto abort;
+ }
+ }
+
+ /* Give the firmware the mtu and the big and small buffer
+ sizes. The firmware wants the big buf size to be a power
+ of two. Luckily, FreeBSD's clusters are powers of two */
+ cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN;
+ err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_SET_MTU, &cmd);
+ cmd.data0 = MHLEN;
+ err |= myri10ge_send_cmd(sc,
+ MYRI10GE_MCP_CMD_SET_SMALL_BUFFER_SIZE,
+ &cmd);
+ cmd.data0 = sc->big_bytes;
+ err |= myri10ge_send_cmd(sc,
+ MYRI10GE_MCP_CMD_SET_BIG_BUFFER_SIZE,
+ &cmd);
+ /* Now give him the pointer to the stats block */
+ cmd.data0 = MYRI10GE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr);
+ cmd.data1 = MYRI10GE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr);
+ err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_SET_STATS_DMA, &cmd);
+
+ if (err != 0) {
+ device_printf(sc->dev, "failed to setup params\n");
+ goto abort;
+ }
+
+ /* Finally, start the firmware running */
+ err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_ETHERNET_UP, &cmd);
+ if (err) {
+ device_printf(sc->dev, "Couldn't bring up link\n");
+ goto abort;
+ }
+ sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+
+ return 0;
+
+
+abort:
+ myri10ge_free_mbufs(sc);
+abort_with_irq:
+ bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
+abort_with_rings:
+ myri10ge_free_rings(sc);
+ return err;
+}
+
+static int
+myri10ge_close(myri10ge_softc_t *sc)
+{
+ myri10ge_cmd_t cmd;
+ int err, old_down_cnt;
+
+ sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ old_down_cnt = sc->down_cnt;
+ mb();
+ err = myri10ge_send_cmd(sc, MYRI10GE_MCP_CMD_ETHERNET_DOWN, &cmd);
+ if (err) {
+ device_printf(sc->dev, "Couldn't bring down link\n");
+ }
+ if (old_down_cnt == sc->down_cnt) {
+ /* wait for down irq */
+ (void)tsleep(&sc->down_cnt, PWAIT, "down myri10ge", hz);
+ }
+ if (old_down_cnt == sc->down_cnt) {
+ device_printf(sc->dev, "never got down irq\n");
+ }
+ if (sc->ih != NULL)
+ bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
+ myri10ge_free_mbufs(sc);
+ myri10ge_free_rings(sc);
+ return 0;
+}
+
+
+static int
+myri10ge_media_change(struct ifnet *ifp)
+{
+ return EINVAL;
+}
+
+static int
+myri10ge_change_mtu(myri10ge_softc_t *sc, int mtu)
+{
+ struct ifnet *ifp = sc->ifp;
+ int real_mtu, old_mtu;
+ int err = 0;
+
+
+ real_mtu = mtu + ETHER_HDR_LEN;
+ if ((real_mtu > MYRI10GE_MAX_ETHER_MTU) ||
+ real_mtu < 60)
+ return EINVAL;
+ sx_xlock(&sc->driver_lock);
+ old_mtu = ifp->if_mtu;
+ ifp->if_mtu = mtu;
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ myri10ge_close(sc);
+ err = myri10ge_open(sc);
+ if (err != 0) {
+ ifp->if_mtu = old_mtu;
+ myri10ge_close(sc);
+ (void) myri10ge_open(sc);
+ }
+ }
+ sx_xunlock(&sc->driver_lock);
+ return err;
+}
+
+static void
+myri10ge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
+{
+ myri10ge_softc_t *sc = ifp->if_softc;
+
+
+ if (sc == NULL)
+ return;
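+ /* Report link state from the stats block the firmware DMAs
+    to the host */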
+ ifmr->ifm_status = IFM_AVALID;
+ ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0;
+ ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
+ ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0;
+}
+
+static int
+myri10ge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+{
+ myri10ge_softc_t *sc = ifp->if_softc;
+ struct ifreq *ifr = (struct ifreq *)data;
+ int err, mask;
+
+ err = 0;
+ switch (command) {
+ case SIOCSIFADDR:
+ case SIOCGIFADDR:
+ err = ether_ioctl(ifp, command, data);
+ break;
+
+ case SIOCSIFMTU:
+ err = myri10ge_change_mtu(sc, ifr->ifr_mtu);
+ break;
+
+ case SIOCSIFFLAGS:
+ sx_xlock(&sc->driver_lock);
+ if (ifp->if_flags & IFF_UP) {
+ if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
+ err = myri10ge_open(sc);
+ } else {
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
+ myri10ge_close(sc);
+ }
+ sx_xunlock(&sc->driver_lock);
+ break;
+
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ err = 0;
+ break;
+
+ case SIOCSIFCAP:
+ sx_xlock(&sc->driver_lock);
+ mask = ifr->ifr_reqcap ^ ifp->if_capenable;
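+ /* mask holds only the capability bits the caller wants toggled;
+    note the else-if below flips at most one of TXCSUM/RXCSUM per
+    request. */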
+ if (mask & IFCAP_TXCSUM) {
+ if (IFCAP_TXCSUM & ifp->if_capenable) {
+ ifp->if_capenable &= ~IFCAP_TXCSUM;
+ ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
+ } else {
+ ifp->if_capenable |= IFCAP_TXCSUM;
+ ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
+ }
+ } else if (mask & IFCAP_RXCSUM) {
+ if (IFCAP_RXCSUM & ifp->if_capenable) {
+ ifp->if_capenable &= ~IFCAP_RXCSUM;
+ sc->csum_flag &= ~MYRI10GE_MCP_ETHER_FLAGS_CKSUM;
+ } else {
+ ifp->if_capenable |= IFCAP_RXCSUM;
+ sc->csum_flag |= MYRI10GE_MCP_ETHER_FLAGS_CKSUM;
+ }
+ }
+ sx_xunlock(&sc->driver_lock);
+ break;
+
+ case SIOCGIFMEDIA:
+ err = ifmedia_ioctl(ifp, (struct ifreq *)data,
+ &sc->media, command);
+ break;
+
+ default:
+ err = ENOTTY;
+ }
+ return err;
+}
+
+static void
+myri10ge_fetch_tunables(myri10ge_softc_t *sc)
+{
+
+ TUNABLE_INT_FETCH("hw.myri10ge.flow_control_enabled",
+ &myri10ge_flow_control);
+ TUNABLE_INT_FETCH("hw.myri10ge.intr_coal_delay",
+ &myri10ge_intr_coal_delay);
+ TUNABLE_INT_FETCH("hw.myri10ge.nvidia_ecrc_enable",
+ &myri10ge_nvidia_ecrc_enable);
+ TUNABLE_INT_FETCH("hw.myri10ge.skip_pio_read",
+ &myri10ge_skip_pio_read);
+
+ if (myri10ge_intr_coal_delay < 0 ||
+ myri10ge_intr_coal_delay > 10*1000)
+ myri10ge_intr_coal_delay = 30;
+ sc->pause = myri10ge_flow_control;
+}
+
+static int
+myri10ge_attach(device_t dev)
+{
+ myri10ge_softc_t *sc = device_get_softc(dev);
+ struct ifnet *ifp;
+ size_t bytes;
+ int rid, err, i;
+ uint16_t cmd;
+
+ sc->dev = dev;
+ myri10ge_fetch_tunables(sc);
+
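+ /* Parent DMA tag for the driver's DMA allocations.  The 4KB
+    boundary presumably reflects a firmware requirement that send
+    segments not cross a 4KB page. */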
+ err = bus_dma_tag_create(NULL, /* parent */
+ 1, /* alignment */
+ 4096, /* boundary */
+ BUS_SPACE_MAXADDR, /* low */
+ BUS_SPACE_MAXADDR, /* high */
+ NULL, NULL, /* filter */
+ MYRI10GE_MAX_ETHER_MTU,/* maxsize */
+ MYRI10GE_MCP_ETHER_MAX_SEND_DESC, /* num segs */
+ 4096, /* maxsegsize */
+ 0, /* flags */
+ NULL, NULL, /* lock */
+ &sc->parent_dmat); /* tag */
+
+ if (err != 0) {
+ device_printf(sc->dev, "Err %d allocating parent dmat\n",
+ err);
+ goto abort_with_nothing;
+ }
+
+ ifp = sc->ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL) {
+ device_printf(dev, "can not if_alloc()\n");
+ err = ENOSPC;
+ goto abort_with_parent_dmat;
+ }
+ mtx_init(&sc->cmd_lock, NULL,
+ MTX_NETWORK_LOCK, MTX_DEF);
+ mtx_init(&sc->tx_lock, device_get_nameunit(dev),
+ MTX_NETWORK_LOCK, MTX_DEF);
+ sx_init(&sc->driver_lock, device_get_nameunit(dev));
+
+ /* Enable DMA and Memory space access */
+ pci_enable_busmaster(dev);
+ cmd = pci_read_config(dev, PCIR_COMMAND, 2);
+ cmd |= PCIM_CMD_MEMEN;
+ pci_write_config(dev, PCIR_COMMAND, cmd, 2);
+
+ /* Map the board into the kernel */
+ rid = PCIR_BARS;
+ sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
+ ~0, 1, RF_ACTIVE);
+ if (sc->mem_res == NULL) {
+ device_printf(dev, "could not map memory\n");
+ err = ENXIO;
+ goto abort_with_lock;
+ }
+ sc->sram = rman_get_virtual(sc->mem_res);
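+ /* The usable window stops short of the top of the 2MB SRAM; the
+    reserved space presumably belongs to the firmware.  The EEPROM
+    strings occupy the last MYRI10GE_EEPROM_STRINGS_SIZE bytes of
+    the window (see the read below). */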
+ sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
+ if (sc->sram_size > rman_get_size(sc->mem_res)) {
+ device_printf(dev, "impossible memory region size %ld\n",
+ rman_get_size(sc->mem_res));
+ err = ENXIO;
+ goto abort_with_mem_res;
+ }
+
+ /* Make a NUL-terminated copy of the EEPROM strings section of
+    LANai SRAM.  The buffer is zeroed first and only
+    MYRI10GE_EEPROM_STRINGS_SIZE - 2 bytes are read, so the copy
+    always ends in at least two NUL bytes. */
+ bzero(sc->eeprom_strings, MYRI10GE_EEPROM_STRINGS_SIZE);
+ bus_space_read_region_1(rman_get_bustag(sc->mem_res),
+ rman_get_bushandle(sc->mem_res),
+ sc->sram_size - MYRI10GE_EEPROM_STRINGS_SIZE,
+ sc->eeprom_strings,
+ MYRI10GE_EEPROM_STRINGS_SIZE - 2);
+ err = myri10ge_parse_strings(sc);
+ if (err != 0)
+ goto abort_with_mem_res;
+
+ /* Enable write combining for efficient use of the PCIe bus */
+ myri10ge_enable_wc(sc);
+
+ /* Allocate the out-of-band DMA memory: the command response
+    block, the zero pad, and the firmware stats block */
+ err = myri10ge_dma_alloc(sc, &sc->cmd_dma,
+ sizeof (myri10ge_cmd_t), 64);
+ if (err != 0)
+ goto abort_with_mem_res;
+ sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
+ err = myri10ge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
+ if (err != 0)
+ goto abort_with_cmd_dma;
+
+ err = myri10ge_dma_alloc(sc, &sc->fw_stats_dma,
+ sizeof (*sc->fw_stats), 64);
+ if (err != 0)
+ goto abort_with_zeropad_dma;
+ sc->fw_stats = (mcp_stats_t *)sc->fw_stats_dma.addr;
+
+
+ /* allocate interrupt queues */
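+ /* Each queue is an array of myri10ge_max_intr_slots mcp_slot_t
+    entries, 4KB aligned; the NIC presumably DMAs completion
+    events into these slots. */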
+ bytes = myri10ge_max_intr_slots * sizeof (*sc->intr.q[0]);
+ for (i = 0; i < MYRI10GE_NUM_INTRQS; i++) {
+ err = myri10ge_dma_alloc(sc, &sc->intr.dma[i],
+ bytes, 4096);
+ if (err != 0)
+ goto abort_with_intrq;
+ sc->intr.q[i] = (mcp_slot_t *)sc->intr.dma[i].addr;
+ }
+
+ /* Add our ithread */
+ rid = 0;
+ sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0,
+ 1, RF_SHAREABLE | RF_ACTIVE);
+ if (sc->irq_res == NULL) {
+ device_printf(dev, "could not alloc interrupt\n");
+ err = ENXIO;
+ goto abort_with_intrq;
+ }
+
+ /* load the firmware */
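+ /* select_firmware presumably chooses between the aligned and
+    unaligned PCIe completion images based on the host chipset;
+    the chosen image is then downloaded and the NIC reset. */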
+ myri10ge_select_firmware(sc);
+
+ err = myri10ge_load_firmware(sc);
+ if (err != 0)
+ goto abort_with_irq_res;
+ err = myri10ge_reset(sc);
+ if (err != 0)
+ goto abort_with_irq_res;
+
+ /* hook into the network stack */
+ if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+ ifp->if_baudrate = 100000000;
+ ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM;
+ ifp->if_hwassist = CSUM_TCP | CSUM_UDP;
+ ifp->if_capenable = ifp->if_capabilities;
+ sc->csum_flag |= MYRI10GE_MCP_ETHER_FLAGS_CKSUM;
+ ifp->if_init = myri10ge_init;
+ ifp->if_softc = sc;
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_ioctl = myri10ge_ioctl;
+ ifp->if_start = myri10ge_start;
+ ifp->if_watchdog = myri10ge_watchdog;
+ ether_ifattach(ifp, sc->mac_addr);
+ /* ether_ifattach sets mtu to 1500 */
+ ifp->if_mtu = MYRI10GE_MAX_ETHER_MTU - ETHER_HDR_LEN;
+
+ /* Initialise the ifmedia structure */
+ ifmedia_init(&sc->media, 0, myri10ge_media_change,
+ myri10ge_media_status);
+ ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL);
+ myri10ge_add_sysctls(sc);
+ return 0;
+
+abort_with_irq_res:
+ bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res);
+abort_with_intrq:
+ for (i = 0; i < MYRI10GE_NUM_INTRQS; i++) {
+ if (sc->intr.q[i] == NULL)
+ continue;
+ sc->intr.q[i] = NULL;
+ myri10ge_dma_free(&sc->intr.dma[i]);
+ }
+ myri10ge_dma_free(&sc->fw_stats_dma);
+abort_with_zeropad_dma:
+ myri10ge_dma_free(&sc->zeropad_dma);
+abort_with_cmd_dma:
+ myri10ge_dma_free(&sc->cmd_dma);
+abort_with_mem_res:
+ bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
+abort_with_lock:
+ pci_disable_busmaster(dev);
+ mtx_destroy(&sc->cmd_lock);
+ mtx_destroy(&sc->tx_lock);
+ sx_destroy(&sc->driver_lock);
+ if_free(ifp);
+abort_with_parent_dmat:
+ bus_dma_tag_destroy(sc->parent_dmat);
+
+abort_with_nothing:
+ return err;
+}
+
+static int
+myri10ge_detach(device_t dev)
+{
+ myri10ge_softc_t *sc = device_get_softc(dev);
+ int i;
+
+ sx_xlock(&sc->driver_lock);
+ if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
+ myri10ge_close(sc);
+ sx_xunlock(&sc->driver_lock);
+ ether_ifdetach(sc->ifp);
+ bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res);
+ for (i = 0; i < MYRI10GE_NUM_INTRQS; i++) {
+ if (sc->intr.q[i] == NULL)
+ continue;
+ sc->intr.q[i] = NULL;
+ myri10ge_dma_free(&sc->intr.dma[i]);
+ }
+ myri10ge_dma_free(&sc->fw_stats_dma);
+ myri10ge_dma_free(&sc->zeropad_dma);
+ myri10ge_dma_free(&sc->cmd_dma);
+ bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
+ pci_disable_busmaster(dev);
+ mtx_destroy(&sc->cmd_lock);
+ mtx_destroy(&sc->tx_lock);
+ sx_destroy(&sc->driver_lock);
+ if_free(sc->ifp);
+ bus_dma_tag_destroy(sc->parent_dmat);
+ return 0;
+}
+
+static int
+myri10ge_shutdown(device_t dev)
+{
+ return 0;
+}
+
+/*
+ This file uses Myri10GE driver indentation.
+
+ Local Variables:
+ c-file-style:"linux"
+ tab-width:8
+ End:
+*/