Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r-- drivers/infiniband/hw/amso1100/Kbuild | 8
-rw-r--r-- drivers/infiniband/hw/amso1100/Kconfig | 15
-rw-r--r-- drivers/infiniband/hw/amso1100/c2.c | 1261
-rw-r--r-- drivers/infiniband/hw/amso1100/c2.h | 550
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_ae.c | 319
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_ae.h | 108
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_alloc.c | 143
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_cm.c | 451
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_cq.c | 435
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_intr.c | 213
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_mm.c | 375
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_mq.c | 174
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_mq.h | 106
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_pd.c | 89
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_provider.c | 892
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_provider.h | 182
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_qp.c | 1015
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_rnic.c | 654
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_status.h | 158
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_user.h | 82
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_vq.c | 260
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_vq.h | 63
-rw-r--r-- drivers/infiniband/hw/amso1100/c2_wr.h | 1520
-rw-r--r-- drivers/infiniband/hw/cxgb3/Kconfig | 27
-rw-r--r-- drivers/infiniband/hw/cxgb3/Makefile | 10
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_dbg.c | 206
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_hal.c | 1309
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_hal.h | 199
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_resource.c | 340
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_resource.h | 69
-rw-r--r-- drivers/infiniband/hw/cxgb3/cxio_wr.h | 759
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch.c | 187
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch.h | 175
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_cm.c | 2186
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_cm.h | 231
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_cq.c | 233
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_ev.c | 230
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_mem.c | 195
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_provider.c | 1406
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_provider.h | 361
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_qp.c | 1186
-rw-r--r-- drivers/infiniband/hw/cxgb3/iwch_user.h | 66
-rw-r--r-- drivers/infiniband/hw/cxgb3/tcb.h | 632
-rw-r--r-- drivers/infiniband/hw/ehca/Kconfig | 9
-rw-r--r-- drivers/infiniband/hw/ehca/Makefile | 16
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_av.c | 275
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_classes.h | 474
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_classes_pSeries.h | 236
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_cq.c | 402
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_eq.c | 186
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_hca.c | 408
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_irq.c | 943
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_irq.h | 77
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_iverbs.h | 212
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_main.c | 1082
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_mcast.c | 131
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_mrmw.c | 2186
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_mrmw.h | 121
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_pd.c | 122
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_qes.h | 260
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_qp.c | 2254
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_reqs.c | 956
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_sqp.c | 202
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_tools.h | 156
-rw-r--r-- drivers/infiniband/hw/ehca/ehca_uverbs.c | 307
-rw-r--r-- drivers/infiniband/hw/ehca/hcp_if.c | 945
-rw-r--r-- drivers/infiniband/hw/ehca/hcp_if.h | 265
-rw-r--r-- drivers/infiniband/hw/ehca/hcp_phyp.c | 80
-rw-r--r-- drivers/infiniband/hw/ehca/hcp_phyp.h | 90
-rw-r--r-- drivers/infiniband/hw/ehca/hipz_fns.h | 68
-rw-r--r-- drivers/infiniband/hw/ehca/hipz_fns_core.h | 100
-rw-r--r-- drivers/infiniband/hw/ehca/hipz_hw.h | 414
-rw-r--r-- drivers/infiniband/hw/ehca/ipz_pt_fn.c | 284
-rw-r--r-- drivers/infiniband/hw/ehca/ipz_pt_fn.h | 289
-rw-r--r-- drivers/infiniband/hw/ipath/Kconfig | 9
-rw-r--r-- drivers/infiniband/hw/ipath/Makefile | 41
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_7220.h | 57
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_common.h | 851
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_cq.c | 477
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_debug.h | 99
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_diag.c | 560
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_dma.c | 191
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_driver.c | 2807
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_eeprom.c | 1183
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_file_ops.c | 2613
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_fs.c | 426
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_iba6110.c | 1941
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_iba6120.c | 1801
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_iba7220.c | 2559
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_init_chip.c | 1075
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_intr.c | 1273
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_kernel.h | 1365
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_keys.c | 268
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_mad.c | 1638
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_mmap.c | 173
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_mr.c | 428
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_qp.c | 1062
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_rc.c | 1970
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_registers.h | 512
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_ruc.c | 733
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_sd7220.c | 1462
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_sd7220_img.c | 1082
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_sdma.c | 806
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_srq.c | 374
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_stats.c | 339
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_sysfs.c | 1236
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_uc.c | 547
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_ud.c | 596
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_user_pages.c | 228
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_user_sdma.c | 879
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_user_sdma.h | 52
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs.c | 2338
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs.h | 935
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs_mcast.c | 362
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_wc_ppc64.c | 62
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_wc_x86_64.c | 184
-rw-r--r-- drivers/infiniband/hw/mlx4/Kconfig | 8
-rw-r--r-- drivers/infiniband/hw/mlx4/Makefile | 3
-rw-r--r-- drivers/infiniband/hw/mlx4/ah.c | 100
-rw-r--r-- drivers/infiniband/hw/mlx4/cq.c | 803
-rw-r--r-- drivers/infiniband/hw/mlx4/doorbell.c | 98
-rw-r--r-- drivers/infiniband/hw/mlx4/mad.c | 340
-rw-r--r-- drivers/infiniband/hw/mlx4/main.c | 757
-rw-r--r-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 320
-rw-r--r-- drivers/infiniband/hw/mlx4/mr.c | 351
-rw-r--r-- drivers/infiniband/hw/mlx4/qp.c | 1944
-rw-r--r-- drivers/infiniband/hw/mlx4/srq.c | 353
-rw-r--r-- drivers/infiniband/hw/mlx4/user.h | 97
-rw-r--r-- drivers/infiniband/hw/mthca/Kconfig | 17
-rw-r--r-- drivers/infiniband/hw/mthca/Makefile | 7
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_allocator.c | 301
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_av.c | 374
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_catas.c | 192
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_cmd.c | 1937
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_cmd.h | 330
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_config_reg.h | 50
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_cq.c | 993
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_dev.h | 594
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_doorbell.h | 109
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_eq.c | 918
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_mad.c | 333
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_main.c | 1346
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_mcg.c | 391
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_memfree.c | 774
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_memfree.h | 179
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_mr.c | 985
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_pd.c | 81
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_profile.c | 285
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_profile.h | 59
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_provider.c | 1424
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_provider.h | 343
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_qp.c | 2323
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_reset.c | 289
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_srq.c | 715
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_uar.c | 78
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_user.h | 112
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_wqe.h | 131
-rw-r--r-- drivers/infiniband/hw/nes/Kconfig | 17
-rw-r--r-- drivers/infiniband/hw/nes/Makefile | 3
-rw-r--r-- drivers/infiniband/hw/nes/nes.c | 1229
-rw-r--r-- drivers/infiniband/hw/nes/nes.h | 548
-rw-r--r-- drivers/infiniband/hw/nes/nes_cm.c | 3384
-rw-r--r-- drivers/infiniband/hw/nes/nes_cm.h | 415
-rw-r--r-- drivers/infiniband/hw/nes/nes_context.h | 193
-rw-r--r-- drivers/infiniband/hw/nes/nes_hw.c | 3387
-rw-r--r-- drivers/infiniband/hw/nes/nes_hw.h | 1222
-rw-r--r-- drivers/infiniband/hw/nes/nes_nic.c | 1806
-rw-r--r-- drivers/infiniband/hw/nes/nes_user.h | 112
-rw-r--r-- drivers/infiniband/hw/nes/nes_utils.c | 938
-rw-r--r-- drivers/infiniband/hw/nes/nes_verbs.c | 3801
-rw-r--r-- drivers/infiniband/hw/nes/nes_verbs.h | 170
171 files changed, 108293 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild
new file mode 100644
index 0000000..06964c4
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/Kbuild
@@ -0,0 +1,8 @@
+ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG
+EXTRA_CFLAGS += -DDEBUG
+endif
+
+obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
+
+iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
+ c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig
new file mode 100644
index 0000000..e6ce5f2
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/Kconfig
@@ -0,0 +1,15 @@
+config INFINIBAND_AMSO1100
+ tristate "Ammasso 1100 HCA support"
+ depends on PCI && INET
+ ---help---
+ This is a low-level driver for the Ammasso 1100 host
+ channel adapter (HCA).
+
+config INFINIBAND_AMSO1100_DEBUG
+ bool "Verbose debugging output"
+ depends on INFINIBAND_AMSO1100
+ default n
+ ---help---
+ This option causes the amso1100 driver to produce a bunch of
+ debug messages. Select this if you are developing the driver
+ or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
new file mode 100644
index 0000000..113f3c0
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -0,0 +1,1261 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_provider.h"
+
+MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
+MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
+ | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
+
+static int debug = -1; /* defaults above */
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+static int c2_up(struct net_device *netdev);
+static int c2_down(struct net_device *netdev);
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+static void c2_tx_interrupt(struct net_device *netdev);
+static void c2_rx_interrupt(struct net_device *netdev);
+static irqreturn_t c2_interrupt(int irq, void *dev_id);
+static void c2_tx_timeout(struct net_device *netdev);
+static int c2_change_mtu(struct net_device *netdev, int new_mtu);
+static void c2_reset(struct c2_port *c2_port);
+static struct net_device_stats *c2_get_stats(struct net_device *netdev);
+
+static struct pci_device_id c2_pci_table[] = {
+ { PCI_DEVICE(0x18b8, 0xb001) },
+ { 0 }
+};
+
+MODULE_DEVICE_TABLE(pci, c2_pci_table);
+
+static void c2_print_macaddr(struct net_device *netdev)
+{
+ pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, "
+ "IRQ %u\n", netdev->name,
+ netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+ netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
+ netdev->irq);
+}
+
+static void c2_set_rxbufsize(struct c2_port *c2_port)
+{
+ struct net_device *netdev = c2_port->netdev;
+
+ if (netdev->mtu > RX_BUF_SIZE)
+ c2_port->rx_buf_size =
+ netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) +
+ NET_IP_ALIGN;
+ else
+ c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE;
+}
+
+/*
+ * Allocate TX ring elements and chain them together.
+ * One-to-one association of adapter descriptors with ring elements.
+ */
+static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr,
+ dma_addr_t base, void __iomem * mmio_txp_ring)
+{
+ struct c2_tx_desc *tx_desc;
+ struct c2_txp_desc __iomem *txp_desc;
+ struct c2_element *elem;
+ int i;
+
+ tx_ring->start = kmalloc(sizeof(*elem) * tx_ring->count, GFP_KERNEL);
+ if (!tx_ring->start)
+ return -ENOMEM;
+
+ elem = tx_ring->start;
+ tx_desc = vaddr;
+ txp_desc = mmio_txp_ring;
+ for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) {
+ tx_desc->len = 0;
+ tx_desc->status = 0;
+
+ /* Set TXP_HTXD_UNINIT */
+ __raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
+ (void __iomem *) txp_desc + C2_TXP_ADDR);
+ __raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
+ __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
+ (void __iomem *) txp_desc + C2_TXP_FLAGS);
+
+ elem->skb = NULL;
+ elem->ht_desc = tx_desc;
+ elem->hw_desc = txp_desc;
+
+ if (i == tx_ring->count - 1) {
+ elem->next = tx_ring->start;
+ tx_desc->next_offset = base;
+ } else {
+ elem->next = elem + 1;
+ tx_desc->next_offset =
+ base + (i + 1) * sizeof(*tx_desc);
+ }
+ }
+
+ tx_ring->to_use = tx_ring->to_clean = tx_ring->start;
+
+ return 0;
+}
+
+/*
+ * Allocate RX ring elements and chain them together.
+ * One-to-one association of adapter descriptors with ring elements.
+ */
+static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr,
+ dma_addr_t base, void __iomem * mmio_rxp_ring)
+{
+ struct c2_rx_desc *rx_desc;
+ struct c2_rxp_desc __iomem *rxp_desc;
+ struct c2_element *elem;
+ int i;
+
+ rx_ring->start = kmalloc(sizeof(*elem) * rx_ring->count, GFP_KERNEL);
+ if (!rx_ring->start)
+ return -ENOMEM;
+
+ elem = rx_ring->start;
+ rx_desc = vaddr;
+ rxp_desc = mmio_rxp_ring;
+ for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) {
+ rx_desc->len = 0;
+ rx_desc->status = 0;
+
+ /* Set RXP_HRXD_UNINIT */
+ __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_OK),
+ (void __iomem *) rxp_desc + C2_RXP_STATUS);
+ __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
+ __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
+ __raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
+ (void __iomem *) rxp_desc + C2_RXP_ADDR);
+ __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
+ (void __iomem *) rxp_desc + C2_RXP_FLAGS);
+
+ elem->skb = NULL;
+ elem->ht_desc = rx_desc;
+ elem->hw_desc = rxp_desc;
+
+ if (i == rx_ring->count - 1) {
+ elem->next = rx_ring->start;
+ rx_desc->next_offset = base;
+ } else {
+ elem->next = elem + 1;
+ rx_desc->next_offset =
+ base + (i + 1) * sizeof(*rx_desc);
+ }
+ }
+
+ rx_ring->to_use = rx_ring->to_clean = rx_ring->start;
+
+ return 0;
+}
+
+/* Setup buffer for receiving */
+static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem)
+{
+ struct c2_dev *c2dev = c2_port->c2dev;
+ struct c2_rx_desc *rx_desc = elem->ht_desc;
+ struct sk_buff *skb;
+ dma_addr_t mapaddr;
+ u32 maplen;
+ struct c2_rxp_hdr *rxp_hdr;
+
+ skb = dev_alloc_skb(c2_port->rx_buf_size);
+ if (unlikely(!skb)) {
+ pr_debug("%s: out of memory for receive\n",
+ c2_port->netdev->name);
+ return -ENOMEM;
+ }
+
+ /* Zero out the rxp hdr in the sk_buff */
+ memset(skb->data, 0, sizeof(*rxp_hdr));
+
+ skb->dev = c2_port->netdev;
+
+ maplen = c2_port->rx_buf_size;
+ mapaddr =
+ pci_map_single(c2dev->pcidev, skb->data, maplen,
+ PCI_DMA_FROMDEVICE);
+
+ /* Set the sk_buff RXP_header to RXP_HRXD_READY */
+ rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+ rxp_hdr->flags = RXP_HRXD_READY;
+
+ __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+ __raw_writew((__force u16) cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
+ elem->hw_desc + C2_RXP_LEN);
+ __raw_writeq((__force u64) cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
+ __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
+ elem->hw_desc + C2_RXP_FLAGS);
+
+ elem->skb = skb;
+ elem->mapaddr = mapaddr;
+ elem->maplen = maplen;
+ rx_desc->len = maplen;
+
+ return 0;
+}
+
+/*
+ * Allocate buffers for the Rx ring
+ * For receive: rx_ring.to_clean is next received frame
+ */
+static int c2_rx_fill(struct c2_port *c2_port)
+{
+ struct c2_ring *rx_ring = &c2_port->rx_ring;
+ struct c2_element *elem;
+ int ret = 0;
+
+ elem = rx_ring->start;
+ do {
+ if (c2_rx_alloc(c2_port, elem)) {
+ ret = 1;
+ break;
+ }
+ } while ((elem = elem->next) != rx_ring->start);
+
+ rx_ring->to_clean = rx_ring->start;
+ return ret;
+}
+
+/* Free all buffers in RX ring, assumes receiver stopped */
+static void c2_rx_clean(struct c2_port *c2_port)
+{
+ struct c2_dev *c2dev = c2_port->c2dev;
+ struct c2_ring *rx_ring = &c2_port->rx_ring;
+ struct c2_element *elem;
+ struct c2_rx_desc *rx_desc;
+
+ elem = rx_ring->start;
+ do {
+ rx_desc = elem->ht_desc;
+ rx_desc->len = 0;
+
+ __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+ __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+ __raw_writew(0, elem->hw_desc + C2_RXP_LEN);
+ __raw_writeq((__force u64) cpu_to_be64(0x99aabbccddeeffULL),
+ elem->hw_desc + C2_RXP_ADDR);
+ __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_UNINIT),
+ elem->hw_desc + C2_RXP_FLAGS);
+
+ if (elem->skb) {
+ pci_unmap_single(c2dev->pcidev, elem->mapaddr,
+ elem->maplen, PCI_DMA_FROMDEVICE);
+ dev_kfree_skb(elem->skb);
+ elem->skb = NULL;
+ }
+ } while ((elem = elem->next) != rx_ring->start);
+}
+
+static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem)
+{
+ struct c2_tx_desc *tx_desc = elem->ht_desc;
+
+ tx_desc->len = 0;
+
+ pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen,
+ PCI_DMA_TODEVICE);
+
+ if (elem->skb) {
+ dev_kfree_skb_any(elem->skb);
+ elem->skb = NULL;
+ }
+
+ return 0;
+}
+
+/* Free all buffers in TX ring, assumes transmitter stopped */
+static void c2_tx_clean(struct c2_port *c2_port)
+{
+ struct c2_ring *tx_ring = &c2_port->tx_ring;
+ struct c2_element *elem;
+ struct c2_txp_desc txp_htxd;
+ int retry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+ elem = tx_ring->start;
+
+ do {
+ retry = 0;
+ do {
+ txp_htxd.flags =
+ readw(elem->hw_desc + C2_TXP_FLAGS);
+
+ if (txp_htxd.flags == TXP_HTXD_READY) {
+ retry = 1;
+ __raw_writew(0,
+ elem->hw_desc + C2_TXP_LEN);
+ __raw_writeq(0,
+ elem->hw_desc + C2_TXP_ADDR);
+ __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_DONE),
+ elem->hw_desc + C2_TXP_FLAGS);
+ c2_port->netstats.tx_dropped++;
+ break;
+ } else {
+ __raw_writew(0,
+ elem->hw_desc + C2_TXP_LEN);
+ __raw_writeq((__force u64) cpu_to_be64(0x1122334455667788ULL),
+ elem->hw_desc + C2_TXP_ADDR);
+ __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_UNINIT),
+ elem->hw_desc + C2_TXP_FLAGS);
+ }
+
+ c2_tx_free(c2_port->c2dev, elem);
+
+ } while ((elem = elem->next) != tx_ring->start);
+ } while (retry);
+
+ c2_port->tx_avail = c2_port->tx_ring.count - 1;
+ c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start;
+
+ if (c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+ netif_wake_queue(c2_port->netdev);
+
+ spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+}
+
+/*
+ * Process transmit descriptors marked 'DONE' by the firmware,
+ * freeing up their unneeded sk_buffs.
+ */
+static void c2_tx_interrupt(struct net_device *netdev)
+{
+ struct c2_port *c2_port = netdev_priv(netdev);
+ struct c2_dev *c2dev = c2_port->c2dev;
+ struct c2_ring *tx_ring = &c2_port->tx_ring;
+ struct c2_element *elem;
+ struct c2_txp_desc txp_htxd;
+
+ spin_lock(&c2_port->tx_lock);
+
+ for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
+ elem = elem->next) {
+ txp_htxd.flags =
+ be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_FLAGS));
+
+ if (txp_htxd.flags != TXP_HTXD_DONE)
+ break;
+
+ if (netif_msg_tx_done(c2_port)) {
+ /* PCI reads are expensive in fast path */
+ txp_htxd.len =
+ be16_to_cpu((__force __be16) readw(elem->hw_desc + C2_TXP_LEN));
+ pr_debug("%s: tx done slot %3Zu status 0x%x len "
+ "%5u bytes\n",
+ netdev->name, elem - tx_ring->start,
+ txp_htxd.flags, txp_htxd.len);
+ }
+
+ c2_tx_free(c2dev, elem);
+ ++(c2_port->tx_avail);
+ }
+
+ tx_ring->to_clean = elem;
+
+ if (netif_queue_stopped(netdev)
+ && c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+ netif_wake_queue(netdev);
+
+ spin_unlock(&c2_port->tx_lock);
+}
+
+static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
+{
+ struct c2_rx_desc *rx_desc = elem->ht_desc;
+ struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+
+ if (rxp_hdr->status != RXP_HRXD_OK ||
+ rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) {
+ pr_debug("BAD RXP_HRXD\n");
+ pr_debug(" rx_desc : %p\n", rx_desc);
+ pr_debug(" index : %Zu\n",
+ elem - c2_port->rx_ring.start);
+ pr_debug(" len : %u\n", rx_desc->len);
+ pr_debug(" rxp_hdr : %p [PA %p]\n", rxp_hdr,
+ (void *) __pa((unsigned long) rxp_hdr));
+ pr_debug(" flags : 0x%x\n", rxp_hdr->flags);
+ pr_debug(" status: 0x%x\n", rxp_hdr->status);
+ pr_debug(" len : %u\n", rxp_hdr->len);
+ pr_debug(" rsvd : 0x%x\n", rxp_hdr->rsvd);
+ }
+
+ /* Setup the skb for reuse since we're dropping this pkt */
+ elem->skb->data = elem->skb->head;
+ skb_reset_tail_pointer(elem->skb);
+
+ /* Zero out the rxp hdr in the sk_buff */
+ memset(elem->skb->data, 0, sizeof(*rxp_hdr));
+
+ /* Write the descriptor to the adapter's rx ring */
+ __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+ __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+ __raw_writew((__force u16) cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
+ elem->hw_desc + C2_RXP_LEN);
+ __raw_writeq((__force u64) cpu_to_be64(elem->mapaddr),
+ elem->hw_desc + C2_RXP_ADDR);
+ __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
+ elem->hw_desc + C2_RXP_FLAGS);
+
+ pr_debug("packet dropped\n");
+ c2_port->netstats.rx_dropped++;
+}
+
+static void c2_rx_interrupt(struct net_device *netdev)
+{
+ struct c2_port *c2_port = netdev_priv(netdev);
+ struct c2_dev *c2dev = c2_port->c2dev;
+ struct c2_ring *rx_ring = &c2_port->rx_ring;
+ struct c2_element *elem;
+ struct c2_rx_desc *rx_desc;
+ struct c2_rxp_hdr *rxp_hdr;
+ struct sk_buff *skb;
+ dma_addr_t mapaddr;
+ u32 maplen, buflen;
+ unsigned long flags;
+
+ spin_lock_irqsave(&c2dev->lock, flags);
+
+ /* Begin where we left off */
+ rx_ring->to_clean = rx_ring->start + c2dev->cur_rx;
+
+ for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean;
+ elem = elem->next) {
+ rx_desc = elem->ht_desc;
+ mapaddr = elem->mapaddr;
+ maplen = elem->maplen;
+ skb = elem->skb;
+ rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+
+ if (rxp_hdr->flags != RXP_HRXD_DONE)
+ break;
+ buflen = rxp_hdr->len;
+
+ /* Sanity check the RXP header */
+ if (rxp_hdr->status != RXP_HRXD_OK ||
+ buflen > (rx_desc->len - sizeof(*rxp_hdr))) {
+ c2_rx_error(c2_port, elem);
+ continue;
+ }
+
+ /*
+ * Allocate and map a new skb for replenishing the host
+ * RX desc
+ */
+ if (c2_rx_alloc(c2_port, elem)) {
+ c2_rx_error(c2_port, elem);
+ continue;
+ }
+
+ /* Unmap the old skb */
+ pci_unmap_single(c2dev->pcidev, mapaddr, maplen,
+ PCI_DMA_FROMDEVICE);
+
+ prefetch(skb->data);
+
+ /*
+ * Skip past the leading 8 bytes comprising of the
+ * "struct c2_rxp_hdr", prepended by the adapter
+ * to the usual Ethernet header ("struct ethhdr"),
+ * to the start of the raw Ethernet packet.
+ *
+ * Fix up the various fields in the sk_buff before
+ * passing it up to netif_rx(). The transfer size
+ * (in bytes) specified by the adapter len field of
+ * the "struct rxp_hdr_t" does NOT include the
+ * "sizeof(struct c2_rxp_hdr)".
+ */
+ skb->data += sizeof(*rxp_hdr);
+ skb_set_tail_pointer(skb, buflen);
+ skb->len = buflen;
+ skb->protocol = eth_type_trans(skb, netdev);
+
+ netif_rx(skb);
+
+ netdev->last_rx = jiffies;
+ c2_port->netstats.rx_packets++;
+ c2_port->netstats.rx_bytes += buflen;
+ }
+
+ /* Save where we left off */
+ rx_ring->to_clean = elem;
+ c2dev->cur_rx = elem - rx_ring->start;
+ C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+ spin_unlock_irqrestore(&c2dev->lock, flags);
+}
+
+/*
+ * Handle netisr0 TX & RX interrupts.
+ */
+static irqreturn_t c2_interrupt(int irq, void *dev_id)
+{
+ unsigned int netisr0, dmaisr;
+ int handled = 0;
+ struct c2_dev *c2dev = (struct c2_dev *) dev_id;
+
+ /* Process CCILNET interrupts */
+ netisr0 = readl(c2dev->regs + C2_NISR0);
+ if (netisr0) {
+
+ /*
+ * There is an issue with the firmware that always
+ * provides the status of RX for both TX & RX
+ * interrupts. So process both queues here.
+ */
+ c2_rx_interrupt(c2dev->netdev);
+ c2_tx_interrupt(c2dev->netdev);
+
+ /* Clear the interrupt */
+ writel(netisr0, c2dev->regs + C2_NISR0);
+ handled++;
+ }
+
+ /* Process RNIC interrupts */
+ dmaisr = readl(c2dev->regs + C2_DISR);
+ if (dmaisr) {
+ writel(dmaisr, c2dev->regs + C2_DISR);
+ c2_rnic_interrupt(c2dev);
+ handled++;
+ }
+
+ if (handled) {
+ return IRQ_HANDLED;
+ } else {
+ return IRQ_NONE;
+ }
+}
+
+static int c2_up(struct net_device *netdev)
+{
+ struct c2_port *c2_port = netdev_priv(netdev);
+ struct c2_dev *c2dev = c2_port->c2dev;
+ struct c2_element *elem;
+ struct c2_rxp_hdr *rxp_hdr;
+ struct in_device *in_dev;
+ size_t rx_size, tx_size;
+ int ret, i;
+ unsigned int netimr0;
+
+ if (netif_msg_ifup(c2_port))
+ pr_debug("%s: enabling interface\n", netdev->name);
+
+ /* Set the Rx buffer size based on MTU */
+ c2_set_rxbufsize(c2_port);
+
+ /* Allocate DMA'able memory for Tx/Rx host descriptor rings */
+ rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc);
+ tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
+
+ c2_port->mem_size = tx_size + rx_size;
+ c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size,
+ &c2_port->dma);
+ if (c2_port->mem == NULL) {
+ pr_debug("Unable to allocate memory for "
+ "host descriptor rings\n");
+ return -ENOMEM;
+ }
+
+ memset(c2_port->mem, 0, c2_port->mem_size);
+
+ /* Create the Rx host descriptor ring */
+ if ((ret =
+ c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
+ c2dev->mmio_rxp_ring))) {
+ pr_debug("Unable to create RX ring\n");
+ goto bail0;
+ }
+
+ /* Allocate Rx buffers for the host descriptor ring */
+ if (c2_rx_fill(c2_port)) {
+ pr_debug("Unable to fill RX ring\n");
+ goto bail1;
+ }
+
+ /* Create the Tx host descriptor ring */
+ if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size,
+ c2_port->dma + rx_size,
+ c2dev->mmio_txp_ring))) {
+ pr_debug("Unable to create TX ring\n");
+ goto bail1;
+ }
+
+ /* Set the TX pointer to where we left off */
+ c2_port->tx_avail = c2_port->tx_ring.count - 1;
+ c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean =
+ c2_port->tx_ring.start + c2dev->cur_tx;
+
+ /* missing: Initialize MAC */
+
+ BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean);
+
+ /* Reset the adapter, ensures the driver is in sync with the RXP */
+ c2_reset(c2_port);
+
+ /* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */
+ for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count;
+ i++, elem++) {
+ rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+ rxp_hdr->flags = 0;
+ __raw_writew((__force u16) cpu_to_be16(RXP_HRXD_READY),
+ elem->hw_desc + C2_RXP_FLAGS);
+ }
+
+ /* Enable network packets */
+ netif_start_queue(netdev);
+
+ /* Enable IRQ */
+ writel(0, c2dev->regs + C2_IDIS);
+ netimr0 = readl(c2dev->regs + C2_NIMR0);
+ netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT);
+ writel(netimr0, c2dev->regs + C2_NIMR0);
+
+ /* Tell the stack to ignore arp requests for ipaddrs bound to
+ * other interfaces. This is needed to prevent the host stack
+ * from responding to arp requests to the ipaddr bound on the
+ * rdma interface.
+ */
+ in_dev = in_dev_get(netdev);
+ IN_DEV_CONF_SET(in_dev, ARP_IGNORE, 1);
+ in_dev_put(in_dev);
+
+ return 0;
+
+ bail1:
+ c2_rx_clean(c2_port);
+ kfree(c2_port->rx_ring.start);
+
+ bail0:
+ pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+ c2_port->dma);
+
+ return ret;
+}
+
+static int c2_down(struct net_device *netdev)
+{
+ struct c2_port *c2_port = netdev_priv(netdev);
+ struct c2_dev *c2dev = c2_port->c2dev;
+
+ if (netif_msg_ifdown(c2_port))
+ pr_debug("%s: disabling interface\n",
+ netdev->name);
+
+ /* Wait for all the queued packets to get sent */
+ c2_tx_interrupt(netdev);
+
+ /* Disable network packets */
+ netif_stop_queue(netdev);
+
+ /* Disable IRQs by clearing the interrupt mask */
+ writel(1, c2dev->regs + C2_IDIS);
+ writel(0, c2dev->regs + C2_NIMR0);
+
+ /* missing: Stop transmitter */
+
+ /* missing: Stop receiver */
+
+ /* Reset the adapter, ensures the driver is in sync with the RXP */
+ c2_reset(c2_port);
+
+ /* missing: Turn off LEDs here */
+
+ /* Free all buffers in the host descriptor rings */
+ c2_tx_clean(c2_port);
+ c2_rx_clean(c2_port);
+
+ /* Free the host descriptor rings */
+ kfree(c2_port->rx_ring.start);
+ kfree(c2_port->tx_ring.start);
+ pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+ c2_port->dma);
+
+ return 0;
+}
+
+static void c2_reset(struct c2_port *c2_port)
+{
+ struct c2_dev *c2dev = c2_port->c2dev;
+ unsigned int cur_rx = c2dev->cur_rx;
+
+ /* Tell the hardware to quiesce */
+ C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI);
+
+ /*
+ * The hardware will reset the C2_PCI_HRX_QUI bit once
+ * the RXP is quiesced. Wait 2 seconds for this.
+ */
+ ssleep(2);
+
+ cur_rx = C2_GET_CUR_RX(c2dev);
+
+ if (cur_rx & C2_PCI_HRX_QUI)
+ pr_debug("c2_reset: failed to quiesce the hardware!\n");
+
+ cur_rx &= ~C2_PCI_HRX_QUI;
+
+ c2dev->cur_rx = cur_rx;
+
+ pr_debug("Current RX: %u\n", c2dev->cur_rx);
+}
+
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct c2_port *c2_port = netdev_priv(netdev);
+ struct c2_dev *c2dev = c2_port->c2dev;
+ struct c2_ring *tx_ring = &c2_port->tx_ring;
+ struct c2_element *elem;
+ dma_addr_t mapaddr;
+ u32 maplen;
+ unsigned long flags;
+ unsigned int i;
+
+ spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+ if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) {
+ netif_stop_queue(netdev);
+ spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+ pr_debug("%s: Tx ring full when queue awake!\n",
+ netdev->name);
+ return NETDEV_TX_BUSY;
+ }
+
+ maplen = skb_headlen(skb);
+ mapaddr =
+ pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE);
+
+ elem = tx_ring->to_use;
+ elem->skb = skb;
+ elem->mapaddr = mapaddr;
+ elem->maplen = maplen;
+
+ /* Tell HW to xmit */
+ __raw_writeq((__force u64) cpu_to_be64(mapaddr),
+ elem->hw_desc + C2_TXP_ADDR);
+ __raw_writew((__force u16) cpu_to_be16(maplen),
+ elem->hw_desc + C2_TXP_LEN);
+ __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
+ elem->hw_desc + C2_TXP_FLAGS);
+
+ c2_port->netstats.tx_packets++;
+ c2_port->netstats.tx_bytes += maplen;
+
+ /* Loop thru additional data fragments and queue them */
+ if (skb_shinfo(skb)->nr_frags) {
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ maplen = frag->size;
+ mapaddr =
+ pci_map_page(c2dev->pcidev, frag->page,
+ frag->page_offset, maplen,
+ PCI_DMA_TODEVICE);
+
+ elem = elem->next;
+ elem->skb = NULL;
+ elem->mapaddr = mapaddr;
+ elem->maplen = maplen;
+
+ /* Tell HW to xmit */
+ __raw_writeq((__force u64) cpu_to_be64(mapaddr),
+ elem->hw_desc + C2_TXP_ADDR);
+ __raw_writew((__force u16) cpu_to_be16(maplen),
+ elem->hw_desc + C2_TXP_LEN);
+ __raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
+ elem->hw_desc + C2_TXP_FLAGS);
+
+ c2_port->netstats.tx_packets++;
+ c2_port->netstats.tx_bytes += maplen;
+ }
+ }
+
+ tx_ring->to_use = elem->next;
+ c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1);
+
+ if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) {
+ netif_stop_queue(netdev);
+ if (netif_msg_tx_queued(c2_port))
+ pr_debug("%s: transmit queue full\n",
+ netdev->name);
+ }
+
+ spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+ netdev->trans_start = jiffies;
+
+ return NETDEV_TX_OK;
+}
+
+static struct net_device_stats *c2_get_stats(struct net_device *netdev)
+{
+ struct c2_port *c2_port = netdev_priv(netdev);
+
+ return &c2_port->netstats;
+}
+
+static void c2_tx_timeout(struct net_device *netdev)
+{
+ struct c2_port *c2_port = netdev_priv(netdev);
+
+ if (netif_msg_timer(c2_port))
+ pr_debug("%s: tx timeout\n", netdev->name);
+
+ c2_tx_clean(c2_port);
+}
+
+static int c2_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ int ret = 0;
+
+ if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
+ return -EINVAL;
+
+ netdev->mtu = new_mtu;
+
+ if (netif_running(netdev)) {
+ c2_down(netdev);
+
+ c2_up(netdev);
+ }
+
+ return ret;
+}
+
+/* Initialize network device */
+static struct net_device *c2_devinit(struct c2_dev *c2dev,
+ void __iomem * mmio_addr)
+{
+ struct c2_port *c2_port = NULL;
+ struct net_device *netdev = alloc_etherdev(sizeof(*c2_port));
+
+ if (!netdev) {
+ pr_debug("c2_port etherdev alloc failed");
+ return NULL;
+ }
+
+ SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+ netdev->open = c2_up;
+ netdev->stop = c2_down;
+ netdev->hard_start_xmit = c2_xmit_frame;
+ netdev->get_stats = c2_get_stats;
+ netdev->tx_timeout = c2_tx_timeout;
+ netdev->change_mtu = c2_change_mtu;
+ netdev->watchdog_timeo = C2_TX_TIMEOUT;
+ netdev->irq = c2dev->pcidev->irq;
+
+ c2_port = netdev_priv(netdev);
+ c2_port->netdev = netdev;
+ c2_port->c2dev = c2dev;
+ c2_port->msg_enable = netif_msg_init(debug, default_msg);
+ c2_port->tx_ring.count = C2_NUM_TX_DESC;
+ c2_port->rx_ring.count = C2_NUM_RX_DESC;
+
+ spin_lock_init(&c2_port->tx_lock);
+
+ /* Copy our 48-bit ethernet hardware address */
+ memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6);
+
+ /* Validate the MAC address */
+ if (!is_valid_ether_addr(netdev->dev_addr)) {
+ pr_debug("Invalid MAC Address\n");
+ c2_print_macaddr(netdev);
+ free_netdev(netdev);
+ return NULL;
+ }
+
+ c2dev->netdev = netdev;
+
+ return netdev;
+}
+
+static int __devinit c2_probe(struct pci_dev *pcidev,
+ const struct pci_device_id *ent)
+{
+ int ret = 0, i;
+ unsigned long reg0_start, reg0_flags, reg0_len;
+ unsigned long reg2_start, reg2_flags, reg2_len;
+ unsigned long reg4_start, reg4_flags, reg4_len;
+ unsigned kva_map_size;
+ struct net_device *netdev = NULL;
+ struct c2_dev *c2dev = NULL;
+ void __iomem *mmio_regs = NULL;
+
+ printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n",
+ DRV_VERSION);
+
+ /* Enable PCI device */
+ ret = pci_enable_device(pcidev);
+ if (ret) {
+ printk(KERN_ERR PFX "%s: Unable to enable PCI device\n",
+ pci_name(pcidev));
+ goto bail0;
+ }
+
+ reg0_start = pci_resource_start(pcidev, BAR_0);
+ reg0_len = pci_resource_len(pcidev, BAR_0);
+ reg0_flags = pci_resource_flags(pcidev, BAR_0);
+
+ reg2_start = pci_resource_start(pcidev, BAR_2);
+ reg2_len = pci_resource_len(pcidev, BAR_2);
+ reg2_flags = pci_resource_flags(pcidev, BAR_2);
+
+ reg4_start = pci_resource_start(pcidev, BAR_4);
+ reg4_len = pci_resource_len(pcidev, BAR_4);
+ reg4_flags = pci_resource_flags(pcidev, BAR_4);
+
+ pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len);
+ pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len);
+ pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len);
+
+ /* Make sure PCI base addr are MMIO */
+ if (!(reg0_flags & IORESOURCE_MEM) ||
+ !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) {
+ printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
+ ret = -ENODEV;
+ goto bail1;
+ }
+
+ /* Check for weird/broken PCI region reporting */
+ if ((reg0_len < C2_REG0_SIZE) ||
+ (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) {
+ printk(KERN_ERR PFX "Invalid PCI region sizes\n");
+ ret = -ENODEV;
+ goto bail1;
+ }
+
+ /* Reserve PCI I/O and memory resources */
+ ret = pci_request_regions(pcidev, DRV_NAME);
+ if (ret) {
+ printk(KERN_ERR PFX "%s: Unable to request regions\n",
+ pci_name(pcidev));
+ goto bail1;
+ }
+
+ if ((sizeof(dma_addr_t) > 4)) {
+ ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
+ if (ret < 0) {
+ printk(KERN_ERR PFX "64b DMA configuration failed\n");
+ goto bail2;
+ }
+ } else {
+ ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
+ if (ret < 0) {
+ printk(KERN_ERR PFX "32b DMA configuration failed\n");
+ goto bail2;
+ }
+ }
+
+ /* Enables bus-mastering on the device */
+ pci_set_master(pcidev);
+
+ /* Remap the adapter PCI registers in BAR4 */
+ mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+ sizeof(struct c2_adapter_pci_regs));
+ if (!mmio_regs) {
+ printk(KERN_ERR PFX
+ "Unable to remap adapter PCI registers in BAR4\n");
+ ret = -EIO;
+ goto bail2;
+ }
+
+ /* Validate PCI regs magic */
+ for (i = 0; i < sizeof(c2_magic); i++) {
+ if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) {
+ printk(KERN_ERR PFX "Downlevel Firmware boot loader "
+ "[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash "
+ "utility to update your boot loader\n",
+ i + 1, sizeof(c2_magic),
+ readb(mmio_regs + C2_REGS_MAGIC + i),
+ c2_magic[i]);
+ printk(KERN_ERR PFX "Adapter not claimed\n");
+ iounmap(mmio_regs);
+ ret = -EIO;
+ goto bail2;
+ }
+ }
+
+ /* Validate the adapter version */
+ if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
+ printk(KERN_ERR PFX "Version mismatch "
+ "[fw=%u, c2=%u], Adapter not claimed\n",
+ be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_VERS)),
+ C2_VERSION);
+ ret = -EINVAL;
+ iounmap(mmio_regs);
+ goto bail2;
+ }
+
+ /* Validate the adapter IVN */
+ if (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
+ printk(KERN_ERR PFX "Downlevel Firmware level. You should be using "
+ "the OpenIB device support kit. "
+ "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
+ be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_IVN)),
+ C2_IVN);
+ ret = -EINVAL;
+ iounmap(mmio_regs);
+ goto bail2;
+ }
+
+ /* Allocate hardware structure */
+ c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev));
+ if (!c2dev) {
+ printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n",
+ pci_name(pcidev));
+ ret = -ENOMEM;
+ iounmap(mmio_regs);
+ goto bail2;
+ }
+
+ memset(c2dev, 0, sizeof(*c2dev));
+ spin_lock_init(&c2dev->lock);
+ c2dev->pcidev = pcidev;
+ c2dev->cur_tx = 0;
+
+ /* Get the last RX index */
+ c2dev->cur_rx =
+ (be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_HRX_CUR)) -
+ 0xffffc000) / sizeof(struct c2_rxp_desc);
+
+ /* Request an interrupt line for the driver */
+ ret = request_irq(pcidev->irq, c2_interrupt, IRQF_SHARED, DRV_NAME, c2dev);
+ if (ret) {
+ printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
+ pci_name(pcidev), pcidev->irq);
+ iounmap(mmio_regs);
+ goto bail3;
+ }
+
+ /* Set driver specific data */
+ pci_set_drvdata(pcidev, c2dev);
+
+ /* Initialize network device */
+ if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
+ iounmap(mmio_regs);
+ goto bail4;
+ }
+
+ /* Save off the actual size prior to unmapping mmio_regs */
+ kva_map_size = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_PCI_WINSIZE));
+
+ /* Unmap the adapter PCI registers in BAR4 */
+ iounmap(mmio_regs);
+
+ /* Register network device */
+ ret = register_netdev(netdev);
+ if (ret) {
+ printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n",
+ ret);
+ goto bail5;
+ }
+
+ /* Disable network packets */
+ netif_stop_queue(netdev);
+
+ /* Remap the adapter HRXDQ PA space to kernel VA space */
+ c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
+ C2_RXP_HRXDQ_SIZE);
+ if (!c2dev->mmio_rxp_ring) {
+ printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
+ ret = -EIO;
+ goto bail6;
+ }
+
+ /* Remap the adapter HTXDQ PA space to kernel VA space */
+ c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
+ C2_TXP_HTXDQ_SIZE);
+ if (!c2dev->mmio_txp_ring) {
+ printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
+ ret = -EIO;
+ goto bail7;
+ }
+
+ /* Save off the current RX index in the last 4 bytes of the TXP Ring */
+ C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+ /* Remap the PCI registers in adapter BAR0 to kernel VA space */
+ c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
+ if (!c2dev->regs) {
+ printk(KERN_ERR PFX "Unable to remap BAR0\n");
+ ret = -EIO;
+ goto bail8;
+ }
+
+ /* Remap the PCI registers in adapter BAR4 to kernel VA space */
+ c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
+ c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+ kva_map_size);
+ if (!c2dev->kva) {
+ printk(KERN_ERR PFX "Unable to remap BAR4\n");
+ ret = -EIO;
+ goto bail9;
+ }
+
+ /* Print out the MAC address */
+ c2_print_macaddr(netdev);
+
+ ret = c2_rnic_init(c2dev);
+ if (ret) {
+ printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret);
+ goto bail10;
+ }
+
+ if (c2_register_device(c2dev))
+ goto bail10;
+
+ return 0;
+
+ bail10:
+ iounmap(c2dev->kva);
+
+ bail9:
+ iounmap(c2dev->regs);
+
+ bail8:
+ iounmap(c2dev->mmio_txp_ring);
+
+ bail7:
+ iounmap(c2dev->mmio_rxp_ring);
+
+ bail6:
+ unregister_netdev(netdev);
+
+ bail5:
+ free_netdev(netdev);
+
+ bail4:
+ free_irq(pcidev->irq, c2dev);
+
+ bail3:
+ ib_dealloc_device(&c2dev->ibdev);
+
+ bail2:
+ pci_release_regions(pcidev);
+
+ bail1:
+ pci_disable_device(pcidev);
+
+ bail0:
+ return ret;
+}
+
+static void __devexit c2_remove(struct pci_dev *pcidev)
+{
+ struct c2_dev *c2dev = pci_get_drvdata(pcidev);
+ struct net_device *netdev = c2dev->netdev;
+
+ /* Unregister with OpenIB */
+ c2_unregister_device(c2dev);
+
+ /* Clean up the RNIC resources */
+ c2_rnic_term(c2dev);
+
+ /* Remove network device from the kernel */
+ unregister_netdev(netdev);
+
+ /* Free network device */
+ free_netdev(netdev);
+
+ /* Free the interrupt line */
+ free_irq(pcidev->irq, c2dev);
+
+ /* missing: Turn LEDs off here */
+
+ /* Unmap adapter PA space */
+ iounmap(c2dev->kva);
+ iounmap(c2dev->regs);
+ iounmap(c2dev->mmio_txp_ring);
+ iounmap(c2dev->mmio_rxp_ring);
+
+ /* Free the hardware structure */
+ ib_dealloc_device(&c2dev->ibdev);
+
+ /* Release reserved PCI I/O and memory resources */
+ pci_release_regions(pcidev);
+
+ /* Disable PCI device */
+ pci_disable_device(pcidev);
+
+ /* Clear driver specific data */
+ pci_set_drvdata(pcidev, NULL);
+}
+
+static struct pci_driver c2_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = c2_pci_table,
+ .probe = c2_probe,
+ .remove = __devexit_p(c2_remove),
+};
+
+static int __init c2_init_module(void)
+{
+ return pci_register_driver(&c2_pci_driver);
+}
+
+static void __exit c2_exit_module(void)
+{
+ pci_unregister_driver(&c2_pci_driver);
+}
+
+module_init(c2_init_module);
+module_exit(c2_exit_module);
diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h
new file mode 100644
index 0000000..d12a24a
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2.h
@@ -0,0 +1,550 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __C2_H
+#define __C2_H
+
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+
+#include "c2_provider.h"
+#include "c2_mq.h"
+#include "c2_status.h"
+
+#define DRV_NAME "c2"
+#define DRV_VERSION "1.1"
+#define PFX DRV_NAME ": "
+
+#define BAR_0 0
+#define BAR_2 2
+#define BAR_4 4
+
+#define RX_BUF_SIZE (1536 + 8)
+#define ETH_JUMBO_MTU 9000
+#define C2_MAGIC "CEPHEUS"
+#define C2_VERSION 4
+#define C2_IVN (18 & 0x7fffffff)
+
+#define C2_REG0_SIZE (16 * 1024)
+#define C2_REG2_SIZE (2 * 1024 * 1024)
+#define C2_REG4_SIZE (256 * 1024 * 1024)
+#define C2_NUM_TX_DESC 341
+#define C2_NUM_RX_DESC 256
+#define C2_PCI_REGS_OFFSET (0x10000)
+#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
+#define C2_RXP_HRXDQ_SIZE (4096)
+#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
+#define C2_TXP_HTXDQ_SIZE (4096)
+#define C2_TX_TIMEOUT (6*HZ)
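+
+/*
+ * BAR4 layout implied by the offsets above: the adapter PCI registers sit
+ * at offset 0x10000, the 4 KB host RX descriptor queue (HRXDQ) at the
+ * 128 MB midpoint of the 256 MB region, and the 4 KB host TX descriptor
+ * queue (HTXDQ) immediately after it.
+ */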
+
+/* CEPHEUS */
+static const u8 c2_magic[] = {
+ 0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
+};
+
+enum adapter_pci_regs {
+ C2_REGS_MAGIC = 0x0000,
+ C2_REGS_VERS = 0x0008,
+ C2_REGS_IVN = 0x000C,
+ C2_REGS_PCI_WINSIZE = 0x0010,
+ C2_REGS_Q0_QSIZE = 0x0014,
+ C2_REGS_Q0_MSGSIZE = 0x0018,
+ C2_REGS_Q0_POOLSTART = 0x001C,
+ C2_REGS_Q0_SHARED = 0x0020,
+ C2_REGS_Q1_QSIZE = 0x0024,
+ C2_REGS_Q1_MSGSIZE = 0x0028,
+ C2_REGS_Q1_SHARED = 0x0030,
+ C2_REGS_Q2_QSIZE = 0x0034,
+ C2_REGS_Q2_MSGSIZE = 0x0038,
+ C2_REGS_Q2_SHARED = 0x0040,
+ C2_REGS_ENADDR = 0x004C,
+ C2_REGS_RDMA_ENADDR = 0x0054,
+ C2_REGS_HRX_CUR = 0x006C,
+};
+
+struct c2_adapter_pci_regs {
+ char reg_magic[8];
+ u32 version;
+ u32 ivn;
+ u32 pci_window_size;
+ u32 q0_q_size;
+ u32 q0_msg_size;
+ u32 q0_pool_start;
+ u32 q0_shared;
+ u32 q1_q_size;
+ u32 q1_msg_size;
+ u32 q1_pool_start;
+ u32 q1_shared;
+ u32 q2_q_size;
+ u32 q2_msg_size;
+ u32 q2_pool_start;
+ u32 q2_shared;
+ u32 log_start;
+ u32 log_size;
+ u8 host_enaddr[8];
+ u8 rdma_enaddr[8];
+ u32 crash_entry;
+ u32 crash_ready[2];
+ u32 fw_txd_cur;
+ u32 fw_hrxd_cur;
+ u32 fw_rxd_cur;
+};
+
+enum pci_regs {
+ C2_HISR = 0x0000,
+ C2_DISR = 0x0004,
+ C2_HIMR = 0x0008,
+ C2_DIMR = 0x000C,
+ C2_NISR0 = 0x0010,
+ C2_NISR1 = 0x0014,
+ C2_NIMR0 = 0x0018,
+ C2_NIMR1 = 0x001C,
+ C2_IDIS = 0x0020,
+};
+
+enum {
+ C2_PCI_HRX_INT = 1 << 8,
+ C2_PCI_HTX_INT = 1 << 17,
+ C2_PCI_HRX_QUI = 1 << 31,
+};
+
+/*
+ * Cepheus registers in BAR0.
+ */
+struct c2_pci_regs {
+ u32 hostisr;
+ u32 dmaisr;
+ u32 hostimr;
+ u32 dmaimr;
+ u32 netisr0;
+ u32 netisr1;
+ u32 netimr0;
+ u32 netimr1;
+ u32 int_disable;
+};
+
+/* TXP flags */
+enum c2_txp_flags {
+ TXP_HTXD_DONE = 0,
+ TXP_HTXD_READY = 1 << 0,
+ TXP_HTXD_UNINIT = 1 << 1,
+};
+
+/* RXP flags */
+enum c2_rxp_flags {
+ RXP_HRXD_UNINIT = 0,
+ RXP_HRXD_READY = 1 << 0,
+ RXP_HRXD_DONE = 1 << 1,
+};
+
+/* RXP status */
+enum c2_rxp_status {
+ RXP_HRXD_ZERO = 0,
+ RXP_HRXD_OK = 1 << 0,
+ RXP_HRXD_BUF_OV = 1 << 1,
+};
+
+/* TXP descriptor fields */
+enum txp_desc {
+ C2_TXP_FLAGS = 0x0000,
+ C2_TXP_LEN = 0x0002,
+ C2_TXP_ADDR = 0x0004,
+};
+
+/* RXP descriptor fields */
+enum rxp_desc {
+ C2_RXP_FLAGS = 0x0000,
+ C2_RXP_STATUS = 0x0002,
+ C2_RXP_COUNT = 0x0004,
+ C2_RXP_LEN = 0x0006,
+ C2_RXP_ADDR = 0x0008,
+};
+
+struct c2_txp_desc {
+ u16 flags;
+ u16 len;
+ u64 addr;
+} __attribute__ ((packed));
+
+struct c2_rxp_desc {
+ u16 flags;
+ u16 status;
+ u16 count;
+ u16 len;
+ u64 addr;
+} __attribute__ ((packed));
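+
+/*
+ * The C2_TXP_* and C2_RXP_* byte offsets above index into these packed
+ * descriptors as laid out in adapter memory, e.g. C2_TXP_ADDR (0x0004) is
+ * the offset of c2_txp_desc.addr and C2_RXP_LEN (0x0006) the offset of
+ * c2_rxp_desc.len.
+ */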
+
+struct c2_rxp_hdr {
+ u16 flags;
+ u16 status;
+ u16 len;
+ u16 rsvd;
+} __attribute__ ((packed));
+
+struct c2_tx_desc {
+ u32 len;
+ u32 status;
+ dma_addr_t next_offset;
+};
+
+struct c2_rx_desc {
+ u32 len;
+ u32 status;
+ dma_addr_t next_offset;
+};
+
+struct c2_alloc {
+ u32 last;
+ u32 max;
+ spinlock_t lock;
+ unsigned long *table;
+};
+
+struct c2_array {
+ struct {
+ void **page;
+ int used;
+ } *page_list;
+};
+
+/*
+ * The MQ shared pointer pool is organized as a linked list of
+ * chunks. Each chunk contains a linked list of free shared pointers
+ * that can be allocated to a given user mode client.
+ *
+ */
+struct sp_chunk {
+ struct sp_chunk *next;
+ dma_addr_t dma_addr;
+ DECLARE_PCI_UNMAP_ADDR(mapping);
+ u16 head;
+ u16 shared_ptr[0];
+};
+
+struct c2_pd_table {
+ u32 last;
+ u32 max;
+ spinlock_t lock;
+ unsigned long *table;
+};
+
+struct c2_qp_table {
+ struct idr idr;
+ spinlock_t lock;
+ int last;
+};
+
+struct c2_element {
+ struct c2_element *next;
+ void *ht_desc; /* host descriptor */
+ void __iomem *hw_desc; /* hardware descriptor */
+ struct sk_buff *skb;
+ dma_addr_t mapaddr;
+ u32 maplen;
+};
+
+struct c2_ring {
+ struct c2_element *to_clean;
+ struct c2_element *to_use;
+ struct c2_element *start;
+ unsigned long count;
+};
+
+struct c2_dev {
+ struct ib_device ibdev;
+ void __iomem *regs;
+ void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
+ void __iomem *mmio_rxp_ring;
+ spinlock_t lock;
+ struct pci_dev *pcidev;
+ struct net_device *netdev;
+ struct net_device *pseudo_netdev;
+ unsigned int cur_tx;
+ unsigned int cur_rx;
+ u32 adapter_handle;
+ int device_cap_flags;
+ void __iomem *kva; /* KVA device memory */
+ unsigned long pa; /* PA device memory */
+ void **qptr_array;
+
+ struct kmem_cache *host_msg_cache;
+
+ struct list_head cca_link; /* adapter list */
+ struct list_head eh_wakeup_list; /* event wakeup list */
+ wait_queue_head_t req_vq_wo;
+
+ /* Cached RNIC properties */
+ struct ib_device_attr props;
+
+ struct c2_pd_table pd_table;
+ struct c2_qp_table qp_table;
+ int ports; /* num of GigE ports */
+ int devnum;
+ spinlock_t vqlock; /* sync vbs req MQ */
+
+ /* Verbs Queues */
+ struct c2_mq req_vq; /* Verbs Request MQ */
+ struct c2_mq rep_vq; /* Verbs Reply MQ */
+ struct c2_mq aeq; /* Async Events MQ */
+
+ /* Kernel client MQs */
+ struct sp_chunk *kern_mqsp_pool;
+
+ /* Device updates these values when posting messages to a host
+ * target queue */
+ u16 req_vq_shared;
+ u16 rep_vq_shared;
+ u16 aeq_shared;
+ u16 irq_claimed;
+
+ /*
+ * Shared host target pages for user-accessible MQs.
+ */
+ int hthead; /* index of first free entry */
+ void *htpages; /* kernel vaddr */
+ int htlen; /* length of htpages memory */
+ void *htuva; /* user mapped vaddr */
+ spinlock_t htlock; /* serialize allocation */
+
+ u64 adapter_hint_uva; /* access to the activity FIFO */
+
+ // spinlock_t aeq_lock;
+ // spinlock_t rnic_lock;
+
+ __be16 *hint_count;
+ dma_addr_t hint_count_dma;
+ u16 hints_read;
+
+ int init; /* TRUE if it's ready */
+ char ae_cache_name[16];
+ char vq_cache_name[16];
+};
+
+struct c2_port {
+ u32 msg_enable;
+ struct c2_dev *c2dev;
+ struct net_device *netdev;
+
+ spinlock_t tx_lock;
+ u32 tx_avail;
+ struct c2_ring tx_ring;
+ struct c2_ring rx_ring;
+
+ void *mem; /* PCI memory for host rings */
+ dma_addr_t dma;
+ unsigned long mem_size;
+
+ u32 rx_buf_size;
+
+ struct net_device_stats netstats;
+};
+
+/*
+ * Activity FIFO registers in BAR0.
+ */
+#define PCI_BAR0_HOST_HINT 0x100
+#define PCI_BAR0_ADAPTER_HINT 0x2000
+
+/*
+ * Ammasso PCI vendor id and Cepheus PCI device id.
+ */
+#define CQ_ARMED 0x01
+#define CQ_WAIT_FOR_DMA 0x80
+
+/*
+ * The format of a hint is as follows:
+ * Lower 16 bits are the count of hints for the queue.
+ * Next 15 bits are the qp_index
+ * Upper most bit depends on who reads it:
+ * If read by producer, then it means Full (1) or Not-Full (0)
+ * If read by consumer, then it means Empty (1) or Not-Empty (0)
+ */
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
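+
+/*
+ * For example (illustrative values only): C2_HINT_MAKE(3, 5) yields
+ * 0x00030005, from which C2_HINT_GET_INDEX() recovers 3 and
+ * C2_HINT_GET_COUNT() recovers 5.
+ */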
+
+
+/*
+ * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t
+ * struct.
+ */
+#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
+
+#ifndef readq
+static inline u64 readq(const void __iomem * addr)
+{
+ u64 ret = readl(addr + 4);
+ ret <<= 32;
+ ret |= readl(addr);
+
+ return ret;
+}
+#endif
+
+#ifndef writeq
+static inline void __raw_writeq(u64 val, void __iomem * addr)
+{
+ __raw_writel((u32) (val), addr);
+ __raw_writel((u32) (val >> 32), (addr + 4));
+}
+#endif
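+
+/*
+ * The fallbacks above synthesize a 64-bit MMIO access from two 32-bit
+ * accesses (low word at addr, high word at addr + 4) on platforms without
+ * native readq/writeq.
+ */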
+
+#define C2_SET_CUR_RX(c2dev, cur_rx) \
+ __raw_writel((__force u32) cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
+
+#define C2_GET_CUR_RX(c2dev) \
+ be32_to_cpu((__force __be32) readl(c2dev->mmio_txp_ring + 4092))
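+
+/*
+ * cur_rx is stashed big-endian in the last 4 bytes (offset 4092) of the
+ * 4 KB HTXDQ window; c2_probe(), c2_reset() and c2_rx_interrupt() use
+ * these macros to exchange the current RX index with the adapter.
+ */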
+
+static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct c2_dev, ibdev);
+}
+
+static inline int c2_errno(void *reply)
+{
+ switch (c2_wr_get_result(reply)) {
+ case C2_OK:
+ return 0;
+ case CCERR_NO_BUFS:
+ case CCERR_INSUFFICIENT_RESOURCES:
+ case CCERR_ZERO_RDMA_READ_RESOURCES:
+ return -ENOMEM;
+ case CCERR_MR_IN_USE:
+ case CCERR_QP_IN_USE:
+ return -EBUSY;
+ case CCERR_ADDR_IN_USE:
+ return -EADDRINUSE;
+ case CCERR_ADDR_NOT_AVAIL:
+ return -EADDRNOTAVAIL;
+ case CCERR_CONN_RESET:
+ return -ECONNRESET;
+ case CCERR_NOT_IMPLEMENTED:
+ case CCERR_INVALID_WQE:
+ return -ENOSYS;
+ case CCERR_QP_NOT_PRIVILEGED:
+ return -EPERM;
+ case CCERR_STACK_ERROR:
+ return -EPROTO;
+ case CCERR_ACCESS_VIOLATION:
+ case CCERR_BASE_AND_BOUNDS_VIOLATION:
+ return -EFAULT;
+ case CCERR_STAG_STATE_NOT_INVALID:
+ case CCERR_INVALID_ADDRESS:
+ case CCERR_INVALID_CQ:
+ case CCERR_INVALID_EP:
+ case CCERR_INVALID_MODIFIER:
+ case CCERR_INVALID_MTU:
+ case CCERR_INVALID_PD_ID:
+ case CCERR_INVALID_QP:
+ case CCERR_INVALID_RNIC:
+ case CCERR_INVALID_STAG:
+ return -EINVAL;
+ default:
+ return -EAGAIN;
+ }
+}
+
+/* Device */
+extern int c2_register_device(struct c2_dev *c2dev);
+extern void c2_unregister_device(struct c2_dev *c2dev);
+extern int c2_rnic_init(struct c2_dev *c2dev);
+extern void c2_rnic_term(struct c2_dev *c2dev);
+extern void c2_rnic_interrupt(struct c2_dev *c2dev);
+extern int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
+extern int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask);
+
+/* QPs */
+extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
+ struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
+extern void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
+extern struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
+extern int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+ struct ib_qp_attr *attr, int attr_mask);
+extern int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+ int ord, int ird);
+extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+ struct ib_send_wr **bad_wr);
+extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+ struct ib_recv_wr **bad_wr);
+extern void __devinit c2_init_qp_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev);
+extern void c2_set_qp_state(struct c2_qp *, int);
+extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
+
+/* PDs */
+extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
+extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
+extern int __devinit c2_init_pd_table(struct c2_dev *c2dev);
+extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev);
+
+/* CQs */
+extern int c2_init_cq(struct c2_dev *c2dev, int entries,
+ struct c2_ucontext *ctx, struct c2_cq *cq);
+extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
+extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
+extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
+extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+
+/* CM */
+extern int c2_llp_connect(struct iw_cm_id *cm_id,
+ struct iw_cm_conn_param *iw_param);
+extern int c2_llp_accept(struct iw_cm_id *cm_id,
+ struct iw_cm_conn_param *iw_param);
+extern int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
+ u8 pdata_len);
+extern int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
+extern int c2_llp_service_destroy(struct iw_cm_id *cm_id);
+
+/* MM */
+extern int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+ int page_size, int pbl_depth, u32 length,
+ u32 off, u64 *va, enum c2_acf acf,
+ struct c2_mr *mr);
+extern int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
+
+/* AE */
+extern void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
+
+/* MQSP Allocator */
+extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+ struct sp_chunk **root);
+extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
+extern __be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+ dma_addr_t *dma_addr, gfp_t gfp_mask);
+extern void c2_free_mqsp(__be16* mqsp);
+#endif
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
new file mode 100644
index 0000000..62af742
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_status.h"
+#include "c2_ae.h"
+
+static int c2_convert_cm_status(u32 c2_status)
+{
+ switch (c2_status) {
+ case C2_CONN_STATUS_SUCCESS:
+ return 0;
+ case C2_CONN_STATUS_REJECTED:
+ return -ENETRESET;
+ case C2_CONN_STATUS_REFUSED:
+ return -ECONNREFUSED;
+ case C2_CONN_STATUS_TIMEDOUT:
+ return -ETIMEDOUT;
+ case C2_CONN_STATUS_NETUNREACH:
+ return -ENETUNREACH;
+ case C2_CONN_STATUS_HOSTUNREACH:
+ return -EHOSTUNREACH;
+ case C2_CONN_STATUS_INVALID_RNIC:
+ return -EINVAL;
+ case C2_CONN_STATUS_INVALID_QP:
+ return -EINVAL;
+ case C2_CONN_STATUS_INVALID_QP_STATE:
+ return -EINVAL;
+ case C2_CONN_STATUS_ADDR_NOT_AVAIL:
+ return -EADDRNOTAVAIL;
+ default:
+ printk(KERN_ERR PFX
+ "%s - Unable to convert CM status: %d\n",
+ __func__, c2_status);
+ return -EIO;
+ }
+}
+
+static const char* to_event_str(int event)
+{
+ static const char* event_str[] = {
+ "CCAE_REMOTE_SHUTDOWN",
+ "CCAE_ACTIVE_CONNECT_RESULTS",
+ "CCAE_CONNECTION_REQUEST",
+ "CCAE_LLP_CLOSE_COMPLETE",
+ "CCAE_TERMINATE_MESSAGE_RECEIVED",
+ "CCAE_LLP_CONNECTION_RESET",
+ "CCAE_LLP_CONNECTION_LOST",
+ "CCAE_LLP_SEGMENT_SIZE_INVALID",
+ "CCAE_LLP_INVALID_CRC",
+ "CCAE_LLP_BAD_FPDU",
+ "CCAE_INVALID_DDP_VERSION",
+ "CCAE_INVALID_RDMA_VERSION",
+ "CCAE_UNEXPECTED_OPCODE",
+ "CCAE_INVALID_DDP_QUEUE_NUMBER",
+ "CCAE_RDMA_READ_NOT_ENABLED",
+ "CCAE_RDMA_WRITE_NOT_ENABLED",
+ "CCAE_RDMA_READ_TOO_SMALL",
+ "CCAE_NO_L_BIT",
+ "CCAE_TAGGED_INVALID_STAG",
+ "CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
+ "CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
+ "CCAE_TAGGED_INVALID_PD",
+ "CCAE_WRAP_ERROR",
+ "CCAE_BAD_CLOSE",
+ "CCAE_BAD_LLP_CLOSE",
+ "CCAE_INVALID_MSN_RANGE",
+ "CCAE_INVALID_MSN_GAP",
+ "CCAE_IRRQ_OVERFLOW",
+ "CCAE_IRRQ_MSN_GAP",
+ "CCAE_IRRQ_MSN_RANGE",
+ "CCAE_IRRQ_INVALID_STAG",
+ "CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
+ "CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
+ "CCAE_IRRQ_INVALID_PD",
+ "CCAE_IRRQ_WRAP_ERROR",
+ "CCAE_CQ_SQ_COMPLETION_OVERFLOW",
+ "CCAE_CQ_RQ_COMPLETION_ERROR",
+ "CCAE_QP_SRQ_WQE_ERROR",
+ "CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
+ "CCAE_CQ_OVERFLOW",
+ "CCAE_CQ_OPERATION_ERROR",
+ "CCAE_SRQ_LIMIT_REACHED",
+ "CCAE_QP_RQ_LIMIT_REACHED",
+ "CCAE_SRQ_CATASTROPHIC_ERROR",
+ "CCAE_RNIC_CATASTROPHIC_ERROR"
+ };
+
+ if (event < CCAE_REMOTE_SHUTDOWN ||
+ event > CCAE_RNIC_CATASTROPHIC_ERROR)
+ return "<invalid event>";
+
+ event -= CCAE_REMOTE_SHUTDOWN;
+ return event_str[event];
+}
+
+static const char *to_qp_state_str(int state)
+{
+ switch (state) {
+ case C2_QP_STATE_IDLE:
+ return "C2_QP_STATE_IDLE";
+ case C2_QP_STATE_CONNECTING:
+ return "C2_QP_STATE_CONNECTING";
+ case C2_QP_STATE_RTS:
+ return "C2_QP_STATE_RTS";
+ case C2_QP_STATE_CLOSING:
+ return "C2_QP_STATE_CLOSING";
+ case C2_QP_STATE_TERMINATE:
+ return "C2_QP_STATE_TERMINATE";
+ case C2_QP_STATE_ERROR:
+ return "C2_QP_STATE_ERROR";
+ default:
+ return "<invalid QP state>";
+	}
+}
+
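+/*
+ * Consume one message from the asynchronous event queue identified by
+ * mq_index and dispatch it, depending on whether it refers to a QP, an
+ * endpoint or a CQ, to the iWARP CM or the IB event handlers.
+ */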
+void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
+{
+ struct c2_mq *mq = c2dev->qptr_array[mq_index];
+ union c2wr *wr;
+ void *resource_user_context;
+ struct iw_cm_event cm_event;
+ struct ib_event ib_event;
+ enum c2_resource_indicator resource_indicator;
+ enum c2_event_id event_id;
+ unsigned long flags;
+ int status;
+
+ /*
+	 * retrieve the message
+ */
+ wr = c2_mq_consume(mq);
+ if (!wr)
+ return;
+
+ memset(&ib_event, 0, sizeof(ib_event));
+ memset(&cm_event, 0, sizeof(cm_event));
+
+ event_id = c2_wr_get_id(wr);
+ resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
+ resource_user_context =
+ (void *) (unsigned long) wr->ae.ae_generic.user_context;
+
+ status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
+
+ pr_debug("event received c2_dev=%p, event_id=%d, "
+ "resource_indicator=%d, user_context=%p, status = %d\n",
+ c2dev, event_id, resource_indicator, resource_user_context,
+ status);
+
+ switch (resource_indicator) {
+ case C2_RES_IND_QP:{
+
+ struct c2_qp *qp = (struct c2_qp *)resource_user_context;
+ struct iw_cm_id *cm_id = qp->cm_id;
+ struct c2wr_ae_active_connect_results *res;
+
+ if (!cm_id) {
+ pr_debug("event received, but cm_id is <nul>, qp=%p!\n",
+ qp);
+ goto ignore_it;
+ }
+ pr_debug("%s: event = %s, user_context=%llx, "
+ "resource_type=%x, "
+ "resource=%x, qp_state=%s\n",
+ __func__,
+ to_event_str(event_id),
+ (unsigned long long) wr->ae.ae_generic.user_context,
+ be32_to_cpu(wr->ae.ae_generic.resource_type),
+ be32_to_cpu(wr->ae.ae_generic.resource),
+ to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
+
+ c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
+
+ switch (event_id) {
+ case CCAE_ACTIVE_CONNECT_RESULTS:
+ res = &wr->ae.ae_active_connect_results;
+ cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+ cm_event.local_addr.sin_addr.s_addr = res->laddr;
+ cm_event.remote_addr.sin_addr.s_addr = res->raddr;
+ cm_event.local_addr.sin_port = res->lport;
+ cm_event.remote_addr.sin_port = res->rport;
+ if (status == 0) {
+ cm_event.private_data_len =
+ be32_to_cpu(res->private_data_length);
+ cm_event.private_data = res->private_data;
+ } else {
+ spin_lock_irqsave(&qp->lock, flags);
+ if (qp->cm_id) {
+ qp->cm_id->rem_ref(qp->cm_id);
+ qp->cm_id = NULL;
+ }
+ spin_unlock_irqrestore(&qp->lock, flags);
+ cm_event.private_data_len = 0;
+ cm_event.private_data = NULL;
+ }
+ if (cm_id->event_handler)
+ cm_id->event_handler(cm_id, &cm_event);
+ break;
+ case CCAE_TERMINATE_MESSAGE_RECEIVED:
+ case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
+ ib_event.device = &c2dev->ibdev;
+ ib_event.element.qp = &qp->ibqp;
+ ib_event.event = IB_EVENT_QP_REQ_ERR;
+
+ if (qp->ibqp.event_handler)
+ qp->ibqp.event_handler(&ib_event,
+ qp->ibqp.
+ qp_context);
+ break;
+ case CCAE_BAD_CLOSE:
+ case CCAE_LLP_CLOSE_COMPLETE:
+ case CCAE_LLP_CONNECTION_RESET:
+ case CCAE_LLP_CONNECTION_LOST:
+ BUG_ON(cm_id->event_handler==(void*)0x6b6b6b6b);
+
+ spin_lock_irqsave(&qp->lock, flags);
+ if (qp->cm_id) {
+ qp->cm_id->rem_ref(qp->cm_id);
+ qp->cm_id = NULL;
+ }
+ spin_unlock_irqrestore(&qp->lock, flags);
+ cm_event.event = IW_CM_EVENT_CLOSE;
+ cm_event.status = 0;
+ if (cm_id->event_handler)
+ cm_id->event_handler(cm_id, &cm_event);
+ break;
+ default:
+			pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
+				"CM_ID=%p\n",
+				__func__, __LINE__,
+				event_id, qp, cm_id);
+			BUG_ON(1);
+ break;
+ }
+ break;
+ }
+
+ case C2_RES_IND_EP:{
+
+ struct c2wr_ae_connection_request *req =
+ &wr->ae.ae_connection_request;
+ struct iw_cm_id *cm_id =
+ (struct iw_cm_id *)resource_user_context;
+
+ pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
+ if (event_id != CCAE_CONNECTION_REQUEST) {
+ pr_debug("%s: Invalid event_id: %d\n",
+ __func__, event_id);
+ break;
+ }
+ cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
+ cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
+ cm_event.local_addr.sin_addr.s_addr = req->laddr;
+ cm_event.remote_addr.sin_addr.s_addr = req->raddr;
+ cm_event.local_addr.sin_port = req->lport;
+ cm_event.remote_addr.sin_port = req->rport;
+ cm_event.private_data_len =
+ be32_to_cpu(req->private_data_length);
+ cm_event.private_data = req->private_data;
+
+ if (cm_id->event_handler)
+ cm_id->event_handler(cm_id, &cm_event);
+ break;
+ }
+
+ case C2_RES_IND_CQ:{
+ struct c2_cq *cq =
+ (struct c2_cq *) resource_user_context;
+
+ pr_debug("IB_EVENT_CQ_ERR\n");
+ ib_event.device = &c2dev->ibdev;
+ ib_event.element.cq = &cq->ibcq;
+ ib_event.event = IB_EVENT_CQ_ERR;
+
+ if (cq->ibcq.event_handler)
+ cq->ibcq.event_handler(&ib_event,
+ cq->ibcq.cq_context);
+			break;
+		}
+
+ default:
+ printk("Bad resource indicator = %d\n",
+ resource_indicator);
+ break;
+ }
+
+ ignore_it:
+ c2_mq_free(mq);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.h b/drivers/infiniband/hw/amso1100/c2_ae.h
new file mode 100644
index 0000000..3a065c3
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_ae.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_AE_H_
+#define _C2_AE_H_
+
+/*
+ * WARNING: If you change this file, also bump C2_IVN_BASE
+ * in common/include/clustercore/c2_ivn.h.
+ */
+
+/*
+ * Asynchronous Event Identifiers
+ *
+ * These start at 0x80 only so it's obvious from inspection that
+ * they are not work-request statuses. This isn't critical.
+ *
+ * NOTE: these event id's must fit in eight bits.
+ */
+enum c2_event_id {
+ CCAE_REMOTE_SHUTDOWN = 0x80,
+ CCAE_ACTIVE_CONNECT_RESULTS,
+ CCAE_CONNECTION_REQUEST,
+ CCAE_LLP_CLOSE_COMPLETE,
+ CCAE_TERMINATE_MESSAGE_RECEIVED,
+ CCAE_LLP_CONNECTION_RESET,
+ CCAE_LLP_CONNECTION_LOST,
+ CCAE_LLP_SEGMENT_SIZE_INVALID,
+ CCAE_LLP_INVALID_CRC,
+ CCAE_LLP_BAD_FPDU,
+ CCAE_INVALID_DDP_VERSION,
+ CCAE_INVALID_RDMA_VERSION,
+ CCAE_UNEXPECTED_OPCODE,
+ CCAE_INVALID_DDP_QUEUE_NUMBER,
+ CCAE_RDMA_READ_NOT_ENABLED,
+ CCAE_RDMA_WRITE_NOT_ENABLED,
+ CCAE_RDMA_READ_TOO_SMALL,
+ CCAE_NO_L_BIT,
+ CCAE_TAGGED_INVALID_STAG,
+ CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
+ CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
+ CCAE_TAGGED_INVALID_PD,
+ CCAE_WRAP_ERROR,
+ CCAE_BAD_CLOSE,
+ CCAE_BAD_LLP_CLOSE,
+ CCAE_INVALID_MSN_RANGE,
+ CCAE_INVALID_MSN_GAP,
+ CCAE_IRRQ_OVERFLOW,
+ CCAE_IRRQ_MSN_GAP,
+ CCAE_IRRQ_MSN_RANGE,
+ CCAE_IRRQ_INVALID_STAG,
+ CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
+ CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
+ CCAE_IRRQ_INVALID_PD,
+ CCAE_IRRQ_WRAP_ERROR,
+ CCAE_CQ_SQ_COMPLETION_OVERFLOW,
+ CCAE_CQ_RQ_COMPLETION_ERROR,
+ CCAE_QP_SRQ_WQE_ERROR,
+ CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
+ CCAE_CQ_OVERFLOW,
+ CCAE_CQ_OPERATION_ERROR,
+ CCAE_SRQ_LIMIT_REACHED,
+ CCAE_QP_RQ_LIMIT_REACHED,
+ CCAE_SRQ_CATASTROPHIC_ERROR,
+ CCAE_RNIC_CATASTROPHIC_ERROR
+/* WARNING: If you add more IDs, make sure their values fit in eight bits. */
+};
+
+/*
+ * Resource Indicators and Identifiers
+ */
+enum c2_resource_indicator {
+ C2_RES_IND_QP = 1,
+ C2_RES_IND_EP,
+ C2_RES_IND_CQ,
+ C2_RES_IND_SRQ,
+};
+
+#endif /* _C2_AE_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c
new file mode 100644
index 0000000..e911016
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_alloc.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+
+#include "c2.h"
+
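+/*
+ * The MQ shared-pointer pool is a linked list of page-sized chunks.  Each
+ * chunk holds an array of 16-bit slots (shared_ptr[]) managed as a free
+ * list: "head" is the index of the first free slot, each free slot holds
+ * the index of the next free slot, and 0xFFFF terminates the list.
+ */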
+static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
+ struct sp_chunk **head)
+{
+ int i;
+ struct sp_chunk *new_head;
+ dma_addr_t dma_addr;
+
+ new_head = dma_alloc_coherent(&c2dev->pcidev->dev, PAGE_SIZE,
+ &dma_addr, gfp_mask);
+ if (new_head == NULL)
+ return -ENOMEM;
+
+ new_head->dma_addr = dma_addr;
+ pci_unmap_addr_set(new_head, mapping, new_head->dma_addr);
+
+ new_head->next = NULL;
+ new_head->head = 0;
+
+ /* build list where each index is the next free slot */
+ for (i = 0;
+ i < (PAGE_SIZE - sizeof(struct sp_chunk) -
+ sizeof(u16)) / sizeof(u16) - 1;
+ i++) {
+ new_head->shared_ptr[i] = i + 1;
+ }
+ /* terminate list */
+ new_head->shared_ptr[i] = 0xFFFF;
+
+ *head = new_head;
+ return 0;
+}
+
+int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
+ struct sp_chunk **root)
+{
+ return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
+}
+
+void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
+{
+ struct sp_chunk *next;
+
+ while (root) {
+ next = root->next;
+ dma_free_coherent(&c2dev->pcidev->dev, PAGE_SIZE, root,
+ pci_unmap_addr(root, mapping));
+ root = next;
+ }
+}
+
+__be16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
+ dma_addr_t *dma_addr, gfp_t gfp_mask)
+{
+ u16 mqsp;
+
+ while (head) {
+ mqsp = head->head;
+ if (mqsp != 0xFFFF) {
+ head->head = head->shared_ptr[mqsp];
+ break;
+ } else if (head->next == NULL) {
+ if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) ==
+ 0) {
+ head = head->next;
+ mqsp = head->head;
+ head->head = head->shared_ptr[mqsp];
+ break;
+ } else
+ return NULL;
+ } else
+ head = head->next;
+ }
+ if (head) {
+ *dma_addr = head->dma_addr +
+ ((unsigned long) &(head->shared_ptr[mqsp]) -
+ (unsigned long) head);
+ pr_debug("%s addr %p dma_addr %llx\n", __func__,
+ &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr);
+ return (__force __be16 *) &(head->shared_ptr[mqsp]);
+ }
+ return NULL;
+}
+
+void c2_free_mqsp(__be16 *mqsp)
+{
+ struct sp_chunk *head;
+ u16 idx;
+
+ /* The chunk containing this ptr begins at the page boundary */
+ head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
+
+ /* Link head to new mqsp */
+ *mqsp = (__force __be16) head->head;
+
+ /* Compute the shared_ptr index */
+ idx = ((unsigned long) mqsp & ~PAGE_MASK) >> 1;
+ idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
+
+ /* Point this index at the head */
+ head->shared_ptr[idx] = head->head;
+
+ /* Point head at this index */
+ head->head = idx;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c
new file mode 100644
index 0000000..75b93e9
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_cm.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_vq.h"
+#include <rdma/iw_cm.h>
+
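+/*
+ * Initiate an active iWARP connection: associate the QP with the cm_id,
+ * set the RDMA read limits and post a CCWR_QP_CONNECT work request to the
+ * adapter.  There is no synchronous reply; the result arrives later as a
+ * CCAE_ACTIVE_CONNECT_RESULTS asynchronous event (see c2_ae.c).
+ */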
+int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+ struct c2_dev *c2dev = to_c2dev(cm_id->device);
+ struct ib_qp *ibqp;
+ struct c2_qp *qp;
+ struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */
+ struct c2_vq_req *vq_req;
+ int err;
+
+ ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+ if (!ibqp)
+ return -EINVAL;
+ qp = to_c2qp(ibqp);
+
+ /* Associate QP <--> CM_ID */
+ cm_id->provider_data = qp;
+ cm_id->add_ref(cm_id);
+ qp->cm_id = cm_id;
+
+ /*
+	 * Reject private_data longer than the supported maximum length.
+ */
+ if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
+ err = -EINVAL;
+ goto bail0;
+ }
+ /*
+ * Set the rdma read limits
+ */
+ err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+ if (err)
+ goto bail0;
+
+ /*
+ * Create and send a WR_QP_CONNECT...
+ */
+ wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+ if (!wr) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req) {
+ err = -ENOMEM;
+ goto bail1;
+ }
+
+ c2_wr_set_id(wr, CCWR_QP_CONNECT);
+ wr->hdr.context = 0;
+ wr->rnic_handle = c2dev->adapter_handle;
+ wr->qp_handle = qp->adapter_handle;
+
+ wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr;
+ wr->remote_port = cm_id->remote_addr.sin_port;
+
+ /*
+	 * Move any private data from the caller's buf into
+ * the WR.
+ */
+ if (iw_param->private_data) {
+ wr->private_data_length =
+ cpu_to_be32(iw_param->private_data_len);
+ memcpy(&wr->private_data[0], iw_param->private_data,
+ iw_param->private_data_len);
+ } else
+ wr->private_data_length = 0;
+
+ /*
+ * Send WR to adapter. NOTE: There is no synch reply from
+ * the adapter.
+ */
+ err = vq_send_wr(c2dev, (union c2wr *) wr);
+ vq_req_free(c2dev, vq_req);
+
+ bail1:
+ kfree(wr);
+ bail0:
+ if (err) {
+ /*
+ * If we fail, release reference on QP and
+ * disassociate QP from CM_ID
+ */
+ cm_id->provider_data = NULL;
+ qp->cm_id = NULL;
+ cm_id->rem_ref(cm_id);
+ }
+ return err;
+}
+
+int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
+{
+ struct c2_dev *c2dev;
+ struct c2wr_ep_listen_create_req wr;
+ struct c2wr_ep_listen_create_rep *reply;
+ struct c2_vq_req *vq_req;
+ int err;
+
+ c2dev = to_c2dev(cm_id->device);
+ if (c2dev == NULL)
+ return -EINVAL;
+
+ /*
+ * Allocate verbs request.
+ */
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req)
+ return -ENOMEM;
+
+ /*
+ * Build the WR
+ */
+ c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
+ wr.hdr.context = (u64) (unsigned long) vq_req;
+ wr.rnic_handle = c2dev->adapter_handle;
+ wr.local_addr = cm_id->local_addr.sin_addr.s_addr;
+ wr.local_port = cm_id->local_addr.sin_port;
+ wr.backlog = cpu_to_be32(backlog);
+ wr.user_context = (u64) (unsigned long) cm_id;
+
+ /*
+ * Reference the request struct. Dereferenced in the int handler.
+ */
+ vq_req_get(c2dev, vq_req);
+
+ /*
+ * Send WR to adapter
+ */
+ err = vq_send_wr(c2dev, (union c2wr *) & wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail0;
+ }
+
+ /*
+ * Wait for reply from adapter
+ */
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err)
+ goto bail0;
+
+ /*
+ * Process reply
+ */
+ reply =
+ (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail1;
+ }
+
+ if ((err = c2_errno(reply)) != 0)
+ goto bail1;
+
+ /*
+ * Keep the adapter handle. Used in subsequent destroy
+ */
+ cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
+
+ /*
+ * free vq stuff
+ */
+ vq_repbuf_free(c2dev, reply);
+ vq_req_free(c2dev, vq_req);
+
+ return 0;
+
+ bail1:
+ vq_repbuf_free(c2dev, reply);
+ bail0:
+ vq_req_free(c2dev, vq_req);
+ return err;
+}
+
+
+int c2_llp_service_destroy(struct iw_cm_id *cm_id)
+{
+
+ struct c2_dev *c2dev;
+ struct c2wr_ep_listen_destroy_req wr;
+ struct c2wr_ep_listen_destroy_rep *reply;
+ struct c2_vq_req *vq_req;
+ int err;
+
+ c2dev = to_c2dev(cm_id->device);
+ if (c2dev == NULL)
+ return -EINVAL;
+
+ /*
+ * Allocate verbs request.
+ */
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req)
+ return -ENOMEM;
+
+ /*
+ * Build the WR
+ */
+ c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
+ wr.hdr.context = (unsigned long) vq_req;
+ wr.rnic_handle = c2dev->adapter_handle;
+ wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
+
+ /*
+ * reference the request struct. dereferenced in the int handler.
+ */
+ vq_req_get(c2dev, vq_req);
+
+ /*
+ * Send WR to adapter
+ */
+ err = vq_send_wr(c2dev, (union c2wr *) & wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail0;
+ }
+
+ /*
+ * Wait for reply from adapter
+ */
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err)
+ goto bail0;
+
+ /*
+ * Process reply
+ */
+ reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+ if ((err = c2_errno(reply)) != 0)
+ goto bail1;
+
+ bail1:
+ vq_repbuf_free(c2dev, reply);
+ bail0:
+ vq_req_free(c2dev, vq_req);
+ return err;
+}
+
+int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+ struct c2_dev *c2dev = to_c2dev(cm_id->device);
+ struct c2_qp *qp;
+ struct ib_qp *ibqp;
+ struct c2wr_cr_accept_req *wr; /* variable length WR */
+ struct c2_vq_req *vq_req;
+ struct c2wr_cr_accept_rep *reply; /* VQ Reply msg ptr. */
+ int err;
+
+ ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
+ if (!ibqp)
+ return -EINVAL;
+ qp = to_c2qp(ibqp);
+
+ /* Set the RDMA read limits */
+ err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
+ if (err)
+ goto bail0;
+
+ /* Allocate verbs request. */
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+ vq_req->qp = qp;
+ vq_req->cm_id = cm_id;
+ vq_req->event = IW_CM_EVENT_ESTABLISHED;
+
+ wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+ if (!wr) {
+ err = -ENOMEM;
+ goto bail1;
+ }
+
+ /* Build the WR */
+ c2_wr_set_id(wr, CCWR_CR_ACCEPT);
+ wr->hdr.context = (unsigned long) vq_req;
+ wr->rnic_handle = c2dev->adapter_handle;
+ wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
+ wr->qp_handle = qp->adapter_handle;
+
+ /* Replace the cr_handle with the QP after accept */
+ cm_id->provider_data = qp;
+ cm_id->add_ref(cm_id);
+ qp->cm_id = cm_id;
+
+ /* Validate private_data length */
+ if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
+ err = -EINVAL;
+ goto bail1;
+ }
+
+ if (iw_param->private_data) {
+ wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
+ memcpy(&wr->private_data[0],
+ iw_param->private_data, iw_param->private_data_len);
+ } else
+ wr->private_data_length = 0;
+
+ /* Reference the request struct. Dereferenced in the int handler. */
+ vq_req_get(c2dev, vq_req);
+
+ /* Send WR to adapter */
+ err = vq_send_wr(c2dev, (union c2wr *) wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail1;
+ }
+
+ /* Wait for reply from adapter */
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err)
+ goto bail1;
+
+ /* Check that reply is present */
+ reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail1;
+ }
+
+ err = c2_errno(reply);
+ vq_repbuf_free(c2dev, reply);
+
+ if (!err)
+ c2_set_qp_state(qp, C2_QP_STATE_RTS);
+ bail1:
+ kfree(wr);
+ vq_req_free(c2dev, vq_req);
+ bail0:
+ if (err) {
+ /*
+ * If we fail, release reference on QP and
+ * disassociate QP from CM_ID
+ */
+ cm_id->provider_data = NULL;
+ qp->cm_id = NULL;
+ cm_id->rem_ref(cm_id);
+ }
+ return err;
+}
+
+int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+ struct c2_dev *c2dev;
+ struct c2wr_cr_reject_req wr;
+ struct c2_vq_req *vq_req;
+ struct c2wr_cr_reject_rep *reply;
+ int err;
+
+ c2dev = to_c2dev(cm_id->device);
+
+ /*
+ * Allocate verbs request.
+ */
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req)
+ return -ENOMEM;
+
+ /*
+ * Build the WR
+ */
+ c2_wr_set_id(&wr, CCWR_CR_REJECT);
+ wr.hdr.context = (unsigned long) vq_req;
+ wr.rnic_handle = c2dev->adapter_handle;
+ wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
+
+ /*
+ * reference the request struct. dereferenced in the int handler.
+ */
+ vq_req_get(c2dev, vq_req);
+
+ /*
+ * Send WR to adapter
+ */
+ err = vq_send_wr(c2dev, (union c2wr *) & wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail0;
+ }
+
+ /*
+ * Wait for reply from adapter
+ */
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err)
+ goto bail0;
+
+ /*
+ * Process reply
+ */
+ reply = (struct c2wr_cr_reject_rep *) (unsigned long)
+ vq_req->reply_msg;
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+ err = c2_errno(reply);
+ /*
+ * free vq stuff
+ */
+ vq_repbuf_free(c2dev, reply);
+
+ bail0:
+ vq_req_free(c2dev, vq_req);
+ return err;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
new file mode 100644
index 0000000..bb17cce
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+
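+/* CQ message entries are padded up to a 32-byte multiple. */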
+#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
+
+static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
+{
+ struct c2_cq *cq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&c2dev->lock, flags);
+ cq = c2dev->qptr_array[cqn];
+ if (!cq) {
+ spin_unlock_irqrestore(&c2dev->lock, flags);
+ return NULL;
+ }
+ atomic_inc(&cq->refcount);
+ spin_unlock_irqrestore(&c2dev->lock, flags);
+ return cq;
+}
+
+static void c2_cq_put(struct c2_cq *cq)
+{
+ if (atomic_dec_and_test(&cq->refcount))
+ wake_up(&cq->wait);
+}
+
+void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
+{
+ struct c2_cq *cq;
+
+ cq = c2_cq_get(c2dev, mq_index);
+ if (!cq) {
+ printk("discarding events on destroyed CQN=%d\n", mq_index);
+ return;
+ }
+
+ (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
+ c2_cq_put(cq);
+}
+
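+/*
+ * Scrub completions belonging to a QP that is being destroyed: walk the
+ * unconsumed portion of the CQ's message queue and clear qp_user_context
+ * in any entry that points at this QP so that c2_poll_one() will skip it.
+ */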
+void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
+{
+ struct c2_cq *cq;
+ struct c2_mq *q;
+
+ cq = c2_cq_get(c2dev, mq_index);
+ if (!cq)
+ return;
+
+ spin_lock_irq(&cq->lock);
+ q = &cq->mq;
+ if (q && !c2_mq_empty(q)) {
+ u16 priv = q->priv;
+ struct c2wr_ce *msg;
+
+ while (priv != be16_to_cpu(*q->shared)) {
+ msg = (struct c2wr_ce *)
+ (q->msg_pool.host + priv * q->msg_size);
+ if (msg->qp_user_context == (u64) (unsigned long) qp) {
+ msg->qp_user_context = (u64) 0;
+ }
+ priv = (priv + 1) % q->q_size;
+ }
+ }
+ spin_unlock_irq(&cq->lock);
+ c2_cq_put(cq);
+}
+
+static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
+{
+ switch (status) {
+ case C2_OK:
+ return IB_WC_SUCCESS;
+ case CCERR_FLUSHED:
+ return IB_WC_WR_FLUSH_ERR;
+ case CCERR_BASE_AND_BOUNDS_VIOLATION:
+ return IB_WC_LOC_PROT_ERR;
+ case CCERR_ACCESS_VIOLATION:
+ return IB_WC_LOC_ACCESS_ERR;
+ case CCERR_TOTAL_LENGTH_TOO_BIG:
+ return IB_WC_LOC_LEN_ERR;
+ case CCERR_INVALID_WINDOW:
+ return IB_WC_MW_BIND_ERR;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
+
+
+static inline int c2_poll_one(struct c2_dev *c2dev,
+ struct c2_cq *cq, struct ib_wc *entry)
+{
+ struct c2wr_ce *ce;
+ struct c2_qp *qp;
+ int is_recv = 0;
+
+ ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+ if (!ce) {
+ return -EAGAIN;
+ }
+
+ /*
+ * if the qp returned is null then this qp has already
+	 * been freed and we are unable to process the completion.
+ * try pulling the next message
+ */
+ while ((qp =
+ (struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
+ c2_mq_free(&cq->mq);
+ ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+ if (!ce)
+ return -EAGAIN;
+ }
+
+ entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
+ entry->wr_id = ce->hdr.context;
+ entry->qp = &qp->ibqp;
+ entry->wc_flags = 0;
+ entry->slid = 0;
+ entry->sl = 0;
+ entry->src_qp = 0;
+ entry->dlid_path_bits = 0;
+ entry->pkey_index = 0;
+
+ switch (c2_wr_get_id(ce)) {
+ case C2_WR_TYPE_SEND:
+ entry->opcode = IB_WC_SEND;
+ break;
+ case C2_WR_TYPE_RDMA_WRITE:
+ entry->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case C2_WR_TYPE_RDMA_READ:
+ entry->opcode = IB_WC_RDMA_READ;
+ break;
+ case C2_WR_TYPE_BIND_MW:
+ entry->opcode = IB_WC_BIND_MW;
+ break;
+ case C2_WR_TYPE_RECV:
+ entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
+ entry->opcode = IB_WC_RECV;
+ is_recv = 1;
+ break;
+ default:
+ break;
+ }
+
+ /* consume the WQEs */
+ if (is_recv)
+ c2_mq_lconsume(&qp->rq_mq, 1);
+ else
+ c2_mq_lconsume(&qp->sq_mq,
+ be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
+
+ /* free the message */
+ c2_mq_free(&cq->mq);
+
+ return 0;
+}
+
+int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+ struct c2_dev *c2dev = to_c2dev(ibcq->device);
+ struct c2_cq *cq = to_c2cq(ibcq);
+ unsigned long flags;
+ int npolled, err;
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ for (npolled = 0; npolled < num_entries; ++npolled) {
+
+ err = c2_poll_one(c2dev, cq, entry + npolled);
+ if (err)
+ break;
+ }
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ return npolled;
+}
+
+int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
+{
+ struct c2_mq_shared __iomem *shared;
+ struct c2_cq *cq;
+ unsigned long flags;
+ int ret = 0;
+
+ cq = to_c2cq(ibcq);
+ shared = cq->mq.peer;
+
+ if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP)
+ writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
+ else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
+ writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
+ else
+ return -EINVAL;
+
+ writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
+
+ /*
+ * Now read back shared->armed to make the PCI
+ * write synchronous. This is necessary for
+ * correct cq notification semantics.
+ */
+ readb(&shared->armed);
+
+ if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
+ spin_lock_irqsave(&cq->lock, flags);
+ ret = !c2_mq_empty(&cq->mq);
+ spin_unlock_irqrestore(&cq->lock, flags);
+ }
+
+ return ret;
+}
+
+static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
+{
+ dma_free_coherent(&c2dev->pcidev->dev, mq->q_size * mq->msg_size,
+ mq->msg_pool.host, pci_unmap_addr(mq, mapping));
+}
+
+static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
+ int msg_size)
+{
+ u8 *pool_start;
+
+ pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
+ &mq->host_dma, GFP_KERNEL);
+ if (!pool_start)
+ return -ENOMEM;
+
+ c2_mq_rep_init(mq,
+ 0, /* index (currently unknown) */
+ q_size,
+ msg_size,
+ pool_start,
+ NULL, /* peer (currently unknown) */
+ C2_MQ_HOST_TARGET);
+
+ pci_unmap_addr_set(mq, mapping, mq->host_dma);
+
+ return 0;
+}
+
+int c2_init_cq(struct c2_dev *c2dev, int entries,
+ struct c2_ucontext *ctx, struct c2_cq *cq)
+{
+ struct c2wr_cq_create_req wr;
+ struct c2wr_cq_create_rep *reply;
+ unsigned long peer_pa;
+ struct c2_vq_req *vq_req;
+ int err;
+
+ might_sleep();
+
+ cq->ibcq.cqe = entries - 1;
+ cq->is_kernel = !ctx;
+
+ /* Allocate a shared pointer */
+ cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+ &cq->mq.shared_dma, GFP_KERNEL);
+ if (!cq->mq.shared)
+ return -ENOMEM;
+
+ /* Allocate pages for the message pool */
+ err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
+ if (err)
+ goto bail0;
+
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req) {
+ err = -ENOMEM;
+ goto bail1;
+ }
+
+ memset(&wr, 0, sizeof(wr));
+ c2_wr_set_id(&wr, CCWR_CQ_CREATE);
+ wr.hdr.context = (unsigned long) vq_req;
+ wr.rnic_handle = c2dev->adapter_handle;
+ wr.msg_size = cpu_to_be32(cq->mq.msg_size);
+ wr.depth = cpu_to_be32(cq->mq.q_size);
+ wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
+ wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
+ wr.user_context = (u64) (unsigned long) (cq);
+
+ vq_req_get(c2dev, vq_req);
+
+ err = vq_send_wr(c2dev, (union c2wr *) & wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail2;
+ }
+
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err)
+ goto bail2;
+
+ reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail2;
+ }
+
+ if ((err = c2_errno(reply)) != 0)
+ goto bail3;
+
+ cq->adapter_handle = reply->cq_handle;
+ cq->mq.index = be32_to_cpu(reply->mq_index);
+
+ peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
+ cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
+ if (!cq->mq.peer) {
+ err = -ENOMEM;
+ goto bail3;
+ }
+
+ vq_repbuf_free(c2dev, reply);
+ vq_req_free(c2dev, vq_req);
+
+ spin_lock_init(&cq->lock);
+ atomic_set(&cq->refcount, 1);
+ init_waitqueue_head(&cq->wait);
+
+ /*
+ * Use the MQ index allocated by the adapter to
+ * store the CQ in the qptr_array
+ */
+ cq->cqn = cq->mq.index;
+ c2dev->qptr_array[cq->cqn] = cq;
+
+ return 0;
+
+ bail3:
+ vq_repbuf_free(c2dev, reply);
+ bail2:
+ vq_req_free(c2dev, vq_req);
+ bail1:
+ c2_free_cq_buf(c2dev, &cq->mq);
+ bail0:
+ c2_free_mqsp(cq->mq.shared);
+
+ return err;
+}
+
+void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
+{
+ int err;
+ struct c2_vq_req *vq_req;
+ struct c2wr_cq_destroy_req wr;
+ struct c2wr_cq_destroy_rep *reply;
+
+ might_sleep();
+
+ /* Clear CQ from the qptr array */
+ spin_lock_irq(&c2dev->lock);
+ c2dev->qptr_array[cq->mq.index] = NULL;
+ atomic_dec(&cq->refcount);
+ spin_unlock_irq(&c2dev->lock);
+
+ wait_event(cq->wait, !atomic_read(&cq->refcount));
+
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req) {
+ goto bail0;
+ }
+
+ memset(&wr, 0, sizeof(wr));
+ c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
+ wr.hdr.context = (unsigned long) vq_req;
+ wr.rnic_handle = c2dev->adapter_handle;
+ wr.cq_handle = cq->adapter_handle;
+
+ vq_req_get(c2dev, vq_req);
+
+ err = vq_send_wr(c2dev, (union c2wr *) & wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail1;
+ }
+
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err)
+ goto bail1;
+
+ reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+ if (reply)
+ vq_repbuf_free(c2dev, reply);
+ bail1:
+ vq_req_free(c2dev, vq_req);
+ bail0:
+ if (cq->is_kernel) {
+ c2_free_cq_buf(c2dev, &cq->mq);
+ }
+
+ return;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c
new file mode 100644
index 0000000..3b50954
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_intr.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include <rdma/iw_cm.h>
+#include "c2_vq.h"
+
+static void handle_mq(struct c2_dev *c2dev, u32 index);
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
+
+/*
+ * Handle RNIC interrupts
+ */
+void c2_rnic_interrupt(struct c2_dev *c2dev)
+{
+ unsigned int mq_index;
+
+ while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) {
+ mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
+ if (mq_index & 0x80000000) {
+ break;
+ }
+
+ c2dev->hints_read++;
+ handle_mq(c2dev, mq_index);
+ }
+
+}
+
+/*
+ * Top level MQ handler
+ */
+static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
+{
+ if (c2dev->qptr_array[mq_index] == NULL) {
+		pr_debug("handle_mq: stray activity for mq_index=%d\n",
+ mq_index);
+ return;
+ }
+
+ switch (mq_index) {
+ case (0):
+ /*
+ * An index of 0 in the activity queue
+ * indicates the req vq now has messages
+ * available...
+ *
+ * Wake up any waiters waiting on req VQ
+ * message availability.
+ */
+ wake_up(&c2dev->req_vq_wo);
+ break;
+ case (1):
+ handle_vq(c2dev, mq_index);
+ break;
+ case (2):
+ /* We have to purge the VQ in case there are pending
+ * accept reply requests that would result in the
+ * generation of an ESTABLISHED event. If we don't
+ * generate these first, a CLOSE event could end up
+ * being delivered before the ESTABLISHED event.
+ */
+ handle_vq(c2dev, 1);
+
+ c2_ae_event(c2dev, mq_index);
+ break;
+ default:
+ /* There is no event synchronization between CQ events
+ * and AE or CM events. In fact, CQE could be
+ * delivered for all of the I/O up to and including the
+ * FLUSH for a peer disconenct prior to the ESTABLISHED
+		 * FLUSH for a peer disconnect prior to the ESTABLISHED
+		 * event being delivered to the app.  The reason for this
+		 * is that CM events are delivered on a thread, while AE
+		 * and CQ events are delivered in interrupt context.
+ c2_cq_event(c2dev, mq_index);
+ break;
+ }
+
+ return;
+}
+
+/*
+ * Handles verbs WR replies.
+ */
+static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
+{
+ void *adapter_msg, *reply_msg;
+ struct c2wr_hdr *host_msg;
+ struct c2wr_hdr tmp;
+ struct c2_mq *reply_vq;
+ struct c2_vq_req *req;
+ struct iw_cm_event cm_event;
+ int err;
+
+ reply_vq = (struct c2_mq *) c2dev->qptr_array[mq_index];
+
+ /*
+ * get next msg from mq_index into adapter_msg.
+ * don't free it yet.
+ */
+ adapter_msg = c2_mq_consume(reply_vq);
+ if (adapter_msg == NULL) {
+ return;
+ }
+
+ host_msg = vq_repbuf_alloc(c2dev);
+
+ /*
+ * If we can't get a host buffer, then we'll still
+	 * wake up the waiter; we just won't give it the msg.
+ * It is assumed the waiter will deal with this...
+ */
+ if (!host_msg) {
+ pr_debug("handle_vq: no repbufs!\n");
+
+ /*
+ * just copy the WR header into a local variable.
+ * this allows us to still demux on the context
+ */
+ host_msg = &tmp;
+ memcpy(host_msg, adapter_msg, sizeof(tmp));
+ reply_msg = NULL;
+ } else {
+ memcpy(host_msg, adapter_msg, reply_vq->msg_size);
+ reply_msg = host_msg;
+ }
+
+ /*
+ * consume the msg from the MQ
+ */
+ c2_mq_free(reply_vq);
+
+ /*
+ * wakeup the waiter.
+ */
+ req = (struct c2_vq_req *) (unsigned long) host_msg->context;
+ if (req == NULL) {
+ /*
+ * We should never get here, as the adapter should
+ * never send us a reply that we're not expecting.
+ */
+ vq_repbuf_free(c2dev, host_msg);
+ pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
+ return;
+ }
+
+ if (reply_msg)
+ err = c2_errno(reply_msg);
+ else
+ err = -ENOMEM;
+
+ if (!err) switch (req->event) {
+	case IW_CM_EVENT_ESTABLISHED:
+		/*
+		 * Move the QP to RTS if this is
+		 * the established event.
+		 */
+		c2_set_qp_state(req->qp,
+				C2_QP_STATE_RTS);
+		/* fall through */
+	case IW_CM_EVENT_CLOSE:
+
+ cm_event.event = req->event;
+ cm_event.status = 0;
+ cm_event.local_addr = req->cm_id->local_addr;
+ cm_event.remote_addr = req->cm_id->remote_addr;
+ cm_event.private_data = NULL;
+ cm_event.private_data_len = 0;
+ req->cm_id->event_handler(req->cm_id, &cm_event);
+ break;
+ default:
+ break;
+ }
+
+ req->reply_msg = (u64) (unsigned long) (reply_msg);
+ atomic_set(&req->reply_ready, 1);
+ wake_up(&req->wait_object);
+
+ /*
+ * If the request was cancelled, then this put will
+ * free the vq_req memory...and reply_msg!!!
+ */
+ vq_req_put(c2dev, req);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_mm.c b/drivers/infiniband/hw/amso1100/c2_mm.c
new file mode 100644
index 0000000..b506fe2
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mm.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_vq.h"
+
+#define PBL_VIRT 1
+#define PBL_PHYS 2
+
+/*
+ * Send all the PBL messages to convey the remainder of the PBL
+ * Wait for the adapter's reply on the last one.
+ * This is indicated by setting the MEM_PBL_COMPLETE in the flags.
+ *
+ * NOTE: vq_req is _not_ freed by this function. The VQ Host
+ * Reply buffer _is_ freed by this function.
+ */
+static int
+send_pbl_messages(struct c2_dev *c2dev, __be32 stag_index,
+ unsigned long va, u32 pbl_depth,
+ struct c2_vq_req *vq_req, int pbl_type)
+{
+ u32 pbe_count; /* amt that fits in a PBL msg */
+ u32 count; /* amt in this PBL MSG. */
+ struct c2wr_nsmr_pbl_req *wr; /* PBL WR ptr */
+ struct c2wr_nsmr_pbl_rep *reply; /* reply ptr */
+ int err, pbl_virt, pbl_index, i;
+
+ switch (pbl_type) {
+ case PBL_VIRT:
+ pbl_virt = 1;
+ break;
+ case PBL_PHYS:
+ pbl_virt = 0;
+ break;
+ default:
+ return -EINVAL;
+ break;
+ }
+
+ pbe_count = (c2dev->req_vq.msg_size -
+ sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
+ wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+ if (!wr) {
+ return -ENOMEM;
+ }
+ c2_wr_set_id(wr, CCWR_NSMR_PBL);
+
+ /*
+ * Only the last PBL message will generate a reply from the verbs,
+ * so we set the context to 0 indicating there is no kernel verbs
+ * handler blocked awaiting this reply.
+ */
+ wr->hdr.context = 0;
+ wr->rnic_handle = c2dev->adapter_handle;
+ wr->stag_index = stag_index; /* already swapped */
+ wr->flags = 0;
+ pbl_index = 0;
+ while (pbl_depth) {
+ count = min(pbe_count, pbl_depth);
+ wr->addrs_length = cpu_to_be32(count);
+
+ /*
+ * If this is the last message, then reference the
+		 * vq request struct because we're going to wait for a reply.
+		 * Also mark this PBL msg as the last one.
+ */
+ if (count == pbl_depth) {
+ /*
+ * reference the request struct. dereferenced in the
+ * int handler.
+ */
+ vq_req_get(c2dev, vq_req);
+ wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
+
+ /*
+ * This is the last PBL message.
+ * Set the context to our VQ Request Object so we can
+ * wait for the reply.
+ */
+ wr->hdr.context = (unsigned long) vq_req;
+ }
+
+ /*
+ * If pbl_virt is set then va is a virtual address
+ * that describes a virtually contiguous memory
+ * allocation. The wr needs the start of each virtual page
+ * to be converted to the corresponding physical address
+ * of the page. If pbl_virt is not set then va is an array
+ * of physical addresses and there is no conversion to do.
+ * Just fill in the wr with what is in the array.
+ */
+ for (i = 0; i < count; i++) {
+ if (pbl_virt) {
+ va += PAGE_SIZE;
+ } else {
+ wr->paddrs[i] =
+ cpu_to_be64(((u64 *)va)[pbl_index + i]);
+ }
+ }
+
+ /*
+ * Send WR to adapter
+ */
+ err = vq_send_wr(c2dev, (union c2wr *) wr);
+ if (err) {
+ if (count <= pbe_count) {
+ vq_req_put(c2dev, vq_req);
+ }
+ goto bail0;
+ }
+ pbl_depth -= count;
+ pbl_index += count;
+ }
+
+ /*
+ * Now wait for the reply...
+ */
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err) {
+ goto bail0;
+ }
+
+ /*
+ * Process reply
+ */
+ reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+
+ err = c2_errno(reply);
+
+ vq_repbuf_free(c2dev, reply);
+ bail0:
+ kfree(wr);
+ return err;
+}
+
+#define C2_PBL_MAX_DEPTH 131072
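+
+/*
+ * Register a kernel memory region described by a list of physical page
+ * addresses.  As many page addresses as fit are carried in the
+ * CCWR_NSMR_REGISTER request itself; any remainder is streamed to the
+ * adapter afterwards via send_pbl_messages().
+ */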
+int
+c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
+ int page_size, int pbl_depth, u32 length,
+ u32 offset, u64 *va, enum c2_acf acf,
+ struct c2_mr *mr)
+{
+ struct c2_vq_req *vq_req;
+ struct c2wr_nsmr_register_req *wr;
+ struct c2wr_nsmr_register_rep *reply;
+ u16 flags;
+ int i, pbe_count, count;
+ int err;
+
+ if (!va || !length || !addr_list || !pbl_depth)
+ return -EINTR;
+
+ /*
+ * Verify PBL depth is within rnic max
+ */
+ if (pbl_depth > C2_PBL_MAX_DEPTH) {
+ return -EINTR;
+ }
+
+ /*
+ * allocate verbs request object
+ */
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req)
+ return -ENOMEM;
+
+ wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+ if (!wr) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+
+ /*
+ * build the WR
+ */
+ c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
+ wr->hdr.context = (unsigned long) vq_req;
+ wr->rnic_handle = c2dev->adapter_handle;
+
+ flags = (acf | MEM_VA_BASED | MEM_REMOTE);
+
+ /*
+ * compute how many pbes can fit in the message
+ */
+ pbe_count = (c2dev->req_vq.msg_size -
+ sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
+
+ if (pbl_depth <= pbe_count) {
+ flags |= MEM_PBL_COMPLETE;
+ }
+ wr->flags = cpu_to_be16(flags);
+ wr->stag_key = 0; //stag_key;
+ wr->va = cpu_to_be64(*va);
+ wr->pd_id = mr->pd->pd_id;
+ wr->pbe_size = cpu_to_be32(page_size);
+ wr->length = cpu_to_be32(length);
+ wr->pbl_depth = cpu_to_be32(pbl_depth);
+ wr->fbo = cpu_to_be32(offset);
+ count = min(pbl_depth, pbe_count);
+ wr->addrs_length = cpu_to_be32(count);
+
+ /*
+ * fill out the PBL for this message
+ */
+ for (i = 0; i < count; i++) {
+ wr->paddrs[i] = cpu_to_be64(addr_list[i]);
+ }
+
+ /*
+	 * reference the request struct
+ */
+ vq_req_get(c2dev, vq_req);
+
+ /*
+ * send the WR to the adapter
+ */
+ err = vq_send_wr(c2dev, (union c2wr *) wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail1;
+ }
+
+ /*
+ * wait for reply from adapter
+ */
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err) {
+ goto bail1;
+ }
+
+ /*
+ * process reply
+ */
+ reply =
+ (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail1;
+ }
+ if ((err = c2_errno(reply))) {
+ goto bail2;
+ }
+ //*p_pb_entries = be32_to_cpu(reply->pbl_depth);
+ mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
+ vq_repbuf_free(c2dev, reply);
+
+ /*
+ * if there are still more PBEs we need to send them to
+ * the adapter and wait for a reply on the final one.
+ * reuse vq_req for this purpose.
+ */
+ pbl_depth -= count;
+ if (pbl_depth) {
+
+ vq_req->reply_msg = (unsigned long) NULL;
+ atomic_set(&vq_req->reply_ready, 0);
+ err = send_pbl_messages(c2dev,
+ cpu_to_be32(mr->ibmr.lkey),
+ (unsigned long) &addr_list[i],
+ pbl_depth, vq_req, PBL_PHYS);
+ if (err) {
+ goto bail1;
+ }
+ }
+
+ vq_req_free(c2dev, vq_req);
+ kfree(wr);
+
+ return err;
+
+ bail2:
+ vq_repbuf_free(c2dev, reply);
+ bail1:
+ kfree(wr);
+ bail0:
+ vq_req_free(c2dev, vq_req);
+ return err;
+}
+
+int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
+{
+ struct c2_vq_req *vq_req; /* verbs request object */
+ struct c2wr_stag_dealloc_req wr; /* work request */
+ struct c2wr_stag_dealloc_rep *reply; /* WR reply */
+ int err;
+
+
+ /*
+ * allocate verbs request object
+ */
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req) {
+ return -ENOMEM;
+ }
+
+ /*
+ * Build the WR
+ */
+ c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
+ wr.hdr.context = (u64) (unsigned long) vq_req;
+ wr.rnic_handle = c2dev->adapter_handle;
+ wr.stag_index = cpu_to_be32(stag_index);
+
+ /*
+ * reference the request struct. dereferenced in the int handler.
+ */
+ vq_req_get(c2dev, vq_req);
+
+ /*
+ * Send WR to adapter
+ */
+ err = vq_send_wr(c2dev, (union c2wr *) & wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail0;
+ }
+
+ /*
+ * Wait for reply from adapter
+ */
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err) {
+ goto bail0;
+ }
+
+ /*
+ * Process reply
+ */
+ reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+
+ err = c2_errno(reply);
+
+ vq_repbuf_free(c2dev, reply);
+ bail0:
+ vq_req_free(c2dev, vq_req);
+ return err;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.c b/drivers/infiniband/hw/amso1100/c2_mq.c
new file mode 100644
index 0000000..0cddc49
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mq.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "c2.h"
+#include "c2_mq.h"
+
+void *c2_mq_alloc(struct c2_mq *q)
+{
+ BUG_ON(q->magic != C2_MQ_MAGIC);
+ BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+ if (c2_mq_full(q)) {
+ return NULL;
+ } else {
+#ifdef DEBUG
+ struct c2wr_hdr *m =
+ (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size);
+#ifdef CCMSGMAGIC
+ BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC));
+ m->magic = cpu_to_be32(CCWR_MAGIC);
+#endif
+ return m;
+#else
+ return q->msg_pool.host + q->priv * q->msg_size;
+#endif
+ }
+}
+
+void c2_mq_produce(struct c2_mq *q)
+{
+ BUG_ON(q->magic != C2_MQ_MAGIC);
+ BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+ if (!c2_mq_full(q)) {
+ q->priv = (q->priv + 1) % q->q_size;
+ q->hint_count++;
+ /* Update peer's offset. */
+ __raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
+ }
+}
+
+void *c2_mq_consume(struct c2_mq *q)
+{
+ BUG_ON(q->magic != C2_MQ_MAGIC);
+ BUG_ON(q->type != C2_MQ_HOST_TARGET);
+
+ if (c2_mq_empty(q)) {
+ return NULL;
+ } else {
+#ifdef DEBUG
+ struct c2wr_hdr *m = (struct c2wr_hdr *)
+ (q->msg_pool.host + q->priv * q->msg_size);
+#ifdef CCMSGMAGIC
+ BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC));
+#endif
+ return m;
+#else
+ return q->msg_pool.host + q->priv * q->msg_size;
+#endif
+ }
+}
+
+void c2_mq_free(struct c2_mq *q)
+{
+ BUG_ON(q->magic != C2_MQ_MAGIC);
+ BUG_ON(q->type != C2_MQ_HOST_TARGET);
+
+ if (!c2_mq_empty(q)) {
+
+#ifdef CCMSGMAGIC
+ {
+ struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *)
+ (q->msg_pool.adapter + q->priv * q->msg_size);
+ __raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic);
+ }
+#endif
+ q->priv = (q->priv + 1) % q->q_size;
+ /* Update peer's offset. */
+ __raw_writew((__force u16) cpu_to_be16(q->priv), &q->peer->shared);
+ }
+}
+
+
+void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
+{
+ BUG_ON(q->magic != C2_MQ_MAGIC);
+ BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
+
+ while (wqe_count--) {
+ BUG_ON(c2_mq_empty(q));
+ *q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size);
+ }
+}
+
+#if 0
+u32 c2_mq_count(struct c2_mq *q)
+{
+ s32 count;
+
+ if (q->type == C2_MQ_HOST_TARGET)
+ count = be16_to_cpu(*q->shared) - q->priv;
+ else
+ count = q->priv - be16_to_cpu(*q->shared);
+
+ if (count < 0)
+ count += q->q_size;
+
+ return (u32) count;
+}
+#endif /* 0 */
+
+void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+ u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
+{
+ BUG_ON(!q->shared);
+
+ /* This code assumes the byte swapping has already been done! */
+ q->index = index;
+ q->q_size = q_size;
+ q->msg_size = msg_size;
+ q->msg_pool.adapter = pool_start;
+ q->peer = (struct c2_mq_shared __iomem *) peer;
+ q->magic = C2_MQ_MAGIC;
+ q->type = type;
+ q->priv = 0;
+ q->hint_count = 0;
+ return;
+}
+void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+ u8 *pool_start, u16 __iomem *peer, u32 type)
+{
+ BUG_ON(!q->shared);
+
+ /* This code assumes the byte swapping has already been done! */
+ q->index = index;
+ q->q_size = q_size;
+ q->msg_size = msg_size;
+ q->msg_pool.host = pool_start;
+ q->peer = (struct c2_mq_shared __iomem *) peer;
+ q->magic = C2_MQ_MAGIC;
+ q->type = type;
+ q->priv = 0;
+ q->hint_count = 0;
+ return;
+}
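
For a host-target queue, the intended usage of the functions above is a consume-then-free loop: c2_mq_consume() returns the message at the current index, and c2_mq_free() advances the index and publishes it to the adapter. A hypothetical drain loop written under that assumption (the pr_debug() call stands in for real message handling):

/*
 * Hypothetical consumer loop for a host-target MQ; not driver code.
 */
static void example_mq_drain(struct c2_mq *q)
{
	struct c2wr_hdr *msg;

	while ((msg = c2_mq_consume(q)) != NULL) {
		pr_debug("handling MQ message at %p\n", msg);	/* placeholder */
		c2_mq_free(q);	/* advance priv and update the peer */
	}
}
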
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.h b/drivers/infiniband/hw/amso1100/c2_mq.h
new file mode 100644
index 0000000..acede00
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mq.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _C2_MQ_H_
+#define _C2_MQ_H_
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include "c2_wr.h"
+
+enum c2_shared_regs {
+
+ C2_SHARED_ARMED = 0x10,
+ C2_SHARED_NOTIFY = 0x18,
+ C2_SHARED_SHARED = 0x40,
+};
+
+struct c2_mq_shared {
+ u16 unused1;
+ u8 armed;
+ u8 notification_type;
+ u32 unused2;
+ u16 shared;
+ /* Pad to 64 bytes. */
+ u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
+};
+
+enum c2_mq_type {
+ C2_MQ_HOST_TARGET = 1,
+ C2_MQ_ADAPTER_TARGET = 2,
+};
+
+/*
+ * c2_mq_t is for kernel-mode MQs like the VQs and the AEQ.
+ * c2_user_mq_t (which is the same format) is for user-mode MQs...
+ */
+#define C2_MQ_MAGIC 0x4d512020 /* 'MQ ' */
+struct c2_mq {
+ u32 magic;
+ union {
+ u8 *host;
+ u8 __iomem *adapter;
+ } msg_pool;
+ dma_addr_t host_dma;
+ DECLARE_PCI_UNMAP_ADDR(mapping);
+ u16 hint_count;
+ u16 priv;
+ struct c2_mq_shared __iomem *peer;
+ __be16 *shared;
+ dma_addr_t shared_dma;
+ u32 q_size;
+ u32 msg_size;
+ u32 index;
+ enum c2_mq_type type;
+};
+
+static __inline__ int c2_mq_empty(struct c2_mq *q)
+{
+ return q->priv == be16_to_cpu(*q->shared);
+}
+
+static __inline__ int c2_mq_full(struct c2_mq *q)
+{
+ return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
+}
+
+extern void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
+extern void *c2_mq_alloc(struct c2_mq *q);
+extern void c2_mq_produce(struct c2_mq *q);
+extern void *c2_mq_consume(struct c2_mq *q);
+extern void c2_mq_free(struct c2_mq *q);
+extern void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+ u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
+extern void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
+ u8 *pool_start, u16 __iomem *peer, u32 type);
+
+#endif /* _C2_MQ_H_ */
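
The empty/full tests above are plain circular-buffer arithmetic: the local index priv chases the peer-published index, and one slot is always left unused so that "full" and "empty" remain distinguishable. A standalone illustration of the same expressions (not driver code):

#include <stdio.h>

int main(void)
{
	unsigned int q_size = 4;	/* slots in the ring */
	unsigned int priv = 0;		/* local (producer) index */
	unsigned int shared = 0;	/* index last published by the peer */
	unsigned int produced = 0;

	/* produce until the c2_mq_full() condition above becomes true */
	while (priv != (shared + q_size - 1) % q_size) {
		priv = (priv + 1) % q_size;	/* the c2_mq_produce() step */
		produced++;
	}
	printf("a ring of %u slots holds %u messages\n", q_size, produced);
	return 0;	/* prints: a ring of 4 slots holds 3 messages */
}
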
diff --git a/drivers/infiniband/hw/amso1100/c2_pd.c b/drivers/infiniband/hw/amso1100/c2_pd.c
new file mode 100644
index 0000000..00c7099
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_pd.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include "c2.h"
+#include "c2_provider.h"
+
+int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
+{
+ u32 obj;
+ int ret = 0;
+
+ spin_lock(&c2dev->pd_table.lock);
+ obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
+ c2dev->pd_table.last);
+ if (obj >= c2dev->pd_table.max)
+ obj = find_first_zero_bit(c2dev->pd_table.table,
+ c2dev->pd_table.max);
+ if (obj < c2dev->pd_table.max) {
+ pd->pd_id = obj;
+ __set_bit(obj, c2dev->pd_table.table);
+ c2dev->pd_table.last = obj+1;
+ if (c2dev->pd_table.last >= c2dev->pd_table.max)
+ c2dev->pd_table.last = 0;
+ } else
+ ret = -ENOMEM;
+ spin_unlock(&c2dev->pd_table.lock);
+ return ret;
+}
+
+void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
+{
+ spin_lock(&c2dev->pd_table.lock);
+ __clear_bit(pd->pd_id, c2dev->pd_table.table);
+ spin_unlock(&c2dev->pd_table.lock);
+}
+
+int __devinit c2_init_pd_table(struct c2_dev *c2dev)
+{
+
+ c2dev->pd_table.last = 0;
+ c2dev->pd_table.max = c2dev->props.max_pd;
+ spin_lock_init(&c2dev->pd_table.lock);
+ c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
+ sizeof(long), GFP_KERNEL);
+ if (!c2dev->pd_table.table)
+ return -ENOMEM;
+ bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
+ return 0;
+}
+
+void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev)
+{
+ kfree(c2dev->pd_table.table);
+}
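
c2_pd_alloc() above hands out PD ids round-robin from a bitmap: search from 'last', wrap to the start once, and remember the slot after the hit so the next search resumes there. The same idea in a standalone form (not kernel code, with the bitmap reduced to a byte array):

#include <stdio.h>

#define MAX_IDS 8

static unsigned char table[MAX_IDS];	/* 0 = free, 1 = allocated */
static unsigned int last;

/* round-robin search from 'last', wrapping once, as c2_pd_alloc() does */
static int alloc_id(void)
{
	unsigned int i, idx;

	for (i = 0; i < MAX_IDS; i++) {
		idx = (last + i) % MAX_IDS;
		if (!table[idx]) {
			table[idx] = 1;
			last = (idx + 1) % MAX_IDS;	/* start after this slot next time */
			return idx;
		}
	}
	return -1;	/* exhausted; the driver returns -ENOMEM here */
}

int main(void)
{
	int a = alloc_id();
	int b = alloc_id();
	int c = alloc_id();

	printf("%d %d %d\n", a, b, c);	/* prints: 0 1 2 */
	return 0;
}
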
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
new file mode 100644
index 0000000..69580e2
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -0,0 +1,892 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_arp.h>
+#include <linux/vmalloc.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include "c2.h"
+#include "c2_provider.h"
+#include "c2_user.h"
+
+static int c2_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+ struct c2_dev *c2dev = to_c2dev(ibdev);
+
+ pr_debug("%s:%u\n", __func__, __LINE__);
+
+ *props = c2dev->props;
+ return 0;
+}
+
+static int c2_query_port(struct ib_device *ibdev,
+ u8 port, struct ib_port_attr *props)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+
+ props->max_mtu = IB_MTU_4096;
+ props->lid = 0;
+ props->lmc = 0;
+ props->sm_lid = 0;
+ props->sm_sl = 0;
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = 0;
+ props->port_cap_flags =
+ IB_PORT_CM_SUP |
+ IB_PORT_REINIT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+ props->gid_tbl_len = 1;
+ props->pkey_tbl_len = 1;
+ props->qkey_viol_cntr = 0;
+ props->active_width = 1;
+ props->active_speed = 1;
+
+ return 0;
+}
+
+static int c2_modify_port(struct ib_device *ibdev,
+ u8 port, int port_modify_mask,
+ struct ib_port_modify *props)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ return 0;
+}
+
+static int c2_query_pkey(struct ib_device *ibdev,
+ u8 port, u16 index, u16 * pkey)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ *pkey = 0;
+ return 0;
+}
+
+static int c2_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *gid)
+{
+ struct c2_dev *c2dev = to_c2dev(ibdev);
+
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+ memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
+
+ return 0;
+}
+
+/* Allocate the user context data structure. This keeps track
+ * of all objects associated with a particular user-mode client.
+ */
+static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct c2_ucontext *context;
+
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ context = kmalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ return &context->ibucontext;
+}
+
+static int c2_dealloc_ucontext(struct ib_ucontext *context)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ kfree(context);
+ return 0;
+}
+
+static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ return -ENOSYS;
+}
+
+static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct c2_pd *pd;
+ int err;
+
+ pr_debug("%s:%u\n", __func__, __LINE__);
+
+ pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ err = c2_pd_alloc(to_c2dev(ibdev), !context, pd);
+ if (err) {
+ kfree(pd);
+ return ERR_PTR(err);
+ }
+
+ if (context) {
+ if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
+ c2_pd_free(to_c2dev(ibdev), pd);
+ kfree(pd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+
+ return &pd->ibpd;
+}
+
+static int c2_dealloc_pd(struct ib_pd *pd)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
+ kfree(pd);
+
+ return 0;
+}
+
+static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ return ERR_PTR(-ENOSYS);
+}
+
+static int c2_ah_destroy(struct ib_ah *ah)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ return -ENOSYS;
+}
+
+static void c2_add_ref(struct ib_qp *ibqp)
+{
+ struct c2_qp *qp;
+ BUG_ON(!ibqp);
+ qp = to_c2qp(ibqp);
+ atomic_inc(&qp->refcount);
+}
+
+static void c2_rem_ref(struct ib_qp *ibqp)
+{
+ struct c2_qp *qp;
+ BUG_ON(!ibqp);
+ qp = to_c2qp(ibqp);
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+}
+
+struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
+{
+ struct c2_dev* c2dev = to_c2dev(device);
+ struct c2_qp *qp;
+
+ qp = c2_find_qpn(c2dev, qpn);
+ pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
+ __func__, qp, qpn, device,
+ (qp?atomic_read(&qp->refcount):0));
+
+ return (qp?&qp->ibqp:NULL);
+}
+
+static struct ib_qp *c2_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct c2_qp *qp;
+ int err;
+
+ pr_debug("%s:%u\n", __func__, __LINE__);
+
+ if (init_attr->create_flags)
+ return ERR_PTR(-EINVAL);
+
+ switch (init_attr->qp_type) {
+ case IB_QPT_RC:
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp) {
+ pr_debug("%s: Unable to allocate QP\n", __func__);
+ return ERR_PTR(-ENOMEM);
+ }
+ spin_lock_init(&qp->lock);
+ if (pd->uobject) {
+ /* userspace specific */
+ }
+
+ err = c2_alloc_qp(to_c2dev(pd->device),
+ to_c2pd(pd), init_attr, qp);
+
+ if (err && pd->uobject) {
+ /* userspace specific */
+ }
+
+ break;
+ default:
+ pr_debug("%s: Invalid QP type: %d\n", __func__,
+ init_attr->qp_type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ return &qp->ibqp;
+}
+
+static int c2_destroy_qp(struct ib_qp *ib_qp)
+{
+ struct c2_qp *qp = to_c2qp(ib_qp);
+
+ pr_debug("%s:%u qp=%p,qp->state=%d\n",
+ __func__, __LINE__, ib_qp, qp->state);
+ c2_free_qp(to_c2dev(ib_qp->device), qp);
+ kfree(qp);
+ return 0;
+}
+
+static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries, int vector,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct c2_cq *cq;
+ int err;
+
+ cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq) {
+ pr_debug("%s: Unable to allocate CQ\n", __func__);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
+ if (err) {
+ pr_debug("%s: error initializing CQ\n", __func__);
+ kfree(cq);
+ return ERR_PTR(err);
+ }
+
+ return &cq->ibcq;
+}
+
+static int c2_destroy_cq(struct ib_cq *ib_cq)
+{
+ struct c2_cq *cq = to_c2cq(ib_cq);
+
+ pr_debug("%s:%u\n", __func__, __LINE__);
+
+ c2_free_cq(to_c2dev(ib_cq->device), cq);
+ kfree(cq);
+
+ return 0;
+}
+
+static inline u32 c2_convert_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) |
+ C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
+}
+
+static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf, int acc, u64 * iova_start)
+{
+ struct c2_mr *mr;
+ u64 *page_list;
+ u32 total_len;
+ int err, i, j, k, page_shift, pbl_depth;
+
+ pbl_depth = 0;
+ total_len = 0;
+
+ page_shift = PAGE_SHIFT;
+ /*
+ * If there is only 1 buffer we assume this could
+ * be a map of all physical memory...use a 32KB page size.
+ */
+ if (num_phys_buf == 1)
+ page_shift += 3;
+
+ for (i = 0; i < num_phys_buf; i++) {
+
+ if (buffer_list[i].addr & ~PAGE_MASK) {
+ pr_debug("Unaligned Memory Buffer: 0x%x\n",
+ (unsigned int) buffer_list[i].addr);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!buffer_list[i].size) {
+ pr_debug("Invalid Buffer Size\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ total_len += buffer_list[i].size;
+ pbl_depth += ALIGN(buffer_list[i].size,
+ (1 << page_shift)) >> page_shift;
+ }
+
+ page_list = vmalloc(sizeof(u64) * pbl_depth);
+ if (!page_list) {
+ pr_debug("couldn't vmalloc page_list of size %zd\n",
+ (sizeof(u64) * pbl_depth));
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for (i = 0, j = 0; i < num_phys_buf; i++) {
+
+ int naddrs;
+
+ naddrs = ALIGN(buffer_list[i].size,
+ (1 << page_shift)) >> page_shift;
+ for (k = 0; k < naddrs; k++)
+ page_list[j++] = (buffer_list[i].addr +
+ (k << page_shift));
+ }
+
+ mr = kmalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ vfree(page_list);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ mr->pd = to_c2pd(ib_pd);
+ mr->umem = NULL;
+ pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
+ "*iova_start %llx, first pa %llx, last pa %llx\n",
+ __func__, page_shift, pbl_depth, total_len,
+ (unsigned long long) *iova_start,
+ (unsigned long long) page_list[0],
+ (unsigned long long) page_list[pbl_depth-1]);
+ err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
+ (1 << page_shift), pbl_depth,
+ total_len, 0, iova_start,
+ c2_convert_access(acc), mr);
+ vfree(page_list);
+ if (err) {
+ kfree(mr);
+ return ERR_PTR(err);
+ }
+
+ return &mr->ibmr;
+}
+
+static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct ib_phys_buf bl;
+ u64 kva = 0;
+
+ pr_debug("%s:%u\n", __func__, __LINE__);
+
+ /* AMSO1100 limit */
+ bl.size = 0xffffffff;
+ bl.addr = 0;
+ return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
+}
+
+static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata)
+{
+ u64 *pages;
+ u64 kva = 0;
+ int shift, n, len;
+ int i, j, k;
+ int err = 0;
+ struct ib_umem_chunk *chunk;
+ struct c2_pd *c2pd = to_c2pd(pd);
+ struct c2_mr *c2mr;
+
+ pr_debug("%s:%u\n", __func__, __LINE__);
+
+ c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
+ if (!c2mr)
+ return ERR_PTR(-ENOMEM);
+ c2mr->pd = c2pd;
+
+ c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+ if (IS_ERR(c2mr->umem)) {
+ err = PTR_ERR(c2mr->umem);
+ kfree(c2mr);
+ return ERR_PTR(err);
+ }
+
+ shift = ffs(c2mr->umem->page_size) - 1;
+
+ n = 0;
+ list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
+ n += chunk->nents;
+
+ pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
+ if (!pages) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ i = 0;
+ list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
+ for (j = 0; j < chunk->nmap; ++j) {
+ len = sg_dma_len(&chunk->page_list[j]) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] =
+ sg_dma_address(&chunk->page_list[j]) +
+ (c2mr->umem->page_size * k);
+ }
+ }
+ }
+
+ kva = virt;
+ err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
+ pages,
+ c2mr->umem->page_size,
+ i,
+ length,
+ c2mr->umem->offset,
+ &kva,
+ c2_convert_access(acc),
+ c2mr);
+ kfree(pages);
+ if (err)
+ goto err;
+ return &c2mr->ibmr;
+
+err:
+ ib_umem_release(c2mr->umem);
+ kfree(c2mr);
+ return ERR_PTR(err);
+}
+
+static int c2_dereg_mr(struct ib_mr *ib_mr)
+{
+ struct c2_mr *mr = to_c2mr(ib_mr);
+ int err;
+
+ pr_debug("%s:%u\n", __func__, __LINE__);
+
+ err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
+ if (err)
+ pr_debug("c2_stag_dealloc failed: %d\n", err);
+ else {
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+ kfree(mr);
+ }
+
+ return err;
+}
+
+static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ return sprintf(buf, "%x\n", c2dev->props.hw_ver);
+}
+
+static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct c2_dev *c2dev = container_of(dev, struct c2_dev, ibdev.dev);
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ return sprintf(buf, "%x.%x.%x\n",
+ (int) (c2dev->props.fw_ver >> 32),
+ (int) (c2dev->props.fw_ver >> 16) & 0xffff,
+ (int) (c2dev->props.fw_ver & 0xffff));
+}
+
+static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ return sprintf(buf, "AMSO1100\n");
+}
+
+static ssize_t show_board(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct device_attribute *c2_dev_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_ver,
+ &dev_attr_hca_type,
+ &dev_attr_board_id
+};
+
+static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ int err;
+
+ err = c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr,
+ attr_mask);
+
+ return err;
+}
+
+static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ return -ENOSYS;
+}
+
+static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ return -ENOSYS;
+}
+
+static int c2_process_mad(struct ib_device *ibdev,
+ int mad_flags,
+ u8 port_num,
+ struct ib_wc *in_wc,
+ struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ return -ENOSYS;
+}
+
+static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+
+ /* Request a connection */
+ return c2_llp_connect(cm_id, iw_param);
+}
+
+static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+
+ /* Accept the new connection */
+ return c2_llp_accept(cm_id, iw_param);
+}
+
+static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+ int err;
+
+ pr_debug("%s:%u\n", __func__, __LINE__);
+
+ err = c2_llp_reject(cm_id, pdata, pdata_len);
+ return err;
+}
+
+static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
+{
+ int err;
+
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ err = c2_llp_service_create(cm_id, backlog);
+ pr_debug("%s:%u err=%d\n",
+ __func__, __LINE__,
+ err);
+ return err;
+}
+
+static int c2_service_destroy(struct iw_cm_id *cm_id)
+{
+ int err;
+ pr_debug("%s:%u\n", __func__, __LINE__);
+
+ err = c2_llp_service_destroy(cm_id);
+
+ return err;
+}
+
+static int c2_pseudo_up(struct net_device *netdev)
+{
+ struct in_device *ind;
+ struct c2_dev *c2dev = netdev->priv;
+
+ ind = in_dev_get(netdev);
+ if (!ind)
+ return 0;
+
+ pr_debug("adding...\n");
+ for_ifa(ind) {
+#ifdef DEBUG
+ u8 *ip = (u8 *) & ifa->ifa_address;
+
+ pr_debug("%s: %d.%d.%d.%d\n",
+ ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+ c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+ }
+ endfor_ifa(ind);
+ in_dev_put(ind);
+
+ return 0;
+}
+
+static int c2_pseudo_down(struct net_device *netdev)
+{
+ struct in_device *ind;
+ struct c2_dev *c2dev = netdev->priv;
+
+ ind = in_dev_get(netdev);
+ if (!ind)
+ return 0;
+
+ pr_debug("deleting...\n");
+ for_ifa(ind) {
+#ifdef DEBUG
+ u8 *ip = (u8 *) & ifa->ifa_address;
+
+ pr_debug("%s: %d.%d.%d.%d\n",
+ ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
+#endif
+ c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
+ }
+ endfor_ifa(ind);
+ in_dev_put(ind);
+
+ return 0;
+}
+
+static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ int ret = 0;
+
+ if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
+ return -EINVAL;
+
+ netdev->mtu = new_mtu;
+
+ /* TODO: Tell rnic about new rdma interface mtu */
+ return ret;
+}
+
+static void setup(struct net_device *netdev)
+{
+ netdev->open = c2_pseudo_up;
+ netdev->stop = c2_pseudo_down;
+ netdev->hard_start_xmit = c2_pseudo_xmit_frame;
+ netdev->get_stats = NULL;
+ netdev->tx_timeout = NULL;
+ netdev->set_mac_address = NULL;
+ netdev->change_mtu = c2_pseudo_change_mtu;
+ netdev->watchdog_timeo = 0;
+ netdev->type = ARPHRD_ETHER;
+ netdev->mtu = 1500;
+ netdev->hard_header_len = ETH_HLEN;
+ netdev->addr_len = ETH_ALEN;
+ netdev->tx_queue_len = 0;
+ netdev->flags |= IFF_NOARP;
+ return;
+}
+
+static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
+{
+ char name[IFNAMSIZ];
+ struct net_device *netdev;
+
+ /* change ethxxx to iwxxx */
+ strcpy(name, "iw");
+ strcat(name, &c2dev->netdev->name[3]);
+ netdev = alloc_netdev(sizeof(*netdev), name, setup);
+ if (!netdev) {
+ printk(KERN_ERR PFX "%s - etherdev alloc failed",
+ __func__);
+ return NULL;
+ }
+
+ netdev->priv = c2dev;
+
+ SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+ memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
+
+ /* Print out the MAC address */
+ pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n",
+ netdev->name,
+ netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+ netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
+
+#if 0
+ /* Disable network packets */
+ netif_stop_queue(netdev);
+#endif
+ return netdev;
+}
+
+int c2_register_device(struct c2_dev *dev)
+{
+ int ret = -ENOMEM;
+ int i;
+
+ /* Register pseudo network device */
+ dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
+ if (!dev->pseudo_netdev)
+ goto out3;
+
+ ret = register_netdev(dev->pseudo_netdev);
+ if (ret)
+ goto out2;
+
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
+ dev->ibdev.owner = THIS_MODULE;
+ dev->ibdev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV);
+
+ dev->ibdev.node_type = RDMA_NODE_RNIC;
+ memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
+ memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
+ dev->ibdev.phys_port_cnt = 1;
+ dev->ibdev.num_comp_vectors = 1;
+ dev->ibdev.dma_device = &dev->pcidev->dev;
+ dev->ibdev.query_device = c2_query_device;
+ dev->ibdev.query_port = c2_query_port;
+ dev->ibdev.modify_port = c2_modify_port;
+ dev->ibdev.query_pkey = c2_query_pkey;
+ dev->ibdev.query_gid = c2_query_gid;
+ dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
+ dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
+ dev->ibdev.mmap = c2_mmap_uar;
+ dev->ibdev.alloc_pd = c2_alloc_pd;
+ dev->ibdev.dealloc_pd = c2_dealloc_pd;
+ dev->ibdev.create_ah = c2_ah_create;
+ dev->ibdev.destroy_ah = c2_ah_destroy;
+ dev->ibdev.create_qp = c2_create_qp;
+ dev->ibdev.modify_qp = c2_modify_qp;
+ dev->ibdev.destroy_qp = c2_destroy_qp;
+ dev->ibdev.create_cq = c2_create_cq;
+ dev->ibdev.destroy_cq = c2_destroy_cq;
+ dev->ibdev.poll_cq = c2_poll_cq;
+ dev->ibdev.get_dma_mr = c2_get_dma_mr;
+ dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
+ dev->ibdev.reg_user_mr = c2_reg_user_mr;
+ dev->ibdev.dereg_mr = c2_dereg_mr;
+
+ dev->ibdev.alloc_fmr = NULL;
+ dev->ibdev.unmap_fmr = NULL;
+ dev->ibdev.dealloc_fmr = NULL;
+ dev->ibdev.map_phys_fmr = NULL;
+
+ dev->ibdev.attach_mcast = c2_multicast_attach;
+ dev->ibdev.detach_mcast = c2_multicast_detach;
+ dev->ibdev.process_mad = c2_process_mad;
+
+ dev->ibdev.req_notify_cq = c2_arm_cq;
+ dev->ibdev.post_send = c2_post_send;
+ dev->ibdev.post_recv = c2_post_receive;
+
+ dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
+ if (dev->ibdev.iwcm == NULL) {
+ ret = -ENOMEM;
+ goto out1;
+ }
+ dev->ibdev.iwcm->add_ref = c2_add_ref;
+ dev->ibdev.iwcm->rem_ref = c2_rem_ref;
+ dev->ibdev.iwcm->get_qp = c2_get_qp;
+ dev->ibdev.iwcm->connect = c2_connect;
+ dev->ibdev.iwcm->accept = c2_accept;
+ dev->ibdev.iwcm->reject = c2_reject;
+ dev->ibdev.iwcm->create_listen = c2_service_create;
+ dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
+
+ ret = ib_register_device(&dev->ibdev);
+ if (ret)
+ goto out1;
+
+ for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) {
+ ret = device_create_file(&dev->ibdev.dev,
+ c2_dev_attributes[i]);
+ if (ret)
+ goto out0;
+ }
+ goto out3;
+
+out0:
+ ib_unregister_device(&dev->ibdev);
+out1:
+ unregister_netdev(dev->pseudo_netdev);
+out2:
+ free_netdev(dev->pseudo_netdev);
+out3:
+ pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
+ return ret;
+}
+
+void c2_unregister_device(struct c2_dev *dev)
+{
+ pr_debug("%s:%u\n", __func__, __LINE__);
+ unregister_netdev(dev->pseudo_netdev);
+ free_netdev(dev->pseudo_netdev);
+ ib_unregister_device(&dev->ibdev);
+}
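
show_fw_ver() above packs major/minor/patch into a single 64-bit value and unpacks it with shifts and a 16-bit mask. A standalone worked example with a made-up version number:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t fw_ver = 0x0000000100020003ULL;	/* hypothetical value */

	printf("%x.%x.%x\n",
	       (int) (fw_ver >> 32),			/* major */
	       (int) (fw_ver >> 16) & 0xffff,		/* minor */
	       (int) (fw_ver & 0xffff));		/* patch */
	return 0;	/* prints: 1.2.3 */
}
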
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
new file mode 100644
index 0000000..1076df2
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_PROVIDER_H
+#define C2_PROVIDER_H
+#include <linux/inetdevice.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+
+#include "c2_mq.h"
+#include <rdma/iw_cm.h>
+
+#define C2_MPT_FLAG_ATOMIC (1 << 14)
+#define C2_MPT_FLAG_REMOTE_WRITE (1 << 13)
+#define C2_MPT_FLAG_REMOTE_READ (1 << 12)
+#define C2_MPT_FLAG_LOCAL_WRITE (1 << 11)
+#define C2_MPT_FLAG_LOCAL_READ (1 << 10)
+
+struct c2_buf_list {
+ void *buf;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+
+/* The user context keeps track of objects allocated for a
+ * particular user-mode client. */
+struct c2_ucontext {
+ struct ib_ucontext ibucontext;
+};
+
+struct c2_mtt;
+
+/* All objects associated with a PD are kept in the
+ * associated user context if present.
+ */
+struct c2_pd {
+ struct ib_pd ibpd;
+ u32 pd_id;
+};
+
+struct c2_mr {
+ struct ib_mr ibmr;
+ struct c2_pd *pd;
+ struct ib_umem *umem;
+};
+
+struct c2_av;
+
+enum c2_ah_type {
+ C2_AH_ON_HCA,
+ C2_AH_PCI_POOL,
+ C2_AH_KMALLOC
+};
+
+struct c2_ah {
+ struct ib_ah ibah;
+};
+
+struct c2_cq {
+ struct ib_cq ibcq;
+ spinlock_t lock;
+ atomic_t refcount;
+ int cqn;
+ int is_kernel;
+ wait_queue_head_t wait;
+
+ u32 adapter_handle;
+ struct c2_mq mq;
+};
+
+struct c2_wq {
+ spinlock_t lock;
+};
+struct iw_cm_id;
+struct c2_qp {
+ struct ib_qp ibqp;
+ struct iw_cm_id *cm_id;
+ spinlock_t lock;
+ atomic_t refcount;
+ wait_queue_head_t wait;
+ int qpn;
+
+ u32 adapter_handle;
+ u32 send_sgl_depth;
+ u32 recv_sgl_depth;
+ u32 rdma_write_sgl_depth;
+ u8 state;
+
+ struct c2_mq sq_mq;
+ struct c2_mq rq_mq;
+};
+
+struct c2_cr_query_attrs {
+ u32 local_addr;
+ u32 remote_addr;
+ u16 local_port;
+ u16 remote_port;
+};
+
+static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct c2_pd, ibpd);
+}
+
+static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct c2_ucontext, ibucontext);
+}
+
+static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct c2_mr, ibmr);
+}
+
+
+static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct c2_ah, ibah);
+}
+
+static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct c2_cq, ibcq);
+}
+
+static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct c2_qp, ibqp);
+}
+
+static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
+{
+ struct in_device *ind;
+ int ret = 0;
+
+ ind = in_dev_get(netdev);
+ if (!ind)
+ return 0;
+
+ for_ifa(ind) {
+ if (ifa->ifa_address == addr) {
+ ret = 1;
+ break;
+ }
+ }
+ endfor_ifa(ind);
+ in_dev_put(ind);
+ return ret;
+}
+#endif /* C2_PROVIDER_H */
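
The to_c2pd()/to_c2cq()/to_c2qp() helpers above are all instances of the container_of() idiom: given a pointer to the embedded ib_* structure, subtract its offset within the wrapper to recover the driver structure. A standalone illustration with stand-in types (not the real ib_pd/c2_pd definitions):

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ib_pd_like { int dummy; };			/* stands in for struct ib_pd */
struct c2_pd_like { struct ib_pd_like ibpd; unsigned int pd_id; };

int main(void)
{
	struct c2_pd_like pd = { .ibpd = { 0 }, .pd_id = 7 };
	struct ib_pd_like *ibpd = &pd.ibpd;		/* what the core hands back */
	struct c2_pd_like *back = container_of(ibpd, struct c2_pd_like, ibpd);

	printf("pd_id = %u\n", back->pd_id);		/* prints: pd_id = 7 */
	return 0;
}
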
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
new file mode 100644
index 0000000..a6d8944
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -0,0 +1,1015 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+
+#include "c2.h"
+#include "c2_vq.h"
+#include "c2_status.h"
+
+#define C2_MAX_ORD_PER_QP 128
+#define C2_MAX_IRD_PER_QP 128
+
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
+
+#define NO_SUPPORT -1
+static const u8 c2_opcode[] = {
+ [IB_WR_SEND] = C2_WR_TYPE_SEND,
+ [IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
+ [IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
+ [IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
+};
+
+static int to_c2_state(enum ib_qp_state ib_state)
+{
+ switch (ib_state) {
+ case IB_QPS_RESET:
+ return C2_QP_STATE_IDLE;
+ case IB_QPS_RTS:
+ return C2_QP_STATE_RTS;
+ case IB_QPS_SQD:
+ return C2_QP_STATE_CLOSING;
+ case IB_QPS_SQE:
+ return C2_QP_STATE_CLOSING;
+ case IB_QPS_ERR:
+ return C2_QP_STATE_ERROR;
+ default:
+ return -1;
+ }
+}
+
+static int to_ib_state(enum c2_qp_state c2_state)
+{
+ switch (c2_state) {
+ case C2_QP_STATE_IDLE:
+ return IB_QPS_RESET;
+ case C2_QP_STATE_CONNECTING:
+ return IB_QPS_RTR;
+ case C2_QP_STATE_RTS:
+ return IB_QPS_RTS;
+ case C2_QP_STATE_CLOSING:
+ return IB_QPS_SQD;
+ case C2_QP_STATE_ERROR:
+ return IB_QPS_ERR;
+ case C2_QP_STATE_TERMINATE:
+ return IB_QPS_SQE;
+ default:
+ return -1;
+ }
+}
+
+static const char *to_ib_state_str(int ib_state)
+{
+ static const char *state_str[] = {
+ "IB_QPS_RESET",
+ "IB_QPS_INIT",
+ "IB_QPS_RTR",
+ "IB_QPS_RTS",
+ "IB_QPS_SQD",
+ "IB_QPS_SQE",
+ "IB_QPS_ERR"
+ };
+ if (ib_state < IB_QPS_RESET ||
+ ib_state > IB_QPS_ERR)
+ return "<invalid IB QP state>";
+
+ ib_state -= IB_QPS_RESET;
+ return state_str[ib_state];
+}
+
+void c2_set_qp_state(struct c2_qp *qp, int c2_state)
+{
+ int new_state = to_ib_state(c2_state);
+
+ pr_debug("%s: qp[%p] state modify %s --> %s\n",
+ __func__,
+ qp,
+ to_ib_state_str(qp->state),
+ to_ib_state_str(new_state));
+ qp->state = new_state;
+}
+
+#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
+
+int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
+ struct ib_qp_attr *attr, int attr_mask)
+{
+ struct c2wr_qp_modify_req wr;
+ struct c2wr_qp_modify_rep *reply;
+ struct c2_vq_req *vq_req;
+ unsigned long flags;
+ u8 next_state;
+ int err;
+
+ pr_debug("%s:%d qp=%p, %s --> %s\n",
+ __func__, __LINE__,
+ qp,
+ to_ib_state_str(qp->state),
+ to_ib_state_str(attr->qp_state));
+
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req)
+ return -ENOMEM;
+
+ c2_wr_set_id(&wr, CCWR_QP_MODIFY);
+ wr.hdr.context = (unsigned long) vq_req;
+ wr.rnic_handle = c2dev->adapter_handle;
+ wr.qp_handle = qp->adapter_handle;
+ wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+ wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+ wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+ wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+ if (attr_mask & IB_QP_STATE) {
+ /* Ensure the state is valid */
+ if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) {
+ err = -EINVAL;
+ goto bail0;
+ }
+
+ wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
+
+ if (attr->qp_state == IB_QPS_ERR) {
+ spin_lock_irqsave(&qp->lock, flags);
+ if (qp->cm_id && qp->state == IB_QPS_RTS) {
+ pr_debug("Generating CLOSE event for QP-->ERR, "
+ "qp=%p, cm_id=%p\n",qp,qp->cm_id);
+ /* Generate a CLOSE event */
+ vq_req->cm_id = qp->cm_id;
+ vq_req->event = IW_CM_EVENT_CLOSE;
+ }
+ spin_unlock_irqrestore(&qp->lock, flags);
+ }
+ next_state = attr->qp_state;
+
+ } else if (attr_mask & IB_QP_CUR_STATE) {
+
+ if (attr->cur_qp_state != IB_QPS_RTR &&
+ attr->cur_qp_state != IB_QPS_RTS &&
+ attr->cur_qp_state != IB_QPS_SQD &&
+ attr->cur_qp_state != IB_QPS_SQE) {
+ err = -EINVAL;
+ goto bail0;
+ } else
+ wr.next_qp_state =
+ cpu_to_be32(to_c2_state(attr->cur_qp_state));
+
+ next_state = attr->cur_qp_state;
+
+ } else {
+ err = 0;
+ goto bail0;
+ }
+
+ /* reference the request struct */
+ vq_req_get(c2dev, vq_req);
+
+ err = vq_send_wr(c2dev, (union c2wr *) & wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail0;
+ }
+
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err)
+ goto bail0;
+
+ reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+
+ err = c2_errno(reply);
+ if (!err)
+ qp->state = next_state;
+#ifdef DEBUG
+ else
+ pr_debug("%s: c2_errno=%d\n", __func__, err);
+#endif
+ /*
+ * If we're going to error and generating the event here, then
+ * we need to remove the reference because there will be no
+ * close event generated by the adapter
+ */
+ spin_lock_irqsave(&qp->lock, flags);
+ if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
+ qp->cm_id->rem_ref(qp->cm_id);
+ qp->cm_id = NULL;
+ }
+ spin_unlock_irqrestore(&qp->lock, flags);
+
+ vq_repbuf_free(c2dev, reply);
+ bail0:
+ vq_req_free(c2dev, vq_req);
+
+ pr_debug("%s:%d qp=%p, cur_state=%s\n",
+ __func__, __LINE__,
+ qp,
+ to_ib_state_str(qp->state));
+ return err;
+}
+
+int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
+ int ord, int ird)
+{
+ struct c2wr_qp_modify_req wr;
+ struct c2wr_qp_modify_rep *reply;
+ struct c2_vq_req *vq_req;
+ int err;
+
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req)
+ return -ENOMEM;
+
+ c2_wr_set_id(&wr, CCWR_QP_MODIFY);
+ wr.hdr.context = (unsigned long) vq_req;
+ wr.rnic_handle = c2dev->adapter_handle;
+ wr.qp_handle = qp->adapter_handle;
+ wr.ord = cpu_to_be32(ord);
+ wr.ird = cpu_to_be32(ird);
+ wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+ wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+ wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
+
+ /* reference the request struct */
+ vq_req_get(c2dev, vq_req);
+
+ err = vq_send_wr(c2dev, (union c2wr *) & wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail0;
+ }
+
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err)
+ goto bail0;
+
+ reply = (struct c2wr_qp_modify_rep *) (unsigned long)
+ vq_req->reply_msg;
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+
+ err = c2_errno(reply);
+ vq_repbuf_free(c2dev, reply);
+ bail0:
+ vq_req_free(c2dev, vq_req);
+ return err;
+}
+
+static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+ struct c2_vq_req *vq_req;
+ struct c2wr_qp_destroy_req wr;
+ struct c2wr_qp_destroy_rep *reply;
+ unsigned long flags;
+ int err;
+
+ /*
+ * Allocate a verb request message
+ */
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req) {
+ return -ENOMEM;
+ }
+
+ /*
+ * Initialize the WR
+ */
+ c2_wr_set_id(&wr, CCWR_QP_DESTROY);
+ wr.hdr.context = (unsigned long) vq_req;
+ wr.rnic_handle = c2dev->adapter_handle;
+ wr.qp_handle = qp->adapter_handle;
+
+ /*
+ * reference the request struct. dereferenced in the int handler.
+ */
+ vq_req_get(c2dev, vq_req);
+
+ spin_lock_irqsave(&qp->lock, flags);
+ if (qp->cm_id && qp->state == IB_QPS_RTS) {
+ pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
+ "qp=%p, cm_id=%p\n",qp,qp->cm_id);
+ /* Generate a CLOSE event */
+ vq_req->qp = qp;
+ vq_req->cm_id = qp->cm_id;
+ vq_req->event = IW_CM_EVENT_CLOSE;
+ }
+ spin_unlock_irqrestore(&qp->lock, flags);
+
+ /*
+ * Send WR to adapter
+ */
+ err = vq_send_wr(c2dev, (union c2wr *) & wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail0;
+ }
+
+ /*
+ * Wait for reply from adapter
+ */
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err) {
+ goto bail0;
+ }
+
+ /*
+ * Process reply
+ */
+ reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+
+ spin_lock_irqsave(&qp->lock, flags);
+ if (qp->cm_id) {
+ qp->cm_id->rem_ref(qp->cm_id);
+ qp->cm_id = NULL;
+ }
+ spin_unlock_irqrestore(&qp->lock, flags);
+
+ vq_repbuf_free(c2dev, reply);
+ bail0:
+ vq_req_free(c2dev, vq_req);
+ return err;
+}
+
+static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+ int ret;
+
+ do {
+ spin_lock_irq(&c2dev->qp_table.lock);
+ ret = idr_get_new_above(&c2dev->qp_table.idr, qp,
+ c2dev->qp_table.last++, &qp->qpn);
+ spin_unlock_irq(&c2dev->qp_table.lock);
+ } while ((ret == -EAGAIN) &&
+ idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL));
+ return ret;
+}
+
+static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
+{
+ spin_lock_irq(&c2dev->qp_table.lock);
+ idr_remove(&c2dev->qp_table.idr, qpn);
+ spin_unlock_irq(&c2dev->qp_table.lock);
+}
+
+struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
+{
+ unsigned long flags;
+ struct c2_qp *qp;
+
+ spin_lock_irqsave(&c2dev->qp_table.lock, flags);
+ qp = idr_find(&c2dev->qp_table.idr, qpn);
+ spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
+ return qp;
+}
+
+int c2_alloc_qp(struct c2_dev *c2dev,
+ struct c2_pd *pd,
+ struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
+{
+ struct c2wr_qp_create_req wr;
+ struct c2wr_qp_create_rep *reply;
+ struct c2_vq_req *vq_req;
+ struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
+ struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
+ unsigned long peer_pa;
+ u32 q_size, msg_size, mmap_size;
+ void __iomem *mmap;
+ int err;
+
+ err = c2_alloc_qpn(c2dev, qp);
+ if (err)
+ return err;
+ qp->ibqp.qp_num = qp->qpn;
+ qp->ibqp.qp_type = IB_QPT_RC;
+
+ /* Allocate the SQ and RQ shared pointers */
+ qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+ &qp->sq_mq.shared_dma, GFP_KERNEL);
+ if (!qp->sq_mq.shared) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+
+ qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+ &qp->rq_mq.shared_dma, GFP_KERNEL);
+ if (!qp->rq_mq.shared) {
+ err = -ENOMEM;
+ goto bail1;
+ }
+
+ /* Allocate the verbs request */
+ vq_req = vq_req_alloc(c2dev);
+ if (vq_req == NULL) {
+ err = -ENOMEM;
+ goto bail2;
+ }
+
+ /* Initialize the work request */
+ memset(&wr, 0, sizeof(wr));
+ c2_wr_set_id(&wr, CCWR_QP_CREATE);
+ wr.hdr.context = (unsigned long) vq_req;
+ wr.rnic_handle = c2dev->adapter_handle;
+ wr.sq_cq_handle = send_cq->adapter_handle;
+ wr.rq_cq_handle = recv_cq->adapter_handle;
+ wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
+ wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
+ wr.srq_handle = 0;
+ wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
+ QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
+ wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+ wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
+ wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
+ wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
+ wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
+ wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
+ wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
+ wr.pd_id = pd->pd_id;
+ wr.user_context = (unsigned long) qp;
+
+ vq_req_get(c2dev, vq_req);
+
+ /* Send the WR to the adapter */
+ err = vq_send_wr(c2dev, (union c2wr *) & wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail3;
+ }
+
+ /* Wait for the verb reply */
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err) {
+ goto bail3;
+ }
+
+ /* Process the reply */
+ reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail3;
+ }
+
+ if ((err = c2_wr_get_result(reply)) != 0) {
+ goto bail4;
+ }
+
+ /* Fill in the kernel QP struct */
+ atomic_set(&qp->refcount, 1);
+ qp->adapter_handle = reply->qp_handle;
+ qp->state = IB_QPS_RESET;
+ qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
+ qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
+ qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
+ init_waitqueue_head(&qp->wait);
+
+ /* Initialize the SQ MQ */
+ q_size = be32_to_cpu(reply->sq_depth);
+ msg_size = be32_to_cpu(reply->sq_msg_size);
+ peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
+ mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+ mmap = ioremap_nocache(peer_pa, mmap_size);
+ if (!mmap) {
+ err = -ENOMEM;
+ goto bail5;
+ }
+
+ c2_mq_req_init(&qp->sq_mq,
+ be32_to_cpu(reply->sq_mq_index),
+ q_size,
+ msg_size,
+ mmap + sizeof(struct c2_mq_shared), /* pool start */
+ mmap, /* peer */
+ C2_MQ_ADAPTER_TARGET);
+
+ /* Initialize the RQ mq */
+ q_size = be32_to_cpu(reply->rq_depth);
+ msg_size = be32_to_cpu(reply->rq_msg_size);
+ peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
+ mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
+ mmap = ioremap_nocache(peer_pa, mmap_size);
+ if (!mmap) {
+ err = -ENOMEM;
+ goto bail6;
+ }
+
+ c2_mq_req_init(&qp->rq_mq,
+ be32_to_cpu(reply->rq_mq_index),
+ q_size,
+ msg_size,
+ mmap + sizeof(struct c2_mq_shared), /* pool start */
+ mmap, /* peer */
+ C2_MQ_ADAPTER_TARGET);
+
+ vq_repbuf_free(c2dev, reply);
+ vq_req_free(c2dev, vq_req);
+
+ return 0;
+
+ bail6:
+ iounmap(qp->sq_mq.peer);
+ bail5:
+ destroy_qp(c2dev, qp);
+ bail4:
+ vq_repbuf_free(c2dev, reply);
+ bail3:
+ vq_req_free(c2dev, vq_req);
+ bail2:
+ c2_free_mqsp(qp->rq_mq.shared);
+ bail1:
+ c2_free_mqsp(qp->sq_mq.shared);
+ bail0:
+ c2_free_qpn(c2dev, qp->qpn);
+ return err;
+}
+
+static inline void c2_lock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
+{
+ if (send_cq == recv_cq)
+ spin_lock_irq(&send_cq->lock);
+ else if (send_cq > recv_cq) {
+ spin_lock_irq(&send_cq->lock);
+ spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock_irq(&recv_cq->lock);
+ spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
+ }
+}
+
+static inline void c2_unlock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
+{
+ if (send_cq == recv_cq)
+ spin_unlock_irq(&send_cq->lock);
+ else if (send_cq > recv_cq) {
+ spin_unlock(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+ } else {
+ spin_unlock(&send_cq->lock);
+ spin_unlock_irq(&recv_cq->lock);
+ }
+}
+
+void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
+{
+ struct c2_cq *send_cq;
+ struct c2_cq *recv_cq;
+
+ send_cq = to_c2cq(qp->ibqp.send_cq);
+ recv_cq = to_c2cq(qp->ibqp.recv_cq);
+
+ /*
+ * Lock CQs here, so that CQ polling code can do QP lookup
+ * without taking a lock.
+ */
+ c2_lock_cqs(send_cq, recv_cq);
+ c2_free_qpn(c2dev, qp->qpn);
+ c2_unlock_cqs(send_cq, recv_cq);
+
+ /*
+ * Destroy the qp in the rnic...
+ */
+ destroy_qp(c2dev, qp);
+
+ /*
+ * Mark any unreaped CQEs as null and void.
+ */
+ c2_cq_clean(c2dev, qp, send_cq->cqn);
+ if (send_cq != recv_cq)
+ c2_cq_clean(c2dev, qp, recv_cq->cqn);
+ /*
+ * Unmap the MQs and return the shared pointers
+ * to the message pool.
+ */
+ iounmap(qp->sq_mq.peer);
+ iounmap(qp->rq_mq.peer);
+ c2_free_mqsp(qp->sq_mq.shared);
+ c2_free_mqsp(qp->rq_mq.shared);
+
+ atomic_dec(&qp->refcount);
+ wait_event(qp->wait, !atomic_read(&qp->refcount));
+}
+
+/*
+ * Function: move_sgl
+ *
+ * Description:
+ * Move an SGL from the user's work request struct into a CCIL Work Request
+ * message, swapping to WR byte order and ensuring the total length doesn't
+ * overflow.
+ *
+ * IN:
+ * dst - ptr to CCIL Work Request message SGL memory.
+ * src - ptr to the consumer's SGL memory.
+ *
+ * OUT: none
+ *
+ * Return:
+ * CCIL status codes.
+ */
+static int
+move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
+ u8 * actual_count)
+{
+ u32 tot = 0; /* running total */
+ u8 acount = 0; /* running total non-0 len sge's */
+
+ while (count > 0) {
+ /*
+ * If the addition of this SGE causes the
+ * total SGL length to exceed 2^32-1, then
+ * fail-n-bail.
+ *
+ * If the current total plus the next element length
+ * wraps, then it will go negative and be less than the
+ * current total...
+ */
+ if ((tot + src->length) < tot) {
+ return -EINVAL;
+ }
+ /*
+ * Bug: 1456 (as well as 1498 & 1643)
+ * Skip over any sge's supplied with len=0
+ */
+ if (src->length) {
+ tot += src->length;
+ dst->stag = cpu_to_be32(src->lkey);
+ dst->to = cpu_to_be64(src->addr);
+ dst->length = cpu_to_be32(src->length);
+ dst++;
+ acount++;
+ }
+ src++;
+ count--;
+ }
+
+ if (acount == 0) {
+ /*
+ * Bug: 1476 (as well as 1498, 1456 and 1643)
+ * Setup the SGL in the WR to make it easier for the RNIC.
+ * This way, the FW doesn't have to deal with special cases.
+ * Setting length=0 should be sufficient.
+ */
+ dst->stag = 0;
+ dst->to = 0;
+ dst->length = 0;
+ }
+
+ *p_len = tot;
+ *actual_count = acount;
+ return 0;
+}
+
+/*
+ * Function: c2_activity (private function)
+ *
+ * Description:
+ * Post an mq index to the host->adapter activity fifo.
+ *
+ * IN:
+ * c2dev - ptr to c2dev structure
+ * mq_index - mq index to post
+ * shared - value most recently written to shared
+ *
+ * OUT:
+ *
+ * Return:
+ * none
+ */
+static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
+{
+ /*
+ * First read the register to see if the FIFO is full, and if so,
+ * spin until it's not. This isn't perfect -- there is no
+ * synchronization among the clients of the register, but in
+ * practice it prevents multiple CPUs from hammering the bus
+ * with PCI RETRY. Note that when this does happen, the card
+ * cannot get on the bus and the card and system hang in a
+ * deadlock -- thus the need for this code. [TOT]
+ */
+ while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000)
+ udelay(10);
+
+ __raw_writel(C2_HINT_MAKE(mq_index, shared),
+ c2dev->regs + PCI_BAR0_ADAPTER_HINT);
+}
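
C2_HINT_MAKE() used above packs the MQ index into the upper half of the doorbell word and the shared count into the lower half; C2_HINT_GET_INDEX()/C2_HINT_GET_COUNT() undo it. A standalone round-trip check with made-up values (the macros are copied from the definitions earlier in this file):

#include <stdio.h>
#include <stdint.h>

#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)

int main(void)
{
	uint32_t hint = C2_HINT_MAKE(3, 5);	/* mq_index 3, shared count 5 */

	printf("hint=0x%08x index=%u count=%u\n",
	       hint, C2_HINT_GET_INDEX(hint), C2_HINT_GET_COUNT(hint));
	return 0;	/* prints: hint=0x00030005 index=3 count=5 */
}
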
+
+/*
+ * Function: qp_wr_post
+ *
+ * Description:
+ * This inline function allocates an MQ msg, then moves the host-copy of
+ * the completed WR into msg. Then it posts the message.
+ *
+ * IN:
+ * q - ptr to user MQ.
+ * wr - ptr to host-copy of the WR.
+ * qp - ptr to user qp
+ * size - Number of bytes to post. Assumed to be divisible by 4.
+ *
+ * OUT: none
+ *
+ * Return:
+ * CCIL status codes.
+ */
+static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
+{
+ union c2wr *msg;
+
+ msg = c2_mq_alloc(q);
+ if (msg == NULL) {
+ return -EINVAL;
+ }
+#ifdef CCMSGMAGIC
+ ((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
+#endif
+
+ /*
+ * Since all header fields in the WR are the same as the
+ * CQE, set the following so the adapter need not.
+ */
+ c2_wr_set_result(wr, CCERR_PENDING);
+
+ /*
+ * Copy the wr down to the adapter
+ */
+ memcpy((void *) msg, (void *) wr, size);
+
+ c2_mq_produce(q);
+ return 0;
+}
+
+
+int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct c2_dev *c2dev = to_c2dev(ibqp->device);
+ struct c2_qp *qp = to_c2qp(ibqp);
+ union c2wr wr;
+ unsigned long lock_flags;
+ int err = 0;
+
+ u32 flags;
+ u32 tot_len;
+ u8 actual_sge_count;
+ u32 msg_size;
+
+ if (qp->state > IB_QPS_RTS)
+ return -EINVAL;
+
+ while (ib_wr) {
+
+ flags = 0;
+ wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+ if (ib_wr->send_flags & IB_SEND_SIGNALED) {
+ flags |= SQ_SIGNALED;
+ }
+
+ switch (ib_wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_INV:
+ if (ib_wr->opcode == IB_WR_SEND) {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
+ else
+ c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
+ wr.sqwr.send.remote_stag = 0;
+ } else {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE_INV);
+ else
+ c2_wr_set_id(&wr, C2_WR_TYPE_SEND_INV);
+ wr.sqwr.send.remote_stag =
+ cpu_to_be32(ib_wr->ex.invalidate_rkey);
+ }
+
+ msg_size = sizeof(struct c2wr_send_req) +
+ sizeof(struct c2_data_addr) * ib_wr->num_sge;
+ if (ib_wr->num_sge > qp->send_sgl_depth) {
+ err = -EINVAL;
+ break;
+ }
+ if (ib_wr->send_flags & IB_SEND_FENCE) {
+ flags |= SQ_READ_FENCE;
+ }
+ err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
+ ib_wr->sg_list,
+ ib_wr->num_sge,
+ &tot_len, &actual_sge_count);
+ wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
+ c2_wr_set_sge_count(&wr, actual_sge_count);
+ break;
+ case IB_WR_RDMA_WRITE:
+ c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
+ msg_size = sizeof(struct c2wr_rdma_write_req) +
+ (sizeof(struct c2_data_addr) * ib_wr->num_sge);
+ if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
+ err = -EINVAL;
+ break;
+ }
+ if (ib_wr->send_flags & IB_SEND_FENCE) {
+ flags |= SQ_READ_FENCE;
+ }
+ wr.sqwr.rdma_write.remote_stag =
+ cpu_to_be32(ib_wr->wr.rdma.rkey);
+ wr.sqwr.rdma_write.remote_to =
+ cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+ err = move_sgl((struct c2_data_addr *)
+ & (wr.sqwr.rdma_write.data),
+ ib_wr->sg_list,
+ ib_wr->num_sge,
+ &tot_len, &actual_sge_count);
+ wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
+ c2_wr_set_sge_count(&wr, actual_sge_count);
+ break;
+ case IB_WR_RDMA_READ:
+ c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
+ msg_size = sizeof(struct c2wr_rdma_read_req);
+
+			/* iWARP only supports 1 sge for RDMA reads */
+ if (ib_wr->num_sge > 1) {
+ err = -EINVAL;
+ break;
+ }
+
+ /*
+ * Move the local and remote stag/to/len into the WR.
+ */
+ wr.sqwr.rdma_read.local_stag =
+ cpu_to_be32(ib_wr->sg_list->lkey);
+ wr.sqwr.rdma_read.local_to =
+ cpu_to_be64(ib_wr->sg_list->addr);
+ wr.sqwr.rdma_read.remote_stag =
+ cpu_to_be32(ib_wr->wr.rdma.rkey);
+ wr.sqwr.rdma_read.remote_to =
+ cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+ wr.sqwr.rdma_read.length =
+ cpu_to_be32(ib_wr->sg_list->length);
+ break;
+ default:
+ /* error */
+ msg_size = 0;
+ err = -EINVAL;
+ break;
+ }
+
+ /*
+ * If we had an error on the last wr build, then
+ * break out. Possible errors include bogus WR
+ * type, and a bogus SGL length...
+ */
+ if (err) {
+ break;
+ }
+
+ /*
+ * Store flags
+ */
+ c2_wr_set_flags(&wr, flags);
+
+ /*
+ * Post the puppy!
+ */
+ spin_lock_irqsave(&qp->lock, lock_flags);
+ err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
+ if (err) {
+ spin_unlock_irqrestore(&qp->lock, lock_flags);
+ break;
+ }
+
+ /*
+ * Enqueue mq index to activity FIFO.
+ */
+ c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
+ spin_unlock_irqrestore(&qp->lock, lock_flags);
+
+ ib_wr = ib_wr->next;
+ }
+
+ if (err)
+ *bad_wr = ib_wr;
+ return err;
+}
+
+int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct c2_dev *c2dev = to_c2dev(ibqp->device);
+ struct c2_qp *qp = to_c2qp(ibqp);
+ union c2wr wr;
+ unsigned long lock_flags;
+ int err = 0;
+
+ if (qp->state > IB_QPS_RTS)
+ return -EINVAL;
+
+ /*
+ * Try and post each work request
+ */
+ while (ib_wr) {
+ u32 tot_len;
+ u8 actual_sge_count;
+
+ if (ib_wr->num_sge > qp->recv_sgl_depth) {
+ err = -EINVAL;
+ break;
+ }
+
+ /*
+ * Create local host-copy of the WR
+ */
+ wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
+ c2_wr_set_id(&wr, CCWR_RECV);
+ c2_wr_set_flags(&wr, 0);
+
+ /* sge_count is limited to eight bits. */
+ BUG_ON(ib_wr->num_sge >= 256);
+ err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
+ ib_wr->sg_list,
+ ib_wr->num_sge, &tot_len, &actual_sge_count);
+ c2_wr_set_sge_count(&wr, actual_sge_count);
+
+ /*
+ * If we had an error on the last wr build, then
+ * break out. Possible errors include bogus WR
+ * type, and a bogus SGL length...
+ */
+ if (err) {
+ break;
+ }
+
+ spin_lock_irqsave(&qp->lock, lock_flags);
+ err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
+ if (err) {
+ spin_unlock_irqrestore(&qp->lock, lock_flags);
+ break;
+ }
+
+ /*
+ * Enqueue mq index to activity FIFO
+ */
+ c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
+ spin_unlock_irqrestore(&qp->lock, lock_flags);
+
+ ib_wr = ib_wr->next;
+ }
+
+ if (err)
+ *bad_wr = ib_wr;
+ return err;
+}
+
+void __devinit c2_init_qp_table(struct c2_dev *c2dev)
+{
+ spin_lock_init(&c2dev->qp_table.lock);
+ idr_init(&c2dev->qp_table.idr);
+}
+
+void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev)
+{
+ idr_destroy(&c2dev->qp_table.idr);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
new file mode 100644
index 0000000..dd05c48
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -0,0 +1,654 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/inet.h>
+#include <linux/vmalloc.h>
+
+#include <linux/route.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_vq.h"
+
+/* Device capabilities */
+#define C2_MIN_PAGESIZE 1024
+
+#define C2_MAX_MRS 32768
+#define C2_MAX_QPS 16000
+#define C2_MAX_WQE_SZ 256
+#define C2_MAX_QP_WR ((128*1024)/C2_MAX_WQE_SZ)
+#define C2_MAX_SGES 4
+#define C2_MAX_SGE_RD 1
+#define C2_MAX_CQS 32768
+#define C2_MAX_CQES 4096
+#define C2_MAX_PDS 16384
+
+/*
+ * Send the adapter INIT message to the amso1100
+ */
+static int c2_adapter_init(struct c2_dev *c2dev)
+{
+ struct c2wr_init_req wr;
+ int err;
+
+ memset(&wr, 0, sizeof(wr));
+ c2_wr_set_id(&wr, CCWR_INIT);
+ wr.hdr.context = 0;
+ wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
+ wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
+ wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
+ wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
+ wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
+ wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
+
+ /* Post the init message */
+ err = vq_send_wr(c2dev, (union c2wr *) & wr);
+
+ return err;
+}
+
+/*
+ * Send the adapter TERM message to the amso1100
+ */
+static void c2_adapter_term(struct c2_dev *c2dev)
+{
+ struct c2wr_init_req wr;
+
+ memset(&wr, 0, sizeof(wr));
+ c2_wr_set_id(&wr, CCWR_TERM);
+ wr.hdr.context = 0;
+
+	/* Post the term message */
+ vq_send_wr(c2dev, (union c2wr *) & wr);
+ c2dev->init = 0;
+
+ return;
+}
+
+/*
+ * Query the adapter
+ */
+static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
+{
+ struct c2_vq_req *vq_req;
+ struct c2wr_rnic_query_req wr;
+ struct c2wr_rnic_query_rep *reply;
+ int err;
+
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req)
+ return -ENOMEM;
+
+ c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
+ wr.hdr.context = (unsigned long) vq_req;
+ wr.rnic_handle = c2dev->adapter_handle;
+
+ vq_req_get(c2dev, vq_req);
+
+ err = vq_send_wr(c2dev, (union c2wr *) &wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail1;
+ }
+
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err)
+ goto bail1;
+
+ reply =
+ (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
+ if (!reply)
+ err = -ENOMEM;
+ else
+ err = c2_errno(reply);
+ if (err)
+ goto bail2;
+
+ props->fw_ver =
+ ((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
+ ((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) |
+ (be32_to_cpu(reply->fw_ver_patch) & 0xFFFF);
+ memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
+ props->max_mr_size = 0xFFFFFFFF;
+ props->page_size_cap = ~(C2_MIN_PAGESIZE-1);
+ props->vendor_id = be32_to_cpu(reply->vendor_id);
+ props->vendor_part_id = be32_to_cpu(reply->part_number);
+ props->hw_ver = be32_to_cpu(reply->hw_version);
+ props->max_qp = be32_to_cpu(reply->max_qps);
+ props->max_qp_wr = be32_to_cpu(reply->max_qp_depth);
+ props->device_cap_flags = c2dev->device_cap_flags;
+ props->max_sge = C2_MAX_SGES;
+ props->max_sge_rd = C2_MAX_SGE_RD;
+ props->max_cq = be32_to_cpu(reply->max_cqs);
+ props->max_cqe = be32_to_cpu(reply->max_cq_depth);
+ props->max_mr = be32_to_cpu(reply->max_mrs);
+ props->max_pd = be32_to_cpu(reply->max_pds);
+ props->max_qp_rd_atom = be32_to_cpu(reply->max_qp_ird);
+ props->max_ee_rd_atom = 0;
+ props->max_res_rd_atom = be32_to_cpu(reply->max_global_ird);
+ props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
+ props->max_ee_init_rd_atom = 0;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->max_ee = 0;
+ props->max_rdd = 0;
+ props->max_mw = be32_to_cpu(reply->max_mws);
+ props->max_raw_ipv6_qp = 0;
+ props->max_raw_ethy_qp = 0;
+ props->max_mcast_grp = 0;
+ props->max_mcast_qp_attach = 0;
+ props->max_total_mcast_qp_attach = 0;
+ props->max_ah = 0;
+ props->max_fmr = 0;
+ props->max_map_per_fmr = 0;
+ props->max_srq = 0;
+ props->max_srq_wr = 0;
+ props->max_srq_sge = 0;
+ props->max_pkeys = 0;
+ props->local_ca_ack_delay = 0;
+
+ bail2:
+ vq_repbuf_free(c2dev, reply);
+
+ bail1:
+ vq_req_free(c2dev, vq_req);
+ return err;
+}
+
+/*
+ * Add an IP address to the RNIC interface
+ */
+int c2_add_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
+{
+ struct c2_vq_req *vq_req;
+ struct c2wr_rnic_setconfig_req *wr;
+ struct c2wr_rnic_setconfig_rep *reply;
+ struct c2_netaddr netaddr;
+ int err, len;
+
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req)
+ return -ENOMEM;
+
+ len = sizeof(struct c2_netaddr);
+ wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+ if (!wr) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+
+ c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
+ wr->hdr.context = (unsigned long) vq_req;
+ wr->rnic_handle = c2dev->adapter_handle;
+ wr->option = cpu_to_be32(C2_CFG_ADD_ADDR);
+
+ netaddr.ip_addr = inaddr;
+ netaddr.netmask = inmask;
+ netaddr.mtu = 0;
+
+ memcpy(wr->data, &netaddr, len);
+
+ vq_req_get(c2dev, vq_req);
+
+ err = vq_send_wr(c2dev, (union c2wr *) wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail1;
+ }
+
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err)
+ goto bail1;
+
+ reply =
+ (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail1;
+ }
+
+ err = c2_errno(reply);
+ vq_repbuf_free(c2dev, reply);
+
+ bail1:
+ kfree(wr);
+ bail0:
+ vq_req_free(c2dev, vq_req);
+ return err;
+}
+
+/*
+ * Delete an IP address from the RNIC interface
+ */
+int c2_del_addr(struct c2_dev *c2dev, __be32 inaddr, __be32 inmask)
+{
+ struct c2_vq_req *vq_req;
+ struct c2wr_rnic_setconfig_req *wr;
+ struct c2wr_rnic_setconfig_rep *reply;
+ struct c2_netaddr netaddr;
+ int err, len;
+
+ vq_req = vq_req_alloc(c2dev);
+ if (!vq_req)
+ return -ENOMEM;
+
+ len = sizeof(struct c2_netaddr);
+ wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
+ if (!wr) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+
+ c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
+ wr->hdr.context = (unsigned long) vq_req;
+ wr->rnic_handle = c2dev->adapter_handle;
+ wr->option = cpu_to_be32(C2_CFG_DEL_ADDR);
+
+ netaddr.ip_addr = inaddr;
+ netaddr.netmask = inmask;
+ netaddr.mtu = 0;
+
+ memcpy(wr->data, &netaddr, len);
+
+ vq_req_get(c2dev, vq_req);
+
+ err = vq_send_wr(c2dev, (union c2wr *) wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail1;
+ }
+
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err)
+ goto bail1;
+
+ reply =
+ (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail1;
+ }
+
+ err = c2_errno(reply);
+ vq_repbuf_free(c2dev, reply);
+
+ bail1:
+ kfree(wr);
+ bail0:
+ vq_req_free(c2dev, vq_req);
+ return err;
+}
+
+/*
+ * Open a single RNIC instance to use with all
+ * low level openib calls
+ */
+static int c2_rnic_open(struct c2_dev *c2dev)
+{
+ struct c2_vq_req *vq_req;
+ union c2wr wr;
+ struct c2wr_rnic_open_rep *reply;
+ int err;
+
+ vq_req = vq_req_alloc(c2dev);
+ if (vq_req == NULL) {
+ return -ENOMEM;
+ }
+
+ memset(&wr, 0, sizeof(wr));
+ c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
+ wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
+ wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
+ wr.rnic_open.req.port_num = cpu_to_be16(0);
+ wr.rnic_open.req.user_context = (unsigned long) c2dev;
+
+ vq_req_get(c2dev, vq_req);
+
+ err = vq_send_wr(c2dev, &wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail0;
+ }
+
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err) {
+ goto bail0;
+ }
+
+ reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg);
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+
+ if ((err = c2_errno(reply)) != 0) {
+ goto bail1;
+ }
+
+ c2dev->adapter_handle = reply->rnic_handle;
+
+ bail1:
+ vq_repbuf_free(c2dev, reply);
+ bail0:
+ vq_req_free(c2dev, vq_req);
+ return err;
+}
+
+/*
+ * Close the RNIC instance
+ */
+static int c2_rnic_close(struct c2_dev *c2dev)
+{
+ struct c2_vq_req *vq_req;
+ union c2wr wr;
+ struct c2wr_rnic_close_rep *reply;
+ int err;
+
+ vq_req = vq_req_alloc(c2dev);
+ if (vq_req == NULL) {
+ return -ENOMEM;
+ }
+
+ memset(&wr, 0, sizeof(wr));
+ c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
+ wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
+ wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
+
+ vq_req_get(c2dev, vq_req);
+
+ err = vq_send_wr(c2dev, &wr);
+ if (err) {
+ vq_req_put(c2dev, vq_req);
+ goto bail0;
+ }
+
+ err = vq_wait_for_reply(c2dev, vq_req);
+ if (err) {
+ goto bail0;
+ }
+
+ reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg);
+ if (!reply) {
+ err = -ENOMEM;
+ goto bail0;
+ }
+
+ if ((err = c2_errno(reply)) != 0) {
+ goto bail1;
+ }
+
+ c2dev->adapter_handle = 0;
+
+ bail1:
+ vq_repbuf_free(c2dev, reply);
+ bail0:
+ vq_req_free(c2dev, vq_req);
+ return err;
+}
+
+/*
+ * Called by c2_probe to initialize the RNIC. This principally
+ * involves initializing the various limits and resource pools that
+ * comprise the RNIC instance.
+ */
+int __devinit c2_rnic_init(struct c2_dev *c2dev)
+{
+ int err;
+ u32 qsize, msgsize;
+ void *q1_pages;
+ void *q2_pages;
+ void __iomem *mmio_regs;
+
+ /* Device capabilities */
+ c2dev->device_cap_flags =
+ (IB_DEVICE_RESIZE_MAX_WR |
+ IB_DEVICE_CURR_QP_STATE_MOD |
+ IB_DEVICE_SYS_IMAGE_GUID |
+ IB_DEVICE_LOCAL_DMA_LKEY |
+ IB_DEVICE_MEM_WINDOW);
+
+ /* Allocate the qptr_array */
+ c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
+ if (!c2dev->qptr_array) {
+ return -ENOMEM;
+ }
+
+	/* Initialize the qptr_array */
+ memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *));
+ c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
+ c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
+ c2dev->qptr_array[2] = (void *) &c2dev->aeq;
+
+ /* Initialize data structures */
+ init_waitqueue_head(&c2dev->req_vq_wo);
+ spin_lock_init(&c2dev->vqlock);
+ spin_lock_init(&c2dev->lock);
+
+ /* Allocate MQ shared pointer pool for kernel clients. User
+ * mode client pools are hung off the user context
+ */
+ err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
+ if (err) {
+ goto bail0;
+ }
+
+ /* Allocate shared pointers for Q0, Q1, and Q2 from
+ * the shared pointer pool.
+ */
+
+ c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+ &c2dev->hint_count_dma,
+ GFP_KERNEL);
+ c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+ &c2dev->req_vq.shared_dma,
+ GFP_KERNEL);
+ c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+ &c2dev->rep_vq.shared_dma,
+ GFP_KERNEL);
+ c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
+ &c2dev->aeq.shared_dma, GFP_KERNEL);
+ if (!c2dev->hint_count || !c2dev->req_vq.shared ||
+ !c2dev->rep_vq.shared || !c2dev->aeq.shared) {
+ err = -ENOMEM;
+ goto bail1;
+ }
+
+ mmio_regs = c2dev->kva;
+ /* Initialize the Verbs Request Queue */
+ c2_mq_req_init(&c2dev->req_vq, 0,
+ be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_QSIZE)),
+ be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
+ mmio_regs +
+ be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
+ mmio_regs +
+ be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q0_SHARED)),
+ C2_MQ_ADAPTER_TARGET);
+
+ /* Initialize the Verbs Reply Queue */
+ qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_QSIZE));
+ msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
+ q1_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
+ &c2dev->rep_vq.host_dma, GFP_KERNEL);
+ if (!q1_pages) {
+ err = -ENOMEM;
+ goto bail1;
+ }
+ pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
+ pr_debug("%s rep_vq va %p dma %llx\n", __func__, q1_pages,
+ (unsigned long long) c2dev->rep_vq.host_dma);
+ c2_mq_rep_init(&c2dev->rep_vq,
+ 1,
+ qsize,
+ msgsize,
+ q1_pages,
+ mmio_regs +
+ be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q1_SHARED)),
+ C2_MQ_HOST_TARGET);
+
+	/* Initialize the Asynchronous Event Queue */
+ qsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_QSIZE));
+ msgsize = be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
+ q2_pages = dma_alloc_coherent(&c2dev->pcidev->dev, qsize * msgsize,
+ &c2dev->aeq.host_dma, GFP_KERNEL);
+ if (!q2_pages) {
+ err = -ENOMEM;
+ goto bail2;
+ }
+ pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
+ pr_debug("%s aeq va %p dma %llx\n", __func__, q2_pages,
+ (unsigned long long) c2dev->aeq.host_dma);
+ c2_mq_rep_init(&c2dev->aeq,
+ 2,
+ qsize,
+ msgsize,
+ q2_pages,
+ mmio_regs +
+ be32_to_cpu((__force __be32) readl(mmio_regs + C2_REGS_Q2_SHARED)),
+ C2_MQ_HOST_TARGET);
+
+ /* Initialize the verbs request allocator */
+ err = vq_init(c2dev);
+ if (err)
+ goto bail3;
+
+ /* Enable interrupts on the adapter */
+ writel(0, c2dev->regs + C2_IDIS);
+
+ /* create the WR init message */
+ err = c2_adapter_init(c2dev);
+ if (err)
+ goto bail4;
+ c2dev->init++;
+
+ /* open an adapter instance */
+ err = c2_rnic_open(c2dev);
+ if (err)
+ goto bail4;
+
+	/* Initialize the cached adapter limits */
+ if (c2_rnic_query(c2dev, &c2dev->props))
+ goto bail5;
+
+ /* Initialize the PD pool */
+ err = c2_init_pd_table(c2dev);
+ if (err)
+ goto bail5;
+
+ /* Initialize the QP pool */
+ c2_init_qp_table(c2dev);
+ return 0;
+
+ bail5:
+ c2_rnic_close(c2dev);
+ bail4:
+ vq_term(c2dev);
+ bail3:
+ dma_free_coherent(&c2dev->pcidev->dev,
+ c2dev->aeq.q_size * c2dev->aeq.msg_size,
+ q2_pages, pci_unmap_addr(&c2dev->aeq, mapping));
+ bail2:
+ dma_free_coherent(&c2dev->pcidev->dev,
+ c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+ q1_pages, pci_unmap_addr(&c2dev->rep_vq, mapping));
+ bail1:
+ c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+ bail0:
+ vfree(c2dev->qptr_array);
+
+ return err;
+}
+
+/*
+ * Called by c2_remove to cleanup the RNIC resources.
+ */
+void __devexit c2_rnic_term(struct c2_dev *c2dev)
+{
+
+ /* Close the open adapter instance */
+ c2_rnic_close(c2dev);
+
+ /* Send the TERM message to the adapter */
+ c2_adapter_term(c2dev);
+
+ /* Disable interrupts on the adapter */
+ writel(1, c2dev->regs + C2_IDIS);
+
+ /* Free the QP pool */
+ c2_cleanup_qp_table(c2dev);
+
+ /* Free the PD pool */
+ c2_cleanup_pd_table(c2dev);
+
+ /* Free the verbs request allocator */
+ vq_term(c2dev);
+
+	/* Free the asynchronous event queue */
+ dma_free_coherent(&c2dev->pcidev->dev,
+ c2dev->aeq.q_size * c2dev->aeq.msg_size,
+ c2dev->aeq.msg_pool.host,
+ pci_unmap_addr(&c2dev->aeq, mapping));
+
+ /* Free the verbs reply queue */
+ dma_free_coherent(&c2dev->pcidev->dev,
+ c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
+ c2dev->rep_vq.msg_pool.host,
+ pci_unmap_addr(&c2dev->rep_vq, mapping));
+
+ /* Free the MQ shared pointer pool */
+ c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
+
+ /* Free the qptr_array */
+ vfree(c2dev->qptr_array);
+
+ return;
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_status.h b/drivers/infiniband/hw/amso1100/c2_status.h
new file mode 100644
index 0000000..6ee4aa9
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_status.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_STATUS_H_
+#define _C2_STATUS_H_
+
+/*
+ * Verbs Status Codes
+ */
+enum c2_status {
+ C2_OK = 0, /* This must be zero */
+ CCERR_INSUFFICIENT_RESOURCES = 1,
+ CCERR_INVALID_MODIFIER = 2,
+ CCERR_INVALID_MODE = 3,
+ CCERR_IN_USE = 4,
+ CCERR_INVALID_RNIC = 5,
+ CCERR_INTERRUPTED_OPERATION = 6,
+ CCERR_INVALID_EH = 7,
+ CCERR_INVALID_CQ = 8,
+ CCERR_CQ_EMPTY = 9,
+ CCERR_NOT_IMPLEMENTED = 10,
+ CCERR_CQ_DEPTH_TOO_SMALL = 11,
+ CCERR_PD_IN_USE = 12,
+ CCERR_INVALID_PD = 13,
+ CCERR_INVALID_SRQ = 14,
+ CCERR_INVALID_ADDRESS = 15,
+ CCERR_INVALID_NETMASK = 16,
+ CCERR_INVALID_QP = 17,
+ CCERR_INVALID_QP_STATE = 18,
+ CCERR_TOO_MANY_WRS_POSTED = 19,
+ CCERR_INVALID_WR_TYPE = 20,
+ CCERR_INVALID_SGL_LENGTH = 21,
+ CCERR_INVALID_SQ_DEPTH = 22,
+ CCERR_INVALID_RQ_DEPTH = 23,
+ CCERR_INVALID_ORD = 24,
+ CCERR_INVALID_IRD = 25,
+ CCERR_QP_ATTR_CANNOT_CHANGE = 26,
+ CCERR_INVALID_STAG = 27,
+ CCERR_QP_IN_USE = 28,
+ CCERR_OUTSTANDING_WRS = 29,
+ CCERR_STAG_IN_USE = 30,
+ CCERR_INVALID_STAG_INDEX = 31,
+ CCERR_INVALID_SGL_FORMAT = 32,
+ CCERR_ADAPTER_TIMEOUT = 33,
+ CCERR_INVALID_CQ_DEPTH = 34,
+ CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
+ CCERR_INVALID_EP = 36,
+ CCERR_MR_IN_USE = CCERR_STAG_IN_USE,
+ CCERR_FLUSHED = 38,
+ CCERR_INVALID_WQE = 39,
+ CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
+ CCERR_REMOTE_TERMINATION_ERROR = 41,
+ CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
+ CCERR_ACCESS_VIOLATION = 43,
+ CCERR_INVALID_PD_ID = 44,
+ CCERR_WRAP_ERROR = 45,
+ CCERR_INV_STAG_ACCESS_ERROR = 46,
+ CCERR_ZERO_RDMA_READ_RESOURCES = 47,
+ CCERR_QP_NOT_PRIVILEGED = 48,
+ CCERR_STAG_STATE_NOT_INVALID = 49,
+ CCERR_INVALID_PAGE_SIZE = 50,
+ CCERR_INVALID_BUFFER_SIZE = 51,
+ CCERR_INVALID_PBE = 52,
+ CCERR_INVALID_FBO = 53,
+ CCERR_INVALID_LENGTH = 54,
+ CCERR_INVALID_ACCESS_RIGHTS = 55,
+ CCERR_PBL_TOO_BIG = 56,
+ CCERR_INVALID_VA = 57,
+ CCERR_INVALID_REGION = 58,
+ CCERR_INVALID_WINDOW = 59,
+ CCERR_TOTAL_LENGTH_TOO_BIG = 60,
+ CCERR_INVALID_QP_ID = 61,
+ CCERR_ADDR_IN_USE = 62,
+ CCERR_ADDR_NOT_AVAIL = 63,
+ CCERR_NET_DOWN = 64,
+ CCERR_NET_UNREACHABLE = 65,
+ CCERR_CONN_ABORTED = 66,
+ CCERR_CONN_RESET = 67,
+ CCERR_NO_BUFS = 68,
+ CCERR_CONN_TIMEDOUT = 69,
+ CCERR_CONN_REFUSED = 70,
+ CCERR_HOST_UNREACHABLE = 71,
+ CCERR_INVALID_SEND_SGL_DEPTH = 72,
+ CCERR_INVALID_RECV_SGL_DEPTH = 73,
+ CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
+ CCERR_INSUFFICIENT_PRIVILEGES = 75,
+ CCERR_STACK_ERROR = 76,
+ CCERR_INVALID_VERSION = 77,
+ CCERR_INVALID_MTU = 78,
+ CCERR_INVALID_IMAGE = 79,
+	CCERR_PENDING = 98, /* not an error; used internally by adapter */
+ CCERR_DEFER = 99, /* not an error; used internally by adapter */
+ CCERR_FAILED_WRITE = 100,
+ CCERR_FAILED_ERASE = 101,
+ CCERR_FAILED_VERIFICATION = 102,
+ CCERR_NOT_FOUND = 103,
+
+};
+
+/*
+ * CCAE_ACTIVE_CONNECT_RESULTS status result codes.
+ */
+enum c2_connect_status {
+ C2_CONN_STATUS_SUCCESS = C2_OK,
+ C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
+ C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
+ C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
+ C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
+ C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
+ C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
+ C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
+ C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
+ C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,
+ C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
+};
+
+/*
+ * Flash programming status codes.
+ */
+enum c2_flash_status {
+ C2_FLASH_STATUS_SUCCESS = 0x0000,
+ C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
+ C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
+ C2_FLASH_STATUS_ECLBS = 0x0400,
+ C2_FLASH_STATUS_PSLBS = 0x0800,
+ C2_FLASH_STATUS_VPENS = 0x1000,
+};
+
+#endif /* _C2_STATUS_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_user.h b/drivers/infiniband/hw/amso1100/c2_user.h
new file mode 100644
index 0000000..7e9e7ad
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_user.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef C2_USER_H
+#define C2_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct c2_alloc_ucontext_resp {
+ __u32 qp_tab_size;
+ __u32 uarc_size;
+};
+
+struct c2_alloc_pd_resp {
+ __u32 pdn;
+ __u32 reserved;
+};
+
+struct c2_create_cq {
+ __u32 lkey;
+ __u32 pdn;
+ __u64 arm_db_page;
+ __u64 set_db_page;
+ __u32 arm_db_index;
+ __u32 set_db_index;
+};
+
+struct c2_create_cq_resp {
+ __u32 cqn;
+ __u32 reserved;
+};
+
+struct c2_create_qp {
+ __u32 lkey;
+ __u32 reserved;
+ __u64 sq_db_page;
+ __u64 rq_db_page;
+ __u32 sq_db_index;
+ __u32 rq_db_index;
+};
+
+#endif /* C2_USER_H */
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c
new file mode 100644
index 0000000..9ce7819
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_vq.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "c2_vq.h"
+#include "c2_provider.h"
+
+/*
+ * Verbs Request Objects:
+ *
+ * VQ Request Objects are allocated by the kernel verbs handlers.
+ * They contain a wait object, a refcnt, an atomic bool indicating that the
+ * adapter has replied, and a copy of the verb reply work request.
+ * A pointer to the VQ Request Object is passed down in the context
+ * field of the work request message, and reflected back by the adapter
+ * in the verbs reply message. The function handle_vq() in the interrupt
+ * path will use this pointer to:
+ * 1) append a copy of the verbs reply message
+ * 2) mark that the reply is ready
+ * 3) wake up the kernel verbs handler blocked awaiting the reply.
+ *
+ *
+ * The kernel verbs handlers do a "get" to put a 2nd reference on the
+ * VQ Request object. If the kernel verbs handler exits before the adapter
+ * can respond, this extra reference will keep the VQ Request object around
+ * until the adapter's reply can be processed. The reason we need this is
+ * because a pointer to this object is stuffed into the context field of
+ * the verbs work request message, and reflected back in the reply message.
+ * It is used in the interrupt handler (handle_vq()) to wake up the appropriate
+ * kernel verb handler that is blocked awaiting the verb reply.
+ * So handle_vq() will do a "put" on the object when it's done accessing it.
+ * NOTE: If we guarantee that the kernel verb handler will never bail before
+ * getting the reply, then we don't need these refcnts.
+ *
+ *
+ * VQ Request objects are freed by the kernel verbs handlers only
+ * after the verb has been processed, or when the adapter fails and
+ * does not reply.
+ *
+ *
+ * Verbs Reply Buffers:
+ *
+ * VQ Reply bufs are local host memory copies of an
+ * outstanding Verb Request reply
+ * message. They are always allocated by the kernel verbs handlers, and _may_ be
+ * freed by either the kernel verbs handler -or- the interrupt handler. The
+ * kernel verbs handler _must_ free the repbuf, then free the vq request object
+ * in that order.
+ */
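
As a hedged summary of the lifecycle described above, here is a minimal sketch (not part of the patch) of the pattern the kernel verbs handlers follow; it mirrors c2_rnic_query() and c2_add_addr() from c2_rnic.c, with NULL and error checks trimmed, and assumes a struct c2_dev *c2dev is in scope:

    /* Sketch of the usual kernel verbs handler flow (error paths trimmed). */
    struct c2_vq_req *vq_req;
    struct c2wr_rnic_query_req wr;      /* filled in as in c2_rnic_query() */
    void *reply;
    int err;

    vq_req = vq_req_alloc(c2dev);                 /* refcnt = 1 */
    wr.hdr.context = (unsigned long) vq_req;      /* reflected back by the adapter */
    vq_req_get(c2dev, vq_req);                    /* 2nd ref survives if the handler bails */

    err = vq_send_wr(c2dev, (union c2wr *) &wr);
    if (err) {
            vq_req_put(c2dev, vq_req);            /* nothing was posted; drop the extra ref */
    } else if (!vq_wait_for_reply(c2dev, vq_req)) {
            reply = (void *) (unsigned long) vq_req->reply_msg;
            vq_repbuf_free(c2dev, reply);         /* free the reply buffer first... */
    }
    vq_req_free(c2dev, vq_req);                   /* ...then the request object */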
+
+int vq_init(struct c2_dev *c2dev)
+{
+ sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
+ (char) ('0' + c2dev->devnum));
+ c2dev->host_msg_cache =
+ kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (c2dev->host_msg_cache == NULL) {
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void vq_term(struct c2_dev *c2dev)
+{
+ kmem_cache_destroy(c2dev->host_msg_cache);
+}
+
+/* vq_req_alloc - allocate a VQ Request Object and initialize it.
+ * The refcnt is set to 1.
+ */
+struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
+{
+ struct c2_vq_req *r;
+
+ r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
+ if (r) {
+ init_waitqueue_head(&r->wait_object);
+ r->reply_msg = (u64) NULL;
+ r->event = 0;
+ r->cm_id = NULL;
+ r->qp = NULL;
+ atomic_set(&r->refcnt, 1);
+ atomic_set(&r->reply_ready, 0);
+ }
+ return r;
+}
+
+
+/* vq_req_free - free the VQ Request Object. It is assumed the verbs handler
+ * has already freed the VQ Reply Buffer if it existed.
+ */
+void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+ r->reply_msg = (u64) NULL;
+ if (atomic_dec_and_test(&r->refcnt)) {
+ kfree(r);
+ }
+}
+
+/* vq_req_get - reference a VQ Request Object. Done
+ * only in the kernel verbs handlers.
+ */
+void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+ atomic_inc(&r->refcnt);
+}
+
+
+/* vq_req_put - dereference and potentially free a VQ Request Object.
+ *
+ * This is only called by handle_vq() on the
+ * interrupt when it is done processing
+ * a verb reply message. If the associated
+ * kernel verbs handler has already bailed,
+ * then this put will actually free the VQ
+ * Request object _and_ the VQ Reply Buffer
+ * if it exists.
+ */
+void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
+{
+ if (atomic_dec_and_test(&r->refcnt)) {
+ if (r->reply_msg != (u64) NULL)
+ vq_repbuf_free(c2dev,
+ (void *) (unsigned long) r->reply_msg);
+ kfree(r);
+ }
+}
+
+
+/*
+ * vq_repbuf_alloc - allocate a VQ Reply Buffer.
+ */
+void *vq_repbuf_alloc(struct c2_dev *c2dev)
+{
+ return kmem_cache_alloc(c2dev->host_msg_cache, GFP_ATOMIC);
+}
+
+/*
+ * vq_send_wr - post a verbs request message to the Verbs Request Queue.
+ * If a message is not available in the MQ, then block until one is available.
+ * NOTE: handle_mq() in the interrupt context will wake up threads blocked here.
+ * When the adapter drains the Verbs Request Queue, it inserts MQ index 0
+ * into the adapter->host activity fifo and interrupts the host.
+ */
+int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
+{
+ void *msg;
+ wait_queue_t __wait;
+
+ /*
+ * grab adapter vq lock
+ */
+ spin_lock(&c2dev->vqlock);
+
+ /*
+ * allocate msg
+ */
+ msg = c2_mq_alloc(&c2dev->req_vq);
+
+ /*
+	 * If we cannot get a msg, then we'll wait.
+	 * When messages become available, the int handler will wake_up()
+ * any waiters.
+ */
+ while (msg == NULL) {
+ pr_debug("%s:%d no available msg in VQ, waiting...\n",
+ __func__, __LINE__);
+ init_waitqueue_entry(&__wait, current);
+ add_wait_queue(&c2dev->req_vq_wo, &__wait);
+ spin_unlock(&c2dev->vqlock);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!c2_mq_full(&c2dev->req_vq)) {
+ break;
+ }
+ if (!signal_pending(current)) {
+ schedule_timeout(1 * HZ); /* 1 second... */
+ continue;
+ }
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+ return -EINTR;
+ }
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&c2dev->req_vq_wo, &__wait);
+ spin_lock(&c2dev->vqlock);
+ msg = c2_mq_alloc(&c2dev->req_vq);
+ }
+
+ /*
+ * copy wr into adapter msg
+ */
+ memcpy(msg, wr, c2dev->req_vq.msg_size);
+
+ /*
+ * post msg
+ */
+ c2_mq_produce(&c2dev->req_vq);
+
+ /*
+ * release adapter vq lock
+ */
+ spin_unlock(&c2dev->vqlock);
+ return 0;
+}
+
+
+/*
+ * vq_wait_for_reply - block until the adapter posts a Verb Reply Message.
+ */
+int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
+{
+ if (!wait_event_timeout(req->wait_object,
+ atomic_read(&req->reply_ready),
+ 60*HZ))
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+/*
+ * vq_repbuf_free - Free a Verbs Reply Buffer.
+ */
+void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
+{
+ kmem_cache_free(c2dev->host_msg_cache, reply);
+}
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.h b/drivers/infiniband/hw/amso1100/c2_vq.h
new file mode 100644
index 0000000..3380562
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_vq.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_VQ_H_
+#define _C2_VQ_H_
+#include <linux/sched.h>
+#include "c2.h"
+#include "c2_wr.h"
+#include "c2_provider.h"
+
+struct c2_vq_req {
+ u64 reply_msg; /* ptr to reply msg */
+ wait_queue_head_t wait_object; /* wait object for vq reqs */
+ atomic_t reply_ready; /* set when reply is ready */
+ atomic_t refcnt; /* used to cancel WRs... */
+ int event;
+ struct iw_cm_id *cm_id;
+ struct c2_qp *qp;
+};
+
+extern int vq_init(struct c2_dev *c2dev);
+extern void vq_term(struct c2_dev *c2dev);
+
+extern struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
+extern void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
+extern int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
+
+extern void *vq_repbuf_alloc(struct c2_dev *c2dev);
+extern void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
+
+extern int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
+#endif /* _C2_VQ_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_wr.h b/drivers/infiniband/hw/amso1100/c2_wr.h
new file mode 100644
index 0000000..c65fbdd
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_wr.h
@@ -0,0 +1,1520 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _C2_WR_H_
+#define _C2_WR_H_
+
+#ifdef CCDEBUG
+#define CCWR_MAGIC 0xb07700b0
+#endif
+
+#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
+
+/* Maximum allowed size in bytes of private_data exchange
+ * on connect.
+ */
+#define C2_MAX_PRIVATE_DATA_SIZE 200
+
+/*
+ * These types are shared among the adapter, host, and CCIL consumer.
+ */
+enum c2_cq_notification_type {
+ C2_CQ_NOTIFICATION_TYPE_NONE = 1,
+ C2_CQ_NOTIFICATION_TYPE_NEXT,
+ C2_CQ_NOTIFICATION_TYPE_NEXT_SE
+};
+
+enum c2_setconfig_cmd {
+ C2_CFG_ADD_ADDR = 1,
+ C2_CFG_DEL_ADDR = 2,
+ C2_CFG_ADD_ROUTE = 3,
+ C2_CFG_DEL_ROUTE = 4
+};
+
+enum c2_getconfig_cmd {
+ C2_GETCONFIG_ROUTES = 1,
+ C2_GETCONFIG_ADDRS
+};
+
+/*
+ * CCIL Work Request Identifiers
+ */
+enum c2wr_ids {
+ CCWR_RNIC_OPEN = 1,
+ CCWR_RNIC_QUERY,
+ CCWR_RNIC_SETCONFIG,
+ CCWR_RNIC_GETCONFIG,
+ CCWR_RNIC_CLOSE,
+ CCWR_CQ_CREATE,
+ CCWR_CQ_QUERY,
+ CCWR_CQ_MODIFY,
+ CCWR_CQ_DESTROY,
+ CCWR_QP_CONNECT,
+ CCWR_PD_ALLOC,
+ CCWR_PD_DEALLOC,
+ CCWR_SRQ_CREATE,
+ CCWR_SRQ_QUERY,
+ CCWR_SRQ_MODIFY,
+ CCWR_SRQ_DESTROY,
+ CCWR_QP_CREATE,
+ CCWR_QP_QUERY,
+ CCWR_QP_MODIFY,
+ CCWR_QP_DESTROY,
+ CCWR_NSMR_STAG_ALLOC,
+ CCWR_NSMR_REGISTER,
+ CCWR_NSMR_PBL,
+ CCWR_STAG_DEALLOC,
+ CCWR_NSMR_REREGISTER,
+ CCWR_SMR_REGISTER,
+ CCWR_MR_QUERY,
+ CCWR_MW_ALLOC,
+ CCWR_MW_QUERY,
+ CCWR_EP_CREATE,
+ CCWR_EP_GETOPT,
+ CCWR_EP_SETOPT,
+ CCWR_EP_DESTROY,
+ CCWR_EP_BIND,
+ CCWR_EP_CONNECT,
+ CCWR_EP_LISTEN,
+ CCWR_EP_SHUTDOWN,
+ CCWR_EP_LISTEN_CREATE,
+ CCWR_EP_LISTEN_DESTROY,
+ CCWR_EP_QUERY,
+ CCWR_CR_ACCEPT,
+ CCWR_CR_REJECT,
+ CCWR_CONSOLE,
+ CCWR_TERM,
+ CCWR_FLASH_INIT,
+ CCWR_FLASH,
+ CCWR_BUF_ALLOC,
+ CCWR_BUF_FREE,
+ CCWR_FLASH_WRITE,
+ CCWR_INIT, /* WARNING: Don't move this ever again! */
+
+
+
+ /* Add new IDs here */
+
+
+
+ /*
+ * WARNING: CCWR_LAST must always be the last verbs id defined!
+ * All the preceding IDs are fixed, and must not change.
+ * You can add new IDs, but must not remove or reorder
+ * any IDs. If you do, YOU will ruin any hope of
+	 * compatibility between versions.
+ */
+ CCWR_LAST,
+
+ /*
+ * Start over at 1 so that arrays indexed by user wr id's
+ * begin at 1. This is OK since the verbs and user wr id's
+ * are always used on disjoint sets of queues.
+ */
+ /*
+ * The order of the CCWR_SEND_XX verbs must
+ * match the order of the RDMA_OPs
+ */
+ CCWR_SEND = 1,
+ CCWR_SEND_INV,
+ CCWR_SEND_SE,
+ CCWR_SEND_SE_INV,
+ CCWR_RDMA_WRITE,
+ CCWR_RDMA_READ,
+ CCWR_RDMA_READ_INV,
+ CCWR_MW_BIND,
+ CCWR_NSMR_FASTREG,
+ CCWR_STAG_INVALIDATE,
+ CCWR_RECV,
+ CCWR_NOP,
+ CCWR_UNIMPL,
+/* WARNING: This must always be the last user wr id defined! */
+};
+#define RDMA_SEND_OPCODE_FROM_WR_ID(x) (x+2)
+
+/*
+ * SQ/RQ Work Request Types
+ */
+enum c2_wr_type {
+ C2_WR_TYPE_SEND = CCWR_SEND,
+ C2_WR_TYPE_SEND_SE = CCWR_SEND_SE,
+ C2_WR_TYPE_SEND_INV = CCWR_SEND_INV,
+ C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV,
+ C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE,
+ C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ,
+ C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV,
+ C2_WR_TYPE_BIND_MW = CCWR_MW_BIND,
+ C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG,
+ C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE,
+ C2_WR_TYPE_RECV = CCWR_RECV,
+ C2_WR_TYPE_NOP = CCWR_NOP,
+};
+
+struct c2_netaddr {
+ __be32 ip_addr;
+ __be32 netmask;
+ u32 mtu;
+};
+
+struct c2_route {
+ u32 ip_addr; /* 0 indicates the default route */
+ u32 netmask; /* netmask associated with dst */
+ u32 flags;
+ union {
+ u32 ipaddr; /* address of the nexthop interface */
+ u8 enaddr[6];
+ } nexthop;
+};
+
+/*
+ * A Scatter Gather Entry.
+ */
+struct c2_data_addr {
+ __be32 stag;
+ __be32 length;
+ __be64 to;
+};
+
+/*
+ * MR and MW flags used by the consumer, RI, and RNIC.
+ */
+enum c2_mm_flags {
+ MEM_REMOTE = 0x0001, /* allow mw binds with remote access. */
+ MEM_VA_BASED = 0x0002, /* Not Zero-based */
+ MEM_PBL_COMPLETE = 0x0004, /* PBL array is complete in this msg */
+ MEM_LOCAL_READ = 0x0008, /* allow local reads */
+ MEM_LOCAL_WRITE = 0x0010, /* allow local writes */
+ MEM_REMOTE_READ = 0x0020, /* allow remote reads */
+ MEM_REMOTE_WRITE = 0x0040, /* allow remote writes */
+ MEM_WINDOW_BIND = 0x0080, /* binds allowed */
+ MEM_SHARED = 0x0100, /* set if MR is shared */
+ MEM_STAG_VALID = 0x0200 /* set if STAG is in valid state */
+};
+
+/*
+ * CCIL API ACF flags defined in terms of the low level mem flags.
+ * This minimizes translation needed in the user API
+ */
+enum c2_acf {
+ C2_ACF_LOCAL_READ = MEM_LOCAL_READ,
+ C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE,
+ C2_ACF_REMOTE_READ = MEM_REMOTE_READ,
+ C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE,
+ C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND
+};
+
+/*
+ * Image types of objects written to flash
+ */
+#define C2_FLASH_IMG_BITFILE 1
+#define C2_FLASH_IMG_OPTION_ROM 2
+#define C2_FLASH_IMG_VPD 3
+
+/*
+ * To fix bug 1815 we define the max allowable size of the
+ * terminate message (per the IETF spec). Refer to the IETF
+ * protocol specification, section 12.1.6, page 64.
+ * The message is prefixed by 20 bytes of DDP info.
+ *
+ * Then the message has 6 bytes for the terminate control
+ * and DDP segment length info plus a DDP header (either
+ * 14 or 18 bytes) plus 28 bytes for the RDMA header.
+ * Thus the max size is:
+ * 20 + (6 + 18 + 28) = 72
+ */
+#define C2_MAX_TERMINATE_MESSAGE_SIZE (72)
+
+/*
+ * Build String Length. It must be the same as C2_BUILD_STR_LEN in ccil_api.h
+ */
+#define WR_BUILD_STR_LEN 64
+
+/*
+ * WARNING: All of these structs need to align any 64bit types on
+ * 64 bit boundaries! 64bit types include u64 and __be64.
+ */
+
+/*
+ * Clustercore Work Request Header. Be sensitive to field layout
+ * and alignment.
+ */
+struct c2wr_hdr {
+ /* wqe_count is part of the cqe. It is put here so the
+ * adapter can write to it while the wr is pending without
+ * clobbering part of the wr. This word need not be dma'd
+ * from the host to adapter by libccil, but we copy it anyway
+ * to make the memcpy to the adapter better aligned.
+ */
+ __be32 wqe_count;
+
+ /* Put these fields next so that later 32- and 64-bit
+ * quantities are naturally aligned.
+ */
+ u8 id;
+ u8 result; /* adapter -> host */
+ u8 sge_count; /* host -> adapter */
+ u8 flags; /* host -> adapter */
+
+ u64 context;
+#ifdef CCMSGMAGIC
+ u32 magic;
+ u32 pad;
+#endif
+} __attribute__((packed));
+
+/*
+ *------------------------ RNIC ------------------------
+ */
+
+/*
+ * WR_RNIC_OPEN
+ */
+
+/*
+ * Flags for the RNIC WRs
+ */
+enum c2_rnic_flags {
+ RNIC_IRD_STATIC = 0x0001,
+ RNIC_ORD_STATIC = 0x0002,
+ RNIC_QP_STATIC = 0x0004,
+ RNIC_SRQ_SUPPORTED = 0x0008,
+ RNIC_PBL_BLOCK_MODE = 0x0010,
+ RNIC_SRQ_MODEL_ARRIVAL = 0x0020,
+ RNIC_CQ_OVF_DETECTED = 0x0040,
+ RNIC_PRIV_MODE = 0x0080
+};
+
+struct c2wr_rnic_open_req {
+ struct c2wr_hdr hdr;
+ u64 user_context;
+ __be16 flags; /* See enum c2_rnic_flags */
+ __be16 port_num;
+} __attribute__((packed));
+
+struct c2wr_rnic_open_rep {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+} __attribute__((packed));
+
+union c2wr_rnic_open {
+ struct c2wr_rnic_open_req req;
+ struct c2wr_rnic_open_rep rep;
+} __attribute__((packed));
+
+struct c2wr_rnic_query_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+} __attribute__((packed));
+
+/*
+ * WR_RNIC_QUERY
+ */
+struct c2wr_rnic_query_rep {
+ struct c2wr_hdr hdr;
+ u64 user_context;
+ __be32 vendor_id;
+ __be32 part_number;
+ __be32 hw_version;
+ __be32 fw_ver_major;
+ __be32 fw_ver_minor;
+ __be32 fw_ver_patch;
+ char fw_ver_build_str[WR_BUILD_STR_LEN];
+ __be32 max_qps;
+ __be32 max_qp_depth;
+ u32 max_srq_depth;
+ u32 max_send_sgl_depth;
+ u32 max_rdma_sgl_depth;
+ __be32 max_cqs;
+ __be32 max_cq_depth;
+ u32 max_cq_event_handlers;
+ __be32 max_mrs;
+ u32 max_pbl_depth;
+ __be32 max_pds;
+ __be32 max_global_ird;
+ u32 max_global_ord;
+ __be32 max_qp_ird;
+ __be32 max_qp_ord;
+ u32 flags;
+ __be32 max_mws;
+ u32 pbe_range_low;
+ u32 pbe_range_high;
+ u32 max_srqs;
+ u32 page_size;
+} __attribute__((packed));
+
+union c2wr_rnic_query {
+ struct c2wr_rnic_query_req req;
+ struct c2wr_rnic_query_rep rep;
+} __attribute__((packed));
+
+/*
+ * WR_RNIC_GETCONFIG
+ */
+
+struct c2wr_rnic_getconfig_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 option; /* see c2_getconfig_cmd_t */
+ u64 reply_buf;
+ u32 reply_buf_len;
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_getconfig_rep {
+ struct c2wr_hdr hdr;
+ u32 option; /* see c2_getconfig_cmd_t */
+ u32 count_len; /* length of the number of addresses configured */
+} __attribute__((packed)) ;
+
+union c2wr_rnic_getconfig {
+ struct c2wr_rnic_getconfig_req req;
+ struct c2wr_rnic_getconfig_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * WR_RNIC_SETCONFIG
+ */
+struct c2wr_rnic_setconfig_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ __be32 option; /* See c2_setconfig_cmd_t */
+ /* variable data and pad. See c2_netaddr and c2_route */
+ u8 data[0];
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_setconfig_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_rnic_setconfig {
+ struct c2wr_rnic_setconfig_req req;
+ struct c2wr_rnic_setconfig_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * WR_RNIC_CLOSE
+ */
+struct c2wr_rnic_close_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_rnic_close_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_rnic_close {
+ struct c2wr_rnic_close_req req;
+ struct c2wr_rnic_close_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ CQ ------------------------
+ */
+struct c2wr_cq_create_req {
+ struct c2wr_hdr hdr;
+ __be64 shared_ht;
+ u64 user_context;
+ __be64 msg_pool;
+ u32 rnic_handle;
+ __be32 msg_size;
+ __be32 depth;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_create_rep {
+ struct c2wr_hdr hdr;
+ __be32 mq_index;
+ __be32 adapter_shared;
+ u32 cq_handle;
+} __attribute__((packed)) ;
+
+union c2wr_cq_create {
+ struct c2wr_cq_create_req req;
+ struct c2wr_cq_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_modify_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 cq_handle;
+ u32 new_depth;
+ u64 new_msg_pool;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_modify_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_cq_modify {
+ struct c2wr_cq_modify_req req;
+ struct c2wr_cq_modify_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_destroy_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 cq_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_cq_destroy_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_cq_destroy {
+ struct c2wr_cq_destroy_req req;
+ struct c2wr_cq_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ PD ------------------------
+ */
+struct c2wr_pd_alloc_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_alloc_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_pd_alloc {
+ struct c2wr_pd_alloc_req req;
+ struct c2wr_pd_alloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_dealloc_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_pd_dealloc_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_pd_dealloc {
+ struct c2wr_pd_dealloc_req req;
+ struct c2wr_pd_dealloc_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ SRQ ------------------------
+ */
+struct c2wr_srq_create_req {
+ struct c2wr_hdr hdr;
+ u64 shared_ht;
+ u64 user_context;
+ u32 rnic_handle;
+ u32 srq_depth;
+ u32 srq_limit;
+ u32 sgl_depth;
+ u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_create_rep {
+ struct c2wr_hdr hdr;
+ u32 srq_depth;
+ u32 sgl_depth;
+ u32 msg_size;
+ u32 mq_index;
+ u32 mq_start;
+ u32 srq_handle;
+} __attribute__((packed)) ;
+
+union c2wr_srq_create {
+ struct c2wr_srq_create_req req;
+ struct c2wr_srq_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_destroy_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 srq_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_srq_destroy_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_srq_destroy {
+ struct c2wr_srq_destroy_req req;
+ struct c2wr_srq_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ QP ------------------------
+ */
+enum c2wr_qp_flags {
+ QP_RDMA_READ = 0x00000001, /* RDMA read enabled? */
+ QP_RDMA_WRITE = 0x00000002, /* RDMA write enabled? */
+ QP_MW_BIND = 0x00000004, /* MWs enabled */
+ QP_ZERO_STAG = 0x00000008, /* enabled? */
+ QP_REMOTE_TERMINATION = 0x00000010, /* remote end terminated */
+ QP_RDMA_READ_RESPONSE = 0x00000020 /* Remote RDMA read */
+ /* enabled? */
+};
+
+struct c2wr_qp_create_req {
+ struct c2wr_hdr hdr;
+ __be64 shared_sq_ht;
+ __be64 shared_rq_ht;
+ u64 user_context;
+ u32 rnic_handle;
+ u32 sq_cq_handle;
+ u32 rq_cq_handle;
+ __be32 sq_depth;
+ __be32 rq_depth;
+ u32 srq_handle;
+ u32 srq_limit;
+ __be32 flags; /* see enum c2wr_qp_flags */
+ __be32 send_sgl_depth;
+ __be32 recv_sgl_depth;
+ __be32 rdma_write_sgl_depth;
+ __be32 ord;
+ __be32 ird;
+ u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_create_rep {
+ struct c2wr_hdr hdr;
+ __be32 sq_depth;
+ __be32 rq_depth;
+ u32 send_sgl_depth;
+ u32 recv_sgl_depth;
+ u32 rdma_write_sgl_depth;
+ u32 ord;
+ u32 ird;
+ __be32 sq_msg_size;
+ __be32 sq_mq_index;
+ __be32 sq_mq_start;
+ __be32 rq_msg_size;
+ __be32 rq_mq_index;
+ __be32 rq_mq_start;
+ u32 qp_handle;
+} __attribute__((packed)) ;
+
+union c2wr_qp_create {
+ struct c2wr_qp_create_req req;
+ struct c2wr_qp_create_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_query_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 qp_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_query_rep {
+ struct c2wr_hdr hdr;
+ u64 user_context;
+ u32 rnic_handle;
+ u32 sq_depth;
+ u32 rq_depth;
+ u32 send_sgl_depth;
+ u32 rdma_write_sgl_depth;
+ u32 recv_sgl_depth;
+ u32 ord;
+ u32 ird;
+ u16 qp_state;
+ u16 flags; /* see c2wr_qp_flags_t */
+ u32 qp_id;
+ u32 local_addr;
+ u32 remote_addr;
+ u16 local_port;
+ u16 remote_port;
+ u32 terminate_msg_length; /* 0 if not present */
+ u8 data[0];
+ /* Terminate Message in-line here. */
+} __attribute__((packed)) ;
+
+union c2wr_qp_query {
+ struct c2wr_qp_query_req req;
+ struct c2wr_qp_query_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_modify_req {
+ struct c2wr_hdr hdr;
+ u64 stream_msg;
+ u32 stream_msg_length;
+ u32 rnic_handle;
+ u32 qp_handle;
+ __be32 next_qp_state;
+ __be32 ord;
+ __be32 ird;
+ __be32 sq_depth;
+ __be32 rq_depth;
+ u32 llp_ep_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_modify_rep {
+ struct c2wr_hdr hdr;
+ u32 ord;
+ u32 ird;
+ u32 sq_depth;
+ u32 rq_depth;
+ u32 sq_msg_size;
+ u32 sq_mq_index;
+ u32 sq_mq_start;
+ u32 rq_msg_size;
+ u32 rq_mq_index;
+ u32 rq_mq_start;
+} __attribute__((packed)) ;
+
+union c2wr_qp_modify {
+ struct c2wr_qp_modify_req req;
+ struct c2wr_qp_modify_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_destroy_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 qp_handle;
+} __attribute__((packed)) ;
+
+struct c2wr_qp_destroy_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_qp_destroy {
+ struct c2wr_qp_destroy_req req;
+ struct c2wr_qp_destroy_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ * The CCWR_QP_CONNECT msg is posted on the verbs request queue. It can
+ * only be posted when a QP is in IDLE state. After the connect request is
+ * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state.
+ * No synchronous reply from adapter to this WR. The results of
+ * connection are passed back in an async event CCAE_ACTIVE_CONNECT_RESULTS
+ * See c2wr_ae_active_connect_results_t
+ */
+struct c2wr_qp_connect_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 qp_handle;
+ __be32 remote_addr;
+ __be16 remote_port;
+ u16 pad;
+ __be32 private_data_length;
+ u8 private_data[0]; /* Private data in-line. */
+} __attribute__((packed)) ;
+
+struct c2wr_qp_connect {
+ struct c2wr_qp_connect_req req;
+ /* no synchronous reply. */
+} __attribute__((packed)) ;
+
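+/*
+ * Illustrative sketch only, not part of the adapter interface: one
+ * plausible way for the host to fill in a connect WR before handing it
+ * to the (not shown) verbs request queue post routine. It assumes the
+ * CCWR_QP_CONNECT id and the struct c2wr_hdr layout defined earlier in
+ * this header; byte ordering follows the __be* field types above.
+ */
+static __inline__ void
+c2wr_qp_connect_req_init(struct c2wr_qp_connect_req *wr, u32 rnic_handle,
+ u32 qp_handle, __be32 raddr, __be16 rport, u32 pdata_len)
+{
+ wr->hdr.id = CCWR_QP_CONNECT; /* no synchronous reply */
+ wr->rnic_handle = rnic_handle;
+ wr->qp_handle = qp_handle;
+ wr->remote_addr = raddr; /* already big endian */
+ wr->remote_port = rport;
+ wr->pad = 0;
+ wr->private_data_length = cpu_to_be32(pdata_len);
+ /* the caller copies pdata_len bytes of private data in-line here */
+}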
+
+/*
+ *------------------------ MM ------------------------
+ */
+
+struct c2wr_nsmr_stag_alloc_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 pbl_depth;
+ u32 pd_id;
+ u32 flags;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_stag_alloc_rep {
+ struct c2wr_hdr hdr;
+ u32 pbl_depth;
+ u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_stag_alloc {
+ struct c2wr_nsmr_stag_alloc_req req;
+ struct c2wr_nsmr_stag_alloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_register_req {
+ struct c2wr_hdr hdr;
+ __be64 va;
+ u32 rnic_handle;
+ __be16 flags;
+ u8 stag_key;
+ u8 pad;
+ u32 pd_id;
+ __be32 pbl_depth;
+ __be32 pbe_size;
+ __be32 fbo;
+ __be32 length;
+ __be32 addrs_length;
+ /* array of paddrs (must be aligned on a 64bit boundary) */
+ __be64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_register_rep {
+ struct c2wr_hdr hdr;
+ u32 pbl_depth;
+ __be32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_register {
+ struct c2wr_nsmr_register_req req;
+ struct c2wr_nsmr_register_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_pbl_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ __be32 flags;
+ __be32 stag_index;
+ __be32 addrs_length;
+ /* array of paddrs (must be aligned on a 64bit boundary) */
+ __be64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_pbl_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_pbl {
+ struct c2wr_nsmr_pbl_req req;
+ struct c2wr_nsmr_pbl_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mr_query_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_mr_query_rep {
+ struct c2wr_hdr hdr;
+ u8 stag_key;
+ u8 pad[3];
+ u32 pd_id;
+ u32 flags;
+ u32 pbl_depth;
+} __attribute__((packed)) ;
+
+union c2wr_mr_query {
+ struct c2wr_mr_query_req req;
+ struct c2wr_mr_query_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_query_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_query_rep {
+ struct c2wr_hdr hdr;
+ u8 stag_key;
+ u8 pad[3];
+ u32 pd_id;
+ u32 flags;
+} __attribute__((packed)) ;
+
+union c2wr_mw_query {
+ struct c2wr_mw_query_req req;
+ struct c2wr_mw_query_rep rep;
+} __attribute__((packed)) ;
+
+
+struct c2wr_stag_dealloc_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ __be32 stag_index;
+} __attribute__((packed)) ;
+
+struct c2wr_stag_dealloc_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed)) ;
+
+union c2wr_stag_dealloc {
+ struct c2wr_stag_dealloc_req req;
+ struct c2wr_stag_dealloc_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_reregister_req {
+ struct c2wr_hdr hdr;
+ u64 va;
+ u32 rnic_handle;
+ u16 flags;
+ u8 stag_key;
+ u8 pad;
+ u32 stag_index;
+ u32 pd_id;
+ u32 pbl_depth;
+ u32 pbe_size;
+ u32 fbo;
+ u32 length;
+ u32 addrs_length;
+ u32 pad1;
+ /* array of paddrs (must be aligned on a 64bit boundary) */
+ u64 paddrs[0];
+} __attribute__((packed)) ;
+
+struct c2wr_nsmr_reregister_rep {
+ struct c2wr_hdr hdr;
+ u32 pbl_depth;
+ u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_nsmr_reregister {
+ struct c2wr_nsmr_reregister_req req;
+ struct c2wr_nsmr_reregister_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_smr_register_req {
+ struct c2wr_hdr hdr;
+ u64 va;
+ u32 rnic_handle;
+ u16 flags;
+ u8 stag_key;
+ u8 pad;
+ u32 stag_index;
+ u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_smr_register_rep {
+ struct c2wr_hdr hdr;
+ u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_smr_register {
+ struct c2wr_smr_register_req req;
+ struct c2wr_smr_register_rep rep;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_alloc_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 pd_id;
+} __attribute__((packed)) ;
+
+struct c2wr_mw_alloc_rep {
+ struct c2wr_hdr hdr;
+ u32 stag_index;
+} __attribute__((packed)) ;
+
+union c2wr_mw_alloc {
+ struct c2wr_mw_alloc_req req;
+ struct c2wr_mw_alloc_rep rep;
+} __attribute__((packed)) ;
+
+/*
+ *------------------------ WRs -----------------------
+ */
+
+struct c2wr_user_hdr {
+ struct c2wr_hdr hdr; /* Has status and WR Type */
+} __attribute__((packed)) ;
+
+enum c2_qp_state {
+ C2_QP_STATE_IDLE = 0x01,
+ C2_QP_STATE_CONNECTING = 0x02,
+ C2_QP_STATE_RTS = 0x04,
+ C2_QP_STATE_CLOSING = 0x08,
+ C2_QP_STATE_TERMINATE = 0x10,
+ C2_QP_STATE_ERROR = 0x20,
+};
+
+/* Completion queue entry. */
+struct c2wr_ce {
+ struct c2wr_hdr hdr; /* Has status and WR Type */
+ u64 qp_user_context; /* c2_user_qp_t * */
+ u32 qp_state; /* Current QP State */
+ u32 handle; /* QPID or EP Handle */
+ __be32 bytes_rcvd; /* valid for RECV WCs */
+ u32 stag;
+} __attribute__((packed)) ;
+
+
+/*
+ * Flags used for all post-sq WRs. These must fit in the flags
+ * field of the struct c2wr_hdr (eight bits).
+ */
+enum {
+ SQ_SIGNALED = 0x01,
+ SQ_READ_FENCE = 0x02,
+ SQ_FENCE = 0x04,
+};
+
+/*
+ * Common prefix for all post-sq WRs: currently just the standard
+ * user header.
+ */
+struct c2_sq_hdr {
+ struct c2wr_user_hdr user_hdr;
+} __attribute__((packed));
+
+/*
+ * Same as above but for post-rq WRs.
+ */
+struct c2_rq_hdr {
+ struct c2wr_user_hdr user_hdr;
+} __attribute__((packed));
+
+/*
+ * use the same struct for all sends.
+ */
+struct c2wr_send_req {
+ struct c2_sq_hdr sq_hdr;
+ __be32 sge_len;
+ __be32 remote_stag;
+ u8 data[0]; /* SGE array */
+} __attribute__((packed));
+
+union c2wr_send {
+ struct c2wr_send_req req;
+ struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_rdma_write_req {
+ struct c2_sq_hdr sq_hdr;
+ __be64 remote_to;
+ __be32 remote_stag;
+ __be32 sge_len;
+ u8 data[0]; /* SGE array */
+} __attribute__((packed));
+
+union c2wr_rdma_write {
+ struct c2wr_rdma_write_req req;
+ struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_rdma_read_req {
+ struct c2_sq_hdr sq_hdr;
+ __be64 local_to;
+ __be64 remote_to;
+ __be32 local_stag;
+ __be32 remote_stag;
+ __be32 length;
+} __attribute__((packed));
+
+union c2wr_rdma_read {
+ struct c2wr_rdma_read_req req;
+ struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_mw_bind_req {
+ struct c2_sq_hdr sq_hdr;
+ u64 va;
+ u8 stag_key;
+ u8 pad[3];
+ u32 mw_stag_index;
+ u32 mr_stag_index;
+ u32 length;
+ u32 flags;
+} __attribute__((packed));
+
+union c2wr_mw_bind {
+ struct c2wr_mw_bind_req req;
+ struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_nsmr_fastreg_req {
+ struct c2_sq_hdr sq_hdr;
+ u64 va;
+ u8 stag_key;
+ u8 pad[3];
+ u32 stag_index;
+ u32 pbe_size;
+ u32 fbo;
+ u32 length;
+ u32 addrs_length;
+ /* array of paddrs (must be aligned on a 64bit boundary) */
+ u64 paddrs[0];
+} __attribute__((packed));
+
+union c2wr_nsmr_fastreg {
+ struct c2wr_nsmr_fastreg_req req;
+ struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_stag_invalidate_req {
+ struct c2_sq_hdr sq_hdr;
+ u8 stag_key;
+ u8 pad[3];
+ u32 stag_index;
+} __attribute__((packed));
+
+union c2wr_stag_invalidate {
+ struct c2wr_stag_invalidate_req req;
+ struct c2wr_ce rep;
+} __attribute__((packed));
+
+union c2wr_sqwr {
+ struct c2_sq_hdr sq_hdr;
+ struct c2wr_send_req send;
+ struct c2wr_send_req send_se;
+ struct c2wr_send_req send_inv;
+ struct c2wr_send_req send_se_inv;
+ struct c2wr_rdma_write_req rdma_write;
+ struct c2wr_rdma_read_req rdma_read;
+ struct c2wr_mw_bind_req mw_bind;
+ struct c2wr_nsmr_fastreg_req nsmr_fastreg;
+ struct c2wr_stag_invalidate_req stag_inv;
+} __attribute__((packed));
+
+
+/*
+ * RQ WRs
+ */
+struct c2wr_rqwr {
+ struct c2_rq_hdr rq_hdr;
+ u8 data[0]; /* array of SGEs */
+} __attribute__((packed));
+
+union c2wr_recv {
+ struct c2wr_rqwr req;
+ struct c2wr_ce rep;
+} __attribute__((packed));
+
+/*
+ * All AEs start with this header. Most AEs only need to convey the
+ * information in the header. Some, like LLP connection events, need
+ * more info. The union c2wr_ae has all the possible AEs.
+ *
+ * hdr.context is the user_context from the rnic_open WR. NULL if this
+ * is not affiliated with an rnic.
+ *
+ * hdr.id is the AE identifier (e.g. CCAE_REMOTE_SHUTDOWN,
+ * CCAE_LLP_CLOSE_COMPLETE).
+ *
+ * resource_type is one of: C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ
+ *
+ * user_context is the context passed down when the host created the resource.
+ */
+struct c2wr_ae_hdr {
+ struct c2wr_hdr hdr;
+ u64 user_context; /* user context for this res. */
+ __be32 resource_type; /* see enum c2_resource_indicator */
+ __be32 resource; /* handle for resource */
+ __be32 qp_state; /* current QP State */
+} __attribute__((packed));
+
+/*
+ * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ,
+ * the adapter moves the QP into RTS state
+ */
+struct c2wr_ae_active_connect_results {
+ struct c2wr_ae_hdr ae_hdr;
+ __be32 laddr;
+ __be32 raddr;
+ __be16 lport;
+ __be16 rport;
+ __be32 private_data_length;
+ u8 private_data[0]; /* data is in-line in the msg. */
+} __attribute__((packed));
+
+/*
+ * When connections are established by the stack (and the private data
+ * MPA frame is received), the adapter will generate an event to the host.
+ * The details of the connection, any private data, and the new connection
+ * request handle is passed up via the CCAE_CONNECTION_REQUEST msg on the
+ * AE queue:
+ */
+struct c2wr_ae_connection_request {
+ struct c2wr_ae_hdr ae_hdr;
+ u32 cr_handle; /* connreq handle (sock ptr) */
+ __be32 laddr;
+ __be32 raddr;
+ __be16 lport;
+ __be16 rport;
+ __be32 private_data_length;
+ u8 private_data[0]; /* data is in-line in the msg. */
+} __attribute__((packed));
+
+union c2wr_ae {
+ struct c2wr_ae_hdr ae_generic;
+ struct c2wr_ae_active_connect_results ae_active_connect_results;
+ struct c2wr_ae_connection_request ae_connection_request;
+} __attribute__((packed));
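+
+/*
+ * Illustrative sketch only: one way a host AE handler might dispatch on
+ * entries pulled from the AE queue using the structures above. The
+ * CCAE_* event ids are assumed to come from the event id enum earlier in
+ * this header; a real handler would also look at resource_type and the
+ * resource handle.
+ */
+static __inline__ void c2wr_ae_example_dispatch(union c2wr_ae *ae)
+{
+ switch (ae->ae_generic.hdr.id) {
+ case CCAE_CONNECTION_REQUEST:
+ /* passive side: stash cr_handle, then accept or reject */
+ break;
+ case CCAE_ACTIVE_CONNECT_RESULTS:
+ /* active side: connect attempt finished; QP is in RTS on success */
+ break;
+ default:
+ /* most AEs carry only the generic header */
+ break;
+ }
+}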
+
+struct c2wr_init_req {
+ struct c2wr_hdr hdr;
+ __be64 hint_count;
+ __be64 q0_host_shared;
+ __be64 q1_host_shared;
+ __be64 q1_host_msg_pool;
+ __be64 q2_host_shared;
+ __be64 q2_host_msg_pool;
+} __attribute__((packed));
+
+struct c2wr_init_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_init {
+ struct c2wr_init_req req;
+ struct c2wr_init_rep rep;
+} __attribute__((packed));
+
+/*
+ * For upgrading flash.
+ */
+
+struct c2wr_flash_init_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+} __attribute__((packed));
+
+struct c2wr_flash_init_rep {
+ struct c2wr_hdr hdr;
+ u32 adapter_flash_buf_offset;
+ u32 adapter_flash_len;
+} __attribute__((packed));
+
+union c2wr_flash_init {
+ struct c2wr_flash_init_req req;
+ struct c2wr_flash_init_rep rep;
+} __attribute__((packed));
+
+struct c2wr_flash_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 len;
+} __attribute__((packed));
+
+struct c2wr_flash_rep {
+ struct c2wr_hdr hdr;
+ u32 status;
+} __attribute__((packed));
+
+union c2wr_flash {
+ struct c2wr_flash_req req;
+ struct c2wr_flash_rep rep;
+} __attribute__((packed));
+
+struct c2wr_buf_alloc_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 size;
+} __attribute__((packed));
+
+struct c2wr_buf_alloc_rep {
+ struct c2wr_hdr hdr;
+ u32 offset; /* 0 if mem not available */
+ u32 size; /* 0 if mem not available */
+} __attribute__((packed));
+
+union c2wr_buf_alloc {
+ struct c2wr_buf_alloc_req req;
+ struct c2wr_buf_alloc_rep rep;
+} __attribute__((packed));
+
+struct c2wr_buf_free_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 offset; /* Must match value from alloc */
+ u32 size; /* Must match value from alloc */
+} __attribute__((packed));
+
+struct c2wr_buf_free_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_buf_free {
+ struct c2wr_buf_free_req req;
+ struct c2wr_ce rep;
+} __attribute__((packed));
+
+struct c2wr_flash_write_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 offset;
+ u32 size;
+ u32 type;
+ u32 flags;
+} __attribute__((packed));
+
+struct c2wr_flash_write_rep {
+ struct c2wr_hdr hdr;
+ u32 status;
+} __attribute__((packed));
+
+union c2wr_flash_write {
+ struct c2wr_flash_write_req req;
+ struct c2wr_flash_write_rep rep;
+} __attribute__((packed));
+
+/*
+ * Messages for LLP connection setup.
+ */
+
+/*
+ * Listen Request. This allocates a listening endpoint to allow passive
+ * connection setup. Newly established LLP connections are passed up
+ * via an AE. See struct c2wr_ae_connection_request.
+ */
+struct c2wr_ep_listen_create_req {
+ struct c2wr_hdr hdr;
+ u64 user_context; /* returned in AEs. */
+ u32 rnic_handle;
+ __be32 local_addr; /* local addr, or 0 */
+ __be16 local_port; /* 0 means "pick one" */
+ u16 pad;
+ __be32 backlog; /* traditional tcp listen backlog */
+} __attribute__((packed));
+
+struct c2wr_ep_listen_create_rep {
+ struct c2wr_hdr hdr;
+ u32 ep_handle; /* handle to new listening ep */
+ u16 local_port; /* resulting port... */
+ u16 pad;
+} __attribute__((packed));
+
+union c2wr_ep_listen_create {
+ struct c2wr_ep_listen_create_req req;
+ struct c2wr_ep_listen_create_rep rep;
+} __attribute__((packed));
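+
+/*
+ * Illustrative sketch only: filling in a listen-create request. A local
+ * address of 0 listens on any local address and a local port of 0 lets
+ * the adapter pick one, per the field comments above. The caller is
+ * assumed to set the WR id from the message id enum and to post the
+ * message on the verbs request queue; those steps are not shown.
+ */
+static __inline__ void
+c2wr_ep_listen_create_req_init(struct c2wr_ep_listen_create_req *wr,
+ u32 rnic_handle, u64 user_context, u32 backlog)
+{
+ wr->user_context = user_context; /* echoed back in AEs */
+ wr->rnic_handle = rnic_handle;
+ wr->local_addr = 0; /* any local address */
+ wr->local_port = 0; /* let the adapter pick one */
+ wr->pad = 0;
+ wr->backlog = cpu_to_be32(backlog);
+}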
+
+struct c2wr_ep_listen_destroy_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 ep_handle;
+} __attribute__((packed));
+
+struct c2wr_ep_listen_destroy_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_ep_listen_destroy {
+ struct c2wr_ep_listen_destroy_req req;
+ struct c2wr_ep_listen_destroy_rep rep;
+} __attribute__((packed));
+
+struct c2wr_ep_query_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 ep_handle;
+} __attribute__((packed));
+
+struct c2wr_ep_query_rep {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 local_addr;
+ u32 remote_addr;
+ u16 local_port;
+ u16 remote_port;
+} __attribute__((packed));
+
+union c2wr_ep_query {
+ struct c2wr_ep_query_req req;
+ struct c2wr_ep_query_rep rep;
+} __attribute__((packed));
+
+
+/*
+ * The host passes this down to indicate acceptance of a pending iWARP
+ * connection. The cr_handle was obtained from the CONNECTION_REQUEST
+ * AE passed up by the adapter. See struct c2wr_ae_connection_request.
+ */
+struct c2wr_cr_accept_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 qp_handle; /* QP to bind to this LLP conn */
+ u32 ep_handle; /* LLP handle to accept */
+ __be32 private_data_length;
+ u8 private_data[0]; /* data in-line in msg. */
+} __attribute__((packed));
+
+/*
+ * adapter sends reply when private data is successfully submitted to
+ * the LLP.
+ */
+struct c2wr_cr_accept_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_cr_accept {
+ struct c2wr_cr_accept_req req;
+ struct c2wr_cr_accept_rep rep;
+} __attribute__((packed));
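+
+/*
+ * Illustrative sketch only: filling in an accept WR with the handles
+ * reported by the CONNECTION_REQUEST AE. Setting the WR id and posting
+ * the message on the verbs request queue are left to the caller and are
+ * not shown here.
+ */
+static __inline__ void
+c2wr_cr_accept_req_init(struct c2wr_cr_accept_req *wr, u32 rnic_handle,
+ u32 qp_handle, u32 ep_handle, u32 pdata_len)
+{
+ wr->rnic_handle = rnic_handle;
+ wr->qp_handle = qp_handle; /* QP to bind to this LLP connection */
+ wr->ep_handle = ep_handle; /* from the CONNECTION_REQUEST AE */
+ wr->private_data_length = cpu_to_be32(pdata_len);
+ /* the caller copies pdata_len bytes of private data in-line here */
+}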
+
+/*
+ * The host sends this down if a given iWARP connection request was
+ * rejected by the consumer. The cr_handle was obtained from a
+ * previous struct c2wr_ae_connection_request AE sent by the adapter.
+ */
+struct c2wr_cr_reject_req {
+ struct c2wr_hdr hdr;
+ u32 rnic_handle;
+ u32 ep_handle; /* LLP handle to reject */
+} __attribute__((packed));
+
+/*
+ * Dunno if this is needed, but we'll add it for now. The adapter will
+ * send the reject_reply after the LLP endpoint has been destroyed.
+ */
+struct c2wr_cr_reject_rep {
+ struct c2wr_hdr hdr;
+} __attribute__((packed));
+
+union c2wr_cr_reject {
+ struct c2wr_cr_reject_req req;
+ struct c2wr_cr_reject_rep rep;
+} __attribute__((packed));
+
+/*
+ * console command. Used to implement a debug console over the verbs
+ * request and reply queues.
+ */
+
+/*
+ * Console request message. It contains:
+ * - message hdr with id = CCWR_CONSOLE
+ * - the physaddr/len of host memory to be used for the reply.
+ * - the command string, e.g. "netstat -s" or "zoneinfo"
+ */
+struct c2wr_console_req {
+ struct c2wr_hdr hdr; /* id = CCWR_CONSOLE */
+ u64 reply_buf; /* pinned host buf for reply */
+ u32 reply_buf_len; /* length of reply buffer */
+ u8 command[0]; /* NUL terminated ascii string */
+ /* containing the command req */
+} __attribute__((packed));
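+
+/*
+ * Illustrative sketch only: building a console request. The reply buffer
+ * is assumed to already be pinned host memory mapped for DMA, and the WR
+ * itself is assumed to have been allocated large enough to hold the
+ * command string in-line.
+ */
+static __inline__ void
+c2wr_console_req_init(struct c2wr_console_req *wr, u64 reply_buf_pa,
+ u32 reply_buf_len, const char *cmd)
+{
+ wr->hdr.id = CCWR_CONSOLE;
+ wr->reply_buf = reply_buf_pa; /* physaddr of pinned reply buffer */
+ wr->reply_buf_len = reply_buf_len;
+ strcpy((char *) wr->command, cmd); /* NUL terminated command string */
+}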
+
+/*
+ * flags used in the console reply.
+ */
+enum c2_console_flags {
+ CONS_REPLY_TRUNCATED = 0x00000001 /* reply was truncated */
+} __attribute__((packed));
+
+/*
+ * Console reply message.
+ * hdr.result contains the c2_status_t error if the reply was _not_ generated,
+ * or C2_OK if the reply was generated.
+ */
+struct c2wr_console_rep {
+ struct c2wr_hdr hdr; /* id = CCWR_CONSOLE */
+ u32 flags;
+} __attribute__((packed));
+
+union c2wr_console {
+ struct c2wr_console_req req;
+ struct c2wr_console_rep rep;
+} __attribute__((packed));
+
+
+/*
+ * Giant union with all WRs. Makes life easier...
+ */
+union c2wr {
+ struct c2wr_hdr hdr;
+ struct c2wr_user_hdr user_hdr;
+ union c2wr_rnic_open rnic_open;
+ union c2wr_rnic_query rnic_query;
+ union c2wr_rnic_getconfig rnic_getconfig;
+ union c2wr_rnic_setconfig rnic_setconfig;
+ union c2wr_rnic_close rnic_close;
+ union c2wr_cq_create cq_create;
+ union c2wr_cq_modify cq_modify;
+ union c2wr_cq_destroy cq_destroy;
+ union c2wr_pd_alloc pd_alloc;
+ union c2wr_pd_dealloc pd_dealloc;
+ union c2wr_srq_create srq_create;
+ union c2wr_srq_destroy srq_destroy;
+ union c2wr_qp_create qp_create;
+ union c2wr_qp_query qp_query;
+ union c2wr_qp_modify qp_modify;
+ union c2wr_qp_destroy qp_destroy;
+ struct c2wr_qp_connect qp_connect;
+ union c2wr_nsmr_stag_alloc nsmr_stag_alloc;
+ union c2wr_nsmr_register nsmr_register;
+ union c2wr_nsmr_pbl nsmr_pbl;
+ union c2wr_mr_query mr_query;
+ union c2wr_mw_query mw_query;
+ union c2wr_stag_dealloc stag_dealloc;
+ union c2wr_sqwr sqwr;
+ struct c2wr_rqwr rqwr;
+ struct c2wr_ce ce;
+ union c2wr_ae ae;
+ union c2wr_init init;
+ union c2wr_ep_listen_create ep_listen_create;
+ union c2wr_ep_listen_destroy ep_listen_destroy;
+ union c2wr_cr_accept cr_accept;
+ union c2wr_cr_reject cr_reject;
+ union c2wr_console console;
+ union c2wr_flash_init flash_init;
+ union c2wr_flash flash;
+ union c2wr_buf_alloc buf_alloc;
+ union c2wr_buf_free buf_free;
+ union c2wr_flash_write flash_write;
+} __attribute__((packed));
+
+
+/*
+ * Accessors for the wr fields that are packed together tightly to
+ * reduce the wr message size. The wr arguments are void* so that
+ * either a struct c2wr*, a struct c2wr_hdr*, or a pointer to any of the types
+ * in the struct c2wr union can be passed in.
+ */
+static __inline__ u8 c2_wr_get_id(void *wr)
+{
+ return ((struct c2wr_hdr *) wr)->id;
+}
+static __inline__ void c2_wr_set_id(void *wr, u8 id)
+{
+ ((struct c2wr_hdr *) wr)->id = id;
+}
+static __inline__ u8 c2_wr_get_result(void *wr)
+{
+ return ((struct c2wr_hdr *) wr)->result;
+}
+static __inline__ void c2_wr_set_result(void *wr, u8 result)
+{
+ ((struct c2wr_hdr *) wr)->result = result;
+}
+static __inline__ u8 c2_wr_get_flags(void *wr)
+{
+ return ((struct c2wr_hdr *) wr)->flags;
+}
+static __inline__ void c2_wr_set_flags(void *wr, u8 flags)
+{
+ ((struct c2wr_hdr *) wr)->flags = flags;
+}
+static __inline__ u8 c2_wr_get_sge_count(void *wr)
+{
+ return ((struct c2wr_hdr *) wr)->sge_count;
+}
+static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count)
+{
+ ((struct c2wr_hdr *) wr)->sge_count = sge_count;
+}
+static __inline__ __be32 c2_wr_get_wqe_count(void *wr)
+{
+ return ((struct c2wr_hdr *) wr)->wqe_count;
+}
+static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count)
+{
+ ((struct c2wr_hdr *) wr)->wqe_count = wqe_count;
+}
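+
+/*
+ * Illustrative sketch only: typical use of the accessors above when
+ * packing the tight header fields of a signaled send WR. The concrete
+ * CCWR id is taken as a parameter here; callers would pass the send id
+ * from the message id enum earlier in this header. The sge_len,
+ * remote_stag and SGE array fields are filled in separately.
+ */
+static __inline__ void c2_wr_pack_send_example(struct c2wr_send_req *wr,
+ u8 id, u8 sge_count)
+{
+ c2_wr_set_id(wr, id);
+ c2_wr_set_flags(wr, SQ_SIGNALED); /* ask for a completion */
+ c2_wr_set_sge_count(wr, sge_count);
+}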
+
+#endif /* _C2_WR_H_ */
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig
new file mode 100644
index 0000000..2acec3f
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/Kconfig
@@ -0,0 +1,27 @@
+config INFINIBAND_CXGB3
+ tristate "Chelsio RDMA Driver"
+ depends on CHELSIO_T3 && INET
+ select GENERIC_ALLOCATOR
+ ---help---
+ This is an iWARP/RDMA driver for the Chelsio T3 1GbE and
+ 10GbE adapters.
+
+ For general information about Chelsio and our products, visit
+ our website at <http://www.chelsio.com>.
+
+ For customer support, please visit our customer support page at
+ <http://www.chelsio.com/support.htm>.
+
+ Please send feedback to <linux-bugs@chelsio.com>.
+
+ To compile this driver as a module, choose M here: the module
+ will be called iw_cxgb3.
+
+config INFINIBAND_CXGB3_DEBUG
+ bool "Verbose debugging output"
+ depends on INFINIBAND_CXGB3
+ default n
+ ---help---
+ This option causes the Chelsio RDMA driver to produce copious
+ amounts of debug messages. Select this if you are developing
+ the driver or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
new file mode 100644
index 0000000..7e7b5a6
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/Makefile
@@ -0,0 +1,10 @@
+EXTRA_CFLAGS += -Idrivers/net/cxgb3
+
+obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
+
+iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
+ iwch_provider.o iwch.o cxio_hal.o cxio_resource.o
+
+ifdef CONFIG_INFINIBAND_CXGB3_DEBUG
+EXTRA_CFLAGS += -DDEBUG
+endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
new file mode 100644
index 0000000..a8d24d5
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifdef DEBUG
+#include <linux/types.h>
+#include "common.h"
+#include "cxgb3_ioctl.h"
+#include "cxio_hal.h"
+#include "cxio_wr.h"
+
+void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag)
+{
+ struct ch_mem_range *m;
+ u64 *data;
+ int rc;
+ int size = 32;
+
+ m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
+ if (!m) {
+ PDBG("%s couldn't allocate memory.\n", __func__);
+ return;
+ }
+ m->mem_id = MEM_PMRX;
+ m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
+ m->len = size;
+ PDBG("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len);
+ rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ if (rc) {
+ PDBG("%s toectl returned error %d\n", __func__, rc);
+ kfree(m);
+ return;
+ }
+
+ data = (u64 *)m->buf;
+ while (size > 0) {
+ PDBG("TPT %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ size -= 8;
+ data++;
+ m->addr += 8;
+ }
+ kfree(m);
+}
+
+void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift)
+{
+ struct ch_mem_range *m;
+ u64 *data;
+ int rc;
+ int size, npages;
+
+ shift += 12;
+ npages = (len + (1ULL << shift) - 1) >> shift;
+ size = npages * sizeof(u64);
+
+ m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
+ if (!m) {
+ PDBG("%s couldn't allocate memory.\n", __func__);
+ return;
+ }
+ m->mem_id = MEM_PMRX;
+ m->addr = pbl_addr;
+ m->len = size;
+ PDBG("%s PBL addr 0x%x len %d depth %d\n",
+ __func__, m->addr, m->len, npages);
+ rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ if (rc) {
+ PDBG("%s toectl returned error %d\n", __func__, rc);
+ kfree(m);
+ return;
+ }
+
+ data = (u64 *)m->buf;
+ while (size > 0) {
+ PDBG("PBL %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ size -= 8;
+ data++;
+ m->addr += 8;
+ }
+ kfree(m);
+}
+
+void cxio_dump_wqe(union t3_wr *wqe)
+{
+ __be64 *data = (__be64 *)wqe;
+ uint size = (uint)(be64_to_cpu(*data) & 0xff);
+
+ if (size == 0)
+ size = 8;
+ while (size > 0) {
+ PDBG("WQE %p: %016llx\n", data,
+ (unsigned long long) be64_to_cpu(*data));
+ size--;
+ data++;
+ }
+}
+
+void cxio_dump_wce(struct t3_cqe *wce)
+{
+ __be64 *data = (__be64 *)wce;
+ int size = sizeof(*wce);
+
+ while (size > 0) {
+ PDBG("WCE %p: %016llx\n", data,
+ (unsigned long long) be64_to_cpu(*data));
+ size -= 8;
+ data++;
+ }
+}
+
+void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents)
+{
+ struct ch_mem_range *m;
+ int size = nents * 64;
+ u64 *data;
+ int rc;
+
+ m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
+ if (!m) {
+ PDBG("%s couldn't allocate memory.\n", __func__);
+ return;
+ }
+ m->mem_id = MEM_PMRX;
+ m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
+ m->len = size;
+ PDBG("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len);
+ rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ if (rc) {
+ PDBG("%s toectl returned error %d\n", __func__, rc);
+ kfree(m);
+ return;
+ }
+
+ data = (u64 *)m->buf;
+ while (size > 0) {
+ PDBG("RQT %08x: %016llx\n", m->addr, (unsigned long long) *data);
+ size -= 8;
+ data++;
+ m->addr += 8;
+ }
+ kfree(m);
+}
+
+void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid)
+{
+ struct ch_mem_range *m;
+ int size = TCB_SIZE;
+ u32 *data;
+ int rc;
+
+ m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
+ if (!m) {
+ PDBG("%s couldn't allocate memory.\n", __func__);
+ return;
+ }
+ m->mem_id = MEM_CM;
+ m->addr = hwtid * size;
+ m->len = size;
+ PDBG("%s TCB %d len %d\n", __func__, m->addr, m->len);
+ rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ if (rc) {
+ PDBG("%s toectl returned error %d\n", __func__, rc);
+ kfree(m);
+ return;
+ }
+
+ data = (u32 *)m->buf;
+ while (size > 0) {
+ printk("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ m->addr,
+ *(data+2), *(data+3), *(data),*(data+1),
+ *(data+6), *(data+7), *(data+4), *(data+5));
+ size -= 32;
+ data += 8;
+ m->addr += 32;
+ }
+ kfree(m);
+}
+#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
new file mode 100644
index 0000000..4dcf08b
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -0,0 +1,1309 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <asm/delay.h>
+
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <net/net_namespace.h>
+
+#include "cxio_resource.h"
+#include "cxio_hal.h"
+#include "cxgb3_offload.h"
+#include "sge_defs.h"
+
+static LIST_HEAD(rdev_list);
+static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
+
+static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
+{
+ struct cxio_rdev *rdev;
+
+ list_for_each_entry(rdev, &rdev_list, entry)
+ if (!strcmp(rdev->dev_name, dev_name))
+ return rdev;
+ return NULL;
+}
+
+static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev)
+{
+ struct cxio_rdev *rdev;
+
+ list_for_each_entry(rdev, &rdev_list, entry)
+ if (rdev->t3cdev_p == tdev)
+ return rdev;
+ return NULL;
+}
+
+int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
+ enum t3_cq_opcode op, u32 credit)
+{
+ int ret;
+ struct t3_cqe *cqe;
+ u32 rptr;
+
+ struct rdma_cq_op setup;
+ setup.id = cq->cqid;
+ setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0;
+ setup.op = op;
+ ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup);
+
+ if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
+ return ret;
+
+ /*
+ * If the rearm returned an index other than our current index,
+ * then there might be CQE's in flight (being DMA'd). We must wait
+ * here for them to complete or the consumer can miss a notification.
+ */
+ if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) {
+ int i=0;
+
+ rptr = cq->rptr;
+
+ /*
+ * Keep the generation correct by bumping rptr until it
+ * matches the index returned by the rearm - 1.
+ */
+ while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret)
+ rptr++;
+
+ /*
+ * Now rptr is the index for the (last) cqe that was
+ * in-flight at the time the HW rearmed the CQ. We
+ * spin until that CQE is valid.
+ */
+ cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2);
+ while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
+ udelay(1);
+ if (i++ > 1000000) {
+ printk(KERN_ERR "%s: stalled rnic\n",
+ rdev_p->dev_name);
+ BUG_ON(1);
+ return -EIO;
+ }
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
+{
+ struct rdma_cq_setup setup;
+ setup.id = cqid;
+ setup.base_addr = 0; /* NULL address */
+ setup.size = 0; /* disable the CQ */
+ setup.credits = 0;
+ setup.credit_thres = 0;
+ setup.ovfl_mode = 0;
+ return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
+{
+ u64 sge_cmd;
+ struct t3_modify_qp_wr *wqe;
+ struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
+ if (!skb) {
+ PDBG("%s alloc_skb failed\n", __func__);
+ return -ENOMEM;
+ }
+ wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
+ memset(wqe, 0, sizeof(*wqe));
+ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD,
+ T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7,
+ T3_SOPEOP);
+ wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
+ sge_cmd = qpid << 8 | 3;
+ wqe->sge_cmd = cpu_to_be64(sge_cmd);
+ skb->priority = CPL_PRIORITY_CONTROL;
+ return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+}
+
+int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+ struct rdma_cq_setup setup;
+ int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
+
+ cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
+ if (!cq->cqid)
+ return -ENOMEM;
+ cq->sw_queue = kzalloc(size, GFP_KERNEL);
+ if (!cq->sw_queue)
+ return -ENOMEM;
+ cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
+ (1UL << (cq->size_log2)) *
+ sizeof(struct t3_cqe),
+ &(cq->dma_addr), GFP_KERNEL);
+ if (!cq->queue) {
+ kfree(cq->sw_queue);
+ return -ENOMEM;
+ }
+ pci_unmap_addr_set(cq, mapping, cq->dma_addr);
+ memset(cq->queue, 0, size);
+ setup.id = cq->cqid;
+ setup.base_addr = (u64) (cq->dma_addr);
+ setup.size = 1UL << cq->size_log2;
+ setup.credits = 65535;
+ setup.credit_thres = 1;
+ if (rdev_p->t3cdev_p->type != T3A)
+ setup.ovfl_mode = 0;
+ else
+ setup.ovfl_mode = 1;
+ return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+ struct rdma_cq_setup setup;
+ setup.id = cq->cqid;
+ setup.base_addr = (u64) (cq->dma_addr);
+ setup.size = 1UL << cq->size_log2;
+ setup.credits = setup.size;
+ setup.credit_thres = setup.size; /* TBD: overflow recovery */
+ setup.ovfl_mode = 1;
+ return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
+{
+ struct cxio_qpid_list *entry;
+ u32 qpid;
+ int i;
+
+ mutex_lock(&uctx->lock);
+ if (!list_empty(&uctx->qpids)) {
+ entry = list_entry(uctx->qpids.next, struct cxio_qpid_list,
+ entry);
+ list_del(&entry->entry);
+ qpid = entry->qpid;
+ kfree(entry);
+ } else {
+ qpid = cxio_hal_get_qpid(rdev_p->rscp);
+ if (!qpid)
+ goto out;
+ for (i = qpid+1; i & rdev_p->qpmask; i++) {
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ break;
+ entry->qpid = i;
+ list_add_tail(&entry->entry, &uctx->qpids);
+ }
+ }
+out:
+ mutex_unlock(&uctx->lock);
+ PDBG("%s qpid 0x%x\n", __func__, qpid);
+ return qpid;
+}
+
+static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
+ struct cxio_ucontext *uctx)
+{
+ struct cxio_qpid_list *entry;
+
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ return;
+ PDBG("%s qpid 0x%x\n", __func__, qpid);
+ entry->qpid = qpid;
+ mutex_lock(&uctx->lock);
+ list_add_tail(&entry->entry, &uctx->qpids);
+ mutex_unlock(&uctx->lock);
+}
+
+void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
+{
+ struct list_head *pos, *nxt;
+ struct cxio_qpid_list *entry;
+
+ mutex_lock(&uctx->lock);
+ list_for_each_safe(pos, nxt, &uctx->qpids) {
+ entry = list_entry(pos, struct cxio_qpid_list, entry);
+ list_del_init(&entry->entry);
+ if (!(entry->qpid & rdev_p->qpmask))
+ cxio_hal_put_qpid(rdev_p->rscp, entry->qpid);
+ kfree(entry);
+ }
+ mutex_unlock(&uctx->lock);
+}
+
+void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
+{
+ INIT_LIST_HEAD(&uctx->qpids);
+ mutex_init(&uctx->lock);
+}
+
+int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
+ struct t3_wq *wq, struct cxio_ucontext *uctx)
+{
+ int depth = 1UL << wq->size_log2;
+ int rqsize = 1UL << wq->rq_size_log2;
+
+ wq->qpid = get_qpid(rdev_p, uctx);
+ if (!wq->qpid)
+ return -ENOMEM;
+
+ wq->rq = kzalloc(depth * sizeof(struct t3_swrq), GFP_KERNEL);
+ if (!wq->rq)
+ goto err1;
+
+ wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize);
+ if (!wq->rq_addr)
+ goto err2;
+
+ wq->sq = kzalloc(depth * sizeof(struct t3_swsq), GFP_KERNEL);
+ if (!wq->sq)
+ goto err3;
+
+ wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
+ depth * sizeof(union t3_wr),
+ &(wq->dma_addr), GFP_KERNEL);
+ if (!wq->queue)
+ goto err4;
+
+ memset(wq->queue, 0, depth * sizeof(union t3_wr));
+ pci_unmap_addr_set(wq, mapping, wq->dma_addr);
+ wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
+ if (!kernel_domain)
+ wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
+ (wq->qpid << rdev_p->qpshift);
+ wq->rdev = rdev_p;
+ PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__,
+ wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
+ return 0;
+err4:
+ kfree(wq->sq);
+err3:
+ cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize);
+err2:
+ kfree(wq->rq);
+err1:
+ put_qpid(rdev_p, wq->qpid, uctx);
+ return -ENOMEM;
+}
+
+int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+ int err;
+ err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid);
+ kfree(cq->sw_queue);
+ dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
+ (1UL << (cq->size_log2))
+ * sizeof(struct t3_cqe), cq->queue,
+ pci_unmap_addr(cq, mapping));
+ cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
+ return err;
+}
+
+int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq,
+ struct cxio_ucontext *uctx)
+{
+ dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
+ (1UL << (wq->size_log2))
+ * sizeof(union t3_wr), wq->queue,
+ pci_unmap_addr(wq, mapping));
+ kfree(wq->sq);
+ cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2));
+ kfree(wq->rq);
+ put_qpid(rdev_p, wq->qpid, uctx);
+ return 0;
+}
+
+static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
+{
+ struct t3_cqe cqe;
+
+ PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
+ wq, cq, cq->sw_rptr, cq->sw_wptr);
+ memset(&cqe, 0, sizeof(cqe));
+ cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
+ V_CQE_OPCODE(T3_SEND) |
+ V_CQE_TYPE(0) |
+ V_CQE_SWCQE(1) |
+ V_CQE_QPID(wq->qpid) |
+ V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
+ cq->size_log2)));
+ *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
+ cq->sw_wptr++;
+}
+
+int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
+{
+ u32 ptr;
+ int flushed = 0;
+
+ PDBG("%s wq %p cq %p\n", __func__, wq, cq);
+
+ /* flush RQ */
+ PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
+ wq->rq_rptr, wq->rq_wptr, count);
+ ptr = wq->rq_rptr + count;
+ while (ptr++ != wq->rq_wptr) {
+ insert_recv_cqe(wq, cq);
+ flushed++;
+ }
+ return flushed;
+}
+
+static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
+ struct t3_swsq *sqp)
+{
+ struct t3_cqe cqe;
+
+ PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
+ wq, cq, cq->sw_rptr, cq->sw_wptr);
+ memset(&cqe, 0, sizeof(cqe));
+ cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
+ V_CQE_OPCODE(sqp->opcode) |
+ V_CQE_TYPE(1) |
+ V_CQE_SWCQE(1) |
+ V_CQE_QPID(wq->qpid) |
+ V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
+ cq->size_log2)));
+ cqe.u.scqe.wrid_hi = sqp->sq_wptr;
+
+ *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
+ cq->sw_wptr++;
+}
+
+int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
+{
+ __u32 ptr;
+ int flushed = 0;
+ struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
+
+ ptr = wq->sq_rptr + count;
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
+ while (ptr != wq->sq_wptr) {
+ insert_sq_cqe(wq, cq, sqp);
+ ptr++;
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
+ flushed++;
+ }
+ return flushed;
+}
+
+/*
+ * Move all CQEs from the HWCQ into the SWCQ.
+ */
+void cxio_flush_hw_cq(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe, *swcqe;
+
+ PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
+ cqe = cxio_next_hw_cqe(cq);
+ while (cqe) {
+ PDBG("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
+ __func__, cq->rptr, cq->sw_wptr);
+ swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
+ *swcqe = *cqe;
+ swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
+ cq->sw_wptr++;
+ cq->rptr++;
+ cqe = cxio_next_hw_cqe(cq);
+ }
+}
+
+static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
+{
+ if (CQE_OPCODE(*cqe) == T3_TERMINATE)
+ return 0;
+
+ if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe))
+ return 0;
+
+ if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
+ return 0;
+
+ if ((CQE_OPCODE(*cqe) == T3_SEND) && RQ_TYPE(*cqe) &&
+ Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
+ return 0;
+
+ return 1;
+}
+
+void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
+{
+ struct t3_cqe *cqe;
+ u32 ptr;
+
+ *count = 0;
+ ptr = cq->sw_rptr;
+ while (!Q_EMPTY(ptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
+ if ((SQ_TYPE(*cqe) ||
+ ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) &&
+ (CQE_QPID(*cqe) == wq->qpid))
+ (*count)++;
+ ptr++;
+ }
+ PDBG("%s cq %p count %d\n", __func__, cq, *count);
+}
+
+void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
+{
+ struct t3_cqe *cqe;
+ u32 ptr;
+
+ *count = 0;
+ PDBG("%s count zero %d\n", __func__, *count);
+ ptr = cq->sw_rptr;
+ while (!Q_EMPTY(ptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
+ if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) &&
+ (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq))
+ (*count)++;
+ ptr++;
+ }
+ PDBG("%s cq %p count %d\n", __func__, cq, *count);
+}
+
+static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
+{
+ struct rdma_cq_setup setup;
+ setup.id = 0;
+ setup.base_addr = 0; /* NULL address */
+ setup.size = 1; /* enable the CQ */
+ setup.credits = 0;
+
+ /* force SGE to redirect to RspQ and interrupt */
+ setup.credit_thres = 0;
+ setup.ovfl_mode = 1;
+ return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
+{
+ int err;
+ u64 sge_cmd, ctx0, ctx1;
+ u64 base_addr;
+ struct t3_modify_qp_wr *wqe;
+ struct sk_buff *skb;
+
+ skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
+ if (!skb) {
+ PDBG("%s alloc_skb failed\n", __func__);
+ return -ENOMEM;
+ }
+ err = cxio_hal_init_ctrl_cq(rdev_p);
+ if (err) {
+ PDBG("%s err %d initializing ctrl_cq\n", __func__, err);
+ goto err;
+ }
+ rdev_p->ctrl_qp.workq = dma_alloc_coherent(
+ &(rdev_p->rnic_info.pdev->dev),
+ (1 << T3_CTRL_QP_SIZE_LOG2) *
+ sizeof(union t3_wr),
+ &(rdev_p->ctrl_qp.dma_addr),
+ GFP_KERNEL);
+ if (!rdev_p->ctrl_qp.workq) {
+ PDBG("%s dma_alloc_coherent failed\n", __func__);
+ err = -ENOMEM;
+ goto err;
+ }
+ pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
+ rdev_p->ctrl_qp.dma_addr);
+ rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
+ memset(rdev_p->ctrl_qp.workq, 0,
+ (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr));
+
+ mutex_init(&rdev_p->ctrl_qp.lock);
+ init_waitqueue_head(&rdev_p->ctrl_qp.waitq);
+
+ /* update HW Ctrl QP context */
+ base_addr = rdev_p->ctrl_qp.dma_addr;
+ base_addr >>= 12;
+ ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) |
+ V_EC_BASE_LO((u32) base_addr & 0xffff));
+ ctx0 <<= 32;
+ ctx0 |= V_EC_CREDITS(FW_WR_NUM);
+ base_addr >>= 16;
+ ctx1 = (u32) base_addr;
+ base_addr >>= 32;
+ ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
+ V_EC_TYPE(0) | V_EC_GEN(1) |
+ V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
+ wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
+ memset(wqe, 0, sizeof(*wqe));
+ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
+ T3_CTL_QP_TID, 7, T3_SOPEOP);
+ wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
+ sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
+ wqe->sge_cmd = cpu_to_be64(sge_cmd);
+ wqe->ctx1 = cpu_to_be64(ctx1);
+ wqe->ctx0 = cpu_to_be64(ctx0);
+ PDBG("CtrlQP dma_addr 0x%llx workq %p size %d\n",
+ (unsigned long long) rdev_p->ctrl_qp.dma_addr,
+ rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
+ skb->priority = CPL_PRIORITY_CONTROL;
+ return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+err:
+ kfree_skb(skb);
+ return err;
+}
+
+static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
+{
+ dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
+ (1UL << T3_CTRL_QP_SIZE_LOG2)
+ * sizeof(union t3_wr), rdev_p->ctrl_qp.workq,
+ pci_unmap_addr(&rdev_p->ctrl_qp, mapping));
+ return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID);
+}
+
+/* write len bytes of data into addr (32B aligned address)
+ * If data is NULL, clear len bytes of memory to zero.
+ * The caller acquires the ctrl_qp lock before the call.
+ */
+static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
+ u32 len, void *data)
+{
+ u32 i, nr_wqe, copy_len;
+ u8 *copy_data;
+ u8 wr_len, utx_len; /* length in 8 byte flit */
+ enum t3_wr_flags flag;
+ __be64 *wqe;
+ u64 utx_cmd;
+ addr &= 0x7FFFFFF;
+ nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */
+ PDBG("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
+ __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
+ nr_wqe, data, addr);
+ utx_len = 3; /* in 32B unit */
+ for (i = 0; i < nr_wqe; i++) {
+ if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
+ T3_CTRL_QP_SIZE_LOG2)) {
+ PDBG("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, "
+ "wait for more space i %d\n", __func__,
+ rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
+ if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
+ !Q_FULL(rdev_p->ctrl_qp.rptr,
+ rdev_p->ctrl_qp.wptr,
+ T3_CTRL_QP_SIZE_LOG2))) {
+ PDBG("%s ctrl_qp workq interrupted\n",
+ __func__);
+ return -ERESTARTSYS;
+ }
+ PDBG("%s ctrl_qp wakeup, continue posting work request "
+ "i %d\n", __func__, i);
+ }
+ wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
+ (1 << T3_CTRL_QP_SIZE_LOG2)));
+ flag = 0;
+ if (i == (nr_wqe - 1)) {
+ /* last WQE */
+ flag = T3_COMPLETION_FLAG;
+ if (len % 32)
+ utx_len = len / 32 + 1;
+ else
+ utx_len = len / 32;
+ }
+
+ /*
+ * Force a CQE to return the credit to the workq in case
+ * we posted more than half the max QP size of WRs
+ */
+ if ((i != 0) &&
+ (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
+ flag = T3_COMPLETION_FLAG;
+ PDBG("%s force completion at i %d\n", __func__, i);
+ }
+
+ /* build the utx mem command */
+ wqe += (sizeof(struct t3_bypass_wr) >> 3);
+ utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3);
+ utx_cmd <<= 32;
+ utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1);
+ *wqe = cpu_to_be64(utx_cmd);
+ wqe++;
+ copy_data = (u8 *) data + i * 96;
+ copy_len = len > 96 ? 96 : len;
+
+ /* clear memory content if data is NULL */
+ if (data)
+ memcpy(wqe, copy_data, copy_len);
+ else
+ memset(wqe, 0, copy_len);
+ if (copy_len % 32)
+ memset(((u8 *) wqe) + copy_len, 0,
+ 32 - (copy_len % 32));
+ wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 +
+ (utx_len << 2);
+ wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
+ (1 << T3_CTRL_QP_SIZE_LOG2)));
+
+ /* wptr in the WRID[31:0] */
+ ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr;
+
+ /*
+ * This must be the last write with a memory barrier
+ * for the genbit
+ */
+ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
+ Q_GENBIT(rdev_p->ctrl_qp.wptr,
+ T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
+ wr_len, T3_SOPEOP);
+ if (flag == T3_COMPLETION_FLAG)
+ ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
+ len -= 96;
+ rdev_p->ctrl_qp.wptr++;
+ }
+ return 0;
+}
+
+/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr
+ * OUT: stag index
+ * TBD: shared memory region support
+ */
+static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
+ u32 *stag, u8 stag_state, u32 pdid,
+ enum tpt_mem_type type, enum tpt_mem_perm perm,
+ u32 zbva, u64 to, u32 len, u8 page_size,
+ u32 pbl_size, u32 pbl_addr)
+{
+ int err;
+ struct tpt_entry tpt;
+ u32 stag_idx;
+ u32 wptr;
+
+ stag_state = stag_state > 0;
+ stag_idx = (*stag) >> 8;
+
+ if (!reset_tpt_entry && *stag == T3_STAG_UNSET) {
+ stag_idx = cxio_hal_get_stag(rdev_p->rscp);
+ if (!stag_idx)
+ return -ENOMEM;
+ *stag = (stag_idx << 8) | ((*stag) & 0xFF);
+ }
+ PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
+ __func__, stag_state, type, pdid, stag_idx);
+
+ mutex_lock(&rdev_p->ctrl_qp.lock);
+
+ /* write TPT entry */
+ if (reset_tpt_entry)
+ memset(&tpt, 0, sizeof(tpt));
+ else {
+ tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID |
+ V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) |
+ V_TPT_STAG_STATE(stag_state) |
+ V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid));
+ BUG_ON(page_size >= 28);
+ tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) |
+ ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) |
+ V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
+ V_TPT_PAGE_SIZE(page_size));
+ tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
+ cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
+ tpt.len = cpu_to_be32(len);
+ tpt.va_hi = cpu_to_be32((u32) (to >> 32));
+ tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
+ tpt.rsvd_bind_cnt_or_pstag = 0;
+ tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
+ cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
+ }
+ err = cxio_hal_ctrl_qp_write_mem(rdev_p,
+ stag_idx +
+ (rdev_p->rnic_info.tpt_base >> 5),
+ sizeof(tpt), &tpt);
+
+ /* release the stag index to free pool */
+ if (reset_tpt_entry)
+ cxio_hal_put_stag(rdev_p->rscp, stag_idx);
+
+ wptr = rdev_p->ctrl_qp.wptr;
+ mutex_unlock(&rdev_p->ctrl_qp.lock);
+ if (!err)
+ if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
+ SEQ32_GE(rdev_p->ctrl_qp.rptr,
+ wptr)))
+ return -ERESTARTSYS;
+ return err;
+}
+
+int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
+ u32 pbl_addr, u32 pbl_size)
+{
+ u32 wptr;
+ int err;
+
+ PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+ __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
+ pbl_size);
+
+ mutex_lock(&rdev_p->ctrl_qp.lock);
+ err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
+ pbl);
+ wptr = rdev_p->ctrl_qp.wptr;
+ mutex_unlock(&rdev_p->ctrl_qp.lock);
+ if (err)
+ return err;
+
+ if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
+ SEQ32_GE(rdev_p->ctrl_qp.rptr,
+ wptr)))
+ return -ERESTARTSYS;
+
+ return 0;
+}
+
+int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+ u8 page_size, u32 pbl_size, u32 pbl_addr)
+{
+ *stag = T3_STAG_UNSET;
+ return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
+ zbva, to, len, page_size, pbl_size, pbl_addr);
+}
+
+int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+ u8 page_size, u32 pbl_size, u32 pbl_addr)
+{
+ return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
+ zbva, to, len, page_size, pbl_size, pbl_addr);
+}
+
+int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
+ u32 pbl_addr)
+{
+ return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
+ pbl_size, pbl_addr);
+}
+
+int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
+{
+ *stag = T3_STAG_UNSET;
+ return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
+ 0, 0);
+}
+
+int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
+{
+ return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
+ 0, 0);
+}
+
+int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr)
+{
+ *stag = T3_STAG_UNSET;
+ return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
+ 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr);
+}
+
+int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
+{
+ struct t3_rdma_init_wr *wqe;
+ struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+ PDBG("%s rdev_p %p\n", __func__, rdev_p);
+ wqe = (struct t3_rdma_init_wr *) __skb_put(skb, sizeof(*wqe));
+ wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
+ wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
+ V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
+ wqe->wrid.id1 = 0;
+ wqe->qpid = cpu_to_be32(attr->qpid);
+ wqe->pdid = cpu_to_be32(attr->pdid);
+ wqe->scqid = cpu_to_be32(attr->scqid);
+ wqe->rcqid = cpu_to_be32(attr->rcqid);
+ wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base);
+ wqe->rq_size = cpu_to_be32(attr->rq_size);
+ wqe->mpaattrs = attr->mpaattrs;
+ wqe->qpcaps = attr->qpcaps;
+ wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
+ wqe->rqe_count = cpu_to_be16(attr->rqe_count);
+ wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type));
+ wqe->ord = cpu_to_be32(attr->ord);
+ wqe->ird = cpu_to_be32(attr->ird);
+ wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
+ wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
+ wqe->irs = cpu_to_be32(attr->irs);
+ skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */
+ return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+}
+
+void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
+{
+ cxio_ev_cb = ev_cb;
+}
+
+void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
+{
+ cxio_ev_cb = NULL;
+}
+
+static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb)
+{
+ static int cnt;
+ struct cxio_rdev *rdev_p = NULL;
+ struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
+ PDBG("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x"
+ " se %0x notify %0x cqbranch %0x creditth %0x\n",
+ cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
+ RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
+ RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
+ RSPQ_CREDIT_THRESH(rsp_msg));
+ PDBG("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d "
+ "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
+ CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+ CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
+ CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+ rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
+ if (!rdev_p) {
+ PDBG("%s called by t3cdev %p with null ulp\n", __func__,
+ t3cdev_p);
+ return 0;
+ }
+ if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
+ rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
+ wake_up_interruptible(&rdev_p->ctrl_qp.waitq);
+ dev_kfree_skb_irq(skb);
+ } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8)
+ dev_kfree_skb_irq(skb);
+ else if (cxio_ev_cb)
+ (*cxio_ev_cb) (rdev_p, skb);
+ else
+ dev_kfree_skb_irq(skb);
+ cnt++;
+ return 0;
+}
+
+/* Caller takes care of locking if needed */
+int cxio_rdev_open(struct cxio_rdev *rdev_p)
+{
+ struct net_device *netdev_p = NULL;
+ int err = 0;
+ if (strlen(rdev_p->dev_name)) {
+ if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) {
+ return -EBUSY;
+ }
+ netdev_p = dev_get_by_name(&init_net, rdev_p->dev_name);
+ if (!netdev_p) {
+ return -EINVAL;
+ }
+ dev_put(netdev_p);
+ } else if (rdev_p->t3cdev_p) {
+ if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) {
+ return -EBUSY;
+ }
+ netdev_p = rdev_p->t3cdev_p->lldev;
+ strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
+ T3_MAX_DEV_NAME_LEN);
+ } else {
+ PDBG("%s t3cdev_p or dev_name must be set\n", __func__);
+ return -EINVAL;
+ }
+
+ list_add_tail(&rdev_p->entry, &rdev_list);
+
+ PDBG("%s opening rnic dev %s\n", __func__, rdev_p->dev_name);
+ memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
+ if (!rdev_p->t3cdev_p)
+ rdev_p->t3cdev_p = dev2t3cdev(netdev_p);
+ rdev_p->t3cdev_p->ulp = (void *) rdev_p;
+ err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
+ &(rdev_p->rnic_info));
+ if (err) {
+ printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
+ __func__, rdev_p->t3cdev_p, err);
+ goto err1;
+ }
+ err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
+ &(rdev_p->port_info));
+ if (err) {
+ printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
+ __func__, rdev_p->t3cdev_p, err);
+ goto err1;
+ }
+
+ /*
+ * qpshift is the number of bits to shift the qpid left in order
+ * to get the correct address of the doorbell for that qp.
+ */
+ cxio_init_ucontext(rdev_p, &rdev_p->uctx);
+ rdev_p->qpshift = PAGE_SHIFT -
+ ilog2(65536 >>
+ ilog2(rdev_p->rnic_info.udbell_len >>
+ PAGE_SHIFT));
+ rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
+ rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
+ PDBG("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d "
+ "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
+ __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
+ rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
+ rdev_p->rnic_info.pbl_base,
+ rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
+ rdev_p->rnic_info.rqt_top);
+ PDBG("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu "
+ "qpnr %d qpmask 0x%x\n",
+ rdev_p->rnic_info.udbell_len,
+ rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
+ rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
+
+ err = cxio_hal_init_ctrl_qp(rdev_p);
+ if (err) {
+ printk(KERN_ERR "%s error %d initializing ctrl_qp.\n",
+ __func__, err);
+ goto err1;
+ }
+ err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
+ 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
+ T3_MAX_NUM_PD);
+ if (err) {
+ printk(KERN_ERR "%s error %d initializing hal resources.\n",
+ __func__, err);
+ goto err2;
+ }
+ err = cxio_hal_pblpool_create(rdev_p);
+ if (err) {
+ printk(KERN_ERR "%s error %d initializing pbl mem pool.\n",
+ __func__, err);
+ goto err3;
+ }
+ err = cxio_hal_rqtpool_create(rdev_p);
+ if (err) {
+ printk(KERN_ERR "%s error %d initializing rqt mem pool.\n",
+ __func__, err);
+ goto err4;
+ }
+ return 0;
+err4:
+ cxio_hal_pblpool_destroy(rdev_p);
+err3:
+ cxio_hal_destroy_resource(rdev_p->rscp);
+err2:
+ cxio_hal_destroy_ctrl_qp(rdev_p);
+err1:
+ list_del(&rdev_p->entry);
+ return err;
+}
+
+void cxio_rdev_close(struct cxio_rdev *rdev_p)
+{
+ if (rdev_p) {
+ cxio_hal_pblpool_destroy(rdev_p);
+ cxio_hal_rqtpool_destroy(rdev_p);
+ list_del(&rdev_p->entry);
+ rdev_p->t3cdev_p->ulp = NULL;
+ cxio_hal_destroy_ctrl_qp(rdev_p);
+ cxio_hal_destroy_resource(rdev_p->rscp);
+ }
+}
+
+int __init cxio_hal_init(void)
+{
+ if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI))
+ return -ENOMEM;
+ t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
+ return 0;
+}
+
+void __exit cxio_hal_exit(void)
+{
+ struct cxio_rdev *rdev, *tmp;
+
+ t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL);
+ list_for_each_entry_safe(rdev, tmp, &rdev_list, entry)
+ cxio_rdev_close(rdev);
+ cxio_hal_destroy_rhdl_resource();
+}
+
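+/*
+ * Walk the SW SQ starting at sq_rptr, skipping unsignaled WRs; if the
+ * first signaled WR reached has already completed, copy its CQE into
+ * the SW CQ and stop.
+ */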
+static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
+{
+ struct t3_swsq *sqp;
+ __u32 ptr = wq->sq_rptr;
+ int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr);
+
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
+ while (count--)
+ if (!sqp->signaled) {
+ ptr++;
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
+ } else if (sqp->complete) {
+
+ /*
+ * Insert this completed cqe into the swcq.
+ */
+ PDBG("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
+ __func__, Q_PTR2IDX(ptr, wq->sq_size_log2),
+ Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
+ sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
+ *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
+ = sqp->cqe;
+ cq->sw_wptr++;
+ sqp->signaled = 0;
+ break;
+ } else
+ break;
+}
+
+static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe,
+ struct t3_cqe *read_cqe)
+{
+ read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr;
+ read_cqe->len = wq->oldest_read->read_len;
+ read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) |
+ V_CQE_SWCQE(SW_CQE(*hw_cqe)) |
+ V_CQE_OPCODE(T3_READ_REQ) |
+ V_CQE_TYPE(1));
+}
+
+/*
+ * Advance wq->oldest_read to the next read WR in the SW SQ,
+ * or set it to NULL if no further read WRs are pending.
+ */
+static void advance_oldest_read(struct t3_wq *wq)
+{
+
+ u32 rptr = wq->oldest_read - wq->sq + 1;
+ u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2);
+
+ while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) {
+ wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2);
+
+ if (wq->oldest_read->opcode == T3_READ_REQ)
+ return;
+ rptr++;
+ }
+ wq->oldest_read = NULL;
+}
+
+/*
+ * cxio_poll_cq
+ *
+ * Caller must:
+ * check the validity of the first CQE,
+ * supply the wq associated with the qpid.
+ *
+ * credit: cq credit to return to sge.
+ * cqe_flushed: 1 iff the CQE is flushed.
+ * cqe: copy of the polled CQE.
+ *
+ * return value:
+ * 0 CQE returned,
+ * -1 CQE skipped, try again.
+ */
+int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
+ u8 *cqe_flushed, u64 *cookie, u32 *credit)
+{
+ int ret = 0;
+ struct t3_cqe *hw_cqe, read_cqe;
+
+ *cqe_flushed = 0;
+ *credit = 0;
+ hw_cqe = cxio_next_cqe(cq);
+
+ PDBG("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x"
+ " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
+ CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
+ CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
+ CQE_WRID_LOW(*hw_cqe));
+
+ /*
+ * skip CQEs not affiliated with a QP.
+ */
+ if (wq == NULL) {
+ ret = -1;
+ goto skip_cqe;
+ }
+
+ /*
+ * Gotta tweak READ completions:
+ * 1) the cqe doesn't contain the sq_wptr from the wr.
+ * 2) opcode not reflected from the wr.
+ * 3) read_len not reflected from the wr.
+ * 4) cq_type is RQ_TYPE not SQ_TYPE.
+ */
+ if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
+
+ /*
+ * If this is an unsolicited read response, then the read
+ * was generated by the kernel driver as part of peer-2-peer
+ * connection setup. So ignore the completion.
+ */
+ if (!wq->oldest_read) {
+ if (CQE_STATUS(*hw_cqe))
+ wq->error = 1;
+ ret = -1;
+ goto skip_cqe;
+ }
+
+ /*
+ * Don't write to the HWCQ, so create a new read req CQE
+ * in local memory.
+ */
+ create_read_req_cqe(wq, hw_cqe, &read_cqe);
+ hw_cqe = &read_cqe;
+ advance_oldest_read(wq);
+ }
+
+ /*
+ * T3A: Discard TERMINATE CQEs.
+ */
+ if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) {
+ ret = -1;
+ wq->error = 1;
+ goto skip_cqe;
+ }
+
+ if (CQE_STATUS(*hw_cqe) || wq->error) {
+ *cqe_flushed = wq->error;
+ wq->error = 1;
+
+ /*
+ * T3A inserts errors into the CQE. We cannot return
+ * these as work completions.
+ */
+ /* incoming write failures */
+ if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE)
+ && RQ_TYPE(*hw_cqe)) {
+ ret = -1;
+ goto skip_cqe;
+ }
+ /* incoming read request failures */
+ if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) {
+ ret = -1;
+ goto skip_cqe;
+ }
+
+ /* incoming SEND with no receive posted failures */
+ if ((CQE_OPCODE(*hw_cqe) == T3_SEND) && RQ_TYPE(*hw_cqe) &&
+ Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
+ ret = -1;
+ goto skip_cqe;
+ }
+ goto proc_cqe;
+ }
+
+ /*
+ * RECV completion.
+ */
+ if (RQ_TYPE(*hw_cqe)) {
+
+ /*
+ * HW only validates 4 bits of MSN. So we must validate that
+ * the MSN in the SEND is the next expected MSN. If it's not,
+ * then we complete this with TPT_ERR_MSN and mark the wq in
+ * error.
+ */
+ if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
+ wq->error = 1;
+ hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
+ goto proc_cqe;
+ }
+ goto proc_cqe;
+ }
+
+ /*
+ * If we get here, it's a send completion.
+ *
+ * Handle out of order completion. These get stuffed
+ * in the SW SQ. Then the SW SQ is walked to move any
+ * now in-order completions into the SW CQ. This handles
+ * 2 cases:
+ * 1) reaping unsignaled WRs when the first subsequent
+ * signaled WR is completed.
+ * 2) out of order read completions.
+ */
+ if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) {
+ struct t3_swsq *sqp;
+
+ PDBG("%s out of order completion going in swsq at idx %ld\n",
+ __func__,
+ Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2));
+ sqp = wq->sq +
+ Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
+ sqp->cqe = *hw_cqe;
+ sqp->complete = 1;
+ ret = -1;
+ goto flush_wq;
+ }
+
+proc_cqe:
+ *cqe = *hw_cqe;
+
+ /*
+ * Reap the associated WR(s) that are freed up with this
+ * completion.
+ */
+ if (SQ_TYPE(*hw_cqe)) {
+ wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
+ PDBG("%s completing sq idx %ld\n", __func__,
+ Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
+ *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
+ wq->sq_rptr++;
+ } else {
+ PDBG("%s completing rq idx %ld\n", __func__,
+ Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+ *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
+ if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
+ cxio_hal_pblpool_free(wq->rdev,
+ wq->rq[Q_PTR2IDX(wq->rq_rptr,
+ wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
+ wq->rq_rptr++;
+ }
+
+flush_wq:
+ /*
+ * Flush any completed cqes that are now in-order.
+ */
+ flush_completed_wrs(wq, cq);
+
+skip_cqe:
+ if (SW_CQE(*hw_cqe)) {
+ PDBG("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
+ __func__, cq, cq->cqid, cq->sw_rptr);
+ ++cq->sw_rptr;
+ } else {
+ PDBG("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
+ __func__, cq, cq->cqid, cq->rptr);
+ ++cq->rptr;
+
+ /*
+ * T3A: compute credits.
+ */
+ if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1)))
+ || ((cq->rptr - cq->wptr) >= 128)) {
+ *credit = cq->rptr - cq->wptr;
+ cq->wptr = cq->rptr;
+ }
+ }
+ return ret;
+}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
new file mode 100644
index 0000000..656fe47
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __CXIO_HAL_H__
+#define __CXIO_HAL_H__
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+#include "t3_cpl.h"
+#include "t3cdev.h"
+#include "cxgb3_ctl_defs.h"
+#include "cxio_wr.h"
+
+#define T3_CTRL_QP_ID FW_RI_SGEEC_START
+#define T3_CTL_QP_TID FW_RI_TID_START
+#define T3_CTRL_QP_SIZE_LOG2 8
+#define T3_CTRL_CQ_ID 0
+
+#define T3_MAX_NUM_RI (1<<15)
+#define T3_MAX_NUM_QP (1<<15)
+#define T3_MAX_NUM_CQ (1<<15)
+#define T3_MAX_NUM_PD (1<<15)
+#define T3_MAX_PBL_SIZE 256
+#define T3_MAX_RQ_SIZE 1024
+#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
+#define T3_MAX_CQ_DEPTH 8192
+#define T3_MAX_NUM_STAG (1<<15)
+#define T3_MAX_MR_SIZE 0x100000000ULL
+#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
+
+#define T3_STAG_UNSET 0xffffffff
+
+#define T3_MAX_DEV_NAME_LEN 32
+
+struct cxio_hal_ctrl_qp {
+ u32 wptr;
+ u32 rptr;
+ struct mutex lock; /* for the wptr, can sleep */
+ wait_queue_head_t waitq;/* wait for RspQ/CQE msg */
+ union t3_wr *workq; /* the work request queue */
+ dma_addr_t dma_addr; /* pci bus address of the workq */
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+ void __iomem *doorbell;
+};
+
+struct cxio_hal_resource {
+ struct kfifo *tpt_fifo;
+ spinlock_t tpt_fifo_lock;
+ struct kfifo *qpid_fifo;
+ spinlock_t qpid_fifo_lock;
+ struct kfifo *cqid_fifo;
+ spinlock_t cqid_fifo_lock;
+ struct kfifo *pdid_fifo;
+ spinlock_t pdid_fifo_lock;
+};
+
+struct cxio_qpid_list {
+ struct list_head entry;
+ u32 qpid;
+};
+
+struct cxio_ucontext {
+ struct list_head qpids;
+ struct mutex lock;
+};
+
+struct cxio_rdev {
+ char dev_name[T3_MAX_DEV_NAME_LEN];
+ struct t3cdev *t3cdev_p;
+ struct rdma_info rnic_info;
+ struct adap_ports port_info;
+ struct cxio_hal_resource *rscp;
+ struct cxio_hal_ctrl_qp ctrl_qp;
+ void *ulp;
+ unsigned long qpshift;
+ u32 qpnr;
+ u32 qpmask;
+ struct cxio_ucontext uctx;
+ struct gen_pool *pbl_pool;
+ struct gen_pool *rqt_pool;
+ struct list_head entry;
+};
+
+static inline int cxio_num_stags(struct cxio_rdev *rdev_p)
+{
+ return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5));
+}
+
+typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p,
+ struct sk_buff * skb);
+
+#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff)
+#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff)
+#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1)
+#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1)
+#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1)
+#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1)
+#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1)
+#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1)
+#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1)
+
+struct respQ_msg_t {
+ __be32 flags; /* flit 0 */
+ __be32 cq_ptrid;
+ __be64 rsvd; /* flit 1 */
+ struct t3_cqe cqe; /* flits 2-3 */
+};
+
+enum t3_cq_opcode {
+ CQ_ARM_AN = 0x2,
+ CQ_ARM_SE = 0x6,
+ CQ_FORCE_AN = 0x3,
+ CQ_CREDIT_UPDATE = 0x7
+};
+
+int cxio_rdev_open(struct cxio_rdev *rdev);
+void cxio_rdev_close(struct cxio_rdev *rdev);
+int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
+ enum t3_cq_opcode op, u32 credit);
+int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
+void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
+int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
+ struct cxio_ucontext *uctx);
+int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
+ struct cxio_ucontext *uctx);
+int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
+int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
+ u32 pbl_addr, u32 pbl_size);
+int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+ u8 page_size, u32 pbl_size, u32 pbl_addr);
+int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+ u8 page_size, u32 pbl_size, u32 pbl_addr);
+int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
+ u32 pbl_addr);
+int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
+int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr);
+int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
+int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
+void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
+void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
+u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
+void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
+int __init cxio_hal_init(void);
+void __exit cxio_hal_exit(void);
+int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
+int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
+void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
+void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
+void cxio_flush_hw_cq(struct t3_cq *cq);
+int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
+ u8 *cqe_flushed, u64 *cookie, u32 *credit);
+
+#define MOD "iw_cxgb3: "
+#define PDBG(fmt, args...) pr_debug(MOD fmt, ## args)
+
+#ifdef DEBUG
+void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
+void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint len, u8 shift);
+void cxio_dump_wqe(union t3_wr *wqe);
+void cxio_dump_wce(struct t3_cqe *wce);
+void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents);
+void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid);
+#endif
+
+#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
new file mode 100644
index 0000000..bd233c0
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/* Crude resource management */
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/kfifo.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include "cxio_resource.h"
+#include "cxio_hal.h"
+
+static struct kfifo *rhdl_fifo;
+static spinlock_t rhdl_fifo_lock;
+
+#define RANDOM_SIZE 16
+
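+/*
+ * Fill a kfifo with the resource ids [skip_low, nr - skip_high).  When
+ * "random" is set, the ids are handed out in a pseudo-random order by
+ * shuffling them through a small window of RANDOM_SIZE entries fed
+ * from random32().
+ */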
+static int __cxio_init_resource_fifo(struct kfifo **fifo,
+ spinlock_t *fifo_lock,
+ u32 nr, u32 skip_low,
+ u32 skip_high,
+ int random)
+{
+ u32 i, j, entry = 0, idx;
+ u32 random_bytes;
+ u32 rarray[16];
+ spin_lock_init(fifo_lock);
+
+ *fifo = kfifo_alloc(nr * sizeof(u32), GFP_KERNEL, fifo_lock);
+ if (IS_ERR(*fifo))
+ return -ENOMEM;
+
+ for (i = 0; i < skip_low + skip_high; i++)
+ __kfifo_put(*fifo, (unsigned char *) &entry, sizeof(u32));
+ if (random) {
+ j = 0;
+ random_bytes = random32();
+ for (i = 0; i < RANDOM_SIZE; i++)
+ rarray[i] = i + skip_low;
+ for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
+ if (j >= RANDOM_SIZE) {
+ j = 0;
+ random_bytes = random32();
+ }
+ idx = (random_bytes >> (j * 2)) & 0xF;
+ __kfifo_put(*fifo,
+ (unsigned char *) &rarray[idx],
+ sizeof(u32));
+ rarray[idx] = i;
+ j++;
+ }
+ for (i = 0; i < RANDOM_SIZE; i++)
+ __kfifo_put(*fifo,
+ (unsigned char *) &rarray[i],
+ sizeof(u32));
+ } else
+ for (i = skip_low; i < nr - skip_high; i++)
+ __kfifo_put(*fifo, (unsigned char *) &i, sizeof(u32));
+
+ for (i = 0; i < skip_low + skip_high; i++)
+ kfifo_get(*fifo, (unsigned char *) &entry, sizeof(u32));
+ return 0;
+}
+
+static int cxio_init_resource_fifo(struct kfifo **fifo, spinlock_t * fifo_lock,
+ u32 nr, u32 skip_low, u32 skip_high)
+{
+ return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
+ skip_high, 0));
+}
+
+static int cxio_init_resource_fifo_random(struct kfifo **fifo,
+ spinlock_t * fifo_lock,
+ u32 nr, u32 skip_low, u32 skip_high)
+{
+
+ return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
+ skip_high, 1));
+}
+
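+/*
+ * QPIDs 0-15 are reserved.  Only QPIDs that begin a user doorbell page
+ * (i.e. (qpid & qpmask) == 0) are placed in the fifo and handed out.
+ */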
+static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
+{
+ u32 i;
+
+ spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
+
+ rdev_p->rscp->qpid_fifo = kfifo_alloc(T3_MAX_NUM_QP * sizeof(u32),
+ GFP_KERNEL,
+ &rdev_p->rscp->qpid_fifo_lock);
+ if (IS_ERR(rdev_p->rscp->qpid_fifo))
+ return -ENOMEM;
+
+ for (i = 16; i < T3_MAX_NUM_QP; i++)
+ if (!(i & rdev_p->qpmask))
+ __kfifo_put(rdev_p->rscp->qpid_fifo,
+ (unsigned char *) &i, sizeof(u32));
+ return 0;
+}
+
+int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
+{
+ return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1,
+ 0);
+}
+
+void cxio_hal_destroy_rhdl_resource(void)
+{
+ kfifo_free(rhdl_fifo);
+}
+
+/* nr_* must be power of 2 */
+int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
+ u32 nr_tpt, u32 nr_pbl,
+ u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid)
+{
+ int err = 0;
+ struct cxio_hal_resource *rscp;
+
+ rscp = kmalloc(sizeof(*rscp), GFP_KERNEL);
+ if (!rscp)
+ return -ENOMEM;
+ rdev_p->rscp = rscp;
+ err = cxio_init_resource_fifo_random(&rscp->tpt_fifo,
+ &rscp->tpt_fifo_lock,
+ nr_tpt, 1, 0);
+ if (err)
+ goto tpt_err;
+ err = cxio_init_qpid_fifo(rdev_p);
+ if (err)
+ goto qpid_err;
+ err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock,
+ nr_cqid, 1, 0);
+ if (err)
+ goto cqid_err;
+ err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock,
+ nr_pdid, 1, 0);
+ if (err)
+ goto pdid_err;
+ return 0;
+pdid_err:
+ kfifo_free(rscp->cqid_fifo);
+cqid_err:
+ kfifo_free(rscp->qpid_fifo);
+qpid_err:
+ kfifo_free(rscp->tpt_fifo);
+tpt_err:
+ return -ENOMEM;
+}
+
+/*
+ * returns 0 if no resource available
+ */
+static u32 cxio_hal_get_resource(struct kfifo *fifo)
+{
+ u32 entry;
+ if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)))
+ return entry;
+ else
+ return 0; /* fifo empty */
+}
+
+static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
+{
+ BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0);
+}
+
+u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
+{
+ return cxio_hal_get_resource(rscp->tpt_fifo);
+}
+
+void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
+{
+ cxio_hal_put_resource(rscp->tpt_fifo, stag);
+}
+
+u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
+{
+ u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo);
+ PDBG("%s qpid 0x%x\n", __func__, qpid);
+ return qpid;
+}
+
+void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
+{
+ PDBG("%s qpid 0x%x\n", __func__, qpid);
+ cxio_hal_put_resource(rscp->qpid_fifo, qpid);
+}
+
+u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
+{
+ return cxio_hal_get_resource(rscp->cqid_fifo);
+}
+
+void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
+{
+ cxio_hal_put_resource(rscp->cqid_fifo, cqid);
+}
+
+u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
+{
+ return cxio_hal_get_resource(rscp->pdid_fifo);
+}
+
+void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
+{
+ cxio_hal_put_resource(rscp->pdid_fifo, pdid);
+}
+
+void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
+{
+ kfifo_free(rscp->tpt_fifo);
+ kfifo_free(rscp->cqid_fifo);
+ kfifo_free(rscp->qpid_fifo);
+ kfifo_free(rscp->pdid_fifo);
+ kfree(rscp);
+}
+
+/*
+ * PBL Memory Manager. Uses Linux generic allocator.
+ */
+
+#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */
+
+u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
+{
+ unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size);
+ PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ return (u32)addr;
+}
+
+void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
+{
+ PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size);
+}
+
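+/*
+ * Hand the adapter's PBL region [pbl_base, pbl_top] to a gen_pool.
+ * Chunks are added as large as gen_pool_add() will take; on failure the
+ * chunk size is halved, and the loop gives up once chunks shrink to
+ * 1024 << MIN_PBL_SHIFT bytes or less.
+ */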
+int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
+{
+ unsigned pbl_start, pbl_chunk;
+
+ rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
+ if (!rdev_p->pbl_pool)
+ return -ENOMEM;
+
+ pbl_start = rdev_p->rnic_info.pbl_base;
+ pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1;
+
+ while (pbl_start < rdev_p->rnic_info.pbl_top) {
+ pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1,
+ pbl_chunk);
+ if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) {
+ PDBG("%s failed to add PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
+ if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
+ printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n",
+ __func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start);
+ return 0;
+ }
+ pbl_chunk >>= 1;
+ } else {
+ PDBG("%s added PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
+ pbl_start += pbl_chunk;
+ }
+ }
+
+ return 0;
+}
+
+void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
+{
+ gen_pool_destroy(rdev_p->pbl_pool);
+}
+
+/*
+ * RQT Memory Manager. Uses Linux generic allocator.
+ */
+
+#define MIN_RQT_SHIFT 10 /* 1KB == min RQT size (16 entries) */
+#define RQT_CHUNK 2*1024*1024
+
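+/*
+ * RQT sizes are passed in units of RQ entries; each entry occupies 64
+ * bytes of adapter memory, hence the "size << 6" below.
+ */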
+u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size)
+{
+ unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6);
+ PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
+ return (u32)addr;
+}
+
+void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
+{
+ PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6);
+ gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6);
+}
+
+int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p)
+{
+ unsigned long i;
+ rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
+ if (rdev_p->rqt_pool)
+ for (i = rdev_p->rnic_info.rqt_base;
+ i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1;
+ i += RQT_CHUNK)
+ gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1);
+ return rdev_p->rqt_pool ? 0 : -ENOMEM;
+}
+
+void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p)
+{
+ gen_pool_destroy(rdev_p->rqt_pool);
+}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h
new file mode 100644
index 0000000..a2703a3
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __CXIO_RESOURCE_H__
+#define __CXIO_RESOURCE_H__
+
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/kfifo.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/genalloc.h>
+#include "cxio_hal.h"
+
+extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl);
+extern void cxio_hal_destroy_rhdl_resource(void);
+extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
+ u32 nr_tpt, u32 nr_pbl,
+ u32 nr_rqt, u32 nr_qpid, u32 nr_cqid,
+ u32 nr_pdid);
+extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag);
+extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid);
+extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid);
+extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp);
+
+#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base )
+extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p);
+extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p);
+extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size);
+extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
+
+#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base )
+extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p);
+extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p);
+extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size);
+extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
+#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
new file mode 100644
index 0000000..04618f7
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -0,0 +1,759 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __CXIO_WR_H__
+#define __CXIO_WR_H__
+
+#include <asm/io.h>
+#include <linux/pci.h>
+#include <linux/timer.h>
+#include "firmware_exports.h"
+
+#define T3_MAX_SGE 4
+#define T3_MAX_INLINE 64
+#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3)
+#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024)
+#define T3_STAG0_PAGE_SHIFT 15
+
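+/*
+ * Queue pointers (rptr/wptr) are free-running 32-bit counters; the
+ * actual array index is ptr & (size - 1) (Q_PTR2IDX) and the generation
+ * bit flips each time the pointer wraps the queue (Q_GENBIT).
+ */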
+#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
+#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \
+ ((rptr)!=(wptr)) )
+#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1))
+#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr)))
+#define Q_COUNT(rptr,wptr) ((wptr)-(rptr))
+#define Q_PTR2IDX(ptr,size_log2) (ptr & ((1UL<<size_log2)-1))
+
+static inline void ring_doorbell(void __iomem *doorbell, u32 qpid)
+{
+ writel(((1<<31) | qpid), doorbell);
+}
+
+#define SEQ32_GE(x,y) (!( (((u32) (x)) - ((u32) (y))) & 0x80000000 ))
+
+enum t3_wr_flags {
+ T3_COMPLETION_FLAG = 0x01,
+ T3_NOTIFY_FLAG = 0x02,
+ T3_SOLICITED_EVENT_FLAG = 0x04,
+ T3_READ_FENCE_FLAG = 0x08,
+ T3_LOCAL_FENCE_FLAG = 0x10
+} __attribute__ ((packed));
+
+enum t3_wr_opcode {
+ T3_WR_BP = FW_WROPCODE_RI_BYPASS,
+ T3_WR_SEND = FW_WROPCODE_RI_SEND,
+ T3_WR_WRITE = FW_WROPCODE_RI_RDMA_WRITE,
+ T3_WR_READ = FW_WROPCODE_RI_RDMA_READ,
+ T3_WR_INV_STAG = FW_WROPCODE_RI_LOCAL_INV,
+ T3_WR_BIND = FW_WROPCODE_RI_BIND_MW,
+ T3_WR_RCV = FW_WROPCODE_RI_RECEIVE,
+ T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT,
+ T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP,
+ T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR
+} __attribute__ ((packed));
+
+enum t3_rdma_opcode {
+ T3_RDMA_WRITE, /* IETF RDMAP v1.0 ... */
+ T3_READ_REQ,
+ T3_READ_RESP,
+ T3_SEND,
+ T3_SEND_WITH_INV,
+ T3_SEND_WITH_SE,
+ T3_SEND_WITH_SE_INV,
+ T3_TERMINATE,
+ T3_RDMA_INIT, /* CHELSIO RI specific ... */
+ T3_BIND_MW,
+ T3_FAST_REGISTER,
+ T3_LOCAL_INV,
+ T3_QP_MOD,
+ T3_BYPASS,
+ T3_RDMA_READ_REQ_WITH_INV,
+} __attribute__ ((packed));
+
+static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
+{
+ switch (wrop) {
+ case T3_WR_BP: return T3_BYPASS;
+ case T3_WR_SEND: return T3_SEND;
+ case T3_WR_WRITE: return T3_RDMA_WRITE;
+ case T3_WR_READ: return T3_READ_REQ;
+ case T3_WR_INV_STAG: return T3_LOCAL_INV;
+ case T3_WR_BIND: return T3_BIND_MW;
+ case T3_WR_INIT: return T3_RDMA_INIT;
+ case T3_WR_QP_MOD: return T3_QP_MOD;
+ case T3_WR_FASTREG: return T3_FAST_REGISTER;
+ default: break;
+ }
+ return -1;
+}
+
+
+/* Work request id */
+union t3_wrid {
+ struct {
+ u32 hi;
+ u32 low;
+ } id0;
+ u64 id1;
+};
+
+#define WRID(wrid) (wrid.id1)
+#define WRID_GEN(wrid) (wrid.id0.wr_gen)
+#define WRID_IDX(wrid) (wrid.id0.wr_idx)
+#define WRID_LO(wrid) (wrid.id0.wr_lo)
+
+struct fw_riwrh {
+ __be32 op_seop_flags;
+ __be32 gen_tid_len;
+};
+
+#define S_FW_RIWR_OP 24
+#define M_FW_RIWR_OP 0xff
+#define V_FW_RIWR_OP(x) ((x) << S_FW_RIWR_OP)
+#define G_FW_RIWR_OP(x) ((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP)
+
+#define S_FW_RIWR_SOPEOP 22
+#define M_FW_RIWR_SOPEOP 0x3
+#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP)
+
+#define S_FW_RIWR_FLAGS 8
+#define M_FW_RIWR_FLAGS 0x3fffff
+#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS)
+#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS)
+
+#define S_FW_RIWR_TID 8
+#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID)
+
+#define S_FW_RIWR_LEN 0
+#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN)
+
+#define S_FW_RIWR_GEN 31
+#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN)
+
+struct t3_sge {
+ __be32 stag;
+ __be32 len;
+ __be64 to;
+};
+
+/* If num_sgle is zero, flit 5+ contains immediate data. */
+struct t3_send_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+
+ u8 rdmaop; /* 2 */
+ u8 reserved[3];
+ __be32 rem_stag;
+ __be32 plen; /* 3 */
+ __be32 num_sgle;
+ struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */
+};
+
+#define T3_MAX_FASTREG_DEPTH 24
+#define T3_MAX_FASTREG_FRAG 10
+
+struct t3_fastreg_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ __be32 stag; /* 2 */
+ __be32 len;
+ __be32 va_base_hi; /* 3 */
+ __be32 va_base_lo_fbo;
+ __be32 page_type_perms; /* 4 */
+ __be32 reserved1;
+ __be64 pbl_addrs[0]; /* 5+ */
+};
+
+/*
+ * If a fastreg wr spans multiple wqes, then the 2nd fragment looks like this.
+ */
+struct t3_pbl_frag {
+ struct fw_riwrh wrh; /* 0 */
+ __be64 pbl_addrs[14]; /* 1..14 */
+};
+
+#define S_FR_PAGE_COUNT 24
+#define M_FR_PAGE_COUNT 0xff
+#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT)
+#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT)
+
+#define S_FR_PAGE_SIZE 16
+#define M_FR_PAGE_SIZE 0x1f
+#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE)
+#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE)
+
+#define S_FR_TYPE 8
+#define M_FR_TYPE 0x1
+#define V_FR_TYPE(x) ((x) << S_FR_TYPE)
+#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE)
+
+#define S_FR_PERMS 0
+#define M_FR_PERMS 0xff
+#define V_FR_PERMS(x) ((x) << S_FR_PERMS)
+#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS)
+
+struct t3_local_inv_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ __be32 stag; /* 2 */
+ __be32 reserved;
+};
+
+struct t3_rdma_write_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ u8 rdmaop; /* 2 */
+ u8 reserved[3];
+ __be32 stag_sink;
+ __be64 to_sink; /* 3 */
+ __be32 plen; /* 4 */
+ __be32 num_sgle;
+ struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */
+};
+
+struct t3_rdma_read_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ u8 rdmaop; /* 2 */
+ u8 local_inv;
+ u8 reserved[2];
+ __be32 rem_stag;
+ __be64 rem_to; /* 3 */
+ __be32 local_stag; /* 4 */
+ __be32 local_len;
+ __be64 local_to; /* 5 */
+};
+
+struct t3_bind_mw_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ u16 reserved; /* 2 */
+ u8 type;
+ u8 perms;
+ __be32 mr_stag;
+ __be32 mw_stag; /* 3 */
+ __be32 mw_len;
+ __be64 mw_va; /* 4 */
+ __be32 mr_pbl_addr; /* 5 */
+ u8 reserved2[3];
+ u8 mr_pagesz;
+};
+
+struct t3_receive_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ u8 pagesz[T3_MAX_SGE];
+ __be32 num_sgle; /* 2 */
+ struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */
+ __be32 pbl_addr[T3_MAX_SGE];
+};
+
+struct t3_bypass_wr {
+ struct fw_riwrh wrh;
+ union t3_wrid wrid; /* 1 */
+};
+
+struct t3_modify_qp_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ __be32 flags; /* 2 */
+ __be32 quiesce; /* 2 */
+ __be32 max_ird; /* 3 */
+ __be32 max_ord; /* 3 */
+ __be64 sge_cmd; /* 4 */
+ __be64 ctx1; /* 5 */
+ __be64 ctx0; /* 6 */
+};
+
+enum t3_modify_qp_flags {
+ MODQP_QUIESCE = 0x01,
+ MODQP_MAX_IRD = 0x02,
+ MODQP_MAX_ORD = 0x04,
+ MODQP_WRITE_EC = 0x08,
+ MODQP_READ_EC = 0x10,
+};
+
+
+enum t3_mpa_attrs {
+ uP_RI_MPA_RX_MARKER_ENABLE = 0x1,
+ uP_RI_MPA_TX_MARKER_ENABLE = 0x2,
+ uP_RI_MPA_CRC_ENABLE = 0x4,
+ uP_RI_MPA_IETF_ENABLE = 0x8
+} __attribute__ ((packed));
+
+enum t3_qp_caps {
+ uP_RI_QP_RDMA_READ_ENABLE = 0x01,
+ uP_RI_QP_RDMA_WRITE_ENABLE = 0x02,
+ uP_RI_QP_BIND_ENABLE = 0x04,
+ uP_RI_QP_FAST_REGISTER_ENABLE = 0x08,
+ uP_RI_QP_STAG0_ENABLE = 0x10
+} __attribute__ ((packed));
+
+enum rdma_init_rtr_types {
+ RTR_READ = 1,
+ RTR_WRITE = 2,
+ RTR_SEND = 3,
+};
+
+#define S_RTR_TYPE 2
+#define M_RTR_TYPE 0x3
+#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE)
+#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
+
+struct t3_rdma_init_attr {
+ u32 tid;
+ u32 qpid;
+ u32 pdid;
+ u32 scqid;
+ u32 rcqid;
+ u32 rq_addr;
+ u32 rq_size;
+ enum t3_mpa_attrs mpaattrs;
+ enum t3_qp_caps qpcaps;
+ u16 tcp_emss;
+ u32 ord;
+ u32 ird;
+ u64 qp_dma_addr;
+ u32 qp_dma_size;
+ enum rdma_init_rtr_types rtr_type;
+ u16 flags;
+ u16 rqe_count;
+ u32 irs;
+};
+
+struct t3_rdma_init_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ __be32 qpid; /* 2 */
+ __be32 pdid;
+ __be32 scqid; /* 3 */
+ __be32 rcqid;
+ __be32 rq_addr; /* 4 */
+ __be32 rq_size;
+ u8 mpaattrs; /* 5 */
+ u8 qpcaps;
+ __be16 ulpdu_size;
+ __be16 flags_rtr_type;
+ __be16 rqe_count;
+ __be32 ord; /* 6 */
+ __be32 ird;
+ __be64 qp_dma_addr; /* 7 */
+ __be32 qp_dma_size; /* 8 */
+ __be32 irs;
+};
+
+struct t3_genbit {
+ u64 flit[15];
+ __be64 genbit;
+};
+
+struct t3_wq_in_err {
+ u64 flit[13];
+ u64 err;
+};
+
+enum rdma_init_wr_flags {
+ MPA_INITIATOR = (1<<0),
+ PRIV_QP = (1<<1),
+};
+
+union t3_wr {
+ struct t3_send_wr send;
+ struct t3_rdma_write_wr write;
+ struct t3_rdma_read_wr read;
+ struct t3_receive_wr recv;
+ struct t3_fastreg_wr fastreg;
+ struct t3_pbl_frag pbl_frag;
+ struct t3_local_inv_wr local_inv;
+ struct t3_bind_mw_wr bind;
+ struct t3_bypass_wr bypass;
+ struct t3_rdma_init_wr init;
+ struct t3_modify_qp_wr qp_mod;
+ struct t3_genbit genbit;
+ struct t3_wq_in_err wq_in_err;
+ __be64 flit[16];
+};
+
+#define T3_SQ_CQE_FLIT 13
+#define T3_SQ_COOKIE_FLIT 14
+
+#define T3_RQ_COOKIE_FLIT 13
+#define T3_RQ_CQE_FLIT 14
+
+static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe)
+{
+ return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags));
+}
+
+enum t3_wr_hdr_bits {
+ T3_EOP = 1,
+ T3_SOP = 2,
+ T3_SOPEOP = T3_EOP|T3_SOP,
+};
+
+static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op,
+ enum t3_wr_flags flags, u8 genbit, u32 tid,
+ u8 len, u8 sopeop)
+{
+ wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) |
+ V_FW_RIWR_SOPEOP(sopeop) |
+ V_FW_RIWR_FLAGS(flags));
+ wmb();
+ wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) |
+ V_FW_RIWR_TID(tid) |
+ V_FW_RIWR_LEN(len));
+ /* 2nd gen bit... */
+ ((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit);
+}
+
+/*
+ * T3 ULP2_TX commands
+ */
+enum t3_utx_mem_op {
+ T3_UTX_MEM_READ = 2,
+ T3_UTX_MEM_WRITE = 3
+};
+
+/* T3 MC7 RDMA TPT entry format */
+
+enum tpt_mem_type {
+ TPT_NON_SHARED_MR = 0x0,
+ TPT_SHARED_MR = 0x1,
+ TPT_MW = 0x2,
+ TPT_MW_RELAXED_PROTECTION = 0x3
+};
+
+enum tpt_addr_type {
+ TPT_ZBTO = 0,
+ TPT_VATO = 1
+};
+
+enum tpt_mem_perm {
+ TPT_MW_BIND = 0x10,
+ TPT_LOCAL_READ = 0x8,
+ TPT_LOCAL_WRITE = 0x4,
+ TPT_REMOTE_READ = 0x2,
+ TPT_REMOTE_WRITE = 0x1
+};
+
+struct tpt_entry {
+ __be32 valid_stag_pdid;
+ __be32 flags_pagesize_qpid;
+
+ __be32 rsvd_pbl_addr;
+ __be32 len;
+ __be32 va_hi;
+ __be32 va_low_or_fbo;
+
+ __be32 rsvd_bind_cnt_or_pstag;
+ __be32 rsvd_pbl_size;
+};
+
+#define S_TPT_VALID 31
+#define V_TPT_VALID(x) ((x) << S_TPT_VALID)
+#define F_TPT_VALID V_TPT_VALID(1U)
+
+#define S_TPT_STAG_KEY 23
+#define M_TPT_STAG_KEY 0xFF
+#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY)
+#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY)
+
+#define S_TPT_STAG_STATE 22
+#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE)
+#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U)
+
+#define S_TPT_STAG_TYPE 20
+#define M_TPT_STAG_TYPE 0x3
+#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE)
+#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE)
+
+#define S_TPT_PDID 0
+#define M_TPT_PDID 0xFFFFF
+#define V_TPT_PDID(x) ((x) << S_TPT_PDID)
+#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID)
+
+#define S_TPT_PERM 28
+#define M_TPT_PERM 0xF
+#define V_TPT_PERM(x) ((x) << S_TPT_PERM)
+#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM)
+
+#define S_TPT_REM_INV_DIS 27
+#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS)
+#define F_TPT_REM_INV_DIS V_TPT_REM_INV_DIS(1U)
+
+#define S_TPT_ADDR_TYPE 26
+#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE)
+#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U)
+
+#define S_TPT_MW_BIND_ENABLE 25
+#define V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE)
+#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U)
+
+#define S_TPT_PAGE_SIZE 20
+#define M_TPT_PAGE_SIZE 0x1F
+#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE)
+#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE)
+
+#define S_TPT_PBL_ADDR 0
+#define M_TPT_PBL_ADDR 0x1FFFFFFF
+#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR)
+#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR)
+
+#define S_TPT_QPID 0
+#define M_TPT_QPID 0xFFFFF
+#define V_TPT_QPID(x) ((x) << S_TPT_QPID)
+#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID)
+
+#define S_TPT_PSTAG 0
+#define M_TPT_PSTAG 0xFFFFFF
+#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG)
+#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG)
+
+#define S_TPT_PBL_SIZE 0
+#define M_TPT_PBL_SIZE 0xFFFFF
+#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE)
+#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE)
+
+/*
+ * CQE defs
+ */
+struct t3_cqe {
+ __be32 header;
+ __be32 len;
+ union {
+ struct {
+ __be32 stag;
+ __be32 msn;
+ } rcqe;
+ struct {
+ u32 wrid_hi;
+ u32 wrid_low;
+ } scqe;
+ } u;
+};
+
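+/*
+ * CQE header layout (32 bits), as encoded by the macros below:
+ * bit 31 OOO, bits 30:12 QPID, bit 11 SWCQE, bit 10 GENBIT,
+ * bits 9:5 STATUS, bit 4 TYPE (1 = SQ, 0 = RQ), bits 3:0 OPCODE.
+ */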
+#define S_CQE_OOO 31
+#define M_CQE_OOO 0x1
+#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO)
+#define V_CEQ_OOO(x) ((x)<<S_CQE_OOO)
+
+#define S_CQE_QPID 12
+#define M_CQE_QPID 0x7FFFF
+#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID)
+#define V_CQE_QPID(x) ((x)<<S_CQE_QPID)
+
+#define S_CQE_SWCQE 11
+#define M_CQE_SWCQE 0x1
+#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE)
+#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE)
+
+#define S_CQE_GENBIT 10
+#define M_CQE_GENBIT 0x1
+#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
+#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT)
+
+#define S_CQE_STATUS 5
+#define M_CQE_STATUS 0x1F
+#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS)
+#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS)
+
+#define S_CQE_TYPE 4
+#define M_CQE_TYPE 0x1
+#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE)
+#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE)
+
+#define S_CQE_OPCODE 0
+#define M_CQE_OPCODE 0xF
+#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE)
+#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE)
+
+#define SW_CQE(x) (G_CQE_SWCQE(be32_to_cpu((x).header)))
+#define CQE_OOO(x) (G_CQE_OOO(be32_to_cpu((x).header)))
+#define CQE_QPID(x) (G_CQE_QPID(be32_to_cpu((x).header)))
+#define CQE_GENBIT(x) (G_CQE_GENBIT(be32_to_cpu((x).header)))
+#define CQE_TYPE(x) (G_CQE_TYPE(be32_to_cpu((x).header)))
+#define SQ_TYPE(x) (CQE_TYPE((x)))
+#define RQ_TYPE(x) (!CQE_TYPE((x)))
+#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x).header)))
+#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x).header)))
+
+#define CQE_LEN(x) (be32_to_cpu((x).len))
+
+/* used for RQ completion processing */
+#define CQE_WRID_STAG(x) (be32_to_cpu((x).u.rcqe.stag))
+#define CQE_WRID_MSN(x) (be32_to_cpu((x).u.rcqe.msn))
+
+/* used for SQ completion processing */
+#define CQE_WRID_SQ_WPTR(x) ((x).u.scqe.wrid_hi)
+#define CQE_WRID_WPTR(x) ((x).u.scqe.wrid_low)
+
+/* generic accessor macros */
+#define CQE_WRID_HI(x) ((x).u.scqe.wrid_hi)
+#define CQE_WRID_LOW(x) ((x).u.scqe.wrid_low)
+
+#define TPT_ERR_SUCCESS 0x0
+#define TPT_ERR_STAG 0x1 /* STAG invalid: either the */
+ /* STAG is out of range, is 0, */
+ /* or STAG_key mismatch */
+#define TPT_ERR_PDID 0x2 /* PDID mismatch */
+#define TPT_ERR_QPID 0x3 /* QPID mismatch */
+#define TPT_ERR_ACCESS 0x4 /* Invalid access right */
+#define TPT_ERR_WRAP 0x5 /* Wrap error */
+#define TPT_ERR_BOUND 0x6 /* base and bounds violation */
+#define TPT_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */
+ /* shared memory region */
+#define TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */
+ /* MR with a bound MW */
+#define TPT_ERR_ECC 0x9 /* ECC error detected */
+#define TPT_ERR_ECC_PSTAG 0xA /* ECC error detected when */
+ /* reading PSTAG for a MW */
+ /* Invalidate */
+#define TPT_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */
+ /* software error */
+#define TPT_ERR_SWFLUSH 0xC /* SW FLUSHED */
+#define TPT_ERR_CRC 0x10 /* CRC error */
+#define TPT_ERR_MARKER 0x11 /* Marker error */
+#define TPT_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */
+#define TPT_ERR_OUT_OF_RQE 0x13 /* out of RQE */
+#define TPT_ERR_DDP_VERSION 0x14 /* wrong DDP version */
+#define TPT_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */
+#define TPT_ERR_OPCODE 0x16 /* invalid rdma opcode */
+#define TPT_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */
+#define TPT_ERR_MSN 0x18 /* MSN error */
+#define TPT_ERR_TBIT 0x19 /* tag bit not set correctly */
+#define TPT_ERR_MO 0x1A /* MO not 0 for TERMINATE */
+ /* or READ_REQ */
+#define TPT_ERR_MSN_GAP 0x1B
+#define TPT_ERR_MSN_RANGE 0x1C
+#define TPT_ERR_IRD_OVERFLOW 0x1D
+#define TPT_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */
+ /* software error */
+#define TPT_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */
+ /* mismatch) */
+
+struct t3_swsq {
+ __u64 wr_id;
+ struct t3_cqe cqe;
+ __u32 sq_wptr;
+ __be32 read_len;
+ int opcode;
+ int complete;
+ int signaled;
+};
+
+struct t3_swrq {
+ __u64 wr_id;
+ __u32 pbl_addr;
+};
+
+/*
+ * A T3 WQ implements both the SQ and RQ.
+ */
+struct t3_wq {
+ union t3_wr *queue; /* DMA accessible memory */
+ dma_addr_t dma_addr; /* DMA address for HW */
+ DECLARE_PCI_UNMAP_ADDR(mapping) /* unmap cruft */
+ u32 error; /* 1 once we go to ERROR */
+ u32 qpid;
+ u32 wptr; /* idx to next available WR slot */
+ u32 size_log2; /* total wq size */
+ struct t3_swsq *sq; /* SW SQ */
+ struct t3_swsq *oldest_read; /* tracks oldest pending read */
+ u32 sq_wptr; /* sq_wptr - sq_rptr == count of */
+ u32 sq_rptr; /* pending wrs */
+ u32 sq_size_log2; /* sq size */
+ struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids) */
+ u32 rq_wptr; /* rq_wptr - rq_rptr == count of */
+ u32 rq_rptr; /* pending wrs */
+ struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */
+ u32 rq_size_log2; /* rq size */
+ u32 rq_addr; /* rq adapter address */
+ void __iomem *doorbell; /* kernel db */
+ u64 udb; /* user db if any */
+ struct cxio_rdev *rdev;
+};
+
+struct t3_cq {
+ u32 cqid;
+ u32 rptr;
+ u32 wptr;
+ u32 size_log2;
+ dma_addr_t dma_addr;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+ struct t3_cqe *queue;
+ struct t3_cqe *sw_queue;
+ u32 sw_rptr;
+ u32 sw_wptr;
+};
+
+#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
+ CQE_GENBIT(*cqe))
+
+static inline void cxio_set_wq_in_error(struct t3_wq *wq)
+{
+ wq->queue->wq_in_err.err = 1;
+}
+
+static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
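+/*
+ * A HW CQE at the current rptr is valid when its gen bit matches the
+ * gen bit expected for that rptr (CQ_VLD_ENTRY); software CQEs in
+ * sw_queue are consumed first by cxio_next_cqe().
+ */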
+{
+ struct t3_cqe *cqe;
+
+ cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
+ if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
+ return cqe;
+ return NULL;
+}
+
+static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe;
+
+ if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
+ return cqe;
+ }
+ return NULL;
+}
+
+static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe;
+
+ if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
+ return cqe;
+ }
+ cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
+ if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
+ return cqe;
+ return NULL;
+}
+
+#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
new file mode 100644
index 0000000..4489c89
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "cxgb3_offload.h"
+#include "iwch_provider.h"
+#include "iwch_user.h"
+#include "iwch.h"
+#include "iwch_cm.h"
+
+#define DRV_VERSION "1.1"
+
+MODULE_AUTHOR("Boyd Faulkner, Steve Wise");
+MODULE_DESCRIPTION("Chelsio T3 RDMA Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
+
+static void open_rnic_dev(struct t3cdev *);
+static void close_rnic_dev(struct t3cdev *);
+
+struct cxgb3_client t3c_client = {
+ .name = "iw_cxgb3",
+ .add = open_rnic_dev,
+ .remove = close_rnic_dev,
+ .handlers = t3c_handlers,
+ .redirect = iwch_ep_redirect
+};
+
+static LIST_HEAD(dev_list);
+static DEFINE_MUTEX(dev_mutex);
+
+static void rnic_init(struct iwch_dev *rnicp)
+{
+ PDBG("%s iwch_dev %p\n", __func__, rnicp);
+ idr_init(&rnicp->cqidr);
+ idr_init(&rnicp->qpidr);
+ idr_init(&rnicp->mmidr);
+ spin_lock_init(&rnicp->lock);
+
+ rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
+ rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
+ rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
+ rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
+ rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
+ rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH;
+ rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
+ rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
+ rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
+ rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK;
+ rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
+ rnicp->attr.can_resize_wq = 0;
+ rnicp->attr.max_rdma_reads_per_qp = 8;
+ rnicp->attr.max_rdma_read_resources =
+ rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps;
+ rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */
+ rnicp->attr.max_rdma_read_depth =
+ rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps;
+ rnicp->attr.rq_overflow_handled = 0;
+ rnicp->attr.can_modify_ird = 0;
+ rnicp->attr.can_modify_ord = 0;
+ rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1;
+ rnicp->attr.stag0_value = 1;
+ rnicp->attr.zbva_support = 1;
+ rnicp->attr.local_invalidate_fence = 1;
+ rnicp->attr.cq_overflow_detection = 1;
+ return;
+}
+
+static void open_rnic_dev(struct t3cdev *tdev)
+{
+ struct iwch_dev *rnicp;
+ static int vers_printed;
+
+ PDBG("%s t3cdev %p\n", __func__, tdev);
+ if (!vers_printed++)
+ printk(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
+ DRV_VERSION);
+ rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
+ if (!rnicp) {
+ printk(KERN_ERR MOD "Cannot allocate ib device\n");
+ return;
+ }
+ rnicp->rdev.ulp = rnicp;
+ rnicp->rdev.t3cdev_p = tdev;
+
+ mutex_lock(&dev_mutex);
+
+ if (cxio_rdev_open(&rnicp->rdev)) {
+ mutex_unlock(&dev_mutex);
+ printk(KERN_ERR MOD "Unable to open CXIO rdev\n");
+ ib_dealloc_device(&rnicp->ibdev);
+ return;
+ }
+
+ rnic_init(rnicp);
+
+ list_add_tail(&rnicp->entry, &dev_list);
+ mutex_unlock(&dev_mutex);
+
+ if (iwch_register_device(rnicp)) {
+ printk(KERN_ERR MOD "Unable to register device\n");
+ close_rnic_dev(tdev);
+ }
+ printk(KERN_INFO MOD "Initialized device %s\n",
+ pci_name(rnicp->rdev.rnic_info.pdev));
+ return;
+}
+
+static void close_rnic_dev(struct t3cdev *tdev)
+{
+ struct iwch_dev *dev, *tmp;
+ PDBG("%s t3cdev %p\n", __func__, tdev);
+ mutex_lock(&dev_mutex);
+ list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
+ if (dev->rdev.t3cdev_p == tdev) {
+ list_del(&dev->entry);
+ iwch_unregister_device(dev);
+ cxio_rdev_close(&dev->rdev);
+ idr_destroy(&dev->cqidr);
+ idr_destroy(&dev->qpidr);
+ idr_destroy(&dev->mmidr);
+ ib_dealloc_device(&dev->ibdev);
+ break;
+ }
+ }
+ mutex_unlock(&dev_mutex);
+}
+
+static int __init iwch_init_module(void)
+{
+ int err;
+
+ err = cxio_hal_init();
+ if (err)
+ return err;
+ err = iwch_cm_init();
+ if (err)
+ return err;
+ cxio_register_ev_cb(iwch_ev_dispatch);
+ cxgb3_register_client(&t3c_client);
+ return 0;
+}
+
+static void __exit iwch_exit_module(void)
+{
+ cxgb3_unregister_client(&t3c_client);
+ cxio_unregister_ev_cb(iwch_ev_dispatch);
+ iwch_cm_term();
+ cxio_hal_exit();
+}
+
+module_init(iwch_init_module);
+module_exit(iwch_exit_module);
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
new file mode 100644
index 0000000..3773453
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWCH_H__
+#define __IWCH_H__
+
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "cxio_hal.h"
+#include "cxgb3_offload.h"
+
+struct iwch_pd;
+struct iwch_cq;
+struct iwch_qp;
+struct iwch_mr;
+
+struct iwch_rnic_attributes {
+ u32 max_qps;
+ u32 max_wrs; /* Max for any SQ/RQ */
+ u32 max_sge_per_wr;
+ u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */
+ u32 max_cqs;
+ u32 max_cqes_per_cq;
+ u32 max_mem_regs;
+ u32 max_phys_buf_entries; /* for phys buf list */
+ u32 max_pds;
+
+ /*
+ * The memory page sizes supported by this RNIC.
+ * Bit position i in bitmap indicates page of
+ * size (4k)^i. Phys block list mode unsupported.
+ */
+ u32 mem_pgsizes_bitmask;
+ u64 max_mr_size;
+ u8 can_resize_wq;
+
+ /*
+ * The maximum number of RDMA Reads that can be outstanding
+ * per QP with this RNIC as the target.
+ */
+ u32 max_rdma_reads_per_qp;
+
+ /*
+ * The maximum number of resources used for RDMA Reads
+ * with this RNIC as the target.
+ */
+ u32 max_rdma_read_resources;
+
+ /*
+ * The max depth per QP for initiation of RDMA Read
+ * by this RNIC.
+ */
+ u32 max_rdma_read_qp_depth;
+
+ /*
+ * The maximum depth for initiation of RDMA Read
+ * operations by this RNIC on all QPs
+ */
+ u32 max_rdma_read_depth;
+ u8 rq_overflow_handled;
+ u32 can_modify_ird;
+ u32 can_modify_ord;
+ u32 max_mem_windows;
+ u32 stag0_value;
+ u8 zbva_support;
+ u8 local_invalidate_fence;
+ u32 cq_overflow_detection;
+};
+
+struct iwch_dev {
+ struct ib_device ibdev;
+ struct cxio_rdev rdev;
+ u32 device_cap_flags;
+ struct iwch_rnic_attributes attr;
+ struct idr cqidr;
+ struct idr qpidr;
+ struct idr mmidr;
+ spinlock_t lock;
+ struct list_head entry;
+};
+
+static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct iwch_dev, ibdev);
+}
+
+static inline int t3b_device(const struct iwch_dev *rhp)
+{
+ return rhp->rdev.t3cdev_p->type == T3B;
+}
+
+static inline int t3a_device(const struct iwch_dev *rhp)
+{
+ return rhp->rdev.t3cdev_p->type == T3A;
+}
+
+static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid)
+{
+ return idr_find(&rhp->cqidr, cqid);
+}
+
+static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid)
+{
+ return idr_find(&rhp->qpidr, qpid);
+}
+
+static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid)
+{
+ return idr_find(&rhp->mmidr, mmid);
+}
+
+static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr,
+ void *handle, u32 id)
+{
+ int ret;
+ int newid;
+
+ do {
+ if (!idr_pre_get(idr, GFP_KERNEL)) {
+ return -ENOMEM;
+ }
+ spin_lock_irq(&rhp->lock);
+ ret = idr_get_new_above(idr, handle, id, &newid);
+ BUG_ON(newid != id);
+ spin_unlock_irq(&rhp->lock);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
+static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id)
+{
+ spin_lock_irq(&rhp->lock);
+ idr_remove(idr, id);
+ spin_unlock_irq(&rhp->lock);
+}
+
+extern struct cxgb3_client t3c_client;
+extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
+extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb);
+
+#endif
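
The idr helpers above are the glue between hardware ids (CQ id, QP id, memory-region id) and the driver structures that back them. A hypothetical usage sketch, with names that are illustrative rather than taken from this patch (the real callers live elsewhere in the driver), would publish a newly created CQ under its id and look it up again from the event path:

	/* Sketch only: illustrates the insert/lookup/remove pattern. */
	static int publish_cq(struct iwch_dev *rhp, struct iwch_cq *chp, u32 cqid)
	{
		return insert_handle(rhp, &rhp->cqidr, chp, cqid);
	}

	static void unpublish_cq(struct iwch_dev *rhp, u32 cqid)
	{
		struct iwch_cq *chp = get_chp(rhp, cqid);	/* NULL if never inserted */

		if (chp)
			remove_handle(rhp, &rhp->cqidr, cqid);
	}
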
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
new file mode 100644
index 0000000..44e936e
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -0,0 +1,2186 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/skbuff.h>
+#include <linux/timer.h>
+#include <linux/notifier.h>
+#include <linux/inetdevice.h>
+
+#include <net/neighbour.h>
+#include <net/netevent.h>
+#include <net/route.h>
+
+#include "tcb.h"
+#include "cxgb3_offload.h"
+#include "iwch.h"
+#include "iwch_provider.h"
+#include "iwch_cm.h"
+
+static char *states[] = {
+ "idle",
+ "listen",
+ "connecting",
+ "mpa_wait_req",
+ "mpa_req_sent",
+ "mpa_req_rcvd",
+ "mpa_rep_sent",
+ "fpdu_mode",
+ "aborting",
+ "closing",
+ "moribund",
+ "dead",
+ NULL,
+};
+
+int peer2peer = 0;
+module_param(peer2peer, int, 0644);
+MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
+
+static int ep_timeout_secs = 60;
+module_param(ep_timeout_secs, int, 0644);
+MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
+ "in seconds (default=60)");
+
+static int mpa_rev = 1;
+module_param(mpa_rev, int, 0644);
+MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
+ "1 is spec compliant. (default=1)");
+
+static int markers_enabled = 0;
+module_param(markers_enabled, int, 0644);
+MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
+
+static int crc_enabled = 1;
+module_param(crc_enabled, int, 0644);
+MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
+
+static int rcv_win = 256 * 1024;
+module_param(rcv_win, int, 0644);
+MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
+
+static int snd_win = 32 * 1024;
+module_param(snd_win, int, 0644);
+MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
+
+static unsigned int nocong = 0;
+module_param(nocong, uint, 0644);
+MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
+
+static unsigned int cong_flavor = 1;
+module_param(cong_flavor, uint, 0644);
+MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
+
+static void process_work(struct work_struct *work);
+static struct workqueue_struct *workq;
+static DECLARE_WORK(skb_work, process_work);
+
+static struct sk_buff_head rxq;
+static cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS];
+
+static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
+static void ep_timeout(unsigned long arg);
+static void connect_reply_upcall(struct iwch_ep *ep, int status);
+
+static void start_ep_timer(struct iwch_ep *ep)
+{
+ PDBG("%s ep %p\n", __func__, ep);
+ if (timer_pending(&ep->timer)) {
+ PDBG("%s stopped / restarted timer ep %p\n", __func__, ep);
+ del_timer_sync(&ep->timer);
+ } else
+ get_ep(&ep->com);
+ ep->timer.expires = jiffies + ep_timeout_secs * HZ;
+ ep->timer.data = (unsigned long)ep;
+ ep->timer.function = ep_timeout;
+ add_timer(&ep->timer);
+}
+
+static void stop_ep_timer(struct iwch_ep *ep)
+{
+ PDBG("%s ep %p\n", __func__, ep);
+ if (!timer_pending(&ep->timer)) {
+ printk(KERN_ERR "%s timer stopped when it's not running! ep %p state %u\n",
+ __func__, ep, ep->com.state);
+ WARN_ON(1);
+ return;
+ }
+ del_timer_sync(&ep->timer);
+ put_ep(&ep->com);
+}
+
+static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
+{
+ struct cpl_tid_release *req;
+
+ skb = get_skb(skb, sizeof *req, GFP_KERNEL);
+ if (!skb)
+ return;
+ req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
+ skb->priority = CPL_PRIORITY_SETUP;
+ cxgb3_ofld_send(tdev, skb);
+ return;
+}
+
+int iwch_quiesce_tid(struct iwch_ep *ep)
+{
+ struct cpl_set_tcb_field *req;
+ struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+
+ if (!skb)
+ return -ENOMEM;
+ req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
+ req->reply = 0;
+ req->cpu_idx = 0;
+ req->word = htons(W_TCB_RX_QUIESCE);
+ req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
+ req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
+
+ skb->priority = CPL_PRIORITY_DATA;
+ cxgb3_ofld_send(ep->com.tdev, skb);
+ return 0;
+}
+
+int iwch_resume_tid(struct iwch_ep *ep)
+{
+ struct cpl_set_tcb_field *req;
+ struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+
+ if (!skb)
+ return -ENOMEM;
+ req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
+ req->reply = 0;
+ req->cpu_idx = 0;
+ req->word = htons(W_TCB_RX_QUIESCE);
+ req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
+ req->val = 0;
+
+ skb->priority = CPL_PRIORITY_DATA;
+ cxgb3_ofld_send(ep->com.tdev, skb);
+ return 0;
+}
+
+static void set_emss(struct iwch_ep *ep, u16 opt)
+{
+ PDBG("%s ep %p opt %u\n", __func__, ep, opt);
+ ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
+ if (G_TCPOPT_TSTAMP(opt))
+ ep->emss -= 12;
+ if (ep->emss < 128)
+ ep->emss = 128;
+ PDBG("emss=%d\n", ep->emss);
+}
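
A hedged worked example of the arithmetic above (not part of the patch): if the MSS index in the TCP options selects a 1500-byte entry from the adapter's MTU table, the effective MSS starts at 1500 - 40 = 1460 bytes to cover the TCP/IP headers, drops to 1448 when timestamps were negotiated, and the 128-byte clamp only matters for pathologically small table entries.
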
+
+static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
+{
+ unsigned long flags;
+ enum iwch_ep_state state;
+
+ spin_lock_irqsave(&epc->lock, flags);
+ state = epc->state;
+ spin_unlock_irqrestore(&epc->lock, flags);
+ return state;
+}
+
+static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
+{
+ epc->state = new;
+}
+
+static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&epc->lock, flags);
+ PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
+ __state_set(epc, new);
+ spin_unlock_irqrestore(&epc->lock, flags);
+ return;
+}
+
+static void *alloc_ep(int size, gfp_t gfp)
+{
+ struct iwch_ep_common *epc;
+
+ epc = kzalloc(size, gfp);
+ if (epc) {
+ kref_init(&epc->kref);
+ spin_lock_init(&epc->lock);
+ init_waitqueue_head(&epc->waitq);
+ }
+ PDBG("%s alloc ep %p\n", __func__, epc);
+ return epc;
+}
+
+void __free_ep(struct kref *kref)
+{
+ struct iwch_ep_common *epc;
+ epc = container_of(kref, struct iwch_ep_common, kref);
+ PDBG("%s ep %p state %s\n", __func__, epc, states[state_read(epc)]);
+ kfree(epc);
+}
+
+static void release_ep_resources(struct iwch_ep *ep)
+{
+ PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+ cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
+ dst_release(ep->dst);
+ l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+ put_ep(&ep->com);
+}
+
+static void process_work(struct work_struct *work)
+{
+ struct sk_buff *skb = NULL;
+ void *ep;
+ struct t3cdev *tdev;
+ int ret;
+
+ while ((skb = skb_dequeue(&rxq))) {
+ ep = *((void **) (skb->cb));
+ tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
+ ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
+ if (ret & CPL_RET_BUF_DONE)
+ kfree_skb(skb);
+
+ /*
+ * ep was referenced in sched(), and is freed here.
+ */
+ put_ep((struct iwch_ep_common *)ep);
+ }
+}
+
+static int status2errno(int status)
+{
+ switch (status) {
+ case CPL_ERR_NONE:
+ return 0;
+ case CPL_ERR_CONN_RESET:
+ return -ECONNRESET;
+ case CPL_ERR_ARP_MISS:
+ return -EHOSTUNREACH;
+ case CPL_ERR_CONN_TIMEDOUT:
+ return -ETIMEDOUT;
+ case CPL_ERR_TCAM_FULL:
+ return -ENOMEM;
+ case CPL_ERR_CONN_EXIST:
+ return -EADDRINUSE;
+ default:
+ return -EIO;
+ }
+}
+
+/*
+ * Try and reuse skbs already allocated...
+ */
+static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
+{
+ if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
+ skb_trim(skb, 0);
+ skb_get(skb);
+ } else {
+ skb = alloc_skb(len, gfp);
+ }
+ return skb;
+}
+
+static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
+ __be32 peer_ip, __be16 local_port,
+ __be16 peer_port, u8 tos)
+{
+ struct rtable *rt;
+ struct flowi fl = {
+ .oif = 0,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = peer_ip,
+ .saddr = local_ip,
+ .tos = tos}
+ },
+ .proto = IPPROTO_TCP,
+ .uli_u = {
+ .ports = {
+ .sport = local_port,
+ .dport = peer_port}
+ }
+ };
+
+ if (ip_route_output_flow(&init_net, &rt, &fl, NULL, 0))
+ return NULL;
+ return rt;
+}
+
+static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
+{
+ int i = 0;
+
+ while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
+ ++i;
+ return i;
+}
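
For illustration, assuming an ascending MTU table such as {576, 1500, 9000}: with dst_mtu() reporting 1500, the loop above stops at index 1, the largest entry that does not exceed the path MTU. send_connect() and accept_cr() then encode that index via V_MSS_IDX(), and set_emss() later maps it back to a byte value through the same table.
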
+
+static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
+{
+ PDBG("%s t3cdev %p\n", __func__, dev);
+ kfree_skb(skb);
+}
+
+/*
+ * Handle an ARP failure for an active open.
+ */
+static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
+{
+ printk(KERN_ERR MOD "ARP failure during connect\n");
+ kfree_skb(skb);
+}
+
+/*
+ * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
+ * and send it along.
+ */
+static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
+{
+ struct cpl_abort_req *req = cplhdr(skb);
+
+ PDBG("%s t3cdev %p\n", __func__, dev);
+ req->cmd = CPL_ABORT_NO_RST;
+ cxgb3_ofld_send(dev, skb);
+}
+
+static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
+{
+ struct cpl_close_con_req *req;
+ struct sk_buff *skb;
+
+ PDBG("%s ep %p\n", __func__, ep);
+ skb = get_skb(NULL, sizeof(*req), gfp);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
+ return -ENOMEM;
+ }
+ skb->priority = CPL_PRIORITY_DATA;
+ set_arp_failure_handler(skb, arp_failure_discard);
+ req = (struct cpl_close_con_req *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
+ req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+ return 0;
+}
+
+static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
+{
+ struct cpl_abort_req *req;
+
+ PDBG("%s ep %p\n", __func__, ep);
+ skb = get_skb(skb, sizeof(*req), gfp);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
+ __func__);
+ return -ENOMEM;
+ }
+ skb->priority = CPL_PRIORITY_DATA;
+ set_arp_failure_handler(skb, abort_arp_failure);
+ req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
+ req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
+ req->cmd = CPL_ABORT_SEND_RST;
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+ return 0;
+}
+
+static int send_connect(struct iwch_ep *ep)
+{
+ struct cpl_act_open_req *req;
+ struct sk_buff *skb;
+ u32 opt0h, opt0l, opt2;
+ unsigned int mtu_idx;
+ int wscale;
+
+ PDBG("%s ep %p\n", __func__, ep);
+
+ skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
+ __func__);
+ return -ENOMEM;
+ }
+ mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
+ wscale = compute_wscale(rcv_win);
+ opt0h = V_NAGLE(0) |
+ V_NO_CONG(nocong) |
+ V_KEEP_ALIVE(1) |
+ F_TCAM_BYPASS |
+ V_WND_SCALE(wscale) |
+ V_MSS_IDX(mtu_idx) |
+ V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
+ opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
+ opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor);
+ skb->priority = CPL_PRIORITY_SETUP;
+ set_arp_failure_handler(skb, act_open_req_arp_failure);
+
+ req = (struct cpl_act_open_req *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
+ req->local_port = ep->com.local_addr.sin_port;
+ req->peer_port = ep->com.remote_addr.sin_port;
+ req->local_ip = ep->com.local_addr.sin_addr.s_addr;
+ req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
+ req->opt0h = htonl(opt0h);
+ req->opt0l = htonl(opt0l);
+ req->params = 0;
+ req->opt2 = htonl(opt2);
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+ return 0;
+}
+
+static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
+{
+ int mpalen;
+ struct tx_data_wr *req;
+ struct mpa_message *mpa;
+ int len;
+
+ PDBG("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
+
+ BUG_ON(skb_cloned(skb));
+
+ mpalen = sizeof(*mpa) + ep->plen;
+ if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
+ kfree_skb(skb);
+ skb = alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ connect_reply_upcall(ep, -ENOMEM);
+ return;
+ }
+ }
+ skb_trim(skb, 0);
+ skb_reserve(skb, sizeof(*req));
+ skb_put(skb, mpalen);
+ skb->priority = CPL_PRIORITY_DATA;
+ mpa = (struct mpa_message *) skb->data;
+ memset(mpa, 0, sizeof(*mpa));
+ memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
+ mpa->flags = (crc_enabled ? MPA_CRC : 0) |
+ (markers_enabled ? MPA_MARKERS : 0);
+ mpa->private_data_size = htons(ep->plen);
+ mpa->revision = mpa_rev;
+
+ if (ep->plen)
+ memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
+
+ /*
+ * Reference the mpa skb. This ensures the data area
+ * will remain in memory until the hw acks the tx.
+ * Function tx_ack() will deref it.
+ */
+ skb_get(skb);
+ set_arp_failure_handler(skb, arp_failure_discard);
+ skb_reset_transport_header(skb);
+ len = skb->len;
+ req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+ req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
+ req->wr_lo = htonl(V_WR_TID(ep->hwtid));
+ req->len = htonl(len);
+ req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
+ V_TX_SNDBUF(snd_win>>15));
+ req->flags = htonl(F_TX_INIT);
+ req->sndseq = htonl(ep->snd_seq);
+ BUG_ON(ep->mpa_skb);
+ ep->mpa_skb = skb;
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+ start_ep_timer(ep);
+ state_set(&ep->com, MPA_REQ_SENT);
+ return;
+}
+
+static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
+{
+ int mpalen;
+ struct tx_data_wr *req;
+ struct mpa_message *mpa;
+ struct sk_buff *skb;
+
+ PDBG("%s ep %p plen %d\n", __func__, ep, plen);
+
+ mpalen = sizeof(*mpa) + plen;
+
+ skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ return -ENOMEM;
+ }
+ skb_reserve(skb, sizeof(*req));
+ mpa = (struct mpa_message *) skb_put(skb, mpalen);
+ memset(mpa, 0, sizeof(*mpa));
+ memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
+ mpa->flags = MPA_REJECT;
+ mpa->revision = mpa_rev;
+ mpa->private_data_size = htons(plen);
+ if (plen)
+ memcpy(mpa->private_data, pdata, plen);
+
+ /*
+ * Reference the mpa skb again. This ensures the data area
+ * will remain in memory until the hw acks the tx.
+ * Function tx_ack() will deref it.
+ */
+ skb_get(skb);
+ skb->priority = CPL_PRIORITY_DATA;
+ set_arp_failure_handler(skb, arp_failure_discard);
+ skb_reset_transport_header(skb);
+ req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+ req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
+ req->wr_lo = htonl(V_WR_TID(ep->hwtid));
+ req->len = htonl(mpalen);
+ req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
+ V_TX_SNDBUF(snd_win>>15));
+ req->flags = htonl(F_TX_INIT);
+ req->sndseq = htonl(ep->snd_seq);
+ BUG_ON(ep->mpa_skb);
+ ep->mpa_skb = skb;
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+ return 0;
+}
+
+static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
+{
+ int mpalen;
+ struct tx_data_wr *req;
+ struct mpa_message *mpa;
+ int len;
+ struct sk_buff *skb;
+
+ PDBG("%s ep %p plen %d\n", __func__, ep, plen);
+
+ mpalen = sizeof(*mpa) + plen;
+
+ skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ return -ENOMEM;
+ }
+ skb->priority = CPL_PRIORITY_DATA;
+ skb_reserve(skb, sizeof(*req));
+ mpa = (struct mpa_message *) skb_put(skb, mpalen);
+ memset(mpa, 0, sizeof(*mpa));
+ memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
+ mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
+ (markers_enabled ? MPA_MARKERS : 0);
+ mpa->revision = mpa_rev;
+ mpa->private_data_size = htons(plen);
+ if (plen)
+ memcpy(mpa->private_data, pdata, plen);
+
+ /*
+ * Reference the mpa skb. This ensures the data area
+ * will remain in memory until the hw acks the tx.
+ * Function tx_ack() will deref it.
+ */
+ skb_get(skb);
+ set_arp_failure_handler(skb, arp_failure_discard);
+ skb_reset_transport_header(skb);
+ len = skb->len;
+ req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
+ req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
+ req->wr_lo = htonl(V_WR_TID(ep->hwtid));
+ req->len = htonl(len);
+ req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
+ V_TX_SNDBUF(snd_win>>15));
+ req->flags = htonl(F_TX_INIT);
+ req->sndseq = htonl(ep->snd_seq);
+ ep->mpa_skb = skb;
+ state_set(&ep->com, MPA_REP_SENT);
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+ return 0;
+}
+
+static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct cpl_act_establish *req = cplhdr(skb);
+ unsigned int tid = GET_TID(req);
+
+ PDBG("%s ep %p tid %d\n", __func__, ep, tid);
+
+ dst_confirm(ep->dst);
+
+ /* setup the hwtid for this connection */
+ ep->hwtid = tid;
+ cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
+
+ ep->snd_seq = ntohl(req->snd_isn);
+ ep->rcv_seq = ntohl(req->rcv_isn);
+
+ set_emss(ep, ntohs(req->tcp_opt));
+
+ /* dealloc the atid */
+ cxgb3_free_atid(ep->com.tdev, ep->atid);
+
+ /* start MPA negotiation */
+ send_mpa_req(ep, skb);
+
+ return 0;
+}
+
+static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
+{
+ PDBG("%s ep %p\n", __func__, ep);
+ state_set(&ep->com, ABORTING);
+ send_abort(ep, skb, gfp);
+}
+
+static void close_complete_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p\n", __func__, ep);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CLOSE;
+ if (ep->com.cm_id) {
+ PDBG("close complete delivered ep %p cm_id %p tid %d\n",
+ ep, ep->com.cm_id, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ ep->com.cm_id->rem_ref(ep->com.cm_id);
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ }
+}
+
+static void peer_close_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p\n", __func__, ep);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_DISCONNECT;
+ if (ep->com.cm_id) {
+ PDBG("peer close delivered ep %p cm_id %p tid %d\n",
+ ep, ep->com.cm_id, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ }
+}
+
+static void peer_abort_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p\n", __func__, ep);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CLOSE;
+ event.status = -ECONNRESET;
+ if (ep->com.cm_id) {
+ PDBG("abort delivered ep %p cm_id %p tid %d\n", ep,
+ ep->com.cm_id, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ ep->com.cm_id->rem_ref(ep->com.cm_id);
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ }
+}
+
+static void connect_reply_upcall(struct iwch_ep *ep, int status)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p status %d\n", __func__, ep, status);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CONNECT_REPLY;
+ event.status = status;
+ event.local_addr = ep->com.local_addr;
+ event.remote_addr = ep->com.remote_addr;
+
+ if ((status == 0) || (status == -ECONNREFUSED)) {
+ event.private_data_len = ep->plen;
+ event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+ }
+ if (ep->com.cm_id) {
+ PDBG("%s ep %p tid %d status %d\n", __func__, ep,
+ ep->hwtid, status);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ }
+ if (status < 0) {
+ ep->com.cm_id->rem_ref(ep->com.cm_id);
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ }
+}
+
+static void connect_request_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CONNECT_REQUEST;
+ event.local_addr = ep->com.local_addr;
+ event.remote_addr = ep->com.remote_addr;
+ event.private_data_len = ep->plen;
+ event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+ event.provider_data = ep;
+ if (state_read(&ep->parent_ep->com) != DEAD)
+ ep->parent_ep->com.cm_id->event_handler(
+ ep->parent_ep->com.cm_id,
+ &event);
+ put_ep(&ep->parent_ep->com);
+ ep->parent_ep = NULL;
+}
+
+static void established_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p\n", __func__, ep);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_ESTABLISHED;
+ if (ep->com.cm_id) {
+ PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ }
+}
+
+static int update_rx_credits(struct iwch_ep *ep, u32 credits)
+{
+ struct cpl_rx_data_ack *req;
+ struct sk_buff *skb;
+
+ PDBG("%s ep %p credits %u\n", __func__, ep, credits);
+ skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
+ return 0;
+ }
+
+ req = (struct cpl_rx_data_ack *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
+ req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
+ skb->priority = CPL_PRIORITY_ACK;
+ cxgb3_ofld_send(ep->com.tdev, skb);
+ return credits;
+}
+
+static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
+{
+ struct mpa_message *mpa;
+ u16 plen;
+ struct iwch_qp_attributes attrs;
+ enum iwch_qp_attr_mask mask;
+ int err;
+
+ PDBG("%s ep %p\n", __func__, ep);
+
+ /*
+ * Stop mpa timer. If it expired, then the state has
+ * changed and we bail since ep_timeout already aborted
+ * the connection.
+ */
+ stop_ep_timer(ep);
+ if (state_read(&ep->com) != MPA_REQ_SENT)
+ return;
+
+ /*
+ * If we get more than the supported amount of private data
+ * then we must fail this connection.
+ */
+ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ /*
+ * copy the new data into our accumulation buffer.
+ */
+ skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
+ skb->len);
+ ep->mpa_pkt_len += skb->len;
+
+ /*
+ * if we don't even have the mpa message, then bail.
+ */
+ if (ep->mpa_pkt_len < sizeof(*mpa))
+ return;
+ mpa = (struct mpa_message *) ep->mpa_pkt;
+
+ /* Validate MPA header. */
+ if (mpa->revision != mpa_rev) {
+ err = -EPROTO;
+ goto err;
+ }
+ if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
+ err = -EPROTO;
+ goto err;
+ }
+
+ plen = ntohs(mpa->private_data_size);
+
+ /*
+ * Fail if there's too much private data.
+ */
+ if (plen > MPA_MAX_PRIVATE_DATA) {
+ err = -EPROTO;
+ goto err;
+ }
+
+ /*
+ * If plen does not account for pkt size
+ */
+ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
+ err = -EPROTO;
+ goto err;
+ }
+
+ ep->plen = (u8) plen;
+
+ /*
+ * If we don't have all the pdata yet, then bail.
+ * We'll continue processing when more data arrives.
+ */
+ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
+ return;
+
+ if (mpa->flags & MPA_REJECT) {
+ err = -ECONNREFUSED;
+ goto err;
+ }
+
+ /*
+ * If we get here we have accumulated the entire mpa
+ * start reply message including private data. And
+ * the MPA header is valid.
+ */
+ state_set(&ep->com, FPDU_MODE);
+ ep->mpa_attr.initiator = 1;
+ ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
+ ep->mpa_attr.recv_marker_enabled = markers_enabled;
+ ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
+ ep->mpa_attr.version = mpa_rev;
+ PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
+ "xmit_marker_enabled=%d, version=%d\n", __func__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+
+ attrs.mpa_attr = ep->mpa_attr;
+ attrs.max_ird = ep->ird;
+ attrs.max_ord = ep->ord;
+ attrs.llp_stream_handle = ep;
+ attrs.next_state = IWCH_QP_STATE_RTS;
+
+ mask = IWCH_QP_ATTR_NEXT_STATE |
+ IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
+ IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;
+
+ /* bind QP and TID with INIT_WR */
+ err = iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, mask, &attrs, 1);
+ if (err)
+ goto err;
+
+ if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) {
+ iwch_post_zb_read(ep->com.qp);
+ }
+
+ goto out;
+err:
+ abort_connection(ep, skb, GFP_KERNEL);
+out:
+ connect_reply_upcall(ep, err);
+ return;
+}
+
+static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
+{
+ struct mpa_message *mpa;
+ u16 plen;
+
+ PDBG("%s ep %p\n", __func__, ep);
+
+ /*
+ * Stop mpa timer. If it expired, then the state has
+ * changed and we bail since ep_timeout already aborted
+ * the connection.
+ */
+ stop_ep_timer(ep);
+ if (state_read(&ep->com) != MPA_REQ_WAIT)
+ return;
+
+ /*
+ * If we get more than the supported amount of private data
+ * then we must fail this connection.
+ */
+ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+
+ PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+
+ /*
+ * Copy the new data into our accumulation buffer.
+ */
+ skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
+ skb->len);
+ ep->mpa_pkt_len += skb->len;
+
+ /*
+ * If we don't even have the mpa message, then bail.
+ * We'll continue processing when more data arrives.
+ */
+ if (ep->mpa_pkt_len < sizeof(*mpa))
+ return;
+ PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ mpa = (struct mpa_message *) ep->mpa_pkt;
+
+ /*
+ * Validate MPA Header.
+ */
+ if (mpa->revision != mpa_rev) {
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+
+ if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+
+ plen = ntohs(mpa->private_data_size);
+
+ /*
+ * Fail if there's too much private data.
+ */
+ if (plen > MPA_MAX_PRIVATE_DATA) {
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+
+ /*
+ * If plen does not account for pkt size
+ */
+ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+ ep->plen = (u8) plen;
+
+ /*
+ * If we don't have all the pdata yet, then bail.
+ */
+ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
+ return;
+
+ /*
+ * If we get here we have accumulated the entire mpa
+ * start request message including private data.
+ */
+ ep->mpa_attr.initiator = 0;
+ ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
+ ep->mpa_attr.recv_marker_enabled = markers_enabled;
+ ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
+ ep->mpa_attr.version = mpa_rev;
+ PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
+ "xmit_marker_enabled=%d, version=%d\n", __func__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+
+ state_set(&ep->com, MPA_REQ_RCVD);
+
+ /* drive upcall */
+ connect_request_upcall(ep);
+ return;
+}
+
+static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct cpl_rx_data *hdr = cplhdr(skb);
+ unsigned int dlen = ntohs(hdr->len);
+
+ PDBG("%s ep %p dlen %u\n", __func__, ep, dlen);
+
+ skb_pull(skb, sizeof(*hdr));
+ skb_trim(skb, dlen);
+
+ ep->rcv_seq += dlen;
+ BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
+
+ switch (state_read(&ep->com)) {
+ case MPA_REQ_SENT:
+ process_mpa_reply(ep, skb);
+ break;
+ case MPA_REQ_WAIT:
+ process_mpa_request(ep, skb);
+ break;
+ case MPA_REP_SENT:
+ break;
+ default:
+ printk(KERN_ERR MOD "%s Unexpected streaming data."
+ " ep %p state %d tid %d\n",
+ __func__, ep, state_read(&ep->com), ep->hwtid);
+
+ /*
+ * The ep will timeout and inform the ULP of the failure.
+ * See ep_timeout().
+ */
+ break;
+ }
+
+ /* update RX credits */
+ update_rx_credits(ep, dlen);
+
+ return CPL_RET_BUF_DONE;
+}
+
+/*
+ * Upcall from the adapter indicating data has been transmitted.
+ * For us it's just the single MPA request or reply. We can now free
+ * the skb holding the mpa message.
+ */
+static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct cpl_wr_ack *hdr = cplhdr(skb);
+ unsigned int credits = ntohs(hdr->credits);
+
+ PDBG("%s ep %p credits %u\n", __func__, ep, credits);
+
+ if (credits == 0) {
+ PDBG(KERN_ERR "%s 0 credit ack ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
+ return CPL_RET_BUF_DONE;
+ }
+
+ BUG_ON(credits != 1);
+ dst_confirm(ep->dst);
+ if (!ep->mpa_skb) {
+ PDBG("%s rdma_init wr_ack ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
+ if (ep->mpa_attr.initiator) {
+ PDBG("%s initiator ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
+ if (peer2peer)
+ iwch_post_zb_read(ep->com.qp);
+ } else {
+ PDBG("%s responder ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
+ ep->com.rpl_done = 1;
+ wake_up(&ep->com.waitq);
+ }
+ } else {
+ PDBG("%s lsm ack ep %p state %u freeing skb\n",
+ __func__, ep, state_read(&ep->com));
+ kfree_skb(ep->mpa_skb);
+ ep->mpa_skb = NULL;
+ }
+ return CPL_RET_BUF_DONE;
+}
+
+static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ unsigned long flags;
+ int release = 0;
+
+ PDBG("%s ep %p\n", __func__, ep);
+ BUG_ON(!ep);
+
+ /*
+ * We get 2 abort replies from the HW. The first one must
+ * be ignored except for scribbling that we need one more.
+ */
+ if (!(ep->flags & ABORT_REQ_IN_PROGRESS)) {
+ ep->flags |= ABORT_REQ_IN_PROGRESS;
+ return CPL_RET_BUF_DONE;
+ }
+
+ spin_lock_irqsave(&ep->com.lock, flags);
+ switch (ep->com.state) {
+ case ABORTING:
+ close_complete_upcall(ep);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ break;
+ default:
+ printk(KERN_ERR "%s ep %p state %d\n",
+ __func__, ep, ep->com.state);
+ break;
+ }
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+
+ if (release)
+ release_ep_resources(ep);
+ return CPL_RET_BUF_DONE;
+}
+
+/*
+ * Return whether a failed active open has allocated a TID
+ */
+static inline int act_open_has_tid(int status)
+{
+ return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
+ status != CPL_ERR_ARP_MISS;
+}
+
+static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct cpl_act_open_rpl *rpl = cplhdr(skb);
+
+ PDBG("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
+ status2errno(rpl->status));
+ connect_reply_upcall(ep, status2errno(rpl->status));
+ state_set(&ep->com, DEAD);
+ if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status))
+ release_tid(ep->com.tdev, GET_TID(rpl), NULL);
+ cxgb3_free_atid(ep->com.tdev, ep->atid);
+ dst_release(ep->dst);
+ l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+ put_ep(&ep->com);
+ return CPL_RET_BUF_DONE;
+}
+
+static int listen_start(struct iwch_listen_ep *ep)
+{
+ struct sk_buff *skb;
+ struct cpl_pass_open_req *req;
+
+ PDBG("%s ep %p\n", __func__, ep);
+ skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "t3c_listen_start failed to alloc skb!\n");
+ return -ENOMEM;
+ }
+
+ req = (struct cpl_pass_open_req *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
+ req->local_port = ep->com.local_addr.sin_port;
+ req->local_ip = ep->com.local_addr.sin_addr.s_addr;
+ req->peer_port = 0;
+ req->peer_ip = 0;
+ req->peer_netmask = 0;
+ req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
+ req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10));
+ req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
+
+ skb->priority = 1;
+ cxgb3_ofld_send(ep->com.tdev, skb);
+ return 0;
+}
+
+static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_listen_ep *ep = ctx;
+ struct cpl_pass_open_rpl *rpl = cplhdr(skb);
+
+ PDBG("%s ep %p status %d error %d\n", __func__, ep,
+ rpl->status, status2errno(rpl->status));
+ ep->com.rpl_err = status2errno(rpl->status);
+ ep->com.rpl_done = 1;
+ wake_up(&ep->com.waitq);
+
+ return CPL_RET_BUF_DONE;
+}
+
+static int listen_stop(struct iwch_listen_ep *ep)
+{
+ struct sk_buff *skb;
+ struct cpl_close_listserv_req *req;
+
+ PDBG("%s ep %p\n", __func__, ep);
+ skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
+ return -ENOMEM;
+ }
+ req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->cpu_idx = 0;
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
+ skb->priority = 1;
+ cxgb3_ofld_send(ep->com.tdev, skb);
+ return 0;
+}
+
+static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
+ void *ctx)
+{
+ struct iwch_listen_ep *ep = ctx;
+ struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
+
+ PDBG("%s ep %p\n", __func__, ep);
+ ep->com.rpl_err = status2errno(rpl->status);
+ ep->com.rpl_done = 1;
+ wake_up(&ep->com.waitq);
+ return CPL_RET_BUF_DONE;
+}
+
+static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
+{
+ struct cpl_pass_accept_rpl *rpl;
+ unsigned int mtu_idx;
+ u32 opt0h, opt0l, opt2;
+ int wscale;
+
+ PDBG("%s ep %p\n", __func__, ep);
+ BUG_ON(skb_cloned(skb));
+ skb_trim(skb, sizeof(*rpl));
+ skb_get(skb);
+ mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
+ wscale = compute_wscale(rcv_win);
+ opt0h = V_NAGLE(0) |
+ V_NO_CONG(nocong) |
+ V_KEEP_ALIVE(1) |
+ F_TCAM_BYPASS |
+ V_WND_SCALE(wscale) |
+ V_MSS_IDX(mtu_idx) |
+ V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
+ opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
+ opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor);
+
+ rpl = cplhdr(skb);
+ rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid));
+ rpl->peer_ip = peer_ip;
+ rpl->opt0h = htonl(opt0h);
+ rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT);
+ rpl->opt2 = htonl(opt2);
+ rpl->rsvd = rpl->opt2; /* workaround for HW bug */
+ skb->priority = CPL_PRIORITY_SETUP;
+ l2t_send(ep->com.tdev, skb, ep->l2t);
+
+ return;
+}
+
+static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
+ struct sk_buff *skb)
+{
+ PDBG("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
+ peer_ip);
+ BUG_ON(skb_cloned(skb));
+ skb_trim(skb, sizeof(struct cpl_tid_release));
+ skb_get(skb);
+
+ if (tdev->type != T3A)
+ release_tid(tdev, hwtid, skb);
+ else {
+ struct cpl_pass_accept_rpl *rpl;
+
+ rpl = cplhdr(skb);
+ skb->priority = CPL_PRIORITY_SETUP;
+ rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+ hwtid));
+ rpl->peer_ip = peer_ip;
+ rpl->opt0h = htonl(F_TCAM_BYPASS);
+ rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
+ rpl->opt2 = 0;
+ rpl->rsvd = rpl->opt2;
+ cxgb3_ofld_send(tdev, skb);
+ }
+}
+
+static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *child_ep, *parent_ep = ctx;
+ struct cpl_pass_accept_req *req = cplhdr(skb);
+ unsigned int hwtid = GET_TID(req);
+ struct dst_entry *dst;
+ struct l2t_entry *l2t;
+ struct rtable *rt;
+ struct iff_mac tim;
+
+ PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
+
+ if (state_read(&parent_ep->com) != LISTEN) {
+ printk(KERN_ERR "%s - listening ep not in LISTEN\n",
+ __func__);
+ goto reject;
+ }
+
+ /*
+ * Find the netdev for this connection request.
+ */
+ tim.mac_addr = req->dst_mac;
+ tim.vlan_tag = ntohs(req->vlan_tag);
+ if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
+ printk(KERN_ERR
+ "%s bad dst mac %02x %02x %02x %02x %02x %02x\n",
+ __func__,
+ req->dst_mac[0],
+ req->dst_mac[1],
+ req->dst_mac[2],
+ req->dst_mac[3],
+ req->dst_mac[4],
+ req->dst_mac[5]);
+ goto reject;
+ }
+
+ /* Find output route */
+ rt = find_route(tdev,
+ req->local_ip,
+ req->peer_ip,
+ req->local_port,
+ req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
+ if (!rt) {
+ printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
+ __func__);
+ goto reject;
+ }
+ dst = &rt->u.dst;
+ l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev);
+ if (!l2t) {
+ printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
+ __func__);
+ dst_release(dst);
+ goto reject;
+ }
+ child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
+ if (!child_ep) {
+ printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
+ __func__);
+ l2t_release(L2DATA(tdev), l2t);
+ dst_release(dst);
+ goto reject;
+ }
+ state_set(&child_ep->com, CONNECTING);
+ child_ep->com.tdev = tdev;
+ child_ep->com.cm_id = NULL;
+ child_ep->com.local_addr.sin_family = PF_INET;
+ child_ep->com.local_addr.sin_port = req->local_port;
+ child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
+ child_ep->com.remote_addr.sin_family = PF_INET;
+ child_ep->com.remote_addr.sin_port = req->peer_port;
+ child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
+ get_ep(&parent_ep->com);
+ child_ep->parent_ep = parent_ep;
+ child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid));
+ child_ep->l2t = l2t;
+ child_ep->dst = dst;
+ child_ep->hwtid = hwtid;
+ init_timer(&child_ep->timer);
+ cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid);
+ accept_cr(child_ep, req->peer_ip, skb);
+ goto out;
+reject:
+ reject_cr(tdev, hwtid, req->peer_ip, skb);
+out:
+ return CPL_RET_BUF_DONE;
+}
+
+static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct cpl_pass_establish *req = cplhdr(skb);
+
+ PDBG("%s ep %p\n", __func__, ep);
+ ep->snd_seq = ntohl(req->snd_isn);
+ ep->rcv_seq = ntohl(req->rcv_isn);
+
+ set_emss(ep, ntohs(req->tcp_opt));
+
+ dst_confirm(ep->dst);
+ state_set(&ep->com, MPA_REQ_WAIT);
+ start_ep_timer(ep);
+
+ return CPL_RET_BUF_DONE;
+}
+
+static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct iwch_qp_attributes attrs;
+ unsigned long flags;
+ int disconnect = 1;
+ int release = 0;
+
+ PDBG("%s ep %p\n", __func__, ep);
+ dst_confirm(ep->dst);
+
+ spin_lock_irqsave(&ep->com.lock, flags);
+ switch (ep->com.state) {
+ case MPA_REQ_WAIT:
+ __state_set(&ep->com, CLOSING);
+ break;
+ case MPA_REQ_SENT:
+ __state_set(&ep->com, CLOSING);
+ connect_reply_upcall(ep, -ECONNRESET);
+ break;
+ case MPA_REQ_RCVD:
+
+ /*
+ * We're gonna mark this puppy DEAD, but keep
+ * the reference on it until the ULP accepts or
+ * rejects the CR.
+ */
+ __state_set(&ep->com, CLOSING);
+ get_ep(&ep->com);
+ break;
+ case MPA_REP_SENT:
+ __state_set(&ep->com, CLOSING);
+ ep->com.rpl_done = 1;
+ ep->com.rpl_err = -ECONNRESET;
+ PDBG("waking up ep %p\n", ep);
+ wake_up(&ep->com.waitq);
+ break;
+ case FPDU_MODE:
+ start_ep_timer(ep);
+ __state_set(&ep->com, CLOSING);
+ attrs.next_state = IWCH_QP_STATE_CLOSING;
+ iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
+ peer_close_upcall(ep);
+ break;
+ case ABORTING:
+ disconnect = 0;
+ break;
+ case CLOSING:
+ __state_set(&ep->com, MORIBUND);
+ disconnect = 0;
+ break;
+ case MORIBUND:
+ stop_ep_timer(ep);
+ if (ep->com.cm_id && ep->com.qp) {
+ attrs.next_state = IWCH_QP_STATE_IDLE;
+ iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
+ }
+ close_complete_upcall(ep);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ disconnect = 0;
+ break;
+ case DEAD:
+ disconnect = 0;
+ break;
+ default:
+ BUG_ON(1);
+ }
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+ if (disconnect)
+ iwch_ep_disconnect(ep, 0, GFP_KERNEL);
+ if (release)
+ release_ep_resources(ep);
+ return CPL_RET_BUF_DONE;
+}
+
+/*
+ * Returns whether an ABORT_REQ_RSS message is a negative advice.
+ */
+static int is_neg_adv_abort(unsigned int status)
+{
+ return status == CPL_ERR_RTX_NEG_ADVICE ||
+ status == CPL_ERR_PERSIST_NEG_ADVICE;
+}
+
+static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct cpl_abort_req_rss *req = cplhdr(skb);
+ struct iwch_ep *ep = ctx;
+ struct cpl_abort_rpl *rpl;
+ struct sk_buff *rpl_skb;
+ struct iwch_qp_attributes attrs;
+ int ret;
+ int release = 0;
+ unsigned long flags;
+
+ if (is_neg_adv_abort(req->status)) {
+ PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
+ ep->hwtid);
+ t3_l2t_send_event(ep->com.tdev, ep->l2t);
+ return CPL_RET_BUF_DONE;
+ }
+
+ /*
+ * We get 2 peer aborts from the HW. The first one must
+ * be ignored except for scribbling that we need one more.
+ */
+ if (!(ep->flags & PEER_ABORT_IN_PROGRESS)) {
+ ep->flags |= PEER_ABORT_IN_PROGRESS;
+ return CPL_RET_BUF_DONE;
+ }
+
+ spin_lock_irqsave(&ep->com.lock, flags);
+ PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
+ switch (ep->com.state) {
+ case CONNECTING:
+ break;
+ case MPA_REQ_WAIT:
+ stop_ep_timer(ep);
+ break;
+ case MPA_REQ_SENT:
+ stop_ep_timer(ep);
+ connect_reply_upcall(ep, -ECONNRESET);
+ break;
+ case MPA_REP_SENT:
+ ep->com.rpl_done = 1;
+ ep->com.rpl_err = -ECONNRESET;
+ PDBG("waking up ep %p\n", ep);
+ wake_up(&ep->com.waitq);
+ break;
+ case MPA_REQ_RCVD:
+
+ /*
+ * We're gonna mark this puppy DEAD, but keep
+ * the reference on it until the ULP accepts or
+ * rejects the CR.
+ */
+ get_ep(&ep->com);
+ break;
+ case MORIBUND:
+ case CLOSING:
+ stop_ep_timer(ep);
+ /*FALLTHROUGH*/
+ case FPDU_MODE:
+ if (ep->com.cm_id && ep->com.qp) {
+ attrs.next_state = IWCH_QP_STATE_ERROR;
+ ret = iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ if (ret)
+ printk(KERN_ERR MOD
+ "%s - qp <- error failed!\n",
+ __func__);
+ }
+ peer_abort_upcall(ep);
+ break;
+ case ABORTING:
+ break;
+ case DEAD:
+ PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+ return CPL_RET_BUF_DONE;
+ default:
+ BUG_ON(1);
+ break;
+ }
+ dst_confirm(ep->dst);
+ if (ep->com.state != ABORTING) {
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ }
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+
+ rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
+ if (!rpl_skb) {
+ printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
+ __func__);
+ release = 1;
+ goto out;
+ }
+ rpl_skb->priority = CPL_PRIORITY_DATA;
+ rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
+ rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
+ rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+ OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
+ rpl->cmd = CPL_ABORT_NO_RST;
+ cxgb3_ofld_send(ep->com.tdev, rpl_skb);
+out:
+ if (release)
+ release_ep_resources(ep);
+ return CPL_RET_BUF_DONE;
+}
+
+static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+ struct iwch_qp_attributes attrs;
+ unsigned long flags;
+ int release = 0;
+
+ PDBG("%s ep %p\n", __func__, ep);
+ BUG_ON(!ep);
+
+ /* The cm_id may be null if we failed to connect */
+ spin_lock_irqsave(&ep->com.lock, flags);
+ switch (ep->com.state) {
+ case CLOSING:
+ __state_set(&ep->com, MORIBUND);
+ break;
+ case MORIBUND:
+ stop_ep_timer(ep);
+ if ((ep->com.cm_id) && (ep->com.qp)) {
+ attrs.next_state = IWCH_QP_STATE_IDLE;
+ iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp,
+ IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ }
+ close_complete_upcall(ep);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ break;
+ case ABORTING:
+ case DEAD:
+ break;
+ default:
+ BUG_ON(1);
+ break;
+ }
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+ if (release)
+ release_ep_resources(ep);
+ return CPL_RET_BUF_DONE;
+}
+
+/*
+ * T3A does 3 things when a TERM is received:
+ * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
+ * 2) generate an async event on the QP with the TERMINATE opcode
+ * 3) post a TERMINATE opcode cqe into the associated CQ.
+ *
+ * For (1), we save the message in the qp for later consumer consumption.
+ * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
+ * For (3), we toss the CQE in cxio_poll_cq().
+ *
+ * terminate() handles case (1)...
+ */
+static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep *ep = ctx;
+
+ PDBG("%s ep %p\n", __func__, ep);
+ skb_pull(skb, sizeof(struct cpl_rdma_terminate));
+ PDBG("%s saving %d bytes of term msg\n", __func__, skb->len);
+ skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
+ skb->len);
+ ep->com.qp->attr.terminate_msg_len = skb->len;
+ ep->com.qp->attr.is_terminate_local = 0;
+ return CPL_RET_BUF_DONE;
+}
+
+static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct cpl_rdma_ec_status *rep = cplhdr(skb);
+ struct iwch_ep *ep = ctx;
+
+ PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
+ rep->status);
+ if (rep->status) {
+ struct iwch_qp_attributes attrs;
+
+ printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n",
+ __func__, ep->hwtid);
+ stop_ep_timer(ep);
+ attrs.next_state = IWCH_QP_STATE_ERROR;
+ iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ abort_connection(ep, NULL, GFP_KERNEL);
+ }
+ return CPL_RET_BUF_DONE;
+}
+
+static void ep_timeout(unsigned long arg)
+{
+ struct iwch_ep *ep = (struct iwch_ep *)arg;
+ struct iwch_qp_attributes attrs;
+ unsigned long flags;
+ int abort = 1;
+
+ spin_lock_irqsave(&ep->com.lock, flags);
+ PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
+ ep->com.state);
+ switch (ep->com.state) {
+ case MPA_REQ_SENT:
+ __state_set(&ep->com, ABORTING);
+ connect_reply_upcall(ep, -ETIMEDOUT);
+ break;
+ case MPA_REQ_WAIT:
+ __state_set(&ep->com, ABORTING);
+ break;
+ case CLOSING:
+ case MORIBUND:
+ if (ep->com.cm_id && ep->com.qp) {
+ attrs.next_state = IWCH_QP_STATE_ERROR;
+ iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ }
+ __state_set(&ep->com, ABORTING);
+ break;
+ default:
+ printk(KERN_ERR "%s unexpected state ep %p state %u\n",
+ __func__, ep, ep->com.state);
+ WARN_ON(1);
+ abort = 0;
+ }
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+ if (abort)
+ abort_connection(ep, NULL, GFP_ATOMIC);
+ put_ep(&ep->com);
+}
+
+int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+ int err;
+ struct iwch_ep *ep = to_ep(cm_id);
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+
+ if (state_read(&ep->com) == DEAD) {
+ put_ep(&ep->com);
+ return -ECONNRESET;
+ }
+ BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+ if (mpa_rev == 0)
+ abort_connection(ep, NULL, GFP_KERNEL);
+ else {
+ err = send_mpa_reject(ep, pdata, pdata_len);
+ err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
+ }
+ return 0;
+}
+
+int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ int err;
+ struct iwch_qp_attributes attrs;
+ enum iwch_qp_attr_mask mask;
+ struct iwch_ep *ep = to_ep(cm_id);
+ struct iwch_dev *h = to_iwch_dev(cm_id->device);
+ struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ if (state_read(&ep->com) == DEAD)
+ return -ECONNRESET;
+
+ BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+ BUG_ON(!qp);
+
+ if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
+ (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
+ abort_connection(ep, NULL, GFP_KERNEL);
+ return -EINVAL;
+ }
+
+ cm_id->add_ref(cm_id);
+ ep->com.cm_id = cm_id;
+ ep->com.qp = qp;
+
+ ep->com.rpl_done = 0;
+ ep->com.rpl_err = 0;
+ ep->ird = conn_param->ird;
+ ep->ord = conn_param->ord;
+ PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
+
+ get_ep(&ep->com);
+
+ /* bind QP to EP and move to RTS */
+ attrs.mpa_attr = ep->mpa_attr;
+ attrs.max_ird = ep->ird;
+ attrs.max_ord = ep->ord;
+ attrs.llp_stream_handle = ep;
+ attrs.next_state = IWCH_QP_STATE_RTS;
+
+ /* bind QP and TID with INIT_WR */
+ mask = IWCH_QP_ATTR_NEXT_STATE |
+ IWCH_QP_ATTR_LLP_STREAM_HANDLE |
+ IWCH_QP_ATTR_MPA_ATTR |
+ IWCH_QP_ATTR_MAX_IRD |
+ IWCH_QP_ATTR_MAX_ORD;
+
+ err = iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, mask, &attrs, 1);
+ if (err)
+ goto err;
+
+ /* if needed, wait for wr_ack */
+ if (iwch_rqes_posted(qp)) {
+ wait_event(ep->com.waitq, ep->com.rpl_done);
+ err = ep->com.rpl_err;
+ if (err)
+ goto err;
+ }
+
+ err = send_mpa_reply(ep, conn_param->private_data,
+ conn_param->private_data_len);
+ if (err)
+ goto err;
+
+
+ state_set(&ep->com, FPDU_MODE);
+ established_upcall(ep);
+ put_ep(&ep->com);
+ return 0;
+err:
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ cm_id->rem_ref(cm_id);
+ put_ep(&ep->com);
+ return err;
+}
+
+static int is_loopback_dst(struct iw_cm_id *cm_id)
+{
+ struct net_device *dev;
+
+ dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr);
+ if (!dev)
+ return 0;
+ dev_put(dev);
+ return 1;
+}
+
+int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ int err = 0;
+ struct iwch_dev *h = to_iwch_dev(cm_id->device);
+ struct iwch_ep *ep;
+ struct rtable *rt;
+
+ if (is_loopback_dst(cm_id)) {
+ err = -ENOSYS;
+ goto out;
+ }
+
+ ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
+ if (!ep) {
+ printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ err = -ENOMEM;
+ goto out;
+ }
+ init_timer(&ep->timer);
+ ep->plen = conn_param->private_data_len;
+ if (ep->plen)
+ memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
+ conn_param->private_data, ep->plen);
+ ep->ird = conn_param->ird;
+ ep->ord = conn_param->ord;
+ ep->com.tdev = h->rdev.t3cdev_p;
+
+ cm_id->add_ref(cm_id);
+ ep->com.cm_id = cm_id;
+ ep->com.qp = get_qhp(h, conn_param->qpn);
+ BUG_ON(!ep->com.qp);
+ PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
+ ep->com.qp, cm_id);
+
+ /*
+ * Allocate an active TID to initiate a TCP connection.
+ */
+ ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
+ if (ep->atid == -1) {
+ printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+ err = -ENOMEM;
+ goto fail2;
+ }
+
+ /* find a route */
+ rt = find_route(h->rdev.t3cdev_p,
+ cm_id->local_addr.sin_addr.s_addr,
+ cm_id->remote_addr.sin_addr.s_addr,
+ cm_id->local_addr.sin_port,
+ cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
+ if (!rt) {
+ printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+ err = -EHOSTUNREACH;
+ goto fail3;
+ }
+ ep->dst = &rt->u.dst;
+
+ /* get a l2t entry */
+ ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour,
+ ep->dst->neighbour->dev);
+ if (!ep->l2t) {
+ printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
+ err = -ENOMEM;
+ goto fail4;
+ }
+
+ state_set(&ep->com, CONNECTING);
+ ep->tos = IPTOS_LOWDELAY;
+ ep->com.local_addr = cm_id->local_addr;
+ ep->com.remote_addr = cm_id->remote_addr;
+
+ /* send connect request to rnic */
+ err = send_connect(ep);
+ if (!err)
+ goto out;
+
+ l2t_release(L2DATA(h->rdev.t3cdev_p), ep->l2t);
+fail4:
+ dst_release(ep->dst);
+fail3:
+ cxgb3_free_atid(ep->com.tdev, ep->atid);
+fail2:
+ cm_id->rem_ref(cm_id);
+ put_ep(&ep->com);
+out:
+ return err;
+}
+
+int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+ int err = 0;
+ struct iwch_dev *h = to_iwch_dev(cm_id->device);
+ struct iwch_listen_ep *ep;
+
+
+ might_sleep();
+
+ ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
+ if (!ep) {
+ printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ err = -ENOMEM;
+ goto fail1;
+ }
+ PDBG("%s ep %p\n", __func__, ep);
+ ep->com.tdev = h->rdev.t3cdev_p;
+ cm_id->add_ref(cm_id);
+ ep->com.cm_id = cm_id;
+ ep->backlog = backlog;
+ ep->com.local_addr = cm_id->local_addr;
+
+ /*
+ * Allocate a server TID.
+ */
+ ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
+ if (ep->stid == -1) {
+ printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
+ err = -ENOMEM;
+ goto fail2;
+ }
+
+ state_set(&ep->com, LISTEN);
+ err = listen_start(ep);
+ if (err)
+ goto fail3;
+
+ /* wait for pass_open_rpl */
+ wait_event(ep->com.waitq, ep->com.rpl_done);
+ err = ep->com.rpl_err;
+ if (!err) {
+ cm_id->provider_data = ep;
+ goto out;
+ }
+fail3:
+ cxgb3_free_stid(ep->com.tdev, ep->stid);
+fail2:
+ cm_id->rem_ref(cm_id);
+ put_ep(&ep->com);
+fail1:
+out:
+ return err;
+}
+
+int iwch_destroy_listen(struct iw_cm_id *cm_id)
+{
+ int err;
+ struct iwch_listen_ep *ep = to_listen_ep(cm_id);
+
+ PDBG("%s ep %p\n", __func__, ep);
+
+ might_sleep();
+ state_set(&ep->com, DEAD);
+ ep->com.rpl_done = 0;
+ ep->com.rpl_err = 0;
+ err = listen_stop(ep);
+ wait_event(ep->com.waitq, ep->com.rpl_done);
+ cxgb3_free_stid(ep->com.tdev, ep->stid);
+ err = ep->com.rpl_err;
+ cm_id->rem_ref(cm_id);
+ put_ep(&ep->com);
+ return err;
+}
+
+int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
+{
+ int ret = 0;
+ unsigned long flags;
+ int close = 0;
+
+ spin_lock_irqsave(&ep->com.lock, flags);
+
+ PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
+ states[ep->com.state], abrupt);
+
+ switch (ep->com.state) {
+ case MPA_REQ_WAIT:
+ case MPA_REQ_SENT:
+ case MPA_REQ_RCVD:
+ case MPA_REP_SENT:
+ case FPDU_MODE:
+ close = 1;
+ if (abrupt)
+ ep->com.state = ABORTING;
+ else {
+ ep->com.state = CLOSING;
+ start_ep_timer(ep);
+ }
+ break;
+ case CLOSING:
+ close = 1;
+ if (abrupt) {
+ stop_ep_timer(ep);
+ ep->com.state = ABORTING;
+ } else
+ ep->com.state = MORIBUND;
+ break;
+ case MORIBUND:
+ case ABORTING:
+ case DEAD:
+ PDBG("%s ignoring disconnect ep %p state %u\n",
+ __func__, ep, ep->com.state);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+ if (close) {
+ if (abrupt)
+ ret = send_abort(ep, NULL, gfp);
+ else
+ ret = send_halfclose(ep, gfp);
+ }
+ return ret;
+}
+
+int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
+ struct l2t_entry *l2t)
+{
+ struct iwch_ep *ep = ctx;
+
+ if (ep->dst != old)
+ return 0;
+
+ PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
+ l2t);
+ dst_hold(new);
+ l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+ ep->l2t = l2t;
+ dst_release(old);
+ ep->dst = new;
+ return 1;
+}
+
+/*
+ * All the CM events are handled on a work queue to have a safe context.
+ */
+static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct iwch_ep_common *epc = ctx;
+
+ get_ep(epc);
+
+ /*
+ * Save ctx and tdev in the skb->cb area.
+ */
+ *((void **) skb->cb) = ctx;
+ *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev;
+
+ /*
+ * Queue the skb and schedule the worker thread.
+ */
+ skb_queue_tail(&rxq, skb);
+ queue_work(workq, &skb_work);
+ return 0;
+}
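
sched() itself only stashes the endpoint and device pointers in skb->cb and defers all real work. For orientation, the worker that skb_work is declared against (via DECLARE_WORK()) would have roughly the shape below; this is an illustrative sketch, and in particular the function name, the recovery of the CPL opcode from skb->csum via G_OPCODE(), and the exact cleanup are assumptions rather than part of this patch.

static void process_work(struct work_struct *work)
{
	struct sk_buff *skb;
	void *ep;
	struct t3cdev *tdev;
	int ret;

	while ((skb = skb_dequeue(&rxq))) {
		/* recover what sched() stashed in skb->cb */
		ep = *((void **) (skb->cb));
		tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));

		/* dispatch on the CPL opcode; where the opcode lives
		 * (skb->csum here) is an assumption of this sketch */
		ret = work_handlers[G_OPCODE(ntohl((__force __be32) skb->csum))](tdev, skb, ep);
		if (ret & CPL_RET_BUF_DONE)
			kfree_skb(skb);

		/* drop the reference taken in sched() */
		put_ep((struct iwch_ep_common *) ep);
	}
}
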
+
+static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+{
+ struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
+
+ if (rpl->status != CPL_ERR_NONE) {
+ printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
+ "for tid %u\n", rpl->status, GET_TID(rpl));
+ }
+ return CPL_RET_BUF_DONE;
+}
+
+int __init iwch_cm_init(void)
+{
+ skb_queue_head_init(&rxq);
+
+ workq = create_singlethread_workqueue("iw_cxgb3");
+ if (!workq)
+ return -ENOMEM;
+
+ /*
+ * All upcalls from the T3 Core go to sched() to
+ * schedule the processing on a work queue.
+ */
+ t3c_handlers[CPL_ACT_ESTABLISH] = sched;
+ t3c_handlers[CPL_ACT_OPEN_RPL] = sched;
+ t3c_handlers[CPL_RX_DATA] = sched;
+ t3c_handlers[CPL_TX_DMA_ACK] = sched;
+ t3c_handlers[CPL_ABORT_RPL_RSS] = sched;
+ t3c_handlers[CPL_ABORT_RPL] = sched;
+ t3c_handlers[CPL_PASS_OPEN_RPL] = sched;
+ t3c_handlers[CPL_CLOSE_LISTSRV_RPL] = sched;
+ t3c_handlers[CPL_PASS_ACCEPT_REQ] = sched;
+ t3c_handlers[CPL_PASS_ESTABLISH] = sched;
+ t3c_handlers[CPL_PEER_CLOSE] = sched;
+ t3c_handlers[CPL_CLOSE_CON_RPL] = sched;
+ t3c_handlers[CPL_ABORT_REQ_RSS] = sched;
+ t3c_handlers[CPL_RDMA_TERMINATE] = sched;
+ t3c_handlers[CPL_RDMA_EC_STATUS] = sched;
+ t3c_handlers[CPL_SET_TCB_RPL] = set_tcb_rpl;
+
+ /*
+ * These are the real handlers that are called from a
+ * work queue.
+ */
+ work_handlers[CPL_ACT_ESTABLISH] = act_establish;
+ work_handlers[CPL_ACT_OPEN_RPL] = act_open_rpl;
+ work_handlers[CPL_RX_DATA] = rx_data;
+ work_handlers[CPL_TX_DMA_ACK] = tx_ack;
+ work_handlers[CPL_ABORT_RPL_RSS] = abort_rpl;
+ work_handlers[CPL_ABORT_RPL] = abort_rpl;
+ work_handlers[CPL_PASS_OPEN_RPL] = pass_open_rpl;
+ work_handlers[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl;
+ work_handlers[CPL_PASS_ACCEPT_REQ] = pass_accept_req;
+ work_handlers[CPL_PASS_ESTABLISH] = pass_establish;
+ work_handlers[CPL_PEER_CLOSE] = peer_close;
+ work_handlers[CPL_ABORT_REQ_RSS] = peer_abort;
+ work_handlers[CPL_CLOSE_CON_RPL] = close_con_rpl;
+ work_handlers[CPL_RDMA_TERMINATE] = terminate;
+ work_handlers[CPL_RDMA_EC_STATUS] = ec_status;
+ return 0;
+}
+
+void __exit iwch_cm_term(void)
+{
+ flush_workqueue(workq);
+ destroy_workqueue(workq);
+}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
new file mode 100644
index 0000000..d7c7e09
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _IWCH_CM_H_
+#define _IWCH_CM_H_
+
+#include <linux/inet.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/iw_cm.h>
+
+#include "cxgb3_offload.h"
+#include "iwch_provider.h"
+
+#define MPA_KEY_REQ "MPA ID Req Frame"
+#define MPA_KEY_REP "MPA ID Rep Frame"
+
+#define MPA_MAX_PRIVATE_DATA 256
+#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */
+#define MPA_REJECT 0x20
+#define MPA_CRC 0x40
+#define MPA_MARKERS 0x80
+#define MPA_FLAGS_MASK 0xE0
+
+#define put_ep(ep) { \
+ PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
+ ep, atomic_read(&((ep)->kref.refcount))); \
+ WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
+ kref_put(&((ep)->kref), __free_ep); \
+}
+
+#define get_ep(ep) { \
+ PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
+ ep, atomic_read(&((ep)->kref.refcount))); \
+ kref_get(&((ep)->kref)); \
+}
+
+struct mpa_message {
+ u8 key[16];
+ u8 flags;
+ u8 revision;
+ __be16 private_data_size;
+ u8 private_data[0];
+};
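
The connect path above copies any private data straight behind a struct mpa_message header inside ep->mpa_pkt. As a standalone illustration of that wire layout (plain userspace C; the struct name, helper name and chosen field values are for the example only, picked to match the #defines above):

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* same layout as struct mpa_message, minus the flexible array member */
struct mpa_hdr {
	uint8_t  key[16];
	uint8_t  flags;
	uint8_t  revision;
	uint16_t private_data_size;	/* big endian on the wire */
};

/* build an MPA request frame: fixed header first, private data after */
static size_t build_mpa_req(uint8_t *buf, const void *pdata, uint16_t plen)
{
	struct mpa_hdr h;

	memcpy(h.key, "MPA ID Req Frame", 16);	/* MPA_KEY_REQ */
	h.flags = 0;				/* no MPA_CRC, no MPA_MARKERS */
	h.revision = 0;				/* MPA_REV */
	h.private_data_size = htons(plen);

	memcpy(buf, &h, sizeof(h));
	if (plen)
		memcpy(buf + sizeof(h), pdata, plen);
	return sizeof(h) + plen;
}
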
+
+struct terminate_message {
+ u8 layer_etype;
+ u8 ecode;
+ __be16 hdrct_rsvd;
+ u8 len_hdrs[0];
+};
+
+#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
+
+enum iwch_layers_types {
+ LAYER_RDMAP = 0x00,
+ LAYER_DDP = 0x10,
+ LAYER_MPA = 0x20,
+ RDMAP_LOCAL_CATA = 0x00,
+ RDMAP_REMOTE_PROT = 0x01,
+ RDMAP_REMOTE_OP = 0x02,
+ DDP_LOCAL_CATA = 0x00,
+ DDP_TAGGED_ERR = 0x01,
+ DDP_UNTAGGED_ERR = 0x02,
+ DDP_LLP = 0x03
+};
+
+enum iwch_rdma_ecodes {
+ RDMAP_INV_STAG = 0x00,
+ RDMAP_BASE_BOUNDS = 0x01,
+ RDMAP_ACC_VIOL = 0x02,
+ RDMAP_STAG_NOT_ASSOC = 0x03,
+ RDMAP_TO_WRAP = 0x04,
+ RDMAP_INV_VERS = 0x05,
+ RDMAP_INV_OPCODE = 0x06,
+ RDMAP_STREAM_CATA = 0x07,
+ RDMAP_GLOBAL_CATA = 0x08,
+ RDMAP_CANT_INV_STAG = 0x09,
+ RDMAP_UNSPECIFIED = 0xff
+};
+
+enum iwch_ddp_ecodes {
+ DDPT_INV_STAG = 0x00,
+ DDPT_BASE_BOUNDS = 0x01,
+ DDPT_STAG_NOT_ASSOC = 0x02,
+ DDPT_TO_WRAP = 0x03,
+ DDPT_INV_VERS = 0x04,
+ DDPU_INV_QN = 0x01,
+ DDPU_INV_MSN_NOBUF = 0x02,
+ DDPU_INV_MSN_RANGE = 0x03,
+ DDPU_INV_MO = 0x04,
+ DDPU_MSG_TOOBIG = 0x05,
+ DDPU_INV_VERS = 0x06
+};
+
+enum iwch_mpa_ecodes {
+ MPA_CRC_ERR = 0x02,
+ MPA_MARKER_ERR = 0x03
+};
+
+enum iwch_ep_state {
+ IDLE = 0,
+ LISTEN,
+ CONNECTING,
+ MPA_REQ_WAIT,
+ MPA_REQ_SENT,
+ MPA_REQ_RCVD,
+ MPA_REP_SENT,
+ FPDU_MODE,
+ ABORTING,
+ CLOSING,
+ MORIBUND,
+ DEAD,
+};
+
+enum iwch_ep_flags {
+ PEER_ABORT_IN_PROGRESS = (1 << 0),
+ ABORT_REQ_IN_PROGRESS = (1 << 1),
+};
+
+struct iwch_ep_common {
+ struct iw_cm_id *cm_id;
+ struct iwch_qp *qp;
+ struct t3cdev *tdev;
+ enum iwch_ep_state state;
+ struct kref kref;
+ spinlock_t lock;
+ struct sockaddr_in local_addr;
+ struct sockaddr_in remote_addr;
+ wait_queue_head_t waitq;
+ int rpl_done;
+ int rpl_err;
+};
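
The get_ep()/put_ep() macros above pin an endpoint through the kref embedded in this structure; the last put hands the kref to __free_ep(), whose prototype appears at the end of this header. A minimal sketch of that release callback, assuming nothing more than freeing the allocation (the real callback also has to release any skbs and TID state still held):

void __free_ep(struct kref *kref)
{
	struct iwch_ep_common *epc;

	epc = container_of(kref, struct iwch_ep_common, kref);
	/* com is the first member of both iwch_ep and iwch_listen_ep,
	 * so freeing it frees the whole object from alloc_ep() */
	kfree(epc);
}
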
+
+struct iwch_listen_ep {
+ struct iwch_ep_common com;
+ unsigned int stid;
+ int backlog;
+};
+
+struct iwch_ep {
+ struct iwch_ep_common com;
+ struct iwch_ep *parent_ep;
+ struct timer_list timer;
+ unsigned int atid;
+ u32 hwtid;
+ u32 snd_seq;
+ u32 rcv_seq;
+ struct l2t_entry *l2t;
+ struct dst_entry *dst;
+ struct sk_buff *mpa_skb;
+ struct iwch_mpa_attributes mpa_attr;
+ unsigned int mpa_pkt_len;
+ u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA];
+ u8 tos;
+ u16 emss;
+ u16 plen;
+ u32 ird;
+ u32 ord;
+ u32 flags;
+};
+
+static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id)
+{
+ return cm_id->provider_data;
+}
+
+static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
+{
+ return cm_id->provider_data;
+}
+
+static inline int compute_wscale(int win)
+{
+ int wscale = 0;
+
+ while (wscale < 14 && (65535 << wscale) < win)
+ wscale++;
+ return wscale;
+}
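
compute_wscale() picks the smallest TCP window-scale shift that lets the 16-bit window field cover the requested receive window, capped at 14 as RFC 1323 requires. A standalone check of the arithmetic (plain userspace C that duplicates the loop so the example compiles on its own):

#include <stdio.h>

static int wscale_for(int win)
{
	int wscale = 0;

	while (wscale < 14 && (65535 << wscale) < win)
		wscale++;
	return wscale;
}

int main(void)
{
	/* 64 KiB fits unscaled, 256 KiB needs a shift of 3, 1 MiB needs 5 */
	printf("%d %d %d\n", wscale_for(65535),
	       wscale_for(256 * 1024), wscale_for(1024 * 1024));
	return 0;
}
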
+
+/* CM prototypes */
+
+int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int iwch_create_listen(struct iw_cm_id *cm_id, int backlog);
+int iwch_destroy_listen(struct iw_cm_id *cm_id);
+int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
+int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp);
+int iwch_quiesce_tid(struct iwch_ep *ep);
+int iwch_resume_tid(struct iwch_ep *ep);
+void __free_ep(struct kref *kref);
+void iwch_rearp(struct iwch_ep *ep);
+int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t);
+
+int __init iwch_cm_init(void);
+void __exit iwch_cm_term(void);
+extern int peer2peer;
+
+#endif /* _IWCH_CM_H_ */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
new file mode 100644
index 0000000..cf5474a
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "iwch_provider.h"
+#include "iwch.h"
+
+/*
+ * Get one cq entry from cxio and map it to openib.
+ *
+ * Returns:
+ * 0 CQ empty
+ * 1 cqe returned
+ * -EAGAIN caller must try again
+ * any other -errno fatal error
+ */
+static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
+ struct ib_wc *wc)
+{
+ struct iwch_qp *qhp = NULL;
+ struct t3_cqe cqe, *rd_cqe;
+ struct t3_wq *wq;
+ u32 credit = 0;
+ u8 cqe_flushed;
+ u64 cookie;
+ int ret = 1;
+
+ rd_cqe = cxio_next_cqe(&chp->cq);
+
+ if (!rd_cqe)
+ return 0;
+
+ qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
+ if (!qhp)
+ wq = NULL;
+ else {
+ spin_lock(&qhp->lock);
+ wq = &(qhp->wq);
+ }
+ ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
+ &credit);
+ if (t3a_device(chp->rhp) && credit) {
+ PDBG("%s updating %d cq credits on id %d\n", __func__,
+ credit, chp->cq.cqid);
+ cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
+ }
+
+ if (ret) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ ret = 1;
+
+ wc->wr_id = cookie;
+ wc->qp = &qhp->ibqp;
+ wc->vendor_err = CQE_STATUS(cqe);
+ wc->wc_flags = 0;
+
+ PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x "
+ "lo 0x%x cookie 0x%llx\n", __func__,
+ CQE_QPID(cqe), CQE_TYPE(cqe),
+ CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
+ CQE_WRID_LOW(cqe), (unsigned long long) cookie);
+
+ if (CQE_TYPE(cqe) == 0) {
+ if (!CQE_STATUS(cqe))
+ wc->byte_len = CQE_LEN(cqe);
+ else
+ wc->byte_len = 0;
+ wc->opcode = IB_WC_RECV;
+ if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV ||
+ CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) {
+ wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe);
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ }
+ } else {
+ switch (CQE_OPCODE(cqe)) {
+ case T3_RDMA_WRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case T3_READ_REQ:
+ wc->opcode = IB_WC_RDMA_READ;
+ wc->byte_len = CQE_LEN(cqe);
+ break;
+ case T3_SEND:
+ case T3_SEND_WITH_SE:
+ case T3_SEND_WITH_INV:
+ case T3_SEND_WITH_SE_INV:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case T3_BIND_MW:
+ wc->opcode = IB_WC_BIND_MW;
+ break;
+
+ case T3_LOCAL_INV:
+ wc->opcode = IB_WC_LOCAL_INV;
+ break;
+ case T3_FAST_REGISTER:
+ wc->opcode = IB_WC_FAST_REG_MR;
+ break;
+ default:
+ printk(KERN_ERR MOD "Unexpected opcode %d "
+ "in the CQE received for QPID=0x%0x\n",
+ CQE_OPCODE(cqe), CQE_QPID(cqe));
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (cqe_flushed)
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ else {
+
+ switch (CQE_STATUS(cqe)) {
+ case TPT_ERR_SUCCESS:
+ wc->status = IB_WC_SUCCESS;
+ break;
+ case TPT_ERR_STAG:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case TPT_ERR_PDID:
+ wc->status = IB_WC_LOC_PROT_ERR;
+ break;
+ case TPT_ERR_QPID:
+ case TPT_ERR_ACCESS:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case TPT_ERR_WRAP:
+ wc->status = IB_WC_GENERAL_ERR;
+ break;
+ case TPT_ERR_BOUND:
+ wc->status = IB_WC_LOC_LEN_ERR;
+ break;
+ case TPT_ERR_INVALIDATE_SHARED_MR:
+ case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ wc->status = IB_WC_MW_BIND_ERR;
+ break;
+ case TPT_ERR_CRC:
+ case TPT_ERR_MARKER:
+ case TPT_ERR_PDU_LEN_ERR:
+ case TPT_ERR_OUT_OF_RQE:
+ case TPT_ERR_DDP_VERSION:
+ case TPT_ERR_RDMA_VERSION:
+ case TPT_ERR_DDP_QUEUE_NUM:
+ case TPT_ERR_MSN:
+ case TPT_ERR_TBIT:
+ case TPT_ERR_MO:
+ case TPT_ERR_MSN_RANGE:
+ case TPT_ERR_IRD_OVERFLOW:
+ case TPT_ERR_OPCODE:
+ wc->status = IB_WC_FATAL_ERR;
+ break;
+ case TPT_ERR_SWFLUSH:
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ break;
+ default:
+ printk(KERN_ERR MOD "Unexpected cqe_status 0x%x for "
+ "QPID=0x%0x\n", CQE_STATUS(cqe), CQE_QPID(cqe));
+ ret = -EINVAL;
+ }
+ }
+out:
+ if (wq)
+ spin_unlock(&qhp->lock);
+ return ret;
+}
+
+int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct iwch_dev *rhp;
+ struct iwch_cq *chp;
+ unsigned long flags;
+ int npolled;
+ int err = 0;
+
+ chp = to_iwch_cq(ibcq);
+ rhp = chp->rhp;
+
+ spin_lock_irqsave(&chp->lock, flags);
+ for (npolled = 0; npolled < num_entries; ++npolled) {
+#ifdef DEBUG
+ int i = 0;
+#endif
+
+ /*
+ * Because T3 can post CQEs that are _not_ associated
+ * with a WR, we might have to poll again after removing
+ * one of these.
+ */
+ do {
+ err = iwch_poll_cq_one(rhp, chp, wc + npolled);
+#ifdef DEBUG
+ BUG_ON(++i > 1000);
+#endif
+ } while (err == -EAGAIN);
+ if (err <= 0)
+ break;
+ }
+ spin_unlock_irqrestore(&chp->lock, flags);
+
+ if (err < 0)
+ return err;
+ return npolled;
+}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
new file mode 100644
index 0000000..7b67a67
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/slab.h>
+#include <linux/mman.h>
+#include <net/sock.h>
+#include "iwch_provider.h"
+#include "iwch.h"
+#include "iwch_cm.h"
+#include "cxio_hal.h"
+#include "cxio_wr.h"
+
+static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
+ struct respQ_msg_t *rsp_msg,
+ enum ib_event_type ib_event,
+ int send_term)
+{
+ struct ib_event event;
+ struct iwch_qp_attributes attrs;
+ struct iwch_qp *qhp;
+
+ spin_lock(&rnicp->lock);
+ qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
+
+ if (!qhp) {
+ printk(KERN_ERR "%s unaffiliated error 0x%x qpid 0x%x\n",
+ __func__, CQE_STATUS(rsp_msg->cqe),
+ CQE_QPID(rsp_msg->cqe));
+ spin_unlock(&rnicp->lock);
+ return;
+ }
+
+ if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
+ (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
+ PDBG("%s AE received after RTS - "
+ "qp state %d qpid 0x%x status 0x%x\n", __func__,
+ qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe));
+ spin_unlock(&rnicp->lock);
+ return;
+ }
+
+ printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x "
+ "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __func__,
+ CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
+ CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
+ CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+
+ atomic_inc(&qhp->refcnt);
+ spin_unlock(&rnicp->lock);
+
+ event.event = ib_event;
+ event.device = chp->ibcq.device;
+ if (ib_event == IB_EVENT_CQ_ERR)
+ event.element.cq = &chp->ibcq;
+ else
+ event.element.qp = &qhp->ibqp;
+
+ if (qhp->ibqp.event_handler)
+ (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
+
+ if (qhp->attr.state == IWCH_QP_STATE_RTS) {
+ attrs.next_state = IWCH_QP_STATE_TERMINATE;
+ iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ if (send_term)
+ iwch_post_terminate(qhp, rsp_msg);
+ }
+
+ if (atomic_dec_and_test(&qhp->refcnt))
+ wake_up(&qhp->wait);
+}
+
+void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
+{
+ struct iwch_dev *rnicp;
+ struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
+ struct iwch_cq *chp;
+ struct iwch_qp *qhp;
+ u32 cqid = RSPQ_CQID(rsp_msg);
+
+ rnicp = (struct iwch_dev *) rdev_p->ulp;
+ spin_lock(&rnicp->lock);
+ chp = get_chp(rnicp, cqid);
+ qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
+ if (!chp || !qhp) {
+ printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
+ "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x \n",
+ cqid, CQE_QPID(rsp_msg->cqe),
+ CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+ CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
+ CQE_WRID_LOW(rsp_msg->cqe));
+ spin_unlock(&rnicp->lock);
+ goto out;
+ }
+ iwch_qp_add_ref(&qhp->ibqp);
+ atomic_inc(&chp->refcnt);
+ spin_unlock(&rnicp->lock);
+
+ /*
+ * 1) completion of our sending a TERMINATE.
+ * 2) incoming TERMINATE message.
+ */
+ if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) &&
+ (CQE_STATUS(rsp_msg->cqe) == 0)) {
+ if (SQ_TYPE(rsp_msg->cqe)) {
+ PDBG("%s QPID 0x%x ep %p disconnecting\n",
+ __func__, qhp->wq.qpid, qhp->ep);
+ iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
+ } else {
+ PDBG("%s post REQ_ERR AE QPID 0x%x\n", __func__,
+ qhp->wq.qpid);
+ post_qp_event(rnicp, chp, rsp_msg,
+ IB_EVENT_QP_REQ_ERR, 0);
+ iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
+ }
+ goto done;
+ }
+
+ /* Bad incoming Read request */
+ if (SQ_TYPE(rsp_msg->cqe) &&
+ (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) {
+ post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
+ goto done;
+ }
+
+ /* Bad incoming write */
+ if (RQ_TYPE(rsp_msg->cqe) &&
+ (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) {
+ post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
+ goto done;
+ }
+
+ switch (CQE_STATUS(rsp_msg->cqe)) {
+
+ /* Completion Events */
+ case TPT_ERR_SUCCESS:
+
+ /*
+ * Confirm the destination entry if this is a send completion.
+ */
+ if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
+ dst_confirm(qhp->ep->dst);
+ (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+ break;
+
+ case TPT_ERR_STAG:
+ case TPT_ERR_PDID:
+ case TPT_ERR_QPID:
+ case TPT_ERR_ACCESS:
+ case TPT_ERR_WRAP:
+ case TPT_ERR_BOUND:
+ case TPT_ERR_INVALIDATE_SHARED_MR:
+ case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ printk(KERN_ERR "%s - CQE Err qpid 0x%x opcode %d status 0x%x "
+ "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __func__,
+ CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
+ CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
+ CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+ (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+ post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
+ break;
+
+ /* Device Fatal Errors */
+ case TPT_ERR_ECC:
+ case TPT_ERR_ECC_PSTAG:
+ case TPT_ERR_INTERNAL_ERR:
+ post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1);
+ break;
+
+ /* QP Fatal Errors */
+ case TPT_ERR_OUT_OF_RQE:
+ case TPT_ERR_PBL_ADDR_BOUND:
+ case TPT_ERR_CRC:
+ case TPT_ERR_MARKER:
+ case TPT_ERR_PDU_LEN_ERR:
+ case TPT_ERR_DDP_VERSION:
+ case TPT_ERR_RDMA_VERSION:
+ case TPT_ERR_OPCODE:
+ case TPT_ERR_DDP_QUEUE_NUM:
+ case TPT_ERR_MSN:
+ case TPT_ERR_TBIT:
+ case TPT_ERR_MO:
+ case TPT_ERR_MSN_GAP:
+ case TPT_ERR_MSN_RANGE:
+ case TPT_ERR_RQE_ADDR_BOUND:
+ case TPT_ERR_IRD_OVERFLOW:
+ post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
+ break;
+
+ default:
+ printk(KERN_ERR MOD "Unknown T3 status 0x%x QPID 0x%x\n",
+ CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid);
+ post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
+ break;
+ }
+done:
+ if (atomic_dec_and_test(&chp->refcnt))
+ wake_up(&chp->wait);
+ iwch_qp_rem_ref(&qhp->ibqp);
+out:
+ dev_kfree_skb_irq(skb);
+}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
new file mode 100644
index 0000000..ec49a5c
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <asm/byteorder.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+
+#include "cxio_hal.h"
+#include "cxio_resource.h"
+#include "iwch.h"
+#include "iwch_provider.h"
+
+static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
+{
+ u32 mmid;
+
+ mhp->attr.state = 1;
+ mhp->attr.stag = stag;
+ mmid = stag >> 8;
+ mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
+ PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+}
+
+int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp, int shift)
+{
+ u32 stag;
+
+ if (cxio_register_phys_mem(&rhp->rdev,
+ &stag, mhp->attr.pdid,
+ mhp->attr.perms,
+ mhp->attr.zbva,
+ mhp->attr.va_fbo,
+ mhp->attr.len,
+ shift - 12,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr))
+ return -ENOMEM;
+
+ iwch_finish_mem_reg(mhp, stag);
+
+ return 0;
+}
+
+int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp,
+ int shift,
+ int npages)
+{
+ u32 stag;
+
+ /* We could support this... */
+ if (npages > mhp->attr.pbl_size)
+ return -ENOMEM;
+
+ stag = mhp->attr.stag;
+ if (cxio_reregister_phys_mem(&rhp->rdev,
+ &stag, mhp->attr.pdid,
+ mhp->attr.perms,
+ mhp->attr.zbva,
+ mhp->attr.va_fbo,
+ mhp->attr.len,
+ shift - 12,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr))
+ return -ENOMEM;
+
+ iwch_finish_mem_reg(mhp, stag);
+
+ return 0;
+}
+
+int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
+{
+ mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
+ npages << 3);
+
+ if (!mhp->attr.pbl_addr)
+ return -ENOMEM;
+
+ mhp->attr.pbl_size = npages;
+
+ return 0;
+}
+
+void iwch_free_pbl(struct iwch_mr *mhp)
+{
+ cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+ mhp->attr.pbl_size << 3);
+}
+
+int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
+{
+ return cxio_write_pbl(&mhp->rhp->rdev, pages,
+ mhp->attr.pbl_addr + (offset << 3), npages);
+}
+
+int build_phys_page_list(struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ u64 *iova_start,
+ u64 *total_size,
+ int *npages,
+ int *shift,
+ __be64 **page_list)
+{
+ u64 mask;
+ int i, j, n;
+
+ mask = 0;
+ *total_size = 0;
+ for (i = 0; i < num_phys_buf; ++i) {
+ if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
+ return -EINVAL;
+ if (i != 0 && i != num_phys_buf - 1 &&
+ (buffer_list[i].size & ~PAGE_MASK))
+ return -EINVAL;
+ *total_size += buffer_list[i].size;
+ if (i > 0)
+ mask |= buffer_list[i].addr;
+ else
+ mask |= buffer_list[i].addr & PAGE_MASK;
+ if (i != num_phys_buf - 1)
+ mask |= buffer_list[i].addr + buffer_list[i].size;
+ else
+ mask |= (buffer_list[i].addr + buffer_list[i].size +
+ PAGE_SIZE - 1) & PAGE_MASK;
+ }
+
+ if (*total_size > 0xFFFFFFFFULL)
+ return -ENOMEM;
+
+ /* Find largest page shift we can use to cover buffers */
+ for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
+ if ((1ULL << *shift) & mask)
+ break;
+
+ buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
+ buffer_list[0].addr &= ~0ull << *shift;
+
+ *npages = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ *npages += (buffer_list[i].size +
+ (1ULL << *shift) - 1) >> *shift;
+
+ if (!*npages)
+ return -EINVAL;
+
+ *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
+ if (!*page_list)
+ return -ENOMEM;
+
+ n = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ for (j = 0;
+ j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
+ ++j)
+ (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
+ ((u64) j << *shift));
+
+ PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
+ __func__, (unsigned long long) *iova_start,
+ (unsigned long long) mask, *shift, (unsigned long long) *total_size,
+ *npages);
+
+ return 0;
+}
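
build_phys_page_list() folds every buffer boundary into a mask and takes the lowest set bit at or above PAGE_SHIFT as the page shift, i.e. the largest hardware page size every boundary happens to be aligned to. A standalone run-through of the single-buffer case (plain userspace C; the 1 MiB/4 MiB numbers are invented for the example):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1ULL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
	/* one contiguous buffer: 4 MiB long, starting at 1 MiB */
	uint64_t addr = 0x100000, size = 0x400000, mask;
	int shift;

	mask  = addr & PAGE_MASK;				/* start boundary */
	mask |= (addr + size + PAGE_SIZE - 1) & PAGE_MASK;	/* end boundary */

	for (shift = PAGE_SHIFT; shift < 27; ++shift)
		if ((1ULL << shift) & mask)
			break;

	/* prints "shift 20 npages 4": four 1 MiB pages instead of
	 * 1024 4 KiB ones */
	printf("shift %d npages %llu\n", shift,
	       (unsigned long long) ((size + (1ULL << shift) - 1) >> shift));
	return 0;
}
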
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
new file mode 100644
index 0000000..160ef48
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -0,0 +1,1406 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/ethtool.h>
+#include <linux/rtnetlink.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "cxio_hal.h"
+#include "iwch.h"
+#include "iwch_provider.h"
+#include "iwch_cm.h"
+#include "iwch_user.h"
+#include "common.h"
+
+static int iwch_modify_port(struct ib_device *ibdev,
+ u8 port, int port_modify_mask,
+ struct ib_port_modify *props)
+{
+ return -ENOSYS;
+}
+
+static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr)
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+static int iwch_ah_destroy(struct ib_ah *ah)
+{
+ return -ENOSYS;
+}
+
+static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ return -ENOSYS;
+}
+
+static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ return -ENOSYS;
+}
+
+static int iwch_process_mad(struct ib_device *ibdev,
+ int mad_flags,
+ u8 port_num,
+ struct ib_wc *in_wc,
+ struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ return -ENOSYS;
+}
+
+static int iwch_dealloc_ucontext(struct ib_ucontext *context)
+{
+ struct iwch_dev *rhp = to_iwch_dev(context->device);
+ struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
+ struct iwch_mm_entry *mm, *tmp;
+
+ PDBG("%s context %p\n", __func__, context);
+ list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
+ kfree(mm);
+ cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
+ kfree(ucontext);
+ return 0;
+}
+
+static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct iwch_ucontext *context;
+ struct iwch_dev *rhp = to_iwch_dev(ibdev);
+
+ PDBG("%s ibdev %p\n", __func__, ibdev);
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+ cxio_init_ucontext(&rhp->rdev, &context->uctx);
+ INIT_LIST_HEAD(&context->mmaps);
+ spin_lock_init(&context->mmap_lock);
+ return &context->ibucontext;
+}
+
+static int iwch_destroy_cq(struct ib_cq *ib_cq)
+{
+ struct iwch_cq *chp;
+
+ PDBG("%s ib_cq %p\n", __func__, ib_cq);
+ chp = to_iwch_cq(ib_cq);
+
+ remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
+ atomic_dec(&chp->refcnt);
+ wait_event(chp->wait, !atomic_read(&chp->refcnt));
+
+ cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
+ kfree(chp);
+ return 0;
+}
+
+static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
+ struct ib_ucontext *ib_context,
+ struct ib_udata *udata)
+{
+ struct iwch_dev *rhp;
+ struct iwch_cq *chp;
+ struct iwch_create_cq_resp uresp;
+ struct iwch_create_cq_req ureq;
+ struct iwch_ucontext *ucontext = NULL;
+
+ PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
+ rhp = to_iwch_dev(ibdev);
+ chp = kzalloc(sizeof(*chp), GFP_KERNEL);
+ if (!chp)
+ return ERR_PTR(-ENOMEM);
+
+ if (ib_context) {
+ ucontext = to_iwch_ucontext(ib_context);
+ if (!t3a_device(rhp)) {
+ if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) {
+ kfree(chp);
+ return ERR_PTR(-EFAULT);
+ }
+ chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr;
+ }
+ }
+
+ if (t3a_device(rhp)) {
+
+ /*
+ * T3A: Add some fluff to handle extra CQEs inserted
+ * for various errors.
+ * Additional CQE possibilities:
+ * TERMINATE,
+ * incoming RDMA WRITE failures,
+ * incoming RDMA READ REQUEST failures.
+ * NOTE: We cannot ensure the CQ won't overflow.
+ */
+ entries += 16;
+ }
+ entries = roundup_pow_of_two(entries);
+ chp->cq.size_log2 = ilog2(entries);
+
+ if (cxio_create_cq(&rhp->rdev, &chp->cq)) {
+ kfree(chp);
+ return ERR_PTR(-ENOMEM);
+ }
+ chp->rhp = rhp;
+ chp->ibcq.cqe = 1 << chp->cq.size_log2;
+ spin_lock_init(&chp->lock);
+ atomic_set(&chp->refcnt, 1);
+ init_waitqueue_head(&chp->wait);
+ insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+
+ if (ucontext) {
+ struct iwch_mm_entry *mm;
+
+ mm = kmalloc(sizeof *mm, GFP_KERNEL);
+ if (!mm) {
+ iwch_destroy_cq(&chp->ibcq);
+ return ERR_PTR(-ENOMEM);
+ }
+ uresp.cqid = chp->cq.cqid;
+ uresp.size_log2 = chp->cq.size_log2;
+ spin_lock(&ucontext->mmap_lock);
+ uresp.key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ spin_unlock(&ucontext->mmap_lock);
+ if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+ kfree(mm);
+ iwch_destroy_cq(&chp->ibcq);
+ return ERR_PTR(-EFAULT);
+ }
+ mm->key = uresp.key;
+ mm->addr = virt_to_phys(chp->cq.queue);
+ mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
+ sizeof (struct t3_cqe));
+ insert_mmap(ucontext, mm);
+ }
+ PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
+ chp->cq.cqid, chp, (1 << chp->cq.size_log2),
+ (unsigned long long) chp->cq.dma_addr);
+ return &chp->ibcq;
+}
+
+static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+{
+#ifdef notyet
+ struct iwch_cq *chp = to_iwch_cq(cq);
+ struct t3_cq oldcq, newcq;
+ int ret;
+
+ PDBG("%s ib_cq %p cqe %d\n", __func__, cq, cqe);
+
+ /* We don't downsize... */
+ if (cqe <= cq->cqe)
+ return 0;
+
+ /* create new t3_cq with new size */
+ cqe = roundup_pow_of_two(cqe+1);
+ newcq.size_log2 = ilog2(cqe);
+
+ /* Dont allow resize to less than the current wce count */
+ if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) {
+ return -ENOMEM;
+ }
+
+ /* Quiesce all QPs using this CQ */
+ ret = iwch_quiesce_qps(chp);
+ if (ret) {
+ return ret;
+ }
+
+ ret = cxio_create_cq(&chp->rhp->rdev, &newcq);
+ if (ret) {
+ return ret;
+ }
+
+ /* copy CQEs */
+ memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) *
+ sizeof(struct t3_cqe));
+
+ /* old iwch_qp gets new t3_cq but keeps old cqid */
+ oldcq = chp->cq;
+ chp->cq = newcq;
+ chp->cq.cqid = oldcq.cqid;
+
+ /* resize new t3_cq to update the HW context */
+ ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq);
+ if (ret) {
+ chp->cq = oldcq;
+ return ret;
+ }
+ chp->ibcq.cqe = (1<<chp->cq.size_log2) - 1;
+
+ /* destroy old t3_cq */
+ oldcq.cqid = newcq.cqid;
+ ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
+ if (ret) {
+ printk(KERN_ERR MOD "%s - cxio_destroy_cq failed %d\n",
+ __func__, ret);
+ }
+
+ /* add user hooks here */
+
+ /* resume qps */
+ ret = iwch_resume_qps(chp);
+ return ret;
+#else
+ return -ENOSYS;
+#endif
+}
+
+static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct iwch_dev *rhp;
+ struct iwch_cq *chp;
+ enum t3_cq_opcode cq_op;
+ int err;
+ unsigned long flag;
+ u32 rptr;
+
+ chp = to_iwch_cq(ibcq);
+ rhp = chp->rhp;
+ if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
+ cq_op = CQ_ARM_SE;
+ else
+ cq_op = CQ_ARM_AN;
+ if (chp->user_rptr_addr) {
+ if (get_user(rptr, chp->user_rptr_addr))
+ return -EFAULT;
+ spin_lock_irqsave(&chp->lock, flag);
+ chp->cq.rptr = rptr;
+ } else
+ spin_lock_irqsave(&chp->lock, flag);
+ PDBG("%s rptr 0x%x\n", __func__, chp->cq.rptr);
+ err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
+ spin_unlock_irqrestore(&chp->lock, flag);
+ if (err < 0)
+ printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err,
+ chp->cq.cqid);
+ if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
+ err = 0;
+ return err;
+}
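
iwch_arm_cq() only returns a positive value when IB_CQ_REPORT_MISSED_EVENTS is requested and completions are already pending, which is what lets a consumer close the race between emptying the CQ and re-arming it. A sketch of the consumer-side idiom this supports (hypothetical ULP code, not part of this driver):

#include <rdma/ib_verbs.h>

static void drain_and_rearm(struct ib_cq *cq,
			    void (*handle)(struct ib_wc *wc))
{
	struct ib_wc wc;

	do {
		/* empty the CQ first */
		while (ib_poll_cq(cq, 1, &wc) > 0)
			handle(&wc);
		/* re-arm; a positive return means completions raced in,
		 * so poll again instead of waiting for an interrupt */
	} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
				  IB_CQ_REPORT_MISSED_EVENTS) > 0);
}
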
+
+static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ int len = vma->vm_end - vma->vm_start;
+ u32 key = vma->vm_pgoff << PAGE_SHIFT;
+ struct cxio_rdev *rdev_p;
+ int ret = 0;
+ struct iwch_mm_entry *mm;
+ struct iwch_ucontext *ucontext;
+ u64 addr;
+
+ PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
+ key, len);
+
+ if (vma->vm_start & (PAGE_SIZE-1)) {
+ return -EINVAL;
+ }
+
+ rdev_p = &(to_iwch_dev(context->device)->rdev);
+ ucontext = to_iwch_ucontext(context);
+
+ mm = remove_mmap(ucontext, key, len);
+ if (!mm)
+ return -EINVAL;
+ addr = mm->addr;
+ kfree(mm);
+
+ if ((addr >= rdev_p->rnic_info.udbell_physbase) &&
+ (addr < (rdev_p->rnic_info.udbell_physbase +
+ rdev_p->rnic_info.udbell_len))) {
+
+ /*
+ * Map T3 DB register.
+ */
+ if (vma->vm_flags & VM_READ) {
+ return -EPERM;
+ }
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
+ vma->vm_flags &= ~VM_MAYREAD;
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ } else {
+
+ /*
+ * Map WQ or CQ contig dma memory...
+ */
+ ret = remap_pfn_range(vma, vma->vm_start,
+ addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ }
+
+ return ret;
+}
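
The keys handed out by iwch_create_cq()/iwch_create_qp() are consumed here: userspace passes the key back as the mmap offset, and remove_mmap() turns it into the physical address to map. On the library side the call would look roughly like this (userspace C; the helper name and error handling are illustrative, not the actual libcxgb3 code):

#include <stdint.h>
#include <sys/types.h>
#include <sys/mman.h>

/* uverbs_fd is the open uverbs device file descriptor; key and len come
 * from the create_cq/create_qp response written by the kernel above */
static void *map_queue(int uverbs_fd, uint64_t key, size_t len)
{
	void *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
		 uverbs_fd, (off_t) key);
	return p == MAP_FAILED ? NULL : p;
}
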
+
+static int iwch_deallocate_pd(struct ib_pd *pd)
+{
+ struct iwch_dev *rhp;
+ struct iwch_pd *php;
+
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+ PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
+ cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
+ kfree(php);
+ return 0;
+}
+
+static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct iwch_pd *php;
+ u32 pdid;
+ struct iwch_dev *rhp;
+
+ PDBG("%s ibdev %p\n", __func__, ibdev);
+ rhp = (struct iwch_dev *) ibdev;
+ pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
+ if (!pdid)
+ return ERR_PTR(-EINVAL);
+ php = kzalloc(sizeof(*php), GFP_KERNEL);
+ if (!php) {
+ cxio_hal_put_pdid(rhp->rdev.rscp, pdid);
+ return ERR_PTR(-ENOMEM);
+ }
+ php->pdid = pdid;
+ php->rhp = rhp;
+ if (context) {
+ if (ib_copy_to_udata(udata, &php->pdid, sizeof (__u32))) {
+ iwch_deallocate_pd(&php->ibpd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+ PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
+ return &php->ibpd;
+}
+
+static int iwch_dereg_mr(struct ib_mr *ib_mr)
+{
+ struct iwch_dev *rhp;
+ struct iwch_mr *mhp;
+ u32 mmid;
+
+ PDBG("%s ib_mr %p\n", __func__, ib_mr);
+ /* There can be no memory windows */
+ if (atomic_read(&ib_mr->usecnt))
+ return -EINVAL;
+
+ mhp = to_iwch_mr(ib_mr);
+ rhp = mhp->rhp;
+ mmid = mhp->attr.stag >> 8;
+ cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+ iwch_free_pbl(mhp);
+ remove_handle(rhp, &rhp->mmidr, mmid);
+ if (mhp->kva)
+ kfree((void *) (unsigned long) mhp->kva);
+ if (mhp->umem)
+ ib_umem_release(mhp->umem);
+ PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
+ kfree(mhp);
+ return 0;
+}
+
+static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ int acc,
+ u64 *iova_start)
+{
+ __be64 *page_list;
+ int shift;
+ u64 total_size;
+ int npages;
+ struct iwch_dev *rhp;
+ struct iwch_pd *php;
+ struct iwch_mr *mhp;
+ int ret;
+
+ PDBG("%s ib_pd %p\n", __func__, pd);
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+
+ mhp->rhp = rhp;
+
+ /* First check that we have enough alignment */
+ if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (num_phys_buf > 1 &&
+ ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
+ &total_size, &npages, &shift, &page_list);
+ if (ret)
+ goto err;
+
+ ret = iwch_alloc_pbl(mhp, npages);
+ if (ret) {
+ kfree(page_list);
+ goto err_pbl;
+ }
+
+ ret = iwch_write_pbl(mhp, page_list, npages, 0);
+ kfree(page_list);
+ if (ret)
+ goto err_pbl;
+
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.zbva = 0;
+
+ mhp->attr.perms = iwch_ib_to_tpt_access(acc);
+ mhp->attr.va_fbo = *iova_start;
+ mhp->attr.page_size = shift - 12;
+
+ mhp->attr.len = (u32) total_size;
+ mhp->attr.pbl_size = npages;
+ ret = iwch_register_mem(rhp, php, mhp, shift);
+ if (ret)
+ goto err_pbl;
+
+ return &mhp->ibmr;
+
+err_pbl:
+ iwch_free_pbl(mhp);
+
+err:
+ kfree(mhp);
+ return ERR_PTR(ret);
+
+}
+
+static int iwch_reregister_phys_mem(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ int acc, u64 * iova_start)
+{
+
+ struct iwch_mr mh, *mhp;
+ struct iwch_pd *php;
+ struct iwch_dev *rhp;
+ __be64 *page_list = NULL;
+ int shift = 0;
+ u64 total_size;
+ int npages;
+ int ret;
+
+ PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
+
+ /* There can be no memory windows */
+ if (atomic_read(&mr->usecnt))
+ return -EINVAL;
+
+ mhp = to_iwch_mr(mr);
+ rhp = mhp->rhp;
+ php = to_iwch_pd(mr->pd);
+
+ /* make sure we are on the same adapter */
+ if (rhp != php->rhp)
+ return -EINVAL;
+
+ memcpy(&mh, mhp, sizeof *mhp);
+
+ if (mr_rereg_mask & IB_MR_REREG_PD)
+ php = to_iwch_pd(pd);
+ if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+ mh.attr.perms = iwch_ib_to_tpt_access(acc);
+ if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+ ret = build_phys_page_list(buffer_list, num_phys_buf,
+ iova_start,
+ &total_size, &npages,
+ &shift, &page_list);
+ if (ret)
+ return ret;
+ }
+
+ ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
+ kfree(page_list);
+ if (ret) {
+ return ret;
+ }
+ if (mr_rereg_mask & IB_MR_REREG_PD)
+ mhp->attr.pdid = php->pdid;
+ if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+ mhp->attr.perms = iwch_ib_to_tpt_access(acc);
+ if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+ mhp->attr.zbva = 0;
+ mhp->attr.va_fbo = *iova_start;
+ mhp->attr.page_size = shift - 12;
+ mhp->attr.len = (u32) total_size;
+ mhp->attr.pbl_size = npages;
+ }
+
+ return 0;
+}
+
+static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata)
+{
+ __be64 *pages;
+ int shift, n, len;
+ int i, j, k;
+ int err = 0;
+ struct ib_umem_chunk *chunk;
+ struct iwch_dev *rhp;
+ struct iwch_pd *php;
+ struct iwch_mr *mhp;
+ struct iwch_reg_user_mr_resp uresp;
+
+ PDBG("%s ib_pd %p\n", __func__, pd);
+
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+
+ mhp->rhp = rhp;
+
+ mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+ if (IS_ERR(mhp->umem)) {
+ err = PTR_ERR(mhp->umem);
+ kfree(mhp);
+ return ERR_PTR(err);
+ }
+
+ shift = ffs(mhp->umem->page_size) - 1;
+
+ n = 0;
+ list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
+ n += chunk->nents;
+
+ err = iwch_alloc_pbl(mhp, n);
+ if (err)
+ goto err;
+
+ pages = (__be64 *) __get_free_page(GFP_KERNEL);
+ if (!pages) {
+ err = -ENOMEM;
+ goto err_pbl;
+ }
+
+ i = n = 0;
+
+ list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
+ for (j = 0; j < chunk->nmap; ++j) {
+ len = sg_dma_len(&chunk->page_list[j]) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = cpu_to_be64(sg_dma_address(
+ &chunk->page_list[j]) +
+ mhp->umem->page_size * k);
+ if (i == PAGE_SIZE / sizeof *pages) {
+ err = iwch_write_pbl(mhp, pages, i, n);
+ if (err)
+ goto pbl_done;
+ n += i;
+ i = 0;
+ }
+ }
+ }
+
+ if (i)
+ err = iwch_write_pbl(mhp, pages, i, n);
+
+pbl_done:
+ free_page((unsigned long) pages);
+ if (err)
+ goto err_pbl;
+
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.zbva = 0;
+ mhp->attr.perms = iwch_ib_to_tpt_access(acc);
+ mhp->attr.va_fbo = virt;
+ mhp->attr.page_size = shift - 12;
+ mhp->attr.len = (u32) length;
+
+ err = iwch_register_mem(rhp, php, mhp, shift);
+ if (err)
+ goto err_pbl;
+
+ if (udata && !t3a_device(rhp)) {
+ uresp.pbl_addr = (mhp->attr.pbl_addr -
+ rhp->rdev.rnic_info.pbl_base) >> 3;
+ PDBG("%s user resp pbl_addr 0x%x\n", __func__,
+ uresp.pbl_addr);
+
+ if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+ iwch_dereg_mr(&mhp->ibmr);
+ err = -EFAULT;
+ goto err;
+ }
+ }
+
+ return &mhp->ibmr;
+
+err_pbl:
+ iwch_free_pbl(mhp);
+
+err:
+ ib_umem_release(mhp->umem);
+ kfree(mhp);
+ return ERR_PTR(err);
+}
+
+static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct ib_phys_buf bl;
+ u64 kva;
+ struct ib_mr *ibmr;
+
+ PDBG("%s ib_pd %p\n", __func__, pd);
+
+ /*
+ * T3 only supports 32 bits of size.
+ */
+ bl.size = 0xffffffff;
+ bl.addr = 0;
+ kva = 0;
+ ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva);
+ return ibmr;
+}
+
+static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
+{
+ struct iwch_dev *rhp;
+ struct iwch_pd *php;
+ struct iwch_mw *mhp;
+ u32 mmid;
+ u32 stag = 0;
+ int ret;
+
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+ ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid);
+ if (ret) {
+ kfree(mhp);
+ return ERR_PTR(ret);
+ }
+ mhp->rhp = rhp;
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.type = TPT_MW;
+ mhp->attr.stag = stag;
+ mmid = (stag) >> 8;
+ mhp->ibmw.rkey = stag;
+ insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ return &(mhp->ibmw);
+}
+
+static int iwch_dealloc_mw(struct ib_mw *mw)
+{
+ struct iwch_dev *rhp;
+ struct iwch_mw *mhp;
+ u32 mmid;
+
+ mhp = to_iwch_mw(mw);
+ rhp = mhp->rhp;
+ mmid = (mw->rkey) >> 8;
+ cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
+ remove_handle(rhp, &rhp->mmidr, mmid);
+ kfree(mhp);
+ PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
+ return 0;
+}
+
+static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
+{
+ struct iwch_dev *rhp;
+ struct iwch_pd *php;
+ struct iwch_mr *mhp;
+ u32 mmid;
+ u32 stag = 0;
+ int ret;
+
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+
+ mhp->rhp = rhp;
+ ret = iwch_alloc_pbl(mhp, pbl_depth);
+ if (ret) {
+ kfree(mhp);
+ return ERR_PTR(ret);
+ }
+ mhp->attr.pbl_size = pbl_depth;
+ ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr);
+ if (ret) {
+ iwch_free_pbl(mhp);
+ kfree(mhp);
+ return ERR_PTR(ret);
+ }
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.type = TPT_NON_SHARED_MR;
+ mhp->attr.stag = stag;
+ mhp->attr.state = 1;
+ mmid = (stag) >> 8;
+ mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ return &(mhp->ibmr);
+}
+
+static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
+ struct ib_device *device,
+ int page_list_len)
+{
+ struct ib_fast_reg_page_list *page_list;
+
+ page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64),
+ GFP_KERNEL);
+ if (!page_list)
+ return ERR_PTR(-ENOMEM);
+
+ page_list->page_list = (u64 *)(page_list + 1);
+ page_list->max_page_list_len = page_list_len;
+
+ return page_list;
+}
+
+static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list)
+{
+ kfree(page_list);
+}
+
+static int iwch_destroy_qp(struct ib_qp *ib_qp)
+{
+ struct iwch_dev *rhp;
+ struct iwch_qp *qhp;
+ struct iwch_qp_attributes attrs;
+ struct iwch_ucontext *ucontext;
+
+ qhp = to_iwch_qp(ib_qp);
+ rhp = qhp->rhp;
+
+ attrs.next_state = IWCH_QP_STATE_ERROR;
+ iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
+ wait_event(qhp->wait, !qhp->ep);
+
+ remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid);
+
+ atomic_dec(&qhp->refcnt);
+ wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
+
+ ucontext = ib_qp->uobject ? to_iwch_ucontext(ib_qp->uobject->context)
+ : NULL;
+ cxio_destroy_qp(&rhp->rdev, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+
+ PDBG("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__,
+ ib_qp, qhp->wq.qpid, qhp);
+ kfree(qhp);
+ return 0;
+}
+
+static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
+{
+ struct iwch_dev *rhp;
+ struct iwch_qp *qhp;
+ struct iwch_pd *php;
+ struct iwch_cq *schp;
+ struct iwch_cq *rchp;
+ struct iwch_create_qp_resp uresp;
+ int wqsize, sqsize, rqsize;
+ struct iwch_ucontext *ucontext;
+
+ PDBG("%s ib_pd %p\n", __func__, pd);
+ if (attrs->qp_type != IB_QPT_RC)
+ return ERR_PTR(-EINVAL);
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+ schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid);
+ rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid);
+ if (!schp || !rchp)
+ return ERR_PTR(-EINVAL);
+
+ /* The RQT size must be # of entries + 1 rounded up to a power of two */
+ rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr);
+ if (rqsize == attrs->cap.max_recv_wr)
+ rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1);
+
+ /* T3 doesn't support RQT depth < 16 */
+ if (rqsize < 16)
+ rqsize = 16;
+
+ if (rqsize > T3_MAX_RQ_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ if (attrs->cap.max_inline_data > T3_MAX_INLINE)
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * NOTE: The SQ and total WQ sizes don't need to be
+ * a power of two. However, all the code assumes
+ * they are. EG: Q_FREECNT() and friends.
+ */
+ sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
+ wqsize = roundup_pow_of_two(rqsize + sqsize);
+
+ /*
+ * Kernel users need more wq space for fastreg WRs which can take
+ * 2 WR fragments.
+ */
+ ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
+ if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
+ wqsize = roundup_pow_of_two(rqsize +
+ roundup_pow_of_two(attrs->cap.max_send_wr * 2));
+ PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__,
+ wqsize, sqsize, rqsize);
+ qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
+ if (!qhp)
+ return ERR_PTR(-ENOMEM);
+ qhp->wq.size_log2 = ilog2(wqsize);
+ qhp->wq.rq_size_log2 = ilog2(rqsize);
+ qhp->wq.sq_size_log2 = ilog2(sqsize);
+ if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) {
+ kfree(qhp);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ attrs->cap.max_recv_wr = rqsize - 1;
+ attrs->cap.max_send_wr = sqsize;
+ attrs->cap.max_inline_data = T3_MAX_INLINE;
+
+ qhp->rhp = rhp;
+ qhp->attr.pd = php->pdid;
+ qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid;
+ qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid;
+ qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
+ qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
+ qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
+ qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
+ qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+ qhp->attr.state = IWCH_QP_STATE_IDLE;
+ qhp->attr.next_state = IWCH_QP_STATE_IDLE;
+
+ /*
+ * XXX - These don't get passed in from the openib user
+ * at create time. The CM sets them via a QP modify.
+ * Need to fix... I think the CM should
+ */
+ qhp->attr.enable_rdma_read = 1;
+ qhp->attr.enable_rdma_write = 1;
+ qhp->attr.enable_bind = 1;
+ qhp->attr.max_ord = 1;
+ qhp->attr.max_ird = 1;
+
+ spin_lock_init(&qhp->lock);
+ init_waitqueue_head(&qhp->wait);
+ atomic_set(&qhp->refcnt, 1);
+ insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid);
+
+ if (udata) {
+
+ struct iwch_mm_entry *mm1, *mm2;
+
+ mm1 = kmalloc(sizeof *mm1, GFP_KERNEL);
+ if (!mm1) {
+ iwch_destroy_qp(&qhp->ibqp);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
+ if (!mm2) {
+ kfree(mm1);
+ iwch_destroy_qp(&qhp->ibqp);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ uresp.qpid = qhp->wq.qpid;
+ uresp.size_log2 = qhp->wq.size_log2;
+ uresp.sq_size_log2 = qhp->wq.sq_size_log2;
+ uresp.rq_size_log2 = qhp->wq.rq_size_log2;
+ spin_lock(&ucontext->mmap_lock);
+ uresp.key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ uresp.db_key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ spin_unlock(&ucontext->mmap_lock);
+ if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+ kfree(mm1);
+ kfree(mm2);
+ iwch_destroy_qp(&qhp->ibqp);
+ return ERR_PTR(-EFAULT);
+ }
+ mm1->key = uresp.key;
+ mm1->addr = virt_to_phys(qhp->wq.queue);
+ mm1->len = PAGE_ALIGN(wqsize * sizeof (union t3_wr));
+ insert_mmap(ucontext, mm1);
+ mm2->key = uresp.db_key;
+ mm2->addr = qhp->wq.udb & PAGE_MASK;
+ mm2->len = PAGE_SIZE;
+ insert_mmap(ucontext, mm2);
+ }
+ qhp->ibqp.qp_num = qhp->wq.qpid;
+ init_timer(&(qhp->timer));
+ PDBG("%s sq_num_entries %d, rq_num_entries %d "
+ "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n",
+ __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
+ qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr,
+ 1 << qhp->wq.size_log2, qhp->wq.rq_addr);
+ return &qhp->ibqp;
+}
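
The RQ sizing at the top of iwch_create_qp() always ends up strictly greater than the requested max_recv_wr, a power of two, and at least 16, and the function then reports rqsize - 1 back to the caller. A standalone check of that arithmetic (plain userspace C; the helper mirrors the roundup_pow_of_two() logic rather than reusing it):

#include <stdio.h>

static unsigned int rq_size(unsigned int max_recv_wr)
{
	unsigned int rqsize = 1;

	/* smallest power of two strictly greater than max_recv_wr */
	while (rqsize <= max_recv_wr)
		rqsize <<= 1;
	if (rqsize < 16)	/* T3 minimum RQT depth */
		rqsize = 16;
	return rqsize;
}

int main(void)
{
	/* prints "16 128 256" */
	printf("%u %u %u\n", rq_size(10), rq_size(100), rq_size(128));
	return 0;
}
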
+
+static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct iwch_dev *rhp;
+ struct iwch_qp *qhp;
+ enum iwch_qp_attr_mask mask = 0;
+ struct iwch_qp_attributes attrs;
+
+ PDBG("%s ib_qp %p\n", __func__, ibqp);
+
+ /* iwarp does not support the RTR state */
+ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
+ attr_mask &= ~IB_QP_STATE;
+
+ /* Make sure we still have something left to do */
+ if (!attr_mask)
+ return 0;
+
+ memset(&attrs, 0, sizeof attrs);
+ qhp = to_iwch_qp(ibqp);
+ rhp = qhp->rhp;
+
+ attrs.next_state = iwch_convert_state(attr->qp_state);
+ attrs.enable_rdma_read = (attr->qp_access_flags &
+ IB_ACCESS_REMOTE_READ) ? 1 : 0;
+ attrs.enable_rdma_write = (attr->qp_access_flags &
+ IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+ attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
+
+ mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0;
+ mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
+ (IWCH_QP_ATTR_ENABLE_RDMA_READ |
+ IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
+ IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0;
+
+ return iwch_modify_qp(rhp, qhp, mask, &attrs, 0);
+}
+
+void iwch_qp_add_ref(struct ib_qp *qp)
+{
+ PDBG("%s ib_qp %p\n", __func__, qp);
+ atomic_inc(&(to_iwch_qp(qp)->refcnt));
+}
+
+void iwch_qp_rem_ref(struct ib_qp *qp)
+{
+ PDBG("%s ib_qp %p\n", __func__, qp);
+ if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt)))
+ wake_up(&(to_iwch_qp(qp)->wait));
+}
+
+static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
+{
+ PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
+ return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
+}
+
+static int iwch_query_pkey(struct ib_device *ibdev,
+ u8 port, u16 index, u16 * pkey)
+{
+ PDBG("%s ibdev %p\n", __func__, ibdev);
+ *pkey = 0;
+ return 0;
+}
+
+static int iwch_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *gid)
+{
+ struct iwch_dev *dev;
+
+ PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
+ __func__, ibdev, port, index, gid);
+ dev = to_iwch_dev(ibdev);
+ BUG_ON(port == 0 || port > 2);
+ memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+ memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6);
+ return 0;
+}
+
+static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev)
+{
+ struct ethtool_drvinfo info;
+ struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
+ char *cp, *next;
+ unsigned fw_maj, fw_min, fw_mic;
+
+ lldev->ethtool_ops->get_drvinfo(lldev, &info);
+
+ next = info.fw_version + 1;
+ cp = strsep(&next, ".");
+ sscanf(cp, "%i", &fw_maj);
+ cp = strsep(&next, ".");
+ sscanf(cp, "%i", &fw_min);
+ cp = strsep(&next, ".");
+ sscanf(cp, "%i", &fw_mic);
+
+ return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) |
+ (fw_mic & 0xffff);
+}
+
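+/*
+ * Worked example of the packing above (illustrative values only): a
+ * firmware reporting major 7, minor 1, micro 0 yields
+ * (7ULL << 32) | (1 << 16) | 0 == 0x0000000700010000, i.e. the three
+ * components land in bits 47:32, 31:16 and 15:0 of the returned fw_ver.
+ */
+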
+static int iwch_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+
+ struct iwch_dev *dev;
+ PDBG("%s ibdev %p\n", __func__, ibdev);
+
+ dev = to_iwch_dev(ibdev);
+ memset(props, 0, sizeof *props);
+ memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
+ props->hw_ver = dev->rdev.t3cdev_p->type;
+ props->fw_ver = fw_vers_string_to_u64(dev);
+ props->device_cap_flags = dev->device_cap_flags;
+ props->page_size_cap = dev->attr.mem_pgsizes_bitmask;
+ props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
+ props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
+ props->max_mr_size = dev->attr.max_mr_size;
+ props->max_qp = dev->attr.max_qps;
+ props->max_qp_wr = dev->attr.max_wrs;
+ props->max_sge = dev->attr.max_sge_per_wr;
+ props->max_sge_rd = 1;
+ props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
+ props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp;
+ props->max_cq = dev->attr.max_cqs;
+ props->max_cqe = dev->attr.max_cqes_per_cq;
+ props->max_mr = dev->attr.max_mem_regs;
+ props->max_pd = dev->attr.max_pds;
+ props->local_ca_ack_delay = 0;
+ props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH;
+
+ return 0;
+}
+
+static int iwch_query_port(struct ib_device *ibdev,
+ u8 port, struct ib_port_attr *props)
+{
+ PDBG("%s ibdev %p\n", __func__, ibdev);
+
+ memset(props, 0, sizeof(struct ib_port_attr));
+ props->max_mtu = IB_MTU_4096;
+ props->active_mtu = IB_MTU_2048;
+ props->state = IB_PORT_ACTIVE;
+ props->port_cap_flags =
+ IB_PORT_CM_SUP |
+ IB_PORT_SNMP_TUNNEL_SUP |
+ IB_PORT_REINIT_SUP |
+ IB_PORT_DEVICE_MGMT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+ props->gid_tbl_len = 1;
+ props->pkey_tbl_len = 1;
+ props->active_width = 2;
+ props->active_speed = 2;
+ props->max_msg_sz = -1;
+
+ return 0;
+}
+
+static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
+ ibdev.dev);
+ PDBG("%s dev 0x%p\n", __func__, dev);
+ return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
+}
+
+static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
+ ibdev.dev);
+ struct ethtool_drvinfo info;
+ struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
+
+ PDBG("%s dev 0x%p\n", __func__, dev);
+ lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ return sprintf(buf, "%s\n", info.fw_version);
+}
+
+static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
+ ibdev.dev);
+ struct ethtool_drvinfo info;
+ struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
+
+ PDBG("%s dev 0x%p\n", __func__, dev);
+ lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ return sprintf(buf, "%s\n", info.driver);
+}
+
+static ssize_t show_board(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
+ ibdev.dev);
+ PDBG("%s dev 0x%p\n", __func__, dev);
+ return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
+ iwch_dev->rdev.rnic_info.pdev->device);
+}
+
+static int iwch_get_mib(struct ib_device *ibdev,
+ union rdma_protocol_stats *stats)
+{
+ struct iwch_dev *dev;
+ struct tp_mib_stats m;
+ int ret;
+
+ PDBG("%s ibdev %p\n", __func__, ibdev);
+ dev = to_iwch_dev(ibdev);
+ ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m);
+ if (ret)
+ return -ENOSYS;
+
+ memset(stats, 0, sizeof *stats);
+ stats->iw.ipInReceives = ((u64) m.ipInReceive_hi << 32) +
+ m.ipInReceive_lo;
+ stats->iw.ipInHdrErrors = ((u64) m.ipInHdrErrors_hi << 32) +
+ m.ipInHdrErrors_lo;
+ stats->iw.ipInAddrErrors = ((u64) m.ipInAddrErrors_hi << 32) +
+ m.ipInAddrErrors_lo;
+ stats->iw.ipInUnknownProtos = ((u64) m.ipInUnknownProtos_hi << 32) +
+ m.ipInUnknownProtos_lo;
+ stats->iw.ipInDiscards = ((u64) m.ipInDiscards_hi << 32) +
+ m.ipInDiscards_lo;
+ stats->iw.ipInDelivers = ((u64) m.ipInDelivers_hi << 32) +
+ m.ipInDelivers_lo;
+ stats->iw.ipOutRequests = ((u64) m.ipOutRequests_hi << 32) +
+ m.ipOutRequests_lo;
+ stats->iw.ipOutDiscards = ((u64) m.ipOutDiscards_hi << 32) +
+ m.ipOutDiscards_lo;
+ stats->iw.ipOutNoRoutes = ((u64) m.ipOutNoRoutes_hi << 32) +
+ m.ipOutNoRoutes_lo;
+ stats->iw.ipReasmTimeout = (u64) m.ipReasmTimeout;
+ stats->iw.ipReasmReqds = (u64) m.ipReasmReqds;
+ stats->iw.ipReasmOKs = (u64) m.ipReasmOKs;
+ stats->iw.ipReasmFails = (u64) m.ipReasmFails;
+ stats->iw.tcpActiveOpens = (u64) m.tcpActiveOpens;
+ stats->iw.tcpPassiveOpens = (u64) m.tcpPassiveOpens;
+ stats->iw.tcpAttemptFails = (u64) m.tcpAttemptFails;
+ stats->iw.tcpEstabResets = (u64) m.tcpEstabResets;
+ stats->iw.tcpOutRsts = (u64) m.tcpOutRsts;
+ stats->iw.tcpCurrEstab = (u64) m.tcpCurrEstab;
+ stats->iw.tcpInSegs = ((u64) m.tcpInSegs_hi << 32) +
+ m.tcpInSegs_lo;
+ stats->iw.tcpOutSegs = ((u64) m.tcpOutSegs_hi << 32) +
+ m.tcpOutSegs_lo;
+ stats->iw.tcpRetransSegs = ((u64) m.tcpRetransSeg_hi << 32) +
+ m.tcpRetransSeg_lo;
+ stats->iw.tcpInErrs = ((u64) m.tcpInErrs_hi << 32) +
+ m.tcpInErrs_lo;
+ stats->iw.tcpRtoMin = (u64) m.tcpRtoMin;
+ stats->iw.tcpRtoMax = (u64) m.tcpRtoMax;
+ return 0;
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct device_attribute *iwch_class_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_ver,
+ &dev_attr_hca_type,
+ &dev_attr_board_id,
+};
+
+int iwch_register_device(struct iwch_dev *dev)
+{
+ int ret;
+ int i;
+
+ PDBG("%s iwch_dev %p\n", __func__, dev);
+ strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
+ memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
+ memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
+ dev->ibdev.owner = THIS_MODULE;
+ dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
+ IB_DEVICE_MEM_WINDOW |
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
+
+ /* cxgb3 supports STag 0. */
+ dev->ibdev.local_dma_lkey = 0;
+
+ dev->ibdev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV);
+ dev->ibdev.node_type = RDMA_NODE_RNIC;
+ memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
+ dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
+ dev->ibdev.num_comp_vectors = 1;
+ dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
+ dev->ibdev.query_device = iwch_query_device;
+ dev->ibdev.query_port = iwch_query_port;
+ dev->ibdev.modify_port = iwch_modify_port;
+ dev->ibdev.query_pkey = iwch_query_pkey;
+ dev->ibdev.query_gid = iwch_query_gid;
+ dev->ibdev.alloc_ucontext = iwch_alloc_ucontext;
+ dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext;
+ dev->ibdev.mmap = iwch_mmap;
+ dev->ibdev.alloc_pd = iwch_allocate_pd;
+ dev->ibdev.dealloc_pd = iwch_deallocate_pd;
+ dev->ibdev.create_ah = iwch_ah_create;
+ dev->ibdev.destroy_ah = iwch_ah_destroy;
+ dev->ibdev.create_qp = iwch_create_qp;
+ dev->ibdev.modify_qp = iwch_ib_modify_qp;
+ dev->ibdev.destroy_qp = iwch_destroy_qp;
+ dev->ibdev.create_cq = iwch_create_cq;
+ dev->ibdev.destroy_cq = iwch_destroy_cq;
+ dev->ibdev.resize_cq = iwch_resize_cq;
+ dev->ibdev.poll_cq = iwch_poll_cq;
+ dev->ibdev.get_dma_mr = iwch_get_dma_mr;
+ dev->ibdev.reg_phys_mr = iwch_register_phys_mem;
+ dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem;
+ dev->ibdev.reg_user_mr = iwch_reg_user_mr;
+ dev->ibdev.dereg_mr = iwch_dereg_mr;
+ dev->ibdev.alloc_mw = iwch_alloc_mw;
+ dev->ibdev.bind_mw = iwch_bind_mw;
+ dev->ibdev.dealloc_mw = iwch_dealloc_mw;
+ dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr;
+ dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl;
+ dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl;
+ dev->ibdev.attach_mcast = iwch_multicast_attach;
+ dev->ibdev.detach_mcast = iwch_multicast_detach;
+ dev->ibdev.process_mad = iwch_process_mad;
+ dev->ibdev.req_notify_cq = iwch_arm_cq;
+ dev->ibdev.post_send = iwch_post_send;
+ dev->ibdev.post_recv = iwch_post_receive;
+ dev->ibdev.get_protocol_stats = iwch_get_mib;
+
+ dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
+ if (!dev->ibdev.iwcm)
+ return -ENOMEM;
+
+ dev->ibdev.iwcm->connect = iwch_connect;
+ dev->ibdev.iwcm->accept = iwch_accept_cr;
+ dev->ibdev.iwcm->reject = iwch_reject_cr;
+ dev->ibdev.iwcm->create_listen = iwch_create_listen;
+ dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen;
+ dev->ibdev.iwcm->add_ref = iwch_qp_add_ref;
+ dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
+ dev->ibdev.iwcm->get_qp = iwch_get_qp;
+
+ ret = ib_register_device(&dev->ibdev);
+ if (ret)
+ goto bail1;
+
+ for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
+ ret = device_create_file(&dev->ibdev.dev,
+ iwch_class_attributes[i]);
+ if (ret) {
+ goto bail2;
+ }
+ }
+ return 0;
+bail2:
+ ib_unregister_device(&dev->ibdev);
+bail1:
+ return ret;
+}
+
+void iwch_unregister_device(struct iwch_dev *dev)
+{
+ int i;
+
+ PDBG("%s iwch_dev %p\n", __func__, dev);
+ for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
+ device_remove_file(&dev->ibdev.dev,
+ iwch_class_attributes[i]);
+ ib_unregister_device(&dev->ibdev);
+ return;
+}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
new file mode 100644
index 0000000..a237d49
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWCH_PROVIDER_H__
+#define __IWCH_PROVIDER_H__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <rdma/ib_verbs.h>
+#include <asm/types.h>
+#include "t3cdev.h"
+#include "iwch.h"
+#include "cxio_wr.h"
+#include "cxio_hal.h"
+
+struct iwch_pd {
+ struct ib_pd ibpd;
+ u32 pdid;
+ struct iwch_dev *rhp;
+};
+
+static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct iwch_pd, ibpd);
+}
+
+struct tpt_attributes {
+ u32 stag;
+ u32 state:1;
+ u32 type:2;
+ u32 rsvd:1;
+ enum tpt_mem_perm perms;
+ u32 remote_invaliate_disable:1;
+ u32 zbva:1;
+ u32 mw_bind_enable:1;
+ u32 page_size:5;
+
+ u32 pdid;
+ u32 qpid;
+ u32 pbl_addr;
+ u32 len;
+ u64 va_fbo;
+ u32 pbl_size;
+};
+
+struct iwch_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct iwch_dev *rhp;
+ u64 kva;
+ struct tpt_attributes attr;
+};
+
+typedef struct iwch_mw iwch_mw_handle;
+
+static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct iwch_mr, ibmr);
+}
+
+struct iwch_mw {
+ struct ib_mw ibmw;
+ struct iwch_dev *rhp;
+ u64 kva;
+ struct tpt_attributes attr;
+};
+
+static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct iwch_mw, ibmw);
+}
+
+struct iwch_cq {
+ struct ib_cq ibcq;
+ struct iwch_dev *rhp;
+ struct t3_cq cq;
+ spinlock_t lock;
+ atomic_t refcnt;
+ wait_queue_head_t wait;
+ u32 __user *user_rptr_addr;
+};
+
+static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct iwch_cq, ibcq);
+}
+
+enum IWCH_QP_FLAGS {
+ QP_QUIESCED = 0x01
+};
+
+struct iwch_mpa_attributes {
+ u8 initiator;
+ u8 recv_marker_enabled;
+ u8 xmit_marker_enabled;
+ u8 crc_enabled;
+ u8 version; /* 0 or 1 */
+};
+
+struct iwch_qp_attributes {
+ u32 scq;
+ u32 rcq;
+ u32 sq_num_entries;
+ u32 rq_num_entries;
+ u32 sq_max_sges;
+ u32 sq_max_sges_rdma_write;
+ u32 rq_max_sges;
+ u32 state;
+ u8 enable_rdma_read;
+ u8 enable_rdma_write; /* enable inbound Read Resp. */
+ u8 enable_bind;
+ u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */
+ /*
+ * Next QP state. If the current state is specified, only the
+ * QP attributes will be modified.
+ */
+ u32 max_ord;
+ u32 max_ird;
+ u32 pd; /* IN */
+ u32 next_state;
+ char terminate_buffer[52];
+ u32 terminate_msg_len;
+ u8 is_terminate_local;
+ struct iwch_mpa_attributes mpa_attr; /* IN-OUT */
+ struct iwch_ep *llp_stream_handle;
+ char *stream_msg_buf; /* Last stream msg. before Idle -> RTS */
+ u32 stream_msg_buf_len; /* Only on Idle -> RTS */
+};
+
+struct iwch_qp {
+ struct ib_qp ibqp;
+ struct iwch_dev *rhp;
+ struct iwch_ep *ep;
+ struct iwch_qp_attributes attr;
+ struct t3_wq wq;
+ spinlock_t lock;
+ atomic_t refcnt;
+ wait_queue_head_t wait;
+ enum IWCH_QP_FLAGS flags;
+ struct timer_list timer;
+};
+
+static inline int qp_quiesced(struct iwch_qp *qhp)
+{
+ return qhp->flags & QP_QUIESCED;
+}
+
+static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct iwch_qp, ibqp);
+}
+
+void iwch_qp_add_ref(struct ib_qp *qp);
+void iwch_qp_rem_ref(struct ib_qp *qp);
+
+struct iwch_ucontext {
+ struct ib_ucontext ibucontext;
+ struct cxio_ucontext uctx;
+ u32 key;
+ spinlock_t mmap_lock;
+ struct list_head mmaps;
+};
+
+static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c)
+{
+ return container_of(c, struct iwch_ucontext, ibucontext);
+}
+
+struct iwch_mm_entry {
+ struct list_head entry;
+ u64 addr;
+ u32 key;
+ unsigned len;
+};
+
+static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext,
+ u32 key, unsigned len)
+{
+ struct list_head *pos, *nxt;
+ struct iwch_mm_entry *mm;
+
+ spin_lock(&ucontext->mmap_lock);
+ list_for_each_safe(pos, nxt, &ucontext->mmaps) {
+
+ mm = list_entry(pos, struct iwch_mm_entry, entry);
+ if (mm->key == key && mm->len == len) {
+ list_del_init(&mm->entry);
+ spin_unlock(&ucontext->mmap_lock);
+ PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
+ key, (unsigned long long) mm->addr, mm->len);
+ return mm;
+ }
+ }
+ spin_unlock(&ucontext->mmap_lock);
+ return NULL;
+}
+
+static inline void insert_mmap(struct iwch_ucontext *ucontext,
+ struct iwch_mm_entry *mm)
+{
+ spin_lock(&ucontext->mmap_lock);
+ PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
+ mm->key, (unsigned long long) mm->addr, mm->len);
+ list_add_tail(&mm->entry, &ucontext->mmaps);
+ spin_unlock(&ucontext->mmap_lock);
+}
+
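+/*
+ * Usage sketch (illustrative, not part of this header): the driver's mmap
+ * handler is expected to recover the key from the mmap offset and look the
+ * entry up by (key, length), roughly:
+ *
+ *	u32 key = vma->vm_pgoff << PAGE_SHIFT;
+ *	unsigned len = vma->vm_end - vma->vm_start;
+ *	struct iwch_mm_entry *mm = remove_mmap(ucontext, key, len);
+ *	if (!mm)
+ *		return -EINVAL;
+ *	remap the page(s) at mm->addr, then kfree(mm);
+ *
+ * The keys handed to userspace at QP/CQ create time are spaced PAGE_SIZE
+ * apart, so each mmap()able object gets a distinct offset.
+ */
+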
+enum iwch_qp_attr_mask {
+ IWCH_QP_ATTR_NEXT_STATE = 1 << 0,
+ IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
+ IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
+ IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
+ IWCH_QP_ATTR_MAX_ORD = 1 << 11,
+ IWCH_QP_ATTR_MAX_IRD = 1 << 12,
+ IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22,
+ IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23,
+ IWCH_QP_ATTR_MPA_ATTR = 1 << 24,
+ IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25,
+ IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ |
+ IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
+ IWCH_QP_ATTR_MAX_ORD |
+ IWCH_QP_ATTR_MAX_IRD |
+ IWCH_QP_ATTR_LLP_STREAM_HANDLE |
+ IWCH_QP_ATTR_STREAM_MSG_BUFFER |
+ IWCH_QP_ATTR_MPA_ATTR |
+ IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE)
+};
+
+int iwch_modify_qp(struct iwch_dev *rhp,
+ struct iwch_qp *qhp,
+ enum iwch_qp_attr_mask mask,
+ struct iwch_qp_attributes *attrs,
+ int internal);
+
+enum iwch_qp_state {
+ IWCH_QP_STATE_IDLE,
+ IWCH_QP_STATE_RTS,
+ IWCH_QP_STATE_ERROR,
+ IWCH_QP_STATE_TERMINATE,
+ IWCH_QP_STATE_CLOSING,
+ IWCH_QP_STATE_TOT
+};
+
+static inline int iwch_convert_state(enum ib_qp_state ib_state)
+{
+ switch (ib_state) {
+ case IB_QPS_RESET:
+ case IB_QPS_INIT:
+ return IWCH_QP_STATE_IDLE;
+ case IB_QPS_RTS:
+ return IWCH_QP_STATE_RTS;
+ case IB_QPS_SQD:
+ return IWCH_QP_STATE_CLOSING;
+ case IB_QPS_SQE:
+ return IWCH_QP_STATE_TERMINATE;
+ case IB_QPS_ERR:
+ return IWCH_QP_STATE_ERROR;
+ default:
+ return -1;
+ }
+}
+
+static inline u32 iwch_ib_to_tpt_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) |
+ (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) |
+ TPT_LOCAL_READ;
+}
+
+static inline u32 iwch_ib_to_tpt_bind_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0);
+}
+
+enum iwch_mmid_state {
+ IWCH_STAG_STATE_VALID,
+ IWCH_STAG_STATE_INVALID
+};
+
+enum iwch_qp_query_flags {
+ IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */
+ IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */
+ IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */
+
+ /*
+ * Quiesce QP context; Consumer
+ * will NOT replay outstanding WR
+ */
+ IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4,
+ IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8,
+ IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */
+};
+
+u16 iwch_rqes_posted(struct iwch_qp *qhp);
+int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int iwch_bind_mw(struct ib_qp *qp,
+ struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind);
+int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
+int iwch_post_zb_read(struct iwch_qp *qhp);
+int iwch_register_device(struct iwch_dev *dev);
+void iwch_unregister_device(struct iwch_dev *dev);
+int iwch_quiesce_qps(struct iwch_cq *chp);
+int iwch_resume_qps(struct iwch_cq *chp);
+void stop_read_rep_timer(struct iwch_qp *qhp);
+int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp, int shift);
+int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp,
+ int shift,
+ int npages);
+int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
+void iwch_free_pbl(struct iwch_mr *mhp);
+int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
+int build_phys_page_list(struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ u64 *iova_start,
+ u64 *total_size,
+ int *npages,
+ int *shift,
+ __be64 **page_list);
+
+
+#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
+
+#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
new file mode 100644
index 0000000..19661b2
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -0,0 +1,1186 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "iwch_provider.h"
+#include "iwch.h"
+#include "iwch_cm.h"
+#include "cxio_hal.h"
+#include "cxio_resource.h"
+
+#define NO_SUPPORT -1
+
+static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 * flit_cnt)
+{
+ int i;
+ u32 plen;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->send.rdmaop = T3_SEND_WITH_SE;
+ else
+ wqe->send.rdmaop = T3_SEND;
+ wqe->send.rem_stag = 0;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
+ else
+ wqe->send.rdmaop = T3_SEND_WITH_INV;
+ wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey);
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (wr->num_sge > T3_MAX_SGE)
+ return -EINVAL;
+ wqe->send.reserved[0] = 0;
+ wqe->send.reserved[1] = 0;
+ wqe->send.reserved[2] = 0;
+ plen = 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ if ((plen + wr->sg_list[i].length) < plen)
+ return -EMSGSIZE;
+
+ plen += wr->sg_list[i].length;
+ wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
+ wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
+ wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
+ }
+ wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
+ *flit_cnt = 4 + ((wr->num_sge) << 1);
+ wqe->send.plen = cpu_to_be32(plen);
+ return 0;
+}
+
+static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 *flit_cnt)
+{
+ int i;
+ u32 plen;
+ if (wr->num_sge > T3_MAX_SGE)
+ return -EINVAL;
+ wqe->write.rdmaop = T3_RDMA_WRITE;
+ wqe->write.reserved[0] = 0;
+ wqe->write.reserved[1] = 0;
+ wqe->write.reserved[2] = 0;
+ wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
+ wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
+
+ if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+ plen = 4;
+ wqe->write.sgl[0].stag = wr->ex.imm_data;
+ wqe->write.sgl[0].len = __constant_cpu_to_be32(0);
+ wqe->write.num_sgle = __constant_cpu_to_be32(0);
+ *flit_cnt = 6;
+ } else {
+ plen = 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ if ((plen + wr->sg_list[i].length) < plen) {
+ return -EMSGSIZE;
+ }
+ plen += wr->sg_list[i].length;
+ wqe->write.sgl[i].stag =
+ cpu_to_be32(wr->sg_list[i].lkey);
+ wqe->write.sgl[i].len =
+ cpu_to_be32(wr->sg_list[i].length);
+ wqe->write.sgl[i].to =
+ cpu_to_be64(wr->sg_list[i].addr);
+ }
+ wqe->write.num_sgle = cpu_to_be32(wr->num_sge);
+ *flit_cnt = 5 + ((wr->num_sge) << 1);
+ }
+ wqe->write.plen = cpu_to_be32(plen);
+ return 0;
+}
+
+static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 *flit_cnt)
+{
+ if (wr->num_sge > 1)
+ return -EINVAL;
+ wqe->read.rdmaop = T3_READ_REQ;
+ if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
+ wqe->read.local_inv = 1;
+ else
+ wqe->read.local_inv = 0;
+ wqe->read.reserved[0] = 0;
+ wqe->read.reserved[1] = 0;
+ wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
+ wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr);
+ wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
+ wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
+ wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
+ *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
+ return 0;
+}
+
+static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
+{
+ int i;
+ __be64 *p;
+
+ if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH)
+ return -EINVAL;
+ *wr_cnt = 1;
+ wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey);
+ wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length);
+ wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
+ wqe->fastreg.va_base_lo_fbo =
+ cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff);
+ wqe->fastreg.page_type_perms = cpu_to_be32(
+ V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) |
+ V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) |
+ V_FR_TYPE(TPT_VATO) |
+ V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags)));
+ p = &wqe->fastreg.pbl_addrs[0];
+ for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) {
+
+ /* If we need a 2nd WR, then set it up */
+ if (i == T3_MAX_FASTREG_FRAG) {
+ *wr_cnt = 2;
+ wqe = (union t3_wr *)(wq->queue +
+ Q_PTR2IDX((wq->wptr+1), wq->size_log2));
+ build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
+ Q_GENBIT(wq->wptr + 1, wq->size_log2),
+ 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG,
+ T3_EOP);
+
+ p = &wqe->pbl_frag.pbl_addrs[0];
+ }
+ *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]);
+ }
+ *flit_cnt = 5 + wr->wr.fast_reg.page_list_len;
+ if (*flit_cnt > 15)
+ *flit_cnt = 15;
+ return 0;
+}
+
+static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 *flit_cnt)
+{
+ wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey);
+ wqe->local_inv.reserved = 0;
+ *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3;
+ return 0;
+}
+
+/*
+ * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
+ */
+static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
+ u32 num_sgle, u32 * pbl_addr, u8 * page_size)
+{
+ int i;
+ struct iwch_mr *mhp;
+ u32 offset;
+ for (i = 0; i < num_sgle; i++) {
+
+ mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
+ if (!mhp) {
+ PDBG("%s %d\n", __func__, __LINE__);
+ return -EIO;
+ }
+ if (!mhp->attr.state) {
+ PDBG("%s %d\n", __func__, __LINE__);
+ return -EIO;
+ }
+ if (mhp->attr.zbva) {
+ PDBG("%s %d\n", __func__, __LINE__);
+ return -EIO;
+ }
+
+ if (sg_list[i].addr < mhp->attr.va_fbo) {
+ PDBG("%s %d\n", __func__, __LINE__);
+ return -EINVAL;
+ }
+ if (sg_list[i].addr + ((u64) sg_list[i].length) <
+ sg_list[i].addr) {
+ PDBG("%s %d\n", __func__, __LINE__);
+ return -EINVAL;
+ }
+ if (sg_list[i].addr + ((u64) sg_list[i].length) >
+ mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
+ PDBG("%s %d\n", __func__, __LINE__);
+ return -EINVAL;
+ }
+ offset = sg_list[i].addr - mhp->attr.va_fbo;
+ offset += ((u32) mhp->attr.va_fbo) %
+ (1UL << (12 + mhp->attr.page_size));
+ pbl_addr[i] = ((mhp->attr.pbl_addr -
+ rhp->rdev.rnic_info.pbl_base) >> 3) +
+ (offset >> (12 + mhp->attr.page_size));
+ page_size[i] = mhp->attr.page_size;
+ }
+ return 0;
+}
+
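+/*
+ * Worked example of the index math above (assumed values): with 4KB pages
+ * (attr.page_size == 0), an MR with va_fbo == 0x10000800 and an SGE at
+ * addr 0x10002000 gives offset = 0x1800, plus the FBO remainder 0x800,
+ * so offset == 0x2000 and offset >> 12 == 2: the SGE starts in the third
+ * PBL entry of that MR, counted from (mhp->attr.pbl_addr - pbl_base) >> 3.
+ */
+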
+static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
+ struct ib_recv_wr *wr)
+{
+ int i, err = 0;
+ u32 pbl_addr[T3_MAX_SGE];
+ u8 page_size[T3_MAX_SGE];
+
+ err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
+ page_size);
+ if (err)
+ return err;
+ wqe->recv.pagesz[0] = page_size[0];
+ wqe->recv.pagesz[1] = page_size[1];
+ wqe->recv.pagesz[2] = page_size[2];
+ wqe->recv.pagesz[3] = page_size[3];
+ wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
+ for (i = 0; i < wr->num_sge; i++) {
+ wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
+ wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
+
+ /* to in the WQE == the offset into the page */
+ wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) %
+ (1UL << (12 + page_size[i])));
+
+ /* pbl_addr is the adapter's address in the PBL */
+ wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
+ }
+ for (; i < T3_MAX_SGE; i++) {
+ wqe->recv.sgl[i].stag = 0;
+ wqe->recv.sgl[i].len = 0;
+ wqe->recv.sgl[i].to = 0;
+ wqe->recv.pbl_addr[i] = 0;
+ }
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2)].pbl_addr = 0;
+ return 0;
+}
+
+static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
+ struct ib_recv_wr *wr)
+{
+ int i;
+ u32 pbl_addr;
+ u32 pbl_offset;
+
+
+ /*
+ * The T3 HW requires the PBL in the HW recv descriptor to reference
+ * a PBL entry. So we allocate the max needed PBL memory here and pass
+ * it to the uP in the recv WR. The uP will build the PBL and setup
+ * the HW recv descriptor.
+ */
+ pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
+ if (!pbl_addr)
+ return -ENOMEM;
+
+ /*
+ * Compute the 8B aligned offset.
+ */
+ pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;
+
+ wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
+
+ for (i = 0; i < wr->num_sge; i++) {
+
+ /*
+ * Use a 128MB page size. This and an imposed 128MB
+ * SGE length limit allow us to require only a 2-entry HW
+ * PBL for each SGE. This restriction is acceptable since
+ * it is not possible to allocate 128MB of contiguous
+ * DMA coherent memory!
+ */
+ if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
+ return -EINVAL;
+ wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;
+
+ /*
+ * T3 restricts a recv to all zero-stag or all non-zero-stag.
+ */
+ if (wr->sg_list[i].lkey != 0)
+ return -EINVAL;
+ wqe->recv.sgl[i].stag = 0;
+ wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
+ wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
+ wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset);
+ pbl_offset += 2;
+ }
+ for (; i < T3_MAX_SGE; i++) {
+ wqe->recv.pagesz[i] = 0;
+ wqe->recv.sgl[i].stag = 0;
+ wqe->recv.sgl[i].len = 0;
+ wqe->recv.sgl[i].to = 0;
+ wqe->recv.pbl_addr[i] = 0;
+ }
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
+ return 0;
+}
+
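+/*
+ * Illustration of the 2-entry bound assumed above: with 128MB (2^27 byte)
+ * pages, an SGE starting at 0x07f00000 with length 0x04000000 ends at
+ * 0x0bf00000 and crosses exactly one 128MB boundary, so it touches at most
+ * two pages; hence pbl_offset advances by 2 per SGE, provided each SGE
+ * length is capped at T3_STAG0_MAX_PBE_LEN.
+ */
+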
+int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ int err = 0;
+ u8 uninitialized_var(t3_wr_flit_cnt);
+ enum t3_wr_opcode t3_wr_opcode = 0;
+ enum t3_wr_flags t3_wr_flags;
+ struct iwch_qp *qhp;
+ u32 idx;
+ union t3_wr *wqe;
+ u32 num_wrs;
+ unsigned long flag;
+ struct t3_swsq *sqp;
+ int wr_cnt = 1;
+
+ qhp = to_iwch_qp(ibqp);
+ spin_lock_irqsave(&qhp->lock, flag);
+ if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -EINVAL;
+ }
+ num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
+ qhp->wq.sq_size_log2);
+ if (num_wrs <= 0) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -ENOMEM;
+ }
+ while (wr) {
+ if (num_wrs == 0) {
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+ idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+ wqe = (union t3_wr *) (qhp->wq.queue + idx);
+ t3_wr_flags = 0;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ t3_wr_flags |= T3_COMPLETION_FLAG;
+ sqp = qhp->wq.sq +
+ Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_INV:
+ if (wr->send_flags & IB_SEND_FENCE)
+ t3_wr_flags |= T3_READ_FENCE_FLAG;
+ t3_wr_opcode = T3_WR_SEND;
+ err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
+ break;
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ t3_wr_opcode = T3_WR_WRITE;
+ err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
+ break;
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_READ_WITH_INV:
+ t3_wr_opcode = T3_WR_READ;
+ t3_wr_flags = 0; /* T3 reads are always signaled */
+ err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
+ if (err)
+ break;
+ sqp->read_len = wqe->read.local_len;
+ if (!qhp->wq.oldest_read)
+ qhp->wq.oldest_read = sqp;
+ break;
+ case IB_WR_FAST_REG_MR:
+ t3_wr_opcode = T3_WR_FASTREG;
+ err = build_fastreg(wqe, wr, &t3_wr_flit_cnt,
+ &wr_cnt, &qhp->wq);
+ break;
+ case IB_WR_LOCAL_INV:
+ if (wr->send_flags & IB_SEND_FENCE)
+ t3_wr_flags |= T3_LOCAL_FENCE_FLAG;
+ t3_wr_opcode = T3_WR_INV_STAG;
+ err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt);
+ break;
+ default:
+ PDBG("%s post of type=%d TBD!\n", __func__,
+ wr->opcode);
+ err = -EINVAL;
+ }
+ if (err) {
+ *bad_wr = wr;
+ break;
+ }
+ wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
+ sqp->wr_id = wr->wr_id;
+ sqp->opcode = wr2opcode(t3_wr_opcode);
+ sqp->sq_wptr = qhp->wq.sq_wptr;
+ sqp->complete = 0;
+ sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);
+
+ build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
+ Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
+ 0, t3_wr_flit_cnt,
+ (wr_cnt == 1) ? T3_SOPEOP : T3_SOP);
+ PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
+ __func__, (unsigned long long) wr->wr_id, idx,
+ Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
+ sqp->opcode);
+ wr = wr->next;
+ num_wrs--;
+ qhp->wq.wptr += wr_cnt;
+ ++(qhp->wq.sq_wptr);
+ }
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+ return err;
+}
+
+int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ int err = 0;
+ struct iwch_qp *qhp;
+ u32 idx;
+ union t3_wr *wqe;
+ u32 num_wrs;
+ unsigned long flag;
+
+ qhp = to_iwch_qp(ibqp);
+ spin_lock_irqsave(&qhp->lock, flag);
+ if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -EINVAL;
+ }
+ num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2) - 1;
+ if (!wr) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -EINVAL;
+ }
+ while (wr) {
+ if (wr->num_sge > T3_MAX_SGE) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+ idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+ wqe = (union t3_wr *) (qhp->wq.queue + idx);
+ if (num_wrs)
+ if (wr->sg_list[0].lkey)
+ err = build_rdma_recv(qhp, wqe, wr);
+ else
+ err = build_zero_stag_recv(qhp, wqe, wr);
+ else
+ err = -ENOMEM;
+ if (err) {
+ *bad_wr = wr;
+ break;
+ }
+ build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
+ Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
+ 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
+ PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x "
+ "wqe %p \n", __func__, (unsigned long long) wr->wr_id,
+ idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
+ ++(qhp->wq.rq_wptr);
+ ++(qhp->wq.wptr);
+ wr = wr->next;
+ num_wrs--;
+ }
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+ return err;
+}
+
+int iwch_bind_mw(struct ib_qp *qp,
+ struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind)
+{
+ struct iwch_dev *rhp;
+ struct iwch_mw *mhp;
+ struct iwch_qp *qhp;
+ union t3_wr *wqe;
+ u32 pbl_addr;
+ u8 page_size;
+ u32 num_wrs;
+ unsigned long flag;
+ struct ib_sge sgl;
+ int err=0;
+ enum t3_wr_flags t3_wr_flags;
+ u32 idx;
+ struct t3_swsq *sqp;
+
+ qhp = to_iwch_qp(qp);
+ mhp = to_iwch_mw(mw);
+ rhp = qhp->rhp;
+
+ spin_lock_irqsave(&qhp->lock, flag);
+ if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -EINVAL;
+ }
+ num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
+ qhp->wq.sq_size_log2);
+ if ((num_wrs) <= 0) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -ENOMEM;
+ }
+ idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+ PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx,
+ mw, mw_bind);
+ wqe = (union t3_wr *) (qhp->wq.queue + idx);
+
+ t3_wr_flags = 0;
+ if (mw_bind->send_flags & IB_SEND_SIGNALED)
+ t3_wr_flags = T3_COMPLETION_FLAG;
+
+ sgl.addr = mw_bind->addr;
+ sgl.lkey = mw_bind->mr->lkey;
+ sgl.length = mw_bind->length;
+ wqe->bind.reserved = 0;
+ wqe->bind.type = TPT_VATO;
+
+ /* TBD: check perms */
+ wqe->bind.perms = iwch_ib_to_tpt_bind_access(mw_bind->mw_access_flags);
+ wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
+ wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
+ wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
+ wqe->bind.mw_va = cpu_to_be64(mw_bind->addr);
+ err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
+ if (err) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return err;
+ }
+ wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
+ sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
+ sqp->wr_id = mw_bind->wr_id;
+ sqp->opcode = T3_BIND_MW;
+ sqp->sq_wptr = qhp->wq.sq_wptr;
+ sqp->complete = 0;
+ sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
+ wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
+ wqe->bind.mr_pagesz = page_size;
+ build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
+ Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
+ sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP);
+ ++(qhp->wq.wptr);
+ ++(qhp->wq.sq_wptr);
+ spin_unlock_irqrestore(&qhp->lock, flag);
+
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+ return err;
+}
+
+static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
+ u8 *layer_type, u8 *ecode)
+{
+ int status = TPT_ERR_INTERNAL_ERR;
+ int tagged = 0;
+ int opcode = -1;
+ int rqtype = 0;
+ int send_inv = 0;
+
+ if (rsp_msg) {
+ status = CQE_STATUS(rsp_msg->cqe);
+ opcode = CQE_OPCODE(rsp_msg->cqe);
+ rqtype = RQ_TYPE(rsp_msg->cqe);
+ send_inv = (opcode == T3_SEND_WITH_INV) ||
+ (opcode == T3_SEND_WITH_SE_INV);
+ tagged = (opcode == T3_RDMA_WRITE) ||
+ (rqtype && (opcode == T3_READ_RESP));
+ }
+
+ switch (status) {
+ case TPT_ERR_STAG:
+ if (send_inv) {
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_CANT_INV_STAG;
+ } else {
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_INV_STAG;
+ }
+ break;
+ case TPT_ERR_PDID:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ if ((opcode == T3_SEND_WITH_INV) ||
+ (opcode == T3_SEND_WITH_SE_INV))
+ *ecode = RDMAP_CANT_INV_STAG;
+ else
+ *ecode = RDMAP_STAG_NOT_ASSOC;
+ break;
+ case TPT_ERR_QPID:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_STAG_NOT_ASSOC;
+ break;
+ case TPT_ERR_ACCESS:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_ACC_VIOL;
+ break;
+ case TPT_ERR_WRAP:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_TO_WRAP;
+ break;
+ case TPT_ERR_BOUND:
+ if (tagged) {
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_BASE_BOUNDS;
+ } else {
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_BASE_BOUNDS;
+ }
+ break;
+ case TPT_ERR_INVALIDATE_SHARED_MR:
+ case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_CANT_INV_STAG;
+ break;
+ case TPT_ERR_ECC:
+ case TPT_ERR_ECC_PSTAG:
+ case TPT_ERR_INTERNAL_ERR:
+ *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
+ *ecode = 0;
+ break;
+ case TPT_ERR_OUT_OF_RQE:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_MSN_NOBUF;
+ break;
+ case TPT_ERR_PBL_ADDR_BOUND:
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_BASE_BOUNDS;
+ break;
+ case TPT_ERR_CRC:
+ *layer_type = LAYER_MPA|DDP_LLP;
+ *ecode = MPA_CRC_ERR;
+ break;
+ case TPT_ERR_MARKER:
+ *layer_type = LAYER_MPA|DDP_LLP;
+ *ecode = MPA_MARKER_ERR;
+ break;
+ case TPT_ERR_PDU_LEN_ERR:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_MSG_TOOBIG;
+ break;
+ case TPT_ERR_DDP_VERSION:
+ if (tagged) {
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_INV_VERS;
+ } else {
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_VERS;
+ }
+ break;
+ case TPT_ERR_RDMA_VERSION:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_INV_VERS;
+ break;
+ case TPT_ERR_OPCODE:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_INV_OPCODE;
+ break;
+ case TPT_ERR_DDP_QUEUE_NUM:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_QN;
+ break;
+ case TPT_ERR_MSN:
+ case TPT_ERR_MSN_GAP:
+ case TPT_ERR_MSN_RANGE:
+ case TPT_ERR_IRD_OVERFLOW:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_MSN_RANGE;
+ break;
+ case TPT_ERR_TBIT:
+ *layer_type = LAYER_DDP|DDP_LOCAL_CATA;
+ *ecode = 0;
+ break;
+ case TPT_ERR_MO:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_MO;
+ break;
+ default:
+ *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
+ *ecode = 0;
+ break;
+ }
+}
+
+int iwch_post_zb_read(struct iwch_qp *qhp)
+{
+ union t3_wr *wqe;
+ struct sk_buff *skb;
+ u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
+
+ PDBG("%s enter\n", __func__);
+ skb = alloc_skb(40, GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
+ return -ENOMEM;
+ }
+ wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr));
+ memset(wqe, 0, sizeof(struct t3_rdma_read_wr));
+ wqe->read.rdmaop = T3_READ_REQ;
+ wqe->read.reserved[0] = 0;
+ wqe->read.reserved[1] = 0;
+ wqe->read.rem_stag = cpu_to_be32(1);
+ wqe->read.rem_to = cpu_to_be64(1);
+ wqe->read.local_stag = cpu_to_be32(1);
+ wqe->read.local_len = cpu_to_be32(0);
+ wqe->read.local_to = cpu_to_be64(1);
+ wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ));
+ wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)|
+ V_FW_RIWR_LEN(flit_cnt));
+ skb->priority = CPL_PRIORITY_DATA;
+ return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
+}
+
+/*
+ * This posts a TERMINATE with layer=RDMA, type=catastrophic.
+ */
+int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
+{
+ union t3_wr *wqe;
+ struct terminate_message *term;
+ struct sk_buff *skb;
+
+ PDBG("%s %d\n", __func__, __LINE__);
+ skb = alloc_skb(40, GFP_ATOMIC);
+ if (!skb) {
+ printk(KERN_ERR "%s cannot send TERMINATE!\n", __func__);
+ return -ENOMEM;
+ }
+ wqe = (union t3_wr *)skb_put(skb, 40);
+ memset(wqe, 0, 40);
+ wqe->send.rdmaop = T3_TERMINATE;
+
+ /* immediate data length */
+ wqe->send.plen = htonl(4);
+
+ /* immediate data starts here. */
+ term = (struct terminate_message *)wqe->send.sgl;
+ build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
+ wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) |
+ V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
+ wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid));
+ skb->priority = CPL_PRIORITY_DATA;
+ return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
+}
+
+/*
+ * Assumes qhp lock is held.
+ */
+static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
+{
+ struct iwch_cq *rchp, *schp;
+ int count;
+ int flushed;
+
+ rchp = get_chp(qhp->rhp, qhp->attr.rcq);
+ schp = get_chp(qhp->rhp, qhp->attr.scq);
+
+ PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
+ /* take a ref on the qhp since we must release the lock */
+ atomic_inc(&qhp->refcnt);
+ spin_unlock_irqrestore(&qhp->lock, *flag);
+
+ /* locking hierarchy: cq lock first, then qp lock. */
+ spin_lock_irqsave(&rchp->lock, *flag);
+ spin_lock(&qhp->lock);
+ cxio_flush_hw_cq(&rchp->cq);
+ cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
+ flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&rchp->lock, *flag);
+ if (flushed)
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+
+ /* locking hierarchy: cq lock first, then qp lock. */
+ spin_lock_irqsave(&schp->lock, *flag);
+ spin_lock(&qhp->lock);
+ cxio_flush_hw_cq(&schp->cq);
+ cxio_count_scqes(&schp->cq, &qhp->wq, &count);
+ flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&schp->lock, *flag);
+ if (flushed)
+ (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+
+ /* deref */
+ if (atomic_dec_and_test(&qhp->refcnt))
+ wake_up(&qhp->wait);
+
+ spin_lock_irqsave(&qhp->lock, *flag);
+}
+
+static void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
+{
+ if (qhp->ibqp.uobject)
+ cxio_set_wq_in_error(&qhp->wq);
+ else
+ __flush_qp(qhp, flag);
+}
+
+
+/*
+ * Return count of RECV WRs posted
+ */
+u16 iwch_rqes_posted(struct iwch_qp *qhp)
+{
+ union t3_wr *wqe = qhp->wq.queue;
+ u16 count = 0;
+ while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
+ count++;
+ wqe++;
+ }
+ PDBG("%s qhp %p count %u\n", __func__, qhp, count);
+ return count;
+}
+
+static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
+ enum iwch_qp_attr_mask mask,
+ struct iwch_qp_attributes *attrs)
+{
+ struct t3_rdma_init_attr init_attr;
+ int ret;
+
+ init_attr.tid = qhp->ep->hwtid;
+ init_attr.qpid = qhp->wq.qpid;
+ init_attr.pdid = qhp->attr.pd;
+ init_attr.scqid = qhp->attr.scq;
+ init_attr.rcqid = qhp->attr.rcq;
+ init_attr.rq_addr = qhp->wq.rq_addr;
+ init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
+ init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
+ qhp->attr.mpa_attr.recv_marker_enabled |
+ (qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
+ (qhp->attr.mpa_attr.crc_enabled << 2);
+
+ init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE |
+ uP_RI_QP_RDMA_WRITE_ENABLE |
+ uP_RI_QP_BIND_ENABLE;
+ if (!qhp->ibqp.uobject)
+ init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE |
+ uP_RI_QP_FAST_REGISTER_ENABLE;
+
+ init_attr.tcp_emss = qhp->ep->emss;
+ init_attr.ord = qhp->attr.max_ord;
+ init_attr.ird = qhp->attr.max_ird;
+ init_attr.qp_dma_addr = qhp->wq.dma_addr;
+ init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
+ init_attr.rqe_count = iwch_rqes_posted(qhp);
+ init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
+ if (peer2peer) {
+ init_attr.rtr_type = RTR_READ;
+ if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
+ init_attr.ord = 1;
+ if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator)
+ init_attr.ird = 1;
+ } else
+ init_attr.rtr_type = 0;
+ init_attr.irs = qhp->ep->rcv_seq;
+ PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
+ "flags 0x%x qpcaps 0x%x\n", __func__,
+ init_attr.rq_addr, init_attr.rq_size,
+ init_attr.flags, init_attr.qpcaps);
+ ret = cxio_rdma_init(&rhp->rdev, &init_attr);
+ PDBG("%s ret %d\n", __func__, ret);
+ return ret;
+}
+
+int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
+ enum iwch_qp_attr_mask mask,
+ struct iwch_qp_attributes *attrs,
+ int internal)
+{
+ int ret = 0;
+ struct iwch_qp_attributes newattr = qhp->attr;
+ unsigned long flag;
+ int disconnect = 0;
+ int terminate = 0;
+ int abort = 0;
+ int free = 0;
+ struct iwch_ep *ep = NULL;
+
+ PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__,
+ qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
+ (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+
+ spin_lock_irqsave(&qhp->lock, flag);
+
+ /* Process attr changes if in IDLE */
+ if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
+ if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
+ ret = -EIO;
+ goto out;
+ }
+ if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
+ newattr.enable_rdma_read = attrs->enable_rdma_read;
+ if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
+ newattr.enable_rdma_write = attrs->enable_rdma_write;
+ if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
+ newattr.enable_bind = attrs->enable_bind;
+ if (mask & IWCH_QP_ATTR_MAX_ORD) {
+ if (attrs->max_ord >
+ rhp->attr.max_rdma_read_qp_depth) {
+ ret = -EINVAL;
+ goto out;
+ }
+ newattr.max_ord = attrs->max_ord;
+ }
+ if (mask & IWCH_QP_ATTR_MAX_IRD) {
+ if (attrs->max_ird >
+ rhp->attr.max_rdma_reads_per_qp) {
+ ret = -EINVAL;
+ goto out;
+ }
+ newattr.max_ird = attrs->max_ird;
+ }
+ qhp->attr = newattr;
+ }
+
+ if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
+ goto out;
+ if (qhp->attr.state == attrs->next_state)
+ goto out;
+
+ switch (qhp->attr.state) {
+ case IWCH_QP_STATE_IDLE:
+ switch (attrs->next_state) {
+ case IWCH_QP_STATE_RTS:
+ if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ qhp->attr.mpa_attr = attrs->mpa_attr;
+ qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
+ qhp->ep = qhp->attr.llp_stream_handle;
+ qhp->attr.state = IWCH_QP_STATE_RTS;
+
+ /*
+ * Ref the endpoint here and deref when we
+ * disassociate the endpoint from the QP. This
+ * happens in CLOSING->IDLE transition or *->ERROR
+ * transition.
+ */
+ get_ep(&qhp->ep->com);
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ ret = rdma_init(rhp, qhp, mask, attrs);
+ spin_lock_irqsave(&qhp->lock, flag);
+ if (ret)
+ goto err;
+ break;
+ case IWCH_QP_STATE_ERROR:
+ qhp->attr.state = IWCH_QP_STATE_ERROR;
+ flush_qp(qhp, &flag);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
+ case IWCH_QP_STATE_RTS:
+ switch (attrs->next_state) {
+ case IWCH_QP_STATE_CLOSING:
+ BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+ qhp->attr.state = IWCH_QP_STATE_CLOSING;
+ if (!internal) {
+ abort=0;
+ disconnect = 1;
+ ep = qhp->ep;
+ get_ep(&ep->com);
+ }
+ break;
+ case IWCH_QP_STATE_TERMINATE:
+ qhp->attr.state = IWCH_QP_STATE_TERMINATE;
+ if (qhp->ibqp.uobject)
+ cxio_set_wq_in_error(&qhp->wq);
+ if (!internal)
+ terminate = 1;
+ break;
+ case IWCH_QP_STATE_ERROR:
+ qhp->attr.state = IWCH_QP_STATE_ERROR;
+ if (!internal) {
+ abort=1;
+ disconnect = 1;
+ ep = qhp->ep;
+ get_ep(&ep->com);
+ }
+ goto err;
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
+ case IWCH_QP_STATE_CLOSING:
+ if (!internal) {
+ ret = -EINVAL;
+ goto out;
+ }
+ switch (attrs->next_state) {
+ case IWCH_QP_STATE_IDLE:
+ flush_qp(qhp, &flag);
+ qhp->attr.state = IWCH_QP_STATE_IDLE;
+ qhp->attr.llp_stream_handle = NULL;
+ put_ep(&qhp->ep->com);
+ qhp->ep = NULL;
+ wake_up(&qhp->wait);
+ break;
+ case IWCH_QP_STATE_ERROR:
+ goto err;
+ default:
+ ret = -EINVAL;
+ goto err;
+ }
+ break;
+ case IWCH_QP_STATE_ERROR:
+ if (attrs->next_state != IWCH_QP_STATE_IDLE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
+ !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ qhp->attr.state = IWCH_QP_STATE_IDLE;
+ memset(&qhp->attr, 0, sizeof(qhp->attr));
+ break;
+ case IWCH_QP_STATE_TERMINATE:
+ if (!internal) {
+ ret = -EINVAL;
+ goto out;
+ }
+ goto err;
+ break;
+ default:
+ printk(KERN_ERR "%s in a bad state %d\n",
+ __func__, qhp->attr.state);
+ ret = -EINVAL;
+ goto err;
+ break;
+ }
+ goto out;
+err:
+ PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
+ qhp->wq.qpid);
+
+ /* disassociate the LLP connection */
+ qhp->attr.llp_stream_handle = NULL;
+ ep = qhp->ep;
+ qhp->ep = NULL;
+ qhp->attr.state = IWCH_QP_STATE_ERROR;
+ free=1;
+ wake_up(&qhp->wait);
+ BUG_ON(!ep);
+ flush_qp(qhp, &flag);
+out:
+ spin_unlock_irqrestore(&qhp->lock, flag);
+
+ if (terminate)
+ iwch_post_terminate(qhp, NULL);
+
+ /*
+ * If disconnect is 1, then we need to initiate a disconnect
+ * on the EP. This can be a normal close (RTS->CLOSING) or
+ * an abnormal close (RTS/CLOSING->ERROR).
+ */
+ if (disconnect) {
+ iwch_ep_disconnect(ep, abort, GFP_KERNEL);
+ put_ep(&ep->com);
+ }
+
+ /*
+ * If free is 1, then we've disassociated the EP from the QP
+ * and we need to dereference the EP.
+ */
+ if (free)
+ put_ep(&ep->com);
+
+ PDBG("%s exit state %d\n", __func__, qhp->attr.state);
+ return ret;
+}
+
+static int quiesce_qp(struct iwch_qp *qhp)
+{
+ spin_lock_irq(&qhp->lock);
+ iwch_quiesce_tid(qhp->ep);
+ qhp->flags |= QP_QUIESCED;
+ spin_unlock_irq(&qhp->lock);
+ return 0;
+}
+
+static int resume_qp(struct iwch_qp *qhp)
+{
+ spin_lock_irq(&qhp->lock);
+ iwch_resume_tid(qhp->ep);
+ qhp->flags &= ~QP_QUIESCED;
+ spin_unlock_irq(&qhp->lock);
+ return 0;
+}
+
+int iwch_quiesce_qps(struct iwch_cq *chp)
+{
+ int i;
+ struct iwch_qp *qhp;
+
+ for (i=0; i < T3_MAX_NUM_QP; i++) {
+ qhp = get_qhp(chp->rhp, i);
+ if (!qhp)
+ continue;
+ if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
+ quiesce_qp(qhp);
+ continue;
+ }
+ if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
+ quiesce_qp(qhp);
+ }
+ return 0;
+}
+
+int iwch_resume_qps(struct iwch_cq *chp)
+{
+ int i;
+ struct iwch_qp *qhp;
+
+ for (i=0; i < T3_MAX_NUM_QP; i++) {
+ qhp = get_qhp(chp->rhp, i);
+ if (!qhp)
+ continue;
+ if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
+ resume_qp(qhp);
+ continue;
+ }
+ if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
+ resume_qp(qhp);
+ }
+ return 0;
+}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h
new file mode 100644
index 0000000..cb7086f
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_user.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWCH_USER_H__
+#define __IWCH_USER_H__
+
+#define IWCH_UVERBS_ABI_VERSION 1
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+struct iwch_create_cq_req {
+ __u64 user_rptr_addr;
+};
+
+struct iwch_create_cq_resp {
+ __u64 key;
+ __u32 cqid;
+ __u32 size_log2;
+};
+
+struct iwch_create_qp_resp {
+ __u64 key;
+ __u64 db_key;
+ __u32 qpid;
+ __u32 size_log2;
+ __u32 sq_size_log2;
+ __u32 rq_size_log2;
+};
+
+struct iwch_reg_user_mr_resp {
+ __u32 pbl_addr;
+};
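+
+/*
+ * Example of the convention above (userspace side, illustrative only):
+ * a provider library passes a pointer by value in the __u64 field, e.g.
+ *
+ *	struct iwch_create_cq_req req;
+ *	uint32_t rptr;
+ *
+ *	req.user_rptr_addr = (uintptr_t)&rptr;	/* never a pointer type */
+ *
+ * so the request layout is identical for 32-bit and 64-bit userspace.
+ */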
+#endif
diff --git a/drivers/infiniband/hw/cxgb3/tcb.h b/drivers/infiniband/hw/cxgb3/tcb.h
new file mode 100644
index 0000000..c702dc1
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/tcb.h
@@ -0,0 +1,632 @@
+/*
+ * Copyright (c) 2007 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _TCB_DEFS_H
+#define _TCB_DEFS_H
+
+#define W_TCB_T_STATE 0
+#define S_TCB_T_STATE 0
+#define M_TCB_T_STATE 0xfULL
+#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE)
+
+#define W_TCB_TIMER 0
+#define S_TCB_TIMER 4
+#define M_TCB_TIMER 0x1ULL
+#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER)
+
+#define W_TCB_DACK_TIMER 0
+#define S_TCB_DACK_TIMER 5
+#define M_TCB_DACK_TIMER 0x1ULL
+#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER)
+
+#define W_TCB_DEL_FLAG 0
+#define S_TCB_DEL_FLAG 6
+#define M_TCB_DEL_FLAG 0x1ULL
+#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG)
+
+#define W_TCB_L2T_IX 0
+#define S_TCB_L2T_IX 7
+#define M_TCB_L2T_IX 0x7ffULL
+#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX)
+
+#define W_TCB_SMAC_SEL 0
+#define S_TCB_SMAC_SEL 18
+#define M_TCB_SMAC_SEL 0x3ULL
+#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL)
+
+#define W_TCB_TOS 0
+#define S_TCB_TOS 20
+#define M_TCB_TOS 0x3fULL
+#define V_TCB_TOS(x) ((x) << S_TCB_TOS)
+
+#define W_TCB_MAX_RT 0
+#define S_TCB_MAX_RT 26
+#define M_TCB_MAX_RT 0xfULL
+#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT)
+
+#define W_TCB_T_RXTSHIFT 0
+#define S_TCB_T_RXTSHIFT 30
+#define M_TCB_T_RXTSHIFT 0xfULL
+#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT)
+
+#define W_TCB_T_DUPACKS 1
+#define S_TCB_T_DUPACKS 2
+#define M_TCB_T_DUPACKS 0xfULL
+#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS)
+
+#define W_TCB_T_MAXSEG 1
+#define S_TCB_T_MAXSEG 6
+#define M_TCB_T_MAXSEG 0xfULL
+#define V_TCB_T_MAXSEG(x) ((x) << S_TCB_T_MAXSEG)
+
+#define W_TCB_T_FLAGS1 1
+#define S_TCB_T_FLAGS1 10
+#define M_TCB_T_FLAGS1 0xffffffffULL
+#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1)
+
+#define W_TCB_T_MIGRATION 1
+#define S_TCB_T_MIGRATION 20
+#define M_TCB_T_MIGRATION 0x1ULL
+#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION)
+
+#define W_TCB_T_FLAGS2 2
+#define S_TCB_T_FLAGS2 10
+#define M_TCB_T_FLAGS2 0x7fULL
+#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2)
+
+#define W_TCB_SND_SCALE 2
+#define S_TCB_SND_SCALE 17
+#define M_TCB_SND_SCALE 0xfULL
+#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE)
+
+#define W_TCB_RCV_SCALE 2
+#define S_TCB_RCV_SCALE 21
+#define M_TCB_RCV_SCALE 0xfULL
+#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE)
+
+#define W_TCB_SND_UNA_RAW 2
+#define S_TCB_SND_UNA_RAW 25
+#define M_TCB_SND_UNA_RAW 0x7ffffffULL
+#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW)
+
+#define W_TCB_SND_NXT_RAW 3
+#define S_TCB_SND_NXT_RAW 20
+#define M_TCB_SND_NXT_RAW 0x7ffffffULL
+#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW)
+
+#define W_TCB_RCV_NXT 4
+#define S_TCB_RCV_NXT 15
+#define M_TCB_RCV_NXT 0xffffffffULL
+#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT)
+
+#define W_TCB_RCV_ADV 5
+#define S_TCB_RCV_ADV 15
+#define M_TCB_RCV_ADV 0xffffULL
+#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV)
+
+#define W_TCB_SND_MAX_RAW 5
+#define S_TCB_SND_MAX_RAW 31
+#define M_TCB_SND_MAX_RAW 0x7ffffffULL
+#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW)
+
+#define W_TCB_SND_CWND 6
+#define S_TCB_SND_CWND 26
+#define M_TCB_SND_CWND 0x7ffffffULL
+#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND)
+
+#define W_TCB_SND_SSTHRESH 7
+#define S_TCB_SND_SSTHRESH 21
+#define M_TCB_SND_SSTHRESH 0x7ffffffULL
+#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH)
+
+#define W_TCB_T_RTT_TS_RECENT_AGE 8
+#define S_TCB_T_RTT_TS_RECENT_AGE 16
+#define M_TCB_T_RTT_TS_RECENT_AGE 0xffffffffULL
+#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE)
+
+#define W_TCB_T_RTSEQ_RECENT 9
+#define S_TCB_T_RTSEQ_RECENT 16
+#define M_TCB_T_RTSEQ_RECENT 0xffffffffULL
+#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT)
+
+#define W_TCB_T_SRTT 10
+#define S_TCB_T_SRTT 16
+#define M_TCB_T_SRTT 0xffffULL
+#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT)
+
+#define W_TCB_T_RTTVAR 11
+#define S_TCB_T_RTTVAR 0
+#define M_TCB_T_RTTVAR 0xffffULL
+#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR)
+
+#define W_TCB_TS_LAST_ACK_SENT_RAW 11
+#define S_TCB_TS_LAST_ACK_SENT_RAW 16
+#define M_TCB_TS_LAST_ACK_SENT_RAW 0x7ffffffULL
+#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW)
+
+#define W_TCB_DIP 12
+#define S_TCB_DIP 11
+#define M_TCB_DIP 0xffffffffULL
+#define V_TCB_DIP(x) ((x) << S_TCB_DIP)
+
+#define W_TCB_SIP 13
+#define S_TCB_SIP 11
+#define M_TCB_SIP 0xffffffffULL
+#define V_TCB_SIP(x) ((x) << S_TCB_SIP)
+
+#define W_TCB_DP 14
+#define S_TCB_DP 11
+#define M_TCB_DP 0xffffULL
+#define V_TCB_DP(x) ((x) << S_TCB_DP)
+
+#define W_TCB_SP 14
+#define S_TCB_SP 27
+#define M_TCB_SP 0xffffULL
+#define V_TCB_SP(x) ((x) << S_TCB_SP)
+
+#define W_TCB_TIMESTAMP 15
+#define S_TCB_TIMESTAMP 11
+#define M_TCB_TIMESTAMP 0xffffffffULL
+#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP)
+
+#define W_TCB_TIMESTAMP_OFFSET 16
+#define S_TCB_TIMESTAMP_OFFSET 11
+#define M_TCB_TIMESTAMP_OFFSET 0xfULL
+#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET)
+
+#define W_TCB_TX_MAX 16
+#define S_TCB_TX_MAX 15
+#define M_TCB_TX_MAX 0xffffffffULL
+#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX)
+
+#define W_TCB_TX_HDR_PTR_RAW 17
+#define S_TCB_TX_HDR_PTR_RAW 15
+#define M_TCB_TX_HDR_PTR_RAW 0x1ffffULL
+#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW)
+
+#define W_TCB_TX_LAST_PTR_RAW 18
+#define S_TCB_TX_LAST_PTR_RAW 0
+#define M_TCB_TX_LAST_PTR_RAW 0x1ffffULL
+#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW)
+
+#define W_TCB_TX_COMPACT 18
+#define S_TCB_TX_COMPACT 17
+#define M_TCB_TX_COMPACT 0x1ULL
+#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT)
+
+#define W_TCB_RX_COMPACT 18
+#define S_TCB_RX_COMPACT 18
+#define M_TCB_RX_COMPACT 0x1ULL
+#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT)
+
+#define W_TCB_RCV_WND 18
+#define S_TCB_RCV_WND 19
+#define M_TCB_RCV_WND 0x7ffffffULL
+#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND)
+
+#define W_TCB_RX_HDR_OFFSET 19
+#define S_TCB_RX_HDR_OFFSET 14
+#define M_TCB_RX_HDR_OFFSET 0x7ffffffULL
+#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET)
+
+#define W_TCB_RX_FRAG0_START_IDX_RAW 20
+#define S_TCB_RX_FRAG0_START_IDX_RAW 9
+#define M_TCB_RX_FRAG0_START_IDX_RAW 0x7ffffffULL
+#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW)
+
+#define W_TCB_RX_FRAG1_START_IDX_OFFSET 21
+#define S_TCB_RX_FRAG1_START_IDX_OFFSET 4
+#define M_TCB_RX_FRAG1_START_IDX_OFFSET 0x7ffffffULL
+#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET)
+
+#define W_TCB_RX_FRAG0_LEN 21
+#define S_TCB_RX_FRAG0_LEN 31
+#define M_TCB_RX_FRAG0_LEN 0x7ffffffULL
+#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN)
+
+#define W_TCB_RX_FRAG1_LEN 22
+#define S_TCB_RX_FRAG1_LEN 26
+#define M_TCB_RX_FRAG1_LEN 0x7ffffffULL
+#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN)
+
+#define W_TCB_NEWRENO_RECOVER 23
+#define S_TCB_NEWRENO_RECOVER 21
+#define M_TCB_NEWRENO_RECOVER 0x7ffffffULL
+#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER)
+
+#define W_TCB_PDU_HAVE_LEN 24
+#define S_TCB_PDU_HAVE_LEN 16
+#define M_TCB_PDU_HAVE_LEN 0x1ULL
+#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN)
+
+#define W_TCB_PDU_LEN 24
+#define S_TCB_PDU_LEN 17
+#define M_TCB_PDU_LEN 0xffffULL
+#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN)
+
+#define W_TCB_RX_QUIESCE 25
+#define S_TCB_RX_QUIESCE 1
+#define M_TCB_RX_QUIESCE 0x1ULL
+#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE)
+
+#define W_TCB_RX_PTR_RAW 25
+#define S_TCB_RX_PTR_RAW 2
+#define M_TCB_RX_PTR_RAW 0x1ffffULL
+#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW)
+
+#define W_TCB_CPU_NO 25
+#define S_TCB_CPU_NO 19
+#define M_TCB_CPU_NO 0x7fULL
+#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO)
+
+#define W_TCB_ULP_TYPE 25
+#define S_TCB_ULP_TYPE 26
+#define M_TCB_ULP_TYPE 0xfULL
+#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE)
+
+#define W_TCB_RX_FRAG1_PTR_RAW 25
+#define S_TCB_RX_FRAG1_PTR_RAW 30
+#define M_TCB_RX_FRAG1_PTR_RAW 0x1ffffULL
+#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW)
+
+#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 26
+#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 15
+#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 0x7ffffffULL
+#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW)
+
+#define W_TCB_RX_FRAG2_PTR_RAW 27
+#define S_TCB_RX_FRAG2_PTR_RAW 10
+#define M_TCB_RX_FRAG2_PTR_RAW 0x1ffffULL
+#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW)
+
+#define W_TCB_RX_FRAG2_LEN_RAW 27
+#define S_TCB_RX_FRAG2_LEN_RAW 27
+#define M_TCB_RX_FRAG2_LEN_RAW 0x7ffffffULL
+#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW)
+
+#define W_TCB_RX_FRAG3_PTR_RAW 28
+#define S_TCB_RX_FRAG3_PTR_RAW 22
+#define M_TCB_RX_FRAG3_PTR_RAW 0x1ffffULL
+#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW)
+
+#define W_TCB_RX_FRAG3_LEN_RAW 29
+#define S_TCB_RX_FRAG3_LEN_RAW 7
+#define M_TCB_RX_FRAG3_LEN_RAW 0x7ffffffULL
+#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW)
+
+#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 30
+#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 2
+#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 0x7ffffffULL
+#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW)
+
+#define W_TCB_PDU_HDR_LEN 30
+#define S_TCB_PDU_HDR_LEN 29
+#define M_TCB_PDU_HDR_LEN 0xffULL
+#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN)
+
+#define W_TCB_SLUSH1 31
+#define S_TCB_SLUSH1 5
+#define M_TCB_SLUSH1 0x7ffffULL
+#define V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1)
+
+#define W_TCB_ULP_RAW 31
+#define S_TCB_ULP_RAW 24
+#define M_TCB_ULP_RAW 0xffULL
+#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW)
+
+#define W_TCB_DDP_RDMAP_VERSION 25
+#define S_TCB_DDP_RDMAP_VERSION 30
+#define M_TCB_DDP_RDMAP_VERSION 0x1ULL
+#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION)
+
+#define W_TCB_MARKER_ENABLE_RX 25
+#define S_TCB_MARKER_ENABLE_RX 31
+#define M_TCB_MARKER_ENABLE_RX 0x1ULL
+#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX)
+
+#define W_TCB_MARKER_ENABLE_TX 26
+#define S_TCB_MARKER_ENABLE_TX 0
+#define M_TCB_MARKER_ENABLE_TX 0x1ULL
+#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX)
+
+#define W_TCB_CRC_ENABLE 26
+#define S_TCB_CRC_ENABLE 1
+#define M_TCB_CRC_ENABLE 0x1ULL
+#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE)
+
+#define W_TCB_IRS_ULP 26
+#define S_TCB_IRS_ULP 2
+#define M_TCB_IRS_ULP 0x1ffULL
+#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP)
+
+#define W_TCB_ISS_ULP 26
+#define S_TCB_ISS_ULP 11
+#define M_TCB_ISS_ULP 0x1ffULL
+#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP)
+
+#define W_TCB_TX_PDU_LEN 26
+#define S_TCB_TX_PDU_LEN 20
+#define M_TCB_TX_PDU_LEN 0x3fffULL
+#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN)
+
+#define W_TCB_TX_PDU_OUT 27
+#define S_TCB_TX_PDU_OUT 2
+#define M_TCB_TX_PDU_OUT 0x1ULL
+#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT)
+
+#define W_TCB_CQ_IDX_SQ 27
+#define S_TCB_CQ_IDX_SQ 3
+#define M_TCB_CQ_IDX_SQ 0xffffULL
+#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ)
+
+#define W_TCB_CQ_IDX_RQ 27
+#define S_TCB_CQ_IDX_RQ 19
+#define M_TCB_CQ_IDX_RQ 0xffffULL
+#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ)
+
+#define W_TCB_QP_ID 28
+#define S_TCB_QP_ID 3
+#define M_TCB_QP_ID 0xffffULL
+#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID)
+
+#define W_TCB_PD_ID 28
+#define S_TCB_PD_ID 19
+#define M_TCB_PD_ID 0xffffULL
+#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID)
+
+#define W_TCB_STAG 29
+#define S_TCB_STAG 3
+#define M_TCB_STAG 0xffffffffULL
+#define V_TCB_STAG(x) ((x) << S_TCB_STAG)
+
+#define W_TCB_RQ_START 30
+#define S_TCB_RQ_START 3
+#define M_TCB_RQ_START 0x3ffffffULL
+#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START)
+
+#define W_TCB_RQ_MSN 30
+#define S_TCB_RQ_MSN 29
+#define M_TCB_RQ_MSN 0x3ffULL
+#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN)
+
+#define W_TCB_RQ_MAX_OFFSET 31
+#define S_TCB_RQ_MAX_OFFSET 7
+#define M_TCB_RQ_MAX_OFFSET 0xfULL
+#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET)
+
+#define W_TCB_RQ_WRITE_PTR 31
+#define S_TCB_RQ_WRITE_PTR 11
+#define M_TCB_RQ_WRITE_PTR 0x3ffULL
+#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR)
+
+#define W_TCB_INB_WRITE_PERM 31
+#define S_TCB_INB_WRITE_PERM 21
+#define M_TCB_INB_WRITE_PERM 0x1ULL
+#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM)
+
+#define W_TCB_INB_READ_PERM 31
+#define S_TCB_INB_READ_PERM 22
+#define M_TCB_INB_READ_PERM 0x1ULL
+#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM)
+
+#define W_TCB_ORD_L_BIT_VLD 31
+#define S_TCB_ORD_L_BIT_VLD 23
+#define M_TCB_ORD_L_BIT_VLD 0x1ULL
+#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD)
+
+#define W_TCB_RDMAP_OPCODE 31
+#define S_TCB_RDMAP_OPCODE 24
+#define M_TCB_RDMAP_OPCODE 0xfULL
+#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE)
+
+#define W_TCB_TX_FLUSH 31
+#define S_TCB_TX_FLUSH 28
+#define M_TCB_TX_FLUSH 0x1ULL
+#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH)
+
+#define W_TCB_TX_OOS_RXMT 31
+#define S_TCB_TX_OOS_RXMT 29
+#define M_TCB_TX_OOS_RXMT 0x1ULL
+#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT)
+
+#define W_TCB_TX_OOS_TXMT 31
+#define S_TCB_TX_OOS_TXMT 30
+#define M_TCB_TX_OOS_TXMT 0x1ULL
+#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT)
+
+#define W_TCB_SLUSH_AUX2 31
+#define S_TCB_SLUSH_AUX2 31
+#define M_TCB_SLUSH_AUX2 0x1ULL
+#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2)
+
+#define W_TCB_RX_FRAG1_PTR_RAW2 25
+#define S_TCB_RX_FRAG1_PTR_RAW2 30
+#define M_TCB_RX_FRAG1_PTR_RAW2 0x1ffffULL
+#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2)
+
+#define W_TCB_RX_DDP_FLAGS 26
+#define S_TCB_RX_DDP_FLAGS 15
+#define M_TCB_RX_DDP_FLAGS 0x3ffULL
+#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS)
+
+#define W_TCB_SLUSH_AUX3 26
+#define S_TCB_SLUSH_AUX3 31
+#define M_TCB_SLUSH_AUX3 0x1ffULL
+#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3)
+
+#define W_TCB_RX_DDP_BUF0_OFFSET 27
+#define S_TCB_RX_DDP_BUF0_OFFSET 8
+#define M_TCB_RX_DDP_BUF0_OFFSET 0x3fffffULL
+#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET)
+
+#define W_TCB_RX_DDP_BUF0_LEN 27
+#define S_TCB_RX_DDP_BUF0_LEN 30
+#define M_TCB_RX_DDP_BUF0_LEN 0x3fffffULL
+#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN)
+
+#define W_TCB_RX_DDP_BUF1_OFFSET 28
+#define S_TCB_RX_DDP_BUF1_OFFSET 20
+#define M_TCB_RX_DDP_BUF1_OFFSET 0x3fffffULL
+#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET)
+
+#define W_TCB_RX_DDP_BUF1_LEN 29
+#define S_TCB_RX_DDP_BUF1_LEN 10
+#define M_TCB_RX_DDP_BUF1_LEN 0x3fffffULL
+#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN)
+
+#define W_TCB_RX_DDP_BUF0_TAG 30
+#define S_TCB_RX_DDP_BUF0_TAG 0
+#define M_TCB_RX_DDP_BUF0_TAG 0xffffffffULL
+#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG)
+
+#define W_TCB_RX_DDP_BUF1_TAG 31
+#define S_TCB_RX_DDP_BUF1_TAG 0
+#define M_TCB_RX_DDP_BUF1_TAG 0xffffffffULL
+#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG)
+
+#define S_TF_DACK 10
+#define V_TF_DACK(x) ((x) << S_TF_DACK)
+
+#define S_TF_NAGLE 11
+#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE)
+
+#define S_TF_RECV_SCALE 12
+#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE)
+
+#define S_TF_RECV_TSTMP 13
+#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP)
+
+#define S_TF_RECV_SACK 14
+#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK)
+
+#define S_TF_TURBO 15
+#define V_TF_TURBO(x) ((x) << S_TF_TURBO)
+
+#define S_TF_KEEPALIVE 16
+#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE)
+
+#define S_TF_TCAM_BYPASS 17
+#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS)
+
+#define S_TF_CORE_FIN 18
+#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN)
+
+#define S_TF_CORE_MORE 19
+#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE)
+
+#define S_TF_MIGRATING 20
+#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING)
+
+#define S_TF_ACTIVE_OPEN 21
+#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN)
+
+#define S_TF_ASK_MODE 22
+#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE)
+
+#define S_TF_NON_OFFLOAD 23
+#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD)
+
+#define S_TF_MOD_SCHD 24
+#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD)
+
+#define S_TF_MOD_SCHD_REASON0 25
+#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0)
+
+#define S_TF_MOD_SCHD_REASON1 26
+#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1)
+
+#define S_TF_MOD_SCHD_RX 27
+#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX)
+
+#define S_TF_CORE_PUSH 28
+#define V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH)
+
+#define S_TF_RCV_COALESCE_ENABLE 29
+#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE)
+
+#define S_TF_RCV_COALESCE_PUSH 30
+#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH)
+
+#define S_TF_RCV_COALESCE_LAST_PSH 31
+#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH)
+
+#define S_TF_RCV_COALESCE_HEARTBEAT 32
+#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT)
+
+#define S_TF_HALF_CLOSE 33
+#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE)
+
+#define S_TF_DACK_MSS 34
+#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS)
+
+#define S_TF_CCTRL_SEL0 35
+#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0)
+
+#define S_TF_CCTRL_SEL1 36
+#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1)
+
+#define S_TF_TCP_NEWRENO_FAST_RECOVERY 37
+#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY)
+
+#define S_TF_TX_PACE_AUTO 38
+#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO)
+
+#define S_TF_PEER_FIN_HELD 39
+#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD)
+
+#define S_TF_CORE_URG 40
+#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG)
+
+#define S_TF_RDMA_ERROR 41
+#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR)
+
+#define S_TF_SSWS_DISABLED 42
+#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED)
+
+#define S_TF_DUPACK_COUNT_ODD 43
+#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD)
+
+#define S_TF_TX_CHANNEL 44
+#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL)
+
+#define S_TF_RX_CHANNEL 45
+#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL)
+
+#define S_TF_TX_PACE_FIXED 46
+#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED)
+
+#define S_TF_RDMA_FLM_ERROR 47
+#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR)
+
+#define S_TF_RX_FLOW_CONTROL_DISABLE 48
+#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE)
+
+#endif /* _TCB_DEFS_H */
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
new file mode 100644
index 0000000..59f807d
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -0,0 +1,9 @@
+config INFINIBAND_EHCA
+ tristate "eHCA support"
+ depends on IBMEBUS
+ ---help---
+ This driver supports the IBM pSeries eHCA InfiniBand adapter.
+
+ To compile the driver as a module, choose M here. The module
+ will be called ib_ehca.
+
diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/infiniband/hw/ehca/Makefile
new file mode 100644
index 0000000..74d284e
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/Makefile
@@ -0,0 +1,16 @@
+# Authors: Heiko J Schick <schickhj@de.ibm.com>
+# Christoph Raisch <raisch@de.ibm.com>
+# Joachim Fenkes <fenkes@de.ibm.com>
+#
+# Copyright (c) 2005 IBM Corporation
+#
+# All rights reserved.
+#
+# This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD.
+
+obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o
+
+ib_ehca-objs = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \
+ ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \
+ ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o
+
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
new file mode 100644
index 0000000..56735ea
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@@ -0,0 +1,275 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * address vector functions
+ *
+ * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Khadija Souissi <souissik@de.ibm.com>
+ * Reinhard Ernst <rernst@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+static struct kmem_cache *av_cache;
+
+int ehca_calc_ipd(struct ehca_shca *shca, int port,
+ enum ib_rate path_rate, u32 *ipd)
+{
+ int path = ib_rate_to_mult(path_rate);
+ int link, ret;
+ struct ib_port_attr pa;
+
+ if (path_rate == IB_RATE_PORT_CURRENT) {
+ *ipd = 0;
+ return 0;
+ }
+
+ if (unlikely(path < 0)) {
+ ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x",
+ path_rate);
+ return -EINVAL;
+ }
+
+ ret = ehca_query_port(&shca->ib_device, port, &pa);
+ if (unlikely(ret < 0)) {
+ ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret);
+ return ret;
+ }
+
+ link = ib_width_enum_to_int(pa.active_width) * pa.active_speed;
+
+ if (path >= link)
+ /* no need to throttle if path faster than link */
+ *ipd = 0;
+ else
+ /* IPD = round((link / path) - 1) */
+ *ipd = ((link + (path >> 1)) / path) - 1;
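+ /* the (path >> 1) term above rounds the division to the nearest integer */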
+
+ return 0;
+}
+
+struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+ int ret;
+ struct ehca_av *av;
+ struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+ ib_device);
+
+ av = kmem_cache_alloc(av_cache, GFP_KERNEL);
+ if (!av) {
+ ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
+ pd, ah_attr);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ av->av.sl = ah_attr->sl;
+ av->av.dlid = ah_attr->dlid;
+ av->av.slid_path_bits = ah_attr->src_path_bits;
+
+ if (ehca_static_rate < 0) {
+ u32 ipd;
+ if (ehca_calc_ipd(shca, ah_attr->port_num,
+ ah_attr->static_rate, &ipd)) {
+ ret = -EINVAL;
+ goto create_ah_exit1;
+ }
+ av->av.ipd = ipd;
+ } else
+ av->av.ipd = ehca_static_rate;
+
+ av->av.lnh = ah_attr->ah_flags;
+ av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
+ av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK,
+ ah_attr->grh.traffic_class);
+ av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
+ ah_attr->grh.flow_label);
+ av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
+ ah_attr->grh.hop_limit);
+ av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B);
+ /* set sgid in grh.word_1 */
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ int rc;
+ struct ib_port_attr port_attr;
+ union ib_gid gid;
+ memset(&port_attr, 0, sizeof(port_attr));
+ rc = ehca_query_port(pd->device, ah_attr->port_num,
+ &port_attr);
+ if (rc) { /* invalid port number */
+ ret = -EINVAL;
+ ehca_err(pd->device, "Invalid port number "
+ "ehca_query_port() returned %x "
+ "pd=%p ah_attr=%p", rc, pd, ah_attr);
+ goto create_ah_exit1;
+ }
+ memset(&gid, 0, sizeof(gid));
+ rc = ehca_query_gid(pd->device,
+ ah_attr->port_num,
+ ah_attr->grh.sgid_index, &gid);
+ if (rc) {
+ ret = -EINVAL;
+ ehca_err(pd->device, "Failed to retrieve sgid "
+ "ehca_query_gid() returned %x "
+ "pd=%p ah_attr=%p", rc, pd, ah_attr);
+ goto create_ah_exit1;
+ }
+ memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
+ }
+ av->av.pmtu = shca->max_mtu;
+
+ /* dgid comes in grh.word_3 */
+ memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
+ sizeof(ah_attr->grh.dgid));
+
+ return &av->ib_ah;
+
+create_ah_exit1:
+ kmem_cache_free(av_cache, av);
+
+ return ERR_PTR(ret);
+}
+
+int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+ struct ehca_av *av;
+ struct ehca_ud_av new_ehca_av;
+ struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca,
+ ib_device);
+
+ memset(&new_ehca_av, 0, sizeof(new_ehca_av));
+ new_ehca_av.sl = ah_attr->sl;
+ new_ehca_av.dlid = ah_attr->dlid;
+ new_ehca_av.slid_path_bits = ah_attr->src_path_bits;
+ new_ehca_av.ipd = ah_attr->static_rate;
+ new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK,
+ (ah_attr->ah_flags & IB_AH_GRH) > 0);
+ new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK,
+ ah_attr->grh.traffic_class);
+ new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
+ ah_attr->grh.flow_label);
+ new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
+ ah_attr->grh.hop_limit);
+ new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b);
+
+ /* set sgid in grh.word_1 */
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ int rc;
+ struct ib_port_attr port_attr;
+ union ib_gid gid;
+ memset(&port_attr, 0, sizeof(port_attr));
+ rc = ehca_query_port(ah->device, ah_attr->port_num,
+ &port_attr);
+ if (rc) { /* invalid port number */
+ ehca_err(ah->device, "Invalid port number "
+ "ehca_query_port() returned %x "
+ "ah=%p ah_attr=%p port_num=%x",
+ rc, ah, ah_attr, ah_attr->port_num);
+ return -EINVAL;
+ }
+ memset(&gid, 0, sizeof(gid));
+ rc = ehca_query_gid(ah->device,
+ ah_attr->port_num,
+ ah_attr->grh.sgid_index, &gid);
+ if (rc) {
+ ehca_err(ah->device, "Failed to retrieve sgid "
+ "ehca_query_gid() returned %x "
+ "ah=%p ah_attr=%p port_num=%x "
+ "sgid_index=%x",
+ rc, ah, ah_attr, ah_attr->port_num,
+ ah_attr->grh.sgid_index);
+ return -EINVAL;
+ }
+ memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
+ }
+
+ new_ehca_av.pmtu = shca->max_mtu;
+
+ memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
+ sizeof(ah_attr->grh.dgid));
+
+ av = container_of(ah, struct ehca_av, ib_ah);
+ av->av = new_ehca_av;
+
+ return 0;
+}
+
+int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+{
+ struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
+
+ memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3,
+ sizeof(ah_attr->grh.dgid));
+ ah_attr->sl = av->av.sl;
+
+ ah_attr->dlid = av->av.dlid;
+
+ ah_attr->src_path_bits = av->av.slid_path_bits;
+ ah_attr->static_rate = av->av.ipd;
+ ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh);
+ ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK,
+ av->av.grh.word_0);
+ ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK,
+ av->av.grh.word_0);
+ ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK,
+ av->av.grh.word_0);
+
+ return 0;
+}
+
+int ehca_destroy_ah(struct ib_ah *ah)
+{
+ kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah));
+
+ return 0;
+}
+
+int ehca_init_av_cache(void)
+{
+ av_cache = kmem_cache_create("ehca_cache_av",
+ sizeof(struct ehca_av), 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!av_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void ehca_cleanup_av_cache(void)
+{
+ if (av_cache)
+ kmem_cache_destroy(av_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
new file mode 100644
index 0000000..7fc35cf
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -0,0 +1,474 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * Struct definition for eHCA internal structures
+ *
+ * Authors: Heiko J Schick <schickhj@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ * Joachim Fenkes <fenkes@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_H__
+#define __EHCA_CLASSES_H__
+
+struct ehca_module;
+struct ehca_qp;
+struct ehca_cq;
+struct ehca_eq;
+struct ehca_mr;
+struct ehca_mw;
+struct ehca_pd;
+struct ehca_av;
+
+#include <linux/wait.h>
+#include <linux/mutex.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#ifdef CONFIG_PPC64
+#include "ehca_classes_pSeries.h"
+#endif
+#include "ipz_pt_fn.h"
+#include "ehca_qes.h"
+#include "ehca_irq.h"
+
+#define EHCA_EQE_CACHE_SIZE 20
+#define EHCA_MAX_NUM_QUEUES 0xffff
+
+struct ehca_eqe_cache_entry {
+ struct ehca_eqe *eqe;
+ struct ehca_cq *cq;
+};
+
+struct ehca_eq {
+ u32 length;
+ struct ipz_queue ipz_queue;
+ struct ipz_eq_handle ipz_eq_handle;
+ struct work_struct work;
+ struct h_galpas galpas;
+ int is_initialized;
+ struct ehca_pfeq pf;
+ spinlock_t spinlock;
+ struct tasklet_struct interrupt_task;
+ u32 ist;
+ spinlock_t irq_spinlock;
+ struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
+};
+
+struct ehca_sma_attr {
+ u16 lid, lmc, sm_sl, sm_lid;
+ u16 pkey_tbl_len, pkeys[16];
+};
+
+struct ehca_sport {
+ struct ib_cq *ibcq_aqp1;
+ struct ib_qp *ibqp_sqp[2];
+ /* lock to serialize modify_qp() calls for sqp in normal
+ * and irq path (when event PORT_ACTIVE is received first time)
+ */
+ spinlock_t mod_sqp_lock;
+ enum ib_port_state port_state;
+ struct ehca_sma_attr saved_attr;
+ u32 pma_qp_nr;
+};
+
+#define HCA_CAP_MR_PGSIZE_4K 0x80000000
+#define HCA_CAP_MR_PGSIZE_64K 0x40000000
+#define HCA_CAP_MR_PGSIZE_1M 0x20000000
+#define HCA_CAP_MR_PGSIZE_16M 0x10000000
+
+struct ehca_shca {
+ struct ib_device ib_device;
+ struct of_device *ofdev;
+ u8 num_ports;
+ int hw_level;
+ struct list_head shca_list;
+ struct ipz_adapter_handle ipz_hca_handle;
+ struct ehca_sport sport[2];
+ struct ehca_eq eq;
+ struct ehca_eq neq;
+ struct ehca_mr *maxmr;
+ struct ehca_pd *pd;
+ struct h_galpas galpas;
+ struct mutex modify_mutex;
+ u64 hca_cap;
+ /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
+ u32 hca_cap_mr_pgsize;
+ int max_mtu;
+ int max_num_qps;
+ int max_num_cqs;
+ atomic_t num_cqs;
+ atomic_t num_qps;
+};
+
+struct ehca_pd {
+ struct ib_pd ib_pd;
+ struct ipz_pd fw_pd;
+ /* small queue mgmt */
+ struct mutex lock;
+ struct list_head free[2];
+ struct list_head full[2];
+};
+
+enum ehca_ext_qp_type {
+ EQPT_NORMAL = 0,
+ EQPT_LLQP = 1,
+ EQPT_SRQBASE = 2,
+ EQPT_SRQ = 3,
+};
+
+/* struct to cache modify_qp()'s parms for GSI/SMI qp */
+struct ehca_mod_qp_parm {
+ int mask;
+ struct ib_qp_attr attr;
+};
+
+#define EHCA_MOD_QP_PARM_MAX 4
+
+#define QMAP_IDX_MASK 0xFFFFULL
+
+/* struct for tracking if cqes have been reported to the application */
+struct ehca_qmap_entry {
+ u16 app_wr_id;
+ u8 reported;
+ u8 cqe_req;
+};
+
+struct ehca_queue_map {
+ struct ehca_qmap_entry *map;
+ unsigned int entries;
+ unsigned int tail;
+ unsigned int left_to_poll;
+ unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */
+};
+
+struct ehca_qp {
+ union {
+ struct ib_qp ib_qp;
+ struct ib_srq ib_srq;
+ };
+ u32 qp_type;
+ enum ehca_ext_qp_type ext_type;
+ enum ib_qp_state state;
+ struct ipz_queue ipz_squeue;
+ struct ehca_queue_map sq_map;
+ struct ipz_queue ipz_rqueue;
+ struct ehca_queue_map rq_map;
+ struct h_galpas galpas;
+ u32 qkey;
+ u32 real_qp_num;
+ u32 token;
+ spinlock_t spinlock_s;
+ spinlock_t spinlock_r;
+ u32 sq_max_inline_data_size;
+ struct ipz_qp_handle ipz_qp_handle;
+ struct ehca_pfqp pf;
+ struct ib_qp_init_attr init_attr;
+ struct ehca_cq *send_cq;
+ struct ehca_cq *recv_cq;
+ unsigned int sqerr_purgeflag;
+ struct hlist_node list_entries;
+ /* array to cache modify_qp()'s parms for GSI/SMI qp */
+ struct ehca_mod_qp_parm *mod_qp_parm;
+ int mod_qp_parm_idx;
+ /* mmap counter for resources mapped into user space */
+ u32 mm_count_squeue;
+ u32 mm_count_rqueue;
+ u32 mm_count_galpa;
+ /* unsolicited ack circumvention */
+ int unsol_ack_circ;
+ int mtu_shift;
+ u32 message_count;
+ u32 packet_count;
+ atomic_t nr_events; /* events seen */
+ wait_queue_head_t wait_completion;
+ int mig_armed;
+ struct list_head sq_err_node;
+ struct list_head rq_err_node;
+};
+
+#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
+#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ)
+#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE)
+
+/* must be power of 2 */
+#define QP_HASHTAB_LEN 8
+
+struct ehca_cq {
+ struct ib_cq ib_cq;
+ struct ipz_queue ipz_queue;
+ struct h_galpas galpas;
+ spinlock_t spinlock;
+ u32 cq_number;
+ u32 token;
+ u32 nr_of_entries;
+ struct ipz_cq_handle ipz_cq_handle;
+ struct ehca_pfcq pf;
+ spinlock_t cb_lock;
+ struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
+ struct list_head entry;
+ u32 nr_callbacks; /* #events assigned to cpu by scaling code */
+ atomic_t nr_events; /* #events seen */
+ wait_queue_head_t wait_completion;
+ spinlock_t task_lock;
+ /* mmap counter for resources mapped into user space */
+ u32 mm_count_queue;
+ u32 mm_count_galpa;
+ struct list_head sqp_err_list;
+ struct list_head rqp_err_list;
+};
+
+enum ehca_mr_flag {
+ EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */
+ EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */
+};
+
+struct ehca_mr {
+ union {
+ struct ib_mr ib_mr; /* must always be first in ehca_mr */
+ struct ib_fmr ib_fmr; /* must always be first in ehca_mr */
+ } ib;
+ struct ib_umem *umem;
+ spinlock_t mrlock;
+
+ enum ehca_mr_flag flags;
+ u32 num_kpages; /* number of kernel pages */
+ u32 num_hwpages; /* number of hw pages to form MR */
+ u64 hwpage_size; /* hw page size used for this MR */
+ int acl; /* ACL (stored here for usage in reregister) */
+ u64 *start; /* virtual start address (stored here for */
+ /* usage in reregister) */
+ u64 size; /* size (stored here for usage in reregister) */
+ u32 fmr_page_size; /* page size for FMR */
+ u32 fmr_max_pages; /* max pages for FMR */
+ u32 fmr_max_maps; /* max outstanding maps for FMR */
+ u32 fmr_map_cnt; /* map counter for FMR */
+ /* fw specific data */
+ struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */
+ struct h_galpas galpas;
+};
+
+struct ehca_mw {
+ struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */
+ spinlock_t mwlock;
+
+ u8 never_bound; /* indication MW was never bound */
+ struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */
+ struct h_galpas galpas;
+};
+
+enum ehca_mr_pgi_type {
+ EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr,
+ * ehca_rereg_phys_mr,
+ * ehca_reg_internal_maxmr */
+ EHCA_MR_PGI_USER = 2, /* type of ehca_reg_user_mr */
+ EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */
+};
+
+struct ehca_mr_pginfo {
+ enum ehca_mr_pgi_type type;
+ u64 num_kpages;
+ u64 kpage_cnt;
+ u64 hwpage_size; /* hw page size used for this MR */
+ u64 num_hwpages; /* number of hw pages */
+ u64 hwpage_cnt; /* counter for hw pages */
+ u64 next_hwpage; /* next hw page in buffer/chunk/listelem */
+
+ union {
+ struct { /* type EHCA_MR_PGI_PHYS section */
+ int num_phys_buf;
+ struct ib_phys_buf *phys_buf_array;
+ u64 next_buf;
+ } phy;
+ struct { /* type EHCA_MR_PGI_USER section */
+ struct ib_umem *region;
+ struct ib_umem_chunk *next_chunk;
+ u64 next_nmap;
+ } usr;
+ struct { /* type EHCA_MR_PGI_FMR section */
+ u64 fmr_pgsize;
+ u64 *page_list;
+ u64 next_listelem;
+ } fmr;
+ } u;
+};
+
+/* output parameters for MR/FMR hipz calls */
+struct ehca_mr_hipzout_parms {
+ struct ipz_mrmw_handle handle;
+ u32 lkey;
+ u32 rkey;
+ u64 len;
+ u64 vaddr;
+ u32 acl;
+};
+
+/* output parameters for MW hipz calls */
+struct ehca_mw_hipzout_parms {
+ struct ipz_mrmw_handle handle;
+ u32 rkey;
+};
+
+struct ehca_av {
+ struct ib_ah ib_ah;
+ struct ehca_ud_av av;
+};
+
+struct ehca_ucontext {
+ struct ib_ucontext ib_ucontext;
+};
+
+int ehca_init_pd_cache(void);
+void ehca_cleanup_pd_cache(void);
+int ehca_init_cq_cache(void);
+void ehca_cleanup_cq_cache(void);
+int ehca_init_qp_cache(void);
+void ehca_cleanup_qp_cache(void);
+int ehca_init_av_cache(void);
+void ehca_cleanup_av_cache(void);
+int ehca_init_mrmw_cache(void);
+void ehca_cleanup_mrmw_cache(void);
+int ehca_init_small_qp_cache(void);
+void ehca_cleanup_small_qp_cache(void);
+
+extern rwlock_t ehca_qp_idr_lock;
+extern rwlock_t ehca_cq_idr_lock;
+extern struct idr ehca_qp_idr;
+extern struct idr ehca_cq_idr;
+
+extern int ehca_static_rate;
+extern int ehca_port_act_time;
+extern int ehca_use_hp_mr;
+extern int ehca_scaling_code;
+extern int ehca_lock_hcalls;
+extern int ehca_nr_ports;
+extern int ehca_max_cq;
+extern int ehca_max_qp;
+
+struct ipzu_queue_resp {
+ u32 qe_size; /* queue entry size */
+ u32 act_nr_of_sg;
+ u32 queue_length; /* queue length allocated in bytes */
+ u32 pagesize;
+ u32 toggle_state;
+ u32 offset; /* save offset within a page for small_qp */
+};
+
+struct ehca_create_cq_resp {
+ u32 cq_number;
+ u32 token;
+ struct ipzu_queue_resp ipz_queue;
+ u32 fw_handle_ofs;
+ u32 dummy;
+};
+
+struct ehca_create_qp_resp {
+ u32 qp_num;
+ u32 token;
+ u32 qp_type;
+ u32 ext_type;
+ u32 qkey;
+ /* qp_num assigned by ehca: sqp0/1 may have got different numbers */
+ u32 real_qp_num;
+ u32 fw_handle_ofs;
+ u32 dummy;
+ struct ipzu_queue_resp ipz_squeue;
+ struct ipzu_queue_resp ipz_rqueue;
+};
+
+struct ehca_alloc_cq_parms {
+ u32 nr_cqe;
+ u32 act_nr_of_entries;
+ u32 act_pages;
+ struct ipz_eq_handle eq_handle;
+};
+
+enum ehca_service_type {
+ ST_RC = 0,
+ ST_UC = 1,
+ ST_RD = 2,
+ ST_UD = 3,
+};
+
+enum ehca_ll_comp_flags {
+ LLQP_SEND_COMP = 0x20,
+ LLQP_RECV_COMP = 0x40,
+ LLQP_COMP_MASK = 0x60,
+};
+
+struct ehca_alloc_queue_parms {
+ /* input parameters */
+ int max_wr;
+ int max_sge;
+ int page_size;
+ int is_small;
+
+ /* output parameters */
+ u16 act_nr_wqes;
+ u8 act_nr_sges;
+ u32 queue_size; /* bytes for small queues, pages otherwise */
+};
+
+struct ehca_alloc_qp_parms {
+ struct ehca_alloc_queue_parms squeue;
+ struct ehca_alloc_queue_parms rqueue;
+
+ /* input parameters */
+ enum ehca_service_type servicetype;
+ int qp_storage;
+ int sigtype;
+ enum ehca_ext_qp_type ext_type;
+ enum ehca_ll_comp_flags ll_comp_flags;
+ int ud_av_l_key_ctl;
+
+ u32 token;
+ struct ipz_eq_handle eq_handle;
+ struct ipz_pd pd;
+ struct ipz_cq_handle send_cq_handle, recv_cq_handle;
+
+ u32 srq_qpn, srq_token, srq_limit;
+
+ /* output parameters */
+ u32 real_qp_num;
+ struct ipz_qp_handle qp_handle;
+ struct h_galpas galpas;
+};
+
+int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
+int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
+struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
new file mode 100644
index 0000000..1798e64
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
@@ -0,0 +1,236 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * pSeries interface definitions
+ *
+ * Authors: Waleri Fomin <fomin@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_CLASSES_PSERIES_H__
+#define __EHCA_CLASSES_PSERIES_H__
+
+#include "hcp_phyp.h"
+#include "ipz_pt_fn.h"
+
+
+struct ehca_pfqp {
+ struct ipz_qpt sqpt;
+ struct ipz_qpt rqpt;
+};
+
+struct ehca_pfcq {
+ struct ipz_qpt qpt;
+ u32 cqnr;
+};
+
+struct ehca_pfeq {
+ struct ipz_qpt qpt;
+ struct h_galpa galpa;
+ u32 eqnr;
+};
+
+struct ipz_adapter_handle {
+ u64 handle;
+};
+
+struct ipz_cq_handle {
+ u64 handle;
+};
+
+struct ipz_eq_handle {
+ u64 handle;
+};
+
+struct ipz_qp_handle {
+ u64 handle;
+};
+struct ipz_mrmw_handle {
+ u64 handle;
+};
+
+struct ipz_pd {
+ u32 value;
+};
+
+struct hcp_modify_qp_control_block {
+ u32 qkey; /* 00 */
+ u32 rdd; /* reliable datagram domain */
+ u32 send_psn; /* 02 */
+ u32 receive_psn; /* 03 */
+ u32 prim_phys_port; /* 04 */
+ u32 alt_phys_port; /* 05 */
+ u32 prim_p_key_idx; /* 06 */
+ u32 alt_p_key_idx; /* 07 */
+ u32 rdma_atomic_ctrl; /* 08 */
+ u32 qp_state; /* 09 */
+ u32 reserved_10; /* 10 */
+ u32 rdma_nr_atomic_resp_res; /* 11 */
+ u32 path_migration_state; /* 12 */
+ u32 rdma_atomic_outst_dest_qp; /* 13 */
+ u32 dest_qp_nr; /* 14 */
+ u32 min_rnr_nak_timer_field; /* 15 */
+ u32 service_level; /* 16 */
+ u32 send_grh_flag; /* 17 */
+ u32 retry_count; /* 18 */
+ u32 timeout; /* 19 */
+ u32 path_mtu; /* 20 */
+ u32 max_static_rate; /* 21 */
+ u32 dlid; /* 22 */
+ u32 rnr_retry_count; /* 23 */
+ u32 source_path_bits; /* 24 */
+ u32 traffic_class; /* 25 */
+ u32 hop_limit; /* 26 */
+ u32 source_gid_idx; /* 27 */
+ u32 flow_label; /* 28 */
+ u32 reserved_29; /* 29 */
+ union { /* 30 */
+ u64 dw[2];
+ u8 byte[16];
+ } dest_gid;
+ u32 service_level_al; /* 34 */
+ u32 send_grh_flag_al; /* 35 */
+ u32 retry_count_al; /* 36 */
+ u32 timeout_al; /* 37 */
+ u32 max_static_rate_al; /* 38 */
+ u32 dlid_al; /* 39 */
+ u32 rnr_retry_count_al; /* 40 */
+ u32 source_path_bits_al; /* 41 */
+ u32 traffic_class_al; /* 42 */
+ u32 hop_limit_al; /* 43 */
+ u32 source_gid_idx_al; /* 44 */
+ u32 flow_label_al; /* 45 */
+ u32 reserved_46; /* 46 */
+ u32 reserved_47; /* 47 */
+ union { /* 48 */
+ u64 dw[2];
+ u8 byte[16];
+ } dest_gid_al;
+ u32 max_nr_outst_send_wr; /* 52 */
+ u32 max_nr_outst_recv_wr; /* 53 */
+ u32 disable_ete_credit_check; /* 54 */
+ u32 qp_number; /* 55 */
+ u64 send_queue_handle; /* 56 */
+ u64 recv_queue_handle; /* 58 */
+ u32 actual_nr_sges_in_sq_wqe; /* 60 */
+ u32 actual_nr_sges_in_rq_wqe; /* 61 */
+ u32 qp_enable; /* 62 */
+ u32 curr_srq_limit; /* 63 */
+ u64 qp_aff_asyn_ev_log_reg; /* 64 */
+ u64 shared_rq_hndl; /* 66 */
+ u64 trigg_doorbell_qp_hndl; /* 68 */
+ u32 reserved_70_127[58]; /* 70 */
+};
+
+#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0)
+#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2)
+#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3)
+#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4)
+#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5)
+#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6)
+#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7)
+#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8)
+#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9)
+#define MQPCB_QP_STATE EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11)
+#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12)
+#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13)
+#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14)
+#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15)
+#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16)
+#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17)
+#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18)
+#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19)
+#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20)
+#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21)
+#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22)
+#define MQPCB_DLID EHCA_BMASK_IBM(16, 31)
+#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23)
+#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29, 31)
+#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24)
+#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25, 31)
+#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25)
+#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26)
+#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27)
+#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28)
+#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12, 31)
+#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30)
+#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31)
+#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28, 31)
+#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32)
+#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31, 31)
+#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33)
+#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31)
+#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34)
+#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27, 31)
+#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35)
+#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36)
+#define MQPCB_DLID_AL EHCA_BMASK_IBM(16, 31)
+#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37)
+#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31)
+#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38)
+#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25, 31)
+#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39)
+#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40)
+#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41)
+#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24, 31)
+#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42)
+#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12, 31)
+#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44)
+#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45)
+#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31)
+#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46)
+#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31)
+#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47)
+#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31, 31)
+#define MQPCB_QP_NUMBER EHCA_BMASK_IBM( 8, 31)
+#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48)
+#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31, 31)
+#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49)
+#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16, 31)
+#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50)
+#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51)
+
+#endif /* __EHCA_CLASSES_PSERIES_H__ */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
new file mode 100644
index 0000000..2f4c28a
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -0,0 +1,402 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * Completion queue handling
+ *
+ * Authors: Waleri Fomin <fomin@de.ibm.com>
+ * Khadija Souissi <souissi@de.ibm.com>
+ * Reinhard Ernst <rernst@de.ibm.com>
+ * Heiko J Schick <schickhj@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_iverbs.h"
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "hcp_if.h"
+
+static struct kmem_cache *cq_cache;
+
+int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
+{
+ unsigned int qp_num = qp->real_qp_num;
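+ /* QP_HASHTAB_LEN is a power of 2, so masking with LEN-1 picks the bucket */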
+ unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->spinlock, flags);
+ hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
+ spin_unlock_irqrestore(&cq->spinlock, flags);
+
+ ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
+ cq->cq_number, qp_num);
+
+ return 0;
+}
+
+int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
+{
+ int ret = -EINVAL;
+ unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
+ struct hlist_node *iter;
+ struct ehca_qp *qp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->spinlock, flags);
+ hlist_for_each(iter, &cq->qp_hashtab[key]) {
+ qp = hlist_entry(iter, struct ehca_qp, list_entries);
+ if (qp->real_qp_num == real_qp_num) {
+ hlist_del(iter);
+ ehca_dbg(cq->ib_cq.device,
+ "removed qp from cq .cq_num=%x real_qp_num=%x",
+ cq->cq_number, real_qp_num);
+ ret = 0;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&cq->spinlock, flags);
+ if (ret)
+ ehca_err(cq->ib_cq.device,
+ "qp not found cq_num=%x real_qp_num=%x",
+ cq->cq_number, real_qp_num);
+
+ return ret;
+}
+
+struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
+{
+ struct ehca_qp *ret = NULL;
+ unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
+ struct hlist_node *iter;
+ struct ehca_qp *qp;
+ hlist_for_each(iter, &cq->qp_hashtab[key]) {
+ qp = hlist_entry(iter, struct ehca_qp, list_entries);
+ if (qp->real_qp_num == real_qp_num) {
+ ret = qp;
+ break;
+ }
+ }
+ return ret;
+}
+
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ static const u32 additional_cqe = 20;
+ struct ib_cq *cq;
+ struct ehca_cq *my_cq;
+ struct ehca_shca *shca =
+ container_of(device, struct ehca_shca, ib_device);
+ struct ipz_adapter_handle adapter_handle;
+ struct ehca_alloc_cq_parms param; /* h_call's out parameters */
+ struct h_galpa gal;
+ void *vpage;
+ u32 counter;
+ u64 rpage, cqx_fec, h_ret;
+ int ipz_rc, ret, i;
+ unsigned long flags;
+
+ if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
+ return ERR_PTR(-EINVAL);
+
+ if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) {
+ ehca_err(device, "Unable to create CQ, max number of %i "
+ "CQs reached.", shca->max_num_cqs);
+ ehca_err(device, "To increase the maximum number of CQs "
+ "use the number_of_cqs module parameter.\n");
+ return ERR_PTR(-ENOSPC);
+ }
+
+ my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL);
+ if (!my_cq) {
+ ehca_err(device, "Out of memory for ehca_cq struct device=%p",
+ device);
+ atomic_dec(&shca->num_cqs);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
+
+ spin_lock_init(&my_cq->spinlock);
+ spin_lock_init(&my_cq->cb_lock);
+ spin_lock_init(&my_cq->task_lock);
+ atomic_set(&my_cq->nr_events, 0);
+ init_waitqueue_head(&my_cq->wait_completion);
+
+ cq = &my_cq->ib_cq;
+
+ adapter_handle = shca->ipz_hca_handle;
+ param.eq_handle = shca->eq.ipz_eq_handle;
+
+ do {
+ if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) {
+ cq = ERR_PTR(-ENOMEM);
+ ehca_err(device, "Can't reserve idr nr. device=%p",
+ device);
+ goto create_cq_exit1;
+ }
+
+ write_lock_irqsave(&ehca_cq_idr_lock, flags);
+ ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
+ write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ } while (ret == -EAGAIN);
+
+ if (ret) {
+ cq = ERR_PTR(-ENOMEM);
+ ehca_err(device, "Can't allocate new idr entry. device=%p",
+ device);
+ goto create_cq_exit1;
+ }
+
+ if (my_cq->token > 0x1FFFFFF) {
+ cq = ERR_PTR(-ENOMEM);
+ ehca_err(device, "Invalid number of cq. device=%p", device);
+ goto create_cq_exit2;
+ }
+
+ /*
+ * A CQ's maximum depth is 4GB-64, but we need an additional 20 entries
+ * as a buffer for receiving error CQEs.
+ */
+ param.nr_cqe = cqe + additional_cqe;
+ h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param);
+
+ if (h_ret != H_SUCCESS) {
+ ehca_err(device, "hipz_h_alloc_resource_cq() failed "
+ "h_ret=%li device=%p", h_ret, device);
+ cq = ERR_PTR(ehca2ib_return_code(h_ret));
+ goto create_cq_exit2;
+ }
+
+ ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
+ EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
+ if (!ipz_rc) {
+ ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p",
+ ipz_rc, device);
+ cq = ERR_PTR(-EINVAL);
+ goto create_cq_exit3;
+ }
+
+ for (counter = 0; counter < param.act_pages; counter++) {
+ vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
+ if (!vpage) {
+ ehca_err(device, "ipz_qpageit_get_inc() "
+ "returns NULL device=%p", device);
+ cq = ERR_PTR(-EAGAIN);
+ goto create_cq_exit4;
+ }
+ rpage = virt_to_abs(vpage);
+
+ h_ret = hipz_h_register_rpage_cq(adapter_handle,
+ my_cq->ipz_cq_handle,
+ &my_cq->pf,
+ 0,
+ 0,
+ rpage,
+ 1,
+ my_cq->galpas.
+ kernel);
+
+ if (h_ret < H_SUCCESS) {
+ ehca_err(device, "hipz_h_register_rpage_cq() failed "
+ "ehca_cq=%p cq_num=%x h_ret=%li counter=%i "
+ "act_pages=%i", my_cq, my_cq->cq_number,
+ h_ret, counter, param.act_pages);
+ cq = ERR_PTR(-EINVAL);
+ goto create_cq_exit4;
+ }
+
+ if (counter == (param.act_pages - 1)) {
+ vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
+ if ((h_ret != H_SUCCESS) || vpage) {
+ ehca_err(device, "Registration of pages not "
+ "complete ehca_cq=%p cq_num=%x "
+ "h_ret=%li", my_cq, my_cq->cq_number,
+ h_ret);
+ cq = ERR_PTR(-EAGAIN);
+ goto create_cq_exit4;
+ }
+ } else {
+ if (h_ret != H_PAGE_REGISTERED) {
+ ehca_err(device, "Registration of page failed "
+ "ehca_cq=%p cq_num=%x h_ret=%li "
+ "counter=%i act_pages=%i",
+ my_cq, my_cq->cq_number,
+ h_ret, counter, param.act_pages);
+ cq = ERR_PTR(-ENOMEM);
+ goto create_cq_exit4;
+ }
+ }
+ }
+
+ ipz_qeit_reset(&my_cq->ipz_queue);
+
+ gal = my_cq->galpas.kernel;
+ cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
+ ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%lx",
+ my_cq, my_cq->cq_number, cqx_fec);
+
+ my_cq->ib_cq.cqe = my_cq->nr_of_entries =
+ param.act_nr_of_entries - additional_cqe;
+ my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff;
+
+ for (i = 0; i < QP_HASHTAB_LEN; i++)
+ INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
+
+ INIT_LIST_HEAD(&my_cq->sqp_err_list);
+ INIT_LIST_HEAD(&my_cq->rqp_err_list);
+
+ if (context) {
+ struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
+ struct ehca_create_cq_resp resp;
+ memset(&resp, 0, sizeof(resp));
+ resp.cq_number = my_cq->cq_number;
+ resp.token = my_cq->token;
+ resp.ipz_queue.qe_size = ipz_queue->qe_size;
+ resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg;
+ resp.ipz_queue.queue_length = ipz_queue->queue_length;
+ resp.ipz_queue.pagesize = ipz_queue->pagesize;
+ resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
+ resp.fw_handle_ofs = (u32)
+ (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
+ if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
+ ehca_err(device, "Copy to udata failed.");
+ cq = ERR_PTR(-EFAULT);
+ goto create_cq_exit4;
+ }
+ }
+
+ return cq;
+
+create_cq_exit4:
+ ipz_queue_dtor(NULL, &my_cq->ipz_queue);
+
+create_cq_exit3:
+ h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
+ if (h_ret != H_SUCCESS)
+ ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
+ "cq_num=%x h_ret=%li", my_cq, my_cq->cq_number, h_ret);
+
+create_cq_exit2:
+ write_lock_irqsave(&ehca_cq_idr_lock, flags);
+ idr_remove(&ehca_cq_idr, my_cq->token);
+ write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+create_cq_exit1:
+ kmem_cache_free(cq_cache, my_cq);
+
+ atomic_dec(&shca->num_cqs);
+ return cq;
+}
+
+int ehca_destroy_cq(struct ib_cq *cq)
+{
+ u64 h_ret;
+ struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+ int cq_num = my_cq->cq_number;
+ struct ib_device *device = cq->device;
+ struct ehca_shca *shca = container_of(device, struct ehca_shca,
+ ib_device);
+ struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
+ unsigned long flags;
+
+ if (cq->uobject) {
+ if (my_cq->mm_count_galpa || my_cq->mm_count_queue) {
+ ehca_err(device, "Resources still referenced in "
+ "user space cq_num=%x", my_cq->cq_number);
+ return -EINVAL;
+ }
+ }
+
+ /*
+ * remove the CQ from the idr first to make sure
+ * no more interrupt tasklets will touch this CQ
+ */
+ write_lock_irqsave(&ehca_cq_idr_lock, flags);
+ idr_remove(&ehca_cq_idr, my_cq->token);
+ write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+ /* now wait until all pending events have completed */
+ wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events));
+
+ /* nobody's using our CQ any longer -- we can destroy it */
+ h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
+ if (h_ret == H_R_STATE) {
+ /* cq in err: read err data and destroy it forcibly */
+ ehca_dbg(device, "ehca_cq=%p cq_num=%x ressource=%lx in err "
+ "state. Try to delete it forcibly.",
+ my_cq, cq_num, my_cq->ipz_cq_handle.handle);
+ ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
+ h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
+ if (h_ret == H_SUCCESS)
+ ehca_dbg(device, "cq_num=%x deleted successfully.",
+ cq_num);
+ }
+ if (h_ret != H_SUCCESS) {
+ ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%li "
+ "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
+ return ehca2ib_return_code(h_ret);
+ }
+ ipz_queue_dtor(NULL, &my_cq->ipz_queue);
+ kmem_cache_free(cq_cache, my_cq);
+
+ atomic_dec(&shca->num_cqs);
+ return 0;
+}
+
+int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+{
+ /* TODO: proper resize needs to be done */
+ ehca_err(cq->device, "not implemented yet");
+
+ return -EFAULT;
+}
+
+int ehca_init_cq_cache(void)
+{
+ cq_cache = kmem_cache_create("ehca_cache_cq",
+ sizeof(struct ehca_cq), 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!cq_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void ehca_cleanup_cq_cache(void)
+{
+ if (cq_cache)
+ kmem_cache_destroy(cq_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
new file mode 100644
index 0000000..49660df
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -0,0 +1,186 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * Event queue handling
+ *
+ * Authors: Waleri Fomin <fomin@de.ibm.com>
+ * Khadija Souissi <souissi@de.ibm.com>
+ * Reinhard Ernst <rernst@de.ibm.com>
+ * Heiko J Schick <schickhj@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "ehca_iverbs.h"
+#include "ehca_qes.h"
+#include "hcp_if.h"
+#include "ipz_pt_fn.h"
+
+int ehca_create_eq(struct ehca_shca *shca,
+ struct ehca_eq *eq,
+ const enum ehca_eq_type type, const u32 length)
+{
+ int ret;
+ u64 h_ret;
+ u32 nr_pages;
+ u32 i;
+ void *vpage;
+ struct ib_device *ib_dev = &shca->ib_device;
+
+ spin_lock_init(&eq->spinlock);
+ spin_lock_init(&eq->irq_spinlock);
+ eq->is_initialized = 0;
+
+ if (type != EHCA_EQ && type != EHCA_NEQ) {
+ ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq);
+ return -EINVAL;
+ }
+ if (!length) {
+ ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq);
+ return -EINVAL;
+ }
+
+ h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
+ &eq->pf,
+ type,
+ length,
+ &eq->ipz_eq_handle,
+ &eq->length,
+ &nr_pages, &eq->ist);
+
+ if (h_ret != H_SUCCESS) {
+ ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
+ return -EINVAL;
+ }
+
+ ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages,
+ EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0);
+ if (!ret) {
+ ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
+ goto create_eq_exit1;
+ }
+
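+ /*
+ * Register all EQ pages with the hypervisor; on the last page verify
+ * that the page iterator is exhausted.
+ */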
+ for (i = 0; i < nr_pages; i++) {
+ u64 rpage;
+
+ vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
+ if (!vpage)
+ goto create_eq_exit2;
+
+ rpage = virt_to_abs(vpage);
+ h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
+ eq->ipz_eq_handle,
+ &eq->pf,
+ 0, 0, rpage, 1);
+
+ if (i == (nr_pages - 1)) {
+ /* last page */
+ vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
+ if (h_ret != H_SUCCESS || vpage)
+ goto create_eq_exit2;
+ } else {
+ if (h_ret != H_PAGE_REGISTERED || !vpage)
+ goto create_eq_exit2;
+ }
+ }
+
+ ipz_qeit_reset(&eq->ipz_queue);
+
+ /* register the interrupt handler and initialize the tasklet */
+ if (type == EHCA_EQ) {
+ ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq,
+ IRQF_DISABLED, "ehca_eq",
+ (void *)shca);
+ if (ret < 0)
+ ehca_err(ib_dev, "Can't map interrupt handler.");
+
+ tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
+ } else if (type == EHCA_NEQ) {
+ ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq,
+ IRQF_DISABLED, "ehca_neq",
+ (void *)shca);
+ if (ret < 0)
+ ehca_err(ib_dev, "Can't map interrupt handler.");
+
+ tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
+ }
+
+ eq->is_initialized = 1;
+
+ return 0;
+
+create_eq_exit2:
+ ipz_queue_dtor(NULL, &eq->ipz_queue);
+
+create_eq_exit1:
+ hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+
+ return -EINVAL;
+}
+
+void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq)
+{
+ unsigned long flags;
+ void *eqe;
+
+ spin_lock_irqsave(&eq->spinlock, flags);
+ eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue);
+ spin_unlock_irqrestore(&eq->spinlock, flags);
+
+ return eqe;
+}
+
+int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
+{
+ unsigned long flags;
+ u64 h_ret;
+
+ spin_lock_irqsave(&eq->spinlock, flags);
+ ibmebus_free_irq(eq->ist, (void *)shca);
+
+ h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+
+ spin_unlock_irqrestore(&eq->spinlock, flags);
+
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Can't free EQ resources.");
+ return -EINVAL;
+ }
+ ipz_queue_dtor(NULL, &eq->ipz_queue);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
new file mode 100644
index 0000000..4628822
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -0,0 +1,408 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * HCA query functions
+ *
+ * Authors: Heiko J Schick <schickhj@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+static unsigned int limit_uint(unsigned int value)
+{
+ return min_t(unsigned int, value, INT_MAX);
+}
+
+int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
+{
+ int i, ret = 0;
+ struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+ ib_device);
+ struct hipz_query_hca *rblock;
+
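+ /*
+ * Pairs of (IB device capability flag, HCA capability bit), used below
+ * to translate firmware capability bits into ib_device_attr flags.
+ */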
+ static const u32 cap_mapping[] = {
+ IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE,
+ IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR,
+ IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR,
+ IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST,
+ IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG,
+ IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE,
+ IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK,
+ IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD,
+ IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT,
+ IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE,
+ IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT,
+ };
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!rblock) {
+ ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+ return -ENOMEM;
+ }
+
+ if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Can't query device properties");
+ ret = -EINVAL;
+ goto query_device1;
+ }
+
+ memset(props, 0, sizeof(struct ib_device_attr));
+ props->page_size_cap = shca->hca_cap_mr_pgsize;
+ props->fw_ver = rblock->hw_ver;
+ props->max_mr_size = rblock->max_mr_size;
+ props->vendor_id = rblock->vendor_id >> 8;
+ props->vendor_part_id = rblock->vendor_part_id >> 16;
+ props->hw_ver = rblock->hw_ver;
+ props->max_qp = limit_uint(rblock->max_qp);
+ props->max_qp_wr = limit_uint(rblock->max_wqes_wq);
+ props->max_sge = limit_uint(rblock->max_sge);
+ props->max_sge_rd = limit_uint(rblock->max_sge_rd);
+ props->max_cq = limit_uint(rblock->max_cq);
+ props->max_cqe = limit_uint(rblock->max_cqe);
+ props->max_mr = limit_uint(rblock->max_mr);
+ props->max_mw = limit_uint(rblock->max_mw);
+ props->max_pd = limit_uint(rblock->max_pd);
+ props->max_ah = limit_uint(rblock->max_ah);
+ props->max_ee = limit_uint(rblock->max_rd_ee_context);
+ props->max_rdd = limit_uint(rblock->max_rd_domain);
+ props->max_fmr = limit_uint(rblock->max_mr);
+ props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp);
+ props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context);
+ props->max_res_rd_atom = limit_uint(rblock->max_rr_hca);
+ props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp);
+ props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context);
+
+ if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
+ props->max_srq = limit_uint(props->max_qp);
+ props->max_srq_wr = limit_uint(props->max_qp_wr);
+ props->max_srq_sge = 3;
+ }
+
+ props->max_pkeys = 16;
+ /* Some FW versions say 0 here; insert sensible value in that case */
+ props->local_ca_ack_delay = rblock->local_ca_ack_delay ?
+ min_t(u8, rblock->local_ca_ack_delay, 255) : 12;
+ props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp);
+ props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp);
+ props->max_mcast_grp = limit_uint(rblock->max_mcast_grp);
+ props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach);
+ props->max_total_mcast_qp_attach
+ = limit_uint(rblock->max_total_mcast_qp_attach);
+
+ /* translate device capabilities */
+ props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
+ IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ;
+ for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2)
+ if (rblock->hca_cap_indicators & cap_mapping[i + 1])
+ props->device_cap_flags |= cap_mapping[i];
+
+query_device1:
+ ehca_free_fw_ctrlblock(rblock);
+
+ return ret;
+}
+
+static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu)
+{
+ switch (fw_mtu) {
+ case 0x1:
+ return IB_MTU_256;
+ case 0x2:
+ return IB_MTU_512;
+ case 0x3:
+ return IB_MTU_1024;
+ case 0x4:
+ return IB_MTU_2048;
+ case 0x5:
+ return IB_MTU_4096;
+ default:
+ ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
+ fw_mtu);
+ return 0;
+ }
+}
+
+static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
+{
+ switch (vl_cap) {
+ case 0x1:
+ return 1;
+ case 0x2:
+ return 2;
+ case 0x3:
+ return 4;
+ case 0x4:
+ return 8;
+ case 0x5:
+ return 15;
+ default:
+ ehca_err(&shca->ib_device, "invalid Vl Capability: %x.",
+ vl_cap);
+ return 0;
+ }
+}
+
+int ehca_query_port(struct ib_device *ibdev,
+ u8 port, struct ib_port_attr *props)
+{
+ int ret = 0;
+ u64 h_ret;
+ struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+ ib_device);
+ struct hipz_query_port *rblock;
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!rblock) {
+ ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+ return -ENOMEM;
+ }
+
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Can't query port properties");
+ ret = -EINVAL;
+ goto query_port1;
+ }
+
+ memset(props, 0, sizeof(struct ib_port_attr));
+
+ props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu);
+ props->port_cap_flags = rblock->capability_mask;
+ props->gid_tbl_len = rblock->gid_tbl_len;
+ if (rblock->max_msg_sz)
+ props->max_msg_sz = rblock->max_msg_sz;
+ else
+ props->max_msg_sz = 0x1 << 31;
+ props->bad_pkey_cntr = rblock->bad_pkey_cntr;
+ props->qkey_viol_cntr = rblock->qkey_viol_cntr;
+ props->pkey_tbl_len = rblock->pkey_tbl_len;
+ props->lid = rblock->lid;
+ props->sm_lid = rblock->sm_lid;
+ props->lmc = rblock->lmc;
+ props->sm_sl = rblock->sm_sl;
+ props->subnet_timeout = rblock->subnet_timeout;
+ props->init_type_reply = rblock->init_type_reply;
+ props->max_vl_num = map_number_of_vls(shca, rblock->vl_cap);
+
+ if (rblock->state && rblock->phys_width) {
+ props->phys_state = rblock->phys_pstate;
+ props->state = rblock->phys_state;
+ props->active_width = rblock->phys_width;
+ props->active_speed = rblock->phys_speed;
+ } else {
+ /* old firmware releases don't report physical
+ * port info, so use default values
+ */
+ props->phys_state = 5;
+ props->state = rblock->state;
+ props->active_width = IB_WIDTH_12X;
+ props->active_speed = 0x1;
+ }
+
+query_port1:
+ ehca_free_fw_ctrlblock(rblock);
+
+ return ret;
+}
+
+int ehca_query_sma_attr(struct ehca_shca *shca,
+ u8 port, struct ehca_sma_attr *attr)
+{
+ int ret = 0;
+ u64 h_ret;
+ struct hipz_query_port *rblock;
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
+ if (!rblock) {
+ ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+ return -ENOMEM;
+ }
+
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Can't query port properties");
+ ret = -EINVAL;
+ goto query_sma_attr1;
+ }
+
+ memset(attr, 0, sizeof(struct ehca_sma_attr));
+
+ attr->lid = rblock->lid;
+ attr->lmc = rblock->lmc;
+ attr->sm_sl = rblock->sm_sl;
+ attr->sm_lid = rblock->sm_lid;
+
+ attr->pkey_tbl_len = rblock->pkey_tbl_len;
+ memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys));
+
+query_sma_attr1:
+ ehca_free_fw_ctrlblock(rblock);
+
+ return ret;
+}
+
+int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+ int ret = 0;
+ u64 h_ret;
+ struct ehca_shca *shca;
+ struct hipz_query_port *rblock;
+
+ shca = container_of(ibdev, struct ehca_shca, ib_device);
+ if (index > 16) {
+ ehca_err(&shca->ib_device, "Invalid index: %x.", index);
+ return -EINVAL;
+ }
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!rblock) {
+ ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+ return -ENOMEM;
+ }
+
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Can't query port properties");
+ ret = -EINVAL;
+ goto query_pkey1;
+ }
+
+ memcpy(pkey, &rblock->pkey_entries[index], sizeof(u16));
+
+query_pkey1:
+ ehca_free_fw_ctrlblock(rblock);
+
+ return ret;
+}
+
+int ehca_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *gid)
+{
+ int ret = 0;
+ u64 h_ret;
+ struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
+ ib_device);
+ struct hipz_query_port *rblock;
+
+ if (index > 255) {
+ ehca_err(&shca->ib_device, "Invalid index: %x.", index);
+ return -EINVAL;
+ }
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!rblock) {
+ ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+ return -ENOMEM;
+ }
+
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Can't query port properties");
+ ret = -EINVAL;
+ goto query_gid1;
+ }
+
+ memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64));
+ memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64));
+
+query_gid1:
+ ehca_free_fw_ctrlblock(rblock);
+
+ return ret;
+}
+
+static const u32 allowed_port_caps = (
+ IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP |
+ IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP);
+
+int ehca_modify_port(struct ib_device *ibdev,
+ u8 port, int port_modify_mask,
+ struct ib_port_modify *props)
+{
+ int ret = 0;
+ struct ehca_shca *shca;
+ struct hipz_query_port *rblock;
+ u32 cap;
+ u64 hret;
+
+ shca = container_of(ibdev, struct ehca_shca, ib_device);
+ if ((props->set_port_cap_mask | props->clr_port_cap_mask)
+ & ~allowed_port_caps) {
+ ehca_err(&shca->ib_device, "Non-changeable bits set in masks "
+ "set=%x clr=%x allowed=%x", props->set_port_cap_mask,
+ props->clr_port_cap_mask, allowed_port_caps);
+ return -EINVAL;
+ }
+
+ if (mutex_lock_interruptible(&shca->modify_mutex))
+ return -ERESTARTSYS;
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!rblock) {
+ ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+ ret = -ENOMEM;
+ goto modify_port1;
+ }
+
+ hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock);
+ if (hret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Can't query port properties");
+ ret = -EINVAL;
+ goto modify_port2;
+ }
+
+ cap = (rblock->capability_mask | props->set_port_cap_mask)
+ & ~props->clr_port_cap_mask;
+
+ hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
+ cap, props->init_type, port_modify_mask);
+ if (hret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Modify port failed h_ret=%li",
+ hret);
+ ret = -EINVAL;
+ }
+
+modify_port2:
+ ehca_free_fw_ctrlblock(rblock);
+
+modify_port1:
+ mutex_unlock(&shca->modify_mutex);
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
new file mode 100644
index 0000000..757035e
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -0,0 +1,943 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * Functions for EQs, NEQs and interrupts
+ *
+ * Authors: Heiko J Schick <schickhj@de.ibm.com>
+ * Khadija Souissi <souissi@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Joachim Fenkes <fenkes@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_irq.h"
+#include "ehca_iverbs.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+#include "ipz_pt_fn.h"
+
+#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1)
+#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31)
+#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7)
+#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31)
+#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31)
+#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63)
+#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63)
+
+#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1)
+#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7)
+#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15)
+#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
+#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16)
+#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23)
+
+#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63)
+#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7)
+
+static void queue_comp_task(struct ehca_cq *__cq);
+
+static struct ehca_comp_pool *pool;
+
+static inline void comp_event_callback(struct ehca_cq *cq)
+{
+ if (!cq->ib_cq.comp_handler)
+ return;
+
+ spin_lock(&cq->cb_lock);
+ cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
+ spin_unlock(&cq->cb_lock);
+
+ return;
+}
+
+static void print_error_data(struct ehca_shca *shca, void *data,
+ u64 *rblock, int length)
+{
+ u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
+ u64 resource = rblock[1];
+
+ switch (type) {
+ case 0x1: /* Queue Pair */
+ {
+ struct ehca_qp *qp = (struct ehca_qp *)data;
+
+ /* only print error data if AER is set */
+ if (rblock[6] == 0)
+ return;
+
+ ehca_err(&shca->ib_device,
+ "QP 0x%x (resource=%lx) has errors.",
+ qp->ib_qp.qp_num, resource);
+ break;
+ }
+ case 0x4: /* Completion Queue */
+ {
+ struct ehca_cq *cq = (struct ehca_cq *)data;
+
+ ehca_err(&shca->ib_device,
+ "CQ 0x%x (resource=%lx) has errors.",
+ cq->cq_number, resource);
+ break;
+ }
+ default:
+ ehca_err(&shca->ib_device,
+ "Unknown error type: %lx on %s.",
+ type, shca->ib_device.name);
+ break;
+ }
+
+ ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
+ ehca_err(&shca->ib_device, "EHCA ----- error data begin "
+ "---------------------------------------------------");
+ ehca_dmp(rblock, length, "resource=%lx", resource);
+ ehca_err(&shca->ib_device, "EHCA ----- error data end "
+ "----------------------------------------------------");
+
+ return;
+}
+
+int ehca_error_data(struct ehca_shca *shca, void *data,
+ u64 resource)
+{
+
+ unsigned long ret;
+ u64 *rblock;
+ unsigned long block_count;
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
+ if (!rblock) {
+ ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
+ ret = -ENOMEM;
+ goto error_data1;
+ }
+
+ /* rblock must be 4K aligned and should be 4K in size */
+ ret = hipz_h_error_data(shca->ipz_hca_handle,
+ resource,
+ rblock,
+ &block_count);
+
+ if (ret == H_R_STATE)
+ ehca_err(&shca->ib_device,
+ "No error data is available: %lx.", resource);
+ else if (ret == H_SUCCESS) {
+ int length;
+
+ length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
+
+ if (length > EHCA_PAGESIZE)
+ length = EHCA_PAGESIZE;
+
+ print_error_data(shca, data, rblock, length);
+ } else
+ ehca_err(&shca->ib_device,
+ "Error data could not be fetched: %lx", resource);
+
+ ehca_free_fw_ctrlblock(rblock);
+
+error_data1:
+ return ret;
+
+}
+
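+ /* deliver a QP or SRQ asynchronous event to the consumer's event handler */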
+static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
+ enum ib_event_type event_type)
+{
+ struct ib_event event;
+
+ /* PATH_MIG without the QP ever having been armed is a false alarm */
+ if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
+ return;
+
+ event.device = &shca->ib_device;
+ event.event = event_type;
+
+ if (qp->ext_type == EQPT_SRQ) {
+ if (!qp->ib_srq.event_handler)
+ return;
+
+ event.element.srq = &qp->ib_srq;
+ qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);
+ } else {
+ if (!qp->ib_qp.event_handler)
+ return;
+
+ event.element.qp = &qp->ib_qp;
+ qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+ }
+}
+
+static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
+ enum ib_event_type event_type, int fatal)
+{
+ struct ehca_qp *qp;
+ u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
+
+ read_lock(&ehca_qp_idr_lock);
+ qp = idr_find(&ehca_qp_idr, token);
+ if (qp)
+ atomic_inc(&qp->nr_events);
+ read_unlock(&ehca_qp_idr_lock);
+
+ if (!qp)
+ return;
+
+ if (fatal)
+ ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
+
+ dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?
+ IB_EVENT_SRQ_ERR : event_type);
+
+ /*
+ * eHCA only processes one WQE at a time for SRQ base QPs,
+ * so the last WQE has been processed as soon as the QP enters
+ * error state.
+ */
+ if (fatal && qp->ext_type == EQPT_SRQBASE)
+ dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
+
+ if (atomic_dec_and_test(&qp->nr_events))
+ wake_up(&qp->wait_completion);
+ return;
+}
+
+static void cq_event_callback(struct ehca_shca *shca,
+ u64 eqe)
+{
+ struct ehca_cq *cq;
+ u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
+
+ read_lock(&ehca_cq_idr_lock);
+ cq = idr_find(&ehca_cq_idr, token);
+ if (cq)
+ atomic_inc(&cq->nr_events);
+ read_unlock(&ehca_cq_idr_lock);
+
+ if (!cq)
+ return;
+
+ ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
+
+ if (atomic_dec_and_test(&cq->nr_events))
+ wake_up(&cq->wait_completion);
+
+ return;
+}
+
+static void parse_identifier(struct ehca_shca *shca, u64 eqe)
+{
+ u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
+
+ switch (identifier) {
+ case 0x02: /* path migrated */
+ qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);
+ break;
+ case 0x03: /* communication established */
+ qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);
+ break;
+ case 0x04: /* send queue drained */
+ qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);
+ break;
+ case 0x05: /* QP error */
+ case 0x06: /* QP error */
+ qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);
+ break;
+ case 0x07: /* CQ error */
+ case 0x08: /* CQ error */
+ cq_event_callback(shca, eqe);
+ break;
+ case 0x09: /* MRMWPTE error */
+ ehca_err(&shca->ib_device, "MRMWPTE error.");
+ break;
+ case 0x0A: /* port event */
+ ehca_err(&shca->ib_device, "Port event.");
+ break;
+ case 0x0B: /* MR access error */
+ ehca_err(&shca->ib_device, "MR access error.");
+ break;
+ case 0x0C: /* EQ error */
+ ehca_err(&shca->ib_device, "EQ error.");
+ break;
+ case 0x0D: /* P/Q_Key mismatch */
+ ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
+ break;
+ case 0x10: /* sampling complete */
+ ehca_err(&shca->ib_device, "Sampling complete.");
+ break;
+ case 0x11: /* unaffiliated access error */
+ ehca_err(&shca->ib_device, "Unaffiliated access error.");
+ break;
+ case 0x12: /* path migrating */
+ ehca_err(&shca->ib_device, "Path migrating.");
+ break;
+ case 0x13: /* interface trace stopped */
+ ehca_err(&shca->ib_device, "Interface trace stopped.");
+ break;
+ case 0x14: /* first error capture info available */
+ ehca_info(&shca->ib_device, "First error capture available");
+ break;
+ case 0x15: /* SRQ limit reached */
+ qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);
+ break;
+ default:
+ ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
+ identifier, shca->ib_device.name);
+ break;
+ }
+
+ return;
+}
+
+static void dispatch_port_event(struct ehca_shca *shca, int port_num,
+ enum ib_event_type type, const char *msg)
+{
+ struct ib_event event;
+
+ ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
+ event.device = &shca->ib_device;
+ event.event = type;
+ event.element.port_num = port_num;
+ ib_dispatch_event(&event);
+}
+
+static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
+{
+ struct ehca_sma_attr new_attr;
+ struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
+
+ ehca_query_sma_attr(shca, port_num, &new_attr);
+
+ if (new_attr.sm_sl != old_attr->sm_sl ||
+ new_attr.sm_lid != old_attr->sm_lid)
+ dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
+ "SM changed");
+
+ if (new_attr.lid != old_attr->lid ||
+ new_attr.lmc != old_attr->lmc)
+ dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
+ "LID changed");
+
+ if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
+ memcmp(new_attr.pkeys, old_attr->pkeys,
+ sizeof(u16) * new_attr.pkey_tbl_len))
+ dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
+ "P_Key changed");
+
+ *old_attr = new_attr;
+}
+
+/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */
+static int replay_modify_qp(struct ehca_sport *sport)
+{
+ int aqp1_destroyed;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sport->mod_sqp_lock, flags);
+
+ aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];
+
+ if (sport->ibqp_sqp[IB_QPT_SMI])
+ ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
+ if (!aqp1_destroyed)
+ ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
+
+ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+
+ return aqp1_destroyed;
+}
+
+static void parse_ec(struct ehca_shca *shca, u64 eqe)
+{
+ u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
+ u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
+ u8 spec_event;
+ struct ehca_sport *sport = &shca->sport[port - 1];
+
+ switch (ec) {
+ case 0x30: /* port availability change */
+ if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
+ /* only replay modify_qp calls in autodetect mode;
+ * if AQP1 was destroyed, the port is already down
+ * again and we can drop the event.
+ */
+ if (ehca_nr_ports < 0)
+ if (replay_modify_qp(sport))
+ break;
+
+ sport->port_state = IB_PORT_ACTIVE;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+ "is active");
+ ehca_query_sma_attr(shca, port, &sport->saved_attr);
+ } else {
+ sport->port_state = IB_PORT_DOWN;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+ "is inactive");
+ }
+ break;
+ case 0x31:
+ /*
+ * port configuration change; a disruptive change is caused by a
+ * LID, P_Key or SM change
+ */
+ if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
+ ehca_warn(&shca->ib_device, "disruptive port "
+ "%d configuration change", port);
+
+ sport->port_state = IB_PORT_DOWN;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+ "is inactive");
+
+ sport->port_state = IB_PORT_ACTIVE;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+ "is active");
+ ehca_query_sma_attr(shca, port,
+ &sport->saved_attr);
+ } else
+ notify_port_conf_change(shca, port);
+ break;
+ case 0x32: /* adapter malfunction */
+ ehca_err(&shca->ib_device, "Adapter malfunction.");
+ break;
+ case 0x33: /* trace stopped */
+ ehca_err(&shca->ib_device, "Traced stopped.");
+ break;
+ case 0x34: /* util async event */
+ spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
+ if (spec_event == 0x80) /* client reregister required */
+ dispatch_port_event(shca, port,
+ IB_EVENT_CLIENT_REREGISTER,
+ "client reregister req.");
+ else
+ ehca_warn(&shca->ib_device, "Unknown util async "
+ "event %x on port %x", spec_event, port);
+ break;
+ default:
+ ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
+ ec, shca->ib_device.name);
+ break;
+ }
+
+ return;
+}
+
+static inline void reset_eq_pending(struct ehca_cq *cq)
+{
+ u64 CQx_EP;
+ struct h_galpa gal = cq->galpas.kernel;
+
+ hipz_galpa_store_cq(gal, cqx_ep, 0x0);
+ CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
+
+ return;
+}
+
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
+{
+ struct ehca_shca *shca = (struct ehca_shca*)dev_id;
+
+ tasklet_hi_schedule(&shca->neq.interrupt_task);
+
+ return IRQ_HANDLED;
+}
+
+void ehca_tasklet_neq(unsigned long data)
+{
+ struct ehca_shca *shca = (struct ehca_shca*)data;
+ struct ehca_eqe *eqe;
+ u64 ret;
+
+ eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+
+ while (eqe) {
+ if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
+ parse_ec(shca, eqe->entry);
+
+ eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+ }
+
+ ret = hipz_h_reset_event(shca->ipz_hca_handle,
+ shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
+
+ if (ret != H_SUCCESS)
+ ehca_err(&shca->ib_device, "Can't clear notification events.");
+
+ return;
+}
+
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
+{
+ struct ehca_shca *shca = (struct ehca_shca*)dev_id;
+
+ tasklet_hi_schedule(&shca->eq.interrupt_task);
+
+ return IRQ_HANDLED;
+}
+
+
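+ /*
+ * Handle a single EQ entry: completion events are dispatched to the
+ * CQ's completion handler (or queued to a per-CPU completion task when
+ * scaling is enabled); all other events go to parse_identifier().
+ */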
+static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
+{
+ u64 eqe_value;
+ u32 token;
+ struct ehca_cq *cq;
+
+ eqe_value = eqe->entry;
+ ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
+ if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+ ehca_dbg(&shca->ib_device, "Got completion event");
+ token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+ read_lock(&ehca_cq_idr_lock);
+ cq = idr_find(&ehca_cq_idr, token);
+ if (cq)
+ atomic_inc(&cq->nr_events);
+ read_unlock(&ehca_cq_idr_lock);
+ if (cq == NULL) {
+ ehca_err(&shca->ib_device,
+ "Invalid eqe for non-existing cq token=%x",
+ token);
+ return;
+ }
+ reset_eq_pending(cq);
+ if (ehca_scaling_code)
+ queue_comp_task(cq);
+ else {
+ comp_event_callback(cq);
+ if (atomic_dec_and_test(&cq->nr_events))
+ wake_up(&cq->wait_completion);
+ }
+ } else {
+ ehca_dbg(&shca->ib_device, "Got non completion event");
+ parse_identifier(shca, eqe_value);
+ }
+}
+
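+ /*
+ * Drain the event queue: cache up to EHCA_EQE_CACHE_SIZE entries under
+ * the irq spinlock, re-enable interrupts for the affected CQs, then run
+ * the completion handlers (or hand the work to the comp task pool).
+ */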
+void ehca_process_eq(struct ehca_shca *shca, int is_irq)
+{
+ struct ehca_eq *eq = &shca->eq;
+ struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
+ u64 eqe_value, ret;
+ unsigned long flags;
+ int eqe_cnt, i;
+ int eq_empty = 0;
+
+ spin_lock_irqsave(&eq->irq_spinlock, flags);
+ if (is_irq) {
+ const int max_query_cnt = 100;
+ int query_cnt = 0;
+ int int_state = 1;
+ do {
+ int_state = hipz_h_query_int_state(
+ shca->ipz_hca_handle, eq->ist);
+ query_cnt++;
+ iosync();
+ } while (int_state && query_cnt < max_query_cnt);
+ if (unlikely((query_cnt == max_query_cnt)))
+ ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
+ int_state, query_cnt);
+ }
+
+ /* read out all eqes */
+ eqe_cnt = 0;
+ do {
+ u32 token;
+ eqe_cache[eqe_cnt].eqe =
+ (struct ehca_eqe *)ehca_poll_eq(shca, eq);
+ if (!eqe_cache[eqe_cnt].eqe)
+ break;
+ eqe_value = eqe_cache[eqe_cnt].eqe->entry;
+ if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+ token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+ read_lock(&ehca_cq_idr_lock);
+ eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
+ if (eqe_cache[eqe_cnt].cq)
+ atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
+ read_unlock(&ehca_cq_idr_lock);
+ if (!eqe_cache[eqe_cnt].cq) {
+ ehca_err(&shca->ib_device,
+ "Invalid eqe for non-existing cq "
+ "token=%x", token);
+ continue;
+ }
+ } else
+ eqe_cache[eqe_cnt].cq = NULL;
+ eqe_cnt++;
+ } while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
+ if (!eqe_cnt) {
+ if (is_irq)
+ ehca_dbg(&shca->ib_device,
+ "No eqe found for irq event");
+ goto unlock_irq_spinlock;
+ } else if (!is_irq) {
+ ret = hipz_h_eoi(eq->ist);
+ if (ret != H_SUCCESS)
+ ehca_err(&shca->ib_device,
+ "bad return code EOI -rc = %ld\n", ret);
+ ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
+ }
+ if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
+ ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
+ /* enable irq for new packets */
+ for (i = 0; i < eqe_cnt; i++) {
+ if (eq->eqe_cache[i].cq)
+ reset_eq_pending(eq->eqe_cache[i].cq);
+ }
+ /* check eq */
+ spin_lock(&eq->spinlock);
+ eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
+ spin_unlock(&eq->spinlock);
+ /* call completion handler for cached eqes */
+ for (i = 0; i < eqe_cnt; i++)
+ if (eq->eqe_cache[i].cq) {
+ if (ehca_scaling_code)
+ queue_comp_task(eq->eqe_cache[i].cq);
+ else {
+ struct ehca_cq *cq = eq->eqe_cache[i].cq;
+ comp_event_callback(cq);
+ if (atomic_dec_and_test(&cq->nr_events))
+ wake_up(&cq->wait_completion);
+ }
+ } else {
+ ehca_dbg(&shca->ib_device, "Got non completion event");
+ parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
+ }
+ /* poll eq if not empty */
+ if (eq_empty)
+ goto unlock_irq_spinlock;
+ do {
+ struct ehca_eqe *eqe;
+ eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+ if (!eqe)
+ break;
+ process_eqe(shca, eqe);
+ } while (1);
+
+unlock_irq_spinlock:
+ spin_unlock_irqrestore(&eq->irq_spinlock, flags);
+}
+
+void ehca_tasklet_eq(unsigned long data)
+{
+ ehca_process_eq((struct ehca_shca*)data, 1);
+}
+
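+ /* pick the next online CPU in round-robin fashion for completion work */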
+static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
+{
+ int cpu;
+ unsigned long flags;
+
+ WARN_ON_ONCE(!in_interrupt());
+ if (ehca_debug_level >= 3)
+ ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
+
+ spin_lock_irqsave(&pool->last_cpu_lock, flags);
+ cpu = next_cpu_nr(pool->last_cpu, cpu_online_map);
+ if (cpu >= nr_cpu_ids)
+ cpu = first_cpu(cpu_online_map);
+ pool->last_cpu = cpu;
+ spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
+
+ return cpu;
+}
+
+static void __queue_comp_task(struct ehca_cq *__cq,
+ struct ehca_cpu_comp_task *cct)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cct->task_lock, flags);
+ spin_lock(&__cq->task_lock);
+
+ if (__cq->nr_callbacks == 0) {
+ __cq->nr_callbacks++;
+ list_add_tail(&__cq->entry, &cct->cq_list);
+ cct->cq_jobs++;
+ wake_up(&cct->wait_queue);
+ } else
+ __cq->nr_callbacks++;
+
+ spin_unlock(&__cq->task_lock);
+ spin_unlock_irqrestore(&cct->task_lock, flags);
+}
+
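+ /*
+ * Queue a CQ for completion processing on an online CPU; if the chosen
+ * CPU already has pending jobs, pick the next online CPU once more
+ * before queueing.
+ */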
+static void queue_comp_task(struct ehca_cq *__cq)
+{
+ int cpu_id;
+ struct ehca_cpu_comp_task *cct;
+ int cq_jobs;
+ unsigned long flags;
+
+ cpu_id = find_next_online_cpu(pool);
+ BUG_ON(!cpu_online(cpu_id));
+
+ cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+ BUG_ON(!cct);
+
+ spin_lock_irqsave(&cct->task_lock, flags);
+ cq_jobs = cct->cq_jobs;
+ spin_unlock_irqrestore(&cct->task_lock, flags);
+ if (cq_jobs > 0) {
+ cpu_id = find_next_online_cpu(pool);
+ cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+ BUG_ON(!cct);
+ }
+
+ __queue_comp_task(__cq, cct);
+}
+
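+ /*
+ * Process the CQs queued on this per-CPU completion task, invoking the
+ * completion handler once for every queued callback.
+ */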
+static void run_comp_task(struct ehca_cpu_comp_task *cct)
+{
+ struct ehca_cq *cq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cct->task_lock, flags);
+
+ while (!list_empty(&cct->cq_list)) {
+ cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
+ spin_unlock_irqrestore(&cct->task_lock, flags);
+
+ comp_event_callback(cq);
+ if (atomic_dec_and_test(&cq->nr_events))
+ wake_up(&cq->wait_completion);
+
+ spin_lock_irqsave(&cct->task_lock, flags);
+ spin_lock(&cq->task_lock);
+ cq->nr_callbacks--;
+ if (!cq->nr_callbacks) {
+ list_del_init(cct->cq_list.next);
+ cct->cq_jobs--;
+ }
+ spin_unlock(&cq->task_lock);
+ }
+
+ spin_unlock_irqrestore(&cct->task_lock, flags);
+}
+
+static int comp_task(void *__cct)
+{
+ struct ehca_cpu_comp_task *cct = __cct;
+ int cql_empty;
+ DECLARE_WAITQUEUE(wait, current);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ while (!kthread_should_stop()) {
+ add_wait_queue(&cct->wait_queue, &wait);
+
+ spin_lock_irq(&cct->task_lock);
+ cql_empty = list_empty(&cct->cq_list);
+ spin_unlock_irq(&cct->task_lock);
+ if (cql_empty)
+ schedule();
+ else
+ __set_current_state(TASK_RUNNING);
+
+ remove_wait_queue(&cct->wait_queue, &wait);
+
+ spin_lock_irq(&cct->task_lock);
+ cql_empty = list_empty(&cct->cq_list);
+ spin_unlock_irq(&cct->task_lock);
+ if (!cql_empty)
+ run_comp_task(__cct);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+ __set_current_state(TASK_RUNNING);
+
+ return 0;
+}
+
+static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
+ int cpu)
+{
+ struct ehca_cpu_comp_task *cct;
+
+ cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+ spin_lock_init(&cct->task_lock);
+ INIT_LIST_HEAD(&cct->cq_list);
+ init_waitqueue_head(&cct->wait_queue);
+ cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);
+
+ return cct->task;
+}
+
+static void destroy_comp_task(struct ehca_comp_pool *pool,
+ int cpu)
+{
+ struct ehca_cpu_comp_task *cct;
+ struct task_struct *task;
+ unsigned long flags_cct;
+
+ cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+
+ spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+ task = cct->task;
+ cct->task = NULL;
+ cct->cq_jobs = 0;
+
+ spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+
+ if (task)
+ kthread_stop(task);
+}
+
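+ /*
+ * Move the CQs queued on a dead CPU's completion task over to the
+ * completion task of the CPU running the hotplug callback.
+ */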
+static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu)
+{
+ struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+ LIST_HEAD(list);
+ struct ehca_cq *cq;
+ unsigned long flags_cct;
+
+ spin_lock_irqsave(&cct->task_lock, flags_cct);
+
+ list_splice_init(&cct->cq_list, &list);
+
+ while (!list_empty(&list)) {
+ cq = list_entry(list.next, struct ehca_cq, entry);
+
+ list_del(&cq->entry);
+ __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
+ smp_processor_id()));
+ }
+
+ spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+
+}
+
+static int __cpuinit comp_pool_callback(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ struct ehca_cpu_comp_task *cct;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
+ if (!create_comp_task(pool, cpu)) {
+ ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
+ return NOTIFY_BAD;
+ }
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
+ ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
+ cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+ kthread_bind(cct->task, any_online_cpu(cpu_online_map));
+ destroy_comp_task(pool, cpu);
+ break;
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
+ cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+ kthread_bind(cct->task, cpu);
+ wake_up_process(cct->task);
+ break;
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
+ break;
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
+ ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
+ break;
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
+ destroy_comp_task(pool, cpu);
+ take_over_work(pool, cpu);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block comp_pool_callback_nb __cpuinitdata = {
+ .notifier_call = comp_pool_callback,
+ .priority = 0,
+};
+
+int ehca_create_comp_pool(void)
+{
+ int cpu;
+ struct task_struct *task;
+
+ if (!ehca_scaling_code)
+ return 0;
+
+ pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
+ if (pool == NULL)
+ return -ENOMEM;
+
+ spin_lock_init(&pool->last_cpu_lock);
+ pool->last_cpu = any_online_cpu(cpu_online_map);
+
+ pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
+ if (pool->cpu_comp_tasks == NULL) {
+ kfree(pool);
+ return -EINVAL;
+ }
+
+ for_each_online_cpu(cpu) {
+ task = create_comp_task(pool, cpu);
+ if (task) {
+ kthread_bind(task, cpu);
+ wake_up_process(task);
+ }
+ }
+
+ register_hotcpu_notifier(&comp_pool_callback_nb);
+
+ printk(KERN_INFO "eHCA scaling code enabled\n");
+
+ return 0;
+}
+
+void ehca_destroy_comp_pool(void)
+{
+ int i;
+
+ if (!ehca_scaling_code)
+ return;
+
+ unregister_hotcpu_notifier(&comp_pool_callback_nb);
+
+ for (i = 0; i < NR_CPUS; i++) {
+ if (cpu_online(i))
+ destroy_comp_task(pool, i);
+ }
+ free_percpu(pool->cpu_comp_tasks);
+ kfree(pool);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
new file mode 100644
index 0000000..3346cb0
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -0,0 +1,77 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * Function definitions and structs for EQs, NEQs and interrupts
+ *
+ * Authors: Heiko J Schick <schickhj@de.ibm.com>
+ * Khadija Souissi <souissi@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_IRQ_H
+#define __EHCA_IRQ_H
+
+
+struct ehca_shca;
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+
+int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
+
+irqreturn_t ehca_interrupt_neq(int irq, void *dev_id);
+void ehca_tasklet_neq(unsigned long data);
+
+irqreturn_t ehca_interrupt_eq(int irq, void *dev_id);
+void ehca_tasklet_eq(unsigned long data);
+void ehca_process_eq(struct ehca_shca *shca, int is_irq);
+
+struct ehca_cpu_comp_task {
+ wait_queue_head_t wait_queue;
+ struct list_head cq_list;
+ struct task_struct *task;
+ spinlock_t task_lock;
+ int cq_jobs;
+};
+
+struct ehca_comp_pool {
+ struct ehca_cpu_comp_task *cpu_comp_tasks;
+ int last_cpu;
+ spinlock_t last_cpu_lock;
+};
+
+int ehca_create_comp_pool(void);
+void ehca_destroy_comp_pool(void);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
new file mode 100644
index 0000000..8f7f282
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -0,0 +1,212 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * Function definitions for internal functions
+ *
+ * Authors: Heiko J Schick <schickhj@de.ibm.com>
+ * Dietmar Decker <ddecker@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __EHCA_IVERBS_H__
+#define __EHCA_IVERBS_H__
+
+#include "ehca_classes.h"
+
+int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props);
+
+int ehca_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props);
+
+int ehca_query_sma_attr(struct ehca_shca *shca, u8 port,
+ struct ehca_sma_attr *attr);
+
+int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
+
+int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid);
+
+int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask,
+ struct ib_port_modify *props);
+
+struct ib_pd *ehca_alloc_pd(struct ib_device *device,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+
+int ehca_dealloc_pd(struct ib_pd *pd);
+
+struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+
+int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+int ehca_destroy_ah(struct ib_ah *ah);
+
+struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
+
+struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags, u64 *iova_start);
+
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int mr_access_flags,
+ struct ib_udata *udata);
+
+int ehca_rereg_phys_mr(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf, int mr_access_flags, u64 *iova_start);
+
+int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
+
+int ehca_dereg_mr(struct ib_mr *mr);
+
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd);
+
+int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind);
+
+int ehca_dealloc_mw(struct ib_mw *mw);
+
+struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
+ int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+
+int ehca_map_phys_fmr(struct ib_fmr *fmr,
+ u64 *page_list, int list_len, u64 iova);
+
+int ehca_unmap_fmr(struct list_head *fmr_list);
+
+int ehca_dealloc_fmr(struct ib_fmr *fmr);
+
+enum ehca_eq_type {
+ EHCA_EQ = 0, /* Event Queue */
+ EHCA_NEQ /* Notification Event Queue */
+};
+
+int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq,
+ enum ehca_eq_type type, const u32 length);
+
+int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
+
+void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
+
+
+struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+
+int ehca_destroy_cq(struct ib_cq *cq);
+
+int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+
+int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+
+int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
+
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags);
+
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int ehca_destroy_qp(struct ib_qp *qp);
+
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata);
+
+int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+
+int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
+ struct ib_send_wr **bad_send_wr);
+
+int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr);
+
+int ehca_post_srq_recv(struct ib_srq *srq,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr);
+
+struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+
+int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+
+int ehca_destroy_srq(struct ib_srq *srq);
+
+u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
+ struct ib_qp_init_attr *qp_init_attr);
+
+int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
+ struct ib_udata *udata);
+
+int ehca_dealloc_ucontext(struct ib_ucontext *context);
+
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad,
+ struct ib_mad *out_mad);
+
+void ehca_poll_eqs(unsigned long data);
+
+int ehca_calc_ipd(struct ehca_shca *shca, int port,
+ enum ib_rate path_rate, u32 *ipd);
+
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
+
+#ifdef CONFIG_PPC_64K_PAGES
+void *ehca_alloc_fw_ctrlblock(gfp_t flags);
+void ehca_free_fw_ctrlblock(void *ptr);
+#else
+#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags))
+#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
+#endif
+
+void ehca_recover_sqp(struct ib_qp *sqp);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
new file mode 100644
index 0000000..bec7e02
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -0,0 +1,1082 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * module start/stop, HCA detection
+ *
+ * Authors: Heiko J Schick <schickhj@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Joachim Fenkes <fenkes@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include <linux/slab.h>
+#endif
+
+#include <linux/notifier.h>
+#include <linux/memory.h>
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+#define HCAD_VERSION "0026"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
+MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
+MODULE_VERSION(HCAD_VERSION);
+
+static int ehca_open_aqp1 = 0;
+static int ehca_hw_level = 0;
+static int ehca_poll_all_eqs = 1;
+
+int ehca_debug_level = 0;
+int ehca_nr_ports = 2;
+int ehca_use_hp_mr = 0;
+int ehca_port_act_time = 30;
+int ehca_static_rate = -1;
+int ehca_scaling_code = 0;
+int ehca_lock_hcalls = -1;
+int ehca_max_cq = -1;
+int ehca_max_qp = -1;
+
+module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO);
+module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
+module_param_named(hw_level, ehca_hw_level, int, S_IRUGO);
+module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO);
+module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO);
+module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO);
+module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO);
+module_param_named(static_rate, ehca_static_rate, int, S_IRUGO);
+module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO);
+module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO);
+module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO);
+module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO);
+
+MODULE_PARM_DESC(open_aqp1,
+ "Open AQP1 on startup (default: no)");
+MODULE_PARM_DESC(debug_level,
+ "Amount of debug output (0: none (default), 1: traces, "
+ "2: some dumps, 3: lots)");
+MODULE_PARM_DESC(hw_level,
+ "Hardware level (0: autosensing (default), "
+ "0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
+MODULE_PARM_DESC(nr_ports,
+ "number of connected ports (-1: autodetect, 1: port one only, "
+ "2: two ports (default)");
+MODULE_PARM_DESC(use_hp_mr,
+ "Use high performance MRs (default: no)");
+MODULE_PARM_DESC(port_act_time,
+ "Time to wait for port activation (default: 30 sec)");
+MODULE_PARM_DESC(poll_all_eqs,
+ "Poll all event queues periodically (default: yes)");
+MODULE_PARM_DESC(static_rate,
+ "Set permanent static rate (default: no static rate)");
+MODULE_PARM_DESC(scaling_code,
+ "Enable scaling code (default: no)");
+MODULE_PARM_DESC(lock_hcalls,
+ "Serialize all hCalls made by the driver "
+ "(default: autodetect)");
+MODULE_PARM_DESC(number_of_cqs,
+ "Max number of CQs which can be allocated "
+ "(default: autodetect)");
+MODULE_PARM_DESC(number_of_qps,
+ "Max number of QPs which can be allocated "
+ "(default: autodetect)");
+
+DEFINE_RWLOCK(ehca_qp_idr_lock);
+DEFINE_RWLOCK(ehca_cq_idr_lock);
+DEFINE_IDR(ehca_qp_idr);
+DEFINE_IDR(ehca_cq_idr);
+
+static LIST_HEAD(shca_list); /* list of all registered ehcas */
+static DEFINE_SPINLOCK(shca_list_lock);
+
+static struct timer_list poll_eqs_timer;
+
+#ifdef CONFIG_PPC_64K_PAGES
+static struct kmem_cache *ctblk_cache;
+
+void *ehca_alloc_fw_ctrlblock(gfp_t flags)
+{
+ void *ret = kmem_cache_zalloc(ctblk_cache, flags);
+ if (!ret)
+ ehca_gen_err("Out of memory for ctblk");
+ return ret;
+}
+
+void ehca_free_fw_ctrlblock(void *ptr)
+{
+ if (ptr)
+ kmem_cache_free(ctblk_cache, ptr);
+}
+#endif
+
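+/* map eHCA firmware (hCall) return codes to Linux errno values */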
+int ehca2ib_return_code(u64 ehca_rc)
+{
+ switch (ehca_rc) {
+ case H_SUCCESS:
+ return 0;
+ case H_RESOURCE: /* Resource in use */
+ case H_BUSY:
+ return -EBUSY;
+ case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
+ case H_CONSTRAINED: /* resource constraint */
+ case H_NO_MEM:
+ return -ENOMEM;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int ehca_create_slab_caches(void)
+{
+ int ret;
+
+ ret = ehca_init_pd_cache();
+ if (ret) {
+ ehca_gen_err("Cannot create PD SLAB cache.");
+ return ret;
+ }
+
+ ret = ehca_init_cq_cache();
+ if (ret) {
+ ehca_gen_err("Cannot create CQ SLAB cache.");
+ goto create_slab_caches2;
+ }
+
+ ret = ehca_init_qp_cache();
+ if (ret) {
+ ehca_gen_err("Cannot create QP SLAB cache.");
+ goto create_slab_caches3;
+ }
+
+ ret = ehca_init_av_cache();
+ if (ret) {
+ ehca_gen_err("Cannot create AV SLAB cache.");
+ goto create_slab_caches4;
+ }
+
+ ret = ehca_init_mrmw_cache();
+ if (ret) {
+ ehca_gen_err("Cannot create MR&MW SLAB cache.");
+ goto create_slab_caches5;
+ }
+
+ ret = ehca_init_small_qp_cache();
+ if (ret) {
+ ehca_gen_err("Cannot create small queue SLAB cache.");
+ goto create_slab_caches6;
+ }
+
+#ifdef CONFIG_PPC_64K_PAGES
+ ctblk_cache = kmem_cache_create("ehca_cache_ctblk",
+ EHCA_PAGESIZE, H_CB_ALIGNMENT,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!ctblk_cache) {
+ ehca_gen_err("Cannot create ctblk SLAB cache.");
+ ehca_cleanup_small_qp_cache();
+ goto create_slab_caches6;
+ }
+#endif
+ return 0;
+
+create_slab_caches6:
+ ehca_cleanup_mrmw_cache();
+
+create_slab_caches5:
+ ehca_cleanup_av_cache();
+
+create_slab_caches4:
+ ehca_cleanup_qp_cache();
+
+create_slab_caches3:
+ ehca_cleanup_cq_cache();
+
+create_slab_caches2:
+ ehca_cleanup_pd_cache();
+
+ return ret;
+}
+
+static void ehca_destroy_slab_caches(void)
+{
+ ehca_cleanup_small_qp_cache();
+ ehca_cleanup_mrmw_cache();
+ ehca_cleanup_av_cache();
+ ehca_cleanup_qp_cache();
+ ehca_cleanup_cq_cache();
+ ehca_cleanup_pd_cache();
+#ifdef CONFIG_PPC_64K_PAGES
+ if (ctblk_cache)
+ kmem_cache_destroy(ctblk_cache);
+#endif
+}
+
+#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39)
+#define EHCA_REVID EHCA_BMASK_IBM(40, 63)
+
+static struct cap_descr {
+ u64 mask;
+ char *descr;
+} hca_cap_descr[] = {
+ { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" },
+ { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" },
+ { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" },
+ { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" },
+ { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" },
+ { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" },
+ { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" },
+ { HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" },
+ { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" },
+ { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" },
+ { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" },
+ { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" },
+ { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" },
+ { HCA_CAP_SRQ, "HCA_CAP_SRQ" },
+ { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" },
+ { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" },
+ { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
+ { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" },
+};
+
+static int ehca_sense_attributes(struct ehca_shca *shca)
+{
+ int i, ret = 0;
+ u64 h_ret;
+ struct hipz_query_hca *rblock;
+ struct hipz_query_port *port;
+ const char *loc_code;
+
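+ /* pairs of (HCA capability flag, MR page size in bytes) */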
+ static const u32 pgsize_map[] = {
+ HCA_CAP_MR_PGSIZE_4K, 0x1000,
+ HCA_CAP_MR_PGSIZE_64K, 0x10000,
+ HCA_CAP_MR_PGSIZE_1M, 0x100000,
+ HCA_CAP_MR_PGSIZE_16M, 0x1000000,
+ };
+
+ ehca_gen_dbg("Probing adapter %s...",
+ shca->ofdev->node->full_name);
+ loc_code = of_get_property(shca->ofdev->node, "ibm,loc-code", NULL);
+ if (loc_code)
+ ehca_gen_dbg(" ... location lode=%s", loc_code);
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!rblock) {
+ ehca_gen_err("Cannot allocate rblock memory.");
+ return -ENOMEM;
+ }
+
+ h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
+ if (h_ret != H_SUCCESS) {
+ ehca_gen_err("Cannot query device properties. h_ret=%li",
+ h_ret);
+ ret = -EPERM;
+ goto sense_attributes1;
+ }
+
+ if (ehca_nr_ports == 1)
+ shca->num_ports = 1;
+ else
+ shca->num_ports = (u8)rblock->num_ports;
+
+ ehca_gen_dbg(" ... found %x ports", rblock->num_ports);
+
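+ /* hw_level 0 means autosensing: derive the level from HCAAVER/REVID */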
+ if (ehca_hw_level == 0) {
+ u32 hcaaver;
+ u32 revid;
+
+ hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
+ revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
+
+ ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
+
+ if (hcaaver == 1) {
+ if (revid <= 3)
+ shca->hw_level = 0x10 | (revid + 1);
+ else
+ shca->hw_level = 0x14;
+ } else if (hcaaver == 2) {
+ if (revid == 0)
+ shca->hw_level = 0x21;
+ else if (revid == 0x10)
+ shca->hw_level = 0x22;
+ else if (revid == 0x20 || revid == 0x21)
+ shca->hw_level = 0x23;
+ }
+
+ if (!shca->hw_level) {
+ ehca_gen_warn("unknown hardware version"
+ " - assuming default level");
+ shca->hw_level = 0x22;
+ }
+ } else
+ shca->hw_level = ehca_hw_level;
+ ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
+
+ shca->hca_cap = rblock->hca_cap_indicators;
+ ehca_gen_dbg(" ... HCA capabilities:");
+ for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++)
+ if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
+ ehca_gen_dbg(" %s", hca_cap_descr[i].descr);
+
+ /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is
+ * a firmware property, so it's valid across all adapters
+ */
+ if (ehca_lock_hcalls == -1)
+ ehca_lock_hcalls = !(shca->hca_cap & HCA_CAP_H_ALLOC_RES_SYNC);
+
+ /* translate supported MR page sizes; always support 4K */
+ shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
+ for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
+ if (rblock->memory_page_size_supported & pgsize_map[i])
+ shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
+
+ /* Set maximum number of CQs and QPs to calculate EQ size */
+ if (shca->max_num_qps == -1)
+ shca->max_num_qps = min_t(int, rblock->max_qp,
+ EHCA_MAX_NUM_QUEUES);
+ else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) {
+ ehca_gen_warn("The requested number of QPs is out of range "
+ "(1 - %i) specified by HW. Value is set to %i",
+ rblock->max_qp, rblock->max_qp);
+ shca->max_num_qps = rblock->max_qp;
+ }
+
+ if (shca->max_num_cqs == -1)
+ shca->max_num_cqs = min_t(int, rblock->max_cq,
+ EHCA_MAX_NUM_QUEUES);
+ else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) {
+ ehca_gen_warn("The requested number of CQs is out of range "
+ "(1 - %i) specified by HW. Value is set to %i",
+ rblock->max_cq, rblock->max_cq);
+ }
+
+ /* query max MTU from first port -- it's the same for all ports */
+ port = (struct hipz_query_port *)rblock;
+ h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
+ if (h_ret != H_SUCCESS) {
+ ehca_gen_err("Cannot query port properties. h_ret=%li",
+ h_ret);
+ ret = -EPERM;
+ goto sense_attributes1;
+ }
+
+ shca->max_mtu = port->max_mtu;
+
+sense_attributes1:
+ ehca_free_fw_ctrlblock(rblock);
+ return ret;
+}
+
+static int init_node_guid(struct ehca_shca *shca)
+{
+ int ret = 0;
+ struct hipz_query_hca *rblock;
+
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!rblock) {
+ ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+ return -ENOMEM;
+ }
+
+ if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "Can't query device properties");
+ ret = -EINVAL;
+ goto init_node_guid1;
+ }
+
+ memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
+
+init_node_guid1:
+ ehca_free_fw_ctrlblock(rblock);
+ return ret;
+}
+
+static int ehca_init_device(struct ehca_shca *shca)
+{
+ int ret;
+
+ ret = init_node_guid(shca);
+ if (ret)
+ return ret;
+
+ strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
+ shca->ib_device.owner = THIS_MODULE;
+
+ shca->ib_device.uverbs_abi_ver = 8;
+ shca->ib_device.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
+
+ shca->ib_device.node_type = RDMA_NODE_IB_CA;
+ shca->ib_device.phys_port_cnt = shca->num_ports;
+ shca->ib_device.num_comp_vectors = 1;
+ shca->ib_device.dma_device = &shca->ofdev->dev;
+ shca->ib_device.query_device = ehca_query_device;
+ shca->ib_device.query_port = ehca_query_port;
+ shca->ib_device.query_gid = ehca_query_gid;
+ shca->ib_device.query_pkey = ehca_query_pkey;
+ /* shca->in_device.modify_device = ehca_modify_device */
+ shca->ib_device.modify_port = ehca_modify_port;
+ shca->ib_device.alloc_ucontext = ehca_alloc_ucontext;
+ shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext;
+ shca->ib_device.alloc_pd = ehca_alloc_pd;
+ shca->ib_device.dealloc_pd = ehca_dealloc_pd;
+ shca->ib_device.create_ah = ehca_create_ah;
+ /* shca->ib_device.modify_ah = ehca_modify_ah; */
+ shca->ib_device.query_ah = ehca_query_ah;
+ shca->ib_device.destroy_ah = ehca_destroy_ah;
+ shca->ib_device.create_qp = ehca_create_qp;
+ shca->ib_device.modify_qp = ehca_modify_qp;
+ shca->ib_device.query_qp = ehca_query_qp;
+ shca->ib_device.destroy_qp = ehca_destroy_qp;
+ shca->ib_device.post_send = ehca_post_send;
+ shca->ib_device.post_recv = ehca_post_recv;
+ shca->ib_device.create_cq = ehca_create_cq;
+ shca->ib_device.destroy_cq = ehca_destroy_cq;
+ shca->ib_device.resize_cq = ehca_resize_cq;
+ shca->ib_device.poll_cq = ehca_poll_cq;
+ /* shca->ib_device.peek_cq = ehca_peek_cq; */
+ shca->ib_device.req_notify_cq = ehca_req_notify_cq;
+ /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */
+ shca->ib_device.get_dma_mr = ehca_get_dma_mr;
+ shca->ib_device.reg_phys_mr = ehca_reg_phys_mr;
+ shca->ib_device.reg_user_mr = ehca_reg_user_mr;
+ shca->ib_device.query_mr = ehca_query_mr;
+ shca->ib_device.dereg_mr = ehca_dereg_mr;
+ shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr;
+ shca->ib_device.alloc_mw = ehca_alloc_mw;
+ shca->ib_device.bind_mw = ehca_bind_mw;
+ shca->ib_device.dealloc_mw = ehca_dealloc_mw;
+ shca->ib_device.alloc_fmr = ehca_alloc_fmr;
+ shca->ib_device.map_phys_fmr = ehca_map_phys_fmr;
+ shca->ib_device.unmap_fmr = ehca_unmap_fmr;
+ shca->ib_device.dealloc_fmr = ehca_dealloc_fmr;
+ shca->ib_device.attach_mcast = ehca_attach_mcast;
+ shca->ib_device.detach_mcast = ehca_detach_mcast;
+ shca->ib_device.process_mad = ehca_process_mad;
+ shca->ib_device.mmap = ehca_mmap;
+
+ if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
+ shca->ib_device.uverbs_cmd_mask |=
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+
+ shca->ib_device.create_srq = ehca_create_srq;
+ shca->ib_device.modify_srq = ehca_modify_srq;
+ shca->ib_device.query_srq = ehca_query_srq;
+ shca->ib_device.destroy_srq = ehca_destroy_srq;
+ shca->ib_device.post_srq_recv = ehca_post_srq_recv;
+ }
+
+ return ret;
+}
+
+static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
+{
+ struct ehca_sport *sport = &shca->sport[port - 1];
+ struct ib_cq *ibcq;
+ struct ib_qp *ibqp;
+ struct ib_qp_init_attr qp_init_attr;
+ int ret;
+
+ if (sport->ibcq_aqp1) {
+ ehca_err(&shca->ib_device, "AQP1 CQ is already created.");
+ return -EPERM;
+ }
+
+ ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), 10, 0);
+ if (IS_ERR(ibcq)) {
+ ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
+ return PTR_ERR(ibcq);
+ }
+ sport->ibcq_aqp1 = ibcq;
+
+ if (sport->ibqp_sqp[IB_QPT_GSI]) {
+ ehca_err(&shca->ib_device, "AQP1 QP is already created.");
+ ret = -EPERM;
+ goto create_aqp1;
+ }
+
+ memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr));
+ qp_init_attr.send_cq = ibcq;
+ qp_init_attr.recv_cq = ibcq;
+ qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ qp_init_attr.cap.max_send_wr = 100;
+ qp_init_attr.cap.max_recv_wr = 100;
+ qp_init_attr.cap.max_send_sge = 2;
+ qp_init_attr.cap.max_recv_sge = 1;
+ qp_init_attr.qp_type = IB_QPT_GSI;
+ qp_init_attr.port_num = port;
+ qp_init_attr.qp_context = NULL;
+ qp_init_attr.event_handler = NULL;
+ qp_init_attr.srq = NULL;
+
+ ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
+ if (IS_ERR(ibqp)) {
+ ehca_err(&shca->ib_device, "Cannot create AQP1 QP.");
+ ret = PTR_ERR(ibqp);
+ goto create_aqp1;
+ }
+ sport->ibqp_sqp[IB_QPT_GSI] = ibqp;
+
+ return 0;
+
+create_aqp1:
+ ib_destroy_cq(sport->ibcq_aqp1);
+ return ret;
+}
+
+static int ehca_destroy_aqp1(struct ehca_sport *sport)
+{
+ int ret;
+
+ ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]);
+ if (ret) {
+ ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret);
+ return ret;
+ }
+
+ ret = ib_destroy_cq(sport->ibcq_aqp1);
+ if (ret)
+ ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret);
+
+ return ret;
+}
+
+static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level);
+}
+
+static ssize_t ehca_store_debug_level(struct device_driver *ddp,
+ const char *buf, size_t count)
+{
+ int value = (*buf) - '0';
+ if (value >= 0 && value <= 9)
+ ehca_debug_level = value;
+ return 1;
+}
+
+static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
+ ehca_show_debug_level, ehca_store_debug_level);
+
+static struct attribute *ehca_drv_attrs[] = {
+ &driver_attr_debug_level.attr,
+ NULL
+};
+
+static struct attribute_group ehca_drv_attr_grp = {
+ .attrs = ehca_drv_attrs
+};
+
+static struct attribute_group *ehca_drv_attr_groups[] = {
+ &ehca_drv_attr_grp,
+ NULL,
+};
+
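+/* generate a read-only sysfs attribute reporting one field of the HCA query block */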
+#define EHCA_RESOURCE_ATTR(name) \
+static ssize_t ehca_show_##name(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct ehca_shca *shca; \
+ struct hipz_query_hca *rblock; \
+ int data; \
+ \
+ shca = dev->driver_data; \
+ \
+ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \
+ if (!rblock) { \
+ dev_err(dev, "Can't allocate rblock memory.\n"); \
+ return 0; \
+ } \
+ \
+ if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
+ dev_err(dev, "Can't query device properties\n"); \
+ ehca_free_fw_ctrlblock(rblock); \
+ return 0; \
+ } \
+ \
+ data = rblock->name; \
+ ehca_free_fw_ctrlblock(rblock); \
+ \
+ if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \
+ return snprintf(buf, 256, "1\n"); \
+ else \
+ return snprintf(buf, 256, "%d\n", data); \
+ \
+} \
+static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
+
+EHCA_RESOURCE_ATTR(num_ports);
+EHCA_RESOURCE_ATTR(hw_ver);
+EHCA_RESOURCE_ATTR(max_eq);
+EHCA_RESOURCE_ATTR(cur_eq);
+EHCA_RESOURCE_ATTR(max_cq);
+EHCA_RESOURCE_ATTR(cur_cq);
+EHCA_RESOURCE_ATTR(max_qp);
+EHCA_RESOURCE_ATTR(cur_qp);
+EHCA_RESOURCE_ATTR(max_mr);
+EHCA_RESOURCE_ATTR(cur_mr);
+EHCA_RESOURCE_ATTR(max_mw);
+EHCA_RESOURCE_ATTR(cur_mw);
+EHCA_RESOURCE_ATTR(max_pd);
+EHCA_RESOURCE_ATTR(max_ah);
+
+static ssize_t ehca_show_adapter_handle(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ehca_shca *shca = dev->driver_data;
+
+ return sprintf(buf, "%lx\n", shca->ipz_hca_handle.handle);
+}
+static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
+
+static struct attribute *ehca_dev_attrs[] = {
+ &dev_attr_adapter_handle.attr,
+ &dev_attr_num_ports.attr,
+ &dev_attr_hw_ver.attr,
+ &dev_attr_max_eq.attr,
+ &dev_attr_cur_eq.attr,
+ &dev_attr_max_cq.attr,
+ &dev_attr_cur_cq.attr,
+ &dev_attr_max_qp.attr,
+ &dev_attr_cur_qp.attr,
+ &dev_attr_max_mr.attr,
+ &dev_attr_cur_mr.attr,
+ &dev_attr_max_mw.attr,
+ &dev_attr_cur_mw.attr,
+ &dev_attr_max_pd.attr,
+ &dev_attr_max_ah.attr,
+ NULL
+};
+
+static struct attribute_group ehca_dev_attr_grp = {
+ .attrs = ehca_dev_attrs
+};
+
+static int __devinit ehca_probe(struct of_device *dev,
+ const struct of_device_id *id)
+{
+ struct ehca_shca *shca;
+ const u64 *handle;
+ struct ib_pd *ibpd;
+ int ret, i, eq_size;
+
+ handle = of_get_property(dev->node, "ibm,hca-handle", NULL);
+ if (!handle) {
+ ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
+ dev->node->full_name);
+ return -ENODEV;
+ }
+
+ if (!(*handle)) {
+ ehca_gen_err("Wrong eHCA handle for adapter: %s.",
+ dev->node->full_name);
+ return -ENODEV;
+ }
+
+ shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
+ if (!shca) {
+ ehca_gen_err("Cannot allocate shca memory.");
+ return -ENOMEM;
+ }
+
+ mutex_init(&shca->modify_mutex);
+ atomic_set(&shca->num_cqs, 0);
+ atomic_set(&shca->num_qps, 0);
+ shca->max_num_qps = ehca_max_qp;
+ shca->max_num_cqs = ehca_max_cq;
+
+ for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
+ spin_lock_init(&shca->sport[i].mod_sqp_lock);
+
+ shca->ofdev = dev;
+ shca->ipz_hca_handle.handle = *handle;
+ dev->dev.driver_data = shca;
+
+ ret = ehca_sense_attributes(shca);
+ if (ret < 0) {
+ ehca_gen_err("Cannot sense eHCA attributes.");
+ goto probe1;
+ }
+
+ ret = ehca_init_device(shca);
+ if (ret) {
+ ehca_gen_err("Cannot init ehca device struct");
+ goto probe1;
+ }
+
+ eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps;
+ /* create event queues */
+ ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
+ if (ret) {
+ ehca_err(&shca->ib_device, "Cannot create EQ.");
+ goto probe1;
+ }
+
+ ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
+ if (ret) {
+ ehca_err(&shca->ib_device, "Cannot create NEQ.");
+ goto probe3;
+ }
+
+ /* create internal protection domain */
+ ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL);
+ if (IS_ERR(ibpd)) {
+ ehca_err(&shca->ib_device, "Cannot create internal PD.");
+ ret = PTR_ERR(ibpd);
+ goto probe4;
+ }
+
+ shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
+ shca->pd->ib_pd.device = &shca->ib_device;
+
+ /* create internal max MR */
+ ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
+
+ if (ret) {
+ ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i",
+ ret);
+ goto probe5;
+ }
+
+ ret = ib_register_device(&shca->ib_device);
+ if (ret) {
+ ehca_err(&shca->ib_device,
+ "ib_register_device() failed ret=%i", ret);
+ goto probe6;
+ }
+
+ /* create AQP1 for port 1 */
+ if (ehca_open_aqp1 == 1) {
+ shca->sport[0].port_state = IB_PORT_DOWN;
+ ret = ehca_create_aqp1(shca, 1);
+ if (ret) {
+ ehca_err(&shca->ib_device,
+ "Cannot create AQP1 for port 1.");
+ goto probe7;
+ }
+ }
+
+ /* create AQP1 for port 2 */
+ if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
+ shca->sport[1].port_state = IB_PORT_DOWN;
+ ret = ehca_create_aqp1(shca, 2);
+ if (ret) {
+ ehca_err(&shca->ib_device,
+ "Cannot create AQP1 for port 2.");
+ goto probe8;
+ }
+ }
+
+ ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp);
+ if (ret) /* only complain; we can live without attributes */
+ ehca_err(&shca->ib_device,
+ "Cannot create device attributes ret=%d", ret);
+
+ spin_lock(&shca_list_lock);
+ list_add(&shca->shca_list, &shca_list);
+ spin_unlock(&shca_list_lock);
+
+ return 0;
+
+probe8:
+ ret = ehca_destroy_aqp1(&shca->sport[0]);
+ if (ret)
+ ehca_err(&shca->ib_device,
+ "Cannot destroy AQP1 for port 1. ret=%i", ret);
+
+probe7:
+ ib_unregister_device(&shca->ib_device);
+
+probe6:
+ ret = ehca_dereg_internal_maxmr(shca);
+ if (ret)
+ ehca_err(&shca->ib_device,
+ "Cannot destroy internal MR. ret=%x", ret);
+
+probe5:
+ ret = ehca_dealloc_pd(&shca->pd->ib_pd);
+ if (ret)
+ ehca_err(&shca->ib_device,
+ "Cannot destroy internal PD. ret=%x", ret);
+
+probe4:
+ ret = ehca_destroy_eq(shca, &shca->neq);
+ if (ret)
+ ehca_err(&shca->ib_device,
+ "Cannot destroy NEQ. ret=%x", ret);
+
+probe3:
+ ret = ehca_destroy_eq(shca, &shca->eq);
+ if (ret)
+ ehca_err(&shca->ib_device,
+ "Cannot destroy EQ. ret=%x", ret);
+
+probe1:
+ ib_dealloc_device(&shca->ib_device);
+
+ return -EINVAL;
+}
+
+static int __devexit ehca_remove(struct of_device *dev)
+{
+ struct ehca_shca *shca = dev->dev.driver_data;
+ int ret;
+
+ sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp);
+
+ if (ehca_open_aqp1 == 1) {
+ int i;
+ for (i = 0; i < shca->num_ports; i++) {
+ ret = ehca_destroy_aqp1(&shca->sport[i]);
+ if (ret)
+ ehca_err(&shca->ib_device,
+ "Cannot destroy AQP1 for port %x "
+ "ret=%i", ret, i);
+ }
+ }
+
+ ib_unregister_device(&shca->ib_device);
+
+ ret = ehca_dereg_internal_maxmr(shca);
+ if (ret)
+ ehca_err(&shca->ib_device,
+ "Cannot destroy internal MR. ret=%i", ret);
+
+ ret = ehca_dealloc_pd(&shca->pd->ib_pd);
+ if (ret)
+ ehca_err(&shca->ib_device,
+ "Cannot destroy internal PD. ret=%i", ret);
+
+ ret = ehca_destroy_eq(shca, &shca->eq);
+ if (ret)
+ ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret);
+
+ ret = ehca_destroy_eq(shca, &shca->neq);
+ if (ret)
+ ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%i", ret);
+
+ ib_dealloc_device(&shca->ib_device);
+
+ spin_lock(&shca_list_lock);
+ list_del(&shca->shca_list);
+ spin_unlock(&shca_list_lock);
+
+ return ret;
+}
+
+static struct of_device_id ehca_device_table[] =
+{
+ {
+ .name = "lhca",
+ .compatible = "IBM,lhca",
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(of, ehca_device_table);
+
+static struct of_platform_driver ehca_driver = {
+ .name = "ehca",
+ .match_table = ehca_device_table,
+ .probe = ehca_probe,
+ .remove = ehca_remove,
+ .driver = {
+ .groups = ehca_drv_attr_groups,
+ },
+};
+
+void ehca_poll_eqs(unsigned long data)
+{
+ struct ehca_shca *shca;
+
+ spin_lock(&shca_list_lock);
+ list_for_each_entry(shca, &shca_list, shca_list) {
+ if (shca->eq.is_initialized) {
+ /* call deadman proc only if eq ptr does not change */
+ struct ehca_eq *eq = &shca->eq;
+ int max = 3;
+ volatile u64 q_ofs, q_ofs2;
+ u64 flags;
+ spin_lock_irqsave(&eq->spinlock, flags);
+ q_ofs = eq->ipz_queue.current_q_offset;
+ spin_unlock_irqrestore(&eq->spinlock, flags);
+ do {
+ spin_lock_irqsave(&eq->spinlock, flags);
+ q_ofs2 = eq->ipz_queue.current_q_offset;
+ spin_unlock_irqrestore(&eq->spinlock, flags);
+ max--;
+ } while (q_ofs == q_ofs2 && max > 0);
+ if (q_ofs == q_ofs2)
+ ehca_process_eq(shca, 0);
+ }
+ }
+ mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ));
+ spin_unlock(&shca_list_lock);
+}
+
+static int ehca_mem_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ static unsigned long ehca_dmem_warn_time;
+
+ switch (action) {
+ case MEM_CANCEL_OFFLINE:
+ case MEM_CANCEL_ONLINE:
+ case MEM_ONLINE:
+ case MEM_OFFLINE:
+ return NOTIFY_OK;
+ case MEM_GOING_ONLINE:
+ case MEM_GOING_OFFLINE:
+ /* only ok if no hca is attached to the lpar */
+ spin_lock(&shca_list_lock);
+ if (list_empty(&shca_list)) {
+ spin_unlock(&shca_list_lock);
+ return NOTIFY_OK;
+ } else {
+ spin_unlock(&shca_list_lock);
+ if (printk_timed_ratelimit(&ehca_dmem_warn_time,
+ 30 * 1000))
+ ehca_gen_err("DMEM operations are not allowed"
+ "in conjunction with eHCA");
+ return NOTIFY_BAD;
+ }
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block ehca_mem_nb = {
+ .notifier_call = ehca_mem_notifier,
+};
+
+static int __init ehca_module_init(void)
+{
+ int ret;
+
+ printk(KERN_INFO "eHCA Infiniband Device Driver "
+ "(Version " HCAD_VERSION ")\n");
+
+ ret = ehca_create_comp_pool();
+ if (ret) {
+ ehca_gen_err("Cannot create comp pool.");
+ return ret;
+ }
+
+ ret = ehca_create_slab_caches();
+ if (ret) {
+ ehca_gen_err("Cannot create SLAB caches");
+ ret = -ENOMEM;
+ goto module_init1;
+ }
+
+ ret = ibmebus_register_driver(&ehca_driver);
+ if (ret) {
+ ehca_gen_err("Cannot register eHCA device driver");
+ ret = -EINVAL;
+ goto module_init2;
+ }
+
+ ret = register_memory_notifier(&ehca_mem_nb);
+ if (ret) {
+ ehca_gen_err("Failed registering memory add/remove notifier");
+ goto module_init3;
+ }
+
+ if (ehca_poll_all_eqs != 1) {
+ ehca_gen_err("WARNING!!!");
+ ehca_gen_err("It is possible to lose interrupts.");
+ } else {
+ init_timer(&poll_eqs_timer);
+ poll_eqs_timer.function = ehca_poll_eqs;
+ poll_eqs_timer.expires = jiffies + HZ;
+ add_timer(&poll_eqs_timer);
+ }
+
+ return 0;
+
+module_init3:
+ ibmebus_unregister_driver(&ehca_driver);
+
+module_init2:
+ ehca_destroy_slab_caches();
+
+module_init1:
+ ehca_destroy_comp_pool();
+ return ret;
+}
+
+static void __exit ehca_module_exit(void)
+{
+ if (ehca_poll_all_eqs == 1)
+ del_timer_sync(&poll_eqs_timer);
+
+ ibmebus_unregister_driver(&ehca_driver);
+
+ unregister_memory_notifier(&ehca_mem_nb);
+
+ ehca_destroy_slab_caches();
+
+ ehca_destroy_comp_pool();
+
+ idr_destroy(&ehca_cq_idr);
+ idr_destroy(&ehca_qp_idr);
+}
+
+module_init(ehca_module_init);
+module_exit(ehca_module_exit);
diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c
new file mode 100644
index 0000000..e3ef026
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mcast.c
@@ -0,0 +1,131 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * mcast functions
+ *
+ * Authors: Khadija Souissi <souissik@de.ibm.com>
+ * Waleri Fomin <fomin@de.ibm.com>
+ * Reinhard Ernst <rernst@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Heiko J Schick <schickhj@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+#define MAX_MC_LID 0xFFFE
+#define MIN_MC_LID 0xC000 /* Multicast limits */
+#define EHCA_VALID_MULTICAST_GID(gid) ((gid)[0] == 0xFF)
+#define EHCA_VALID_MULTICAST_LID(lid) \
+ (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID))
+
+int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+ struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+ ib_device);
+ union ib_gid my_gid;
+ u64 subnet_prefix, interface_id, h_ret;
+
+ if (ibqp->qp_type != IB_QPT_UD) {
+ ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type);
+ return -EINVAL;
+ }
+
+ if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
+ ehca_err(ibqp->device, "invalid mulitcast gid");
+ return -EINVAL;
+ } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
+ ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
+ return -EINVAL;
+ }
+
+ memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+
+ subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+ interface_id = be64_to_cpu(my_gid.global.interface_id);
+ h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle,
+ my_qp->galpas.kernel,
+ lid, subnet_prefix, interface_id);
+ if (h_ret != H_SUCCESS)
+ ehca_err(ibqp->device,
+ "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
+ "h_ret=%li", my_qp, ibqp->qp_num, h_ret);
+
+ return ehca2ib_return_code(h_ret);
+}
+
+int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+ struct ehca_shca *shca = container_of(ibqp->pd->device,
+ struct ehca_shca, ib_device);
+ union ib_gid my_gid;
+ u64 subnet_prefix, interface_id, h_ret;
+
+ if (ibqp->qp_type != IB_QPT_UD) {
+ ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type);
+ return -EINVAL;
+ }
+
+ if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
+ ehca_err(ibqp->device, "invalid mulitcast gid");
+ return -EINVAL;
+ } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
+ ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid);
+ return -EINVAL;
+ }
+
+ memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
+
+ subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
+ interface_id = be64_to_cpu(my_gid.global.interface_id);
+ h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle,
+ my_qp->galpas.kernel,
+ lid, subnet_prefix, interface_id);
+ if (h_ret != H_SUCCESS)
+ ehca_err(ibqp->device,
+ "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
+ "h_ret=%li", my_qp, ibqp->qp_num, h_ret);
+
+ return ehca2ib_return_code(h_ret);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
new file mode 100644
index 0000000..f974367
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -0,0 +1,2186 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * MR/MW functions
+ *
+ * Authors: Dietmar Decker <ddecker@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rdma/ib_umem.h>
+
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "hcp_if.h"
+#include "hipz_hw.h"
+
+#define NUM_CHUNKS(length, chunk_size) \
+ (((length) + (chunk_size - 1)) / (chunk_size))
+
+/* max number of rpages (per hcall register_rpages) */
+#define MAX_RPAGES 512
+
+static struct kmem_cache *mr_cache;
+static struct kmem_cache *mw_cache;
+
+enum ehca_mr_pgsize {
+ EHCA_MR_PGSIZE4K = 0x1000L,
+ EHCA_MR_PGSIZE64K = 0x10000L,
+ EHCA_MR_PGSIZE1M = 0x100000L,
+ EHCA_MR_PGSIZE16M = 0x1000000L
+};
+
+#define EHCA_MR_PGSHIFT4K 12
+#define EHCA_MR_PGSHIFT64K 16
+#define EHCA_MR_PGSHIFT1M 20
+#define EHCA_MR_PGSHIFT16M 24
+
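+/* encode MR page size as 0..3: 4K=0, 64K=1, 1M=2, 16M=3 */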
+static u32 ehca_encode_hwpage_size(u32 pgsize)
+{
+ int log = ilog2(pgsize);
+ WARN_ON(log < 12 || log > 24 || log & 3);
+ return (log - 12) / 4;
+}
+
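+/* largest MR page size supported by this HCA (highest bit set in hca_cap_mr_pgsize) */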
+static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
+{
+ return 1UL << ilog2(shca->hca_cap_mr_pgsize);
+}
+
+static struct ehca_mr *ehca_mr_new(void)
+{
+ struct ehca_mr *me;
+
+ me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
+ if (me)
+ spin_lock_init(&me->mrlock);
+ else
+ ehca_gen_err("alloc failed");
+
+ return me;
+}
+
+static void ehca_mr_delete(struct ehca_mr *me)
+{
+ kmem_cache_free(mr_cache, me);
+}
+
+static struct ehca_mw *ehca_mw_new(void)
+{
+ struct ehca_mw *me;
+
+ me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
+ if (me)
+ spin_lock_init(&me->mwlock);
+ else
+ ehca_gen_err("alloc failed");
+
+ return me;
+}
+
+static void ehca_mw_delete(struct ehca_mw *me)
+{
+ kmem_cache_free(mw_cache, me);
+}
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
+{
+ struct ib_mr *ib_mr;
+ int ret;
+ struct ehca_mr *e_maxmr;
+ struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+ struct ehca_shca *shca =
+ container_of(pd->device, struct ehca_shca, ib_device);
+
+ if (shca->maxmr) {
+ e_maxmr = ehca_mr_new();
+ if (!e_maxmr) {
+ ehca_err(&shca->ib_device, "out of memory");
+ ib_mr = ERR_PTR(-ENOMEM);
+ goto get_dma_mr_exit0;
+ }
+
+ ret = ehca_reg_maxmr(shca, e_maxmr, (u64 *)KERNELBASE,
+ mr_access_flags, e_pd,
+ &e_maxmr->ib.ib_mr.lkey,
+ &e_maxmr->ib.ib_mr.rkey);
+ if (ret) {
+ ehca_mr_delete(e_maxmr);
+ ib_mr = ERR_PTR(ret);
+ goto get_dma_mr_exit0;
+ }
+ ib_mr = &e_maxmr->ib.ib_mr;
+ } else {
+ ehca_err(&shca->ib_device, "no internal max-MR exist!");
+ ib_mr = ERR_PTR(-EINVAL);
+ goto get_dma_mr_exit0;
+ }
+
+get_dma_mr_exit0:
+ if (IS_ERR(ib_mr))
+ ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
+ PTR_ERR(ib_mr), pd, mr_access_flags);
+ return ib_mr;
+} /* end ehca_get_dma_mr() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start)
+{
+ struct ib_mr *ib_mr;
+ int ret;
+ struct ehca_mr *e_mr;
+ struct ehca_shca *shca =
+ container_of(pd->device, struct ehca_shca, ib_device);
+ struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+
+ u64 size;
+
+ if ((num_phys_buf <= 0) || !phys_buf_array) {
+ ehca_err(pd->device, "bad input values: num_phys_buf=%x "
+ "phys_buf_array=%p", num_phys_buf, phys_buf_array);
+ ib_mr = ERR_PTR(-EINVAL);
+ goto reg_phys_mr_exit0;
+ }
+ if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+ !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+ ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+ !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+ /*
+ * Remote Write Access requires Local Write Access
+ * Remote Atomic Access requires Local Write Access
+ */
+ ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+ mr_access_flags);
+ ib_mr = ERR_PTR(-EINVAL);
+ goto reg_phys_mr_exit0;
+ }
+
+ /* check physical buffer list and calculate size */
+ ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
+ iova_start, &size);
+ if (ret) {
+ ib_mr = ERR_PTR(ret);
+ goto reg_phys_mr_exit0;
+ }
+ if ((size == 0) ||
+ (((u64)iova_start + size) < (u64)iova_start)) {
+ ehca_err(pd->device, "bad input values: size=%lx iova_start=%p",
+ size, iova_start);
+ ib_mr = ERR_PTR(-EINVAL);
+ goto reg_phys_mr_exit0;
+ }
+
+ e_mr = ehca_mr_new();
+ if (!e_mr) {
+ ehca_err(pd->device, "out of memory");
+ ib_mr = ERR_PTR(-ENOMEM);
+ goto reg_phys_mr_exit0;
+ }
+
+ /* register MR on HCA */
+ if (ehca_mr_is_maxmr(size, iova_start)) {
+ e_mr->flags |= EHCA_MR_FLAG_MAXMR;
+ ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
+ e_pd, &e_mr->ib.ib_mr.lkey,
+ &e_mr->ib.ib_mr.rkey);
+ if (ret) {
+ ib_mr = ERR_PTR(ret);
+ goto reg_phys_mr_exit1;
+ }
+ } else {
+ struct ehca_mr_pginfo pginfo;
+ u32 num_kpages;
+ u32 num_hwpages;
+ u64 hw_pgsize;
+
+ num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
+ PAGE_SIZE);
+ /* for kernel space we try the largest possible page size */
+ hw_pgsize = ehca_get_max_hwpage_size(shca);
+ num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
+ hw_pgsize);
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_PHYS;
+ pginfo.num_kpages = num_kpages;
+ pginfo.hwpage_size = hw_pgsize;
+ pginfo.num_hwpages = num_hwpages;
+ pginfo.u.phy.num_phys_buf = num_phys_buf;
+ pginfo.u.phy.phys_buf_array = phys_buf_array;
+ pginfo.next_hwpage =
+ ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
+
+ ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
+ e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
+ &e_mr->ib.ib_mr.rkey);
+ if (ret) {
+ ib_mr = ERR_PTR(ret);
+ goto reg_phys_mr_exit1;
+ }
+ }
+
+ /* successful registration of all pages */
+ return &e_mr->ib.ib_mr;
+
+reg_phys_mr_exit1:
+ ehca_mr_delete(e_mr);
+reg_phys_mr_exit0:
+ if (IS_ERR(ib_mr))
+ ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
+ "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
+ PTR_ERR(ib_mr), pd, phys_buf_array,
+ num_phys_buf, mr_access_flags, iova_start);
+ return ib_mr;
+} /* end ehca_reg_phys_mr() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int mr_access_flags,
+ struct ib_udata *udata)
+{
+ struct ib_mr *ib_mr;
+ struct ehca_mr *e_mr;
+ struct ehca_shca *shca =
+ container_of(pd->device, struct ehca_shca, ib_device);
+ struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+ struct ehca_mr_pginfo pginfo;
+ int ret, page_shift;
+ u32 num_kpages;
+ u32 num_hwpages;
+ u64 hwpage_size;
+
+ if (!pd) {
+ ehca_gen_err("bad pd=%p", pd);
+ return ERR_PTR(-EFAULT);
+ }
+
+ if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+ !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+ ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+ !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+ /*
+ * Remote Write Access requires Local Write Access
+ * Remote Atomic Access requires Local Write Access
+ */
+ ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+ mr_access_flags);
+ ib_mr = ERR_PTR(-EINVAL);
+ goto reg_user_mr_exit0;
+ }
+
+ if (length == 0 || virt + length < virt) {
+ ehca_err(pd->device, "bad input values: length=%lx "
+ "virt_base=%lx", length, virt);
+ ib_mr = ERR_PTR(-EINVAL);
+ goto reg_user_mr_exit0;
+ }
+
+ e_mr = ehca_mr_new();
+ if (!e_mr) {
+ ehca_err(pd->device, "out of memory");
+ ib_mr = ERR_PTR(-ENOMEM);
+ goto reg_user_mr_exit0;
+ }
+
+ e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
+ mr_access_flags, 0);
+ if (IS_ERR(e_mr->umem)) {
+ ib_mr = (void *)e_mr->umem;
+ goto reg_user_mr_exit1;
+ }
+
+ if (e_mr->umem->page_size != PAGE_SIZE) {
+ ehca_err(pd->device, "page size not supported, "
+ "e_mr->umem->page_size=%x", e_mr->umem->page_size);
+ ib_mr = ERR_PTR(-EINVAL);
+ goto reg_user_mr_exit2;
+ }
+
+ /* determine number of MR pages */
+ num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
+ /* select proper hw_pgsize */
+ page_shift = PAGE_SHIFT;
+ if (e_mr->umem->hugetlb) {
+ /* determine page_shift, clamp between 4K and 16M */
+ page_shift = (fls64(length - 1) + 3) & ~3;
+ page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
+ EHCA_MR_PGSHIFT16M);
+ }
+ hwpage_size = 1UL << page_shift;
+
+ /* now that we have the desired page size, shift until it's
+ * supported, too. 4K is always supported, so this terminates.
+ */
+ while (!(hwpage_size & shca->hca_cap_mr_pgsize))
+ hwpage_size >>= 4;
+
+reg_user_mr_fallback:
+ num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
+ /* register MR on HCA */
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_USER;
+ pginfo.hwpage_size = hwpage_size;
+ pginfo.num_kpages = num_kpages;
+ pginfo.num_hwpages = num_hwpages;
+ pginfo.u.usr.region = e_mr->umem;
+ pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
+ pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk,
+ (&e_mr->umem->chunk_list),
+ list);
+
+ ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
+ e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
+ &e_mr->ib.ib_mr.rkey);
+ if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
+ ehca_warn(pd->device, "failed to register mr "
+ "with hwpage_size=%lx", hwpage_size);
+ ehca_info(pd->device, "try to register mr with "
+ "kpage_size=%lx", PAGE_SIZE);
+ /*
+ * this means kpages are not contiguous for a hw page
+ * try kernel page size as fallback solution
+ */
+ hwpage_size = PAGE_SIZE;
+ goto reg_user_mr_fallback;
+ }
+ if (ret) {
+ ib_mr = ERR_PTR(ret);
+ goto reg_user_mr_exit2;
+ }
+
+ /* successful registration of all pages */
+ return &e_mr->ib.ib_mr;
+
+reg_user_mr_exit2:
+ ib_umem_release(e_mr->umem);
+reg_user_mr_exit1:
+ ehca_mr_delete(e_mr);
+reg_user_mr_exit0:
+ if (IS_ERR(ib_mr))
+ ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
+ PTR_ERR(ib_mr), pd, mr_access_flags, udata);
+ return ib_mr;
+} /* end ehca_reg_user_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_rereg_phys_mr(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start)
+{
+ int ret;
+
+ struct ehca_shca *shca =
+ container_of(mr->device, struct ehca_shca, ib_device);
+ struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
+ u64 new_size;
+ u64 *new_start;
+ u32 new_acl;
+ struct ehca_pd *new_pd;
+ u32 tmp_lkey, tmp_rkey;
+ unsigned long sl_flags;
+ u32 num_kpages = 0;
+ u32 num_hwpages = 0;
+ struct ehca_mr_pginfo pginfo;
+
+ if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
+ /* TODO not supported, because PHYP rereg hCall needs pages */
+ ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
+ "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
+ ret = -EINVAL;
+ goto rereg_phys_mr_exit0;
+ }
+
+ if (mr_rereg_mask & IB_MR_REREG_PD) {
+ if (!pd) {
+ ehca_err(mr->device, "rereg with bad pd, pd=%p "
+ "mr_rereg_mask=%x", pd, mr_rereg_mask);
+ ret = -EINVAL;
+ goto rereg_phys_mr_exit0;
+ }
+ }
+
+ if ((mr_rereg_mask &
+ ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
+ (mr_rereg_mask == 0)) {
+ ret = -EINVAL;
+ goto rereg_phys_mr_exit0;
+ }
+
+ /* check other parameters */
+ if (e_mr == shca->maxmr) {
+ /* should be impossible, however reject to be sure */
+ ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
+ "shca->maxmr=%p mr->lkey=%x",
+ mr, shca->maxmr, mr->lkey);
+ ret = -EINVAL;
+ goto rereg_phys_mr_exit0;
+ }
+ if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
+ if (e_mr->flags & EHCA_MR_FLAG_FMR) {
+ ehca_err(mr->device, "not supported for FMR, mr=%p "
+ "flags=%x", mr, e_mr->flags);
+ ret = -EINVAL;
+ goto rereg_phys_mr_exit0;
+ }
+ if (!phys_buf_array || num_phys_buf <= 0) {
+ ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
+ " phys_buf_array=%p num_phys_buf=%x",
+ mr_rereg_mask, phys_buf_array, num_phys_buf);
+ ret = -EINVAL;
+ goto rereg_phys_mr_exit0;
+ }
+ }
+ if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */
+ (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+ !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+ ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+ !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
+ /*
+ * Remote Write Access requires Local Write Access
+ * Remote Atomic Access requires Local Write Access
+ */
+ ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
+ "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
+ ret = -EINVAL;
+ goto rereg_phys_mr_exit0;
+ }
+
+ /* set requested values dependent on rereg request */
+ spin_lock_irqsave(&e_mr->mrlock, sl_flags);
+ new_start = e_mr->start;
+ new_size = e_mr->size;
+ new_acl = e_mr->acl;
+ new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
+
+ if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+ u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
+
+ new_start = iova_start; /* change address */
+ /* check physical buffer list and calculate size */
+ ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
+ num_phys_buf, iova_start,
+ &new_size);
+ if (ret)
+ goto rereg_phys_mr_exit1;
+ if ((new_size == 0) ||
+ (((u64)iova_start + new_size) < (u64)iova_start)) {
+ ehca_err(mr->device, "bad input values: new_size=%lx "
+ "iova_start=%p", new_size, iova_start);
+ ret = -EINVAL;
+ goto rereg_phys_mr_exit1;
+ }
+ num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
+ new_size, PAGE_SIZE);
+ num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
+ new_size, hw_pgsize);
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_PHYS;
+ pginfo.num_kpages = num_kpages;
+ pginfo.hwpage_size = hw_pgsize;
+ pginfo.num_hwpages = num_hwpages;
+ pginfo.u.phy.num_phys_buf = num_phys_buf;
+ pginfo.u.phy.phys_buf_array = phys_buf_array;
+ pginfo.next_hwpage =
+ ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
+ }
+ if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+ new_acl = mr_access_flags;
+ if (mr_rereg_mask & IB_MR_REREG_PD)
+ new_pd = container_of(pd, struct ehca_pd, ib_pd);
+
+ ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
+ new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
+ if (ret)
+ goto rereg_phys_mr_exit1;
+
+ /* successful reregistration */
+ if (mr_rereg_mask & IB_MR_REREG_PD)
+ mr->pd = pd;
+ mr->lkey = tmp_lkey;
+ mr->rkey = tmp_rkey;
+
+rereg_phys_mr_exit1:
+ spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
+rereg_phys_mr_exit0:
+ if (ret)
+ ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
+ "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
+ "iova_start=%p",
+ ret, mr, mr_rereg_mask, pd, phys_buf_array,
+ num_phys_buf, mr_access_flags, iova_start);
+ return ret;
+} /* end ehca_rereg_phys_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
+{
+ int ret = 0;
+ u64 h_ret;
+ struct ehca_shca *shca =
+ container_of(mr->device, struct ehca_shca, ib_device);
+ struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
+ unsigned long sl_flags;
+ struct ehca_mr_hipzout_parms hipzout;
+
+ if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
+ ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
+ "e_mr->flags=%x", mr, e_mr, e_mr->flags);
+ ret = -EINVAL;
+ goto query_mr_exit0;
+ }
+
+ memset(mr_attr, 0, sizeof(struct ib_mr_attr));
+ spin_lock_irqsave(&e_mr->mrlock, sl_flags);
+
+ h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(mr->device, "hipz_mr_query failed, h_ret=%li mr=%p "
+ "hca_hndl=%lx mr_hndl=%lx lkey=%x",
+ h_ret, mr, shca->ipz_hca_handle.handle,
+ e_mr->ipz_mr_handle.handle, mr->lkey);
+ ret = ehca2ib_return_code(h_ret);
+ goto query_mr_exit1;
+ }
+ mr_attr->pd = mr->pd;
+ mr_attr->device_virt_addr = hipzout.vaddr;
+ mr_attr->size = hipzout.len;
+ mr_attr->lkey = hipzout.lkey;
+ mr_attr->rkey = hipzout.rkey;
+ ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
+
+query_mr_exit1:
+ spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
+query_mr_exit0:
+ if (ret)
+ ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
+ ret, mr, mr_attr);
+ return ret;
+} /* end ehca_query_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dereg_mr(struct ib_mr *mr)
+{
+ int ret = 0;
+ u64 h_ret;
+ struct ehca_shca *shca =
+ container_of(mr->device, struct ehca_shca, ib_device);
+ struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
+
+ if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
+ ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
+ "e_mr->flags=%x", mr, e_mr, e_mr->flags);
+ ret = -EINVAL;
+ goto dereg_mr_exit0;
+ } else if (e_mr == shca->maxmr) {
+ /* should be impossible, however reject to be sure */
+ ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
+ "shca->maxmr=%p mr->lkey=%x",
+ mr, shca->maxmr, mr->lkey);
+ ret = -EINVAL;
+ goto dereg_mr_exit0;
+ }
+
+ /* TODO: BUSY: MR still has bound window(s) */
+ h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(mr->device, "hipz_free_mr failed, h_ret=%li shca=%p "
+ "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x",
+ h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
+ e_mr->ipz_mr_handle.handle, mr->lkey);
+ ret = ehca2ib_return_code(h_ret);
+ goto dereg_mr_exit0;
+ }
+
+ if (e_mr->umem)
+ ib_umem_release(e_mr->umem);
+
+ /* successful deregistration */
+ ehca_mr_delete(e_mr);
+
+dereg_mr_exit0:
+ if (ret)
+ ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
+ return ret;
+} /* end ehca_dereg_mr() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
+{
+ struct ib_mw *ib_mw;
+ u64 h_ret;
+ struct ehca_mw *e_mw;
+ struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+ struct ehca_shca *shca =
+ container_of(pd->device, struct ehca_shca, ib_device);
+ struct ehca_mw_hipzout_parms hipzout;
+
+ e_mw = ehca_mw_new();
+ if (!e_mw) {
+ ib_mw = ERR_PTR(-ENOMEM);
+ goto alloc_mw_exit0;
+ }
+
+ h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
+ e_pd->fw_pd, &hipzout);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%li "
+ "shca=%p hca_hndl=%lx mw=%p",
+ h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
+ ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
+ goto alloc_mw_exit1;
+ }
+ /* successful MW allocation */
+ e_mw->ipz_mw_handle = hipzout.handle;
+ e_mw->ib_mw.rkey = hipzout.rkey;
+ return &e_mw->ib_mw;
+
+alloc_mw_exit1:
+ ehca_mw_delete(e_mw);
+alloc_mw_exit0:
+ if (IS_ERR(ib_mw))
+ ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
+ return ib_mw;
+} /* end ehca_alloc_mw() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_bind_mw(struct ib_qp *qp,
+ struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind)
+{
+ /* TODO: not supported up to now */
+ ehca_gen_err("bind MW currently not supported by HCAD");
+
+ return -EPERM;
+} /* end ehca_bind_mw() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dealloc_mw(struct ib_mw *mw)
+{
+ u64 h_ret;
+ struct ehca_shca *shca =
+ container_of(mw->device, struct ehca_shca, ib_device);
+ struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
+
+ h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(mw->device, "hipz_free_mw failed, h_ret=%li shca=%p "
+ "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx",
+ h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
+ e_mw->ipz_mw_handle.handle);
+ return ehca2ib_return_code(h_ret);
+ }
+ /* successful deallocation */
+ ehca_mw_delete(e_mw);
+ return 0;
+} /* end ehca_dealloc_mw() */
+
+/*----------------------------------------------------------------------*/
+
+struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
+ int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr)
+{
+ struct ib_fmr *ib_fmr;
+ struct ehca_shca *shca =
+ container_of(pd->device, struct ehca_shca, ib_device);
+ struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
+ struct ehca_mr *e_fmr;
+ int ret;
+ u32 tmp_lkey, tmp_rkey;
+ struct ehca_mr_pginfo pginfo;
+ u64 hw_pgsize;
+
+ /* check other parameters */
+ if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+ !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+ ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+ !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+ /*
+ * Remote Write Access requires Local Write Access
+ * Remote Atomic Access requires Local Write Access
+ */
+ ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+ mr_access_flags);
+ ib_fmr = ERR_PTR(-EINVAL);
+ goto alloc_fmr_exit0;
+ }
+ if (mr_access_flags & IB_ACCESS_MW_BIND) {
+ ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+ mr_access_flags);
+ ib_fmr = ERR_PTR(-EINVAL);
+ goto alloc_fmr_exit0;
+ }
+ if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
+ ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
+ "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
+ fmr_attr->max_pages, fmr_attr->max_maps,
+ fmr_attr->page_shift);
+ ib_fmr = ERR_PTR(-EINVAL);
+ goto alloc_fmr_exit0;
+ }
+
+ hw_pgsize = 1 << fmr_attr->page_shift;
+ if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
+ ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
+ fmr_attr->page_shift);
+ ib_fmr = ERR_PTR(-EINVAL);
+ goto alloc_fmr_exit0;
+ }
+
+ e_fmr = ehca_mr_new();
+ if (!e_fmr) {
+ ib_fmr = ERR_PTR(-ENOMEM);
+ goto alloc_fmr_exit0;
+ }
+ e_fmr->flags |= EHCA_MR_FLAG_FMR;
+
+ /* register MR on HCA */
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.hwpage_size = hw_pgsize;
+	/*
+	 * pginfo.num_hwpages == 0, i.e. register_rpages() will not be called
+	 * here but is deferred to map_phys_fmr()
+	 */
+ ret = ehca_reg_mr(shca, e_fmr, NULL,
+ fmr_attr->max_pages * (1 << fmr_attr->page_shift),
+ mr_access_flags, e_pd, &pginfo,
+ &tmp_lkey, &tmp_rkey);
+ if (ret) {
+ ib_fmr = ERR_PTR(ret);
+ goto alloc_fmr_exit1;
+ }
+
+ /* successful */
+ e_fmr->hwpage_size = hw_pgsize;
+ e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
+ e_fmr->fmr_max_pages = fmr_attr->max_pages;
+ e_fmr->fmr_max_maps = fmr_attr->max_maps;
+ e_fmr->fmr_map_cnt = 0;
+ return &e_fmr->ib.ib_fmr;
+
+alloc_fmr_exit1:
+ ehca_mr_delete(e_fmr);
+alloc_fmr_exit0:
+ return ib_fmr;
+} /* end ehca_alloc_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_map_phys_fmr(struct ib_fmr *fmr,
+ u64 *page_list,
+ int list_len,
+ u64 iova)
+{
+ int ret;
+ struct ehca_shca *shca =
+ container_of(fmr->device, struct ehca_shca, ib_device);
+ struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
+ struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
+ struct ehca_mr_pginfo pginfo;
+ u32 tmp_lkey, tmp_rkey;
+
+ if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+ ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
+ e_fmr, e_fmr->flags);
+ ret = -EINVAL;
+ goto map_phys_fmr_exit0;
+ }
+ ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
+ if (ret)
+ goto map_phys_fmr_exit0;
+ if (iova % e_fmr->fmr_page_size) {
+ /* only whole-numbered pages */
+ ehca_err(fmr->device, "bad iova, iova=%lx fmr_page_size=%x",
+ iova, e_fmr->fmr_page_size);
+ ret = -EINVAL;
+ goto map_phys_fmr_exit0;
+ }
+ if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
+		/* HCAD does not limit the number of maps; trace this anyway */
+ ehca_info(fmr->device, "map limit exceeded, fmr=%p "
+ "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
+ fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
+ }
+
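+	/*
+	 * build page info for the new mapping; the actual remap is
+	 * performed below as a reregistration of the underlying MR
+	 */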
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_FMR;
+ pginfo.num_kpages = list_len;
+ pginfo.hwpage_size = e_fmr->hwpage_size;
+ pginfo.num_hwpages =
+ list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
+ pginfo.u.fmr.page_list = page_list;
+ pginfo.next_hwpage =
+ (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
+ pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
+
+ ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
+ list_len * e_fmr->fmr_page_size,
+ e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
+ if (ret)
+ goto map_phys_fmr_exit0;
+
+ /* successful reregistration */
+ e_fmr->fmr_map_cnt++;
+ e_fmr->ib.ib_fmr.lkey = tmp_lkey;
+ e_fmr->ib.ib_fmr.rkey = tmp_rkey;
+ return 0;
+
+map_phys_fmr_exit0:
+ if (ret)
+ ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
+ "iova=%lx", ret, fmr, page_list, list_len, iova);
+ return ret;
+} /* end ehca_map_phys_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_unmap_fmr(struct list_head *fmr_list)
+{
+ int ret = 0;
+ struct ib_fmr *ib_fmr;
+ struct ehca_shca *shca = NULL;
+ struct ehca_shca *prev_shca;
+ struct ehca_mr *e_fmr;
+ u32 num_fmr = 0;
+ u32 unmap_fmr_cnt = 0;
+
+	/* check that all FMRs belong to the same SHCA, and check the internal flag */
+ list_for_each_entry(ib_fmr, fmr_list, list) {
+ prev_shca = shca;
+ if (!ib_fmr) {
+ ehca_gen_err("bad fmr=%p in list", ib_fmr);
+ ret = -EINVAL;
+ goto unmap_fmr_exit0;
+ }
+ shca = container_of(ib_fmr->device, struct ehca_shca,
+ ib_device);
+ e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
+ if ((shca != prev_shca) && prev_shca) {
+ ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
+ "prev_shca=%p e_fmr=%p",
+ shca, prev_shca, e_fmr);
+ ret = -EINVAL;
+ goto unmap_fmr_exit0;
+ }
+ if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+ ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
+ "e_fmr->flags=%x", e_fmr, e_fmr->flags);
+ ret = -EINVAL;
+ goto unmap_fmr_exit0;
+ }
+ num_fmr++;
+ }
+
+ /* loop over all FMRs to unmap */
+ list_for_each_entry(ib_fmr, fmr_list, list) {
+ unmap_fmr_cnt++;
+ e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
+ shca = container_of(ib_fmr->device, struct ehca_shca,
+ ib_device);
+ ret = ehca_unmap_one_fmr(shca, e_fmr);
+ if (ret) {
+			/* unmap failed, stop unmapping the remaining FMRs */
+ ehca_err(&shca->ib_device, "unmap of one FMR failed, "
+ "stop rest, e_fmr=%p num_fmr=%x "
+ "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
+ unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
+ goto unmap_fmr_exit0;
+ }
+ }
+
+unmap_fmr_exit0:
+ if (ret)
+ ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
+ ret, fmr_list, num_fmr, unmap_fmr_cnt);
+ return ret;
+} /* end ehca_unmap_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dealloc_fmr(struct ib_fmr *fmr)
+{
+ int ret;
+ u64 h_ret;
+ struct ehca_shca *shca =
+ container_of(fmr->device, struct ehca_shca, ib_device);
+ struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
+
+ if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
+ ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
+ e_fmr, e_fmr->flags);
+ ret = -EINVAL;
+ goto free_fmr_exit0;
+ }
+
+ h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%li e_fmr=%p "
+ "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x",
+ h_ret, e_fmr, shca->ipz_hca_handle.handle,
+ e_fmr->ipz_mr_handle.handle, fmr->lkey);
+ ret = ehca2ib_return_code(h_ret);
+ goto free_fmr_exit0;
+ }
+ /* successful deregistration */
+ ehca_mr_delete(e_fmr);
+ return 0;
+
+free_fmr_exit0:
+ if (ret)
+ ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
+ return ret;
+} /* end ehca_dealloc_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_mr(struct ehca_shca *shca,
+ struct ehca_mr *e_mr,
+ u64 *iova_start,
+ u64 size,
+ int acl,
+ struct ehca_pd *e_pd,
+ struct ehca_mr_pginfo *pginfo,
+ u32 *lkey, /*OUT*/
+ u32 *rkey) /*OUT*/
+{
+ int ret;
+ u64 h_ret;
+ u32 hipz_acl;
+ struct ehca_mr_hipzout_parms hipzout;
+
+ ehca_mrmw_map_acl(acl, &hipz_acl);
+ ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
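+	/* the ehca_use_hp_mr module option requests a high-performance MR
+	 * via an extra bit in the hipz access control */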
+ if (ehca_use_hp_mr == 1)
+ hipz_acl |= 0x00000001;
+
+ h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
+ (u64)iova_start, size, hipz_acl,
+ e_pd->fw_pd, &hipzout);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%li "
+ "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle);
+ ret = ehca2ib_return_code(h_ret);
+ goto ehca_reg_mr_exit0;
+ }
+
+ e_mr->ipz_mr_handle = hipzout.handle;
+
+ ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
+ if (ret)
+ goto ehca_reg_mr_exit1;
+
+ /* successful registration */
+ e_mr->num_kpages = pginfo->num_kpages;
+ e_mr->num_hwpages = pginfo->num_hwpages;
+ e_mr->hwpage_size = pginfo->hwpage_size;
+ e_mr->start = iova_start;
+ e_mr->size = size;
+ e_mr->acl = acl;
+ *lkey = hipzout.lkey;
+ *rkey = hipzout.rkey;
+ return 0;
+
+ehca_reg_mr_exit1:
+ h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "h_ret=%li shca=%p e_mr=%p "
+ "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x "
+ "pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%i",
+ h_ret, shca, e_mr, iova_start, size, acl, e_pd,
+ hipzout.lkey, pginfo, pginfo->num_kpages,
+ pginfo->num_hwpages, ret);
+ ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
+ "not recoverable");
+ }
+ehca_reg_mr_exit0:
+ if (ret)
+ ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
+ "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
+ "num_kpages=%lx num_hwpages=%lx",
+ ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
+ pginfo->num_kpages, pginfo->num_hwpages);
+ return ret;
+} /* end ehca_reg_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_mr_rpages(struct ehca_shca *shca,
+ struct ehca_mr *e_mr,
+ struct ehca_mr_pginfo *pginfo)
+{
+ int ret = 0;
+ u64 h_ret;
+ u32 rnum;
+ u64 rpage;
+ u32 i;
+ u64 *kpage;
+
+ if (!pginfo->num_hwpages) /* in case of fmr */
+ return 0;
+
+ kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!kpage) {
+ ehca_err(&shca->ib_device, "kpage alloc failed");
+ ret = -ENOMEM;
+ goto ehca_reg_mr_rpages_exit0;
+ }
+
+	/* register at most MAX_RPAGES ehca MR pages per hcall */
+ for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {
+
+ if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
+ rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
+ if (rnum == 0)
+ rnum = MAX_RPAGES; /* last shot is full */
+ } else
+ rnum = MAX_RPAGES;
+
+ ret = ehca_set_pagebuf(pginfo, rnum, kpage);
+ if (ret) {
+ ehca_err(&shca->ib_device, "ehca_set_pagebuf "
+ "bad rc, ret=%i rnum=%x kpage=%p",
+ ret, rnum, kpage);
+ goto ehca_reg_mr_rpages_exit1;
+ }
+
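+		/*
+		 * with more than one page, pass the control block holding
+		 * the page list by its absolute address; for a single page,
+		 * pass the page address itself
+		 */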
+ if (rnum > 1) {
+ rpage = virt_to_abs(kpage);
+ if (!rpage) {
+ ehca_err(&shca->ib_device, "kpage=%p i=%x",
+ kpage, i);
+ ret = -EFAULT;
+ goto ehca_reg_mr_rpages_exit1;
+ }
+ } else
+ rpage = *kpage;
+
+ h_ret = hipz_h_register_rpage_mr(
+ shca->ipz_hca_handle, e_mr,
+ ehca_encode_hwpage_size(pginfo->hwpage_size),
+ 0, rpage, rnum);
+
+ if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
+ /*
+ * check for 'registration complete'==H_SUCCESS
+ * and for 'page registered'==H_PAGE_REGISTERED
+ */
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "last "
+ "hipz_reg_rpage_mr failed, h_ret=%li "
+ "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx"
+ " lkey=%x", h_ret, e_mr, i,
+ shca->ipz_hca_handle.handle,
+ e_mr->ipz_mr_handle.handle,
+ e_mr->ib.ib_mr.lkey);
+ ret = ehca2ib_return_code(h_ret);
+ break;
+ } else
+ ret = 0;
+ } else if (h_ret != H_PAGE_REGISTERED) {
+ ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
+ "h_ret=%li e_mr=%p i=%x lkey=%x hca_hndl=%lx "
+ "mr_hndl=%lx", h_ret, e_mr, i,
+ e_mr->ib.ib_mr.lkey,
+ shca->ipz_hca_handle.handle,
+ e_mr->ipz_mr_handle.handle);
+ ret = ehca2ib_return_code(h_ret);
+ break;
+ } else
+ ret = 0;
+ } /* end for(i) */
+
+
+ehca_reg_mr_rpages_exit1:
+ ehca_free_fw_ctrlblock(kpage);
+ehca_reg_mr_rpages_exit0:
+ if (ret)
+ ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
+ "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr,
+ pginfo, pginfo->num_kpages, pginfo->num_hwpages);
+ return ret;
+} /* end ehca_reg_mr_rpages() */
+
+/*----------------------------------------------------------------------*/
+
+inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
+ struct ehca_mr *e_mr,
+ u64 *iova_start,
+ u64 size,
+ u32 acl,
+ struct ehca_pd *e_pd,
+ struct ehca_mr_pginfo *pginfo,
+ u32 *lkey, /*OUT*/
+ u32 *rkey) /*OUT*/
+{
+ int ret;
+ u64 h_ret;
+ u32 hipz_acl;
+ u64 *kpage;
+ u64 rpage;
+ struct ehca_mr_pginfo pginfo_save;
+ struct ehca_mr_hipzout_parms hipzout;
+
+ ehca_mrmw_map_acl(acl, &hipz_acl);
+ ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
+
+ kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!kpage) {
+ ehca_err(&shca->ib_device, "kpage alloc failed");
+ ret = -ENOMEM;
+ goto ehca_rereg_mr_rereg1_exit0;
+ }
+
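+	/*
+	 * save the page info so it can be restored if the single-hcall
+	 * reregistration fails and the caller has to fall back to Rereg3
+	 */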
+ pginfo_save = *pginfo;
+ ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
+ if (ret) {
+ ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
+ "pginfo=%p type=%x num_kpages=%lx num_hwpages=%lx "
+ "kpage=%p", e_mr, pginfo, pginfo->type,
+ pginfo->num_kpages, pginfo->num_hwpages, kpage);
+ goto ehca_rereg_mr_rereg1_exit1;
+ }
+ rpage = virt_to_abs(kpage);
+ if (!rpage) {
+ ehca_err(&shca->ib_device, "kpage=%p", kpage);
+ ret = -EFAULT;
+ goto ehca_rereg_mr_rereg1_exit1;
+ }
+ h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
+ (u64)iova_start, size, hipz_acl,
+ e_pd->fw_pd, rpage, &hipzout);
+ if (h_ret != H_SUCCESS) {
+		/*
+		 * reregistration unsuccessful; retry with the 3-hcall path.
+		 * This is required e.g. on H_MR_CONDITION
+		 * (MW bound or MR is shared)
+		 */
+ ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
+ "(Rereg1), h_ret=%li e_mr=%p", h_ret, e_mr);
+ *pginfo = pginfo_save;
+ ret = -EAGAIN;
+ } else if ((u64 *)hipzout.vaddr != iova_start) {
+ ehca_err(&shca->ib_device, "PHYP changed iova_start in "
+ "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p "
+ "mr_handle=%lx lkey=%x lkey_out=%x", iova_start,
+ hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
+ e_mr->ib.ib_mr.lkey, hipzout.lkey);
+ ret = -EFAULT;
+ } else {
+ /*
+ * successful reregistration
+ * note: start and start_out are identical for eServer HCAs
+ */
+ e_mr->num_kpages = pginfo->num_kpages;
+ e_mr->num_hwpages = pginfo->num_hwpages;
+ e_mr->hwpage_size = pginfo->hwpage_size;
+ e_mr->start = iova_start;
+ e_mr->size = size;
+ e_mr->acl = acl;
+ *lkey = hipzout.lkey;
+ *rkey = hipzout.rkey;
+ }
+
+ehca_rereg_mr_rereg1_exit1:
+ ehca_free_fw_ctrlblock(kpage);
+ehca_rereg_mr_rereg1_exit0:
+ if ( ret && (ret != -EAGAIN) )
+ ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
+ "pginfo=%p num_kpages=%lx num_hwpages=%lx",
+ ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
+ pginfo->num_hwpages);
+ return ret;
+} /* end ehca_rereg_mr_rereg1() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_rereg_mr(struct ehca_shca *shca,
+ struct ehca_mr *e_mr,
+ u64 *iova_start,
+ u64 size,
+ int acl,
+ struct ehca_pd *e_pd,
+ struct ehca_mr_pginfo *pginfo,
+ u32 *lkey,
+ u32 *rkey)
+{
+ int ret = 0;
+ u64 h_ret;
+ int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
+ int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
+
+ /* first determine reregistration hCall(s) */
+ if ((pginfo->num_hwpages > MAX_RPAGES) ||
+ (e_mr->num_hwpages > MAX_RPAGES) ||
+ (pginfo->num_hwpages > e_mr->num_hwpages)) {
+ ehca_dbg(&shca->ib_device, "Rereg3 case, "
+ "pginfo->num_hwpages=%lx e_mr->num_hwpages=%x",
+ pginfo->num_hwpages, e_mr->num_hwpages);
+ rereg_1_hcall = 0;
+ rereg_3_hcall = 1;
+ }
+
+ if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */
+ rereg_1_hcall = 0;
+ rereg_3_hcall = 1;
+ e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
+ ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
+ e_mr);
+ }
+
+ if (rereg_1_hcall) {
+ ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
+ acl, e_pd, pginfo, lkey, rkey);
+ if (ret) {
+ if (ret == -EAGAIN)
+ rereg_3_hcall = 1;
+ else
+ goto ehca_rereg_mr_exit0;
+ }
+ }
+
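+	/*
+	 * Rereg3 path: free the old MR, reset the software MR object
+	 * and register it again with the new attributes
+	 */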
+ if (rereg_3_hcall) {
+ struct ehca_mr save_mr;
+
+ /* first deregister old MR */
+ h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "hipz_free_mr failed, "
+ "h_ret=%li e_mr=%p hca_hndl=%lx mr_hndl=%lx "
+ "mr->lkey=%x",
+ h_ret, e_mr, shca->ipz_hca_handle.handle,
+ e_mr->ipz_mr_handle.handle,
+ e_mr->ib.ib_mr.lkey);
+ ret = ehca2ib_return_code(h_ret);
+ goto ehca_rereg_mr_exit0;
+ }
+ /* clean ehca_mr_t, without changing struct ib_mr and lock */
+ save_mr = *e_mr;
+ ehca_mr_deletenew(e_mr);
+
+ /* set some MR values */
+ e_mr->flags = save_mr.flags;
+ e_mr->hwpage_size = save_mr.hwpage_size;
+ e_mr->fmr_page_size = save_mr.fmr_page_size;
+ e_mr->fmr_max_pages = save_mr.fmr_max_pages;
+ e_mr->fmr_max_maps = save_mr.fmr_max_maps;
+ e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
+
+ ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
+ e_pd, pginfo, lkey, rkey);
+ if (ret) {
+ u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
+ memcpy(&e_mr->flags, &(save_mr.flags),
+ sizeof(struct ehca_mr) - offset);
+ goto ehca_rereg_mr_exit0;
+ }
+ }
+
+ehca_rereg_mr_exit0:
+ if (ret)
+ ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
+ "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
+ "num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
+ "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
+ acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
+ rereg_1_hcall, rereg_3_hcall);
+ return ret;
+} /* end ehca_rereg_mr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_unmap_one_fmr(struct ehca_shca *shca,
+ struct ehca_mr *e_fmr)
+{
+ int ret = 0;
+ u64 h_ret;
+ struct ehca_pd *e_pd =
+ container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
+ struct ehca_mr save_fmr;
+ u32 tmp_lkey, tmp_rkey;
+ struct ehca_mr_pginfo pginfo;
+ struct ehca_mr_hipzout_parms hipzout;
+
+ if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
+ /*
+ * note: after using rereg hcall with len=0,
+ * rereg hcall must be used again for registering pages
+ */
+ h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
+ 0, 0, e_pd->fw_pd, 0, &hipzout);
+ if (h_ret == H_SUCCESS) {
+ /* successful reregistration */
+ e_fmr->start = NULL;
+ e_fmr->size = 0;
+ tmp_lkey = hipzout.lkey;
+ tmp_rkey = hipzout.rkey;
+ return 0;
+ }
+		/*
+		 * should not happen: the length was checked above,
+		 * FMRs are not shared, and no MW can be bound to an FMR
+		 */
+ ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
+ "(Rereg1), h_ret=%li e_fmr=%p hca_hndl=%lx "
+ "mr_hndl=%lx lkey=%x lkey_out=%x",
+ h_ret, e_fmr, shca->ipz_hca_handle.handle,
+ e_fmr->ipz_mr_handle.handle,
+ e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
+ /* try free and rereg */
+ }
+
+ /* first free old FMR */
+ h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "hipz_free_mr failed, "
+ "h_ret=%li e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
+ "lkey=%x",
+ h_ret, e_fmr, shca->ipz_hca_handle.handle,
+ e_fmr->ipz_mr_handle.handle,
+ e_fmr->ib.ib_fmr.lkey);
+ ret = ehca2ib_return_code(h_ret);
+ goto ehca_unmap_one_fmr_exit0;
+ }
+ /* clean ehca_mr_t, without changing lock */
+ save_fmr = *e_fmr;
+ ehca_mr_deletenew(e_fmr);
+
+ /* set some MR values */
+ e_fmr->flags = save_fmr.flags;
+ e_fmr->hwpage_size = save_fmr.hwpage_size;
+ e_fmr->fmr_page_size = save_fmr.fmr_page_size;
+ e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
+ e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
+ e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
+ e_fmr->acl = save_fmr.acl;
+
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_FMR;
+ ret = ehca_reg_mr(shca, e_fmr, NULL,
+ (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
+ e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
+ &tmp_rkey);
+ if (ret) {
+ u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
+		memcpy(&e_fmr->flags, &(save_fmr.flags),
+ sizeof(struct ehca_mr) - offset);
+ }
+
+ehca_unmap_one_fmr_exit0:
+ if (ret)
+ ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
+ "fmr_max_pages=%x",
+ ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
+ return ret;
+} /* end ehca_unmap_one_fmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_smr(struct ehca_shca *shca,
+ struct ehca_mr *e_origmr,
+ struct ehca_mr *e_newmr,
+ u64 *iova_start,
+ int acl,
+ struct ehca_pd *e_pd,
+ u32 *lkey, /*OUT*/
+ u32 *rkey) /*OUT*/
+{
+ int ret = 0;
+ u64 h_ret;
+ u32 hipz_acl;
+ struct ehca_mr_hipzout_parms hipzout;
+
+ ehca_mrmw_map_acl(acl, &hipz_acl);
+ ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
+
+ h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
+ (u64)iova_start, hipz_acl, e_pd->fw_pd,
+ &hipzout);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%li "
+ "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
+ "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+ h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
+ shca->ipz_hca_handle.handle,
+ e_origmr->ipz_mr_handle.handle,
+ e_origmr->ib.ib_mr.lkey);
+ ret = ehca2ib_return_code(h_ret);
+ goto ehca_reg_smr_exit0;
+ }
+ /* successful registration */
+ e_newmr->num_kpages = e_origmr->num_kpages;
+ e_newmr->num_hwpages = e_origmr->num_hwpages;
+ e_newmr->hwpage_size = e_origmr->hwpage_size;
+ e_newmr->start = iova_start;
+ e_newmr->size = e_origmr->size;
+ e_newmr->acl = acl;
+ e_newmr->ipz_mr_handle = hipzout.handle;
+ *lkey = hipzout.lkey;
+ *rkey = hipzout.rkey;
+ return 0;
+
+ehca_reg_smr_exit0:
+ if (ret)
+ ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
+ "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
+ ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
+ return ret;
+} /* end ehca_reg_smr() */
+
+/*----------------------------------------------------------------------*/
+
+/* register internal max-MR to internal SHCA */
+int ehca_reg_internal_maxmr(
+ struct ehca_shca *shca,
+ struct ehca_pd *e_pd,
+ struct ehca_mr **e_maxmr) /*OUT*/
+{
+ int ret;
+ struct ehca_mr *e_mr;
+ u64 *iova_start;
+ u64 size_maxmr;
+ struct ehca_mr_pginfo pginfo;
+ struct ib_phys_buf ib_pbuf;
+ u32 num_kpages;
+ u32 num_hwpages;
+ u64 hw_pgsize;
+
+ e_mr = ehca_mr_new();
+ if (!e_mr) {
+ ehca_err(&shca->ib_device, "out of memory");
+ ret = -ENOMEM;
+ goto ehca_reg_internal_maxmr_exit0;
+ }
+ e_mr->flags |= EHCA_MR_FLAG_MAXMR;
+
+ /* register internal max-MR on HCA */
+ size_maxmr = (u64)high_memory - PAGE_OFFSET;
+ iova_start = (u64 *)KERNELBASE;
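+	/* the internal max-MR spans all of kernel memory,
+	 * from KERNELBASE up to high_memory (see ehca_mr_is_maxmr()) */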
+ ib_pbuf.addr = 0;
+ ib_pbuf.size = size_maxmr;
+ num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
+ PAGE_SIZE);
+ hw_pgsize = ehca_get_max_hwpage_size(shca);
+ num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
+ hw_pgsize);
+
+ memset(&pginfo, 0, sizeof(pginfo));
+ pginfo.type = EHCA_MR_PGI_PHYS;
+ pginfo.num_kpages = num_kpages;
+ pginfo.num_hwpages = num_hwpages;
+ pginfo.hwpage_size = hw_pgsize;
+ pginfo.u.phy.num_phys_buf = 1;
+ pginfo.u.phy.phys_buf_array = &ib_pbuf;
+
+ ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
+ &pginfo, &e_mr->ib.ib_mr.lkey,
+ &e_mr->ib.ib_mr.rkey);
+ if (ret) {
+ ehca_err(&shca->ib_device, "reg of internal max MR failed, "
+ "e_mr=%p iova_start=%p size_maxmr=%lx num_kpages=%x "
+ "num_hwpages=%x", e_mr, iova_start, size_maxmr,
+ num_kpages, num_hwpages);
+ goto ehca_reg_internal_maxmr_exit1;
+ }
+
+ /* successful registration of all pages */
+ e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
+ e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
+ e_mr->ib.ib_mr.uobject = NULL;
+ atomic_inc(&(e_pd->ib_pd.usecnt));
+ atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
+ *e_maxmr = e_mr;
+ return 0;
+
+ehca_reg_internal_maxmr_exit1:
+ ehca_mr_delete(e_mr);
+ehca_reg_internal_maxmr_exit0:
+ if (ret)
+ ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
+ ret, shca, e_pd, e_maxmr);
+ return ret;
+} /* end ehca_reg_internal_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_reg_maxmr(struct ehca_shca *shca,
+ struct ehca_mr *e_newmr,
+ u64 *iova_start,
+ int acl,
+ struct ehca_pd *e_pd,
+ u32 *lkey,
+ u32 *rkey)
+{
+ u64 h_ret;
+ struct ehca_mr *e_origmr = shca->maxmr;
+ u32 hipz_acl;
+ struct ehca_mr_hipzout_parms hipzout;
+
+ ehca_mrmw_map_acl(acl, &hipz_acl);
+ ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
+
+ h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
+ (u64)iova_start, hipz_acl, e_pd->fw_pd,
+ &hipzout);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%li "
+ "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+ h_ret, e_origmr, shca->ipz_hca_handle.handle,
+ e_origmr->ipz_mr_handle.handle,
+ e_origmr->ib.ib_mr.lkey);
+ return ehca2ib_return_code(h_ret);
+ }
+ /* successful registration */
+ e_newmr->num_kpages = e_origmr->num_kpages;
+ e_newmr->num_hwpages = e_origmr->num_hwpages;
+ e_newmr->hwpage_size = e_origmr->hwpage_size;
+ e_newmr->start = iova_start;
+ e_newmr->size = e_origmr->size;
+ e_newmr->acl = acl;
+ e_newmr->ipz_mr_handle = hipzout.handle;
+ *lkey = hipzout.lkey;
+ *rkey = hipzout.rkey;
+ return 0;
+} /* end ehca_reg_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
+{
+ int ret;
+ struct ehca_mr *e_maxmr;
+ struct ib_pd *ib_pd;
+
+ if (!shca->maxmr) {
+ ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
+ ret = -EINVAL;
+ goto ehca_dereg_internal_maxmr_exit0;
+ }
+
+ e_maxmr = shca->maxmr;
+ ib_pd = e_maxmr->ib.ib_mr.pd;
+ shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
+
+ ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
+ if (ret) {
+ ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
+ "ret=%i e_maxmr=%p shca=%p lkey=%x",
+ ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
+ shca->maxmr = e_maxmr;
+ goto ehca_dereg_internal_maxmr_exit0;
+ }
+
+ atomic_dec(&ib_pd->usecnt);
+
+ehca_dereg_internal_maxmr_exit0:
+ if (ret)
+ ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
+ ret, shca, shca->maxmr);
+ return ret;
+} /* end ehca_dereg_internal_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * check the physical buffer array of the MR verbs for validity and
+ * calculate the MR size
+ */
+int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ u64 *iova_start,
+ u64 *size)
+{
+ struct ib_phys_buf *pbuf = phys_buf_array;
+ u64 size_count = 0;
+ u32 i;
+
+ if (num_phys_buf == 0) {
+ ehca_gen_err("bad phys buf array len, num_phys_buf=0");
+ return -EINVAL;
+ }
+ /* check first buffer */
+ if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
+ ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
+ "pbuf->addr=%lx pbuf->size=%lx",
+ iova_start, pbuf->addr, pbuf->size);
+ return -EINVAL;
+ }
+ if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
+ (num_phys_buf > 1)) {
+ ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%lx "
+ "pbuf->size=%lx", pbuf->addr, pbuf->size);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < num_phys_buf; i++) {
+ if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
+ ehca_gen_err("bad address, i=%x pbuf->addr=%lx "
+ "pbuf->size=%lx",
+ i, pbuf->addr, pbuf->size);
+ return -EINVAL;
+ }
+ if (((i > 0) && /* not 1st */
+ (i < (num_phys_buf - 1)) && /* not last */
+ (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
+ ehca_gen_err("bad size, i=%x pbuf->size=%lx",
+ i, pbuf->size);
+ return -EINVAL;
+ }
+ size_count += pbuf->size;
+ pbuf++;
+ }
+
+ *size = size_count;
+ return 0;
+} /* end ehca_mr_chk_buf_and_calc_size() */
+
+/*----------------------------------------------------------------------*/
+
+/* check the page list of the map FMR verb for validity */
+int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
+ u64 *page_list,
+ int list_len)
+{
+ u32 i;
+ u64 *page;
+
+ if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
+ ehca_gen_err("bad list_len, list_len=%x "
+ "e_fmr->fmr_max_pages=%x fmr=%p",
+ list_len, e_fmr->fmr_max_pages, e_fmr);
+ return -EINVAL;
+ }
+
+ /* each page must be aligned */
+ page = page_list;
+ for (i = 0; i < list_len; i++) {
+ if (*page % e_fmr->fmr_page_size) {
+ ehca_gen_err("bad page, i=%x *page=%lx page=%p fmr=%p "
+ "fmr_page_size=%x", i, *page, page, e_fmr,
+ e_fmr->fmr_page_size);
+ return -EINVAL;
+ }
+ page++;
+ }
+
+ return 0;
+} /* end ehca_fmr_check_page_list() */
+
+/*----------------------------------------------------------------------*/
+
+/* PAGE_SIZE >= pginfo->hwpage_size: a kernel page holds one or more HW pages */
+static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
+ u32 number,
+ u64 *kpage)
+{
+ int ret = 0;
+ struct ib_umem_chunk *prev_chunk;
+ struct ib_umem_chunk *chunk;
+ u64 pgaddr;
+ u32 i = 0;
+ u32 j = 0;
+ int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
+
+ /* loop over desired chunk entries */
+ chunk = pginfo->u.usr.next_chunk;
+ prev_chunk = pginfo->u.usr.next_chunk;
+ list_for_each_entry_continue(
+ chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
+ for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
+ pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
+ << PAGE_SHIFT ;
+ *kpage = phys_to_abs(pgaddr +
+ (pginfo->next_hwpage *
+ pginfo->hwpage_size));
+ if ( !(*kpage) ) {
+ ehca_gen_err("pgaddr=%lx "
+ "chunk->page_list[i]=%lx "
+ "i=%x next_hwpage=%lx",
+ pgaddr, (u64)sg_dma_address(
+ &chunk->page_list[i]),
+ i, pginfo->next_hwpage);
+ return -EFAULT;
+ }
+ (pginfo->hwpage_cnt)++;
+ (pginfo->next_hwpage)++;
+ kpage++;
+ if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
+ (pginfo->kpage_cnt)++;
+ (pginfo->u.usr.next_nmap)++;
+ pginfo->next_hwpage = 0;
+ i++;
+ }
+ j++;
+ if (j >= number) break;
+ }
+ if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
+ (j >= number)) {
+ pginfo->u.usr.next_nmap = 0;
+ prev_chunk = chunk;
+ break;
+ } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
+ pginfo->u.usr.next_nmap = 0;
+ prev_chunk = chunk;
+ } else if (j >= number)
+ break;
+ else
+ prev_chunk = chunk;
+ }
+ pginfo->u.usr.next_chunk =
+ list_prepare_entry(prev_chunk,
+ (&(pginfo->u.usr.region->chunk_list)),
+ list);
+ return ret;
+}
+
+/*
+ * check the given pages for a contiguous layout;
+ * the last page address is returned in prev_pgaddr for further checks
+ */
+static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
+ int start_idx, int end_idx,
+ u64 *prev_pgaddr)
+{
+ int t;
+ for (t = start_idx; t <= end_idx; t++) {
+ u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
+ if (ehca_debug_level >= 3)
+ ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr,
+ *(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
+ if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
+ ehca_gen_err("uncontiguous page found pgaddr=%lx "
+ "prev_pgaddr=%lx page_list_i=%x",
+ pgaddr, *prev_pgaddr, t);
+ return -EINVAL;
+ }
+ *prev_pgaddr = pgaddr;
+ }
+ return 0;
+}
+
+/* PAGE_SIZE < pginfo->hwpage_size: a HW page is built from several contiguous kernel pages */
+static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
+ u32 number,
+ u64 *kpage)
+{
+ int ret = 0;
+ struct ib_umem_chunk *prev_chunk;
+ struct ib_umem_chunk *chunk;
+ u64 pgaddr, prev_pgaddr;
+ u32 i = 0;
+ u32 j = 0;
+ int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
+ int nr_kpages = kpages_per_hwpage;
+
+ /* loop over desired chunk entries */
+ chunk = pginfo->u.usr.next_chunk;
+ prev_chunk = pginfo->u.usr.next_chunk;
+ list_for_each_entry_continue(
+ chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
+ for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
+ if (nr_kpages == kpages_per_hwpage) {
+ pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
+ << PAGE_SHIFT );
+ *kpage = phys_to_abs(pgaddr);
+ if ( !(*kpage) ) {
+ ehca_gen_err("pgaddr=%lx i=%x",
+ pgaddr, i);
+ ret = -EFAULT;
+ return ret;
+ }
+ /*
+ * The first page in a hwpage must be aligned;
+ * the first MR page is exempt from this rule.
+ */
+ if (pgaddr & (pginfo->hwpage_size - 1)) {
+ if (pginfo->hwpage_cnt) {
+ ehca_gen_err(
+ "invalid alignment "
+ "pgaddr=%lx i=%x "
+ "mr_pgsize=%lx",
+ pgaddr, i,
+ pginfo->hwpage_size);
+ ret = -EFAULT;
+ return ret;
+ }
+ /* first MR page */
+ pginfo->kpage_cnt =
+ (pgaddr &
+ (pginfo->hwpage_size - 1)) >>
+ PAGE_SHIFT;
+ nr_kpages -= pginfo->kpage_cnt;
+ *kpage = phys_to_abs(
+ pgaddr &
+ ~(pginfo->hwpage_size - 1));
+ }
+ if (ehca_debug_level >= 3) {
+ u64 val = *(u64 *)abs_to_virt(
+ phys_to_abs(pgaddr));
+ ehca_gen_dbg("kpage=%lx chunk_page=%lx "
+ "value=%016lx",
+ *kpage, pgaddr, val);
+ }
+ prev_pgaddr = pgaddr;
+ i++;
+ pginfo->kpage_cnt++;
+ pginfo->u.usr.next_nmap++;
+ nr_kpages--;
+ if (!nr_kpages)
+ goto next_kpage;
+ continue;
+ }
+ if (i + nr_kpages > chunk->nmap) {
+ ret = ehca_check_kpages_per_ate(
+ chunk->page_list, i,
+ chunk->nmap - 1, &prev_pgaddr);
+ if (ret) return ret;
+ pginfo->kpage_cnt += chunk->nmap - i;
+ pginfo->u.usr.next_nmap += chunk->nmap - i;
+ nr_kpages -= chunk->nmap - i;
+ break;
+ }
+
+ ret = ehca_check_kpages_per_ate(chunk->page_list, i,
+ i + nr_kpages - 1,
+ &prev_pgaddr);
+ if (ret) return ret;
+ i += nr_kpages;
+ pginfo->kpage_cnt += nr_kpages;
+ pginfo->u.usr.next_nmap += nr_kpages;
+next_kpage:
+ nr_kpages = kpages_per_hwpage;
+ (pginfo->hwpage_cnt)++;
+ kpage++;
+ j++;
+ if (j >= number) break;
+ }
+ if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
+ (j >= number)) {
+ pginfo->u.usr.next_nmap = 0;
+ prev_chunk = chunk;
+ break;
+ } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
+ pginfo->u.usr.next_nmap = 0;
+ prev_chunk = chunk;
+ } else if (j >= number)
+ break;
+ else
+ prev_chunk = chunk;
+ }
+ pginfo->u.usr.next_chunk =
+ list_prepare_entry(prev_chunk,
+ (&(pginfo->u.usr.region->chunk_list)),
+ list);
+ return ret;
+}
+
+static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
+ u32 number, u64 *kpage)
+{
+ int ret = 0;
+ struct ib_phys_buf *pbuf;
+ u64 num_hw, offs_hw;
+ u32 i = 0;
+
+ /* loop over desired phys_buf_array entries */
+ while (i < number) {
+ pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
+ num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
+ pbuf->size, pginfo->hwpage_size);
+ offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
+ pginfo->hwpage_size;
+ while (pginfo->next_hwpage < offs_hw + num_hw) {
+ /* sanity check */
+ if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
+ (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
+ ehca_gen_err("kpage_cnt >= num_kpages, "
+ "kpage_cnt=%lx num_kpages=%lx "
+ "hwpage_cnt=%lx "
+ "num_hwpages=%lx i=%x",
+ pginfo->kpage_cnt,
+ pginfo->num_kpages,
+ pginfo->hwpage_cnt,
+ pginfo->num_hwpages, i);
+ return -EFAULT;
+ }
+ *kpage = phys_to_abs(
+ (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
+ (pginfo->next_hwpage * pginfo->hwpage_size));
+ if ( !(*kpage) && pbuf->addr ) {
+ ehca_gen_err("pbuf->addr=%lx pbuf->size=%lx "
+ "next_hwpage=%lx", pbuf->addr,
+ pbuf->size, pginfo->next_hwpage);
+ return -EFAULT;
+ }
+ (pginfo->hwpage_cnt)++;
+ (pginfo->next_hwpage)++;
+ if (PAGE_SIZE >= pginfo->hwpage_size) {
+ if (pginfo->next_hwpage %
+ (PAGE_SIZE / pginfo->hwpage_size) == 0)
+ (pginfo->kpage_cnt)++;
+ } else
+ pginfo->kpage_cnt += pginfo->hwpage_size /
+ PAGE_SIZE;
+ kpage++;
+ i++;
+ if (i >= number) break;
+ }
+ if (pginfo->next_hwpage >= offs_hw + num_hw) {
+ (pginfo->u.phy.next_buf)++;
+ pginfo->next_hwpage = 0;
+ }
+ }
+ return ret;
+}
+
+static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
+ u32 number, u64 *kpage)
+{
+ int ret = 0;
+ u64 *fmrlist;
+ u32 i;
+
+ /* loop over desired page_list entries */
+ fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
+ for (i = 0; i < number; i++) {
+ *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) +
+ pginfo->next_hwpage * pginfo->hwpage_size);
+ if ( !(*kpage) ) {
+ ehca_gen_err("*fmrlist=%lx fmrlist=%p "
+ "next_listelem=%lx next_hwpage=%lx",
+ *fmrlist, fmrlist,
+ pginfo->u.fmr.next_listelem,
+ pginfo->next_hwpage);
+ return -EFAULT;
+ }
+ (pginfo->hwpage_cnt)++;
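+		/*
+		 * fmr_pgsize >= hwpage_size: one list element spans one or
+		 * more HW pages; otherwise several contiguous list elements
+		 * make up a single HW page
+		 */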
+ if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
+ if (pginfo->next_hwpage %
+ (pginfo->u.fmr.fmr_pgsize /
+ pginfo->hwpage_size) == 0) {
+ (pginfo->kpage_cnt)++;
+ (pginfo->u.fmr.next_listelem)++;
+ fmrlist++;
+ pginfo->next_hwpage = 0;
+ } else
+ (pginfo->next_hwpage)++;
+ } else {
+ unsigned int cnt_per_hwpage = pginfo->hwpage_size /
+ pginfo->u.fmr.fmr_pgsize;
+ unsigned int j;
+ u64 prev = *kpage;
+			/* check whether the addresses are contiguous */
+ for (j = 1; j < cnt_per_hwpage; j++) {
+ u64 p = phys_to_abs(fmrlist[j] &
+ ~(pginfo->hwpage_size - 1));
+ if (prev + pginfo->u.fmr.fmr_pgsize != p) {
+ ehca_gen_err("uncontiguous fmr pages "
+ "found prev=%lx p=%lx "
+ "idx=%x", prev, p, i + j);
+ return -EINVAL;
+ }
+ prev = p;
+ }
+ pginfo->kpage_cnt += cnt_per_hwpage;
+ pginfo->u.fmr.next_listelem += cnt_per_hwpage;
+ fmrlist += cnt_per_hwpage;
+ }
+ kpage++;
+ }
+ return ret;
+}
+
+/* setup page buffer from page info */
+int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
+ u32 number,
+ u64 *kpage)
+{
+ int ret;
+
+ switch (pginfo->type) {
+ case EHCA_MR_PGI_PHYS:
+ ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
+ break;
+ case EHCA_MR_PGI_USER:
+ ret = PAGE_SIZE >= pginfo->hwpage_size ?
+ ehca_set_pagebuf_user1(pginfo, number, kpage) :
+ ehca_set_pagebuf_user2(pginfo, number, kpage);
+ break;
+ case EHCA_MR_PGI_FMR:
+ ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
+ break;
+ default:
+ ehca_gen_err("bad pginfo->type=%x", pginfo->type);
+ ret = -EFAULT;
+ break;
+ }
+ return ret;
+} /* end ehca_set_pagebuf() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * check whether the MR is a max-MR, i.e. spans the whole memory;
+ * returns 1 if it is a max-MR, 0 otherwise
+ */
+int ehca_mr_is_maxmr(u64 size,
+ u64 *iova_start)
+{
+	/* an MR is treated as a max-MR only if it matches the following: */
+ if ((size == ((u64)high_memory - PAGE_OFFSET)) &&
+ (iova_start == (void *)KERNELBASE)) {
+ ehca_gen_dbg("this is a max-MR");
+ return 1;
+ } else
+ return 0;
+} /* end ehca_mr_is_maxmr() */
+
+/*----------------------------------------------------------------------*/
+
+/* map IB access control to hipz access control; used for both MR and MW */
+void ehca_mrmw_map_acl(int ib_acl,
+ u32 *hipz_acl)
+{
+ *hipz_acl = 0;
+ if (ib_acl & IB_ACCESS_REMOTE_READ)
+ *hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
+ if (ib_acl & IB_ACCESS_REMOTE_WRITE)
+ *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
+ if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
+ *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
+ if (ib_acl & IB_ACCESS_LOCAL_WRITE)
+ *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
+ if (ib_acl & IB_ACCESS_MW_BIND)
+ *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
+} /* end ehca_mrmw_map_acl() */
+
+/*----------------------------------------------------------------------*/
+
+/* sets page size in hipz access control for MR/MW. */
+void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
+{
+ *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
+} /* end ehca_mrmw_set_pgsize_hipz_acl() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * reverse-map hipz access control to IB access flags;
+ * used for both MR and MW
+ */
+void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
+ int *ib_acl) /*OUT*/
+{
+ *ib_acl = 0;
+ if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
+ *ib_acl |= IB_ACCESS_REMOTE_READ;
+ if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
+ *ib_acl |= IB_ACCESS_REMOTE_WRITE;
+ if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
+ *ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
+ if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
+ *ib_acl |= IB_ACCESS_LOCAL_WRITE;
+ if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
+ *ib_acl |= IB_ACCESS_MW_BIND;
+} /* end ehca_mrmw_reverse_map_acl() */
+
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * MR destructor and constructor:
+ * used by the reregister MR verb; resets all fields of ehca_mr_t to 0
+ * except struct ib_mr and the spinlock
+ */
+void ehca_mr_deletenew(struct ehca_mr *mr)
+{
+ mr->flags = 0;
+ mr->num_kpages = 0;
+ mr->num_hwpages = 0;
+ mr->acl = 0;
+ mr->start = NULL;
+ mr->fmr_page_size = 0;
+ mr->fmr_max_pages = 0;
+ mr->fmr_max_maps = 0;
+ mr->fmr_map_cnt = 0;
+ memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
+ memset(&mr->galpas, 0, sizeof(mr->galpas));
+} /* end ehca_mr_deletenew() */
+
+int ehca_init_mrmw_cache(void)
+{
+ mr_cache = kmem_cache_create("ehca_cache_mr",
+ sizeof(struct ehca_mr), 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!mr_cache)
+ return -ENOMEM;
+ mw_cache = kmem_cache_create("ehca_cache_mw",
+ sizeof(struct ehca_mw), 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!mw_cache) {
+ kmem_cache_destroy(mr_cache);
+ mr_cache = NULL;
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void ehca_cleanup_mrmw_cache(void)
+{
+ if (mr_cache)
+ kmem_cache_destroy(mr_cache);
+ if (mw_cache)
+ kmem_cache_destroy(mw_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h
new file mode 100644
index 0000000..bc8f4e3
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h
@@ -0,0 +1,121 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * MR/MW declarations and inline functions
+ *
+ * Authors: Dietmar Decker <ddecker@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _EHCA_MRMW_H_
+#define _EHCA_MRMW_H_
+
+int ehca_reg_mr(struct ehca_shca *shca,
+ struct ehca_mr *e_mr,
+ u64 *iova_start,
+ u64 size,
+ int acl,
+ struct ehca_pd *e_pd,
+ struct ehca_mr_pginfo *pginfo,
+ u32 *lkey,
+ u32 *rkey);
+
+int ehca_reg_mr_rpages(struct ehca_shca *shca,
+ struct ehca_mr *e_mr,
+ struct ehca_mr_pginfo *pginfo);
+
+int ehca_rereg_mr(struct ehca_shca *shca,
+ struct ehca_mr *e_mr,
+ u64 *iova_start,
+ u64 size,
+ int mr_access_flags,
+ struct ehca_pd *e_pd,
+ struct ehca_mr_pginfo *pginfo,
+ u32 *lkey,
+ u32 *rkey);
+
+int ehca_unmap_one_fmr(struct ehca_shca *shca,
+ struct ehca_mr *e_fmr);
+
+int ehca_reg_smr(struct ehca_shca *shca,
+ struct ehca_mr *e_origmr,
+ struct ehca_mr *e_newmr,
+ u64 *iova_start,
+ int acl,
+ struct ehca_pd *e_pd,
+ u32 *lkey,
+ u32 *rkey);
+
+int ehca_reg_internal_maxmr(struct ehca_shca *shca,
+ struct ehca_pd *e_pd,
+ struct ehca_mr **maxmr);
+
+int ehca_reg_maxmr(struct ehca_shca *shca,
+ struct ehca_mr *e_newmr,
+ u64 *iova_start,
+ int acl,
+ struct ehca_pd *e_pd,
+ u32 *lkey,
+ u32 *rkey);
+
+int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
+
+int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ u64 *iova_start,
+ u64 *size);
+
+int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
+ u64 *page_list,
+ int list_len);
+
+int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
+ u32 number,
+ u64 *kpage);
+
+int ehca_mr_is_maxmr(u64 size,
+ u64 *iova_start);
+
+void ehca_mrmw_map_acl(int ib_acl,
+ u32 *hipz_acl);
+
+void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl);
+
+void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
+ int *ib_acl);
+
+void ehca_mr_deletenew(struct ehca_mr *mr);
+
+#endif /*_EHCA_MRMW_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
new file mode 100644
index 0000000..2fe5548
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -0,0 +1,122 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * PD functions
+ *
+ * Authors: Christoph Raisch <raisch@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+
+static struct kmem_cache *pd_cache;
+
+struct ib_pd *ehca_alloc_pd(struct ib_device *device,
+ struct ib_ucontext *context, struct ib_udata *udata)
+{
+ struct ehca_pd *pd;
+ int i;
+
+ pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL);
+ if (!pd) {
+ ehca_err(device, "device=%p context=%p out of memory",
+ device, context);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for (i = 0; i < 2; i++) {
+ INIT_LIST_HEAD(&pd->free[i]);
+ INIT_LIST_HEAD(&pd->full[i]);
+ }
+ mutex_init(&pd->lock);
+
+	/*
+	 * Kernel PD: context == NULL
+	 * User PD: context != NULL
+	 */
+ if (!context) {
+		/*
+		 * After init, kernel PDs always reuse the one
+		 * created in ehca_shca_reopen()
+		 */
+ struct ehca_shca *shca = container_of(device, struct ehca_shca,
+ ib_device);
+ pd->fw_pd.value = shca->pd->fw_pd.value;
+ } else
+ pd->fw_pd.value = (u64)pd;
+
+ return &pd->ib_pd;
+}
+
+int ehca_dealloc_pd(struct ib_pd *pd)
+{
+ struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+ int i, leftovers = 0;
+ struct ipz_small_queue_page *page, *tmp;
+
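+	/* free any small queue pages still cached in this PD */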
+ for (i = 0; i < 2; i++) {
+ list_splice(&my_pd->full[i], &my_pd->free[i]);
+ list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) {
+ leftovers = 1;
+ free_page(page->page);
+ kmem_cache_free(small_qp_cache, page);
+ }
+ }
+
+ if (leftovers)
+ ehca_warn(pd->device,
+ "Some small queue pages were not freed");
+
+ kmem_cache_free(pd_cache, my_pd);
+
+ return 0;
+}
+
+int ehca_init_pd_cache(void)
+{
+ pd_cache = kmem_cache_create("ehca_cache_pd",
+ sizeof(struct ehca_pd), 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!pd_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void ehca_cleanup_pd_cache(void)
+{
+ if (pd_cache)
+ kmem_cache_destroy(pd_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h
new file mode 100644
index 0000000..5d28e3e
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_qes.h
@@ -0,0 +1,260 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * Hardware request structures
+ *
+ * Authors: Waleri Fomin <fomin@de.ibm.com>
+ * Reinhard Ernst <rernst@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _EHCA_QES_H_
+#define _EHCA_QES_H_
+
+#include "ehca_tools.h"
+
+/* virtual scatter-gather entry to specify remote addresses with length */
+struct ehca_vsgentry {
+ u64 vaddr;
+ u32 lkey;
+ u32 length;
+};
+
+#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7)
+#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3)
+#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12)
+#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31)
+#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47)
+#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55)
+#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63)
+
+/*
+ * Unreliable Datagram Address Vector Format
+ * see IBTA Vol1 chapter 8.3 Global Routing Header
+ */
+struct ehca_ud_av {
+ u8 sl;
+ u8 lnh;
+ u16 dlid;
+ u8 reserved1;
+ u8 reserved2;
+ u8 reserved3;
+ u8 slid_path_bits;
+ u8 reserved4;
+ u8 ipd;
+ u8 reserved5;
+ u8 pmtu;
+ u32 reserved6;
+ u64 reserved7;
+ union {
+ struct {
+ u64 word_0; /* always set to 6 */
+			/* should be 0x1B for IB transport */
+ u64 word_1;
+ u64 word_2;
+ u64 word_3;
+ u64 word_4;
+ } grh;
+ struct {
+ u32 wd_0;
+ u32 wd_1;
+ /* DWord_1 --> SGID */
+
+ u32 sgid_wd3;
+ u32 sgid_wd2;
+
+ u32 sgid_wd1;
+ u32 sgid_wd0;
+ /* DWord_3 --> DGID */
+
+ u32 dgid_wd3;
+ u32 dgid_wd2;
+
+ u32 dgid_wd1;
+ u32 dgid_wd0;
+ } grh_l;
+ };
+};
+
+/* maximum number of sg entries allowed in a WQE */
+#define MAX_WQE_SG_ENTRIES 252
+
+#define WQE_OPTYPE_SEND 0x80
+#define WQE_OPTYPE_RDMAREAD 0x40
+#define WQE_OPTYPE_RDMAWRITE 0x20
+#define WQE_OPTYPE_CMPSWAP 0x10
+#define WQE_OPTYPE_FETCHADD 0x08
+#define WQE_OPTYPE_BIND 0x04
+
+#define WQE_WRFLAG_REQ_SIGNAL_COM 0x80
+#define WQE_WRFLAG_FENCE 0x40
+#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20
+#define WQE_WRFLAG_SOLIC_EVENT 0x10
+
+#define WQEF_CACHE_HINT 0x80
+#define WQEF_CACHE_HINT_RD_WR 0x40
+#define WQEF_TIMED_WQE 0x20
+#define WQEF_PURGE 0x08
+#define WQEF_HIGH_NIBBLE 0xF0
+
+#define MW_BIND_ACCESSCTRL_R_WRITE 0x40
+#define MW_BIND_ACCESSCTRL_R_READ 0x20
+#define MW_BIND_ACCESSCTRL_R_ATOMIC 0x10
+
+struct ehca_wqe {
+ u64 work_request_id;
+ u8 optype;
+ u8 wr_flag;
+ u16 pkeyi;
+ u8 wqef;
+ u8 nr_of_data_seg;
+ u16 wqe_provided_slid;
+ u32 destination_qp_number;
+ u32 resync_psn_sqp;
+ u32 local_ee_context_qkey;
+ u32 immediate_data;
+ union {
+ struct {
+ u64 remote_virtual_adress;
+ u32 rkey;
+ u32 reserved;
+ u64 atomic_1st_op_dma_len;
+ u64 atomic_2nd_op;
+ struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+
+ } nud;
+ struct {
+ u64 ehca_ud_av_ptr;
+ u64 reserved1;
+ u64 reserved2;
+ u64 reserved3;
+ struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+ } ud_avp;
+ struct {
+ struct ehca_ud_av ud_av;
+ struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES -
+ 2];
+ } ud_av;
+ struct {
+ u64 reserved0;
+ u64 reserved1;
+ u64 reserved2;
+ u64 reserved3;
+ struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
+ } all_rcv;
+
+ struct {
+ u64 reserved;
+ u32 rkey;
+ u32 old_rkey;
+ u64 reserved1;
+ u64 reserved2;
+ u64 virtual_address;
+ u32 reserved3;
+ u32 length;
+ u32 reserved4;
+ u16 reserved5;
+ u8 reserved6;
+ u8 lr_ctl;
+ u32 lkey;
+ u32 reserved7;
+ u64 reserved8;
+ u64 reserved9;
+ u64 reserved10;
+ u64 reserved11;
+ } bind;
+ struct {
+ u64 reserved12;
+ u64 reserved13;
+ u32 size;
+ u32 start;
+ } inline_data;
+ } u;
+
+};
+
+#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0)
+#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1)
+#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2)
+#define WC_SE_BIT EHCA_BMASK_IBM(3, 3)
+#define WC_STATUS_ERROR_BIT 0x80000000
+#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
+#define WC_STATUS_PURGE_BIT 0x10
+#define WC_SEND_RECEIVE_BIT 0x80
+
+struct ehca_cqe {
+ u64 work_request_id;
+ u8 optype;
+ u8 w_completion_flags;
+ u16 reserved1;
+ u32 nr_bytes_transferred;
+ u32 immediate_data;
+ u32 local_qp_number;
+ u8 freed_resource_count;
+ u8 service_level;
+ u16 wqe_count;
+ u32 qp_token;
+ u32 qkey_ee_token;
+ u32 remote_qp_number;
+ u16 dlid;
+ u16 rlid;
+ u16 reserved2;
+ u16 pkey_index;
+ u32 cqe_timestamp;
+ u32 wqe_timestamp;
+ u8 wqe_timestamp_valid;
+ u8 reserved3;
+ u8 reserved4;
+ u8 cqe_flags;
+ u32 status;
+};
+
+struct ehca_eqe {
+ u64 entry;
+};
+
+struct ehca_mrte {
+ u64 starting_va;
+ u64 length; /* length of memory region in bytes*/
+ u32 pd;
+ u8 key_instance;
+ u8 pagesize;
+ u8 mr_control;
+ u8 local_remote_access_ctrl;
+ u8 reserved[0x20 - 0x18];
+ u64 at_pointer[4];
+};
+#endif /*_EHCA_QES_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
new file mode 100644
index 0000000..cadbf0c
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -0,0 +1,2254 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * QP functions
+ *
+ * Authors: Joachim Fenkes <fenkes@de.ibm.com>
+ * Stefan Roscher <stefan.roscher@de.ibm.com>
+ * Waleri Fomin <fomin@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Reinhard Ernst <rernst@de.ibm.com>
+ * Heiko J Schick <schickhj@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+static struct kmem_cache *qp_cache;
+
+/*
+ * attributes not supported by query qp
+ */
+#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \
+ IB_QP_MAX_QP_RD_ATOMIC | \
+ IB_QP_ACCESS_FLAGS | \
+ IB_QP_EN_SQD_ASYNC_NOTIFY)
+
+/*
+ * ehca (internal) qp state values
+ */
+enum ehca_qp_state {
+ EHCA_QPS_RESET = 1,
+ EHCA_QPS_INIT = 2,
+ EHCA_QPS_RTR = 3,
+ EHCA_QPS_RTS = 5,
+ EHCA_QPS_SQD = 6,
+ EHCA_QPS_SQE = 8,
+ EHCA_QPS_ERR = 128
+};
+
+/*
+ * qp state transitions as defined by IB Arch Rel 1.1 page 431
+ */
+enum ib_qp_statetrans {
+ IB_QPST_ANY2RESET,
+ IB_QPST_ANY2ERR,
+ IB_QPST_RESET2INIT,
+ IB_QPST_INIT2RTR,
+ IB_QPST_INIT2INIT,
+ IB_QPST_RTR2RTS,
+ IB_QPST_RTS2SQD,
+ IB_QPST_RTS2RTS,
+ IB_QPST_SQD2RTS,
+ IB_QPST_SQE2RTS,
+ IB_QPST_SQD2SQD,
+ IB_QPST_MAX /* nr of transitions, this must be last!!! */
+};
+
+/*
+ * ib2ehca_qp_state maps IB to ehca qp_state
+ * returns ehca qp state corresponding to given ib qp state
+ */
+static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state)
+{
+ switch (ib_qp_state) {
+ case IB_QPS_RESET:
+ return EHCA_QPS_RESET;
+ case IB_QPS_INIT:
+ return EHCA_QPS_INIT;
+ case IB_QPS_RTR:
+ return EHCA_QPS_RTR;
+ case IB_QPS_RTS:
+ return EHCA_QPS_RTS;
+ case IB_QPS_SQD:
+ return EHCA_QPS_SQD;
+ case IB_QPS_SQE:
+ return EHCA_QPS_SQE;
+ case IB_QPS_ERR:
+ return EHCA_QPS_ERR;
+ default:
+ ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state);
+ return -EINVAL;
+ }
+}
+
+/*
+ * ehca2ib_qp_state maps ehca to IB qp_state
+ * returns ib qp state corresponding to given ehca qp state
+ */
+static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state
+ ehca_qp_state)
+{
+ switch (ehca_qp_state) {
+ case EHCA_QPS_RESET:
+ return IB_QPS_RESET;
+ case EHCA_QPS_INIT:
+ return IB_QPS_INIT;
+ case EHCA_QPS_RTR:
+ return IB_QPS_RTR;
+ case EHCA_QPS_RTS:
+ return IB_QPS_RTS;
+ case EHCA_QPS_SQD:
+ return IB_QPS_SQD;
+ case EHCA_QPS_SQE:
+ return IB_QPS_SQE;
+ case EHCA_QPS_ERR:
+ return IB_QPS_ERR;
+ default:
+ ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state);
+ return -EINVAL;
+ }
+}
+
+/*
+ * ehca_qp_type used as index for req_attr and opt_attr of
+ * struct ehca_modqp_statetrans
+ */
+enum ehca_qp_type {
+ QPT_RC = 0,
+ QPT_UC = 1,
+ QPT_UD = 2,
+ QPT_SQP = 3,
+ QPT_MAX
+};
+
+/*
+ * ib2ehcaqptype maps IB to ehca qp_type
+ * returns ehca qp type corresponding to ib qp type
+ */
+static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype)
+{
+ switch (ibqptype) {
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ return QPT_SQP;
+ case IB_QPT_RC:
+ return QPT_RC;
+ case IB_QPT_UC:
+ return QPT_UC;
+ case IB_QPT_UD:
+ return QPT_UD;
+ default:
+ ehca_gen_err("Invalid ibqptype=%x", ibqptype);
+ return -EINVAL;
+ }
+}
+
+static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
+ int ib_tostate)
+{
+ int index = -EINVAL;
+ switch (ib_tostate) {
+ case IB_QPS_RESET:
+ index = IB_QPST_ANY2RESET;
+ break;
+ case IB_QPS_INIT:
+ switch (ib_fromstate) {
+ case IB_QPS_RESET:
+ index = IB_QPST_RESET2INIT;
+ break;
+ case IB_QPS_INIT:
+ index = IB_QPST_INIT2INIT;
+ break;
+ }
+ break;
+ case IB_QPS_RTR:
+ if (ib_fromstate == IB_QPS_INIT)
+ index = IB_QPST_INIT2RTR;
+ break;
+ case IB_QPS_RTS:
+ switch (ib_fromstate) {
+ case IB_QPS_RTR:
+ index = IB_QPST_RTR2RTS;
+ break;
+ case IB_QPS_RTS:
+ index = IB_QPST_RTS2RTS;
+ break;
+ case IB_QPS_SQD:
+ index = IB_QPST_SQD2RTS;
+ break;
+ case IB_QPS_SQE:
+ index = IB_QPST_SQE2RTS;
+ break;
+ }
+ break;
+ case IB_QPS_SQD:
+ if (ib_fromstate == IB_QPS_RTS)
+ index = IB_QPST_RTS2SQD;
+ break;
+ case IB_QPS_SQE:
+ break;
+ case IB_QPS_ERR:
+ index = IB_QPST_ANY2ERR;
+ break;
+ default:
+ break;
+ }
+ return index;
+}
+
+/*
+ * ibqptype2servicetype returns hcp service type corresponding to given
+ * ib qp type used by create_qp()
+ */
+static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
+{
+ switch (ibqptype) {
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ return ST_UD;
+ case IB_QPT_RC:
+ return ST_RC;
+ case IB_QPT_UC:
+ return ST_UC;
+ case IB_QPT_UD:
+ return ST_UD;
+ case IB_QPT_RAW_IPV6:
+ return -EINVAL;
+ case IB_QPT_RAW_ETY:
+ return -EINVAL;
+ default:
+ ehca_gen_err("Invalid ibqptype=%x", ibqptype);
+ return -EINVAL;
+ }
+}
+
+/*
+ * init userspace queue info from ipz_queue data
+ */
+static inline void queue2resp(struct ipzu_queue_resp *resp,
+ struct ipz_queue *queue)
+{
+ resp->qe_size = queue->qe_size;
+ resp->act_nr_of_sg = queue->act_nr_of_sg;
+ resp->queue_length = queue->queue_length;
+ resp->pagesize = queue->pagesize;
+ resp->toggle_state = queue->toggle_state;
+ resp->offset = queue->offset;
+}
+
+/*
+ * init_qp_queue initializes/constructs r/squeue and registers queue pages.
+ */
+static inline int init_qp_queue(struct ehca_shca *shca,
+ struct ehca_pd *pd,
+ struct ehca_qp *my_qp,
+ struct ipz_queue *queue,
+ int q_type,
+ u64 expected_hret,
+ struct ehca_alloc_queue_parms *parms,
+ int wqe_size)
+{
+ int ret, cnt, ipz_rc, nr_q_pages;
+ void *vpage;
+ u64 rpage, h_ret;
+ struct ib_device *ib_dev = &shca->ib_device;
+ struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
+
+ if (!parms->queue_size)
+ return 0;
+
+ if (parms->is_small) {
+ nr_q_pages = 1;
+ ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
+ 128 << parms->page_size,
+ wqe_size, parms->act_nr_sges, 1);
+ } else {
+ nr_q_pages = parms->queue_size;
+ ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
+ EHCA_PAGESIZE, wqe_size,
+ parms->act_nr_sges, 0);
+ }
+
+ if (!ipz_rc) {
+ ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i",
+ ipz_rc);
+ return -EBUSY;
+ }
+
+ /* register queue pages */
+ for (cnt = 0; cnt < nr_q_pages; cnt++) {
+ vpage = ipz_qpageit_get_inc(queue);
+ if (!vpage) {
+ ehca_err(ib_dev, "ipz_qpageit_get_inc() "
+ "failed p_vpage= %p", vpage);
+ ret = -EINVAL;
+ goto init_qp_queue1;
+ }
+ rpage = virt_to_abs(vpage);
+
+ h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
+ my_qp->ipz_qp_handle,
+ NULL, 0, q_type,
+ rpage, parms->is_small ? 0 : 1,
+ my_qp->galpas.kernel);
+ if (cnt == (nr_q_pages - 1)) { /* last page! */
+ if (h_ret != expected_hret) {
+ ehca_err(ib_dev, "hipz_qp_register_rpage() "
+ "h_ret=%li", h_ret);
+ ret = ehca2ib_return_code(h_ret);
+ goto init_qp_queue1;
+ }
+ vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
+ if (vpage) {
+ ehca_err(ib_dev, "ipz_qpageit_get_inc() "
+ "should not succeed vpage=%p", vpage);
+ ret = -EINVAL;
+ goto init_qp_queue1;
+ }
+ } else {
+ if (h_ret != H_PAGE_REGISTERED) {
+ ehca_err(ib_dev, "hipz_qp_register_rpage() "
+ "h_ret=%li", h_ret);
+ ret = ehca2ib_return_code(h_ret);
+ goto init_qp_queue1;
+ }
+ }
+ }
+
+ ipz_qeit_reset(queue);
+
+ return 0;
+
+init_qp_queue1:
+ ipz_queue_dtor(pd, queue);
+ return ret;
+}
+
+static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp)
+{
+ if (is_llqp)
+ return 128 << act_nr_sge;
+ else
+ return offsetof(struct ehca_wqe,
+ u.nud.sg_list[act_nr_sge]);
+}
+
+static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
+ int req_nr_sge, int is_llqp)
+{
+ u32 wqe_size, q_size;
+ int act_nr_sge = req_nr_sge;
+
+ if (!is_llqp)
+ /* round up #SGEs so WQE size is a power of 2 */
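+ /* the loop tries the candidate SGE counts 4, 12, 28, 60, 124 and 252
+ * and picks the smallest one that covers req_nr_sge */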
+ for (act_nr_sge = 4; act_nr_sge <= 252;
+ act_nr_sge = 4 + 2 * act_nr_sge)
+ if (act_nr_sge >= req_nr_sge)
+ break;
+
+ wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp);
+ q_size = wqe_size * (queue->max_wr + 1);
+
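+ /* page_size is an exponent: small queues use pages of 128 << page_size
+ * bytes (512 or 1024); 0 selects regular EHCA_PAGESIZE pages, see
+ * init_qp_queue() */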
+ if (q_size <= 512)
+ queue->page_size = 2;
+ else if (q_size <= 1024)
+ queue->page_size = 3;
+ else
+ queue->page_size = 0;
+
+ queue->is_small = (queue->page_size != 0);
+}
+
+/* needs to be called with cq->spinlock held */
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
+{
+ struct list_head *list, *node;
+
+ /* TODO: support low latency QPs */
+ if (qp->ext_type == EQPT_LLQP)
+ return;
+
+ if (on_sq) {
+ list = &qp->send_cq->sqp_err_list;
+ node = &qp->sq_err_node;
+ } else {
+ list = &qp->recv_cq->rqp_err_list;
+ node = &qp->rq_err_node;
+ }
+
+ if (list_empty(node))
+ list_add_tail(node, list);
+
+ return;
+}
+
+static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->spinlock, flags);
+
+ if (!list_empty(node))
+ list_del_init(node);
+
+ spin_unlock_irqrestore(&cq->spinlock, flags);
+}
+
+static void reset_queue_map(struct ehca_queue_map *qmap)
+{
+ int i;
+
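+ /* tail points just before index 0; marking every entry as reported
+ * prevents stale entries from producing flush cqes */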
+ qmap->tail = qmap->entries - 1;
+ qmap->left_to_poll = 0;
+ qmap->next_wqe_idx = 0;
+ for (i = 0; i < qmap->entries; i++) {
+ qmap->map[i].reported = 1;
+ qmap->map[i].cqe_req = 0;
+ }
+}
+
+/*
+ * Create an ib_qp struct that is either a QP or an SRQ, depending on
+ * the value of the is_srq parameter. If init_attr and srq_init_attr share
+ * fields, the value from init_attr is used.
+ */
+static struct ehca_qp *internal_create_qp(
+ struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata, int is_srq)
+{
+ struct ehca_qp *my_qp, *my_srq = NULL;
+ struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+ struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+ ib_device);
+ struct ib_ucontext *context = NULL;
+ u64 h_ret;
+ int is_llqp = 0, has_srq = 0;
+ int qp_type, max_send_sge, max_recv_sge, ret;
+
+ /* h_call's out parameters */
+ struct ehca_alloc_qp_parms parms;
+ u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
+ unsigned long flags;
+
+ if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) {
+ ehca_err(pd->device, "Unable to create QP, max number of %i "
+ "QPs reached.", shca->max_num_qps);
+ ehca_err(pd->device, "To increase the maximum number of QPs "
+ "use the number_of_qps module parameter.\n");
+ return ERR_PTR(-ENOSPC);
+ }
+
+ if (init_attr->create_flags) {
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+
+ memset(&parms, 0, sizeof(parms));
+ qp_type = init_attr->qp_type;
+
+ if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
+ init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
+ ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
+ init_attr->sq_sig_type);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* save LLQP info */
+ if (qp_type & 0x80) {
+ is_llqp = 1;
+ parms.ext_type = EQPT_LLQP;
+ parms.ll_comp_flags = qp_type & LLQP_COMP_MASK;
+ }
+ qp_type &= 0x1F;
+ init_attr->qp_type &= 0x1F;
+
+ /* handle SRQ base QPs */
+ if (init_attr->srq) {
+ my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
+
+ if (qp_type == IB_QPT_UC) {
+ ehca_err(pd->device, "UC with SRQ not supported");
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+
+ has_srq = 1;
+ parms.ext_type = EQPT_SRQBASE;
+ parms.srq_qpn = my_srq->real_qp_num;
+ }
+
+ if (is_llqp && has_srq) {
+ ehca_err(pd->device, "LLQPs can't have an SRQ");
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* handle SRQs */
+ if (is_srq) {
+ parms.ext_type = EQPT_SRQ;
+ parms.srq_limit = srq_init_attr->attr.srq_limit;
+ if (init_attr->cap.max_recv_sge > 3) {
+ ehca_err(pd->device, "no more than three SGEs "
+ "supported for SRQ pd=%p max_sge=%x",
+ pd, init_attr->cap.max_recv_sge);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ /* check QP type */
+ if (qp_type != IB_QPT_UD &&
+ qp_type != IB_QPT_UC &&
+ qp_type != IB_QPT_RC &&
+ qp_type != IB_QPT_SMI &&
+ qp_type != IB_QPT_GSI) {
+ ehca_err(pd->device, "wrong QP Type=%x", qp_type);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (is_llqp) {
+ switch (qp_type) {
+ case IB_QPT_RC:
+ if ((init_attr->cap.max_send_wr > 255) ||
+ (init_attr->cap.max_recv_wr > 255)) {
+ ehca_err(pd->device,
+ "Invalid Number of max_sq_wr=%x "
+ "or max_rq_wr=%x for RC LLQP",
+ init_attr->cap.max_send_wr,
+ init_attr->cap.max_recv_wr);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+ break;
+ case IB_QPT_UD:
+ if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
+ ehca_err(pd->device, "UD LLQP not supported "
+ "by this adapter");
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-ENOSYS);
+ }
+ if (!(init_attr->cap.max_send_sge <= 5
+ && init_attr->cap.max_send_sge >= 1
+ && init_attr->cap.max_recv_sge <= 5
+ && init_attr->cap.max_recv_sge >= 1)) {
+ ehca_err(pd->device,
+ "Invalid Number of max_send_sge=%x "
+ "or max_recv_sge=%x for UD LLQP",
+ init_attr->cap.max_send_sge,
+ init_attr->cap.max_recv_sge);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ } else if (init_attr->cap.max_send_wr > 255) {
+ ehca_err(pd->device,
+ "Invalid Number of "
+ "max_send_wr=%x for UD QP_TYPE=%x",
+ init_attr->cap.max_send_wr, qp_type);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+ break;
+ default:
+ ehca_err(pd->device, "unsupported LL QP Type=%x",
+ qp_type);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+ } else {
+ int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
+ || qp_type == IB_QPT_GSI) ? 250 : 252;
+
+ if (init_attr->cap.max_send_sge > max_sge
+ || init_attr->cap.max_recv_sge > max_sge) {
+ ehca_err(pd->device, "Invalid number of SGEs requested "
+ "send_sge=%x recv_sge=%x max_sge=%x",
+ init_attr->cap.max_send_sge,
+ init_attr->cap.max_recv_sge, max_sge);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ if (pd->uobject && udata)
+ context = pd->uobject->context;
+
+ my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
+ if (!my_qp) {
+ ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ atomic_set(&my_qp->nr_events, 0);
+ init_waitqueue_head(&my_qp->wait_completion);
+ spin_lock_init(&my_qp->spinlock_s);
+ spin_lock_init(&my_qp->spinlock_r);
+ my_qp->qp_type = qp_type;
+ my_qp->ext_type = parms.ext_type;
+ my_qp->state = IB_QPS_RESET;
+
+ if (init_attr->recv_cq)
+ my_qp->recv_cq =
+ container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
+ if (init_attr->send_cq)
+ my_qp->send_cq =
+ container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
+
+ do {
+ if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ ehca_err(pd->device, "Can't reserve idr resources.");
+ goto create_qp_exit0;
+ }
+
+ write_lock_irqsave(&ehca_qp_idr_lock, flags);
+ ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
+ write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ } while (ret == -EAGAIN);
+
+ if (ret) {
+ ret = -ENOMEM;
+ ehca_err(pd->device, "Can't allocate new idr entry.");
+ goto create_qp_exit0;
+ }
+
+ if (my_qp->token > 0x1FFFFFF) {
+ ret = -EINVAL;
+ ehca_err(pd->device, "Invalid number of qp");
+ goto create_qp_exit1;
+ }
+
+ if (has_srq)
+ parms.srq_token = my_qp->token;
+
+ parms.servicetype = ibqptype2servicetype(qp_type);
+ if (parms.servicetype < 0) {
+ ret = -EINVAL;
+ ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
+ goto create_qp_exit1;
+ }
+
+ /* Always signal by WQE so we can hide circumvention WQEs */
+ parms.sigtype = HCALL_SIGT_BY_WQE;
+
+ /* UD_AV CIRCUMVENTION */
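+ /* request two extra SGEs per UD WQE for internal use; they are hidden
+ * from the caller again below (act_nr_sges -= 2) */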
+ max_send_sge = init_attr->cap.max_send_sge;
+ max_recv_sge = init_attr->cap.max_recv_sge;
+ if (parms.servicetype == ST_UD && !is_llqp) {
+ max_send_sge += 2;
+ max_recv_sge += 2;
+ }
+
+ parms.token = my_qp->token;
+ parms.eq_handle = shca->eq.ipz_eq_handle;
+ parms.pd = my_pd->fw_pd;
+ if (my_qp->send_cq)
+ parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle;
+ if (my_qp->recv_cq)
+ parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
+
+ parms.squeue.max_wr = init_attr->cap.max_send_wr;
+ parms.rqueue.max_wr = init_attr->cap.max_recv_wr;
+ parms.squeue.max_sge = max_send_sge;
+ parms.rqueue.max_sge = max_recv_sge;
+
+ /* RC QPs need one more SWQE for unsolicited ack circumvention */
+ if (qp_type == IB_QPT_RC)
+ parms.squeue.max_wr++;
+
+ if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
+ if (HAS_SQ(my_qp))
+ ehca_determine_small_queue(
+ &parms.squeue, max_send_sge, is_llqp);
+ if (HAS_RQ(my_qp))
+ ehca_determine_small_queue(
+ &parms.rqueue, max_recv_sge, is_llqp);
+ parms.qp_storage =
+ (parms.squeue.is_small || parms.rqueue.is_small);
+ }
+
+ h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%li",
+ h_ret);
+ ret = ehca2ib_return_code(h_ret);
+ goto create_qp_exit1;
+ }
+
+ ib_qp_num = my_qp->real_qp_num = parms.real_qp_num;
+ my_qp->ipz_qp_handle = parms.qp_handle;
+ my_qp->galpas = parms.galpas;
+
+ swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp);
+ rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp);
+
+ switch (qp_type) {
+ case IB_QPT_RC:
+ if (is_llqp) {
+ parms.squeue.act_nr_sges = 1;
+ parms.rqueue.act_nr_sges = 1;
+ }
+ /* hide the extra WQE */
+ parms.squeue.act_nr_wqes--;
+ break;
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ case IB_QPT_SMI:
+ /* UD circumvention */
+ if (is_llqp) {
+ parms.squeue.act_nr_sges = 1;
+ parms.rqueue.act_nr_sges = 1;
+ } else {
+ parms.squeue.act_nr_sges -= 2;
+ parms.rqueue.act_nr_sges -= 2;
+ }
+
+ if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
+ parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr;
+ parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr;
+ parms.squeue.act_nr_sges = init_attr->cap.max_send_sge;
+ parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge;
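+ /* special QPs use the well-known QP numbers: 0 for SMI, 1 for GSI */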
+ ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ /* initialize r/squeue and register queue pages */
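+ /* when the QP has both queues, registering the last squeue page returns
+ * H_PAGE_REGISTERED because the rqueue pages still follow; only the very
+ * last page of the QP returns H_SUCCESS */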
+ if (HAS_SQ(my_qp)) {
+ ret = init_qp_queue(
+ shca, my_pd, my_qp, &my_qp->ipz_squeue, 0,
+ HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
+ &parms.squeue, swqe_size);
+ if (ret) {
+ ehca_err(pd->device, "Couldn't initialize squeue "
+ "and pages ret=%i", ret);
+ goto create_qp_exit2;
+ }
+
+ my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
+ my_qp->ipz_squeue.qe_size;
+ my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
+ sizeof(struct ehca_qmap_entry));
+ if (!my_qp->sq_map.map) {
+ ehca_err(pd->device, "Couldn't allocate squeue map");
+ ret = -ENOMEM;
+ goto create_qp_exit3;
+ }
+ INIT_LIST_HEAD(&my_qp->sq_err_node);
+ /* to avoid the generation of bogus flush CQEs */
+ reset_queue_map(&my_qp->sq_map);
+ }
+
+ if (HAS_RQ(my_qp)) {
+ ret = init_qp_queue(
+ shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1,
+ H_SUCCESS, &parms.rqueue, rwqe_size);
+ if (ret) {
+ ehca_err(pd->device, "Couldn't initialize rqueue "
+ "and pages ret=%i", ret);
+ goto create_qp_exit4;
+ }
+
+ my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
+ my_qp->ipz_rqueue.qe_size;
+ my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
+ sizeof(struct ehca_qmap_entry));
+ if (!my_qp->rq_map.map) {
+ ehca_err(pd->device, "Couldn't allocate rqueue map");
+ ret = -ENOMEM;
+ goto create_qp_exit5;
+ }
+ INIT_LIST_HEAD(&my_qp->rq_err_node);
+ /* to avoid the generation of bogus flush CQEs */
+ reset_queue_map(&my_qp->rq_map);
+ } else if (init_attr->srq) {
+ /* this is a base QP, use the queue map of the SRQ */
+ my_qp->rq_map = my_srq->rq_map;
+ INIT_LIST_HEAD(&my_qp->rq_err_node);
+
+ my_qp->ipz_rqueue = my_srq->ipz_rqueue;
+ }
+
+ if (is_srq) {
+ my_qp->ib_srq.pd = &my_pd->ib_pd;
+ my_qp->ib_srq.device = my_pd->ib_pd.device;
+
+ my_qp->ib_srq.srq_context = init_attr->qp_context;
+ my_qp->ib_srq.event_handler = init_attr->event_handler;
+ } else {
+ my_qp->ib_qp.qp_num = ib_qp_num;
+ my_qp->ib_qp.pd = &my_pd->ib_pd;
+ my_qp->ib_qp.device = my_pd->ib_pd.device;
+
+ my_qp->ib_qp.recv_cq = init_attr->recv_cq;
+ my_qp->ib_qp.send_cq = init_attr->send_cq;
+
+ my_qp->ib_qp.qp_type = qp_type;
+ my_qp->ib_qp.srq = init_attr->srq;
+
+ my_qp->ib_qp.qp_context = init_attr->qp_context;
+ my_qp->ib_qp.event_handler = init_attr->event_handler;
+ }
+
+ init_attr->cap.max_inline_data = 0; /* not supported yet */
+ init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges;
+ init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes;
+ init_attr->cap.max_send_sge = parms.squeue.act_nr_sges;
+ init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
+ my_qp->init_attr = *init_attr;
+
+ if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
+ shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
+ &my_qp->ib_qp;
+ if (ehca_nr_ports < 0) {
+ /* alloc array to cache subsequent modify qp parms
+ * for autodetect mode
+ */
+ my_qp->mod_qp_parm =
+ kzalloc(EHCA_MOD_QP_PARM_MAX *
+ sizeof(*my_qp->mod_qp_parm),
+ GFP_KERNEL);
+ if (!my_qp->mod_qp_parm) {
+ ehca_err(pd->device,
+ "Could not alloc mod_qp_parm");
+ ret = -ENOMEM;
+ goto create_qp_exit5;
+ }
+ }
+ }
+
+ /* NOTE: define_aqp0() not supported yet */
+ if (qp_type == IB_QPT_GSI) {
+ h_ret = ehca_define_sqp(shca, my_qp, init_attr);
+ if (h_ret != H_SUCCESS) {
+ kfree(my_qp->mod_qp_parm);
+ my_qp->mod_qp_parm = NULL;
+ /* the QP pointer is no longer valid */
+ shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
+ NULL;
+ ret = ehca2ib_return_code(h_ret);
+ goto create_qp_exit6;
+ }
+ }
+
+ if (my_qp->send_cq) {
+ ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
+ if (ret) {
+ ehca_err(pd->device,
+ "Couldn't assign qp to send_cq ret=%i", ret);
+ goto create_qp_exit7;
+ }
+ }
+
+ /* copy queues, galpa data to user space */
+ if (context && udata) {
+ struct ehca_create_qp_resp resp;
+ memset(&resp, 0, sizeof(resp));
+
+ resp.qp_num = my_qp->real_qp_num;
+ resp.token = my_qp->token;
+ resp.qp_type = my_qp->qp_type;
+ resp.ext_type = my_qp->ext_type;
+ resp.qkey = my_qp->qkey;
+ resp.real_qp_num = my_qp->real_qp_num;
+
+ if (HAS_SQ(my_qp))
+ queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
+ if (HAS_RQ(my_qp))
+ queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
+ resp.fw_handle_ofs = (u32)
+ (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
+
+ if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+ ehca_err(pd->device, "Copy to udata failed");
+ ret = -EINVAL;
+ goto create_qp_exit8;
+ }
+ }
+
+ return my_qp;
+
+create_qp_exit8:
+ ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
+
+create_qp_exit7:
+ kfree(my_qp->mod_qp_parm);
+
+create_qp_exit6:
+ if (HAS_RQ(my_qp))
+ vfree(my_qp->rq_map.map);
+
+create_qp_exit5:
+ if (HAS_RQ(my_qp))
+ ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+
+create_qp_exit4:
+ if (HAS_SQ(my_qp))
+ vfree(my_qp->sq_map.map);
+
+create_qp_exit3:
+ if (HAS_SQ(my_qp))
+ ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
+
+create_qp_exit2:
+ hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
+
+create_qp_exit1:
+ write_lock_irqsave(&ehca_qp_idr_lock, flags);
+ idr_remove(&ehca_qp_idr, my_qp->token);
+ write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+create_qp_exit0:
+ kmem_cache_free(qp_cache, my_qp);
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(ret);
+}
+
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata)
+{
+ struct ehca_qp *ret;
+
+ ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0);
+ return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp;
+}
+
+static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+ struct ib_uobject *uobject);
+
+struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata)
+{
+ struct ib_qp_init_attr qp_init_attr;
+ struct ehca_qp *my_qp;
+ struct ib_srq *ret;
+ struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+ ib_device);
+ struct hcp_modify_qp_control_block *mqpcb;
+ u64 hret, update_mask;
+
+ /* For common attributes, internal_create_qp() takes its info
+ * out of qp_init_attr, so copy all common attrs there.
+ */
+ memset(&qp_init_attr, 0, sizeof(qp_init_attr));
+ qp_init_attr.event_handler = srq_init_attr->event_handler;
+ qp_init_attr.qp_context = srq_init_attr->srq_context;
+ qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ qp_init_attr.qp_type = IB_QPT_RC;
+ qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr;
+ qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge;
+
+ my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1);
+ if (IS_ERR(my_qp))
+ return (struct ib_srq *)my_qp;
+
+ /* copy back return values */
+ srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr;
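+ /* the adapter supports at most 3 SGEs per SRQ WQE, see the check in
+ * internal_create_qp() */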
+ srq_init_attr->attr.max_sge = 3;
+
+ /* drive SRQ into RTR state */
+ mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!mqpcb) {
+ ehca_err(pd->device, "Could not get zeroed page for mqpcb "
+ "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
+ ret = ERR_PTR(-ENOMEM);
+ goto create_srq1;
+ }
+
+ mqpcb->qp_state = EHCA_QPS_INIT;
+ mqpcb->prim_phys_port = 1;
+ update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+ hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle,
+ &my_qp->pf,
+ update_mask,
+ mqpcb, my_qp->galpas.kernel);
+ if (hret != H_SUCCESS) {
+ ehca_err(pd->device, "Could not modify SRQ to INIT "
+ "ehca_qp=%p qp_num=%x h_ret=%li",
+ my_qp, my_qp->real_qp_num, hret);
+ goto create_srq2;
+ }
+
+ mqpcb->qp_enable = 1;
+ update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
+ hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle,
+ &my_qp->pf,
+ update_mask,
+ mqpcb, my_qp->galpas.kernel);
+ if (hret != H_SUCCESS) {
+ ehca_err(pd->device, "Could not enable SRQ "
+ "ehca_qp=%p qp_num=%x h_ret=%li",
+ my_qp, my_qp->real_qp_num, hret);
+ goto create_srq2;
+ }
+
+ mqpcb->qp_state = EHCA_QPS_RTR;
+ update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+ hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle,
+ &my_qp->pf,
+ update_mask,
+ mqpcb, my_qp->galpas.kernel);
+ if (hret != H_SUCCESS) {
+ ehca_err(pd->device, "Could not modify SRQ to RTR "
+ "ehca_qp=%p qp_num=%x h_ret=%li",
+ my_qp, my_qp->real_qp_num, hret);
+ goto create_srq2;
+ }
+
+ ehca_free_fw_ctrlblock(mqpcb);
+
+ return &my_qp->ib_srq;
+
+create_srq2:
+ ret = ERR_PTR(ehca2ib_return_code(hret));
+ ehca_free_fw_ctrlblock(mqpcb);
+
+create_srq1:
+ internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject);
+
+ return ret;
+}
+
+/*
+ * prepare_sqe_rts is called by internal_modify_qp() at the sqe -> rts
+ * transition; it sets the purge bit of the bad wqe and all subsequent
+ * wqes to avoid re-entering sqe, and returns the total number of bad
+ * wqes in bad_wqe_cnt.
+ */
+static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
+ int *bad_wqe_cnt)
+{
+ u64 h_ret;
+ struct ipz_queue *squeue;
+ void *bad_send_wqe_p, *bad_send_wqe_v;
+ u64 q_ofs;
+ struct ehca_wqe *wqe;
+ int qp_num = my_qp->ib_qp.qp_num;
+
+ /* get send wqe pointer */
+ h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle, &my_qp->pf,
+ &bad_send_wqe_p, NULL, 2);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
+ " ehca_qp=%p qp_num=%x h_ret=%li",
+ my_qp, qp_num, h_ret);
+ return ehca2ib_return_code(h_ret);
+ }
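+ /* clear the top bit: convert the 'real' address returned by firmware
+ * into an absolute address (cf. calc_left_cqes()) */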
+ bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63)));
+ ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
+ qp_num, bad_send_wqe_p);
+ /* convert wqe pointer to vadr */
+ bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p);
+ if (ehca_debug_level >= 2)
+ ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
+ squeue = &my_qp->ipz_squeue;
+ if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) {
+ ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x"
+ " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p);
+ return -EFAULT;
+ }
+
+ /* loop sets wqe's purge bit */
+ wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
+ *bad_wqe_cnt = 0;
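+ /* the loop stops at the wqe whose optype and wqef are 0xff; for kernel
+ * QPs this marker is placed on the next free wqe by internal_modify_qp() */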
+ while (wqe->optype != 0xff && wqe->wqef != 0xff) {
+ if (ehca_debug_level >= 2)
+ ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
+ wqe->nr_of_data_seg = 0; /* suppress data access */
+ wqe->wqef = WQEF_PURGE; /* WQE to be purged */
+ q_ofs = ipz_queue_advance_offset(squeue, q_ofs);
+ wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
+ *bad_wqe_cnt = (*bad_wqe_cnt)+1;
+ }
+ /*
+ * The bad wqe will be reprocessed and ignored when poll_cq() is called,
+ * i.e. the number of wqes with flush error status is one less than
+ * bad_wqe_cnt.
+ */
+ ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
+ qp_num, (*bad_wqe_cnt)-1);
+ wqe->wqef = 0;
+
+ return 0;
+}
+
+static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
+ struct ehca_queue_map *qmap)
+{
+ void *wqe_v;
+ u64 q_ofs;
+ u32 wqe_idx;
+ unsigned int tail_idx;
+
+ /* convert real to abs address */
+ wqe_p = wqe_p & (~(1UL << 63));
+
+ wqe_v = abs_to_virt(wqe_p);
+
+ if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
+ ehca_gen_err("Invalid offset for calculating left cqes "
+ "wqe_p=%#lx wqe_v=%p\n", wqe_p, wqe_v);
+ return -EFAULT;
+ }
+
+ tail_idx = (qmap->tail + 1) % qmap->entries;
+ wqe_idx = q_ofs / ipz_queue->qe_size;
+
+ /* walk all already-processed wqes and count those that requested a cqe */
+ while (tail_idx != wqe_idx) {
+ if (qmap->map[tail_idx].cqe_req)
+ qmap->left_to_poll++;
+ tail_idx = (tail_idx + 1) % qmap->entries;
+ }
+ /* save index in queue, where we have to start flushing */
+ qmap->next_wqe_idx = wqe_idx;
+ return 0;
+}
+
+static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
+{
+ u64 h_ret;
+ void *send_wqe_p, *recv_wqe_p;
+ int ret;
+ unsigned long flags;
+ int qp_num = my_qp->ib_qp.qp_num;
+
+ /* this hcall is not supported on base QPs */
+ if (my_qp->ext_type != EQPT_SRQBASE) {
+ /* get send and receive wqe pointer */
+ h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle, &my_qp->pf,
+ &send_wqe_p, &recv_wqe_p, 4);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "disable_and_get_wqe() "
+ "failed ehca_qp=%p qp_num=%x h_ret=%li",
+ my_qp, qp_num, h_ret);
+ return ehca2ib_return_code(h_ret);
+ }
+
+ /*
+ * acquire lock to ensure that nobody is polling the cq which
+ * could mean that the qmap->tail pointer is in an
+ * inconsistent state.
+ */
+ spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+ ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
+ &my_qp->sq_map);
+ spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+ if (ret)
+ return ret;
+
+
+ spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+ ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
+ &my_qp->rq_map);
+ spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+ if (ret)
+ return ret;
+ } else {
+ spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+ my_qp->sq_map.left_to_poll = 0;
+ my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
+ my_qp->sq_map.entries;
+ spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+ spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+ my_qp->rq_map.left_to_poll = 0;
+ my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
+ my_qp->rq_map.entries;
+ spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+ }
+
+ /* this ensures flush cqes are generated only for pending wqes */
+ if ((my_qp->sq_map.left_to_poll == 0) &&
+ (my_qp->rq_map.left_to_poll == 0)) {
+ spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+ ehca_add_to_err_list(my_qp, 1);
+ spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+ if (HAS_RQ(my_qp)) {
+ spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+ ehca_add_to_err_list(my_qp, 0);
+ spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
+ flags);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * internal_modify_qp with circumvention to handle aqp0 properly.
+ * smi_reset2init indicates whether this is an internal reset-to-init call
+ * for the SMI QP. This flag must always be zero when called from
+ * ehca_modify_qp()! This internal function was introduced to avoid
+ * recursion within ehca_modify_qp().
+ */
+static int internal_modify_qp(struct ib_qp *ibqp,
+ struct ib_qp_attr *attr,
+ int attr_mask, int smi_reset2init)
+{
+ enum ib_qp_state qp_cur_state, qp_new_state;
+ int cnt, qp_attr_idx, ret = 0;
+ enum ib_qp_statetrans statetrans;
+ struct hcp_modify_qp_control_block *mqpcb;
+ struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+ struct ehca_shca *shca =
+ container_of(ibqp->pd->device, struct ehca_shca, ib_device);
+ u64 update_mask;
+ u64 h_ret;
+ int bad_wqe_cnt = 0;
+ int squeue_locked = 0;
+ unsigned long flags = 0;
+
+ /* do query_qp to obtain current attr values */
+ mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
+ if (!mqpcb) {
+ ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
+ "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
+ return -ENOMEM;
+ }
+
+ h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle,
+ &my_qp->pf,
+ mqpcb, my_qp->galpas.kernel);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(ibqp->device, "hipz_h_query_qp() failed "
+ "ehca_qp=%p qp_num=%x h_ret=%li",
+ my_qp, ibqp->qp_num, h_ret);
+ ret = ehca2ib_return_code(h_ret);
+ goto modify_qp_exit1;
+ }
+
+ qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
+
+ if (qp_cur_state == -EINVAL) { /* invalid qp state */
+ ret = -EINVAL;
+ ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x "
+ "ehca_qp=%p qp_num=%x",
+ mqpcb->qp_state, my_qp, ibqp->qp_num);
+ goto modify_qp_exit1;
+ }
+ /*
+ * circumvention to set aqp0 initial state to init
+ * as expected by IB spec
+ */
+ if (smi_reset2init == 0 &&
+ ibqp->qp_type == IB_QPT_SMI &&
+ qp_cur_state == IB_QPS_RESET &&
+ (attr_mask & IB_QP_STATE) &&
+ attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */
+ struct ib_qp_attr smiqp_attr = {
+ .qp_state = IB_QPS_INIT,
+ .port_num = my_qp->init_attr.port_num,
+ .pkey_index = 0,
+ .qkey = 0
+ };
+ int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT |
+ IB_QP_PKEY_INDEX | IB_QP_QKEY;
+ int smirc = internal_modify_qp(
+ ibqp, &smiqp_attr, smiqp_attr_mask, 1);
+ if (smirc) {
+ ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
+ "ehca_modify_qp() rc=%i", smirc);
+ ret = H_PARAMETER;
+ goto modify_qp_exit1;
+ }
+ qp_cur_state = IB_QPS_INIT;
+ ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded");
+ }
+ /* check whether the supplied current state matches the actual current state */
+ if ((attr_mask & IB_QP_CUR_STATE) &&
+ qp_cur_state != attr->cur_qp_state) {
+ ret = -EINVAL;
+ ehca_err(ibqp->device,
+ "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>"
+ " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x",
+ attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num);
+ goto modify_qp_exit1;
+ }
+
+ ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x "
+ "new qp_state=%x attribute_mask=%x",
+ my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
+
+ qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
+ if (!smi_reset2init &&
+ !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
+ attr_mask)) {
+ ret = -EINVAL;
+ ehca_err(ibqp->device,
+ "Invalid qp transition new_state=%x cur_state=%x "
+ "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state,
+ qp_cur_state, my_qp, ibqp->qp_num, attr_mask);
+ goto modify_qp_exit1;
+ }
+
+ mqpcb->qp_state = ib2ehca_qp_state(qp_new_state);
+ if (mqpcb->qp_state)
+ update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+ else {
+ ret = -EINVAL;
+ ehca_err(ibqp->device, "Invalid new qp state=%x "
+ "ehca_qp=%p qp_num=%x",
+ qp_new_state, my_qp, ibqp->qp_num);
+ goto modify_qp_exit1;
+ }
+
+ /* retrieve state transition struct to get req and opt attrs */
+ statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state);
+ if (statetrans < 0) {
+ ret = -EINVAL;
+ ehca_err(ibqp->device, "<INVALID STATE CHANGE> qp_cur_state=%x "
+ "new_qp_state=%x State_xsition=%x ehca_qp=%p "
+ "qp_num=%x", qp_cur_state, qp_new_state,
+ statetrans, my_qp, ibqp->qp_num);
+ goto modify_qp_exit1;
+ }
+
+ qp_attr_idx = ib2ehcaqptype(ibqp->qp_type);
+
+ if (qp_attr_idx < 0) {
+ ret = qp_attr_idx;
+ ehca_err(ibqp->device,
+ "Invalid QP type=%x ehca_qp=%p qp_num=%x",
+ ibqp->qp_type, my_qp, ibqp->qp_num);
+ goto modify_qp_exit1;
+ }
+
+ ehca_dbg(ibqp->device,
+ "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
+ my_qp, ibqp->qp_num, statetrans);
+
+ /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set
+ * in non-LL UD QPs.
+ */
+ if ((my_qp->qp_type == IB_QPT_UD) &&
+ (my_qp->ext_type != EQPT_LLQP) &&
+ (statetrans == IB_QPST_INIT2RTR) &&
+ (shca->hw_level >= 0x22)) {
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+ mqpcb->send_grh_flag = 1;
+ }
+
+ /* sqe -> rts: set purge bit of bad wqe before actual trans */
+ if ((my_qp->qp_type == IB_QPT_UD ||
+ my_qp->qp_type == IB_QPT_GSI ||
+ my_qp->qp_type == IB_QPT_SMI) &&
+ statetrans == IB_QPST_SQE2RTS) {
+ /* mark next free wqe if kernel */
+ if (!ibqp->uobject) {
+ struct ehca_wqe *wqe;
+ /* lock send queue */
+ spin_lock_irqsave(&my_qp->spinlock_s, flags);
+ squeue_locked = 1;
+ /* mark next free wqe */
+ wqe = (struct ehca_wqe *)
+ ipz_qeit_get(&my_qp->ipz_squeue);
+ wqe->optype = wqe->wqef = 0xff;
+ ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
+ ibqp->qp_num, wqe);
+ }
+ ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
+ if (ret) {
+ ehca_err(ibqp->device, "prepare_sqe_rts() failed "
+ "ehca_qp=%p qp_num=%x ret=%i",
+ my_qp, ibqp->qp_num, ret);
+ goto modify_qp_exit2;
+ }
+ }
+
+ /*
+ * enable RDMA_Atomic_Control on reset->init for connected (RC/UC) QPs;
+ * this is necessary since gen2 does not provide that flag,
+ * but pHyp requires it
+ */
+ if (statetrans == IB_QPST_RESET2INIT &&
+ (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) {
+ mqpcb->rdma_atomic_ctrl = 3;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1);
+ }
+ /* circumvention: pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */
+ if (statetrans == IB_QPST_INIT2RTR &&
+ (ibqp->qp_type == IB_QPT_UC) &&
+ !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) {
+ mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */
+ update_mask |=
+ EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ if (attr->pkey_index >= 16) {
+ ret = -EINVAL;
+ ehca_err(ibqp->device, "Invalid pkey_index=%x. "
+ "ehca_qp=%p qp_num=%x max_pkey_index=f",
+ attr->pkey_index, my_qp, ibqp->qp_num);
+ goto modify_qp_exit2;
+ }
+ mqpcb->prim_p_key_idx = attr->pkey_index;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
+ }
+ if (attr_mask & IB_QP_PORT) {
+ struct ehca_sport *sport;
+ struct ehca_qp *aqp1;
+ if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
+ ret = -EINVAL;
+ ehca_err(ibqp->device, "Invalid port=%x. "
+ "ehca_qp=%p qp_num=%x num_ports=%x",
+ attr->port_num, my_qp, ibqp->qp_num,
+ shca->num_ports);
+ goto modify_qp_exit2;
+ }
+ sport = &shca->sport[attr->port_num - 1];
+ if (!sport->ibqp_sqp[IB_QPT_GSI]) {
+ /* should not occur */
+ ret = -EFAULT;
+ ehca_err(ibqp->device, "AQP1 was not created for "
+ "port=%x", attr->port_num);
+ goto modify_qp_exit2;
+ }
+ aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI],
+ struct ehca_qp, ib_qp);
+ if (ibqp->qp_type != IB_QPT_GSI &&
+ ibqp->qp_type != IB_QPT_SMI &&
+ aqp1->mod_qp_parm) {
+ /*
+ * firmware will reject this modify_qp() because
+ * port is not activated/initialized fully
+ */
+ ret = -EFAULT;
+ ehca_warn(ibqp->device, "Couldn't modify qp port=%x: "
+ "either port is being activated (try again) "
+ "or cabling issue", attr->port_num);
+ goto modify_qp_exit2;
+ }
+ mqpcb->prim_phys_port = attr->port_num;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
+ }
+ if (attr_mask & IB_QP_QKEY) {
+ mqpcb->qkey = attr->qkey;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1);
+ }
+ if (attr_mask & IB_QP_AV) {
+ mqpcb->dlid = attr->ah_attr.dlid;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1);
+ mqpcb->source_path_bits = attr->ah_attr.src_path_bits;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1);
+ mqpcb->service_level = attr->ah_attr.sl;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1);
+
+ if (ehca_calc_ipd(shca, mqpcb->prim_phys_port,
+ attr->ah_attr.static_rate,
+ &mqpcb->max_static_rate)) {
+ ret = -EINVAL;
+ goto modify_qp_exit2;
+ }
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
+
+ /*
+ * Always supply the GRH flag, even if it's zero, to give the
+ * hypervisor a clear "yes" or "no" instead of a "perhaps"
+ */
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+
+ /*
+ * only if GRH is TRUE may we set SOURCE_GID_IDX and DEST_GID;
+ * otherwise pHyp will return H_ATTR_PARM!!!
+ */
+ if (attr->ah_attr.ah_flags == IB_AH_GRH) {
+ mqpcb->send_grh_flag = 1;
+
+ mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
+ update_mask |=
+ EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
+
+ for (cnt = 0; cnt < 16; cnt++)
+ mqpcb->dest_gid.byte[cnt] =
+ attr->ah_attr.grh.dgid.raw[cnt];
+
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1);
+ mqpcb->flow_label = attr->ah_attr.grh.flow_label;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1);
+ mqpcb->hop_limit = attr->ah_attr.grh.hop_limit;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1);
+ mqpcb->traffic_class = attr->ah_attr.grh.traffic_class;
+ update_mask |=
+ EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1);
+ }
+ }
+
+ if (attr_mask & IB_QP_PATH_MTU) {
+ /* store ld(MTU) */
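+ /* IB_MTU_256 is 1, so the MTU in bytes is 128 << path_mtu
+ * and its log2 is path_mtu + 7 */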
+ my_qp->mtu_shift = attr->path_mtu + 7;
+ mqpcb->path_mtu = attr->path_mtu;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
+ }
+ if (attr_mask & IB_QP_TIMEOUT) {
+ mqpcb->timeout = attr->timeout;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1);
+ }
+ if (attr_mask & IB_QP_RETRY_CNT) {
+ mqpcb->retry_count = attr->retry_cnt;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1);
+ }
+ if (attr_mask & IB_QP_RNR_RETRY) {
+ mqpcb->rnr_retry_count = attr->rnr_retry;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1);
+ }
+ if (attr_mask & IB_QP_RQ_PSN) {
+ mqpcb->receive_psn = attr->rq_psn;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1);
+ }
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
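+ /* values above 2 are clamped to 2; the same clamping is applied to
+ * max_rd_atomic below */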
+ mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ?
+ attr->max_dest_rd_atomic : 2;
+ update_mask |=
+ EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
+ }
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ?
+ attr->max_rd_atomic : 2;
+ update_mask |=
+ EHCA_BMASK_SET
+ (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1);
+ }
+ if (attr_mask & IB_QP_ALT_PATH) {
+ if (attr->alt_port_num < 1
+ || attr->alt_port_num > shca->num_ports) {
+ ret = -EINVAL;
+ ehca_err(ibqp->device, "Invalid alt_port=%x. "
+ "ehca_qp=%p qp_num=%x num_ports=%x",
+ attr->alt_port_num, my_qp, ibqp->qp_num,
+ shca->num_ports);
+ goto modify_qp_exit2;
+ }
+ mqpcb->alt_phys_port = attr->alt_port_num;
+
+ if (attr->alt_pkey_index >= 16) {
+ ret = -EINVAL;
+ ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. "
+ "ehca_qp=%p qp_num=%x max_pkey_index=f",
+ attr->alt_pkey_index, my_qp, ibqp->qp_num);
+ goto modify_qp_exit2;
+ }
+ mqpcb->alt_p_key_idx = attr->alt_pkey_index;
+
+ mqpcb->timeout_al = attr->alt_timeout;
+ mqpcb->dlid_al = attr->alt_ah_attr.dlid;
+ mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
+ mqpcb->service_level_al = attr->alt_ah_attr.sl;
+
+ if (ehca_calc_ipd(shca, mqpcb->alt_phys_port,
+ attr->alt_ah_attr.static_rate,
+ &mqpcb->max_static_rate_al)) {
+ ret = -EINVAL;
+ goto modify_qp_exit2;
+ }
+
+ /* OpenIB doesn't support alternate retry counts - copy them */
+ mqpcb->retry_count_al = mqpcb->retry_count;
+ mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count;
+
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1);
+
+ /*
+ * Always supply the GRH flag, even if it's zero, to give the
+ * hypervisor a clear "yes" or "no" instead of a "perhaps"
+ */
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
+
+ /*
+ * only if GRH is TRUE may we set SOURCE_GID_IDX and DEST_GID;
+ * otherwise pHyp will return H_ATTR_PARM!!!
+ */
+ if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
+ mqpcb->send_grh_flag_al = 1;
+
+ for (cnt = 0; cnt < 16; cnt++)
+ mqpcb->dest_gid_al.byte[cnt] =
+ attr->alt_ah_attr.grh.dgid.raw[cnt];
+ mqpcb->source_gid_idx_al =
+ attr->alt_ah_attr.grh.sgid_index;
+ mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
+ mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
+ mqpcb->traffic_class_al =
+ attr->alt_ah_attr.grh.traffic_class;
+
+ update_mask |=
+ EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) |
+ EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
+ }
+ }
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+ mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer;
+ update_mask |=
+ EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1);
+ }
+
+ if (attr_mask & IB_QP_SQ_PSN) {
+ mqpcb->send_psn = attr->sq_psn;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1);
+ }
+
+ if (attr_mask & IB_QP_DEST_QPN) {
+ mqpcb->dest_qp_nr = attr->dest_qp_num;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1);
+ }
+
+ if (attr_mask & IB_QP_PATH_MIG_STATE) {
+ if (attr->path_mig_state != IB_MIG_REARM
+ && attr->path_mig_state != IB_MIG_MIGRATED) {
+ ret = -EINVAL;
+ ehca_err(ibqp->device, "Invalid mig_state=%x",
+ attr->path_mig_state);
+ goto modify_qp_exit2;
+ }
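+ /* the firmware encoding is offset by one from the IB enum,
+ * cf. ehca_query_qp() */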
+ mqpcb->path_migration_state = attr->path_mig_state + 1;
+ if (attr->path_mig_state == IB_MIG_REARM)
+ my_qp->mig_armed = 1;
+ update_mask |=
+ EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
+ }
+
+ if (attr_mask & IB_QP_CAP) {
+ mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1;
+ update_mask |=
+ EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1);
+ mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1;
+ update_mask |=
+ EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1);
+ /* no support for max_send/recv_sge yet */
+ }
+
+ if (ehca_debug_level >= 2)
+ ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num);
+
+ h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle,
+ &my_qp->pf,
+ update_mask,
+ mqpcb, my_qp->galpas.kernel);
+
+ if (h_ret != H_SUCCESS) {
+ ret = ehca2ib_return_code(h_ret);
+ ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%li "
+ "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
+ goto modify_qp_exit2;
+ }
+
+ if ((my_qp->qp_type == IB_QPT_UD ||
+ my_qp->qp_type == IB_QPT_GSI ||
+ my_qp->qp_type == IB_QPT_SMI) &&
+ statetrans == IB_QPST_SQE2RTS) {
+ /* doorbell to reprocessing wqes */
+ iosync(); /* serialize GAL register access */
+ hipz_update_sqa(my_qp, bad_wqe_cnt-1);
+ ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt);
+ }
+
+ if (statetrans == IB_QPST_RESET2INIT ||
+ statetrans == IB_QPST_INIT2INIT) {
+ mqpcb->qp_enable = 1;
+ mqpcb->qp_state = EHCA_QPS_INIT;
+ update_mask = 0;
+ update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
+
+ h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle,
+ &my_qp->pf,
+ update_mask,
+ mqpcb,
+ my_qp->galpas.kernel);
+
+ if (h_ret != H_SUCCESS) {
+ ret = ehca2ib_return_code(h_ret);
+ ehca_err(ibqp->device, "ENABLE in context of "
+ "RESET_2_INIT failed! Maybe you didn't get "
+ "a LID h_ret=%li ehca_qp=%p qp_num=%x",
+ h_ret, my_qp, ibqp->qp_num);
+ goto modify_qp_exit2;
+ }
+ }
+ if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) {
+ ret = check_for_left_cqes(my_qp, shca);
+ if (ret)
+ goto modify_qp_exit2;
+ }
+
+ if (statetrans == IB_QPST_ANY2RESET) {
+ ipz_qeit_reset(&my_qp->ipz_rqueue);
+ ipz_qeit_reset(&my_qp->ipz_squeue);
+
+ if (qp_cur_state == IB_QPS_ERR) {
+ del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
+ if (HAS_RQ(my_qp))
+ del_from_err_list(my_qp->recv_cq,
+ &my_qp->rq_err_node);
+ }
+ reset_queue_map(&my_qp->sq_map);
+
+ if (HAS_RQ(my_qp))
+ reset_queue_map(&my_qp->rq_map);
+ }
+
+ if (attr_mask & IB_QP_QKEY)
+ my_qp->qkey = attr->qkey;
+
+modify_qp_exit2:
+ if (squeue_locked) { /* this means: sqe -> rts */
+ spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
+ my_qp->sqerr_purgeflag = 1;
+ }
+
+modify_qp_exit1:
+ ehca_free_fw_ctrlblock(mqpcb);
+
+ return ret;
+}
+
+int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata)
+{
+ int ret = 0;
+
+ struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+ ib_device);
+ struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+
+ /* The if-block below caches the qp_attr to be modified for GSI and SMI
+ * qps during initialization by ib_mad. When the respective port is
+ * activated, i.e. we receive a PORT_ACTIVE event, we replay the cached
+ * sequence of modify calls; see ehca_recover_sqp() below.
+ * Why this is required:
+ * 1) If only one port is connected, older code required that port one
+ * be the connected port and that the user pass the module option
+ * nr_ports=1, which is very inconvenient for the end user.
+ * 2) Firmware accepts modify_qp() only after the respective port has
+ * become active. Older code had a 30 sec wait loop in
+ * create_qp()/define_aqp1(), which is not appropriate in practice.
+ * This code removes that wait loop, see define_aqp1(), and always
+ * reports all ports to ib_mad and users. Only activated ports are
+ * then usable for the users.
+ */
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
+ int port = my_qp->init_attr.port_num;
+ struct ehca_sport *sport = &shca->sport[port - 1];
+ unsigned long flags;
+ spin_lock_irqsave(&sport->mod_sqp_lock, flags);
+ /* cache qp_attr only during init */
+ if (my_qp->mod_qp_parm) {
+ struct ehca_mod_qp_parm *p;
+ if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) {
+ ehca_err(&shca->ib_device,
+ "mod_qp_parm overflow state=%x port=%x"
+ " type=%x", attr->qp_state,
+ my_qp->init_attr.port_num,
+ ibqp->qp_type);
+ spin_unlock_irqrestore(&sport->mod_sqp_lock,
+ flags);
+ return -EINVAL;
+ }
+ p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx];
+ p->mask = attr_mask;
+ p->attr = *attr;
+ my_qp->mod_qp_parm_idx++;
+ ehca_dbg(&shca->ib_device,
+ "Saved qp_attr for state=%x port=%x type=%x",
+ attr->qp_state, my_qp->init_attr.port_num,
+ ibqp->qp_type);
+ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+ goto out;
+ }
+ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+ }
+
+ ret = internal_modify_qp(ibqp, attr, attr_mask, 0);
+
+out:
+ if ((ret == 0) && (attr_mask & IB_QP_STATE))
+ my_qp->state = attr->qp_state;
+
+ return ret;
+}
+
+void ehca_recover_sqp(struct ib_qp *sqp)
+{
+ struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp);
+ int port = my_sqp->init_attr.port_num;
+ struct ib_qp_attr attr;
+ struct ehca_mod_qp_parm *qp_parm;
+ int i, qp_parm_idx, ret;
+ unsigned long flags, wr_cnt;
+
+ if (!my_sqp->mod_qp_parm)
+ return;
+ ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num);
+
+ qp_parm = my_sqp->mod_qp_parm;
+ qp_parm_idx = my_sqp->mod_qp_parm_idx;
+ for (i = 0; i < qp_parm_idx; i++) {
+ attr = qp_parm[i].attr;
+ ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0);
+ if (ret) {
+ ehca_err(sqp->device, "Could not modify SQP port=%x "
+ "qp_num=%x ret=%x", port, sqp->qp_num, ret);
+ goto free_qp_parm;
+ }
+ ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x",
+ port, sqp->qp_num, attr.qp_state);
+ }
+
+ /* re-trigger posted recv wrs */
+ wr_cnt = my_sqp->ipz_rqueue.current_q_offset /
+ my_sqp->ipz_rqueue.qe_size;
+ if (wr_cnt) {
+ spin_lock_irqsave(&my_sqp->spinlock_r, flags);
+ hipz_update_rqa(my_sqp, wr_cnt);
+ spin_unlock_irqrestore(&my_sqp->spinlock_r, flags);
+ ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx",
+ port, sqp->qp_num, wr_cnt);
+ }
+
+free_qp_parm:
+ kfree(qp_parm);
+ /* this prevents subsequent calls to modify_qp() from caching qp_attr */
+ my_sqp->mod_qp_parm = NULL;
+}
+
+int ehca_query_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+ struct ehca_shca *shca = container_of(qp->device, struct ehca_shca,
+ ib_device);
+ struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
+ struct hcp_modify_qp_control_block *qpcb;
+ int cnt, ret = 0;
+ u64 h_ret;
+
+ if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
+ ehca_err(qp->device, "Invalid attribute mask "
+ "ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
+ my_qp, qp->qp_num, qp_attr_mask);
+ return -EINVAL;
+ }
+
+ qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!qpcb) {
+ ehca_err(qp->device, "Out of memory for qpcb "
+ "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
+ return -ENOMEM;
+ }
+
+ h_ret = hipz_h_query_qp(adapter_handle,
+ my_qp->ipz_qp_handle,
+ &my_qp->pf,
+ qpcb, my_qp->galpas.kernel);
+
+ if (h_ret != H_SUCCESS) {
+ ret = ehca2ib_return_code(h_ret);
+ ehca_err(qp->device, "hipz_h_query_qp() failed "
+ "ehca_qp=%p qp_num=%x h_ret=%li",
+ my_qp, qp->qp_num, h_ret);
+ goto query_qp_exit1;
+ }
+
+ qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state);
+ qp_attr->qp_state = qp_attr->cur_qp_state;
+
+ if (qp_attr->cur_qp_state == -EINVAL) {
+ ret = -EINVAL;
+ ehca_err(qp->device, "Got invalid ehca_qp_state=%x "
+ "ehca_qp=%p qp_num=%x",
+ qpcb->qp_state, my_qp, qp->qp_num);
+ goto query_qp_exit1;
+ }
+
+ if (qp_attr->qp_state == IB_QPS_SQD)
+ qp_attr->sq_draining = 1;
+
+ qp_attr->qkey = qpcb->qkey;
+ qp_attr->path_mtu = qpcb->path_mtu;
+ qp_attr->path_mig_state = qpcb->path_migration_state - 1;
+ qp_attr->rq_psn = qpcb->receive_psn;
+ qp_attr->sq_psn = qpcb->send_psn;
+ qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
+ qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1;
+ qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1;
+ /* UD_AV CIRCUMVENTION */
+ if (my_qp->qp_type == IB_QPT_UD) {
+ qp_attr->cap.max_send_sge =
+ qpcb->actual_nr_sges_in_sq_wqe - 2;
+ qp_attr->cap.max_recv_sge =
+ qpcb->actual_nr_sges_in_rq_wqe - 2;
+ } else {
+ qp_attr->cap.max_send_sge =
+ qpcb->actual_nr_sges_in_sq_wqe;
+ qp_attr->cap.max_recv_sge =
+ qpcb->actual_nr_sges_in_rq_wqe;
+ }
+
+ qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
+ qp_attr->dest_qp_num = qpcb->dest_qp_nr;
+
+ qp_attr->pkey_index =
+ EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx);
+
+ qp_attr->port_num =
+ EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port);
+
+ qp_attr->timeout = qpcb->timeout;
+ qp_attr->retry_cnt = qpcb->retry_count;
+ qp_attr->rnr_retry = qpcb->rnr_retry_count;
+
+ qp_attr->alt_pkey_index =
+ EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx);
+
+ qp_attr->alt_port_num = qpcb->alt_phys_port;
+ qp_attr->alt_timeout = qpcb->timeout_al;
+
+ qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res;
+ qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp;
+
+ /* primary av */
+ qp_attr->ah_attr.sl = qpcb->service_level;
+
+ if (qpcb->send_grh_flag) {
+ qp_attr->ah_attr.ah_flags = IB_AH_GRH;
+ }
+
+ qp_attr->ah_attr.static_rate = qpcb->max_static_rate;
+ qp_attr->ah_attr.dlid = qpcb->dlid;
+ qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits;
+ qp_attr->ah_attr.port_num = qp_attr->port_num;
+
+ /* primary GRH */
+ qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class;
+ qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit;
+ qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx;
+ qp_attr->ah_attr.grh.flow_label = qpcb->flow_label;
+
+ for (cnt = 0; cnt < 16; cnt++)
+ qp_attr->ah_attr.grh.dgid.raw[cnt] =
+ qpcb->dest_gid.byte[cnt];
+
+ /* alternate AV */
+ qp_attr->alt_ah_attr.sl = qpcb->service_level_al;
+ if (qpcb->send_grh_flag_al) {
+ qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH;
+ }
+
+ qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al;
+ qp_attr->alt_ah_attr.dlid = qpcb->dlid_al;
+ qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al;
+
+ /* alternate GRH */
+ qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al;
+ qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al;
+ qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al;
+ qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al;
+
+ for (cnt = 0; cnt < 16; cnt++)
+ qp_attr->alt_ah_attr.grh.dgid.raw[cnt] =
+ qpcb->dest_gid_al.byte[cnt];
+
+ /* return init attributes given in ehca_create_qp */
+ if (qp_init_attr)
+ *qp_init_attr = my_qp->init_attr;
+
+ if (ehca_debug_level >= 2)
+ ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num);
+
+query_qp_exit1:
+ ehca_free_fw_ctrlblock(qpcb);
+
+ return ret;
+}
+
+int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
+{
+ struct ehca_qp *my_qp =
+ container_of(ibsrq, struct ehca_qp, ib_srq);
+ struct ehca_shca *shca =
+ container_of(ibsrq->pd->device, struct ehca_shca, ib_device);
+ struct hcp_modify_qp_control_block *mqpcb;
+ u64 update_mask;
+ u64 h_ret;
+ int ret = 0;
+
+ mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!mqpcb) {
+ ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb "
+ "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
+ return -ENOMEM;
+ }
+
+ update_mask = 0;
+ if (attr_mask & IB_SRQ_LIMIT) {
+ attr_mask &= ~IB_SRQ_LIMIT;
+ update_mask |=
+ EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1)
+ | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1);
+ mqpcb->curr_srq_limit =
+ EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit);
+ mqpcb->qp_aff_asyn_ev_log_reg =
+ EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1);
+ }
+
+ /* by now, all bits in attr_mask should have been cleared */
+ if (attr_mask) {
+ ehca_err(ibsrq->device, "invalid attribute mask bits set "
+ "attr_mask=%x", attr_mask);
+ ret = -EINVAL;
+ goto modify_srq_exit0;
+ }
+
+ if (ehca_debug_level >= 2)
+ ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
+
+ h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
+ NULL, update_mask, mqpcb,
+ my_qp->galpas.kernel);
+
+ if (h_ret != H_SUCCESS) {
+ ret = ehca2ib_return_code(h_ret);
+ ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%li "
+ "ehca_qp=%p qp_num=%x",
+ h_ret, my_qp, my_qp->real_qp_num);
+ }
+
+modify_srq_exit0:
+ ehca_free_fw_ctrlblock(mqpcb);
+
+ return ret;
+}
+
+int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
+{
+ struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
+ struct ehca_shca *shca = container_of(srq->device, struct ehca_shca,
+ ib_device);
+ struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
+ struct hcp_modify_qp_control_block *qpcb;
+ int ret = 0;
+ u64 h_ret;
+
+ qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!qpcb) {
+ ehca_err(srq->device, "Out of memory for qpcb "
+ "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num);
+ return -ENOMEM;
+ }
+
+ h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle,
+ NULL, qpcb, my_qp->galpas.kernel);
+
+ if (h_ret != H_SUCCESS) {
+ ret = ehca2ib_return_code(h_ret);
+ ehca_err(srq->device, "hipz_h_query_qp() failed "
+ "ehca_qp=%p qp_num=%x h_ret=%li",
+ my_qp, my_qp->real_qp_num, h_ret);
+ goto query_srq_exit1;
+ }
+
+ srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
+ srq_attr->max_sge = 3;
+ srq_attr->srq_limit = EHCA_BMASK_GET(
+ MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit);
+
+ if (ehca_debug_level >= 2)
+ ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
+
+query_srq_exit1:
+ ehca_free_fw_ctrlblock(qpcb);
+
+ return ret;
+}
+
+static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+ struct ib_uobject *uobject)
+{
+ struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
+ struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+ ib_pd);
+ struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1];
+ u32 qp_num = my_qp->real_qp_num;
+ int ret;
+ u64 h_ret;
+ u8 port_num;
+ enum ib_qp_type qp_type;
+ unsigned long flags;
+
+ if (uobject) {
+ if (my_qp->mm_count_galpa ||
+ my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
+ ehca_err(dev, "Resources still referenced in "
+ "user space qp_num=%x", qp_num);
+ return -EINVAL;
+ }
+ }
+
+ if (my_qp->send_cq) {
+ ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
+ if (ret) {
+ ehca_err(dev, "Couldn't unassign qp from "
+ "send_cq ret=%i qp_num=%x cq_num=%x", ret,
+ qp_num, my_qp->send_cq->cq_number);
+ return ret;
+ }
+ }
+
+ write_lock_irqsave(&ehca_qp_idr_lock, flags);
+ idr_remove(&ehca_qp_idr, my_qp->token);
+ write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+ /*
+ * SRQs will never get into an error list and do not have a recv_cq,
+ * so we need to skip them here.
+ */
+ if (HAS_RQ(my_qp) && !IS_SRQ(my_qp))
+ del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
+
+ if (HAS_SQ(my_qp))
+ del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
+ /* now wait until all pending events have completed */
+ wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
+
+ h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li "
+ "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
+ return ehca2ib_return_code(h_ret);
+ }
+
+ port_num = my_qp->init_attr.port_num;
+ qp_type = my_qp->init_attr.qp_type;
+
+ if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
+ spin_lock_irqsave(&sport->mod_sqp_lock, flags);
+ kfree(my_qp->mod_qp_parm);
+ my_qp->mod_qp_parm = NULL;
+ shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL;
+ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+ }
+
+ /* no support for IB_QPT_SMI yet */
+ if (qp_type == IB_QPT_GSI) {
+ struct ib_event event;
+ ehca_info(dev, "device %s: port %x is inactive.",
+ shca->ib_device.name, port_num);
+ event.device = &shca->ib_device;
+ event.event = IB_EVENT_PORT_ERR;
+ event.element.port_num = port_num;
+ shca->sport[port_num - 1].port_state = IB_PORT_DOWN;
+ ib_dispatch_event(&event);
+ }
+
+ if (HAS_RQ(my_qp)) {
+ ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+
+ vfree(my_qp->rq_map.map);
+ }
+ if (HAS_SQ(my_qp)) {
+ ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
+
+ vfree(my_qp->sq_map.map);
+ }
+ kmem_cache_free(qp_cache, my_qp);
+ atomic_dec(&shca->num_qps);
+ return 0;
+}
+
+int ehca_destroy_qp(struct ib_qp *qp)
+{
+ return internal_destroy_qp(qp->device,
+ container_of(qp, struct ehca_qp, ib_qp),
+ qp->uobject);
+}
+
+int ehca_destroy_srq(struct ib_srq *srq)
+{
+ return internal_destroy_qp(srq->device,
+ container_of(srq, struct ehca_qp, ib_srq),
+ srq->uobject);
+}
+
+int ehca_init_qp_cache(void)
+{
+ qp_cache = kmem_cache_create("ehca_cache_qp",
+ sizeof(struct ehca_qp), 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!qp_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void ehca_cleanup_qp_cache(void)
+{
+ if (qp_cache)
+ kmem_cache_destroy(qp_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
new file mode 100644
index 0000000..00a648f
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -0,0 +1,956 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * post_send/recv, poll_cq, req_notify
+ *
+ * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Waleri Fomin <fomin@de.ibm.com>
+ * Joachim Fenkes <fenkes@de.ibm.com>
+ * Reinhard Ernst <rernst@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <asm/system.h>
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+#include "hipz_fns.h"
+
+/* in RC traffic, insert an empty RDMA READ every this many packets */
+#define ACK_CIRC_THRESHOLD 2000000
+
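+/*
+ * Descriptive note (derived from the code below): the low QMAP_IDX_MASK bits
+ * of the wr_id written into a WQE carry the index of the matching
+ * sq_map/rq_map entry; the application's original low bits are stashed in
+ * that entry and restored via replace_wr_id() when the completion is
+ * reported.
+ */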
+static u64 replace_wr_id(u64 wr_id, u16 idx)
+{
+ u64 ret;
+
+ ret = wr_id & ~QMAP_IDX_MASK;
+ ret |= idx & QMAP_IDX_MASK;
+
+ return ret;
+}
+
+static u16 get_app_wr_id(u64 wr_id)
+{
+ return wr_id & QMAP_IDX_MASK;
+}
+
+static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
+ struct ehca_wqe *wqe_p,
+ struct ib_recv_wr *recv_wr,
+ u32 rq_map_idx)
+{
+ u8 cnt_ds;
+ if (unlikely((recv_wr->num_sge < 0) ||
+ (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {
+ ehca_gen_err("Invalid number of WQE SGE. "
+			     "num_sge=%x max_nr_of_sg=%x",
+ recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
+ return -EINVAL; /* invalid SG list length */
+ }
+
+ /* clear wqe header until sglist */
+ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
+
+ wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
+ wqe_p->nr_of_data_seg = recv_wr->num_sge;
+
+ for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
+ wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr =
+ recv_wr->sg_list[cnt_ds].addr;
+ wqe_p->u.all_rcv.sg_list[cnt_ds].lkey =
+ recv_wr->sg_list[cnt_ds].lkey;
+ wqe_p->u.all_rcv.sg_list[cnt_ds].length =
+ recv_wr->sg_list[cnt_ds].length;
+ }
+
+ if (ehca_debug_level >= 3) {
+ ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
+ ipz_rqueue);
+ ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
+ }
+
+ return 0;
+}
+
+#if defined(DEBUG_GSI_SEND_WR)
+
+/* need ib_mad struct */
+#include <rdma/ib_mad.h>
+
+static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
+{
+	int idx = 0;
+ int j;
+ while (send_wr) {
+ struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
+ struct ib_sge *sge = send_wr->sg_list;
+ ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
+ "send_flags=%x opcode=%x", idx, send_wr->wr_id,
+ send_wr->num_sge, send_wr->send_flags,
+ send_wr->opcode);
+ if (mad_hdr) {
+ ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x "
+ "mgmt_class=%x class_version=%x method=%x "
+ "status=%x class_specific=%x tid=%lx "
+ "attr_id=%x resv=%x attr_mod=%x",
+ idx, mad_hdr->base_version,
+ mad_hdr->mgmt_class,
+ mad_hdr->class_version, mad_hdr->method,
+ mad_hdr->status, mad_hdr->class_specific,
+ mad_hdr->tid, mad_hdr->attr_id,
+ mad_hdr->resv,
+ mad_hdr->attr_mod);
+ }
+ for (j = 0; j < send_wr->num_sge; j++) {
+ u8 *data = (u8 *)abs_to_virt(sge->addr);
+ ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
+ "lkey=%x",
+ idx, j, data, sge->length, sge->lkey);
+ /* assume length is n*16 */
+ ehca_dmp(data, sge->length, "send_wr#%x sge#%x",
+ idx, j);
+ sge++;
+ } /* eof for j */
+ idx++;
+ send_wr = send_wr->next;
+ } /* eof while send_wr */
+}
+
+#endif /* DEBUG_GSI_SEND_WR */
+
+static inline int ehca_write_swqe(struct ehca_qp *qp,
+ struct ehca_wqe *wqe_p,
+ const struct ib_send_wr *send_wr,
+ u32 sq_map_idx,
+ int hidden)
+{
+ u32 idx;
+ u64 dma_length;
+ struct ehca_av *my_av;
+ u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+ struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
+
+ if (unlikely((send_wr->num_sge < 0) ||
+ (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
+ ehca_gen_err("Invalid number of WQE SGE. "
+			     "num_sge=%x max_nr_of_sg=%x",
+ send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
+ return -EINVAL; /* invalid SG list length */
+ }
+
+ /* clear wqe header until sglist */
+ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
+
+ wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
+
+ qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
+ qmap_entry->reported = 0;
+ qmap_entry->cqe_req = 0;
+
+ switch (send_wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ wqe_p->optype = WQE_OPTYPE_SEND;
+ break;
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
+ break;
+ case IB_WR_RDMA_READ:
+ wqe_p->optype = WQE_OPTYPE_RDMAREAD;
+ break;
+ default:
+ ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
+ return -EINVAL; /* invalid opcode */
+ }
+
+ wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;
+
+ wqe_p->wr_flag = 0;
+
+ if ((send_wr->send_flags & IB_SEND_SIGNALED ||
+ qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
+ && !hidden) {
+ wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+ qmap_entry->cqe_req = 1;
+ }
+
+ if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
+ send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+ /* this might not work as long as HW does not support it */
+ wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data);
+ wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
+ }
+
+ wqe_p->nr_of_data_seg = send_wr->num_sge;
+
+ switch (qp->qp_type) {
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+		/* no break is intentional here */
+ case IB_QPT_UD:
+ /* IB 1.2 spec C10-15 compliance */
+ if (send_wr->wr.ud.remote_qkey & 0x80000000)
+ remote_qkey = qp->qkey;
+
+ wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
+ wqe_p->local_ee_context_qkey = remote_qkey;
+ if (unlikely(!send_wr->wr.ud.ah)) {
+ ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
+ return -EINVAL;
+ }
+ if (unlikely(send_wr->wr.ud.remote_qpn == 0)) {
+ ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
+ return -EINVAL;
+ }
+ my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
+ wqe_p->u.ud_av.ud_av = my_av->av;
+
+ /*
+ * omitted check of IB_SEND_INLINE
+ * since HW does not support it
+ */
+ for (idx = 0; idx < send_wr->num_sge; idx++) {
+ wqe_p->u.ud_av.sg_list[idx].vaddr =
+ send_wr->sg_list[idx].addr;
+ wqe_p->u.ud_av.sg_list[idx].lkey =
+ send_wr->sg_list[idx].lkey;
+ wqe_p->u.ud_av.sg_list[idx].length =
+ send_wr->sg_list[idx].length;
+ } /* eof for idx */
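+		/*
+		 * Note: SMI/GSI traffic consists of 256-byte MADs, so the
+		 * path MTU can be forced to 256 bytes (IB MTU encoding 1).
+		 */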
+ if (qp->qp_type == IB_QPT_SMI ||
+ qp->qp_type == IB_QPT_GSI)
+ wqe_p->u.ud_av.ud_av.pmtu = 1;
+ if (qp->qp_type == IB_QPT_GSI) {
+ wqe_p->pkeyi = send_wr->wr.ud.pkey_index;
+#ifdef DEBUG_GSI_SEND_WR
+ trace_send_wr_ud(send_wr);
+#endif /* DEBUG_GSI_SEND_WR */
+ }
+ break;
+
+ case IB_QPT_UC:
+ if (send_wr->send_flags & IB_SEND_FENCE)
+ wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
+ /* no break is intentional here */
+ case IB_QPT_RC:
+ /* TODO: atomic not implemented */
+ wqe_p->u.nud.remote_virtual_adress =
+ send_wr->wr.rdma.remote_addr;
+ wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;
+
+ /*
+ * omitted checking of IB_SEND_INLINE
+ * since HW does not support it
+ */
+ dma_length = 0;
+ for (idx = 0; idx < send_wr->num_sge; idx++) {
+ wqe_p->u.nud.sg_list[idx].vaddr =
+ send_wr->sg_list[idx].addr;
+ wqe_p->u.nud.sg_list[idx].lkey =
+ send_wr->sg_list[idx].lkey;
+ wqe_p->u.nud.sg_list[idx].length =
+ send_wr->sg_list[idx].length;
+ dma_length += send_wr->sg_list[idx].length;
+ } /* eof idx */
+ wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
+
+ /* unsolicited ack circumvention */
+ if (send_wr->opcode == IB_WR_RDMA_READ) {
+ /* on RDMA read, switch on and reset counters */
+ qp->message_count = qp->packet_count = 0;
+ qp->unsol_ack_circ = 1;
+ } else
+ /* else estimate #packets */
+ qp->packet_count += (dma_length >> qp->mtu_shift) + 1;
+
+ break;
+
+ default:
+ ehca_gen_err("Invalid qptype=%x", qp->qp_type);
+ return -EINVAL;
+ }
+
+ if (ehca_debug_level >= 3) {
+ ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
+		ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
+ }
+ return 0;
+}
+
+/* map_ib_wc_status converts raw cqe_status to ib_wc_status */
+static inline void map_ib_wc_status(u32 cqe_status,
+ enum ib_wc_status *wc_status)
+{
+ if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
+ switch (cqe_status & 0x3F) {
+ case 0x01:
+ case 0x21:
+ *wc_status = IB_WC_LOC_LEN_ERR;
+ break;
+ case 0x02:
+ case 0x22:
+ *wc_status = IB_WC_LOC_QP_OP_ERR;
+ break;
+ case 0x03:
+ case 0x23:
+ *wc_status = IB_WC_LOC_EEC_OP_ERR;
+ break;
+ case 0x04:
+ case 0x24:
+ *wc_status = IB_WC_LOC_PROT_ERR;
+ break;
+ case 0x05:
+ case 0x25:
+ *wc_status = IB_WC_WR_FLUSH_ERR;
+ break;
+ case 0x06:
+ *wc_status = IB_WC_MW_BIND_ERR;
+ break;
+ case 0x07: /* remote error - look into bits 20:24 */
+ switch ((cqe_status
+ & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
+ case 0x0:
+ /*
+ * PSN Sequence Error!
+ * couldn't find a matching status!
+ */
+ *wc_status = IB_WC_GENERAL_ERR;
+ break;
+ case 0x1:
+ *wc_status = IB_WC_REM_INV_REQ_ERR;
+ break;
+ case 0x2:
+ *wc_status = IB_WC_REM_ACCESS_ERR;
+ break;
+ case 0x3:
+ *wc_status = IB_WC_REM_OP_ERR;
+ break;
+ case 0x4:
+ *wc_status = IB_WC_REM_INV_RD_REQ_ERR;
+ break;
+ }
+ break;
+ case 0x08:
+ *wc_status = IB_WC_RETRY_EXC_ERR;
+ break;
+ case 0x09:
+ *wc_status = IB_WC_RNR_RETRY_EXC_ERR;
+ break;
+ case 0x0A:
+ case 0x2D:
+ *wc_status = IB_WC_REM_ABORT_ERR;
+ break;
+ case 0x0B:
+ case 0x2E:
+ *wc_status = IB_WC_INV_EECN_ERR;
+ break;
+ case 0x0C:
+ case 0x2F:
+ *wc_status = IB_WC_INV_EEC_STATE_ERR;
+ break;
+ case 0x0D:
+ *wc_status = IB_WC_BAD_RESP_ERR;
+ break;
+ case 0x10:
+ /* WQE purged */
+ *wc_status = IB_WC_WR_FLUSH_ERR;
+ break;
+ default:
+ *wc_status = IB_WC_FATAL_ERR;
+		}
+ } else
+ *wc_status = IB_WC_SUCCESS;
+}
+
+static inline int post_one_send(struct ehca_qp *my_qp,
+ struct ib_send_wr *cur_send_wr,
+ struct ib_send_wr **bad_send_wr,
+ int hidden)
+{
+ struct ehca_wqe *wqe_p;
+ int ret;
+ u32 sq_map_idx;
+ u64 start_offset = my_qp->ipz_squeue.current_q_offset;
+
+ /* get pointer next to free WQE */
+ wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
+ if (unlikely(!wqe_p)) {
+ /* too many posted work requests: queue overflow */
+ if (bad_send_wr)
+ *bad_send_wr = cur_send_wr;
+ ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
+ "qp_num=%x", my_qp->ib_qp.qp_num);
+ return -ENOMEM;
+ }
+
+ /*
+ * Get the index of the WQE in the send queue. The same index is used
+ * for writing into the sq_map.
+ */
+ sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size;
+
+ /* write a SEND WQE into the QUEUE */
+ ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden);
+ /*
+ * if something failed,
+ * reset the free entry pointer to the start value
+ */
+ if (unlikely(ret)) {
+ my_qp->ipz_squeue.current_q_offset = start_offset;
+ if (bad_send_wr)
+ *bad_send_wr = cur_send_wr;
+ ehca_err(my_qp->ib_qp.device, "Could not write WQE "
+ "qp_num=%x", my_qp->ib_qp.qp_num);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int ehca_post_send(struct ib_qp *qp,
+ struct ib_send_wr *send_wr,
+ struct ib_send_wr **bad_send_wr)
+{
+ struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+ struct ib_send_wr *cur_send_wr;
+ int wqe_cnt = 0;
+ int ret = 0;
+ unsigned long flags;
+
+ /* Reject WR if QP is in RESET, INIT or RTR state */
+ if (unlikely(my_qp->state < IB_QPS_RTS)) {
+ ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
+ my_qp->state, qp->qp_num);
+ return -EINVAL;
+ }
+
+ /* LOCK the QUEUE */
+ spin_lock_irqsave(&my_qp->spinlock_s, flags);
+
+ /* Send an empty extra RDMA read if:
+ * 1) there has been an RDMA read on this connection before
+ * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets
+ * 3) we can be sure that any previous extra RDMA read has been
+ * processed so we don't overflow the SQ
+ */
+ if (unlikely(my_qp->unsol_ack_circ &&
+ my_qp->packet_count > ACK_CIRC_THRESHOLD &&
+ my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {
+ /* insert an empty RDMA READ to fix up the remote QP state */
+ struct ib_send_wr circ_wr;
+ memset(&circ_wr, 0, sizeof(circ_wr));
+ circ_wr.opcode = IB_WR_RDMA_READ;
+ post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
+ wqe_cnt++;
+ ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
+ my_qp->message_count = my_qp->packet_count = 0;
+ }
+
+ /* loop processes list of send reqs */
+ for (cur_send_wr = send_wr; cur_send_wr != NULL;
+ cur_send_wr = cur_send_wr->next) {
+ ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
+ if (unlikely(ret)) {
+ /* if one or more WQEs were successful, don't fail */
+ if (wqe_cnt)
+ ret = 0;
+ goto post_send_exit0;
+ }
+ wqe_cnt++;
+ } /* eof for cur_send_wr */
+
+post_send_exit0:
+ iosync(); /* serialize GAL register access */
+ hipz_update_sqa(my_qp, wqe_cnt);
+ if (unlikely(ret || ehca_debug_level >= 2))
+ ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
+ my_qp, qp->qp_num, wqe_cnt, ret);
+ my_qp->message_count += wqe_cnt;
+ spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
+ return ret;
+}
+
+static int internal_post_recv(struct ehca_qp *my_qp,
+ struct ib_device *dev,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr)
+{
+ struct ib_recv_wr *cur_recv_wr;
+ struct ehca_wqe *wqe_p;
+ int wqe_cnt = 0;
+ int ret = 0;
+ u32 rq_map_idx;
+ unsigned long flags;
+ struct ehca_qmap_entry *qmap_entry;
+
+ if (unlikely(!HAS_RQ(my_qp))) {
+ ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
+ my_qp, my_qp->real_qp_num, my_qp->ext_type);
+ return -ENODEV;
+ }
+
+ /* LOCK the QUEUE */
+ spin_lock_irqsave(&my_qp->spinlock_r, flags);
+
+	/* loop processes list of recv reqs */
+ for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
+ cur_recv_wr = cur_recv_wr->next) {
+ u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
+ /* get pointer next to free WQE */
+ wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
+ if (unlikely(!wqe_p)) {
+ /* too many posted work requests: queue overflow */
+ if (bad_recv_wr)
+ *bad_recv_wr = cur_recv_wr;
+ if (wqe_cnt == 0) {
+ ret = -ENOMEM;
+ ehca_err(dev, "Too many posted WQEs "
+ "qp_num=%x", my_qp->real_qp_num);
+ }
+ goto post_recv_exit0;
+ }
+ /*
+ * Get the index of the WQE in the recv queue. The same index
+ * is used for writing into the rq_map.
+ */
+ rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
+
+ /* write a RECV WQE into the QUEUE */
+ ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
+ rq_map_idx);
+ /*
+ * if something failed,
+ * reset the free entry pointer to the start value
+ */
+ if (unlikely(ret)) {
+ my_qp->ipz_rqueue.current_q_offset = start_offset;
+ *bad_recv_wr = cur_recv_wr;
+ if (wqe_cnt == 0) {
+ ret = -EINVAL;
+ ehca_err(dev, "Could not write WQE "
+ "qp_num=%x", my_qp->real_qp_num);
+ }
+ goto post_recv_exit0;
+ }
+
+ qmap_entry = &my_qp->rq_map.map[rq_map_idx];
+ qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
+ qmap_entry->reported = 0;
+ qmap_entry->cqe_req = 1;
+
+ wqe_cnt++;
+ } /* eof for cur_recv_wr */
+
+post_recv_exit0:
+ iosync(); /* serialize GAL register access */
+ hipz_update_rqa(my_qp, wqe_cnt);
+ if (unlikely(ret || ehca_debug_level >= 2))
+ ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
+ my_qp, my_qp->real_qp_num, wqe_cnt, ret);
+ spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
+ return ret;
+}
+
+int ehca_post_recv(struct ib_qp *qp,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr)
+{
+ struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+
+ /* Reject WR if QP is in RESET state */
+ if (unlikely(my_qp->state == IB_QPS_RESET)) {
+ ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
+ my_qp->state, qp->qp_num);
+ return -EINVAL;
+ }
+
+ return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr);
+}
+
+int ehca_post_srq_recv(struct ib_srq *srq,
+ struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr)
+{
+ return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq),
+ srq->device, recv_wr, bad_recv_wr);
+}
+
+/*
+ * ib_wc_opcode table converts ehca wc opcode to ib
+ * Since we use zero to indicate invalid opcode, the actual ib opcode must
+ * be decremented!!!
+ */
+static const u8 ib_wc_opcode[256] = {
+ [0x01] = IB_WC_RECV+1,
+ [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
+ [0x04] = IB_WC_BIND_MW+1,
+ [0x08] = IB_WC_FETCH_ADD+1,
+ [0x10] = IB_WC_COMP_SWAP+1,
+ [0x20] = IB_WC_RDMA_WRITE+1,
+ [0x40] = IB_WC_RDMA_READ+1,
+ [0x80] = IB_WC_SEND+1
+};
+
+/* internal function to poll one entry of cq */
+static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
+{
+ int ret = 0, qmap_tail_idx;
+ struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+ struct ehca_cqe *cqe;
+ struct ehca_qp *my_qp;
+ struct ehca_qmap_entry *qmap_entry;
+ struct ehca_queue_map *qmap;
+ int cqe_count = 0, is_error;
+
+repoll:
+ cqe = (struct ehca_cqe *)
+ ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
+ if (!cqe) {
+ ret = -EAGAIN;
+ if (ehca_debug_level >= 3)
+ ehca_dbg(cq->device, "Completion queue is empty "
+ "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number);
+ goto poll_cq_one_exit0;
+ }
+
+ /* prevents loads being reordered across this point */
+ rmb();
+
+ cqe_count++;
+ if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
+ struct ehca_qp *qp;
+ int purgeflag;
+ unsigned long flags;
+
+ qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number);
+ if (!qp) {
+ ehca_err(cq->device, "cq_num=%x qp_num=%x "
+ "could not find qp -> ignore cqe",
+ my_cq->cq_number, cqe->local_qp_number);
+ ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
+ my_cq->cq_number, cqe->local_qp_number);
+ /* ignore this purged cqe */
+ goto repoll;
+ }
+ spin_lock_irqsave(&qp->spinlock_s, flags);
+ purgeflag = qp->sqerr_purgeflag;
+ spin_unlock_irqrestore(&qp->spinlock_s, flags);
+
+ if (purgeflag) {
+ ehca_dbg(cq->device,
+ "Got CQE with purged bit qp_num=%x src_qp=%x",
+ cqe->local_qp_number, cqe->remote_qp_number);
+ if (ehca_debug_level >= 2)
+ ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
+ cqe->local_qp_number,
+ cqe->remote_qp_number);
+			/*
+			 * Ignore this CQE to avoid reporting a duplicate
+			 * completion for the bad WQE that caused the send
+			 * queue error, and clear the purge flag.
+			 */
+ qp->sqerr_purgeflag = 0;
+ goto repoll;
+ }
+ }
+
+ is_error = cqe->status & WC_STATUS_ERROR_BIT;
+
+ /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */
+ if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) {
+ ehca_dbg(cq->device,
+ "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----",
+ is_error ? "ERROR " : "", my_cq, my_cq->cq_number);
+ ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
+ my_cq, my_cq->cq_number);
+ ehca_dbg(cq->device,
+ "ehca_cq=%p cq_num=%x -------------------------",
+ my_cq, my_cq->cq_number);
+ }
+
+ read_lock(&ehca_qp_idr_lock);
+ my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
+ read_unlock(&ehca_qp_idr_lock);
+ if (!my_qp)
+ goto repoll;
+ wc->qp = &my_qp->ib_qp;
+
+ qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
+ if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
+ /* We got a send completion. */
+ qmap = &my_qp->sq_map;
+ else
+ /* We got a receive completion. */
+ qmap = &my_qp->rq_map;
+
+ /* advance the tail pointer */
+ qmap->tail = qmap_tail_idx;
+
+ if (is_error) {
+ /*
+ * set left_to_poll to 0 because in error state, we will not
+ * get any additional CQEs
+ */
+ my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
+ my_qp->sq_map.entries;
+ my_qp->sq_map.left_to_poll = 0;
+ ehca_add_to_err_list(my_qp, 1);
+
+ my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
+ my_qp->rq_map.entries;
+ my_qp->rq_map.left_to_poll = 0;
+ if (HAS_RQ(my_qp))
+ ehca_add_to_err_list(my_qp, 0);
+ }
+
+ qmap_entry = &qmap->map[qmap_tail_idx];
+ if (qmap_entry->reported) {
+ ehca_warn(cq->device, "Double cqe on qp_num=%#x",
+ my_qp->real_qp_num);
+ /* found a double cqe, discard it and read next one */
+ goto repoll;
+ }
+
+ wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
+ qmap_entry->reported = 1;
+
+ /* if left_to_poll is decremented to 0, add the QP to the error list */
+ if (qmap->left_to_poll > 0) {
+ qmap->left_to_poll--;
+ if ((my_qp->sq_map.left_to_poll == 0) &&
+ (my_qp->rq_map.left_to_poll == 0)) {
+ ehca_add_to_err_list(my_qp, 1);
+ if (HAS_RQ(my_qp))
+ ehca_add_to_err_list(my_qp, 0);
+ }
+ }
+
+ /* eval ib_wc_opcode */
+ wc->opcode = ib_wc_opcode[cqe->optype]-1;
+ if (unlikely(wc->opcode == -1)) {
+ ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
+ "ehca_cq=%p cq_num=%x",
+ cqe->optype, cqe->status, my_cq, my_cq->cq_number);
+ /* dump cqe for other infos */
+ ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
+ my_cq, my_cq->cq_number);
+ /* update also queue adder to throw away this entry!!! */
+ goto repoll;
+ }
+
+ /* eval ib_wc_status */
+ if (unlikely(is_error)) {
+ /* complete with errors */
+ map_ib_wc_status(cqe->status, &wc->status);
+ wc->vendor_err = wc->status;
+ } else
+ wc->status = IB_WC_SUCCESS;
+
+ wc->byte_len = cqe->nr_bytes_transferred;
+ wc->pkey_index = cqe->pkey_index;
+ wc->slid = cqe->rlid;
+ wc->dlid_path_bits = cqe->dlid;
+ wc->src_qp = cqe->remote_qp_number;
+ wc->wc_flags = cqe->w_completion_flags;
+ wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
+ wc->sl = cqe->service_level;
+
+poll_cq_one_exit0:
+ if (cqe_count > 0)
+ hipz_update_feca(my_cq, cqe_count);
+
+ return ret;
+}
+
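+/*
+ * Descriptive note: generate IB_WC_WR_FLUSH_ERR completions for WQEs that
+ * were posted to a QP which has since entered the error state and will
+ * therefore never get a hardware CQE.  ehca_poll_cq() drains these flush
+ * CQEs from the error lists before polling the hardware completion queue.
+ */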
+static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
+ struct ib_wc *wc, int num_entries,
+ struct ipz_queue *ipz_queue, int on_sq)
+{
+ int nr = 0;
+ struct ehca_wqe *wqe;
+ u64 offset;
+ struct ehca_queue_map *qmap;
+ struct ehca_qmap_entry *qmap_entry;
+
+ if (on_sq)
+ qmap = &my_qp->sq_map;
+ else
+ qmap = &my_qp->rq_map;
+
+ qmap_entry = &qmap->map[qmap->next_wqe_idx];
+
+ while ((nr < num_entries) && (qmap_entry->reported == 0)) {
+ /* generate flush CQE */
+
+ memset(wc, 0, sizeof(*wc));
+
+ offset = qmap->next_wqe_idx * ipz_queue->qe_size;
+ wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
+ if (!wqe) {
+ ehca_err(cq->device, "Invalid wqe offset=%#lx on "
+ "qp_num=%#x", offset, my_qp->real_qp_num);
+ return nr;
+ }
+
+ wc->wr_id = replace_wr_id(wqe->work_request_id,
+ qmap_entry->app_wr_id);
+
+ if (on_sq) {
+ switch (wqe->optype) {
+ case WQE_OPTYPE_SEND:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case WQE_OPTYPE_RDMAWRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case WQE_OPTYPE_RDMAREAD:
+ wc->opcode = IB_WC_RDMA_READ;
+ break;
+ default:
+ ehca_err(cq->device, "Invalid optype=%x",
+ wqe->optype);
+ return nr;
+ }
+ } else
+ wc->opcode = IB_WC_RECV;
+
+ if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
+ wc->ex.imm_data = wqe->immediate_data;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ }
+
+ wc->status = IB_WC_WR_FLUSH_ERR;
+
+ wc->qp = &my_qp->ib_qp;
+
+ /* mark as reported and advance next_wqe pointer */
+ qmap_entry->reported = 1;
+ qmap->next_wqe_idx++;
+ if (qmap->next_wqe_idx == qmap->entries)
+ qmap->next_wqe_idx = 0;
+ qmap_entry = &qmap->map[qmap->next_wqe_idx];
+
+ wc++; nr++;
+ }
+
+	return nr;
+}
+
+int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
+{
+ struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+ int nr;
+ struct ehca_qp *err_qp;
+ struct ib_wc *current_wc = wc;
+ int ret = 0;
+ unsigned long flags;
+ int entries_left = num_entries;
+
+ if (num_entries < 1) {
+ ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
+ "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
+ ret = -EINVAL;
+ goto poll_cq_exit0;
+ }
+
+ spin_lock_irqsave(&my_cq->spinlock, flags);
+
+ /* generate flush cqes for send queues */
+ list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
+ nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+ &err_qp->ipz_squeue, 1);
+ entries_left -= nr;
+ current_wc += nr;
+
+ if (entries_left == 0)
+ break;
+ }
+
+ /* generate flush cqes for receive queues */
+ list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
+ nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+ &err_qp->ipz_rqueue, 0);
+ entries_left -= nr;
+ current_wc += nr;
+
+ if (entries_left == 0)
+ break;
+ }
+
+ for (nr = 0; nr < entries_left; nr++) {
+ ret = ehca_poll_cq_one(cq, current_wc);
+ if (ret)
+ break;
+ current_wc++;
+ } /* eof for nr */
+ entries_left -= nr;
+
+ spin_unlock_irqrestore(&my_cq->spinlock, flags);
+ if (ret == -EAGAIN || !ret)
+ ret = num_entries - entries_left;
+
+poll_cq_exit0:
+ return ret;
+}
+
+int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
+{
+ struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
+ int ret = 0;
+
+ switch (notify_flags & IB_CQ_SOLICITED_MASK) {
+ case IB_CQ_SOLICITED:
+ hipz_set_cqx_n0(my_cq, 1);
+ break;
+ case IB_CQ_NEXT_COMP:
+ hipz_set_cqx_n1(my_cq, 1);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
+ unsigned long spl_flags;
+ spin_lock_irqsave(&my_cq->spinlock, spl_flags);
+ ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
+ spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
+ }
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
new file mode 100644
index 0000000..706d97a
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -0,0 +1,202 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * SQP functions
+ *
+ * Authors: Khadija Souissi <souissi@de.ibm.com>
+ * Heiko J Schick <schickhj@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rdma/ib_mad.h>
+
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_iverbs.h"
+#include "hcp_if.h"
+
+#define IB_MAD_STATUS_REDIRECT __constant_htons(0x0002)
+#define IB_MAD_STATUS_UNSUP_VERSION __constant_htons(0x0004)
+#define IB_MAD_STATUS_UNSUP_METHOD __constant_htons(0x0008)
+
+#define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001)
+
+/**
+ * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When the special
+ * queue pair has been created successfully, the corresponding port becomes
+ * active.
+ *
+ * Defining special queue pair 0 (SMI QP) is not supported yet.
+ *
+ * @qp_init_attr: Queue pair init attributes with port and queue pair type
+ */
+
+u64 ehca_define_sqp(struct ehca_shca *shca,
+ struct ehca_qp *ehca_qp,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ u32 pma_qp_nr, bma_qp_nr;
+ u64 ret;
+ u8 port = qp_init_attr->port_num;
+ int counter;
+
+ shca->sport[port - 1].port_state = IB_PORT_DOWN;
+
+ switch (qp_init_attr->qp_type) {
+ case IB_QPT_SMI:
+ /* function not supported yet */
+ break;
+ case IB_QPT_GSI:
+ ret = hipz_h_define_aqp1(shca->ipz_hca_handle,
+ ehca_qp->ipz_qp_handle,
+ ehca_qp->galpas.kernel,
+ (u32) qp_init_attr->port_num,
+ &pma_qp_nr, &bma_qp_nr);
+
+ if (ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device,
+ "Can't define AQP1 for port %x. h_ret=%li",
+ port, ret);
+ return ret;
+ }
+ shca->sport[port - 1].pma_qp_nr = pma_qp_nr;
+ ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x",
+ port, pma_qp_nr);
+ break;
+ default:
+ ehca_err(&shca->ib_device, "invalid qp_type=%x",
+ qp_init_attr->qp_type);
+ return H_PARAMETER;
+ }
+
+ if (ehca_nr_ports < 0) /* autodetect mode */
+ return H_SUCCESS;
+
+ for (counter = 0;
+ shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
+ counter < ehca_port_act_time;
+ counter++) {
+ ehca_dbg(&shca->ib_device, "... wait until port %x is active",
+ port);
+ msleep_interruptible(1000);
+ }
+
+ if (counter == ehca_port_act_time) {
+ ehca_err(&shca->ib_device, "Port %x is not active.", port);
+ return H_HARDWARE;
+ }
+
+ return H_SUCCESS;
+}
+
+struct ib_perf {
+ struct ib_mad_hdr mad_hdr;
+ u8 reserved[40];
+ u8 data[192];
+} __attribute__ ((packed));
+
+
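+/*
+ * Note (as set up in ehca_define_sqp()): performance management (PMA)
+ * queries are not answered here but by a queue pair provided by the
+ * firmware; GET/SET requests are redirected to it by returning a
+ * ClassPortInfo whose redirect fields point at the per-port pma_qp_nr.
+ */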
+static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ struct ib_perf *in_perf = (struct ib_perf *)in_mad;
+ struct ib_perf *out_perf = (struct ib_perf *)out_mad;
+ struct ib_class_port_info *poi =
+ (struct ib_class_port_info *)out_perf->data;
+ struct ehca_shca *shca =
+ container_of(ibdev, struct ehca_shca, ib_device);
+ struct ehca_sport *sport = &shca->sport[port_num - 1];
+
+ ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method);
+
+ *out_mad = *in_mad;
+
+ if (in_perf->mad_hdr.class_version != 1) {
+ ehca_warn(ibdev, "Unsupported class_version=%x",
+ in_perf->mad_hdr.class_version);
+ out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION;
+ goto perf_reply;
+ }
+
+ switch (in_perf->mad_hdr.method) {
+ case IB_MGMT_METHOD_GET:
+ case IB_MGMT_METHOD_SET:
+ /* set class port info for redirection */
+ out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO;
+ out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT;
+ memset(poi, 0, sizeof(*poi));
+ poi->base_version = 1;
+ poi->class_version = 1;
+ poi->resp_time_value = 18;
+ poi->redirect_lid = sport->saved_attr.lid;
+ poi->redirect_qp = sport->pma_qp_nr;
+ poi->redirect_qkey = IB_QP1_QKEY;
+ poi->redirect_pkey = IB_DEFAULT_PKEY_FULL;
+
+ ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
+ sport->saved_attr.lid, sport->pma_qp_nr);
+ break;
+
+ case IB_MGMT_METHOD_GET_RESP:
+ return IB_MAD_RESULT_FAILURE;
+
+ default:
+ out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD;
+ break;
+ }
+
+perf_reply:
+ out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad,
+ struct ib_mad *out_mad)
+{
+ int ret;
+
+ if (!port_num || port_num > ibdev->phys_port_cnt)
+ return IB_MAD_RESULT_FAILURE;
+
+ /* accept only pma request */
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
+ return IB_MAD_RESULT_SUCCESS;
+
+ ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp);
+ ret = ehca_process_perf(ibdev, port_num, in_mad, out_mad);
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
new file mode 100644
index 0000000..21f7d06
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -0,0 +1,156 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * auxiliary functions
+ *
+ * Authors: Christoph Raisch <raisch@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Khadija Souissi <souissik@de.ibm.com>
+ * Waleri Fomin <fomin@de.ibm.com>
+ * Heiko J Schick <schickhj@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef EHCA_TOOLS_H
+#define EHCA_TOOLS_H
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+
+#include <asm/atomic.h>
+#include <asm/abs_addr.h>
+#include <asm/ibmebus.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/hvcall.h>
+
+extern int ehca_debug_level;
+
+#define ehca_dbg(ib_dev, format, arg...) \
+ do { \
+ if (unlikely(ehca_debug_level)) \
+ dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
+ "PU%04x EHCA_DBG:%s " format "\n", \
+ raw_smp_processor_id(), __func__, \
+ ## arg); \
+ } while (0)
+
+#define ehca_info(ib_dev, format, arg...) \
+ dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
+ raw_smp_processor_id(), __func__, ## arg)
+
+#define ehca_warn(ib_dev, format, arg...) \
+ dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
+ raw_smp_processor_id(), __func__, ## arg)
+
+#define ehca_err(ib_dev, format, arg...) \
+ dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
+ raw_smp_processor_id(), __func__, ## arg)
+
+/* use this one only if no ib_dev available */
+#define ehca_gen_dbg(format, arg...) \
+ do { \
+ if (unlikely(ehca_debug_level)) \
+ printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \
+ raw_smp_processor_id(), __func__, ## arg); \
+ } while (0)
+
+#define ehca_gen_warn(format, arg...) \
+ printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
+ raw_smp_processor_id(), __func__, ## arg)
+
+#define ehca_gen_err(format, arg...) \
+ printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
+ raw_smp_processor_id(), __func__, ## arg)
+
+/**
+ * ehca_dmp - printk a memory block, whose length is n*8 bytes.
+ * Each line has the following layout:
+ * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
+ */
+#define ehca_dmp(adr, len, format, args...) \
+ do { \
+ unsigned int x; \
+ unsigned int l = (unsigned int)(len); \
+ unsigned char *deb = (unsigned char *)(adr); \
+ for (x = 0; x < l; x += 16) { \
+ printk(KERN_INFO "EHCA_DMP:%s " format \
+ " adr=%p ofs=%04x %016lx %016lx\n", \
+ __func__, ##args, deb, x, \
+ *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
+ deb += 16; \
+ } \
+ } while (0)
+
+/* define a bitmask, little endian version */
+#define EHCA_BMASK(pos, length) (((pos) << 16) + (length))
+
+/* define a bitmask, the ibm way... */
+#define EHCA_BMASK_IBM(from, to) (((63 - (to)) << 16) + ((to) - (from) + 1))
+
+/* internal function, don't use */
+#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff)
+
+/* internal function, don't use */
+#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff))
+
+/**
+ * EHCA_BMASK_SET - return value shifted and masked by mask
+ * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable
+ * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
+ * in variable
+ */
+#define EHCA_BMASK_SET(mask, value) \
+ ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask))
+
+/**
+ * EHCA_BMASK_GET - extract a parameter from value by mask
+ */
+#define EHCA_BMASK_GET(mask, value) \
+ (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask)))
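+/*
+ * Example: EHCA_BMASK_IBM(48, 63) names the 16 least significant bits of a
+ * 64-bit word (IBM numbering counts bit 0 as the MSB), so
+ * EHCA_BMASK_SET(EHCA_BMASK_IBM(48, 63), v) yields (v & 0xffff) and
+ * EHCA_BMASK_GET(EHCA_BMASK_IBM(48, 63), x) yields (x & 0xffff).
+ */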
+
+/* Converts ehca to ib return code */
+int ehca2ib_return_code(u64 ehca_rc);
+
+#endif /* EHCA_TOOLS_H */
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
new file mode 100644
index 0000000..e43ed8f
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -0,0 +1,307 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * userspace support verbs
+ *
+ * Authors: Christoph Raisch <raisch@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Heiko J Schick <schickhj@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
+ struct ib_udata *udata)
+{
+ struct ehca_ucontext *my_context;
+
+ my_context = kzalloc(sizeof *my_context, GFP_KERNEL);
+ if (!my_context) {
+ ehca_err(device, "Out of memory device=%p", device);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return &my_context->ib_ucontext;
+}
+
+int ehca_dealloc_ucontext(struct ib_ucontext *context)
+{
+ kfree(container_of(context, struct ehca_ucontext, ib_ucontext));
+ return 0;
+}
+
+static void ehca_mm_open(struct vm_area_struct *vma)
+{
+ u32 *count = (u32 *)vma->vm_private_data;
+ if (!count) {
+ ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
+ vma->vm_start, vma->vm_end);
+ return;
+ }
+ (*count)++;
+ if (!(*count))
+ ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx",
+ vma->vm_start, vma->vm_end);
+ ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
+ vma->vm_start, vma->vm_end, *count);
+}
+
+static void ehca_mm_close(struct vm_area_struct *vma)
+{
+ u32 *count = (u32 *)vma->vm_private_data;
+ if (!count) {
+ ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx",
+ vma->vm_start, vma->vm_end);
+ return;
+ }
+ (*count)--;
+ ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x",
+ vma->vm_start, vma->vm_end, *count);
+}
+
+static struct vm_operations_struct vm_ops = {
+ .open = ehca_mm_open,
+ .close = ehca_mm_close,
+};
+
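+/*
+ * Descriptive note: two kinds of resources are mapped into user space.
+ * Firmware register pages (galpas) are remapped non-cached via
+ * remap_4k_pfn(), while kernel-allocated queue pages are inserted one by
+ * one with vm_insert_page().  Both bump a use count so a QP cannot be
+ * destroyed while still mapped (see internal_destroy_qp()).
+ */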
+static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
+ u32 *mm_count)
+{
+ int ret;
+ u64 vsize, physical;
+
+ vsize = vma->vm_end - vma->vm_start;
+ if (vsize < EHCA_PAGESIZE) {
+ ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
+ return -EINVAL;
+ }
+
+ physical = galpas->user.fw_handle;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical);
+ /* VM_IO | VM_RESERVED are set by remap_pfn_range() */
+ ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
+ vma->vm_page_prot);
+ if (unlikely(ret)) {
+		ehca_gen_err("remap_4k_pfn() failed ret=%i", ret);
+ return -ENOMEM;
+ }
+
+ vma->vm_private_data = mm_count;
+ (*mm_count)++;
+ vma->vm_ops = &vm_ops;
+
+ return 0;
+}
+
+static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
+ u32 *mm_count)
+{
+ int ret;
+ u64 start, ofs;
+ struct page *page;
+
+ vma->vm_flags |= VM_RESERVED;
+ start = vma->vm_start;
+ for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
+ u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
+ page = virt_to_page(virt_addr);
+ ret = vm_insert_page(vma, start, page);
+ if (unlikely(ret)) {
+ ehca_gen_err("vm_insert_page() failed rc=%i", ret);
+ return ret;
+ }
+ start += PAGE_SIZE;
+ }
+ vma->vm_private_data = mm_count;
+ (*mm_count)++;
+ vma->vm_ops = &vm_ops;
+
+ return 0;
+}
+
+static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
+ u32 rsrc_type)
+{
+ int ret;
+
+ switch (rsrc_type) {
+ case 0: /* galpa fw handle */
+ ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
+ ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
+ if (unlikely(ret)) {
+ ehca_err(cq->ib_cq.device,
+ "ehca_mmap_fw() failed rc=%i cq_num=%x",
+ ret, cq->cq_number);
+ return ret;
+ }
+ break;
+
+ case 1: /* cq queue_addr */
+ ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
+ ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
+ if (unlikely(ret)) {
+ ehca_err(cq->ib_cq.device,
+ "ehca_mmap_queue() failed rc=%i cq_num=%x",
+ ret, cq->cq_number);
+ return ret;
+ }
+ break;
+
+ default:
+ ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x",
+ rsrc_type, cq->cq_number);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
+ u32 rsrc_type)
+{
+ int ret;
+
+ switch (rsrc_type) {
+ case 0: /* galpa fw handle */
+ ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
+ ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
+ if (unlikely(ret)) {
+ ehca_err(qp->ib_qp.device,
+				 "ehca_mmap_fw() failed ret=%i qp_num=%x",
+ ret, qp->ib_qp.qp_num);
+ return -ENOMEM;
+ }
+ break;
+
+ case 1: /* qp rqueue_addr */
+ ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num);
+ ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
+ &qp->mm_count_rqueue);
+ if (unlikely(ret)) {
+ ehca_err(qp->ib_qp.device,
+ "ehca_mmap_queue(rq) failed rc=%i qp_num=%x",
+ ret, qp->ib_qp.qp_num);
+ return ret;
+ }
+ break;
+
+ case 2: /* qp squeue_addr */
+ ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num);
+ ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
+ &qp->mm_count_squeue);
+ if (unlikely(ret)) {
+ ehca_err(qp->ib_qp.device,
+ "ehca_mmap_queue(sq) failed rc=%i qp_num=%x",
+ ret, qp->ib_qp.qp_num);
+ return ret;
+ }
+ break;
+
+ default:
+		ehca_err(qp->ib_qp.device, "bad resource type=%x qp_num=%x",
+ rsrc_type, qp->ib_qp.qp_num);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
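+/*
+ * The mmap offset encodes which resource to map: bits 0-24 hold the idr
+ * handle of the CQ/QP, bits 25-26 the resource type (0=firmware galpa,
+ * 1=recv/cq queue, 2=send queue) and bit 27 the queue type (0=CQ, 1=QP).
+ */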
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ u64 fileoffset = vma->vm_pgoff;
+ u32 idr_handle = fileoffset & 0x1FFFFFF;
+ u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... */
+ u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */
+ u32 ret;
+ struct ehca_cq *cq;
+ struct ehca_qp *qp;
+ struct ib_uobject *uobject;
+
+ switch (q_type) {
+ case 0: /* CQ */
+ read_lock(&ehca_cq_idr_lock);
+ cq = idr_find(&ehca_cq_idr, idr_handle);
+ read_unlock(&ehca_cq_idr_lock);
+
+ /* make sure this mmap really belongs to the authorized user */
+ if (!cq)
+ return -EINVAL;
+
+ if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
+ return -EINVAL;
+
+ ret = ehca_mmap_cq(vma, cq, rsrc_type);
+ if (unlikely(ret)) {
+ ehca_err(cq->ib_cq.device,
+ "ehca_mmap_cq() failed rc=%i cq_num=%x",
+ ret, cq->cq_number);
+ return ret;
+ }
+ break;
+
+ case 1: /* QP */
+ read_lock(&ehca_qp_idr_lock);
+ qp = idr_find(&ehca_qp_idr, idr_handle);
+ read_unlock(&ehca_qp_idr_lock);
+
+ /* make sure this mmap really belongs to the authorized user */
+ if (!qp)
+ return -EINVAL;
+
+ uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
+ if (!uobject || uobject->context != context)
+ return -EINVAL;
+
+ ret = ehca_mmap_qp(vma, qp, rsrc_type);
+ if (unlikely(ret)) {
+ ehca_err(qp->ib_qp.device,
+ "ehca_mmap_qp() failed rc=%i qp_num=%x",
+ ret, qp->ib_qp.qp_num);
+ return ret;
+ }
+ break;
+
+ default:
+ ehca_gen_err("bad queue type %x", q_type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
new file mode 100644
index 0000000..415d3a4
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -0,0 +1,945 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * Firmware Infiniband Interface code for POWER
+ *
+ * Authors: Christoph Raisch <raisch@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Joachim Fenkes <fenkes@de.ibm.com>
+ * Gerd Bayer <gerd.bayer@de.ibm.com>
+ * Waleri Fomin <fomin@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/hvcall.h>
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hcp_phyp.h"
+#include "hipz_fns.h"
+#include "ipz_pt_fn.h"
+
+#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11)
+#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12)
+#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15)
+#define H_ALL_RES_QP_STORAGE EHCA_BMASK_IBM(16, 17)
+#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18)
+#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21)
+#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23)
+#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31)
+#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35)
+#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39)
+#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63)
+
+#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15)
+#define H_ALL_RES_QP_MAX_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39)
+#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47)
+
+#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63)
+#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31)
+#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 64)
+#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63)
+
+#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15)
+#define H_ALL_RES_QP_ACT_RECV_SGE EHCA_BMASK_IBM(24, 31)
+
+#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31)
+#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63)
+
+#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47)
+#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48)
+#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49)
+
+#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx"
+#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx"
+#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx"
+
+static DEFINE_SPINLOCK(hcall_lock);
+
+static u32 get_longbusy_msecs(int longbusy_rc)
+{
+ switch (longbusy_rc) {
+ case H_LONG_BUSY_ORDER_1_MSEC:
+ return 1;
+ case H_LONG_BUSY_ORDER_10_MSEC:
+ return 10;
+ case H_LONG_BUSY_ORDER_100_MSEC:
+ return 100;
+ case H_LONG_BUSY_ORDER_1_SEC:
+ return 1000;
+ case H_LONG_BUSY_ORDER_10_SEC:
+ return 10000;
+ case H_LONG_BUSY_ORDER_100_SEC:
+ return 100000;
+ default:
+ return 1;
+ }
+}
+
+static long ehca_plpar_hcall_norets(unsigned long opcode,
+ unsigned long arg1,
+ unsigned long arg2,
+ unsigned long arg3,
+ unsigned long arg4,
+ unsigned long arg5,
+ unsigned long arg6,
+ unsigned long arg7)
+{
+ long ret;
+ int i, sleep_msecs;
+ unsigned long flags = 0;
+
+ if (unlikely(ehca_debug_level >= 2))
+ ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
+ opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+
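+	/*
+	 * Retry up to five times when the hypervisor reports long-busy,
+	 * sleeping for the hinted interval in between; give up with H_BUSY
+	 * once the retries are exhausted.
+	 */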
+ for (i = 0; i < 5; i++) {
+ /* serialize hCalls to work around firmware issue */
+ if (ehca_lock_hcalls)
+ spin_lock_irqsave(&hcall_lock, flags);
+
+ ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
+ arg5, arg6, arg7);
+
+ if (ehca_lock_hcalls)
+ spin_unlock_irqrestore(&hcall_lock, flags);
+
+ if (H_IS_LONG_BUSY(ret)) {
+ sleep_msecs = get_longbusy_msecs(ret);
+ msleep_interruptible(sleep_msecs);
+ continue;
+ }
+
+ if (ret < H_SUCCESS)
+ ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT,
+ opcode, ret, arg1, arg2, arg3,
+ arg4, arg5, arg6, arg7);
+ else
+ if (unlikely(ehca_debug_level >= 2))
+ ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret);
+
+ return ret;
+ }
+
+ return H_BUSY;
+}
+
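+/*
+ * Same serialization/retry logic as ehca_plpar_hcall_norets(), but for
+ * hCalls that return up to nine output registers in outs[].
+ */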
+static long ehca_plpar_hcall9(unsigned long opcode,
+ unsigned long *outs, /* array of 9 outputs */
+ unsigned long arg1,
+ unsigned long arg2,
+ unsigned long arg3,
+ unsigned long arg4,
+ unsigned long arg5,
+ unsigned long arg6,
+ unsigned long arg7,
+ unsigned long arg8,
+ unsigned long arg9)
+{
+ long ret;
+ int i, sleep_msecs;
+ unsigned long flags = 0;
+
+ if (unlikely(ehca_debug_level >= 2))
+ ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
+ arg1, arg2, arg3, arg4, arg5,
+ arg6, arg7, arg8, arg9);
+
+ for (i = 0; i < 5; i++) {
+ /* serialize hCalls to work around firmware issue */
+ if (ehca_lock_hcalls)
+ spin_lock_irqsave(&hcall_lock, flags);
+
+ ret = plpar_hcall9(opcode, outs,
+ arg1, arg2, arg3, arg4, arg5,
+ arg6, arg7, arg8, arg9);
+
+ if (ehca_lock_hcalls)
+ spin_unlock_irqrestore(&hcall_lock, flags);
+
+ if (H_IS_LONG_BUSY(ret)) {
+ sleep_msecs = get_longbusy_msecs(ret);
+ msleep_interruptible(sleep_msecs);
+ continue;
+ }
+
+ if (ret < H_SUCCESS) {
+ ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT,
+ opcode, arg1, arg2, arg3, arg4, arg5,
+ arg6, arg7, arg8, arg9);
+ ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
+ ret, outs[0], outs[1], outs[2], outs[3],
+ outs[4], outs[5], outs[6], outs[7],
+ outs[8]);
+ } else if (unlikely(ehca_debug_level >= 2))
+ ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
+ ret, outs[0], outs[1], outs[2], outs[3],
+ outs[4], outs[5], outs[6], outs[7],
+ outs[8]);
+ return ret;
+ }
+
+ return H_BUSY;
+}
+
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+ struct ehca_pfeq *pfeq,
+ const u32 neq_control,
+ const u32 number_of_entries,
+ struct ipz_eq_handle *eq_handle,
+ u32 *act_nr_of_entries,
+ u32 *act_pages,
+ u32 *eq_ist)
+{
+ u64 ret;
+ u64 outs[PLPAR_HCALL9_BUFSIZE];
+ u64 allocate_controls;
+
+ /* resource type */
+ allocate_controls = 3ULL;
+
+ /* ISN is associated */
+ if (neq_control != 1)
+ allocate_controls = (1ULL << (63 - 7)) | allocate_controls;
+ else /* notification event queue */
+ allocate_controls = (1ULL << 63) | allocate_controls;
+
+ ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+ adapter_handle.handle, /* r4 */
+ allocate_controls, /* r5 */
+ number_of_entries, /* r6 */
+ 0, 0, 0, 0, 0, 0);
+ eq_handle->handle = outs[0];
+ *act_nr_of_entries = (u32)outs[3];
+ *act_pages = (u32)outs[4];
+ *eq_ist = (u32)outs[5];
+
+ if (ret == H_NOT_ENOUGH_RESOURCES)
+ ehca_gen_err("Not enough resource - ret=%li ", ret);
+
+ return ret;
+}
+
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+ struct ipz_eq_handle eq_handle,
+ const u64 event_mask)
+{
+ return ehca_plpar_hcall_norets(H_RESET_EVENTS,
+ adapter_handle.handle, /* r4 */
+ eq_handle.handle, /* r5 */
+ event_mask, /* r6 */
+ 0, 0, 0, 0);
+}
+
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+ struct ehca_cq *cq,
+ struct ehca_alloc_cq_parms *param)
+{
+ u64 ret;
+ u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+ ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+ adapter_handle.handle, /* r4 */
+ 2, /* r5 */
+ param->eq_handle.handle, /* r6 */
+ cq->token, /* r7 */
+ param->nr_cqe, /* r8 */
+ 0, 0, 0, 0);
+ cq->ipz_cq_handle.handle = outs[0];
+ param->act_nr_of_entries = (u32)outs[3];
+ param->act_pages = (u32)outs[4];
+
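+ /* outs[5]/outs[6]: addresses of the kernel and user register pages (galpas) */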
+ if (ret == H_SUCCESS)
+ hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
+
+ if (ret == H_NOT_ENOUGH_RESOURCES)
+ ehca_gen_err("Not enough resources. ret=%li", ret);
+
+ return ret;
+}
+
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+ struct ehca_alloc_qp_parms *parms)
+{
+ u64 ret;
+ u64 allocate_controls, max_r10_reg, r11, r12;
+ u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+ allocate_controls =
+ EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE,
+ parms->squeue.page_size)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE,
+ parms->rqueue.page_size)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
+ !!(parms->ll_comp_flags & LLQP_RECV_COMP))
+ | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
+ !!(parms->ll_comp_flags & LLQP_SEND_COMP))
+ | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
+ parms->ud_av_l_key_ctl)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
+
+ max_r10_reg =
+ EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
+ parms->squeue.max_wr + 1)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
+ parms->rqueue.max_wr + 1)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
+ parms->squeue.max_sge)
+ | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
+ parms->rqueue.max_sge);
+
+ r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
+
+ if (parms->ext_type == EQPT_SRQ)
+ r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit);
+ else
+ r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn);
+
+ ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+ adapter_handle.handle, /* r4 */
+ allocate_controls, /* r5 */
+ parms->send_cq_handle.handle,
+ parms->recv_cq_handle.handle,
+ parms->eq_handle.handle,
+ ((u64)parms->token << 32) | parms->pd.value,
+ max_r10_reg, r11, r12);
+
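+ /* copy back the actual QP parameters granted by the hypervisor */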
+ parms->qp_handle.handle = outs[0];
+ parms->real_qp_num = (u32)outs[1];
+ parms->squeue.act_nr_wqes =
+ (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
+ parms->rqueue.act_nr_wqes =
+ (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
+ parms->squeue.act_nr_sges =
+ (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
+ parms->rqueue.act_nr_sges =
+ (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
+ parms->squeue.queue_size =
+ (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
+ parms->rqueue.queue_size =
+ (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
+
+ if (ret == H_SUCCESS)
+ hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);
+
+ if (ret == H_NOT_ENOUGH_RESOURCES)
+ ehca_gen_err("Not enough resources. ret=%li", ret);
+
+ return ret;
+}
+
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+ const u8 port_id,
+ struct hipz_query_port *query_port_response_block)
+{
+ u64 ret;
+ u64 r_cb = virt_to_abs(query_port_response_block);
+
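+ /* the hypervisor expects a page-aligned real address for the response block */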
+ if (r_cb & (EHCA_PAGESIZE-1)) {
+ ehca_gen_err("response block not page aligned");
+ return H_PARAMETER;
+ }
+
+ ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
+ adapter_handle.handle, /* r4 */
+ port_id, /* r5 */
+ r_cb, /* r6 */
+ 0, 0, 0, 0);
+
+ if (ehca_debug_level >= 2)
+ ehca_dmp(query_port_response_block, 64, "response_block");
+
+ return ret;
+}
+
+u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
+ const u8 port_id, const u32 port_cap,
+ const u8 init_type, const int modify_mask)
+{
+ u64 port_attributes = port_cap;
+
+ if (modify_mask & IB_PORT_SHUTDOWN)
+ port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1);
+ if (modify_mask & IB_PORT_INIT_TYPE)
+ port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type);
+ if (modify_mask & IB_PORT_RESET_QKEY_CNTR)
+ port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1);
+
+ return ehca_plpar_hcall_norets(H_MODIFY_PORT,
+ adapter_handle.handle, /* r4 */
+ port_id, /* r5 */
+ port_attributes, /* r6 */
+ 0, 0, 0, 0);
+}
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+ struct hipz_query_hca *query_hca_rblock)
+{
+ u64 r_cb = virt_to_abs(query_hca_rblock);
+
+ if (r_cb & (EHCA_PAGESIZE-1)) {
+ ehca_gen_err("response_block=%p not page aligned",
+ query_hca_rblock);
+ return H_PARAMETER;
+ }
+
+ return ehca_plpar_hcall_norets(H_QUERY_HCA,
+ adapter_handle.handle, /* r4 */
+ r_cb, /* r5 */
+ 0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+ const u8 pagesize,
+ const u8 queue_type,
+ const u64 resource_handle,
+ const u64 logical_address_of_page,
+ u64 count)
+{
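+ /* r5 packs the queue type in bits 0-7 and the page size in bits 8-15 */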
+ return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
+ adapter_handle.handle, /* r4 */
+ (u64)queue_type | ((u64)pagesize) << 8,
+ /* r5 */
+ resource_handle, /* r6 */
+ logical_address_of_page, /* r7 */
+ count, /* r8 */
+ 0, 0);
+}
+
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_eq_handle eq_handle,
+ struct ehca_pfeq *pfeq,
+ const u8 pagesize,
+ const u8 queue_type,
+ const u64 logical_address_of_page,
+ const u64 count)
+{
+ if (count != 1) {
+ ehca_gen_err("Ppage counter=%lx", count);
+ return H_PARAMETER;
+ }
+ return hipz_h_register_rpage(adapter_handle,
+ pagesize,
+ queue_type,
+ eq_handle.handle,
+ logical_address_of_page, count);
+}
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
+ u32 ist)
+{
+ u64 ret;
+ ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
+ adapter_handle.handle, /* r4 */
+ ist, /* r5 */
+ 0, 0, 0, 0, 0);
+
+ if (ret != H_SUCCESS && ret != H_BUSY)
+ ehca_gen_err("Could not query interrupt state.");
+
+ return ret;
+}
+
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_cq_handle cq_handle,
+ struct ehca_pfcq *pfcq,
+ const u8 pagesize,
+ const u8 queue_type,
+ const u64 logical_address_of_page,
+ const u64 count,
+ const struct h_galpa gal)
+{
+ if (count != 1) {
+ ehca_gen_err("Page counter=%lx", count);
+ return H_PARAMETER;
+ }
+
+ return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+ cq_handle.handle, logical_address_of_page,
+ count);
+}
+
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct ehca_pfqp *pfqp,
+ const u8 pagesize,
+ const u8 queue_type,
+ const u64 logical_address_of_page,
+ const u64 count,
+ const struct h_galpa galpa)
+{
+ if (count > 1) {
+ ehca_gen_err("Page counter=%lx", count);
+ return H_PARAMETER;
+ }
+
+ return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+ qp_handle.handle, logical_address_of_page,
+ count);
+}
+
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct ehca_pfqp *pfqp,
+ void **log_addr_next_sq_wqe2processed,
+ void **log_addr_next_rq_wqe2processed,
+ int dis_and_get_function_code)
+{
+ u64 ret;
+ u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+ ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+ adapter_handle.handle, /* r4 */
+ dis_and_get_function_code, /* r5 */
+ qp_handle.handle, /* r6 */
+ 0, 0, 0, 0, 0, 0);
+ if (log_addr_next_sq_wqe2processed)
+ *log_addr_next_sq_wqe2processed = (void *)outs[0];
+ if (log_addr_next_rq_wqe2processed)
+ *log_addr_next_rq_wqe2processed = (void *)outs[1];
+
+ return ret;
+}
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct ehca_pfqp *pfqp,
+ const u64 update_mask,
+ struct hcp_modify_qp_control_block *mqpcb,
+ struct h_galpa gal)
+{
+ u64 ret;
+ u64 outs[PLPAR_HCALL9_BUFSIZE];
+ ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
+ adapter_handle.handle, /* r4 */
+ qp_handle.handle, /* r5 */
+ update_mask, /* r6 */
+ virt_to_abs(mqpcb), /* r7 */
+ 0, 0, 0, 0, 0);
+
+ if (ret == H_NOT_ENOUGH_RESOURCES)
+ ehca_gen_err("Insufficient resources ret=%li", ret);
+
+ return ret;
+}
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct ehca_pfqp *pfqp,
+ struct hcp_modify_qp_control_block *qqpcb,
+ struct h_galpa gal)
+{
+ return ehca_plpar_hcall_norets(H_QUERY_QP,
+ adapter_handle.handle, /* r4 */
+ qp_handle.handle, /* r5 */
+ virt_to_abs(qqpcb), /* r6 */
+ 0, 0, 0, 0);
+}
+
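+/*
+ * Destroy a QP: unmap its register pages, disable it via H_DISABLE_AND_GETC
+ * (function code 1), then free the firmware resource.
+ */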
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+ struct ehca_qp *qp)
+{
+ u64 ret;
+ u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+ ret = hcp_galpas_dtor(&qp->galpas);
+ if (ret) {
+ ehca_gen_err("Could not destruct qp->galpas");
+ return H_RESOURCE;
+ }
+ ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+ adapter_handle.handle, /* r4 */
+ /* function code */
+ 1, /* r5 */
+ qp->ipz_qp_handle.handle, /* r6 */
+ 0, 0, 0, 0, 0, 0);
+ if (ret == H_HARDWARE)
+ ehca_gen_err("HCA not operational. ret=%li", ret);
+
+ ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+ adapter_handle.handle, /* r4 */
+ qp->ipz_qp_handle.handle, /* r5 */
+ 0, 0, 0, 0, 0);
+
+ if (ret == H_RESOURCE)
+ ehca_gen_err("Resource still in use. ret=%li", ret);
+
+ return ret;
+}
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct h_galpa gal,
+ u32 port)
+{
+ return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
+ adapter_handle.handle, /* r4 */
+ qp_handle.handle, /* r5 */
+ port, /* r6 */
+ 0, 0, 0, 0);
+}
+
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct h_galpa gal,
+ u32 port, u32 * pma_qp_nr,
+ u32 * bma_qp_nr)
+{
+ u64 ret;
+ u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+ ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
+ adapter_handle.handle, /* r4 */
+ qp_handle.handle, /* r5 */
+ port, /* r6 */
+ 0, 0, 0, 0, 0, 0);
+ *pma_qp_nr = (u32)outs[0];
+ *bma_qp_nr = (u32)outs[1];
+
+ if (ret == H_ALIAS_EXIST)
+ ehca_gen_err("AQP1 already exists. ret=%li", ret);
+
+ return ret;
+}
+
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct h_galpa gal,
+ u16 mcg_dlid,
+ u64 subnet_prefix, u64 interface_id)
+{
+ u64 ret;
+
+ ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
+ adapter_handle.handle, /* r4 */
+ qp_handle.handle, /* r5 */
+ mcg_dlid, /* r6 */
+ interface_id, /* r7 */
+ subnet_prefix, /* r8 */
+ 0, 0);
+
+ if (ret == H_NOT_ENOUGH_RESOURCES)
+ ehca_gen_err("Not enough resources. ret=%li", ret);
+
+ return ret;
+}
+
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct h_galpa gal,
+ u16 mcg_dlid,
+ u64 subnet_prefix, u64 interface_id)
+{
+ return ehca_plpar_hcall_norets(H_DETACH_MCQP,
+ adapter_handle.handle, /* r4 */
+ qp_handle.handle, /* r5 */
+ mcg_dlid, /* r6 */
+ interface_id, /* r7 */
+ subnet_prefix, /* r8 */
+ 0, 0);
+}
+
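+/*
+ * Destroy a CQ: unmap its register pages, then free the firmware resource;
+ * force_flag is passed through to H_FREE_RESOURCE in r6.
+ */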
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+ struct ehca_cq *cq,
+ u8 force_flag)
+{
+ u64 ret;
+
+ ret = hcp_galpas_dtor(&cq->galpas);
+ if (ret) {
+ ehca_gen_err("Could not destruct cp->galpas");
+ return H_RESOURCE;
+ }
+
+ ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+ adapter_handle.handle, /* r4 */
+ cq->ipz_cq_handle.handle, /* r5 */
+ force_flag != 0 ? 1L : 0L, /* r6 */
+ 0, 0, 0, 0);
+
+ if (ret == H_RESOURCE)
+ ehca_gen_err("H_FREE_RESOURCE failed ret=%li ", ret);
+
+ return ret;
+}
+
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+ struct ehca_eq *eq)
+{
+ u64 ret;
+
+ ret = hcp_galpas_dtor(&eq->galpas);
+ if (ret) {
+ ehca_gen_err("Could not destruct eq->galpas");
+ return H_RESOURCE;
+ }
+
+ ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+ adapter_handle.handle, /* r4 */
+ eq->ipz_eq_handle.handle, /* r5 */
+ 0, 0, 0, 0, 0);
+
+ if (ret == H_RESOURCE)
+ ehca_gen_err("Resource in use. ret=%li ", ret);
+
+ return ret;
+}
+
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mr *mr,
+ const u64 vaddr,
+ const u64 length,
+ const u32 access_ctrl,
+ const struct ipz_pd pd,
+ struct ehca_mr_hipzout_parms *outparms)
+{
+ u64 ret;
+ u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+ ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+ adapter_handle.handle, /* r4 */
+ 5, /* r5 */
+ vaddr, /* r6 */
+ length, /* r7 */
+ (((u64)access_ctrl) << 32ULL), /* r8 */
+ pd.value, /* r9 */
+ 0, 0, 0);
+ outparms->handle.handle = outs[0];
+ outparms->lkey = (u32)outs[2];
+ outparms->rkey = (u32)outs[3];
+
+ return ret;
+}
+
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mr *mr,
+ const u8 pagesize,
+ const u8 queue_type,
+ const u64 logical_address_of_page,
+ const u64 count)
+{
+ u64 ret;
+
+ if (unlikely(ehca_debug_level >= 3)) {
+ if (count > 1) {
+ u64 *kpage;
+ int i;
+ kpage = (u64 *)abs_to_virt(logical_address_of_page);
+ for (i = 0; i < count; i++)
+ ehca_gen_dbg("kpage[%d]=%p",
+ i, (void *)kpage[i]);
+ } else
+ ehca_gen_dbg("kpage=%p",
+ (void *)logical_address_of_page);
+ }
+
+ if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
+ ehca_gen_err("logical_address_of_page not on a 4k boundary "
+ "adapter_handle=%lx mr=%p mr_handle=%lx "
+ "pagesize=%x queue_type=%x "
+ "logical_address_of_page=%lx count=%lx",
+ adapter_handle.handle, mr,
+ mr->ipz_mr_handle.handle, pagesize, queue_type,
+ logical_address_of_page, count);
+ ret = H_PARAMETER;
+ } else
+ ret = hipz_h_register_rpage(adapter_handle, pagesize,
+ queue_type,
+ mr->ipz_mr_handle.handle,
+ logical_address_of_page, count);
+ return ret;
+}
+
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mr *mr,
+ struct ehca_mr_hipzout_parms *outparms)
+{
+ u64 ret;
+ u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+ ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
+ adapter_handle.handle, /* r4 */
+ mr->ipz_mr_handle.handle, /* r5 */
+ 0, 0, 0, 0, 0, 0, 0);
+ outparms->len = outs[0];
+ outparms->vaddr = outs[1];
+ outparms->acl = outs[4] >> 32;
+ outparms->lkey = (u32)(outs[5] >> 32);
+ outparms->rkey = (u32)(outs[5] & (0xffffffff));
+
+ return ret;
+}
+
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mr *mr)
+{
+ return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+ adapter_handle.handle, /* r4 */
+ mr->ipz_mr_handle.handle, /* r5 */
+ 0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mr *mr,
+ const u64 vaddr_in,
+ const u64 length,
+ const u32 access_ctrl,
+ const struct ipz_pd pd,
+ const u64 mr_addr_cb,
+ struct ehca_mr_hipzout_parms *outparms)
+{
+ u64 ret;
+ u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+ ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
+ adapter_handle.handle, /* r4 */
+ mr->ipz_mr_handle.handle, /* r5 */
+ vaddr_in, /* r6 */
+ length, /* r7 */
+ /* r8 */
+ ((((u64)access_ctrl) << 32ULL) | pd.value),
+ mr_addr_cb, /* r9 */
+ 0, 0, 0);
+ outparms->vaddr = outs[1];
+ outparms->lkey = (u32)outs[2];
+ outparms->rkey = (u32)outs[3];
+
+ return ret;
+}
+
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mr *mr,
+ const struct ehca_mr *orig_mr,
+ const u64 vaddr_in,
+ const u32 access_ctrl,
+ const struct ipz_pd pd,
+ struct ehca_mr_hipzout_parms *outparms)
+{
+ u64 ret;
+ u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+ ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
+ adapter_handle.handle, /* r4 */
+ orig_mr->ipz_mr_handle.handle, /* r5 */
+ vaddr_in, /* r6 */
+ (((u64)access_ctrl) << 32ULL), /* r7 */
+ pd.value, /* r8 */
+ 0, 0, 0, 0);
+ outparms->handle.handle = outs[0];
+ outparms->lkey = (u32)outs[2];
+ outparms->rkey = (u32)outs[3];
+
+ return ret;
+}
+
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mw *mw,
+ const struct ipz_pd pd,
+ struct ehca_mw_hipzout_parms *outparms)
+{
+ u64 ret;
+ u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+ ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+ adapter_handle.handle, /* r4 */
+ 6, /* r5 */
+ pd.value, /* r6 */
+ 0, 0, 0, 0, 0, 0);
+ outparms->handle.handle = outs[0];
+ outparms->rkey = (u32)outs[3];
+
+ return ret;
+}
+
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mw *mw,
+ struct ehca_mw_hipzout_parms *outparms)
+{
+ u64 ret;
+ u64 outs[PLPAR_HCALL9_BUFSIZE];
+
+ ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
+ adapter_handle.handle, /* r4 */
+ mw->ipz_mw_handle.handle, /* r5 */
+ 0, 0, 0, 0, 0, 0, 0);
+ outparms->rkey = (u32)outs[3];
+
+ return ret;
+}
+
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mw *mw)
+{
+ return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+ adapter_handle.handle, /* r4 */
+ mw->ipz_mw_handle.handle, /* r5 */
+ 0, 0, 0, 0, 0);
+}
+
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+ const u64 ressource_handle,
+ void *rblock,
+ unsigned long *byte_count)
+{
+ u64 r_cb = virt_to_abs(rblock);
+
+ if (r_cb & (EHCA_PAGESIZE-1)) {
+ ehca_gen_err("rblock not page aligned.");
+ return H_PARAMETER;
+ }
+
+ return ehca_plpar_hcall_norets(H_ERROR_DATA,
+ adapter_handle.handle,
+ ressource_handle,
+ r_cb,
+ 0, 0, 0, 0);
+}
+
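+/*
+ * Signal end of interrupt: build an XIRR value from the most-significant
+ * priority byte (0xff) and the interrupt source number, then issue H_EOI.
+ */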
+u64 hipz_h_eoi(int irq)
+{
+ unsigned long xirr;
+
+ iosync();
+ xirr = (0xffULL << 24) | irq;
+
+ return plpar_hcall_norets(H_EOI, xirr);
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
new file mode 100644
index 0000000..2c3c6e0
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -0,0 +1,265 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * Firmware Infiniband Interface code for POWER
+ *
+ * Authors: Christoph Raisch <raisch@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Gerd Bayer <gerd.bayer@de.ibm.com>
+ * Waleri Fomin <fomin@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HCP_IF_H__
+#define __HCP_IF_H__
+
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "hipz_hw.h"
+
+/*
+ * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initializes
+ * the resources and creates the empty EQPT (ring).
+ */
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+ struct ehca_pfeq *pfeq,
+ const u32 neq_control,
+ const u32 number_of_entries,
+ struct ipz_eq_handle *eq_handle,
+ u32 * act_nr_of_entries,
+ u32 * act_pages,
+ u32 * eq_ist);
+
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+ struct ipz_eq_handle eq_handle,
+ const u64 event_mask);
+/*
+ * hipz_h_alloc_resource_cq allocates CQ resources in HW and FW, initializes
+ * the resources and creates the empty CQPT (ring).
+ */
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+ struct ehca_cq *cq,
+ struct ehca_alloc_cq_parms *param);
+
+
+/*
+ * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
+ * initializes the resources and creates the empty QPPTs (2 rings).
+ */
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+ struct ehca_alloc_qp_parms *parms);
+
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+ const u8 port_id,
+ struct hipz_query_port *query_port_response_block);
+
+u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
+ const u8 port_id, const u32 port_cap,
+ const u8 init_type, const int modify_mask);
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+ struct hipz_query_hca *query_hca_rblock);
+
+/*
+ * hipz_h_register_rpage is the common internal function for all
+ * H_REGISTER_RPAGES calls.
+ */
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+ const u8 pagesize,
+ const u8 queue_type,
+ const u64 resource_handle,
+ const u64 logical_address_of_page,
+ u64 count);
+
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_eq_handle eq_handle,
+ struct ehca_pfeq *pfeq,
+ const u8 pagesize,
+ const u8 queue_type,
+ const u64 logical_address_of_page,
+ const u64 count);
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle
+ hcp_adapter_handle,
+ u32 ist);
+
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_cq_handle cq_handle,
+ struct ehca_pfcq *pfcq,
+ const u8 pagesize,
+ const u8 queue_type,
+ const u64 logical_address_of_page,
+ const u64 count,
+ const struct h_galpa gal);
+
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct ehca_pfqp *pfqp,
+ const u8 pagesize,
+ const u8 queue_type,
+ const u64 logical_address_of_page,
+ const u64 count,
+ const struct h_galpa galpa);
+
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct ehca_pfqp *pfqp,
+ void **log_addr_next_sq_wqe_tb_processed,
+ void **log_addr_next_rq_wqe_tb_processed,
+ int dis_and_get_function_code);
+enum hcall_sigt {
+ HCALL_SIGT_NO_CQE = 0,
+ HCALL_SIGT_BY_WQE = 1,
+ HCALL_SIGT_EVERY = 2
+};
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct ehca_pfqp *pfqp,
+ const u64 update_mask,
+ struct hcp_modify_qp_control_block *mqpcb,
+ struct h_galpa gal);
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct ehca_pfqp *pfqp,
+ struct hcp_modify_qp_control_block *qqpcb,
+ struct h_galpa gal);
+
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+ struct ehca_qp *qp);
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct h_galpa gal,
+ u32 port);
+
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct h_galpa gal,
+ u32 port, u32 * pma_qp_nr,
+ u32 * bma_qp_nr);
+
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct h_galpa gal,
+ u16 mcg_dlid,
+ u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+ const struct ipz_qp_handle qp_handle,
+ struct h_galpa gal,
+ u16 mcg_dlid,
+ u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+ struct ehca_cq *cq,
+ u8 force_flag);
+
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+ struct ehca_eq *eq);
+
+/*
+ * hipz_h_alloc_resource_mr allocates MR resources in HW and FW and
+ * initializes the resources.
+ */
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mr *mr,
+ const u64 vaddr,
+ const u64 length,
+ const u32 access_ctrl,
+ const struct ipz_pd pd,
+ struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mr *mr,
+ const u8 pagesize,
+ const u8 queue_type,
+ const u64 logical_address_of_page,
+ const u64 count);
+
+/* hipz_h_query_mr queries MR in HW and FW */
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mr *mr,
+ struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mr frees MR resources in HW and FW */
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mr *mr);
+
+/* hipz_h_reregister_pmr reregisters MR in HW and FW */
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mr *mr,
+ const u64 vaddr_in,
+ const u64 length,
+ const u32 access_ctrl,
+ const struct ipz_pd pd,
+ const u64 mr_addr_cb,
+ struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_smr registers a shared MR in HW and FW */
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mr *mr,
+ const struct ehca_mr *orig_mr,
+ const u64 vaddr_in,
+ const u32 access_ctrl,
+ const struct ipz_pd pd,
+ struct ehca_mr_hipzout_parms *outparms);
+
+/*
+ * hipz_h_alloc_resource_mw allocates MW resources in HW and FW and
+ * initializes the resources.
+ */
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mw *mw,
+ const struct ipz_pd pd,
+ struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_query_mw queries MW in HW and FW */
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mw *mw,
+ struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mw frees MW resources in HW and FW */
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+ const struct ehca_mw *mw);
+
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+ const u64 ressource_handle,
+ void *rblock,
+ unsigned long *byte_count);
+u64 hipz_h_eoi(int irq);
+
+#endif /* __HCP_IF_H__ */
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c
new file mode 100644
index 0000000..2148210
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -0,0 +1,80 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * load store abstraction for ehca register access with tracing
+ *
+ * Authors: Christoph Raisch <raisch@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_classes.h"
+#include "hipz_hw.h"
+
+int hcall_map_page(u64 physaddr, u64 *mapaddr)
+{
+ *mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE));
+ return 0;
+}
+
+int hcall_unmap_page(u64 mapaddr)
+{
+ iounmap((volatile void __iomem *) mapaddr);
+ return 0;
+}
+
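+/*
+ * Construct the galpas: ioremap the kernel register page into the CPU
+ * address space; the user-space page address is only recorded, not mapped.
+ */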
+int hcp_galpas_ctor(struct h_galpas *galpas,
+ u64 paddr_kernel, u64 paddr_user)
+{
+ int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
+ if (ret)
+ return ret;
+
+ galpas->user.fw_handle = paddr_user;
+
+ return 0;
+}
+
+int hcp_galpas_dtor(struct h_galpas *galpas)
+{
+ if (galpas->kernel.fw_handle) {
+ int ret = hcall_unmap_page(galpas->kernel.fw_handle);
+ if (ret)
+ return ret;
+ }
+
+ galpas->user.fw_handle = galpas->kernel.fw_handle = 0;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h
new file mode 100644
index 0000000..5305c2a
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.h
@@ -0,0 +1,90 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * Firmware calls
+ *
+ * Authors: Christoph Raisch <raisch@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Waleri Fomin <fomin@de.ibm.com>
+ * Gerd Bayer <gerd.bayer@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HCP_PHYP_H__
+#define __HCP_PHYP_H__
+
+
+/*
+ * eHCA page (mapped into memory)
+ * resource to access eHCA register pages in CPU address space
+ */
+struct h_galpa {
+ u64 fw_handle;
+ /* for pSeries this is a 64bit memory address where
+ I/O memory is mapped into CPU address space (kv) */
+};
+
+/*
+ * resource to access eHCA address space registers, all types
+ */
+struct h_galpas {
+ u32 pid; /* PID of userspace galpa checking */
+ struct h_galpa user; /* user space accessible resource,
+ set to 0 if unused */
+ struct h_galpa kernel; /* kernel space accessible resource,
+ set to 0 if unused */
+};
+
+static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset)
+{
+ u64 addr = galpa.fw_handle + offset;
+ return *(volatile u64 __force *)addr;
+}
+
+static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
+{
+ u64 addr = galpa.fw_handle + offset;
+ *(volatile u64 __force *)addr = value;
+}
+
+int hcp_galpas_ctor(struct h_galpas *galpas,
+ u64 paddr_kernel, u64 paddr_user);
+
+int hcp_galpas_dtor(struct h_galpas *galpas);
+
+int hcall_map_page(u64 physaddr, u64 * mapaddr);
+
+int hcall_unmap_page(u64 mapaddr);
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/infiniband/hw/ehca/hipz_fns.h
new file mode 100644
index 0000000..9dac93d
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_fns.h
@@ -0,0 +1,68 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * HW abstraction register functions
+ *
+ * Authors: Christoph Raisch <raisch@de.ibm.com>
+ * Reinhard Ernst <rernst@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_FNS_H__
+#define __HIPZ_FNS_H__
+
+#include "ehca_classes.h"
+#include "hipz_hw.h"
+
+#include "hipz_fns_core.h"
+
+#define hipz_galpa_store_eq(gal, offset, value) \
+ hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_eq(gal, offset) \
+ hipz_galpa_load(gal, EQTEMM_OFFSET(offset))
+
+#define hipz_galpa_store_qped(gal, offset, value) \
+ hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_qped(gal, offset) \
+ hipz_galpa_load(gal, QPEDMM_OFFSET(offset))
+
+#define hipz_galpa_store_mrmw(gal, offset, value) \
+ hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_mrmw(gal, offset) \
+ hipz_galpa_load(gal, MRMWMM_OFFSET(offset))
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h
new file mode 100644
index 0000000..868735f
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h
@@ -0,0 +1,100 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * HW abstraction register functions
+ *
+ * Authors: Christoph Raisch <raisch@de.ibm.com>
+ * Heiko J Schick <schickhj@de.ibm.com>
+ * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ * Reinhard Ernst <rernst@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_FNS_CORE_H__
+#define __HIPZ_FNS_CORE_H__
+
+#include "hcp_phyp.h"
+#include "hipz_hw.h"
+
+#define hipz_galpa_store_cq(gal, offset, value) \
+ hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_cq(gal, offset) \
+ hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
+
+#define hipz_galpa_store_qp(gal, offset, value) \
+ hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
+#define hipz_galpa_load_qp(gal, offset) \
+ hipz_galpa_load(gal, QPTEMM_OFFSET(offset))
+
+static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
+{
+ /* ringing doorbell :-) */
+ hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa,
+ EHCA_BMASK_SET(QPX_SQADDER, nr_wqes));
+}
+
+static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes)
+{
+ /* ringing doorbell :-) */
+ hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa,
+ EHCA_BMASK_SET(QPX_RQADDER, nr_wqes));
+}
+
+static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes)
+{
+ hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca,
+ EHCA_BMASK_SET(CQX_FECADDER, nr_cqes));
+}
+
+static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value)
+{
+ u64 cqx_n0_reg;
+
+ hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0,
+ EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT,
+ value));
+ cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0);
+}
+
+static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value)
+{
+ u64 cqx_n1_reg;
+
+ hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1,
+ EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value));
+ cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1);
+}
+
+#endif /* __HIPZ_FNS_CORE_H__ */
diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h
new file mode 100644
index 0000000..bf996c7
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_hw.h
@@ -0,0 +1,414 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * eHCA register definitions
+ *
+ * Authors: Waleri Fomin <fomin@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ * Reinhard Ernst <rernst@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_HW_H__
+#define __HIPZ_HW_H__
+
+#include "ehca_tools.h"
+
+#define EHCA_MAX_MTU 4
+
+/* QP Table Entry Memory Map */
+struct hipz_qptemm {
+ u64 qpx_hcr;
+ u64 qpx_c;
+ u64 qpx_herr;
+ u64 qpx_aer;
+/* 0x20*/
+ u64 qpx_sqa;
+ u64 qpx_sqc;
+ u64 qpx_rqa;
+ u64 qpx_rqc;
+/* 0x40*/
+ u64 qpx_st;
+ u64 qpx_pmstate;
+ u64 qpx_pmfa;
+ u64 qpx_pkey;
+/* 0x60*/
+ u64 qpx_pkeya;
+ u64 qpx_pkeyb;
+ u64 qpx_pkeyc;
+ u64 qpx_pkeyd;
+/* 0x80*/
+ u64 qpx_qkey;
+ u64 qpx_dqp;
+ u64 qpx_dlidp;
+ u64 qpx_portp;
+/* 0xa0*/
+ u64 qpx_slidp;
+ u64 qpx_slidpp;
+ u64 qpx_dlida;
+ u64 qpx_porta;
+/* 0xc0*/
+ u64 qpx_slida;
+ u64 qpx_slidpa;
+ u64 qpx_slvl;
+ u64 qpx_ipd;
+/* 0xe0*/
+ u64 qpx_mtu;
+ u64 qpx_lato;
+ u64 qpx_rlimit;
+ u64 qpx_rnrlimit;
+/* 0x100*/
+ u64 qpx_t;
+ u64 qpx_sqhp;
+ u64 qpx_sqptp;
+ u64 qpx_nspsn;
+/* 0x120*/
+ u64 qpx_nspsnhwm;
+ u64 reserved1;
+ u64 qpx_sdsi;
+ u64 qpx_sdsbc;
+/* 0x140*/
+ u64 qpx_sqwsize;
+ u64 qpx_sqwts;
+ u64 qpx_lsn;
+ u64 qpx_nssn;
+/* 0x160 */
+ u64 qpx_mor;
+ u64 qpx_cor;
+ u64 qpx_sqsize;
+ u64 qpx_erc;
+/* 0x180*/
+ u64 qpx_rnrrc;
+ u64 qpx_ernrwt;
+ u64 qpx_rnrresp;
+ u64 qpx_lmsna;
+/* 0x1a0 */
+ u64 qpx_sqhpc;
+ u64 qpx_sqcptp;
+ u64 qpx_sigt;
+ u64 qpx_wqecnt;
+/* 0x1c0*/
+ u64 qpx_rqhp;
+ u64 qpx_rqptp;
+ u64 qpx_rqsize;
+ u64 qpx_nrr;
+/* 0x1e0*/
+ u64 qpx_rdmac;
+ u64 qpx_nrpsn;
+ u64 qpx_lapsn;
+ u64 qpx_lcr;
+/* 0x200*/
+ u64 qpx_rwc;
+ u64 qpx_rwva;
+ u64 qpx_rdsi;
+ u64 qpx_rdsbc;
+/* 0x220*/
+ u64 qpx_rqwsize;
+ u64 qpx_crmsn;
+ u64 qpx_rdd;
+ u64 qpx_larpsn;
+/* 0x240*/
+ u64 qpx_pd;
+ u64 qpx_scqn;
+ u64 qpx_rcqn;
+ u64 qpx_aeqn;
+/* 0x260*/
+ u64 qpx_aaelog;
+ u64 qpx_ram;
+ u64 qpx_rdmaqe0;
+ u64 qpx_rdmaqe1;
+/* 0x280*/
+ u64 qpx_rdmaqe2;
+ u64 qpx_rdmaqe3;
+ u64 qpx_nrpsnhwm;
+/* 0x298*/
+ u64 reserved[(0x400 - 0x298) / 8];
+/* 0x400 extended data */
+ u64 reserved_ext[(0x500 - 0x400) / 8];
+/* 0x500 */
+ u64 reserved2[(0x1000 - 0x500) / 8];
+/* 0x1000 */
+};
+
+#define QPX_SQADDER EHCA_BMASK_IBM(48, 63)
+#define QPX_RQADDER EHCA_BMASK_IBM(48, 63)
+#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3)
+
+#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x)
+
+/* MRMWPT Entry Memory Map */
+struct hipz_mrmwmm {
+ /* 0x00 */
+ u64 mrx_hcr;
+
+ u64 mrx_c;
+ u64 mrx_herr;
+ u64 mrx_aer;
+ /* 0x20 */
+ u64 mrx_pp;
+ u64 reserved1;
+ u64 reserved2;
+ u64 reserved3;
+ /* 0x40 */
+ u64 reserved4[(0x200 - 0x40) / 8];
+ /* 0x200 */
+ u64 mrx_ctl[64];
+
+};
+
+#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x)
+
+struct hipz_qpedmm {
+ /* 0x00 */
+ u64 reserved0[(0x400) / 8];
+ /* 0x400 */
+ u64 qpedx_phh;
+ u64 qpedx_ppsgp;
+ /* 0x410 */
+ u64 qpedx_ppsgu;
+ u64 qpedx_ppdgp;
+ /* 0x420 */
+ u64 qpedx_ppdgu;
+ u64 qpedx_aph;
+ /* 0x430 */
+ u64 qpedx_apsgp;
+ u64 qpedx_apsgu;
+ /* 0x440 */
+ u64 qpedx_apdgp;
+ u64 qpedx_apdgu;
+ /* 0x450 */
+ u64 qpedx_apav;
+ u64 qpedx_apsav;
+ /* 0x460 */
+ u64 qpedx_hcr;
+ u64 reserved1[4];
+ /* 0x488 */
+ u64 qpedx_rrl0;
+ /* 0x490 */
+ u64 qpedx_rrrkey0;
+ u64 qpedx_rrva0;
+ /* 0x4a0 */
+ u64 reserved2;
+ u64 qpedx_rrl1;
+ /* 0x4b0 */
+ u64 qpedx_rrrkey1;
+ u64 qpedx_rrva1;
+ /* 0x4c0 */
+ u64 reserved3;
+ u64 qpedx_rrl2;
+ /* 0x4d0 */
+ u64 qpedx_rrrkey2;
+ u64 qpedx_rrva2;
+ /* 0x4e0 */
+ u64 reserved4;
+ u64 qpedx_rrl3;
+ /* 0x4f0 */
+ u64 qpedx_rrrkey3;
+ u64 qpedx_rrva3;
+};
+
+#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x)
+
+/* CQ Table Entry Memory Map */
+struct hipz_cqtemm {
+ u64 cqx_hcr;
+ u64 cqx_c;
+ u64 cqx_herr;
+ u64 cqx_aer;
+/* 0x20 */
+ u64 cqx_ptp;
+ u64 cqx_tp;
+ u64 cqx_fec;
+ u64 cqx_feca;
+/* 0x40 */
+ u64 cqx_ep;
+ u64 cqx_eq;
+/* 0x50 */
+ u64 reserved1;
+ u64 cqx_n0;
+/* 0x60 */
+ u64 cqx_n1;
+ u64 reserved2[(0x1000 - 0x60) / 8];
+/* 0x1000 */
+};
+
+#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63)
+#define CQX_FECADDER EHCA_BMASK_IBM(32, 63)
+#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0)
+#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0)
+
+#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x)
+
+/* EQ Table Entry Memory Map */
+struct hipz_eqtemm {
+ u64 eqx_hcr;
+ u64 eqx_c;
+
+ u64 eqx_herr;
+ u64 eqx_aer;
+/* 0x20 */
+ u64 eqx_ptp;
+ u64 eqx_tp;
+ u64 eqx_ssba;
+ u64 eqx_psba;
+
+/* 0x40 */
+ u64 eqx_cec;
+ u64 eqx_meql;
+ u64 eqx_xisbi;
+ u64 eqx_xisc;
+/* 0x60 */
+ u64 eqx_it;
+
+};
+
+#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x)
+
+/* access control defines for MR/MW */
+#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000
+#define HIPZ_ACCESSCTRL_R_WRITE 0x00400000
+#define HIPZ_ACCESSCTRL_R_READ 0x00200000
+#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000
+#define HIPZ_ACCESSCTRL_MW_BIND 0x00080000
+
+/* query hca response block */
+struct hipz_query_hca {
+ u32 cur_reliable_dg;
+ u32 cur_qp;
+ u32 cur_cq;
+ u32 cur_eq;
+ u32 cur_mr;
+ u32 cur_mw;
+ u32 cur_ee_context;
+ u32 cur_mcast_grp;
+ u32 cur_qp_attached_mcast_grp;
+ u32 reserved1;
+ u32 cur_ipv6_qp;
+ u32 cur_eth_qp;
+ u32 cur_hp_mr;
+ u32 reserved2[3];
+ u32 max_rd_domain;
+ u32 max_qp;
+ u32 max_cq;
+ u32 max_eq;
+ u32 max_mr;
+ u32 max_hp_mr;
+ u32 max_mw;
+ u32 max_mrwpte;
+ u32 max_special_mrwpte;
+ u32 max_rd_ee_context;
+ u32 max_mcast_grp;
+ u32 max_total_mcast_qp_attach;
+ u32 max_mcast_qp_attach;
+ u32 max_raw_ipv6_qp;
+ u32 max_raw_ethy_qp;
+ u32 internal_clock_frequency;
+ u32 max_pd;
+ u32 max_ah;
+ u32 max_cqe;
+ u32 max_wqes_wq;
+ u32 max_partitions;
+ u32 max_rr_ee_context;
+ u32 max_rr_qp;
+ u32 max_rr_hca;
+ u32 max_act_wqs_ee_context;
+ u32 max_act_wqs_qp;
+ u32 max_sge;
+ u32 max_sge_rd;
+ u32 memory_page_size_supported;
+ u64 max_mr_size;
+ u32 local_ca_ack_delay;
+ u32 num_ports;
+ u32 vendor_id;
+ u32 vendor_part_id;
+ u32 hw_ver;
+ u64 node_guid;
+ u64 hca_cap_indicators;
+ u32 data_counter_register_size;
+ u32 max_shared_rq;
+ u32 max_isns_eq;
+ u32 max_neq;
+} __attribute__ ((packed));
+
+#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0)
+#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1)
+#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2)
+#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3)
+#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4)
+#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5)
+#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6)
+#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7)
+#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8)
+#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9)
+#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10)
+#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11)
+#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12)
+#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13)
+#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16)
+#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17)
+#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18)
+#define HCA_CAP_H_ALLOC_RES_SYNC EHCA_BMASK_IBM(19, 19)
+
+/* query port response block */
+struct hipz_query_port {
+ u32 state;
+ u32 bad_pkey_cntr;
+ u32 lmc;
+ u32 lid;
+ u32 subnet_timeout;
+ u32 qkey_viol_cntr;
+ u32 sm_sl;
+ u32 sm_lid;
+ u32 capability_mask;
+ u32 init_type_reply;
+ u32 pkey_tbl_len;
+ u32 gid_tbl_len;
+ u64 gid_prefix;
+ u32 port_nr;
+ u16 pkey_entries[16];
+ u8 reserved1[32];
+ u32 trent_size;
+ u32 trbuf_size;
+ u64 max_msg_sz;
+ u32 max_mtu;
+ u32 vl_cap;
+ u32 phys_pstate;
+ u32 phys_state;
+ u32 phys_speed;
+ u32 phys_width;
+ u8 reserved2[1884];
+ u64 guid_entries[255];
+} __attribute__ ((packed));
+
+#endif
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
new file mode 100644
index 0000000..c3a3284
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -0,0 +1,284 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * internal queue handling
+ *
+ * Authors: Waleri Fomin <fomin@de.ibm.com>
+ * Reinhard Ernst <rernst@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ehca_tools.h"
+#include "ipz_pt_fn.h"
+#include "ehca_classes.h"
+
+#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT)
+
+struct kmem_cache *small_qp_cache;
+
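+/*
+ * Return the current queue page and advance the iterator by one page;
+ * returns NULL once the end of the queue has been reached.
+ */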
+void *ipz_qpageit_get_inc(struct ipz_queue *queue)
+{
+ void *ret = ipz_qeit_get(queue);
+ queue->current_q_offset += queue->pagesize;
+ if (queue->current_q_offset > queue->queue_length) {
+ queue->current_q_offset -= queue->pagesize;
+ ret = NULL;
+ }
+ if (((u64)ret) % queue->pagesize) {
+ ehca_gen_err("ERROR!! not at PAGE-Boundary");
+ return NULL;
+ }
+ return ret;
+}
+
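+/*
+ * Return the current EQ entry and advance the iterator; wrap around to the
+ * start of the queue and flip the toggle state when the end is reached.
+ */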
+void *ipz_qeit_eq_get_inc(struct ipz_queue *queue)
+{
+ void *ret = ipz_qeit_get(queue);
+ u64 last_entry_in_q = queue->queue_length - queue->qe_size;
+
+ queue->current_q_offset += queue->qe_size;
+ if (queue->current_q_offset > last_entry_in_q) {
+ queue->current_q_offset = 0;
+ queue->toggle_state = (~queue->toggle_state) & 1;
+ }
+
+ return ret;
+}
+
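+/* translate an absolute (real) address into an offset within the queue */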
+int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset)
+{
+ int i;
+ for (i = 0; i < queue->queue_length / queue->pagesize; i++) {
+ u64 page = (u64)virt_to_abs(queue->queue_pages[i]);
+ if (addr >= page && addr < page + queue->pagesize) {
+ *q_offset = addr - page + i * queue->pagesize;
+ return 0;
+ }
+ }
+ return -EINVAL;
+}
+
+#if PAGE_SHIFT < EHCA_PAGESHIFT
+#error Kernel pages must be at least as large as eHCA pages (4K)!
+#endif
+
+/*
+ * allocate pages for queue:
+ * outer loop allocates whole kernel pages (page aligned) and
+ * inner loop divides a kernel page into smaller hca queue pages
+ */
+static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages)
+{
+ int k, f = 0;
+ u8 *kpage;
+
+ while (f < nr_of_pages) {
+ kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
+ if (!kpage)
+ goto out;
+
+ for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) {
+ queue->queue_pages[f] = (struct ipz_page *)kpage;
+ kpage += EHCA_PAGESIZE;
+ f++;
+ }
+ }
+ return 1;
+
+out:
+ for (f = 0; f < nr_of_pages && queue->queue_pages[f];
+ f += PAGES_PER_KPAGE)
+ free_page((unsigned long)(queue->queue_pages)[f]);
+ return 0;
+}
+
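+/*
+ * Carve a small queue page out of a per-PD kernel page: reuse a partially
+ * filled page from pd->free[order] if available, otherwise allocate a new
+ * one, then mark one chunk of queue->pagesize bytes as used.
+ */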
+static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
+{
+ int order = ilog2(queue->pagesize) - 9;
+ struct ipz_small_queue_page *page;
+ unsigned long bit;
+
+ mutex_lock(&pd->lock);
+
+ if (!list_empty(&pd->free[order]))
+ page = list_entry(pd->free[order].next,
+ struct ipz_small_queue_page, list);
+ else {
+ page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL);
+ if (!page)
+ goto out;
+
+ page->page = get_zeroed_page(GFP_KERNEL);
+ if (!page->page) {
+ kmem_cache_free(small_qp_cache, page);
+ goto out;
+ }
+
+ list_add(&page->list, &pd->free[order]);
+ }
+
+ bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order);
+ __set_bit(bit, page->bitmap);
+ page->fill++;
+
+ if (page->fill == IPZ_SPAGE_PER_KPAGE >> order)
+ list_move(&page->list, &pd->full[order]);
+
+ mutex_unlock(&pd->lock);
+
+ queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9)));
+ queue->small_page = page;
+ queue->offset = bit << (order + 9);
+ return 1;
+
+out:
+ ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
+ mutex_unlock(&pd->lock);
+ return 0;
+}
+
+static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
+{
+ int order = ilog2(queue->pagesize) - 9;
+ struct ipz_small_queue_page *page = queue->small_page;
+ unsigned long bit;
+ int free_page = 0;
+
+ bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK)
+ >> (order + 9);
+
+ mutex_lock(&pd->lock);
+
+ __clear_bit(bit, page->bitmap);
+ page->fill--;
+
+ if (page->fill == 0) {
+ list_del(&page->list);
+ free_page = 1;
+ }
+
+ if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1)
+ /* the page was full until we freed the chunk */
+ list_move_tail(&page->list, &pd->free[order]);
+
+ mutex_unlock(&pd->lock);
+
+ if (free_page) {
+ free_page(page->page);
+ kmem_cache_free(small_qp_cache, page);
+ }
+}
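
For small queues several queues share one kernel page, and the chunk a queue owns is identified by a bit in the page's bitmap; the `order + 9` shifts above encode a minimum chunk size of 512 bytes. A standalone sketch of that arithmetic, with names and values that are illustrative only:

    #include <stdio.h>

    static int ilog2_ul(unsigned long v)
    {
            int r = -1;

            while (v) {
                    v >>= 1;
                    r++;
            }
            return r;
    }

    int main(void)
    {
            unsigned long pagesize = 2048;          /* small-queue page size */
            int order = ilog2_ul(pagesize) - 9;     /* 2048 -> order 2 */
            unsigned long bit = 3;                  /* chunk index in the bitmap */
            unsigned long offset = bit << (order + 9);  /* 3 * 2048 = 6144 */

            printf("order=%d, chunk byte offset=%lu\n", order, offset);
            return 0;
    }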
+
+int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
+ const u32 nr_of_pages, const u32 pagesize,
+ const u32 qe_size, const u32 nr_of_sg,
+ int is_small)
+{
+ if (pagesize > PAGE_SIZE) {
+ ehca_gen_err("FATAL ERROR: pagesize=%x "
+ "is greater than kernel page size", pagesize);
+ return 0;
+ }
+
+ /* init queue fields */
+ queue->queue_length = nr_of_pages * pagesize;
+ queue->pagesize = pagesize;
+ queue->qe_size = qe_size;
+ queue->act_nr_of_sg = nr_of_sg;
+ queue->current_q_offset = 0;
+ queue->toggle_state = 1;
+ queue->small_page = NULL;
+
+ /* allocate queue page pointers */
+ queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+ if (!queue->queue_pages) {
+ ehca_gen_err("Couldn't allocate queue page list");
+ return 0;
+ }
+ memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
+
+ /* allocate actual queue pages */
+ if (is_small) {
+ if (!alloc_small_queue_page(queue, pd))
+ goto ipz_queue_ctor_exit0;
+ } else
+ if (!alloc_queue_pages(queue, nr_of_pages))
+ goto ipz_queue_ctor_exit0;
+
+ return 1;
+
+ipz_queue_ctor_exit0:
+ ehca_gen_err("Couldn't alloc pages queue=%p "
+ "nr_of_pages=%x", queue, nr_of_pages);
+ vfree(queue->queue_pages);
+
+ return 0;
+}
+
+int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
+{
+ int i, nr_pages;
+
+ if (!queue || !queue->queue_pages) {
+ ehca_gen_dbg("queue or queue_pages is NULL");
+ return 0;
+ }
+
+ if (queue->small_page)
+ free_small_queue_page(queue, pd);
+ else {
+ nr_pages = queue->queue_length / queue->pagesize;
+ for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE)
+ free_page((unsigned long)queue->queue_pages[i]);
+ }
+
+ vfree(queue->queue_pages);
+
+ return 1;
+}
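
ipz_queue_ctor() and ipz_queue_dtor() are used as an allocate/free pair by the CQ and QP setup code elsewhere in the driver. A hedged usage sketch, not taken from the driver, with "my_pd" standing in for the caller's protection domain and the sizes chosen arbitrarily:

    #include <linux/errno.h>
    #include "ipz_pt_fn.h"

    /* Construct a normal (non-small) queue of 4 eHCA pages with 64-byte
     * entries and no scatter/gather, then tear it down again. */
    static int example_setup_queue(struct ehca_pd *my_pd, struct ipz_queue *q)
    {
            if (!ipz_queue_ctor(my_pd, q, 4, EHCA_PAGESIZE, 64, 0, 0))
                    return -ENOMEM;

            /* ... fill entries via ipz_qeit_get_inc() and friends ... */

            if (!ipz_queue_dtor(my_pd, q))
                    return -EINVAL;
            return 0;
    }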
+
+int ehca_init_small_qp_cache(void)
+{
+ small_qp_cache = kmem_cache_create("ehca_cache_small_qp",
+ sizeof(struct ipz_small_queue_page),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!small_qp_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void ehca_cleanup_small_qp_cache(void)
+{
+ kmem_cache_destroy(small_qp_cache);
+}
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
new file mode 100644
index 0000000..a801274
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -0,0 +1,289 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * internal queue handling
+ *
+ * Authors: Waleri Fomin <fomin@de.ibm.com>
+ * Reinhard Ernst <rernst@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IPZ_PT_FN_H__
+#define __IPZ_PT_FN_H__
+
+#define EHCA_PAGESHIFT 12
+#define EHCA_PAGESIZE 4096UL
+#define EHCA_PAGEMASK (~(EHCA_PAGESIZE-1))
+#define EHCA_PT_ENTRIES 512UL
+
+#include "ehca_tools.h"
+#include "ehca_qes.h"
+
+struct ehca_pd;
+struct ipz_small_queue_page;
+
+extern struct kmem_cache *small_qp_cache;
+
+/* struct generic ehca page */
+struct ipz_page {
+ u8 entries[EHCA_PAGESIZE];
+};
+
+#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512)
+
+struct ipz_small_queue_page {
+ unsigned long page;
+ unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG];
+ int fill;
+ void *mapped_addr;
+ u32 mmap_count;
+ struct list_head list;
+};
+
+/* struct generic queue in linux kernel virtual memory (kv) */
+struct ipz_queue {
+ u64 current_q_offset; /* current queue entry */
+
+ struct ipz_page **queue_pages; /* array of pages belonging to queue */
+ u32 qe_size; /* queue entry size */
+ u32 act_nr_of_sg;
+ u32 queue_length; /* queue length allocated in bytes */
+ u32 pagesize;
+ u32 toggle_state; /* toggle flag - per page */
+ u32 offset; /* save offset within page for small_qp */
+ struct ipz_small_queue_page *small_page;
+};
+
+/*
+ * return current Queue Entry for a certain q_offset
+ * returns address (kv) of Queue Entry
+ */
+static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
+{
+ struct ipz_page *current_page;
+ if (q_offset >= queue->queue_length)
+ return NULL;
+ current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
+ return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
+}
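
ipz_qeit_calc() splits the byte offset into a page index (the bits above EHCA_PAGESHIFT) and an offset inside that page (the low 12 bits). A tiny worked example of the split, with an arbitrary offset value:

    #include <stdio.h>

    int main(void)
    {
            unsigned long q_offset = 0x2340;
            unsigned long page_idx = q_offset >> 12;         /* EHCA_PAGESHIFT */
            unsigned long in_page  = q_offset & (4096 - 1);  /* EHCA_PAGESIZE - 1 */

            /* prints "page 2, offset 0x340" */
            printf("page %lu, offset 0x%lx\n", page_idx, in_page);
            return 0;
    }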
+
+/*
+ * return current Queue Entry
+ * returns address (kv) of Queue Entry
+ */
+static inline void *ipz_qeit_get(struct ipz_queue *queue)
+{
+ return ipz_qeit_calc(queue, queue->current_q_offset);
+}
+
+/*
+ * return current Queue Page, increment Queue Page iterator from
+ * page to page in struct ipz_queue; the last increment returns NULL and
+ * does NOT wrap
+ * returns address (kv) of Queue Page
+ * warning don't use in parallel with ipz_qeit_get_inc()
+ */
+void *ipz_qpageit_get_inc(struct ipz_queue *queue);
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ */
+static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
+{
+ void *ret = ipz_qeit_get(queue);
+ queue->current_q_offset += queue->qe_size;
+ if (queue->current_q_offset >= queue->queue_length) {
+ queue->current_q_offset = 0;
+ /* toggle the valid flag */
+ queue->toggle_state = (~queue->toggle_state) & 1;
+ }
+
+ return ret;
+}
+
+/*
+ * return a bool indicating whether current Queue Entry is valid
+ */
+static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
+{
+ struct ehca_cqe *cqe = ipz_qeit_get(queue);
+ return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1));
+}
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * returns 0 and does not increment, if wrong valid state
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ */
+static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
+{
+ return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL;
+}
+
+/*
+ * returns and resets Queue Entry iterator
+ * returns address (kv) of first Queue Entry
+ */
+static inline void *ipz_qeit_reset(struct ipz_queue *queue)
+{
+ queue->current_q_offset = 0;
+ return ipz_qeit_get(queue);
+}
+
+/*
+ * return the q_offset corresponding to an absolute address
+ */
+int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset);
+
+/*
+ * return the next queue offset. don't modify the queue.
+ */
+static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset)
+{
+ offset += queue->qe_size;
+ if (offset >= queue->queue_length) offset = 0;
+ return offset;
+}
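
ipz_queue_advance_offset() computes the next entry offset without modifying the queue, wrapping to zero at queue_length. A small illustration of the resulting cycle, with arbitrary values:

    #include <stdio.h>

    /* Mirrors ipz_queue_advance_offset(): with qe_size = 64 and
     * queue_length = 256 the offsets cycle 0 -> 64 -> 128 -> 192 -> 0 ... */
    static unsigned long advance(unsigned long off, unsigned long qe_size,
                                 unsigned long queue_length)
    {
            off += qe_size;
            if (off >= queue_length)
                    off = 0;
            return off;
    }

    int main(void)
    {
            unsigned long off = 0;
            int i;

            for (i = 0; i < 6; i++) {
                    printf("%lu ", off);
                    off = advance(off, 64, 256);
            }
            printf("\n");
            return 0;
    }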
+
+/* struct generic page table */
+struct ipz_pt {
+ u64 entries[EHCA_PT_ENTRIES];
+};
+
+/* struct page table for a queue, only to be used in pf */
+struct ipz_qpt {
+ /* queue page tables (kv), use u64 because we know the element length */
+ u64 *qpts;
+ u32 n_qpts;
+ u32 n_ptes; /* number of page table entries */
+ u64 *current_pte_addr;
+};
+
+/*
+ * constructor for an ipz_queue_t, placement new for ipz_queue_t,
+ * new for all dependent data structures
+ * all QP Tables are the same
+ * flow:
+ * allocate+pin queue
+ * see ipz_qpt_ctor()
+ * returns true if ok, false if out of memory
+ */
+int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
+ const u32 nr_of_pages, const u32 pagesize,
+ const u32 qe_size, const u32 nr_of_sg,
+ int is_small);
+
+/*
+ * destructor for an ipz_queue_t
+ * -# free queue
+ * see ipz_queue_ctor()
+ * returns true if ok, false if queue was a NULL pointer or free failed
+ */
+int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue);
+
+/*
+ * constructor for an ipz_qpt_t,
+ * placement new for struct ipz_queue, new for all dependent data structures
+ * all QP Tables are the same,
+ * flow:
+ * -# allocate+pin queue
+ * -# initialise ptcb
+ * -# allocate+pin PTs
+ * -# link PTs to a ring, according to HCA Arch, set bit62 if needed
+ * -# the ring must have room for exactly nr_of_PTEs
+ * see ipz_qpt_ctor()
+ */
+void ipz_qpt_ctor(struct ipz_qpt *qpt,
+ const u32 nr_of_qes,
+ const u32 pagesize,
+ const u32 qe_size,
+ const u8 lowbyte, const u8 toggle,
+ u32 * act_nr_of_QEs, u32 * act_nr_of_pages);
+
+/*
+ * return current Queue Entry, increment Queue Entry iterator by one
+ * step in struct ipz_queue, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ * fix EQ page problems
+ */
+void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
+
+/*
+ * return current Event Queue Entry, increment Queue Entry iterator
+ * by one step in struct ipz_queue if valid, will wrap in ringbuffer
+ * returns address (kv) of Queue Entry BEFORE increment
+ * returns 0 and does not increment, if wrong valid state
+ * warning don't use in parallel with ipz_qpageit_get_inc()
+ * warning unpredictable results may occur if steps>act_nr_of_queue_entries
+ */
+static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
+{
+ void *ret = ipz_qeit_get(queue);
+ u32 qe = *(u8 *)ret;
+ if ((qe >> 7) != (queue->toggle_state & 1))
+ return NULL;
+ ipz_qeit_eq_get_inc(queue); /* this is a good one */
+ return ret;
+}
+
+static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
+{
+ void *ret = ipz_qeit_get(queue);
+ u32 qe = *(u8 *)ret;
+ if ((qe >> 7) != (queue->toggle_state & 1))
+ return NULL;
+ return ret;
+}
+
+/* returns address (GX) of first queue entry */
+static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
+{
+ return be64_to_cpu(qpt->qpts[0]);
+}
+
+/* returns address (kv) of first page of queue page table */
+static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt)
+{
+ return qpt->qpts;
+}
+
+#endif /* __IPZ_PT_FN_H__ */
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
new file mode 100644
index 0000000..3c7968f
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -0,0 +1,9 @@
+config INFINIBAND_IPATH
+ tristate "QLogic InfiniPath Driver"
+ depends on 64BIT && NET
+ ---help---
+ This is a driver for QLogic InfiniPath host channel adapters,
+ including InfiniBand verbs support. This driver allows these
+ devices to be used with both kernel upper level protocols such
+ as IP-over-InfiniBand as well as with userspace applications
+ (in conjunction with InfiniBand userspace access).
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
new file mode 100644
index 0000000..bf94500
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -0,0 +1,41 @@
+EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \
+ -DIPATH_KERN_TYPE=0
+
+obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
+
+ib_ipath-y := \
+ ipath_cq.o \
+ ipath_diag.o \
+ ipath_dma.o \
+ ipath_driver.o \
+ ipath_eeprom.o \
+ ipath_file_ops.o \
+ ipath_fs.o \
+ ipath_init_chip.o \
+ ipath_intr.o \
+ ipath_keys.o \
+ ipath_mad.o \
+ ipath_mmap.o \
+ ipath_mr.o \
+ ipath_qp.o \
+ ipath_rc.o \
+ ipath_ruc.o \
+ ipath_sdma.o \
+ ipath_srq.o \
+ ipath_stats.o \
+ ipath_sysfs.o \
+ ipath_uc.o \
+ ipath_ud.o \
+ ipath_user_pages.o \
+ ipath_user_sdma.o \
+ ipath_verbs_mcast.o \
+ ipath_verbs.o \
+ ipath_iba7220.o \
+ ipath_sd7220.o \
+ ipath_sd7220_img.o
+
+ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o
+ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba6120.o
+
+ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
+ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/infiniband/hw/ipath/ipath_7220.h b/drivers/infiniband/hw/ipath/ipath_7220.h
new file mode 100644
index 0000000..74fa5cc
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_7220.h
@@ -0,0 +1,57 @@
+#ifndef _IPATH_7220_H
+#define _IPATH_7220_H
+/*
+ * Copyright (c) 2007 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This header file provides the declarations and common definitions
+ * for (mostly) manipulation of the SerDes blocks within the IBA7220.
+ * The functions declared should only be called from within other
+ * 7220-related files such as ipath_iba7220.c or ipath_sd7220.c.
+ */
+int ipath_sd7220_presets(struct ipath_devdata *dd);
+int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset);
+int ipath_sd7220_prog_ld(struct ipath_devdata *dd, int sdnum, u8 *img,
+ int len, int offset);
+int ipath_sd7220_prog_vfy(struct ipath_devdata *dd, int sdnum, const u8 *img,
+ int len, int offset);
+/*
+ * Below used for sdnum parameter, selecting one of the two sections
+ * used for PCIe, or the single SerDes used for IB, which is the
+ * only one currently used
+ */
+#define IB_7220_SERDES 2
+
+int ipath_sd7220_ib_load(struct ipath_devdata *dd);
+int ipath_sd7220_ib_vfy(struct ipath_devdata *dd);
+
+#endif /* _IPATH_7220_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
new file mode 100644
index 0000000..28cfe97
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -0,0 +1,851 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _IPATH_COMMON_H
+#define _IPATH_COMMON_H
+
+/*
+ * This file contains defines, structures, etc. that are used
+ * to communicate between kernel and user code.
+ */
+
+
+/* This is the IEEE-assigned OUI for QLogic Inc. InfiniPath */
+#define IPATH_SRC_OUI_1 0x00
+#define IPATH_SRC_OUI_2 0x11
+#define IPATH_SRC_OUI_3 0x75
+
+/* version of protocol header (known to chip also). In the long run,
+ * we should be able to generate and accept a range of version numbers;
+ * for now we only accept one, and it's compiled in.
+ */
+#define IPS_PROTO_VERSION 2
+
+/*
+ * These are compile time constants that you may want to enable or disable
+ * if you are trying to debug problems with code or performance.
+ * IPATH_VERBOSE_TRACING define as 1 if you want additional tracing in
+ * fastpath code
+ * IPATH_TRACE_REGWRITES define as 1 if you want register writes to be
+ * traced in fastpath code
+ * _IPATH_TRACING define as 0 if you want to remove all tracing in a
+ * compilation unit
+ * _IPATH_DEBUGGING define as 0 if you want to remove debug prints
+ */
+
+/*
+ * The value in the BTH QP field that InfiniPath uses to differentiate
+ * an infinipath protocol IB packet vs standard IB transport
+ */
+#define IPATH_KD_QP 0x656b79
+
+/*
+ * valid states passed to ipath_set_linkstate() user call
+ */
+#define IPATH_IB_LINKDOWN 0
+#define IPATH_IB_LINKARM 1
+#define IPATH_IB_LINKACTIVE 2
+#define IPATH_IB_LINKDOWN_ONLY 3
+#define IPATH_IB_LINKDOWN_SLEEP 4
+#define IPATH_IB_LINKDOWN_DISABLE 5
+#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */
+#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */
+#define IPATH_IB_LINK_NO_HRTBT 8 /* disable Heartbeat, e.g. for loopback */
+#define IPATH_IB_LINK_HRTBT 9 /* enable heartbeat, normal, non-loopback */
+
+/*
+ * These 3 values (SDR and DDR may be ORed for auto-speed
+ * negotiation) are used for the 3rd argument to path_f_set_ib_cfg
+ * with cmd IPATH_IB_CFG_SPD_ENB, by direct calls or via sysfs. They
+ * are also the possible values for ipath_link_speed_enabled and active.
+ * The values were chosen to match values used within the IB spec.
+ */
+#define IPATH_IB_SDR 1
+#define IPATH_IB_DDR 2
+
+/*
+ * stats maintained by the driver. For now, at least, this is global
+ * to all minor devices.
+ */
+struct infinipath_stats {
+ /* number of interrupts taken */
+ __u64 sps_ints;
+ /* number of interrupts for errors */
+ __u64 sps_errints;
+ /* number of errors from chip (not incl. packet errors or CRC) */
+ __u64 sps_errs;
+ /* number of packet errors from chip other than CRC */
+ __u64 sps_pkterrs;
+ /* number of packets with CRC errors (ICRC and VCRC) */
+ __u64 sps_crcerrs;
+ /* number of hardware errors reported (parity, etc.) */
+ __u64 sps_hwerrs;
+ /* number of times IB link changed state unexpectedly */
+ __u64 sps_iblink;
+ __u64 sps_unused; /* was fastrcvint, no longer implemented */
+ /* number of kernel (port0) packets received */
+ __u64 sps_port0pkts;
+ /* number of "ethernet" packets sent by driver */
+ __u64 sps_ether_spkts;
+ /* number of "ethernet" packets received by driver */
+ __u64 sps_ether_rpkts;
+ /* number of SMA packets sent by driver. Obsolete. */
+ __u64 sps_sma_spkts;
+ /* number of SMA packets received by driver. Obsolete. */
+ __u64 sps_sma_rpkts;
+ /* number of times all ports rcvhdrq was full and packet dropped */
+ __u64 sps_hdrqfull;
+ /* number of times all ports egrtid was full and packet dropped */
+ __u64 sps_etidfull;
+ /*
+ * number of times we tried to send from driver, but no pio buffers
+ * avail
+ */
+ __u64 sps_nopiobufs;
+ /* number of ports currently open */
+ __u64 sps_ports;
+ /* list of pkeys (other than default) accepted (0 means not set) */
+ __u16 sps_pkeys[4];
+ __u16 sps_unused16[4]; /* available; maintaining compatible layout */
+ /* number of user ports per chip (not IB ports) */
+ __u32 sps_nports;
+ /* not our interrupt, or already handled */
+ __u32 sps_nullintr;
+ /* max number of packets handled per receive call */
+ __u32 sps_maxpkts_call;
+ /* avg number of packets handled per receive call */
+ __u32 sps_avgpkts_call;
+ /* total number of pages locked */
+ __u64 sps_pagelocks;
+ /* total number of pages unlocked */
+ __u64 sps_pageunlocks;
+ /*
+ * Number of packets dropped in kernel other than errors (ether
+ * packets if ipath not configured, etc.)
+ */
+ __u64 sps_krdrops;
+ __u64 sps_txeparity; /* PIO buffer parity error, recovered */
+ /* pad for future growth */
+ __u64 __sps_pad[45];
+};
+
+/*
+ * These are the status bits readable (in ascii form, 64bit value)
+ * from the "status" sysfs file.
+ */
+#define IPATH_STATUS_INITTED 0x1 /* basic initialization done */
+#define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */
+/* Device has been disabled via admin request */
+#define IPATH_STATUS_ADMIN_DISABLED 0x4
+/* Chip has been found and initted */
+#define IPATH_STATUS_CHIP_PRESENT 0x20
+/* IB link is at ACTIVE, usable for data traffic */
+#define IPATH_STATUS_IB_READY 0x40
+/* link is configured, LID, MTU, etc. have been set */
+#define IPATH_STATUS_IB_CONF 0x80
+/* no link established, probably no cable */
+#define IPATH_STATUS_IB_NOCABLE 0x100
+/* A Fatal hardware error has occurred. */
+#define IPATH_STATUS_HWERROR 0x200
+
+/*
+ * The list of usermode accessible registers. Also see Reg_* later in file.
+ */
+typedef enum _ipath_ureg {
+ /* (RO) DMA RcvHdr to be used next. */
+ ur_rcvhdrtail = 0,
+ /* (RW) RcvHdr entry to be processed next by host. */
+ ur_rcvhdrhead = 1,
+ /* (RO) Index of next Eager index to use. */
+ ur_rcvegrindextail = 2,
+ /* (RW) Eager TID to be processed next */
+ ur_rcvegrindexhead = 3,
+ /* For internal use only; max register number. */
+ _IPATH_UregMax
+} ipath_ureg;
+
+/* bit values for spi_runtime_flags */
+#define IPATH_RUNTIME_HT 0x1
+#define IPATH_RUNTIME_PCIE 0x2
+#define IPATH_RUNTIME_FORCE_WC_ORDER 0x4
+#define IPATH_RUNTIME_RCVHDR_COPY 0x8
+#define IPATH_RUNTIME_MASTER 0x10
+#define IPATH_RUNTIME_NODMA_RTAIL 0x80
+#define IPATH_RUNTIME_SDMA 0x200
+#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400
+#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800
+
+/*
+ * This structure is returned by ipath_userinit() immediately after
+ * open to get implementation-specific info, and info specific to this
+ * instance.
+ *
+ * This struct must have explicit pad fields where type sizes
+ * may result in different alignments between 32 and 64 bit
+ * programs, since the 64 bit kernel requires the user code
+ * to have matching offsets
+ */
+struct ipath_base_info {
+ /* version of hardware, for feature checking. */
+ __u32 spi_hw_version;
+ /* version of software, for feature checking. */
+ __u32 spi_sw_version;
+ /* InfiniPath port assigned, goes into sent packets */
+ __u16 spi_port;
+ __u16 spi_subport;
+ /*
+ * IB MTU; a packet's IB data must be less than this.
+ * The MTU is in bytes, and will be a multiple of 4 bytes.
+ */
+ __u32 spi_mtu;
+ /*
+ * Size of a PIO buffer. Any given packet's total size must be less
+ * than this (in words). Included is the starting control word, so
+ * if 513 is returned, then total pkt size is 512 words or less.
+ */
+ __u32 spi_piosize;
+ /* size of the TID cache in infinipath, in entries */
+ __u32 spi_tidcnt;
+ /* size of the TID Eager list in infinipath, in entries */
+ __u32 spi_tidegrcnt;
+ /* size of a single receive header queue entry in words. */
+ __u32 spi_rcvhdrent_size;
+ /*
+ * Count of receive header queue entries allocated.
+ * This may be less than the spu_rcvhdrcnt passed in!
+ */
+ __u32 spi_rcvhdr_cnt;
+
+ /* per-chip and other runtime features bitmap (IPATH_RUNTIME_*) */
+ __u32 spi_runtime_flags;
+
+ /* address where receive buffer queue is mapped into */
+ __u64 spi_rcvhdr_base;
+
+ /* user program. */
+
+ /* base address of eager TID receive buffers. */
+ __u64 spi_rcv_egrbufs;
+
+ /* Allocated by initialization code, not by protocol. */
+
+ /*
+ * Size of each TID buffer in host memory, starting at
+ * spi_rcv_egrbufs. The buffers are virtually contiguous.
+ */
+ __u32 spi_rcv_egrbufsize;
+ /*
+ * The special QP (queue pair) value that identifies an infinipath
+ * protocol packet from standard IB packets. More, probably much
+ * more, to be added.
+ */
+ __u32 spi_qpair;
+
+ /*
+ * User register base for init code, not to be used directly by
+ * protocol or applications.
+ */
+ __u64 __spi_uregbase;
+ /*
+ * Maximum buffer size in bytes that can be used in a single TID
+ * entry (assuming the buffer is aligned to this boundary). This is
+ * the minimum of what the hardware and software support. Guaranteed
+ * to be a power of 2.
+ */
+ __u32 spi_tid_maxsize;
+ /*
+ * alignment of each pio send buffer (byte count
+ * to add to spi_piobufbase to get to second buffer)
+ */
+ __u32 spi_pioalign;
+ /*
+ * The index of the first pio buffer available to this process;
+ * needed to do lookup in spi_pioavailaddr; not added to
+ * spi_piobufbase.
+ */
+ __u32 spi_pioindex;
+ /* number of buffers mapped for this process */
+ __u32 spi_piocnt;
+
+ /*
+ * Base address of writeonly pio buffers for this process.
+ * Each buffer has spi_piosize words, and is aligned on spi_pioalign
+ * boundaries. spi_piocnt buffers are mapped from this address
+ */
+ __u64 spi_piobufbase;
+
+ /*
+ * Base address of readonly memory copy of the pioavail registers.
+ * There are 2 bits for each buffer.
+ */
+ __u64 spi_pioavailaddr;
+
+ /*
+ * Address where driver updates a copy of the interface and driver
+ * status (IPATH_STATUS_*) as a 64 bit value. It's followed by a
+ * string indicating hardware error, if there was one.
+ */
+ __u64 spi_status;
+
+ /* number of chip ports available to user processes */
+ __u32 spi_nports;
+ /* unit number of chip we are using */
+ __u32 spi_unit;
+ /* num bufs in each contiguous set */
+ __u32 spi_rcv_egrperchunk;
+ /* size in bytes of each contiguous set */
+ __u32 spi_rcv_egrchunksize;
+ /* total size of mmap to cover full rcvegrbuffers */
+ __u32 spi_rcv_egrbuftotlen;
+ __u32 spi_filler_for_align;
+ /* address of readonly memory copy of the rcvhdrq tail register. */
+ __u64 spi_rcvhdr_tailaddr;
+
+ /* shared memory pages for subports if port is shared */
+ __u64 spi_subport_uregbase;
+ __u64 spi_subport_rcvegrbuf;
+ __u64 spi_subport_rcvhdr_base;
+
+ /* shared memory page for hardware port if it is shared */
+ __u64 spi_port_uregbase;
+ __u64 spi_port_rcvegrbuf;
+ __u64 spi_port_rcvhdr_base;
+ __u64 spi_port_rcvhdr_tailaddr;
+
+} __attribute__ ((aligned(8)));
+
+
+/*
+ * This version number is given to the driver by the user code during
+ * initialization in the spu_userversion field of ipath_user_info, so
+ * the driver can check for compatibility with user code.
+ *
+ * The major version changes when data structures
+ * change in an incompatible way. The driver must be the same or higher
+ * for initialization to succeed. In some cases, a higher version
+ * driver will not interoperate with older software, and initialization
+ * will return an error.
+ */
+#define IPATH_USER_SWMAJOR 1
+
+/*
+ * Minor version differences are always compatible
+ * within a major version; however, if user software is newer
+ * than driver software, some new features and/or structure fields
+ * may not be implemented; the user code must deal with this if it
+ * cares, or it must abort after initialization reports the difference.
+ */
+#define IPATH_USER_SWMINOR 6
+
+#define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
+
+#define IPATH_KERN_TYPE 0
+
+/*
+ * Similarly, this is the kernel version going back to the user. It's
+ * slightly different, in that we want to tell if the driver was built as
+ * part of a QLogic release, or from the driver from openfabrics.org,
+ * kernel.org, or a standard distribution, for support reasons.
+ * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied.
+ *
+ * It's returned by the driver to the user code during initialization in the
+ * spi_sw_version field of ipath_base_info, so the user code can in turn
+ * check for compatibility with the kernel.
+*/
+#define IPATH_KERN_SWVERSION ((IPATH_KERN_TYPE<<31) | IPATH_USER_SWVERSION)
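
The comments above describe a single packed word: minor version in the low 16 bits, major version above it, and (for the kernel word) the QLogic/non-QLogic flag in the top bit. A hedged user-space sketch of unpacking spi_sw_version and applying the compatibility rule described above; the exact test the driver performs may differ, and the macro names here are illustrative:

    #include <stdio.h>

    #define SW_MINOR(v)     ((v) & 0xffff)
    #define SW_MAJOR(v)     (((v) >> 16) & 0x7fff)  /* keep bit 31 out of major */
    #define SW_KERN_TYPE(v) (((v) >> 31) & 1)

    int main(void)
    {
            unsigned int user_ver = (1u << 16) | 6;         /* IPATH_USER_SWVERSION */
            unsigned int kern_ver = (0u << 31) | user_ver;  /* as seen in spi_sw_version */

            /* majors must match; a newer driver minor is still compatible */
            int compatible = SW_MAJOR(kern_ver) == SW_MAJOR(user_ver) &&
                             SW_MINOR(kern_ver) >= SW_MINOR(user_ver);

            printf("type=%u major=%u minor=%u compatible=%d\n",
                   SW_KERN_TYPE(kern_ver), SW_MAJOR(kern_ver),
                   SW_MINOR(kern_ver), compatible);
            return 0;
    }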
+
+/*
+ * This structure is passed to ipath_userinit() to tell the driver where
+ * user code buffers are, sizes, etc. The offsets and sizes of the
+ * fields must remain unchanged, for binary compatibility. It can
+ * be extended, if userversion is changed so user code can tell, if needed
+ */
+struct ipath_user_info {
+ /*
+ * version of user software, to detect compatibility issues.
+ * Should be set to IPATH_USER_SWVERSION.
+ */
+ __u32 spu_userversion;
+
+ /* desired number of receive header queue entries */
+ __u32 spu_rcvhdrcnt;
+
+ /* size of struct base_info to write to */
+ __u32 spu_base_info_size;
+
+ /*
+ * number of words in KD protocol header
+ * This tells InfiniPath how many words to copy to rcvhdrq. If 0,
+ * kernel uses a default. Once set, attempts to set any other value
+ * are an error (EAGAIN) until driver is reloaded.
+ */
+ __u32 spu_rcvhdrsize;
+
+ /*
+ * If two or more processes wish to share a port, each process
+ * must set the spu_subport_cnt and spu_subport_id to the same
+ * values. The only restriction on the spu_subport_id is that
+ * it be unique for a given node.
+ */
+ __u16 spu_subport_cnt;
+ __u16 spu_subport_id;
+
+ __u32 spu_unused; /* kept for compatible layout */
+
+ /*
+ * address of struct base_info to write to
+ */
+ __u64 spu_base_info;
+
+} __attribute__ ((aligned(8)));
+
+/* User commands. */
+
+#define IPATH_CMD_MIN 16
+
+#define __IPATH_CMD_USER_INIT 16 /* old set up userspace (for old user code) */
+#define IPATH_CMD_PORT_INFO 17 /* find out what resources we got */
+#define IPATH_CMD_RECV_CTRL 18 /* control receipt of packets */
+#define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */
+#define IPATH_CMD_TID_FREE 20 /* free expected TID entries */
+#define IPATH_CMD_SET_PART_KEY 21 /* add partition key */
+#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */
+#define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */
+#define IPATH_CMD_USER_INIT 24 /* set up userspace */
+#define IPATH_CMD_UNUSED_1 25
+#define IPATH_CMD_UNUSED_2 26
+#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */
+#define IPATH_CMD_POLL_TYPE 28 /* set the kind of polling we want */
+#define IPATH_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */
+/* 30 is unused */
+#define IPATH_CMD_SDMA_INFLIGHT 31 /* sdma inflight counter request */
+#define IPATH_CMD_SDMA_COMPLETE 32 /* sdma completion counter request */
+
+/*
+ * Poll types
+ */
+#define IPATH_POLL_TYPE_URGENT 0x01
+#define IPATH_POLL_TYPE_OVERFLOW 0x02
+
+struct ipath_port_info {
+ __u32 num_active; /* number of active units */
+ __u32 unit; /* unit (chip) assigned to caller */
+ __u16 port; /* port on unit assigned to caller */
+ __u16 subport; /* subport on unit assigned to caller */
+ __u16 num_ports; /* number of ports available on unit */
+ __u16 num_subports; /* number of subports opened on port */
+};
+
+struct ipath_tid_info {
+ __u32 tidcnt;
+ /* make structure same size in 32 and 64 bit */
+ __u32 tid__unused;
+ /* virtual address of first page in transfer */
+ __u64 tidvaddr;
+ /* pointer (same size 32/64 bit) to __u16 tid array */
+ __u64 tidlist;
+
+ /*
+ * pointer (same size 32/64 bit) to bitmap of TIDs used
+ * for this call; checked for being large enough at open
+ */
+ __u64 tidmap;
+};
+
+struct ipath_cmd {
+ __u32 type; /* command type */
+ union {
+ struct ipath_tid_info tid_info;
+ struct ipath_user_info user_info;
+
+ /*
+ * address in userspace where we should put the sdma
+ * inflight counter
+ */
+ __u64 sdma_inflight;
+ /*
+ * address in userspace where we should put the sdma
+ * completion counter
+ */
+ __u64 sdma_complete;
+ /* address in userspace of struct ipath_port_info to
+ write result to */
+ __u64 port_info;
+ /* enable/disable receipt of packets */
+ __u32 recv_ctrl;
+ /* enable/disable armlaunch errors (non-zero to enable) */
+ __u32 armlaunch_ctrl;
+ /* partition key to set */
+ __u16 part_key;
+ /* user address of __u32 bitmask of active slaves */
+ __u64 slave_mask_addr;
+ /* type of polling we want */
+ __u16 poll_type;
+ } cmd;
+};
+
+struct ipath_iovec {
+ /* Pointer to data, but same size 32 and 64 bit */
+ __u64 iov_base;
+
+ /*
+ * Length of data; don't need 64 bits, but want
+ * ipath_sendpkt to remain same size as before 32 bit changes, so...
+ */
+ __u64 iov_len;
+};
+
+/*
+ * Describes a single packet for send. Each packet can have one or more
+ * buffers, but the total length (exclusive of IB headers) must be less
+ * than the MTU, and if using the PIO method, entire packet length,
+ * including IB headers, must be less than the ipath_piosize value (words).
+ * Use of this necessitates including sys/uio.h
+ */
+struct __ipath_sendpkt {
+ __u32 sps_flags; /* flags for packet (TBD) */
+ __u32 sps_cnt; /* number of entries to use in sps_iov */
+ /* array of iov's describing packet. TEMPORARY */
+ struct ipath_iovec sps_iov[4];
+};
+
+/*
+ * diagnostics can send a packet by "writing" one of the following
+ * two structs to diag data special file
+ * The first is the legacy version for backward compatibility
+ */
+struct ipath_diag_pkt {
+ __u32 unit;
+ __u64 data;
+ __u32 len;
+};
+
+/* The second diag_pkt struct is the expanded version that allows
+ * more control over the packet, specifically, by allowing a custom
+ * pbc (+ static rate) qword, so that special modes and deliberate
+ * changes to CRCs can be used. The elements were also re-ordered
+ * for better alignment and to avoid padding issues.
+ */
+struct ipath_diag_xpkt {
+ __u64 data;
+ __u64 pbc_wd;
+ __u32 unit;
+ __u32 len;
+};
+
+/*
+ * Data layout in I2C flash (for GUID, etc.)
+ * All fields are little-endian binary unless otherwise stated
+ */
+#define IPATH_FLASH_VERSION 2
+struct ipath_flash {
+ /* flash layout version (IPATH_FLASH_VERSION) */
+ __u8 if_fversion;
+ /* checksum protecting if_length bytes */
+ __u8 if_csum;
+ /*
+ * valid length (in use, protected by if_csum), including
+ * if_fversion and if_csum themselves
+ */
+ __u8 if_length;
+ /* the GUID, in network order */
+ __u8 if_guid[8];
+ /* number of GUIDs to use, starting from if_guid */
+ __u8 if_numguid;
+ /* the (last 10 characters of) board serial number, in ASCII */
+ char if_serial[12];
+ /* board mfg date (YYYYMMDD ASCII) */
+ char if_mfgdate[8];
+ /* last board rework/test date (YYYYMMDD ASCII) */
+ char if_testdate[8];
+ /* logging of error counts, TBD */
+ __u8 if_errcntp[4];
+ /* powered on hours, updated at driver unload */
+ __u8 if_powerhour[2];
+ /* ASCII free-form comment field */
+ char if_comment[32];
+ /* Backwards compatible prefix for longer QLogic Serial Numbers */
+ char if_sprefix[4];
+ /* 82 bytes used, min flash size is 128 bytes */
+ __u8 if_future[46];
+};
+
+/*
+ * These are the counters implemented in the chip, and are listed in order.
+ * The InterCaps naming is taken straight from the chip spec.
+ */
+struct infinipath_counters {
+ __u64 LBIntCnt;
+ __u64 LBFlowStallCnt;
+ __u64 TxSDmaDescCnt; /* was Reserved1 */
+ __u64 TxUnsupVLErrCnt;
+ __u64 TxDataPktCnt;
+ __u64 TxFlowPktCnt;
+ __u64 TxDwordCnt;
+ __u64 TxLenErrCnt;
+ __u64 TxMaxMinLenErrCnt;
+ __u64 TxUnderrunCnt;
+ __u64 TxFlowStallCnt;
+ __u64 TxDroppedPktCnt;
+ __u64 RxDroppedPktCnt;
+ __u64 RxDataPktCnt;
+ __u64 RxFlowPktCnt;
+ __u64 RxDwordCnt;
+ __u64 RxLenErrCnt;
+ __u64 RxMaxMinLenErrCnt;
+ __u64 RxICRCErrCnt;
+ __u64 RxVCRCErrCnt;
+ __u64 RxFlowCtrlErrCnt;
+ __u64 RxBadFormatCnt;
+ __u64 RxLinkProblemCnt;
+ __u64 RxEBPCnt;
+ __u64 RxLPCRCErrCnt;
+ __u64 RxBufOvflCnt;
+ __u64 RxTIDFullErrCnt;
+ __u64 RxTIDValidErrCnt;
+ __u64 RxPKeyMismatchCnt;
+ __u64 RxP0HdrEgrOvflCnt;
+ __u64 RxP1HdrEgrOvflCnt;
+ __u64 RxP2HdrEgrOvflCnt;
+ __u64 RxP3HdrEgrOvflCnt;
+ __u64 RxP4HdrEgrOvflCnt;
+ __u64 RxP5HdrEgrOvflCnt;
+ __u64 RxP6HdrEgrOvflCnt;
+ __u64 RxP7HdrEgrOvflCnt;
+ __u64 RxP8HdrEgrOvflCnt;
+ __u64 RxP9HdrEgrOvflCnt; /* was Reserved6 */
+ __u64 RxP10HdrEgrOvflCnt; /* was Reserved7 */
+ __u64 RxP11HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP12HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP13HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP14HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP15HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP16HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 IBStatusChangeCnt;
+ __u64 IBLinkErrRecoveryCnt;
+ __u64 IBLinkDownedCnt;
+ __u64 IBSymbolErrCnt;
+ /* The following are new for IBA7220 */
+ __u64 RxVL15DroppedPktCnt;
+ __u64 RxOtherLocalPhyErrCnt;
+ __u64 PcieRetryBufDiagQwordCnt;
+ __u64 ExcessBufferOvflCnt;
+ __u64 LocalLinkIntegrityErrCnt;
+ __u64 RxVlErrCnt;
+ __u64 RxDlidFltrCnt;
+};
+
+/*
+ * The next set of defines are for packet headers, and chip register
+ * and memory bits that are visible to and/or used by user-mode software.
+ * The other bits that are used only by the driver or diags are in
+ * ipath_registers.h
+ */
+
+/* RcvHdrFlags bits */
+#define INFINIPATH_RHF_LENGTH_MASK 0x7FF
+#define INFINIPATH_RHF_LENGTH_SHIFT 0
+#define INFINIPATH_RHF_RCVTYPE_MASK 0x7
+#define INFINIPATH_RHF_RCVTYPE_SHIFT 11
+#define INFINIPATH_RHF_EGRINDEX_MASK 0xFFF
+#define INFINIPATH_RHF_EGRINDEX_SHIFT 16
+#define INFINIPATH_RHF_SEQ_MASK 0xF
+#define INFINIPATH_RHF_SEQ_SHIFT 0
+#define INFINIPATH_RHF_HDRQ_OFFSET_MASK 0x7FF
+#define INFINIPATH_RHF_HDRQ_OFFSET_SHIFT 4
+#define INFINIPATH_RHF_H_ICRCERR 0x80000000
+#define INFINIPATH_RHF_H_VCRCERR 0x40000000
+#define INFINIPATH_RHF_H_PARITYERR 0x20000000
+#define INFINIPATH_RHF_H_LENERR 0x10000000
+#define INFINIPATH_RHF_H_MTUERR 0x08000000
+#define INFINIPATH_RHF_H_IHDRERR 0x04000000
+#define INFINIPATH_RHF_H_TIDERR 0x02000000
+#define INFINIPATH_RHF_H_MKERR 0x01000000
+#define INFINIPATH_RHF_H_IBERR 0x00800000
+#define INFINIPATH_RHF_H_ERR_MASK 0xFF800000
+#define INFINIPATH_RHF_L_USE_EGR 0x80000000
+#define INFINIPATH_RHF_L_SWA 0x00008000
+#define INFINIPATH_RHF_L_SWB 0x00004000
+
+/* infinipath header fields */
+#define INFINIPATH_I_VERS_MASK 0xF
+#define INFINIPATH_I_VERS_SHIFT 28
+#define INFINIPATH_I_PORT_MASK 0xF
+#define INFINIPATH_I_PORT_SHIFT 24
+#define INFINIPATH_I_TID_MASK 0x7FF
+#define INFINIPATH_I_TID_SHIFT 13
+#define INFINIPATH_I_OFFSET_MASK 0x1FFF
+#define INFINIPATH_I_OFFSET_SHIFT 0
+
+/* K_PktFlags bits */
+#define INFINIPATH_KPF_INTR 0x1
+#define INFINIPATH_KPF_SUBPORT_MASK 0x3
+#define INFINIPATH_KPF_SUBPORT_SHIFT 1
+
+#define INFINIPATH_MAX_SUBPORT 4
+
+/* SendPIO per-buffer control */
+#define INFINIPATH_SP_TEST 0x40
+#define INFINIPATH_SP_TESTEBP 0x20
+#define INFINIPATH_SP_TRIGGER_SHIFT 15
+
+/* SendPIOAvail bits */
+#define INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1
+#define INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 0
+
+/* infinipath header format */
+struct ipath_header {
+ /*
+ * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
+ * 14 bits before ECO change ~28 Dec 03. After that, Vers 4,
+ * Port 4, TID 11, offset 13.
+ */
+ __le32 ver_port_tid_offset;
+ __le16 chksum;
+ __le16 pkt_flags;
+};
+
+/* infinipath user message header format.
+ * This structure contains the first 4 fields common to all protocols
+ * that employ infinipath.
+ */
+struct ipath_message_header {
+ __be16 lrh[4];
+ __be32 bth[3];
+ /* fields below this point are in host byte order */
+ struct ipath_header iph;
+ __u8 sub_opcode;
+};
+
+/* infinipath ethernet header format */
+struct ether_header {
+ __be16 lrh[4];
+ __be32 bth[3];
+ struct ipath_header iph;
+ __u8 sub_opcode;
+ __u8 cmd;
+ __be16 lid;
+ __u16 mac[3];
+ __u8 frag_num;
+ __u8 seq_num;
+ __le32 len;
+ /* MUST be of word size due to PIO write requirements */
+ __le32 csum;
+ __le16 csum_offset;
+ __le16 flags;
+ __u16 first_2_bytes;
+ __u8 unused[2]; /* currently unused */
+};
+
+
+/* IB - LRH header consts */
+#define IPATH_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
+#define IPATH_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
+
+/* misc. */
+#define SIZE_OF_CRC 1
+
+#define IPATH_DEFAULT_P_KEY 0xFFFF
+#define IPATH_PERMISSIVE_LID 0xFFFF
+#define IPATH_AETH_CREDIT_SHIFT 24
+#define IPATH_AETH_CREDIT_MASK 0x1F
+#define IPATH_AETH_CREDIT_INVAL 0x1F
+#define IPATH_PSN_MASK 0xFFFFFF
+#define IPATH_MSN_MASK 0xFFFFFF
+#define IPATH_QPN_MASK 0xFFFFFF
+#define IPATH_MULTICAST_LID_BASE 0xC000
+#define IPATH_EAGER_TID_ID INFINIPATH_I_TID_MASK
+#define IPATH_MULTICAST_QPN 0xFFFFFF
+
+/* Receive Header Queue: receive type (from infinipath) */
+#define RCVHQ_RCV_TYPE_EXPECTED 0
+#define RCVHQ_RCV_TYPE_EAGER 1
+#define RCVHQ_RCV_TYPE_NON_KD 2
+#define RCVHQ_RCV_TYPE_ERROR 3
+
+
+/* sub OpCodes - ith4x */
+#define IPATH_ITH4X_OPCODE_ENCAP 0x81
+#define IPATH_ITH4X_OPCODE_LID_ARP 0x82
+
+#define IPATH_HEADER_QUEUE_WORDS 9
+
+/* functions for extracting fields from rcvhdrq entries for the driver.
+ */
+static inline __u32 ipath_hdrget_err_flags(const __le32 * rbuf)
+{
+ return __le32_to_cpu(rbuf[1]) & INFINIPATH_RHF_H_ERR_MASK;
+}
+
+static inline __u32 ipath_hdrget_rcv_type(const __le32 * rbuf)
+{
+ return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_RCVTYPE_SHIFT)
+ & INFINIPATH_RHF_RCVTYPE_MASK;
+}
+
+static inline __u32 ipath_hdrget_length_in_bytes(const __le32 * rbuf)
+{
+ return ((__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_LENGTH_SHIFT)
+ & INFINIPATH_RHF_LENGTH_MASK) << 2;
+}
+
+static inline __u32 ipath_hdrget_index(const __le32 * rbuf)
+{
+ return (__le32_to_cpu(rbuf[0]) >> INFINIPATH_RHF_EGRINDEX_SHIFT)
+ & INFINIPATH_RHF_EGRINDEX_MASK;
+}
+
+static inline __u32 ipath_hdrget_seq(const __le32 *rbuf)
+{
+ return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_SEQ_SHIFT)
+ & INFINIPATH_RHF_SEQ_MASK;
+}
+
+static inline __u32 ipath_hdrget_offset(const __le32 *rbuf)
+{
+ return (__le32_to_cpu(rbuf[1]) >> INFINIPATH_RHF_HDRQ_OFFSET_SHIFT)
+ & INFINIPATH_RHF_HDRQ_OFFSET_MASK;
+}
+
+static inline __u32 ipath_hdrget_use_egr_buf(const __le32 *rbuf)
+{
+ return __le32_to_cpu(rbuf[0]) & INFINIPATH_RHF_L_USE_EGR;
+}
+
+static inline __u32 ipath_hdrget_ipath_ver(__le32 hdrword)
+{
+ return (__le32_to_cpu(hdrword) >> INFINIPATH_I_VERS_SHIFT)
+ & INFINIPATH_I_VERS_MASK;
+}
+
+#endif /* _IPATH_COMMON_H */
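
The inline helpers above all pull fields out of the two 32-bit receive header flag words with the INFINIPATH_RHF_* shifts and masks. A short worked decode of a made-up flag word 0 (the value is invented purely for illustration):

    #include <stdio.h>

    int main(void)
    {
            /* eager index 5, receive type 1 (eager), length 64 words */
            unsigned int rhf0 = (5u << 16) | (1u << 11) | 64u;

            unsigned int len_words = rhf0 & 0x7FF;           /* RHF_LENGTH   */
            unsigned int rcv_type  = (rhf0 >> 11) & 0x7;     /* RHF_RCVTYPE  */
            unsigned int egr_index = (rhf0 >> 16) & 0xFFF;   /* RHF_EGRINDEX */

            /* length is in 32-bit words, hence the << 2 for bytes */
            printf("len=%u words (%u bytes) type=%u egridx=%u\n",
                   len_words, len_words << 2, rcv_type, egr_index);
            return 0;
    }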
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
new file mode 100644
index 0000000..d385e41
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -0,0 +1,477 @@
+/*
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+
+#include "ipath_verbs.h"
+
+/**
+ * ipath_cq_enter - add a new entry to the completion queue
+ * @cq: completion queue
+ * @entry: work completion entry to add
+ * @solicited: true if @entry is a solicited entry
+ *
+ * This may be called with qp->s_lock held.
+ */
+void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
+{
+ struct ipath_cq_wc *wc;
+ unsigned long flags;
+ u32 head;
+ u32 next;
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ /*
+ * Note that the head pointer might be writable by user processes.
+ * Take care to verify it is a sane value.
+ */
+ wc = cq->queue;
+ head = wc->head;
+ if (head >= (unsigned) cq->ibcq.cqe) {
+ head = cq->ibcq.cqe;
+ next = 0;
+ } else
+ next = head + 1;
+ if (unlikely(next == wc->tail)) {
+ spin_unlock_irqrestore(&cq->lock, flags);
+ if (cq->ibcq.event_handler) {
+ struct ib_event ev;
+
+ ev.device = cq->ibcq.device;
+ ev.element.cq = &cq->ibcq;
+ ev.event = IB_EVENT_CQ_ERR;
+ cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
+ }
+ return;
+ }
+ if (cq->ip) {
+ wc->uqueue[head].wr_id = entry->wr_id;
+ wc->uqueue[head].status = entry->status;
+ wc->uqueue[head].opcode = entry->opcode;
+ wc->uqueue[head].vendor_err = entry->vendor_err;
+ wc->uqueue[head].byte_len = entry->byte_len;
+ wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
+ wc->uqueue[head].qp_num = entry->qp->qp_num;
+ wc->uqueue[head].src_qp = entry->src_qp;
+ wc->uqueue[head].wc_flags = entry->wc_flags;
+ wc->uqueue[head].pkey_index = entry->pkey_index;
+ wc->uqueue[head].slid = entry->slid;
+ wc->uqueue[head].sl = entry->sl;
+ wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
+ wc->uqueue[head].port_num = entry->port_num;
+ /* Make sure entry is written before the head index. */
+ smp_wmb();
+ } else
+ wc->kqueue[head] = *entry;
+ wc->head = next;
+
+ if (cq->notify == IB_CQ_NEXT_COMP ||
+ (cq->notify == IB_CQ_SOLICITED && solicited)) {
+ cq->notify = IB_CQ_NONE;
+ cq->triggered++;
+ /*
+ * This will cause send_complete() to be called in
+ * another thread.
+ */
+ tasklet_hi_schedule(&cq->comptask);
+ }
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ if (entry->status != IB_WC_SUCCESS)
+ to_idev(cq->ibcq.device)->n_wqe_errs++;
+}
+
+/**
+ * ipath_poll_cq - poll for work completion entries
+ * @ibcq: the completion queue to poll
+ * @num_entries: the maximum number of entries to return
+ * @entry: pointer to array where work completions are placed
+ *
+ * Returns the number of completion entries polled.
+ *
+ * This may be called from interrupt context. Also called by ib_poll_cq()
+ * in the generic verbs code.
+ */
+int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+ struct ipath_cq *cq = to_icq(ibcq);
+ struct ipath_cq_wc *wc;
+ unsigned long flags;
+ int npolled;
+ u32 tail;
+
+ /* The kernel can only poll a kernel completion queue */
+ if (cq->ip) {
+ npolled = -EINVAL;
+ goto bail;
+ }
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ wc = cq->queue;
+ tail = wc->tail;
+ if (tail > (u32) cq->ibcq.cqe)
+ tail = (u32) cq->ibcq.cqe;
+ for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
+ if (tail == wc->head)
+ break;
+ /* The kernel doesn't need a RMB since it has the lock. */
+ *entry = wc->kqueue[tail];
+ if (tail >= cq->ibcq.cqe)
+ tail = 0;
+ else
+ tail++;
+ }
+ wc->tail = tail;
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+bail:
+ return npolled;
+}
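
ipath_poll_cq() is reached through the generic ib_poll_cq() entry point. A hedged sketch of how a kernel consumer typically drains a CQ this way; the surrounding function and the "my_cq" handle are assumptions, not code from this driver:

    #include <linux/kernel.h>
    #include <rdma/ib_verbs.h>

    /* Drain up to 8 completions at a time until the CQ is empty. */
    static void example_drain_cq(struct ib_cq *my_cq)
    {
            struct ib_wc wc[8];
            int n, i;

            while ((n = ib_poll_cq(my_cq, 8, wc)) > 0) {
                    for (i = 0; i < n; i++) {
                            if (wc[i].status != IB_WC_SUCCESS)
                                    printk(KERN_WARNING "wr_id %llu failed: %d\n",
                                           (unsigned long long)wc[i].wr_id,
                                           wc[i].status);
                            /* ... retire the corresponding work request ... */
                    }
            }
    }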
+
+static void send_complete(unsigned long data)
+{
+ struct ipath_cq *cq = (struct ipath_cq *)data;
+
+ /*
+ * The completion handler will most likely rearm the notification
+ * and poll for all pending entries. If a new completion entry
+ * is added while we are in this routine, tasklet_hi_schedule()
+ * won't call us again until we return so we check triggered to
+ * see if we need to call the handler again.
+ */
+ for (;;) {
+ u8 triggered = cq->triggered;
+
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+
+ if (cq->triggered == triggered)
+ return;
+ }
+}
+
+/**
+ * ipath_create_cq - create a completion queue
+ * @ibdev: the device this completion queue is attached to
+ * @entries: the minimum size of the completion queue
+ * @context: unused by the InfiniPath driver
+ * @udata: unused by the InfiniPath driver
+ *
+ * Returns a pointer to the completion queue or negative errno values
+ * for failure.
+ *
+ * Called by ib_create_cq() in the generic verbs code.
+ */
+struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_cq *cq;
+ struct ipath_cq_wc *wc;
+ struct ib_cq *ret;
+ u32 sz;
+
+ if (entries < 1 || entries > ib_ipath_max_cqes) {
+ ret = ERR_PTR(-EINVAL);
+ goto done;
+ }
+
+ /* Allocate the completion queue structure. */
+ cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq) {
+ ret = ERR_PTR(-ENOMEM);
+ goto done;
+ }
+
+ /*
+ * Allocate the completion queue entries and head/tail pointers.
+ * This is allocated separately so that it can be resized and
+ * also mapped into user space.
+ * We need to use vmalloc() in order to support mmap and large
+ * numbers of entries.
+ */
+ sz = sizeof(*wc);
+ if (udata && udata->outlen >= sizeof(__u64))
+ sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
+ else
+ sz += sizeof(struct ib_wc) * (entries + 1);
+ wc = vmalloc_user(sz);
+ if (!wc) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_cq;
+ }
+
+ /*
+ * Return the address of the WC as the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ int err;
+
+ cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
+ if (!cq->ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wc;
+ }
+
+ err = ib_copy_to_udata(udata, &cq->ip->offset,
+ sizeof(cq->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
+ }
+ } else
+ cq->ip = NULL;
+
+ spin_lock(&dev->n_cqs_lock);
+ if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
+ spin_unlock(&dev->n_cqs_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ dev->n_cqs_allocated++;
+ spin_unlock(&dev->n_cqs_lock);
+
+ if (cq->ip) {
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
+ /*
+ * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
+ * The number of entries should be >= the number requested or return
+ * an error.
+ */
+ cq->ibcq.cqe = entries;
+ cq->notify = IB_CQ_NONE;
+ cq->triggered = 0;
+ spin_lock_init(&cq->lock);
+ tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
+ wc->head = 0;
+ wc->tail = 0;
+ cq->queue = wc;
+
+ ret = &cq->ibcq;
+
+ goto done;
+
+bail_ip:
+ kfree(cq->ip);
+bail_wc:
+ vfree(wc);
+bail_cq:
+ kfree(cq);
+done:
+ return ret;
+}
+
+/**
+ * ipath_destroy_cq - destroy a completion queue
+ * @ibcq: the completion queue to destroy.
+ *
+ * Returns 0 for success.
+ *
+ * Called by ib_destroy_cq() in the generic verbs code.
+ */
+int ipath_destroy_cq(struct ib_cq *ibcq)
+{
+ struct ipath_ibdev *dev = to_idev(ibcq->device);
+ struct ipath_cq *cq = to_icq(ibcq);
+
+ tasklet_kill(&cq->comptask);
+ spin_lock(&dev->n_cqs_lock);
+ dev->n_cqs_allocated--;
+ spin_unlock(&dev->n_cqs_lock);
+ if (cq->ip)
+ kref_put(&cq->ip->ref, ipath_release_mmap_info);
+ else
+ vfree(cq->queue);
+ kfree(cq);
+
+ return 0;
+}
+
+/**
+ * ipath_req_notify_cq - change the notification type for a completion queue
+ * @ibcq: the completion queue
+ * @notify_flags: the type of notification to request
+ *
+ * Returns 0 for success.
+ *
+ * This may be called from interrupt context. Also called by
+ * ib_req_notify_cq() in the generic verbs code.
+ */
+int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
+{
+ struct ipath_cq *cq = to_icq(ibcq);
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&cq->lock, flags);
+ /*
+ * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
+ * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
+ */
+ if (cq->notify != IB_CQ_NEXT_COMP)
+ cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
+
+ if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+ cq->queue->head != cq->queue->tail)
+ ret = 1;
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ return ret;
+}
+
+/**
+ * ipath_resize_cq - change the size of the CQ
+ * @ibcq: the completion queue
+ *
+ * Returns 0 for success.
+ */
+int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+{
+ struct ipath_cq *cq = to_icq(ibcq);
+ struct ipath_cq_wc *old_wc;
+ struct ipath_cq_wc *wc;
+ u32 head, tail, n;
+ int ret;
+ u32 sz;
+
+ if (cqe < 1 || cqe > ib_ipath_max_cqes) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ /*
+ * Need to use vmalloc() if we want to support large #s of entries.
+ */
+ sz = sizeof(*wc);
+ if (udata && udata->outlen >= sizeof(__u64))
+ sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
+ else
+ sz += sizeof(struct ib_wc) * (cqe + 1);
+ wc = vmalloc_user(sz);
+ if (!wc) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ /* Check that we can write the offset to mmap. */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ __u64 offset = 0;
+
+ ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
+ if (ret)
+ goto bail_free;
+ }
+
+ spin_lock_irq(&cq->lock);
+ /*
+ * Make sure head and tail are sane since they
+ * might be user writable.
+ */
+ old_wc = cq->queue;
+ head = old_wc->head;
+ if (head > (u32) cq->ibcq.cqe)
+ head = (u32) cq->ibcq.cqe;
+ tail = old_wc->tail;
+ if (tail > (u32) cq->ibcq.cqe)
+ tail = (u32) cq->ibcq.cqe;
+ if (head < tail)
+ n = cq->ibcq.cqe + 1 + head - tail;
+ else
+ n = head - tail;
+ if (unlikely((u32)cqe < n)) {
+ ret = -EINVAL;
+ goto bail_unlock;
+ }
+ for (n = 0; tail != head; n++) {
+ if (cq->ip)
+ wc->uqueue[n] = old_wc->uqueue[tail];
+ else
+ wc->kqueue[n] = old_wc->kqueue[tail];
+ if (tail == (u32) cq->ibcq.cqe)
+ tail = 0;
+ else
+ tail++;
+ }
+ cq->ibcq.cqe = cqe;
+ wc->head = n;
+ wc->tail = 0;
+ cq->queue = wc;
+ spin_unlock_irq(&cq->lock);
+
+ vfree(old_wc);
+
+ if (cq->ip) {
+ struct ipath_ibdev *dev = to_idev(ibcq->device);
+ struct ipath_mmap_info *ip = cq->ip;
+
+ ipath_update_mmap_info(dev, ip, sz, wc);
+
+ /*
+ * Return the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ ret = ib_copy_to_udata(udata, &ip->offset,
+ sizeof(ip->offset));
+ if (ret)
+ goto bail;
+ }
+
+ spin_lock_irq(&dev->pending_lock);
+ if (list_empty(&ip->pending_mmaps))
+ list_add(&ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
+ ret = 0;
+ goto bail;
+
+bail_unlock:
+ spin_unlock_irq(&cq->lock);
+bail_free:
+ vfree(wc);
+bail:
+ return ret;
+}
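
The occupancy check in ipath_resize_cq() counts entries in a ring whose storage has cqe + 1 slots: head - tail when head has not wrapped past tail, otherwise cqe + 1 + head - tail. A tiny worked example of that arithmetic with arbitrary values:

    #include <stdio.h>

    static unsigned int occupancy(unsigned int cqe, unsigned int head,
                                  unsigned int tail)
    {
            /* ring storage holds cqe + 1 slots */
            return head < tail ? cqe + 1 + head - tail : head - tail;
    }

    int main(void)
    {
            printf("%u\n", occupancy(7, 2, 6));     /* wrapped ring: 4 entries */
            printf("%u\n", occupancy(7, 6, 2));     /* linear case:  4 entries */
            return 0;
    }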
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
new file mode 100644
index 0000000..65926cd
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _IPATH_DEBUG_H
+#define _IPATH_DEBUG_H
+
+#ifndef _IPATH_DEBUGGING /* debugging enabled or not */
+#define _IPATH_DEBUGGING 1
+#endif
+
+#if _IPATH_DEBUGGING
+
+/*
+ * Mask values for debugging. The scheme allows us to compile out any
+ * of the debug tracing stuff, and if compiled in, to enable or disable
+ * dynamically. This can be set at modprobe time also:
+ * modprobe infinipath.ko infinipath_debug=7
+ */
+
+#define __IPATH_INFO 0x1 /* generic low verbosity stuff */
+#define __IPATH_DBG 0x2 /* generic debug */
+#define __IPATH_TRSAMPLE 0x8 /* generate trace buffer sample entries */
+/* leave some low verbosity spots open */
+#define __IPATH_VERBDBG 0x40 /* very verbose debug */
+#define __IPATH_PKTDBG 0x80 /* print packet data */
+/* print process startup (init)/exit messages */
+#define __IPATH_PROCDBG 0x100
+/* print mmap/fault stuff, not using VDBG any more */
+#define __IPATH_MMDBG 0x200
+#define __IPATH_ERRPKTDBG 0x400
+#define __IPATH_USER_SEND 0x1000 /* use user mode send */
+#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
+#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
+#define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) gen debug */
+#define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings */
+#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */
+#define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump */
+#define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump */
+#define __IPATH_LINKVERBDBG 0x200000 /* very verbose linkchange debug */
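+
+/*
+ * Illustrative only (not from the original header): call sites test the
+ * mask before emitting output, usually via the ipath_cdbg()/ipath_dbg()
+ * wrappers, roughly like
+ *
+ *	if (ipath_debug & __IPATH_PROCDBG)
+ *		printk(KERN_DEBUG "process startup/exit trace\n");
+ *
+ * so the all-zero definitions in the #else branch below let the compiler
+ * drop such branches entirely when debugging is compiled out.
+ */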
+
+#else /* _IPATH_DEBUGGING */
+
+/*
+ * define all of these even with debugging off, for the few places that do
+ * if(infinipath_debug & _IPATH_xyzzy), but in a way that will make the
+ * compiler eliminate the code
+ */
+
+#define __IPATH_INFO 0x0 /* generic low verbosity stuff */
+#define __IPATH_DBG 0x0 /* generic debug */
+#define __IPATH_TRSAMPLE 0x0 /* generate trace buffer sample entries */
+#define __IPATH_VERBDBG 0x0 /* very verbose debug */
+#define __IPATH_PKTDBG 0x0 /* print packet data */
+#define __IPATH_PROCDBG 0x0 /* process startup (init)/exit messages */
+/* print mmap/fault stuff, not using VDBG any more */
+#define __IPATH_MMDBG 0x0
+#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */
+#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) gen debug on */
+#define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */
+#define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */
+#define __IPATH_IPATHPD 0x0 /* Ethernet (IPATH) packet dump on */
+#define __IPATH_IPATHTABLE 0x0 /* Ethernet (IPATH) table dump on */
+#define __IPATH_LINKVERBDBG 0x0 /* very verbose linkchange debug */
+
+#endif /* _IPATH_DEBUGGING */
+
+#define __IPATH_VERBOSEDBG __IPATH_VERBDBG
+
+#endif /* _IPATH_DEBUG_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
new file mode 100644
index 0000000..d4ce8b6
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -0,0 +1,560 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file contains support for diagnostic functions. It is accessed by
+ * opening the ipath_diag device, normally minor number 129. Diagnostic use
+ * of the InfiniPath chip may render the chip or board unusable until the
+ * driver is unloaded, or in some cases, until the system is rebooted.
+ *
+ * Accesses to the chip through this interface are not similar to going
+ * through the /sys/bus/pci resource mmap interface.
+ */
+
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+
+#include "ipath_kernel.h"
+#include "ipath_common.h"
+
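+/*
+ * Open-state tracking for the per-unit diag device (summarized from the
+ * open/read/write handlers below): 0 means not open, -2 means just
+ * opened, -1 means the initial qword read at offset 0 has been done,
+ * and 1 means the initial qword write has also been done, so arbitrary
+ * aligned reads and writes are then allowed.
+ */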
+int ipath_diag_inuse;
+static int diag_set_link;
+
+static int ipath_diag_open(struct inode *in, struct file *fp);
+static int ipath_diag_release(struct inode *in, struct file *fp);
+static ssize_t ipath_diag_read(struct file *fp, char __user *data,
+ size_t count, loff_t *off);
+static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
+ size_t count, loff_t *off);
+
+static const struct file_operations diag_file_ops = {
+ .owner = THIS_MODULE,
+ .write = ipath_diag_write,
+ .read = ipath_diag_read,
+ .open = ipath_diag_open,
+ .release = ipath_diag_release
+};
+
+static ssize_t ipath_diagpkt_write(struct file *fp,
+ const char __user *data,
+ size_t count, loff_t *off);
+
+static const struct file_operations diagpkt_file_ops = {
+ .owner = THIS_MODULE,
+ .write = ipath_diagpkt_write,
+};
+
+static atomic_t diagpkt_count = ATOMIC_INIT(0);
+static struct cdev *diagpkt_cdev;
+static struct device *diagpkt_dev;
+
+int ipath_diag_add(struct ipath_devdata *dd)
+{
+ char name[16];
+ int ret = 0;
+
+ if (atomic_inc_return(&diagpkt_count) == 1) {
+ ret = ipath_cdev_init(IPATH_DIAGPKT_MINOR,
+ "ipath_diagpkt", &diagpkt_file_ops,
+ &diagpkt_cdev, &diagpkt_dev);
+
+ if (ret) {
+ ipath_dev_err(dd, "Couldn't create ipath_diagpkt "
+ "device: %d", ret);
+ goto done;
+ }
+ }
+
+ snprintf(name, sizeof(name), "ipath_diag%d", dd->ipath_unit);
+
+ ret = ipath_cdev_init(IPATH_DIAG_MINOR_BASE + dd->ipath_unit, name,
+ &diag_file_ops, &dd->diag_cdev,
+ &dd->diag_dev);
+ if (ret)
+ ipath_dev_err(dd, "Couldn't create %s device: %d",
+ name, ret);
+
+done:
+ return ret;
+}
+
+void ipath_diag_remove(struct ipath_devdata *dd)
+{
+ if (atomic_dec_and_test(&diagpkt_count))
+ ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_dev);
+
+ ipath_cdev_cleanup(&dd->diag_cdev, &dd->diag_dev);
+}
+
+/**
+ * ipath_read_umem64 - read a 64-bit quantity from the chip into user space
+ * @dd: the infinipath device
+ * @uaddr: the location to store the data in user memory
+ * @caddr: the source chip address (full pointer, not offset)
+ * @count: number of bytes to copy (multiple of 32 bits)
+ *
+ * This function also localizes all chip memory accesses.
+ * The copy should be written such that we read full cacheline packets
+ * from the chip. This is usually used for a single qword
+ *
+ * NOTE: This assumes the chip address is 64-bit aligned.
+ */
+static int ipath_read_umem64(struct ipath_devdata *dd, void __user *uaddr,
+ const void __iomem *caddr, size_t count)
+{
+ const u64 __iomem *reg_addr = caddr;
+ const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
+ int ret;
+
+ /* not very efficient, but it works for now */
+ if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ while (reg_addr < reg_end) {
+ u64 data = readq(reg_addr);
+ if (copy_to_user(uaddr, &data, sizeof(u64))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ reg_addr++;
+ uaddr += sizeof(u64);
+ }
+ ret = 0;
+bail:
+ return ret;
+}
+
+/**
+ * ipath_write_umem64 - write a 64-bit quantity to the chip from user space
+ * @dd: the infinipath device
+ * @caddr: the destination chip address (full pointer, not offset)
+ * @uaddr: the source of the data in user memory
+ * @count: the number of bytes to copy (multiple of 32 bits)
+ *
+ * This is usually used for a single qword
+ * NOTE: This assumes the chip address is 64-bit aligned.
+ */
+
+static int ipath_write_umem64(struct ipath_devdata *dd, void __iomem *caddr,
+ const void __user *uaddr, size_t count)
+{
+ u64 __iomem *reg_addr = caddr;
+ const u64 __iomem *reg_end = reg_addr + (count / sizeof(u64));
+ int ret;
+
+ /* not very efficient, but it works for now */
+ if (reg_addr < dd->ipath_kregbase || reg_end > dd->ipath_kregend) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ while (reg_addr < reg_end) {
+ u64 data;
+ if (copy_from_user(&data, uaddr, sizeof(data))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ writeq(data, reg_addr);
+
+ reg_addr++;
+ uaddr += sizeof(u64);
+ }
+ ret = 0;
+bail:
+ return ret;
+}
+
+/**
+ * ipath_read_umem32 - read a 32-bit quantity from the chip into user space
+ * @dd: the infinipath device
+ * @uaddr: the location to store the data in user memory
+ * @caddr: the source chip address (full pointer, not offset)
+ * @count: number of bytes to copy
+ *
+ * read 32 bit values, not 64 bit; for memories that only
+ * support 32 bit reads; usually a single dword.
+ */
+static int ipath_read_umem32(struct ipath_devdata *dd, void __user *uaddr,
+ const void __iomem *caddr, size_t count)
+{
+ const u32 __iomem *reg_addr = caddr;
+ const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
+ int ret;
+
+ if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
+ reg_end > (u32 __iomem *) dd->ipath_kregend) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ /* not very efficient, but it works for now */
+ while (reg_addr < reg_end) {
+ u32 data = readl(reg_addr);
+ if (copy_to_user(uaddr, &data, sizeof(data))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ reg_addr++;
+ uaddr += sizeof(u32);
+
+ }
+ ret = 0;
+bail:
+ return ret;
+}
+
+/**
+ * ipath_write_umem32 - write a 32-bit quantity to the chip from user space
+ * @dd: the infinipath device
+ * @caddr: the destination chip address (full pointer, not offset)
+ * @uaddr: the source of the data in user memory
+ * @count: number of bytes to copy
+ *
+ * write 32 bit values, not 64 bit; for memories that only
+ * support 32 bit write; usually a single dword.
+ */
+
+static int ipath_write_umem32(struct ipath_devdata *dd, void __iomem *caddr,
+ const void __user *uaddr, size_t count)
+{
+ u32 __iomem *reg_addr = caddr;
+ const u32 __iomem *reg_end = reg_addr + (count / sizeof(u32));
+ int ret;
+
+ if (reg_addr < (u32 __iomem *) dd->ipath_kregbase ||
+ reg_end > (u32 __iomem *) dd->ipath_kregend) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ while (reg_addr < reg_end) {
+ u32 data;
+ if (copy_from_user(&data, uaddr, sizeof(data))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ writel(data, reg_addr);
+
+ reg_addr++;
+ uaddr += sizeof(u32);
+ }
+ ret = 0;
+bail:
+ return ret;
+}
+
+static int ipath_diag_open(struct inode *in, struct file *fp)
+{
+ int unit = iminor(in) - IPATH_DIAG_MINOR_BASE;
+ struct ipath_devdata *dd;
+ int ret;
+
+ mutex_lock(&ipath_mutex);
+
+ if (ipath_diag_inuse) {
+ ret = -EBUSY;
+ goto bail;
+ }
+
+ dd = ipath_lookup(unit);
+
+ if (dd == NULL || !(dd->ipath_flags & IPATH_PRESENT) ||
+ !dd->ipath_kregbase) {
+ ret = -ENODEV;
+ goto bail;
+ }
+
+ fp->private_data = dd;
+ ipath_diag_inuse = -2;
+ diag_set_link = 0;
+ ret = 0;
+
+	/*
+	 * Only expose a way to reset the device if we
+	 * make it into diag mode.
+	 */
+ ipath_expose_reset(&dd->pcidev->dev);
+
+bail:
+ mutex_unlock(&ipath_mutex);
+
+ return ret;
+}
+
+/**
+ * ipath_diagpkt_write - write an IB packet
+ * @fp: the diag data device file pointer
+ * @data: ipath_diag_pkt structure saying where to get the packet
+ * @count: size of data to write
+ * @off: unused by this code
+ */
+static ssize_t ipath_diagpkt_write(struct file *fp,
+ const char __user *data,
+ size_t count, loff_t *off)
+{
+ u32 __iomem *piobuf;
+ u32 plen, clen, pbufn;
+ struct ipath_diag_pkt odp;
+ struct ipath_diag_xpkt dp;
+ u32 *tmpbuf = NULL;
+ struct ipath_devdata *dd;
+ ssize_t ret = 0;
+ u64 val;
+ u32 l_state, lt_state; /* LinkState, LinkTrainingState */
+
+ if (count < sizeof(odp)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ if (count == sizeof(dp)) {
+ if (copy_from_user(&dp, data, sizeof(dp))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ } else if (copy_from_user(&odp, data, sizeof(odp))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+	/*
+	 * Due to padding/alignment issues (lessened with the new struct)
+	 * the old and new structs are the same length. We need to
+	 * disambiguate them, which we can do because odp.len has never
+	 * been less than the total of LRH+BTH+DETH so far, while
+	 * dp.unit (at the same offset) is unlikely to get that high.
+	 * Similarly, dp.data, the user pointer at the same offset
+	 * as odp.unit, is almost certainly at least one (512 byte) page
+	 * "above" NULL. The if-block below can be omitted if compatibility
+	 * between a new driver and older diagnostic code is unimportant.
+	 * Compatibility in the other direction (new diags, old driver) is
+	 * handled in the diagnostic code, with a warning.
+	 */
+ if (dp.unit >= 20 && dp.data < 512) {
+ /* very probable version mismatch. Fix it up */
+ memcpy(&odp, &dp, sizeof(odp));
+ /* We got a legacy dp, copy elements to dp */
+ dp.unit = odp.unit;
+ dp.data = odp.data;
+ dp.len = odp.len;
+ dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */
+ }
+
+ /* send count must be an exact number of dwords */
+ if (dp.len & 3) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ clen = dp.len >> 2;
+
+ dd = ipath_lookup(dp.unit);
+ if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
+ !dd->ipath_kregbase) {
+ ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
+ dp.unit);
+ ret = -ENODEV;
+ goto bail;
+ }
+
+ if (ipath_diag_inuse && !diag_set_link &&
+ !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+ diag_set_link = 1;
+		ipath_cdbg(VERBOSE, "Trying to set link active for "
+ "diag pkt\n");
+ ipath_set_linkstate(dd, IPATH_IB_LINKARM);
+ ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
+ }
+
+ if (!(dd->ipath_flags & IPATH_INITTED)) {
+ /* no hardware, freeze, etc. */
+ ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
+ ret = -ENODEV;
+ goto bail;
+ }
+ /*
+ * Want to skip check for l_state if using custom PBC,
+ * because we might be trying to force an SM packet out.
+ * first-cut, skip _all_ state checking in that case.
+ */
+ val = ipath_ib_state(dd, dd->ipath_lastibcstat);
+ lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
+ l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
+ if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
+ (val != dd->ib_init && val != dd->ib_arm &&
+ val != dd->ib_active))) {
+ ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
+ dd->ipath_unit, (unsigned long long) val);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ /* need total length before first word written */
+ /* +1 word is for the qword padding */
+ plen = sizeof(u32) + dp.len;
+
+ if ((plen + 4) > dd->ipath_ibmaxlen) {
+ ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
+ plen - 4, dd->ipath_ibmaxlen);
+ ret = -EINVAL;
+ goto bail; /* before writing pbc */
+ }
+ tmpbuf = vmalloc(plen);
+ if (!tmpbuf) {
+ dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
+ "failing\n");
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ if (copy_from_user(tmpbuf,
+ (const void __user *) (unsigned long) dp.data,
+ dp.len)) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ plen >>= 2; /* in dwords */
+
+ piobuf = ipath_getpiobuf(dd, plen, &pbufn);
+ if (!piobuf) {
+		ipath_cdbg(VERBOSE, "No PIO buffers avail for unit %u\n",
+ dd->ipath_unit);
+ ret = -EBUSY;
+ goto bail;
+ }
+ /* disarm it just to be extra sure */
+ ipath_disarm_piobufs(dd, pbufn, 1);
+
+ if (ipath_debug & __IPATH_PKTDBG)
+ ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
+ dd->ipath_unit, plen - 1, pbufn);
+
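+	/* a pbc_wd of 0 from user space means "compute the PBC word for us" */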
+ if (dp.pbc_wd == 0)
+ dp.pbc_wd = plen;
+ writeq(dp.pbc_wd, piobuf);
+	/*
+	 * Copy all but the trigger word, then flush, so it's written
+	 * to the chip before the trigger word, then write the trigger
+	 * word, then flush again, so the packet is sent.
+	 */
+ if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
+ ipath_flush_wc();
+ __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+ ipath_flush_wc();
+ __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+ } else
+ __iowrite32_copy(piobuf + 2, tmpbuf, clen);
+
+ ipath_flush_wc();
+
+ ret = sizeof(dp);
+
+bail:
+ vfree(tmpbuf);
+ return ret;
+}
+
+static int ipath_diag_release(struct inode *in, struct file *fp)
+{
+ mutex_lock(&ipath_mutex);
+ ipath_diag_inuse = 0;
+ fp->private_data = NULL;
+ mutex_unlock(&ipath_mutex);
+ return 0;
+}
+
+static ssize_t ipath_diag_read(struct file *fp, char __user *data,
+ size_t count, loff_t *off)
+{
+ struct ipath_devdata *dd = fp->private_data;
+ void __iomem *kreg_base;
+ ssize_t ret;
+
+ kreg_base = dd->ipath_kregbase;
+
+ if (count == 0)
+ ret = 0;
+ else if ((count % 4) || (*off % 4))
+ /* address or length is not 32-bit aligned, hence invalid */
+ ret = -EINVAL;
+ else if (ipath_diag_inuse < 1 && (*off || count != 8))
+ ret = -EINVAL; /* prevent cat /dev/ipath_diag* */
+ else if ((count % 8) || (*off % 8))
+ /* address or length not 64-bit aligned; do 32-bit reads */
+ ret = ipath_read_umem32(dd, data, kreg_base + *off, count);
+ else
+ ret = ipath_read_umem64(dd, data, kreg_base + *off, count);
+
+ if (ret >= 0) {
+ *off += count;
+ ret = count;
+ if (ipath_diag_inuse == -2)
+ ipath_diag_inuse++;
+ }
+
+ return ret;
+}
+
+static ssize_t ipath_diag_write(struct file *fp, const char __user *data,
+ size_t count, loff_t *off)
+{
+ struct ipath_devdata *dd = fp->private_data;
+ void __iomem *kreg_base;
+ ssize_t ret;
+
+ kreg_base = dd->ipath_kregbase;
+
+ if (count == 0)
+ ret = 0;
+ else if ((count % 4) || (*off % 4))
+ /* address or length is not 32-bit aligned, hence invalid */
+ ret = -EINVAL;
+ else if ((ipath_diag_inuse == -1 && (*off || count != 8)) ||
+ ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */
+ ret = -EINVAL; /* before any other write allowed */
+ else if ((count % 8) || (*off % 8))
+ /* address or length not 64-bit aligned; do 32-bit writes */
+ ret = ipath_write_umem32(dd, kreg_base + *off, data, count);
+ else
+ ret = ipath_write_umem64(dd, kreg_base + *off, data, count);
+
+ if (ret >= 0) {
+ *off += count;
+ ret = count;
+ if (ipath_diag_inuse == -1)
+ ipath_diag_inuse = 1; /* all read/write OK now */
+ }
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
new file mode 100644
index 0000000..e90a0ea
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2006 QLogic, Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/scatterlist.h>
+#include <rdma/ib_verbs.h>
+
+#include "ipath_verbs.h"
+
+#define BAD_DMA_ADDRESS ((u64) 0)
+
+/*
+ * The following functions implement driver specific replacements
+ * for the ib_dma_*() functions.
+ *
+ * These functions return kernel virtual addresses instead of
+ * device bus addresses since the driver uses the CPU to copy
+ * data instead of using hardware DMA.
+ */
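+
+/*
+ * Rough sketch (paraphrased, not authoritative) of how the verbs core
+ * dispatches to these ops when a device installs them:
+ *
+ *	static inline u64 ib_dma_map_single(struct ib_device *dev,
+ *					    void *cpu_addr, size_t size,
+ *					    enum dma_data_direction dir)
+ *	{
+ *		if (dev->dma_ops)
+ *			return dev->dma_ops->map_single(dev, cpu_addr,
+ *							size, dir);
+ *		return dma_map_single(dev->dma_device, cpu_addr, size, dir);
+ *	}
+ */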
+
+static int ipath_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+ return dma_addr == BAD_DMA_ADDRESS;
+}
+
+static u64 ipath_dma_map_single(struct ib_device *dev,
+ void *cpu_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+ return (u64) cpu_addr;
+}
+
+static void ipath_dma_unmap_single(struct ib_device *dev,
+ u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+}
+
+static u64 ipath_dma_map_page(struct ib_device *dev,
+ struct page *page,
+ unsigned long offset,
+ size_t size,
+ enum dma_data_direction direction)
+{
+ u64 addr;
+
+ BUG_ON(!valid_dma_direction(direction));
+
+ if (offset + size > PAGE_SIZE) {
+ addr = BAD_DMA_ADDRESS;
+ goto done;
+ }
+
+ addr = (u64) page_address(page);
+ if (addr)
+ addr += offset;
+ /* TODO: handle highmem pages */
+
+done:
+ return addr;
+}
+
+static void ipath_dma_unmap_page(struct ib_device *dev,
+ u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+}
+
+static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ u64 addr;
+ int i;
+ int ret = nents;
+
+ BUG_ON(!valid_dma_direction(direction));
+
+ for_each_sg(sgl, sg, nents, i) {
+ addr = (u64) page_address(sg_page(sg));
+ /* TODO: handle highmem pages */
+ if (!addr) {
+ ret = 0;
+ break;
+ }
+ }
+ return ret;
+}
+
+static void ipath_unmap_sg(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+}
+
+static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
+{
+ u64 addr = (u64) page_address(sg_page(sg));
+
+ if (addr)
+ addr += sg->offset;
+ return addr;
+}
+
+static unsigned int ipath_sg_dma_len(struct ib_device *dev,
+ struct scatterlist *sg)
+{
+ return sg->length;
+}
+
+static void ipath_sync_single_for_cpu(struct ib_device *dev,
+ u64 addr,
+ size_t size,
+ enum dma_data_direction dir)
+{
+}
+
+static void ipath_sync_single_for_device(struct ib_device *dev,
+ u64 addr,
+ size_t size,
+ enum dma_data_direction dir)
+{
+}
+
+static void *ipath_dma_alloc_coherent(struct ib_device *dev, size_t size,
+ u64 *dma_handle, gfp_t flag)
+{
+ struct page *p;
+ void *addr = NULL;
+
+ p = alloc_pages(flag, get_order(size));
+ if (p)
+ addr = page_address(p);
+ if (dma_handle)
+ *dma_handle = (u64) addr;
+ return addr;
+}
+
+static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
+ void *cpu_addr, u64 dma_handle)
+{
+ free_pages((unsigned long) cpu_addr, get_order(size));
+}
+
+struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
+ ipath_mapping_error,
+ ipath_dma_map_single,
+ ipath_dma_unmap_single,
+ ipath_dma_map_page,
+ ipath_dma_unmap_page,
+ ipath_map_sg,
+ ipath_unmap_sg,
+ ipath_sg_dma_address,
+ ipath_sg_dma_len,
+ ipath_sync_single_for_cpu,
+ ipath_sync_single_for_device,
+ ipath_dma_alloc_coherent,
+ ipath_dma_free_coherent
+};
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
new file mode 100644
index 0000000..ad0aab6
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -0,0 +1,2807 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+
+#include "ipath_kernel.h"
+#include "ipath_verbs.h"
+
+static void ipath_update_pio_bufs(struct ipath_devdata *);
+
+const char *ipath_get_unit_name(int unit)
+{
+ static char iname[16];
+ snprintf(iname, sizeof iname, "infinipath%u", unit);
+ return iname;
+}
+
+#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
+#define PFX IPATH_DRV_NAME ": "
+
+/*
+ * The size has to be longer than this string, so we can append
+ * board/chip information to it in the init code.
+ */
+const char ib_ipath_version[] = IPATH_IDSTR "\n";
+
+static struct idr unit_table;
+DEFINE_SPINLOCK(ipath_devs_lock);
+LIST_HEAD(ipath_dev_list);
+
+wait_queue_head_t ipath_state_wait;
+
+unsigned ipath_debug = __IPATH_INFO;
+
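+/*
+ * Note: S_IWUSR also makes the mask adjustable at runtime through
+ * /sys/module/<module>/parameters/debug, in addition to the modprobe
+ * option mentioned in ipath_debug.h.
+ */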
+module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(debug, "mask for debug prints");
+EXPORT_SYMBOL_GPL(ipath_debug);
+
+unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
+module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
+MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
+
+static unsigned ipath_hol_timeout_ms = 13000;
+module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
+MODULE_PARM_DESC(hol_timeout_ms,
+ "duration of user app suspension after link failure");
+
+unsigned ipath_linkrecovery = 1;
+module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("QLogic <support@qlogic.com>");
+MODULE_DESCRIPTION("QLogic InfiniPath driver");
+
+/*
+ * Table to translate the LINKTRAININGSTATE portion of
+ * IBCStatus to a human-readable form.
+ */
+const char *ipath_ibcstatus_str[] = {
+ "Disabled",
+ "LinkUp",
+ "PollActive",
+ "PollQuiet",
+ "SleepDelay",
+ "SleepQuiet",
+ "LState6", /* unused */
+ "LState7", /* unused */
+ "CfgDebounce",
+ "CfgRcvfCfg",
+ "CfgWaitRmt",
+ "CfgIdle",
+ "RecovRetrain",
+ "CfgTxRevLane", /* unused before IBA7220 */
+ "RecovWaitRmt",
+ "RecovIdle",
+ /* below were added for IBA7220 */
+ "CfgEnhanced",
+ "CfgTest",
+ "CfgWaitRmtTest",
+ "CfgWaitCfgEnhanced",
+ "SendTS_T",
+ "SendTstIdles",
+ "RcvTS_T",
+ "SendTst_TS1s",
+ "LTState18", "LTState19", "LTState1A", "LTState1B",
+ "LTState1C", "LTState1D", "LTState1E", "LTState1F"
+};
+
+static void __devexit ipath_remove_one(struct pci_dev *);
+static int __devinit ipath_init_one(struct pci_dev *,
+ const struct pci_device_id *);
+
+/* Only needed for registration, nothing else needs this info */
+#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
+#define PCI_VENDOR_ID_QLOGIC 0x1077
+#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
+#define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
+#define PCI_DEVICE_ID_INFINIPATH_7220 0x7220
+
+/* Number of seconds before our card status check... */
+#define STATUS_TIMEOUT 60
+
+static const struct pci_device_id ipath_pci_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
+ { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_INFINIPATH_7220) },
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
+
+static struct pci_driver ipath_driver = {
+ .name = IPATH_DRV_NAME,
+ .probe = ipath_init_one,
+ .remove = __devexit_p(ipath_remove_one),
+ .id_table = ipath_pci_tbl,
+ .driver = {
+ .groups = ipath_driver_attr_groups,
+ },
+};
+
+static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
+ u32 *bar0, u32 *bar1)
+{
+ int ret;
+
+ ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
+ if (ret)
+ ipath_dev_err(dd, "failed to read bar0 before enable: "
+ "error %d\n", -ret);
+
+ ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
+ if (ret)
+ ipath_dev_err(dd, "failed to read bar1 before enable: "
+ "error %d\n", -ret);
+
+ ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
+}
+
+static void ipath_free_devdata(struct pci_dev *pdev,
+ struct ipath_devdata *dd)
+{
+ unsigned long flags;
+
+ pci_set_drvdata(pdev, NULL);
+
+ if (dd->ipath_unit != -1) {
+ spin_lock_irqsave(&ipath_devs_lock, flags);
+ idr_remove(&unit_table, dd->ipath_unit);
+ list_del(&dd->ipath_list);
+ spin_unlock_irqrestore(&ipath_devs_lock, flags);
+ }
+ vfree(dd);
+}
+
+static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
+{
+ unsigned long flags;
+ struct ipath_devdata *dd;
+ int ret;
+
+ if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
+ dd = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ dd = vmalloc(sizeof(*dd));
+ if (!dd) {
+ dd = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+ memset(dd, 0, sizeof(*dd));
+ dd->ipath_unit = -1;
+
+ spin_lock_irqsave(&ipath_devs_lock, flags);
+
+ ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);
+ if (ret < 0) {
+ printk(KERN_ERR IPATH_DRV_NAME
+ ": Could not allocate unit ID: error %d\n", -ret);
+ ipath_free_devdata(pdev, dd);
+ dd = ERR_PTR(ret);
+ goto bail_unlock;
+ }
+
+ dd->pcidev = pdev;
+ pci_set_drvdata(pdev, dd);
+
+ list_add(&dd->ipath_list, &ipath_dev_list);
+
+bail_unlock:
+ spin_unlock_irqrestore(&ipath_devs_lock, flags);
+
+bail:
+ return dd;
+}
+
+static inline struct ipath_devdata *__ipath_lookup(int unit)
+{
+ return idr_find(&unit_table, unit);
+}
+
+struct ipath_devdata *ipath_lookup(int unit)
+{
+ struct ipath_devdata *dd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipath_devs_lock, flags);
+ dd = __ipath_lookup(unit);
+ spin_unlock_irqrestore(&ipath_devs_lock, flags);
+
+ return dd;
+}
+
+int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
+{
+ int nunits, npresent, nup;
+ struct ipath_devdata *dd;
+ unsigned long flags;
+ int maxports;
+
+ nunits = npresent = nup = maxports = 0;
+
+ spin_lock_irqsave(&ipath_devs_lock, flags);
+
+ list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
+ nunits++;
+ if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
+ npresent++;
+ if (dd->ipath_lid &&
+ !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
+ | IPATH_LINKUNK)))
+ nup++;
+ if (dd->ipath_cfgports > maxports)
+ maxports = dd->ipath_cfgports;
+ }
+
+ spin_unlock_irqrestore(&ipath_devs_lock, flags);
+
+ if (npresentp)
+ *npresentp = npresent;
+ if (nupp)
+ *nupp = nup;
+ if (maxportsp)
+ *maxportsp = maxports;
+
+ return nunits;
+}
+
+/*
+ * These next two routines are placeholders in case we don't have per-arch
+ * code for controlling write combining. If explicit control of write
+ * combining is not available, performance will probably be awful.
+ */
+
+int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
+{
+ return -EOPNOTSUPP;
+}
+
+void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
+{
+}
+
+/*
+ * Perform a PIO buffer bandwidth write test, to verify proper system
+ * configuration. Even when all the setup calls work, occasionally
+ * BIOS or other issues can prevent write combining from working, or
+ * can cause other bandwidth problems to the chip.
+ *
+ * This test simply writes the same buffer over and over again, and
+ * measures close to the peak bandwidth to the chip (not testing
+ * data bandwidth to the wire). On chips that use an address-based
+ * trigger to send packets to the wire, this is easy. On chips that
+ * use a count to trigger, we want to make sure that the packet doesn't
+ * go out on the wire, or trigger flow control checks.
+ */
+static void ipath_verify_pioperf(struct ipath_devdata *dd)
+{
+ u32 pbnum, cnt, lcnt;
+ u32 __iomem *piobuf;
+ u32 *addr;
+ u64 msecs, emsecs;
+
+ piobuf = ipath_getpiobuf(dd, 0, &pbnum);
+ if (!piobuf) {
+ dev_info(&dd->pcidev->dev,
+ "No PIObufs for checking perf, skipping\n");
+ return;
+ }
+
+ /*
+ * Enough to give us a reasonable test, less than piobuf size, and
+ * likely multiple of store buffer length.
+ */
+ cnt = 1024;
+
+ addr = vmalloc(cnt);
+ if (!addr) {
+ dev_info(&dd->pcidev->dev,
+ "Couldn't get memory for checking PIO perf,"
+ " skipping\n");
+ goto done;
+ }
+
+ preempt_disable(); /* we want reasonably accurate elapsed time */
+ msecs = 1 + jiffies_to_msecs(jiffies);
+ for (lcnt = 0; lcnt < 10000U; lcnt++) {
+ /* wait until we cross msec boundary */
+ if (jiffies_to_msecs(jiffies) >= msecs)
+ break;
+ udelay(1);
+ }
+
+ ipath_disable_armlaunch(dd);
+
+ /*
+ * length 0, no dwords actually sent, and mark as VL15
+ * on chips where that may matter (due to IB flowcontrol)
+ */
+ if ((dd->ipath_flags & IPATH_HAS_PBC_CNT))
+ writeq(1UL << 63, piobuf);
+ else
+ writeq(0, piobuf);
+ ipath_flush_wc();
+
+ /*
+ * this is only roughly accurate, since even with preempt we
+ * still take interrupts that could take a while. Running for
+ * >= 5 msec seems to get us "close enough" to accurate values
+ */
+ msecs = jiffies_to_msecs(jiffies);
+ for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
+ __iowrite32_copy(piobuf + 64, addr, cnt >> 2);
+ emsecs = jiffies_to_msecs(jiffies) - msecs;
+ }
+
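+	/*
+	 * Each loop pass copies cnt (1024) bytes, so lcnt passes in emsecs
+	 * milliseconds is roughly lcnt/emsecs MiB/sec.
+	 */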
+ /* 1 GiB/sec, slightly over IB SDR line rate */
+ if (lcnt < (emsecs * 1024U))
+ ipath_dev_err(dd,
+ "Performance problem: bandwidth to PIO buffers is "
+ "only %u MiB/sec\n",
+ lcnt / (u32) emsecs);
+ else
+ ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
+ lcnt / (u32) emsecs);
+
+ preempt_enable();
+
+ vfree(addr);
+
+done:
+ /* disarm piobuf, so it's available again */
+ ipath_disarm_piobufs(dd, pbnum, 1);
+ ipath_enable_armlaunch(dd);
+}
+
+static int __devinit ipath_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ int ret, len, j;
+ struct ipath_devdata *dd;
+ unsigned long long addr;
+ u32 bar0 = 0, bar1 = 0;
+ u8 rev;
+
+ dd = ipath_alloc_devdata(pdev);
+ if (IS_ERR(dd)) {
+ ret = PTR_ERR(dd);
+ printk(KERN_ERR IPATH_DRV_NAME
+ ": Could not allocate devdata: error %d\n", -ret);
+ goto bail;
+ }
+
+ ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
+
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ /* This can happen iff:
+ *
+ * We did a chip reset, and then failed to reprogram the
+ * BAR, or the chip reset due to an internal error. We then
+ * unloaded the driver and reloaded it.
+ *
+ * Both reset cases set the BAR back to initial state. For
+ * the latter case, the AER sticky error bit at offset 0x718
+ * should be set, but the Linux kernel doesn't yet know
+ * about that, it appears. If the original BAR was retained
+ * in the kernel data structures, this may be OK.
+ */
+ ipath_dev_err(dd, "enable unit %d failed: error %d\n",
+ dd->ipath_unit, -ret);
+ goto bail_devdata;
+ }
+ addr = pci_resource_start(pdev, 0);
+ len = pci_resource_len(pdev, 0);
+ ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x "
+ "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
+ ent->device, ent->driver_data);
+
+ read_bars(dd, pdev, &bar0, &bar1);
+
+ if (!bar1 && !(bar0 & ~0xf)) {
+ if (addr) {
+ dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
+ "rewriting as %llx\n", addr);
+ ret = pci_write_config_dword(
+ pdev, PCI_BASE_ADDRESS_0, addr);
+ if (ret) {
+ ipath_dev_err(dd, "rewrite of BAR0 "
+ "failed: err %d\n", -ret);
+ goto bail_disable;
+ }
+ ret = pci_write_config_dword(
+ pdev, PCI_BASE_ADDRESS_1, addr >> 32);
+ if (ret) {
+ ipath_dev_err(dd, "rewrite of BAR1 "
+ "failed: err %d\n", -ret);
+ goto bail_disable;
+ }
+ } else {
+ ipath_dev_err(dd, "BAR is 0 (probable RESET), "
+ "not usable until reboot\n");
+ ret = -ENODEV;
+ goto bail_disable;
+ }
+ }
+
+ ret = pci_request_regions(pdev, IPATH_DRV_NAME);
+ if (ret) {
+ dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
+ "err %d\n", dd->ipath_unit, -ret);
+ goto bail_disable;
+ }
+
+ ret = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+ if (ret) {
+ /*
+ * if the 64 bit setup fails, try 32 bit. Some systems
+ * do not setup 64 bit maps on systems with 2GB or less
+ * memory installed.
+ */
+ ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+ if (ret) {
+ dev_info(&pdev->dev,
+ "Unable to set DMA mask for unit %u: %d\n",
+ dd->ipath_unit, ret);
+ goto bail_regions;
+ }
+ else {
+ ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
+ ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ if (ret)
+ dev_info(&pdev->dev,
+ "Unable to set DMA consistent mask "
+ "for unit %u: %d\n",
+ dd->ipath_unit, ret);
+
+ }
+ }
+ else {
+ ret = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+ if (ret)
+ dev_info(&pdev->dev,
+ "Unable to set DMA consistent mask "
+ "for unit %u: %d\n",
+ dd->ipath_unit, ret);
+ }
+
+ pci_set_master(pdev);
+
+ /*
+ * Save BARs to rewrite after device reset. Save all 64 bits of
+ * BAR, just in case.
+ */
+ dd->ipath_pcibar0 = addr;
+ dd->ipath_pcibar1 = addr >> 32;
+ dd->ipath_deviceid = ent->device; /* save for later use */
+ dd->ipath_vendorid = ent->vendor;
+
+ /* setup the chip-specific functions, as early as possible. */
+ switch (ent->device) {
+ case PCI_DEVICE_ID_INFINIPATH_HT:
+#ifdef CONFIG_HT_IRQ
+ ipath_init_iba6110_funcs(dd);
+ break;
+#else
+ ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if "
+ "CONFIG_HT_IRQ is not enabled\n", ent->device);
+ return -ENODEV;
+#endif
+ case PCI_DEVICE_ID_INFINIPATH_PE800:
+#ifdef CONFIG_PCI_MSI
+ ipath_init_iba6120_funcs(dd);
+ break;
+#else
+ ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if "
+ "CONFIG_PCI_MSI is not enabled\n", ent->device);
+ return -ENODEV;
+#endif
+ case PCI_DEVICE_ID_INFINIPATH_7220:
+#ifndef CONFIG_PCI_MSI
+ ipath_dbg("CONFIG_PCI_MSI is not enabled, "
+ "using INTx for unit %u\n", dd->ipath_unit);
+#endif
+ ipath_init_iba7220_funcs(dd);
+ break;
+ default:
+ ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
+ "failing\n", ent->device);
+ return -ENODEV;
+ }
+
+ for (j = 0; j < 6; j++) {
+ if (!pdev->resource[j].start)
+ continue;
+ ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n",
+ j, (unsigned long long)pdev->resource[j].start,
+ (unsigned long long)pdev->resource[j].end,
+ (unsigned long long)pci_resource_len(pdev, j));
+ }
+
+ if (!addr) {
+ ipath_dev_err(dd, "No valid address in BAR 0!\n");
+ ret = -ENODEV;
+ goto bail_regions;
+ }
+
+ ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
+ if (ret) {
+ ipath_dev_err(dd, "Failed to read PCI revision ID unit "
+ "%u: err %d\n", dd->ipath_unit, -ret);
+ goto bail_regions; /* shouldn't ever happen */
+ }
+ dd->ipath_pcirev = rev;
+
+#if defined(__powerpc__)
+ /* There isn't a generic way to specify writethrough mappings */
+ dd->ipath_kregbase = __ioremap(addr, len,
+ (_PAGE_NO_CACHE|_PAGE_WRITETHRU));
+#else
+ dd->ipath_kregbase = ioremap_nocache(addr, len);
+#endif
+
+ if (!dd->ipath_kregbase) {
+ ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
+ addr);
+ ret = -ENOMEM;
+ goto bail_iounmap;
+ }
+ dd->ipath_kregend = (u64 __iomem *)
+ ((void __iomem *)dd->ipath_kregbase + len);
+ dd->ipath_physaddr = addr; /* used for io_remap, etc. */
+ /* for user mmap */
+ ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
+ addr, dd->ipath_kregbase);
+
+ if (dd->ipath_f_bus(dd, pdev))
+ ipath_dev_err(dd, "Failed to setup config space; "
+ "continuing anyway\n");
+
+ /*
+ * set up our interrupt handler; IRQF_SHARED probably not needed,
+ * since MSI interrupts shouldn't be shared but won't hurt for now.
+	 * Check for a 0 irq after we return from chip-specific bus setup,
+	 * since that setup can change the irq.
+ */
+ if (!dd->ipath_irq)
+ ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "
+ "work\n");
+ else {
+ ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
+ IPATH_DRV_NAME, dd);
+ if (ret) {
+ ipath_dev_err(dd, "Couldn't setup irq handler, "
+ "irq=%d: %d\n", dd->ipath_irq, ret);
+ goto bail_iounmap;
+ }
+ }
+
+ ret = ipath_init_chip(dd, 0); /* do the chip-specific init */
+ if (ret)
+ goto bail_irqsetup;
+
+ ret = ipath_enable_wc(dd);
+
+ if (ret) {
+ ipath_dev_err(dd, "Write combining not enabled "
+ "(err %d): performance may be poor\n",
+ -ret);
+ ret = 0;
+ }
+
+ ipath_verify_pioperf(dd);
+
+ ipath_device_create_group(&pdev->dev, dd);
+ ipathfs_add_device(dd);
+ ipath_user_add(dd);
+ ipath_diag_add(dd);
+ ipath_register_ib_device(dd);
+
+ goto bail;
+
+bail_irqsetup:
+ if (pdev->irq)
+ free_irq(pdev->irq, dd);
+
+bail_iounmap:
+ iounmap((volatile void __iomem *) dd->ipath_kregbase);
+
+bail_regions:
+ pci_release_regions(pdev);
+
+bail_disable:
+ pci_disable_device(pdev);
+
+bail_devdata:
+ ipath_free_devdata(pdev, dd);
+
+bail:
+ return ret;
+}
+
+static void __devexit cleanup_device(struct ipath_devdata *dd)
+{
+ int port;
+
+ if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
+ /* can't do anything more with chip; needs re-init */
+ *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
+ if (dd->ipath_kregbase) {
+ /*
+			 * if we haven't already cleaned up before this,
+			 * clearing these ensures any register reads/writes
+			 * "fail" until re-init
+ */
+ dd->ipath_kregbase = NULL;
+ dd->ipath_uregbase = 0;
+ dd->ipath_sregbase = 0;
+ dd->ipath_cregbase = 0;
+ dd->ipath_kregsize = 0;
+ }
+ ipath_disable_wc(dd);
+ }
+
+ if (dd->ipath_spectriggerhit)
+ dev_info(&dd->pcidev->dev, "%lu special trigger hits\n",
+ dd->ipath_spectriggerhit);
+
+ if (dd->ipath_pioavailregs_dma) {
+ dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+ (void *) dd->ipath_pioavailregs_dma,
+ dd->ipath_pioavailregs_phys);
+ dd->ipath_pioavailregs_dma = NULL;
+ }
+ if (dd->ipath_dummy_hdrq) {
+ dma_free_coherent(&dd->pcidev->dev,
+ dd->ipath_pd[0]->port_rcvhdrq_size,
+ dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
+ dd->ipath_dummy_hdrq = NULL;
+ }
+
+ if (dd->ipath_pageshadow) {
+ struct page **tmpp = dd->ipath_pageshadow;
+ dma_addr_t *tmpd = dd->ipath_physshadow;
+ int i, cnt = 0;
+
+ ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
+ "locked\n");
+ for (port = 0; port < dd->ipath_cfgports; port++) {
+ int port_tidbase = port * dd->ipath_rcvtidcnt;
+ int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
+ for (i = port_tidbase; i < maxtid; i++) {
+ if (!tmpp[i])
+ continue;
+ pci_unmap_page(dd->pcidev, tmpd[i],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ ipath_release_user_pages(&tmpp[i], 1);
+ tmpp[i] = NULL;
+ cnt++;
+ }
+ }
+ if (cnt) {
+ ipath_stats.sps_pageunlocks += cnt;
+ ipath_cdbg(VERBOSE, "There were still %u expTID "
+ "entries locked\n", cnt);
+ }
+ if (ipath_stats.sps_pagelocks ||
+ ipath_stats.sps_pageunlocks)
+ ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
+ "unlocked via ipath_m{un}lock\n",
+ (unsigned long long)
+ ipath_stats.sps_pagelocks,
+ (unsigned long long)
+ ipath_stats.sps_pageunlocks);
+
+ ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
+ dd->ipath_pageshadow);
+ tmpp = dd->ipath_pageshadow;
+ dd->ipath_pageshadow = NULL;
+ vfree(tmpp);
+
+ dd->ipath_egrtidbase = NULL;
+ }
+
+ /*
+ * free any resources still in use (usually just kernel ports)
+	 * at unload; we do this for portcnt, not cfgports, because cfgports
+ * could have changed while we were loaded.
+ */
+ for (port = 0; port < dd->ipath_portcnt; port++) {
+ struct ipath_portdata *pd = dd->ipath_pd[port];
+ dd->ipath_pd[port] = NULL;
+ ipath_free_pddata(dd, pd);
+ }
+ kfree(dd->ipath_pd);
+ /*
+ * debuggability, in case some cleanup path tries to use it
+ * after this
+ */
+ dd->ipath_pd = NULL;
+}
+
+static void __devexit ipath_remove_one(struct pci_dev *pdev)
+{
+ struct ipath_devdata *dd = pci_get_drvdata(pdev);
+
+ ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
+
+ /*
+ * disable the IB link early, to be sure no new packets arrive, which
+ * complicates the shutdown process
+ */
+ ipath_shutdown_device(dd);
+
+ flush_scheduled_work();
+
+ if (dd->verbs_dev)
+ ipath_unregister_ib_device(dd->verbs_dev);
+
+ ipath_diag_remove(dd);
+ ipath_user_remove(dd);
+ ipathfs_remove_device(dd);
+ ipath_device_remove_group(&pdev->dev, dd);
+
+ ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
+ "unit %u\n", dd, (u32) dd->ipath_unit);
+
+ cleanup_device(dd);
+
+ /*
+ * turn off rcv, send, and interrupts for all ports, all drivers
+ * should also hard reset the chip here?
+ * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
+ * for all versions of the driver, if they were allocated
+ */
+ if (dd->ipath_irq) {
+ ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
+ dd->ipath_unit, dd->ipath_irq);
+ dd->ipath_f_free_irq(dd);
+ } else
+ ipath_dbg("irq is 0, not doing free_irq "
+ "for unit %u\n", dd->ipath_unit);
+ /*
+ * we check for NULL here, because it's outside
+ * the kregbase check, and we need to call it
+ * after the free_irq. Thus it's possible that
+ * the function pointers were never initialized.
+ */
+ if (dd->ipath_f_cleanup)
+ /* clean up chip-specific stuff */
+ dd->ipath_f_cleanup(dd);
+
+ ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
+ iounmap((volatile void __iomem *) dd->ipath_kregbase);
+ pci_release_regions(pdev);
+ ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
+ pci_disable_device(pdev);
+
+ ipath_free_devdata(pdev, dd);
+}
+
+/* general driver use */
+DEFINE_MUTEX(ipath_mutex);
+
+static DEFINE_SPINLOCK(ipath_pioavail_lock);
+
+/**
+ * ipath_disarm_piobufs - cancel a range of PIO buffers
+ * @dd: the infinipath device
+ * @first: the first PIO buffer to cancel
+ * @cnt: the number of PIO buffers to cancel
+ *
+ * Cancel a range of PIO buffers, used when they might be armed, but
+ * not triggered. Used at init to ensure buffer state, and also on user
+ * process close, in case it died while writing to a PIO buffer.
+ * Also used after errors.
+ */
+void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
+ unsigned cnt)
+{
+ unsigned i, last = first + cnt;
+ unsigned long flags;
+
+ ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
+ for (i = first; i < last; i++) {
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ /*
+ * The disarm-related bits are write-only, so it
+ * is ok to OR them in with our copy of sendctrl
+ * while we hold the lock.
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl | INFINIPATH_S_DISARM |
+ (i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
+ /* can't disarm bufs back-to-back per iba7220 spec */
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+ }
+ /* on some older chips, update may not happen after cancel */
+ ipath_force_pio_avail_update(dd);
+}
+
+/**
+ * ipath_wait_linkstate - wait for an IB link state change to occur
+ * @dd: the infinipath device
+ * @state: the state to wait for
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for an IB link state change to occur.
+ * For now, take the easy polling route. Currently used only by
+ * ipath_set_linkstate. Returns 0 if state reached, otherwise
+ * -ETIMEDOUT. state can have multiple bits set, for any of several
+ * transitions.
+ */
+int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
+{
+ dd->ipath_state_wanted = state;
+ wait_event_interruptible_timeout(ipath_state_wait,
+ (dd->ipath_flags & state),
+ msecs_to_jiffies(msecs));
+ dd->ipath_state_wanted = 0;
+
+ if (!(dd->ipath_flags & state)) {
+ u64 val;
+ ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
+ " ms\n",
+ /* test INIT ahead of DOWN, both can be set */
+ (state & IPATH_LINKINIT) ? "INIT" :
+ ((state & IPATH_LINKDOWN) ? "DOWN" :
+ ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
+ msecs);
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+ ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
+ (unsigned long long) ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_ibcctrl),
+ (unsigned long long) val,
+ ipath_ibcstatus_str[val & dd->ibcs_lts_mask]);
+ }
+ return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
+}
+
+static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,
+ char *buf, size_t blen)
+{
+ static const struct {
+ ipath_err_t err;
+ const char *msg;
+ } errs[] = {
+ { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },
+ { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },
+ { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },
+ { INFINIPATH_E_SDMABASE, "SDmaBase" },
+ { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },
+ { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },
+ { INFINIPATH_E_SDMADWEN, "SDmaDwEn" },
+ { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },
+ { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },
+ { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },
+ { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },
+ { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },
+ };
+ int i;
+ int expected;
+ size_t bidx = 0;
+
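+	/* SDmaDisabled is expected (and not reported) while an abort is in progress */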
+ for (i = 0; i < ARRAY_SIZE(errs); i++) {
+ expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 :
+ test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+ if ((err & errs[i].err) && !expected)
+ bidx += snprintf(buf + bidx, blen - bidx,
+ "%s ", errs[i].msg);
+ }
+}
+
+/*
+ * Decode the error status into strings, deciding whether to always
+ * print it or not, depending on "normal packet errors" vs everything
+ * else. Return 1 if "real" errors, otherwise 0 if only packet
+ * errors, so caller can decide what to print with the string.
+ */
+int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
+ ipath_err_t err)
+{
+ int iserr = 1;
+ *buf = '\0';
+ if (err & INFINIPATH_E_PKTERRS) {
+ if (!(err & ~INFINIPATH_E_PKTERRS))
+ iserr = 0; // if only packet errors.
+ if (ipath_debug & __IPATH_ERRPKTDBG) {
+ if (err & INFINIPATH_E_REBP)
+ strlcat(buf, "EBP ", blen);
+ if (err & INFINIPATH_E_RVCRC)
+ strlcat(buf, "VCRC ", blen);
+ if (err & INFINIPATH_E_RICRC) {
+ strlcat(buf, "CRC ", blen);
+ // clear for check below, so only once
+ err &= INFINIPATH_E_RICRC;
+ }
+ if (err & INFINIPATH_E_RSHORTPKTLEN)
+ strlcat(buf, "rshortpktlen ", blen);
+ if (err & INFINIPATH_E_SDROPPEDDATAPKT)
+ strlcat(buf, "sdroppeddatapkt ", blen);
+ if (err & INFINIPATH_E_SPKTLEN)
+ strlcat(buf, "spktlen ", blen);
+ }
+ if ((err & INFINIPATH_E_RICRC) &&
+ !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
+ strlcat(buf, "CRC ", blen);
+ if (!iserr)
+ goto done;
+ }
+ if (err & INFINIPATH_E_RHDRLEN)
+ strlcat(buf, "rhdrlen ", blen);
+ if (err & INFINIPATH_E_RBADTID)
+ strlcat(buf, "rbadtid ", blen);
+ if (err & INFINIPATH_E_RBADVERSION)
+ strlcat(buf, "rbadversion ", blen);
+ if (err & INFINIPATH_E_RHDR)
+ strlcat(buf, "rhdr ", blen);
+ if (err & INFINIPATH_E_SENDSPECIALTRIGGER)
+ strlcat(buf, "sendspecialtrigger ", blen);
+ if (err & INFINIPATH_E_RLONGPKTLEN)
+ strlcat(buf, "rlongpktlen ", blen);
+ if (err & INFINIPATH_E_RMAXPKTLEN)
+ strlcat(buf, "rmaxpktlen ", blen);
+ if (err & INFINIPATH_E_RMINPKTLEN)
+ strlcat(buf, "rminpktlen ", blen);
+ if (err & INFINIPATH_E_SMINPKTLEN)
+ strlcat(buf, "sminpktlen ", blen);
+ if (err & INFINIPATH_E_RFORMATERR)
+ strlcat(buf, "rformaterr ", blen);
+ if (err & INFINIPATH_E_RUNSUPVL)
+ strlcat(buf, "runsupvl ", blen);
+ if (err & INFINIPATH_E_RUNEXPCHAR)
+ strlcat(buf, "runexpchar ", blen);
+ if (err & INFINIPATH_E_RIBFLOW)
+ strlcat(buf, "ribflow ", blen);
+ if (err & INFINIPATH_E_SUNDERRUN)
+ strlcat(buf, "sunderrun ", blen);
+ if (err & INFINIPATH_E_SPIOARMLAUNCH)
+ strlcat(buf, "spioarmlaunch ", blen);
+ if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
+ strlcat(buf, "sunexperrpktnum ", blen);
+ if (err & INFINIPATH_E_SDROPPEDSMPPKT)
+ strlcat(buf, "sdroppedsmppkt ", blen);
+ if (err & INFINIPATH_E_SMAXPKTLEN)
+ strlcat(buf, "smaxpktlen ", blen);
+ if (err & INFINIPATH_E_SUNSUPVL)
+ strlcat(buf, "sunsupVL ", blen);
+ if (err & INFINIPATH_E_INVALIDADDR)
+ strlcat(buf, "invalidaddr ", blen);
+ if (err & INFINIPATH_E_RRCVEGRFULL)
+ strlcat(buf, "rcvegrfull ", blen);
+ if (err & INFINIPATH_E_RRCVHDRFULL)
+ strlcat(buf, "rcvhdrfull ", blen);
+ if (err & INFINIPATH_E_IBSTATUSCHANGED)
+ strlcat(buf, "ibcstatuschg ", blen);
+ if (err & INFINIPATH_E_RIBLOSTLINK)
+ strlcat(buf, "riblostlink ", blen);
+ if (err & INFINIPATH_E_HARDWARE)
+ strlcat(buf, "hardware ", blen);
+ if (err & INFINIPATH_E_RESET)
+ strlcat(buf, "reset ", blen);
+ if (err & INFINIPATH_E_SDMAERRS)
+ decode_sdma_errs(dd, err, buf, blen);
+ if (err & INFINIPATH_E_INVALIDEEPCMD)
+ strlcat(buf, "invalideepromcmd ", blen);
+done:
+ return iserr;
+}
+
+/**
+ * get_rhf_errstring - decode RHF errors
+ * @err: the err number
+ * @msg: the output buffer
+ * @len: the length of the output buffer
+ *
+ * only used one place now, may want more later
+ */
+static void get_rhf_errstring(u32 err, char *msg, size_t len)
+{
+	/* start with an empty string, so callers need not special-case "no errors" */
+ *msg = '\0';
+
+ if (err & INFINIPATH_RHF_H_ICRCERR)
+ strlcat(msg, "icrcerr ", len);
+ if (err & INFINIPATH_RHF_H_VCRCERR)
+ strlcat(msg, "vcrcerr ", len);
+ if (err & INFINIPATH_RHF_H_PARITYERR)
+ strlcat(msg, "parityerr ", len);
+ if (err & INFINIPATH_RHF_H_LENERR)
+ strlcat(msg, "lenerr ", len);
+ if (err & INFINIPATH_RHF_H_MTUERR)
+ strlcat(msg, "mtuerr ", len);
+ if (err & INFINIPATH_RHF_H_IHDRERR)
+ /* infinipath hdr checksum error */
+ strlcat(msg, "ipathhdrerr ", len);
+ if (err & INFINIPATH_RHF_H_TIDERR)
+ strlcat(msg, "tiderr ", len);
+ if (err & INFINIPATH_RHF_H_MKERR)
+ /* bad port, offset, etc. */
+ strlcat(msg, "invalid ipathhdr ", len);
+ if (err & INFINIPATH_RHF_H_IBERR)
+ strlcat(msg, "iberr ", len);
+ if (err & INFINIPATH_RHF_L_SWA)
+ strlcat(msg, "swA ", len);
+ if (err & INFINIPATH_RHF_L_SWB)
+ strlcat(msg, "swB ", len);
+}
+
+/**
+ * ipath_get_egrbuf - get an eager buffer
+ * @dd: the infinipath device
+ * @bufnum: the eager buffer to get
+ *
+ * must only be called if ipath_pd[port] is known to be allocated
+ */
+static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
+{
+ return dd->ipath_port0_skbinfo ?
+ (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
+}
+
+/**
+ * ipath_alloc_skb - allocate an skb and buffer with possible constraints
+ * @dd: the infinipath device
+ * @gfp_mask: the sk_buff GFP mask
+ */
+struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
+ gfp_t gfp_mask)
+{
+ struct sk_buff *skb;
+ u32 len;
+
+ /*
+	 * The only fully supported way to handle this is to allocate lots of
+	 * extra space, align as needed, and then do skb_reserve(). That wastes
+ * a lot of memory... I'll have to hack this into infinipath_copy
+ * also.
+ */
+
+ /*
+ * We need 2 extra bytes for ipath_ether data sent in the
+ * key header. In order to keep everything dword aligned,
+ * we'll reserve 4 bytes.
+ */
+ len = dd->ipath_ibmaxlen + 4;
+
+ if (dd->ipath_flags & IPATH_4BYTE_TID) {
+ /* We need a 2KB multiple alignment, and there is no way
+ * to do it except to allocate extra and then skb_reserve
+ * enough to bring it up to the right alignment.
+ */
+ len += 2047;
+ }
+
+ skb = __dev_alloc_skb(len, gfp_mask);
+ if (!skb) {
+ ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
+ len);
+ goto bail;
+ }
+
+ skb_reserve(skb, 4);
+
+ if (dd->ipath_flags & IPATH_4BYTE_TID) {
+ u32 una = (unsigned long)skb->data & 2047;
+ if (una)
+ skb_reserve(skb, 2048 - una);
+ }
+
+bail:
+ return skb;
+}
+
+static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
+ u32 eflags,
+ u32 l,
+ u32 etail,
+ __le32 *rhf_addr,
+ struct ipath_message_header *hdr)
+{
+ char emsg[128];
+
+ get_rhf_errstring(eflags, emsg, sizeof emsg);
+ ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
+ "tlen=%x opcode=%x egridx=%x: %s\n",
+ eflags, l,
+ ipath_hdrget_rcv_type(rhf_addr),
+ ipath_hdrget_length_in_bytes(rhf_addr),
+ be32_to_cpu(hdr->bth[0]) >> 24,
+ etail, emsg);
+
+ /* Count local link integrity errors. */
+ if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
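+ /* n is the PHY error threshold currently set in our ibcctrl shadow */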
+ u8 n = (dd->ipath_ibcctrl >>
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+
+ if (++dd->ipath_lli_counter > n) {
+ dd->ipath_lli_counter = 0;
+ dd->ipath_lli_errors++;
+ }
+ }
+}
+
+/*
+ * ipath_kreceive - receive a packet
+ * @pd: the infinipath port
+ *
+ * called from interrupt handler for errors or receive interrupt
+ */
+void ipath_kreceive(struct ipath_portdata *pd)
+{
+ struct ipath_devdata *dd = pd->port_dd;
+ __le32 *rhf_addr;
+ void *ebuf;
+ const u32 rsize = dd->ipath_rcvhdrentsize; /* words */
+ const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
+ u32 etail = -1, l, hdrqtail;
+ struct ipath_message_header *hdr;
+ u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;
+ static u64 totcalls; /* stats, may eventually remove */
+ int last;
+
+ l = pd->port_head;
+ rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset;
+ if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
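+ /* no DMA'd tail register; use the per-entry sequence number to detect new packets */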
+ u32 seq = ipath_hdrget_seq(rhf_addr);
+
+ if (seq != pd->port_seq_cnt)
+ goto bail;
+ hdrqtail = 0;
+ } else {
+ hdrqtail = ipath_get_rcvhdrtail(pd);
+ if (l == hdrqtail)
+ goto bail;
+ smp_rmb();
+ }
+
+reloop:
+ for (last = 0, i = 1; !last; i += !last) {
+ hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
+ eflags = ipath_hdrget_err_flags(rhf_addr);
+ etype = ipath_hdrget_rcv_type(rhf_addr);
+ /* total length */
+ tlen = ipath_hdrget_length_in_bytes(rhf_addr);
+ ebuf = NULL;
+ if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ?
+ ipath_hdrget_use_egr_buf(rhf_addr) :
+ (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
+ /*
+ * It turns out that the chip uses an eager buffer
+ * for all non-expected packets, whether it "needs"
+ * one or not. So always get the index, but don't
+ * set ebuf (so we try to copy data) unless the
+ * length requires it.
+ */
+ etail = ipath_hdrget_index(rhf_addr);
+ updegr = 1;
+ if (tlen > sizeof(*hdr) ||
+ etype == RCVHQ_RCV_TYPE_NON_KD)
+ ebuf = ipath_get_egrbuf(dd, etail);
+ }
+
+ /*
+ * both tiderr and ipathhdrerr are set for all plain IB
+ * packets; only ipathhdrerr should be set.
+ */
+
+ if (etype != RCVHQ_RCV_TYPE_NON_KD &&
+ etype != RCVHQ_RCV_TYPE_ERROR &&
+ ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) !=
+ IPS_PROTO_VERSION)
+ ipath_cdbg(PKT, "Bad InfiniPath protocol version "
+ "%x\n", etype);
+
+ if (unlikely(eflags))
+ ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr);
+ else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
+ ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen);
+ if (dd->ipath_lli_counter)
+ dd->ipath_lli_counter--;
+ } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
+ u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24;
+ u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff;
+ ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+ "qp=%x), len %x; ignored\n",
+ etype, opcode, qp, tlen);
+ } else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
+ ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
+ be32_to_cpu(hdr->bth[0]) >> 24);
+ else {
+ /*
+ * error packet, type of error unknown.
+ * Probably type 3, but we don't know, so don't
+ * even try to print the opcode, etc.
+ * Usually caused by a "bad packet", that has no
+ * BTH, when the LRH says it should.
+ */
+ ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"
+ " %x, len %x hdrq+%x rhf: %Lx\n",
+ etail, tlen, l, (unsigned long long)
+ le64_to_cpu(*(__le64 *) rhf_addr));
+ if (ipath_debug & __IPATH_ERRPKTDBG) {
+ u32 j, *d, dw = rsize-2;
+ if (rsize > (tlen>>2))
+ dw = tlen>>2;
+ d = (u32 *)hdr;
+ printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n",
+ dw);
+ for (j = 0; j < dw; j++)
+ printk(KERN_DEBUG "%8x%s", d[j],
+ (j%8) == 7 ? "\n" : " ");
+ printk(KERN_DEBUG ".\n");
+ }
+ }
+ l += rsize;
+ if (l >= maxcnt)
+ l = 0;
+ rhf_addr = (__le32 *) pd->port_rcvhdrq +
+ l + dd->ipath_rhf_offset;
+ if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
+ u32 seq = ipath_hdrget_seq(rhf_addr);
+
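+ /* the expected sequence number cycles through 1..13 */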
+ if (++pd->port_seq_cnt > 13)
+ pd->port_seq_cnt = 1;
+ if (seq != pd->port_seq_cnt)
+ last = 1;
+ } else if (l == hdrqtail)
+ last = 1;
+ /*
+ * Update the head registers on the last packet, and every 16
+ * packets. This reduces bus traffic while still trying to prevent
+ * rcvhdrq overflows when the queue is nearly full.
+ */
+ if (last || !(i & 0xf)) {
+ u64 lval = l;
+
+ /* request IBA6120 and 7220 interrupt only on last */
+ if (last)
+ lval |= dd->ipath_rhdrhead_intr_off;
+ ipath_write_ureg(dd, ur_rcvhdrhead, lval,
+ pd->port_port);
+ if (updegr) {
+ ipath_write_ureg(dd, ur_rcvegrindexhead,
+ etail, pd->port_port);
+ updegr = 0;
+ }
+ }
+ }
+
+ if (!dd->ipath_rhdrhead_intr_off && !reloop &&
+ !(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
+ /* IBA6110 workaround; with the GPIO workaround we can race
+ * clearing a chip interrupt with another interrupt about to be
+ * delivered, and clear it before it is delivered. By checking
+ * here whether the in-memory tail register was updated while we
+ * were processing earlier packets, we "almost" guarantee we have
+ * covered that case.
+ */
+ u32 hqtail = ipath_get_rcvhdrtail(pd);
+ if (hqtail != hdrqtail) {
+ hdrqtail = hqtail;
+ reloop = 1; /* loop 1 extra time at most */
+ goto reloop;
+ }
+ }
+
+ pkttot += i;
+
+ pd->port_head = l;
+
+ if (pkttot > ipath_stats.sps_maxpkts_call)
+ ipath_stats.sps_maxpkts_call = pkttot;
+ ipath_stats.sps_port0pkts += pkttot;
+ ipath_stats.sps_avgpkts_call =
+ ipath_stats.sps_port0pkts / ++totcalls;
+
+bail:;
+}
+
+/**
+ * ipath_update_pio_bufs - update shadow copy of the PIO availability map
+ * @dd: the infinipath device
+ *
+ * called whenever our local copy indicates we have run out of send buffers
+ * NOTE: This can be called from interrupt context by some code
+ * and from non-interrupt context by ipath_getpiobuf().
+ */
+
+static void ipath_update_pio_bufs(struct ipath_devdata *dd)
+{
+ unsigned long flags;
+ int i;
+ const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
+
+ /* If the generation (check) bits have changed, then we update the
+ * busy bit for the corresponding PIO buffer. This algorithm will
+ * modify positions to the value they already have in some cases
+ * (i.e., no change), but it's faster than changing only the bits
+ * that have changed.
+ *
+ * We would like to do this atomically, to avoid spinlocks in the
+ * critical send path, but that's not really possible, given the
+ * type of changes, and that this routine could be called on
+ * multiple CPUs simultaneously, so we lock in this routine only,
+ * to avoid conflicting updates; all we change is the shadow, and
+ * it's a single 64 bit memory location, so by definition the update
+ * is atomic in terms of what other CPUs can see in testing the
+ * bits. The spin_lock overhead isn't too bad, since it only
+ * happens when all buffers are in use, so only CPU overhead, not
+ * latency or bandwidth, is affected.
+ */
+ if (!dd->ipath_pioavailregs_dma) {
+ ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
+ return;
+ }
+ if (ipath_debug & __IPATH_VERBDBG) {
+ /* only if packet debug and verbose */
+ volatile __le64 *dma = dd->ipath_pioavailregs_dma;
+ unsigned long *shadow = dd->ipath_pioavailshadow;
+
+ ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
+ "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
+ "s3=%lx\n",
+ (unsigned long long) le64_to_cpu(dma[0]),
+ shadow[0],
+ (unsigned long long) le64_to_cpu(dma[1]),
+ shadow[1],
+ (unsigned long long) le64_to_cpu(dma[2]),
+ shadow[2],
+ (unsigned long long) le64_to_cpu(dma[3]),
+ shadow[3]);
+ if (piobregs > 4)
+ ipath_cdbg(
+ PKT, "2nd group, dma4=%llx shad4=%lx, "
+ "d5=%llx s5=%lx, d6=%llx s6=%lx, "
+ "d7=%llx s7=%lx\n",
+ (unsigned long long) le64_to_cpu(dma[4]),
+ shadow[4],
+ (unsigned long long) le64_to_cpu(dma[5]),
+ shadow[5],
+ (unsigned long long) le64_to_cpu(dma[6]),
+ shadow[6],
+ (unsigned long long) le64_to_cpu(dma[7]),
+ shadow[7]);
+ }
+ spin_lock_irqsave(&ipath_pioavail_lock, flags);
+ for (i = 0; i < piobregs; i++) {
+ u64 pchbusy, pchg, piov, pnew;
+ /*
+ * Chip Errata: bug 6641; even and odd qwords>3 are swapped
+ */
+ if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
+ piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
+ else
+ piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
+ pchg = dd->ipath_pioavailkernel[i] &
+ ~(dd->ipath_pioavailshadow[i] ^ piov);
+ pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
+ if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
+ pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
+ pnew |= piov & pchbusy;
+ dd->ipath_pioavailshadow[i] = pnew;
+ }
+ }
+ spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+}
+
+/*
+ * Used to force an update of pioavailshadow if we can't get a pio buffer.
+ * Needed primarily due to exiting freeze mode after recovering
+ * from errors. Done lazily, because it's safer (known to not
+ * be writing pio buffers).
+ */
+static void ipath_reset_availshadow(struct ipath_devdata *dd)
+{
+ int i, im;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipath_pioavail_lock, flags);
+ for (i = 0; i < dd->ipath_pioavregs; i++) {
+ u64 val, oldval;
+ /* deal with 6110 chip bug on high register #s */
+ im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+ i ^ 1 : i;
+ val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
+ /*
+ * busy out the buffers not in the kernel avail list,
+ * without changing the generation bits.
+ */
+ oldval = dd->ipath_pioavailshadow[i];
+ dd->ipath_pioavailshadow[i] = val |
+ ((~dd->ipath_pioavailkernel[i] <<
+ INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
+ 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
+ if (oldval != dd->ipath_pioavailshadow[i])
+ ipath_dbg("shadow[%d] was %Lx, now %lx\n",
+ i, (unsigned long long) oldval,
+ dd->ipath_pioavailshadow[i]);
+ }
+ spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+}
+
+/**
+ * ipath_setrcvhdrsize - set the receive header size
+ * @dd: the infinipath device
+ * @rhdrsize: the receive header size
+ *
+ * called from user init code, and also layered driver init
+ */
+int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
+{
+ int ret = 0;
+
+ if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
+ if (dd->ipath_rcvhdrsize != rhdrsize) {
+ dev_info(&dd->pcidev->dev,
+ "Error: can't set protocol header "
+ "size %u, already %u\n",
+ rhdrsize, dd->ipath_rcvhdrsize);
+ ret = -EAGAIN;
+ } else
+ ipath_cdbg(VERBOSE, "Reuse same protocol header "
+ "size %u\n", dd->ipath_rcvhdrsize);
+ } else if (rhdrsize > (dd->ipath_rcvhdrentsize -
+ (sizeof(u64) / sizeof(u32)))) {
+ ipath_dbg("Error: can't set protocol header size %u "
+ "(> max %u)\n", rhdrsize,
+ dd->ipath_rcvhdrentsize -
+ (u32) (sizeof(u64) / sizeof(u32)));
+ ret = -EOVERFLOW;
+ } else {
+ dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
+ dd->ipath_rcvhdrsize = rhdrsize;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
+ dd->ipath_rcvhdrsize);
+ ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
+ dd->ipath_rcvhdrsize);
+ }
+ return ret;
+}
+
+/*
+ * debugging code and stats updates if no pio buffers available.
+ */
+static noinline void no_pio_bufs(struct ipath_devdata *dd)
+{
+ unsigned long *shadow = dd->ipath_pioavailshadow;
+ __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;
+
+ dd->ipath_upd_pio_shadow = 1;
+
+ /*
+ * not atomic, but if we lose a stat count in a while, that's OK
+ */
+ ipath_stats.sps_nopiobufs++;
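+ /* only force an update and dump debug state once per 100000 consecutive failures */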
+ if (!(++dd->ipath_consec_nopiobuf % 100000)) {
+ ipath_force_pio_avail_update(dd); /* at start */
+ ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
+ "%llx %llx %llx %llx\n"
+ "ipath shadow: %lx %lx %lx %lx\n",
+ dd->ipath_consec_nopiobuf,
+ (unsigned long)get_cycles(),
+ (unsigned long long) le64_to_cpu(dma[0]),
+ (unsigned long long) le64_to_cpu(dma[1]),
+ (unsigned long long) le64_to_cpu(dma[2]),
+ (unsigned long long) le64_to_cpu(dma[3]),
+ shadow[0], shadow[1], shadow[2], shadow[3]);
+ /*
+ * 4 buffers per byte, 4 registers above, cover rest
+ * below
+ */
+ if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
+ (sizeof(shadow[0]) * 4 * 4))
+ ipath_dbg("2nd group: dmacopy: "
+ "%llx %llx %llx %llx\n"
+ "ipath shadow: %lx %lx %lx %lx\n",
+ (unsigned long long)le64_to_cpu(dma[4]),
+ (unsigned long long)le64_to_cpu(dma[5]),
+ (unsigned long long)le64_to_cpu(dma[6]),
+ (unsigned long long)le64_to_cpu(dma[7]),
+ shadow[4], shadow[5], shadow[6], shadow[7]);
+
+ /* at end, so update likely happened */
+ ipath_reset_availshadow(dd);
+ }
+}
+
+/*
+ * common code for normal driver pio buffer allocation, and reserved
+ * allocation.
+ *
+ * do appropriate marking as busy, etc.
+ * returns buffer number if one found (>=0), negative number is error.
+ */
+static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
+ u32 *pbufnum, u32 first, u32 last, u32 firsti)
+{
+ int i, j, updated = 0;
+ unsigned piobcnt;
+ unsigned long flags;
+ unsigned long *shadow = dd->ipath_pioavailshadow;
+ u32 __iomem *buf;
+
+ piobcnt = last - first;
+ if (dd->ipath_upd_pio_shadow) {
+ /*
+ * Minor optimization. If we had no buffers on last call,
+ * start out by doing the update; continue and do the scan even
+ * if no buffers were updated, to be paranoid.
+ */
+ ipath_update_pio_bufs(dd);
+ updated++;
+ i = first;
+ } else
+ i = firsti;
+rescan:
+ /*
+ * While test_and_set_bit() is atomic, the pair of it followed by
+ * change_bit() is not, so we hold the lock across both. See if
+ * this is the cause of the remaining armlaunch errors.
+ */
+ spin_lock_irqsave(&ipath_pioavail_lock, flags);
+ for (j = 0; j < piobcnt; j++, i++) {
+ if (i >= last)
+ i = first;
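+ /* each buffer has two shadow bits: 2*i+1 is the BUSY bit, 2*i is the generation bit */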
+ if (__test_and_set_bit((2 * i) + 1, shadow))
+ continue;
+ /* flip generation bit */
+ __change_bit(2 * i, shadow);
+ break;
+ }
+ spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+
+ if (j == piobcnt) {
+ if (!updated) {
+ /*
+ * first time through; shadow exhausted, but may be
+ * buffers available, try an update and then rescan.
+ */
+ ipath_update_pio_bufs(dd);
+ updated++;
+ i = first;
+ goto rescan;
+ } else if (updated == 1 && piobcnt <=
+ ((dd->ipath_sendctrl
+ >> INFINIPATH_S_UPDTHRESH_SHIFT) &
+ INFINIPATH_S_UPDTHRESH_MASK)) {
+ /*
+ * for chips supporting and using the update
+ * threshold we need to force an update of the
+ * in-memory copy if the count is less than the
+ * threshold, then check one more time.
+ */
+ ipath_force_pio_avail_update(dd);
+ ipath_update_pio_bufs(dd);
+ updated++;
+ i = first;
+ goto rescan;
+ }
+
+ no_pio_bufs(dd);
+ buf = NULL;
+ } else {
+ if (i < dd->ipath_piobcnt2k)
+ buf = (u32 __iomem *) (dd->ipath_pio2kbase +
+ i * dd->ipath_palign);
+ else
+ buf = (u32 __iomem *)
+ (dd->ipath_pio4kbase +
+ (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
+ if (pbufnum)
+ *pbufnum = i;
+ }
+
+ return buf;
+}
+
+/**
+ * ipath_getpiobuf - find an available pio buffer
+ * @dd: the infinipath device
+ * @plen: the size of the PIO buffer needed in 32-bit words
+ * @pbufnum: the buffer number is placed here
+ */
+u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
+{
+ u32 __iomem *buf;
+ u32 pnum, nbufs;
+ u32 first, lasti;
+
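+ /* large packets are allocated only from the 4k buffers; small packets may use any buffer */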
+ if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
+ first = dd->ipath_piobcnt2k;
+ lasti = dd->ipath_lastpioindexl;
+ } else {
+ first = 0;
+ lasti = dd->ipath_lastpioindex;
+ }
+ nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);
+
+ if (buf) {
+ /*
+ * Set next starting place. It's just an optimization,
+ * it doesn't matter who wins on this, so no locking
+ */
+ if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
+ dd->ipath_lastpioindexl = pnum + 1;
+ else
+ dd->ipath_lastpioindex = pnum + 1;
+ if (dd->ipath_upd_pio_shadow)
+ dd->ipath_upd_pio_shadow = 0;
+ if (dd->ipath_consec_nopiobuf)
+ dd->ipath_consec_nopiobuf = 0;
+ ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
+ pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);
+ if (pbufnum)
+ *pbufnum = pnum;
+
+ }
+ return buf;
+}
+
+/**
+ * ipath_chg_pioavailkernel - change which send buffers are available for kernel
+ * @dd: the infinipath device
+ * @start: the starting send buffer number
+ * @len: the number of send buffers
+ * @avail: true if the buffers are available for kernel use, false otherwise
+ */
+void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
+ unsigned len, int avail)
+{
+ unsigned long flags;
+ unsigned end, cnt = 0, next;
+
+ /* There are two bits per send buffer (busy and generation) */
+ start *= 2;
+ end = start + len * 2;
+
+ spin_lock_irqsave(&ipath_pioavail_lock, flags);
+ /* Set or clear the busy bit in the shadow. */
+ while (start < end) {
+ if (avail) {
+ unsigned long dma;
+ int i, im;
+ /*
+ * the BUSY bit will never be set, because we disarm
+ * the user buffers before we hand them back to the
+ * kernel. We do have to make sure the generation
+ * bit is set correctly in shadow, since it could
+ * have changed many times while allocated to user.
+ * We can't use the bitmap functions on the full
+ * dma array because it is always little-endian, so
+ * we have to flip to host-order first.
+ * BITS_PER_LONG is slightly wrong, since it's
+ * always 64 bits per register in chip...
+ * We only work on 64 bit kernels, so that's OK.
+ */
+ /* deal with 6110 chip bug on high register #s */
+ i = start / BITS_PER_LONG;
+ im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+ i ^ 1 : i;
+ __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
+ + start, dd->ipath_pioavailshadow);
+ dma = (unsigned long) le64_to_cpu(
+ dd->ipath_pioavailregs_dma[im]);
+ if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ + start) % BITS_PER_LONG, &dma))
+ __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ + start, dd->ipath_pioavailshadow);
+ else
+ __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+ + start, dd->ipath_pioavailshadow);
+ __set_bit(start, dd->ipath_pioavailkernel);
+ } else {
+ __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
+ dd->ipath_pioavailshadow);
+ __clear_bit(start, dd->ipath_pioavailkernel);
+ }
+ start += 2;
+ }
+
+ if (dd->ipath_pioupd_thresh) {
+ end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
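+ /* count the buffers currently marked available to the kernel */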
+ next = find_first_bit(dd->ipath_pioavailkernel, end);
+ while (next < end) {
+ cnt++;
+ next = find_next_bit(dd->ipath_pioavailkernel, end,
+ next + 1);
+ }
+ }
+ spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+
+ /*
+ * When moving buffers from kernel to user, if the number assigned to
+ * the user is less than the pio update threshold, and the threshold
+ * is supported (cnt was computed > 0), drop the update threshold
+ * so we update at least once per allocated number of buffers.
+ * In any case, if the kernel buffers are less than the threshold,
+ * drop the threshold. We don't bother increasing it, having once
+ * decreased it, since it would typically just cycle back and forth.
+ * If we don't decrease below buffers in use, we can wait a long
+ * time for an update, until some other context uses PIO buffers.
+ */
+ if (!avail && len < cnt)
+ cnt = len;
+ if (cnt < dd->ipath_pioupd_thresh) {
+ dd->ipath_pioupd_thresh = cnt;
+ ipath_dbg("Decreased pio update threshold to %u\n",
+ dd->ipath_pioupd_thresh);
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
+ << INFINIPATH_S_UPDTHRESH_SHIFT);
+ dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
+ << INFINIPATH_S_UPDTHRESH_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+ }
+}
+
+/**
+ * ipath_create_rcvhdrq - create a receive header queue
+ * @dd: the infinipath device
+ * @pd: the port data
+ *
+ * this must be contiguous memory (from an i/o perspective), and must be
+ * DMA'able (which means for some systems, it will go through an IOMMU,
+ * or be forced into a low address range).
+ */
+int ipath_create_rcvhdrq(struct ipath_devdata *dd,
+ struct ipath_portdata *pd)
+{
+ int ret = 0;
+
+ if (!pd->port_rcvhdrq) {
+ dma_addr_t phys_hdrqtail;
+ gfp_t gfp_flags = GFP_USER | __GFP_COMP;
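+ /* queue size in bytes, rounded up to a whole number of pages */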
+ int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
+ sizeof(u32), PAGE_SIZE);
+
+ pd->port_rcvhdrq = dma_alloc_coherent(
+ &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
+ gfp_flags);
+
+ if (!pd->port_rcvhdrq) {
+ ipath_dev_err(dd, "attempt to allocate %d bytes "
+ "for port %u rcvhdrq failed\n",
+ amt, pd->port_port);
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
+ pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
+ &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
+ GFP_KERNEL);
+ if (!pd->port_rcvhdrtail_kvaddr) {
+ ipath_dev_err(dd, "attempt to allocate 1 page "
+ "for port %u rcvhdrqtailaddr "
+ "failed\n", pd->port_port);
+ ret = -ENOMEM;
+ dma_free_coherent(&dd->pcidev->dev, amt,
+ pd->port_rcvhdrq,
+ pd->port_rcvhdrq_phys);
+ pd->port_rcvhdrq = NULL;
+ goto bail;
+ }
+ pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
+ ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx "
+ "physical\n", pd->port_port,
+ (unsigned long long) phys_hdrqtail);
+ }
+
+ pd->port_rcvhdrq_size = amt;
+
+ ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
+ "for port %u rcvhdr Q\n",
+ amt >> PAGE_SHIFT, pd->port_rcvhdrq,
+ (unsigned long) pd->port_rcvhdrq_phys,
+ (unsigned long) pd->port_rcvhdrq_size,
+ pd->port_port);
+ } else
+ ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
+ "hdrtailaddr@%p %llx physical\n",
+ pd->port_port, pd->port_rcvhdrq,
+ (unsigned long long) pd->port_rcvhdrq_phys,
+ pd->port_rcvhdrtail_kvaddr, (unsigned long long)
+ pd->port_rcvhdrqtailaddr_phys);
+
+ /* clear for security and sanity on each use */
+ memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
+ if (pd->port_rcvhdrtail_kvaddr)
+ memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
+
+ /*
+ * tell chip each time we init it, even if we are re-using previous
+ * memory (we zero the register at process close)
+ */
+ ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
+ pd->port_port, pd->port_rcvhdrqtailaddr_phys);
+ ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
+ pd->port_port, pd->port_rcvhdrq_phys);
+
+bail:
+ return ret;
+}
+
+
+/*
+ * Flush all sends that might be in the ready to send state, as well as any
+ * that are in the process of being sent. Used whenever we need to be
+ * sure the send side is idle. Cleans up all buffer state by canceling
+ * all pio buffers, and issuing an abort, which cleans up anything in the
+ * launch fifo. The cancel is superfluous on some chip versions, but
+ * it's safer to always do it.
+ * PIOAvail bits are updated by the chip as if normal send had happened.
+ */
+void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
+{
+ unsigned long flags;
+
+ if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
+ ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n");
+ goto bail;
+ }
+ /*
+ * If we have SDMA, and it's not disabled, we have to kick off the
+ * abort state machine, provided we aren't already aborting.
+ * If we are in the process of aborting SDMA (!DISABLED, but ABORTING),
+ * we skip the rest of this routine. It is already "in progress".
+ */
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
+ int skip_cancel;
+ unsigned long *statp = &dd->ipath_sdma_status;
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ skip_cancel =
+ test_and_set_bit(IPATH_SDMA_ABORTING, statp)
+ && !test_bit(IPATH_SDMA_DISABLED, statp);
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ if (skip_cancel)
+ goto bail;
+ }
+
+ ipath_dbg("Cancelling all in-progress send buffers\n");
+
+ /* skip armlaunch errs for a while */
+ dd->ipath_lastcancel = jiffies + HZ / 2;
+
+ /*
+ * The abort bit is auto-clearing. We also don't want pioavail
+ * update happening during this, and we don't want any other
+ * sends going out, so turn those off for the duration. We read
+ * the scratch register to be sure that cancels and the abort
+ * have taken effect in the chip. Otherwise the two parts are the
+ * same as in ipath_force_pio_avail_update().
+ */
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
+ | INFINIPATH_S_PIOENABLE);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl | INFINIPATH_S_ABORT);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ /* disarm all send buffers */
+ ipath_disarm_piobufs(dd, 0,
+ dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
+
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+ set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
+
+ if (restore_sendctrl) {
+ /* else done by caller later if needed */
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
+ INFINIPATH_S_PIOENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ /* and again, be sure all have hit the chip */
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+ }
+
+ if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
+ !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
+ test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ /* only wait so long for intr */
+ dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
+ dd->ipath_sdma_reset_wait = 200;
+ if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ }
+bail:;
+}
+
+/*
+ * Force an update of in-memory copy of the pioavail registers, when
+ * needed for any of a variety of reasons. We read the scratch register
+ * to make it highly likely that the update will have happened by the
+ * time we return. If already off (as in cancel_sends above), this
+ * routine is a nop, on the assumption that the caller will "do the
+ * right thing".
+ */
+void ipath_force_pio_avail_update(struct ipath_devdata *dd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
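+ /* toggle the avail-update enable bit so the chip refreshes the in-memory copy */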
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ }
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+}
+
+static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
+ int linitcmd)
+{
+ u64 mod_wd;
+ static const char *what[4] = {
+ [0] = "NOP",
+ [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
+ [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
+ [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
+ };
+
+ if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
+ /*
+ * If we are told to disable, note that so link-recovery
+ * code does not attempt to bring us back up.
+ */
+ preempt_disable();
+ dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
+ preempt_enable();
+ } else if (linitcmd) {
+ /*
+ * Any other linkinitcmd will lead to LINKDOWN and then
+ * to INIT (if all is well), so clear flag to let
+ * link-recovery code attempt to bring us back up.
+ */
+ preempt_disable();
+ dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
+ preempt_enable();
+ }
+
+ mod_wd = (linkcmd << dd->ibcc_lc_shift) |
+ (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+ ipath_cdbg(VERBOSE,
+ "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n",
+ dd->ipath_unit, what[linkcmd], linitcmd,
+ ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl | mod_wd);
+ /* read from chip so write is flushed */
+ (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+}
+
+int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
+{
+ u32 lstate;
+ int ret;
+
+ switch (newstate) {
+ case IPATH_IB_LINKDOWN_ONLY:
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case IPATH_IB_LINKDOWN:
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
+ INFINIPATH_IBCC_LINKINITCMD_POLL);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case IPATH_IB_LINKDOWN_SLEEP:
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
+ INFINIPATH_IBCC_LINKINITCMD_SLEEP);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case IPATH_IB_LINKDOWN_DISABLE:
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN,
+ INFINIPATH_IBCC_LINKINITCMD_DISABLE);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case IPATH_IB_LINKARM:
+ if (dd->ipath_flags & IPATH_LINKARMED) {
+ ret = 0;
+ goto bail;
+ }
+ if (!(dd->ipath_flags &
+ (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0);
+
+ /*
+ * Since the port can transition to ACTIVE by receiving
+ * a non VL 15 packet, wait for either state.
+ */
+ lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
+ break;
+
+ case IPATH_IB_LINKACTIVE:
+ if (dd->ipath_flags & IPATH_LINKACTIVE) {
+ ret = 0;
+ goto bail;
+ }
+ if (!(dd->ipath_flags & IPATH_LINKARMED)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0);
+ lstate = IPATH_LINKACTIVE;
+ break;
+
+ case IPATH_IB_LINK_LOOPBACK:
+ dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
+ dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+
+ /* turn heartbeat off, as it causes loopback to fail */
+ dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
+ IPATH_IB_HRTBT_OFF);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case IPATH_IB_LINK_EXTERNAL:
+ dev_info(&dd->pcidev->dev,
+ "Disabling IB local loopback (normal)\n");
+ dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
+ IPATH_IB_HRTBT_ON);
+ dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ /*
+ * Heartbeat can be explicitly enabled by the user via
+ * the "hrtbt_enable" file, and if disabled there, trying to enable
+ * it here will have no effect. Implicit changes (heartbeat off when
+ * loopback on, and vice versa) are included to ease testing.
+ */
+ case IPATH_IB_LINK_HRTBT:
+ ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
+ IPATH_IB_HRTBT_ON);
+ goto bail;
+
+ case IPATH_IB_LINK_NO_HRTBT:
+ ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT,
+ IPATH_IB_HRTBT_OFF);
+ goto bail;
+
+ default:
+ ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
+ ret = -EINVAL;
+ goto bail;
+ }
+ ret = ipath_wait_linkstate(dd, lstate, 2000);
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_set_mtu - set the MTU
+ * @dd: the infinipath device
+ * @arg: the new MTU
+ *
+ * we can handle "any" incoming size; the issue here is whether we
+ * need to restrict our outgoing size. For now, we don't do any
+ * sanity checking on this, and we don't deal with what happens to
+ * programs that are already running when the size changes.
+ * NOTE: changing the MTU will usually cause the IBC to go back to
+ * link INIT state...
+ */
+int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
+{
+ u32 piosize;
+ int changed = 0;
+ int ret;
+
+ /*
+ * mtu is IB data payload max. It's the largest power of 2 less
+ * than piosize (or even larger, since it only really controls the
+ * largest we can receive; we can send the max of the mtu and
+ * piosize). We check that it's one of the valid IB sizes.
+ */
+ if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
+ (arg != 4096 || !ipath_mtu4096)) {
+ ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
+ ret = -EINVAL;
+ goto bail;
+ }
+ if (dd->ipath_ibmtu == arg) {
+ ret = 0; /* same as current */
+ goto bail;
+ }
+
+ piosize = dd->ipath_ibmaxlen;
+ dd->ipath_ibmtu = arg;
+
+ if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
+ /* Only if it's not the initial value (or reset to it) */
+ if (piosize != dd->ipath_init_ibmaxlen) {
+ if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
+ piosize = dd->ipath_init_ibmaxlen;
+ dd->ipath_ibmaxlen = piosize;
+ changed = 1;
+ }
+ } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
+ piosize = arg + IPATH_PIO_MAXIBHDR;
+ ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
+ "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
+ arg);
+ dd->ipath_ibmaxlen = piosize;
+ changed = 1;
+ }
+
+ if (changed) {
+ u64 ibc = dd->ipath_ibcctrl, ibdw;
+ /*
+ * update our housekeeping variables, and set IBC max
+ * size, same as init code; max IBC is max we allow in
+ * buffer, less the qword pbc, plus 1 for ICRC, in dwords
+ */
+ dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
+ ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
+ ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
+ dd->ibcc_mpl_shift);
+ ibc |= ibdw << dd->ibcc_mpl_shift;
+ dd->ipath_ibcctrl = ibc;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ dd->ipath_f_tidtemplate(dd);
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc)
+{
+ dd->ipath_lid = lid;
+ dd->ipath_lmc = lmc;
+
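+ /* low 16 bits carry the LID; the upper 16 bits carry a mask derived from the LMC */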
+ dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid |
+ (~((1U << lmc) - 1)) << 16);
+
+ dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid);
+
+ return 0;
+}
+
+
+/**
+ * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
+ * @dd: the infinipath device
+ * @regno: the register number to write
+ * @port: the port containing the register
+ * @value: the value to write
+ *
+ * Registers that vary with the chip implementation constants (port)
+ * use this routine.
+ */
+void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
+ unsigned port, u64 value)
+{
+ u16 where;
+
+ if (port < dd->ipath_portcnt &&
+ (regno == dd->ipath_kregs->kr_rcvhdraddr ||
+ regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
+ where = regno + port;
+ else
+ where = -1;
+
+ ipath_write_kreg(dd, where, value);
+}
+
+/*
+ * The following deal with the "obviously simple" task of overriding
+ * the state of the LEDs, which normally indicate link physical and
+ * logical status. The complications arise in dealing with different
+ * hardware mappings and the board-dependent routine being called
+ * from interrupts. And then there's the requirement to _flash_ them.
+ */
+#define LED_OVER_FREQ_SHIFT 8
+#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
+/* Below is "non-zero" to force override, but both actual LEDs are off */
+#define LED_OVER_BOTH_OFF (8)
+
+static void ipath_run_led_override(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+ int timeoff;
+ int pidx;
+ u64 lstate, ltstate, val;
+
+ if (!(dd->ipath_flags & IPATH_INITTED))
+ return;
+
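+ /* alternate between the two blink phases each time the timer fires */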
+ pidx = dd->ipath_led_override_phase++ & 1;
+ dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
+ timeoff = dd->ipath_led_override_timeoff;
+
+ /*
+ * The code below potentially restores the LED values per the current
+ * status; it should perhaps also set up the traffic-blink register,
+ * but that is left to the per-chip functions.
+ */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+ ltstate = ipath_ib_linktrstate(dd, val);
+ lstate = ipath_ib_linkstate(dd, val);
+
+ dd->ipath_f_setextled(dd, lstate, ltstate);
+ mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
+}
+
+void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
+{
+ int timeoff, freq;
+
+ if (!(dd->ipath_flags & IPATH_INITTED))
+ return;
+
+ /* First check if we are blinking. If not, use 1 Hz polling */
+ timeoff = HZ;
+ freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
+
+ if (freq) {
+ /* For blink, set each phase from one nybble of val */
+ dd->ipath_led_override_vals[0] = val & 0xF;
+ dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
+ timeoff = (HZ << 4)/freq;
+ } else {
+ /* Non-blink set both phases the same. */
+ dd->ipath_led_override_vals[0] = val & 0xF;
+ dd->ipath_led_override_vals[1] = val & 0xF;
+ }
+ dd->ipath_led_override_timeoff = timeoff;
+
+ /*
+ * If the timer has not already been started, do so. Use a "quick"
+ * timeout so the function will be called soon, to look at our request.
+ */
+ if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
+ /* Need to start timer */
+ init_timer(&dd->ipath_led_override_timer);
+ dd->ipath_led_override_timer.function =
+ ipath_run_led_override;
+ dd->ipath_led_override_timer.data = (unsigned long) dd;
+ dd->ipath_led_override_timer.expires = jiffies + 1;
+ add_timer(&dd->ipath_led_override_timer);
+ } else
+ atomic_dec(&dd->ipath_led_override_timer_active);
+}
+
+/**
+ * ipath_shutdown_device - shut down a device
+ * @dd: the infinipath device
+ *
+ * This is called to make the device quiet when we are about to
+ * unload the driver, and also when the device is administratively
+ * disabled. It does not free any data structures.
+ * Everything it does has to be set up again by ipath_init_chip(dd, 1).
+ */
+void ipath_shutdown_device(struct ipath_devdata *dd)
+{
+ unsigned long flags;
+
+ ipath_dbg("Shutting down the device\n");
+
+ ipath_hol_up(dd); /* make sure user processes aren't suspended */
+
+ dd->ipath_flags |= IPATH_LINKUNK;
+ dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
+ IPATH_LINKINIT | IPATH_LINKARMED |
+ IPATH_LINKACTIVE);
+ *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
+ IPATH_STATUS_IB_READY);
+
+ /* mask interrupts, but not errors */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
+
+ dd->ipath_rcvctrl = 0;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+ teardown_sdma(dd);
+
+ /*
+ * Gracefully stop all sends, allowing any in progress to trickle out
+ * first.
+ */
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl = 0;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+ /* flush it */
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ /*
+ * Wait long enough for anything that's going to trickle out to
+ * have actually done so.
+ */
+ udelay(5);
+
+ dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */
+
+ ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
+ ipath_cancel_sends(dd, 0);
+
+ /*
+ * We are shutting down, so tell components that care. We don't do
+ * this on just a link state change; much like ethernet, a cable
+ * unplug, etc. doesn't change driver state.
+ */
+ signal_ib_event(dd, IB_EVENT_PORT_ERR);
+
+ /* disable IBC */
+ dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control | INFINIPATH_C_FREEZEMODE);
+
+ /*
+ * Clear SerdesEnable and turn the LEDs off; do this here because
+ * we are unloading and can't count on interrupts to move things
+ * along. Turn the LEDs off explicitly for the same reason.
+ */
+ dd->ipath_f_quiet_serdes(dd);
+
+ /* stop all the timers that might still be running */
+ del_timer_sync(&dd->ipath_hol_timer);
+ if (dd->ipath_stats_timer_active) {
+ del_timer_sync(&dd->ipath_stats_timer);
+ dd->ipath_stats_timer_active = 0;
+ }
+ if (dd->ipath_intrchk_timer.data) {
+ del_timer_sync(&dd->ipath_intrchk_timer);
+ dd->ipath_intrchk_timer.data = 0;
+ }
+ if (atomic_read(&dd->ipath_led_override_timer_active)) {
+ del_timer_sync(&dd->ipath_led_override_timer);
+ atomic_set(&dd->ipath_led_override_timer_active, 0);
+ }
+
+ /*
+ * clear all interrupts and errors, so that the next time the driver
+ * is loaded or device is enabled, we know that whatever is set
+ * happened while we were unloaded
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+ ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
+
+ ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
+ ipath_update_eeprom_log(dd);
+}
+
+/**
+ * ipath_free_pddata - free a port's allocated data
+ * @dd: the infinipath device
+ * @pd: the portdata structure
+ *
+ * free up any allocated data for a port
+ * This should not touch anything that would affect a simultaneous
+ * re-allocation of port data, because it is called after ipath_mutex
+ * is released (and can be called from reinit as well).
+ * It should never change any chip state, or global driver state.
+ * (The only exception to global state is freeing the port0 port0_skbs.)
+ */
+void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
+{
+ if (!pd)
+ return;
+
+ if (pd->port_rcvhdrq) {
+ ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
+ "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
+ (unsigned long) pd->port_rcvhdrq_size);
+ dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
+ pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
+ pd->port_rcvhdrq = NULL;
+ if (pd->port_rcvhdrtail_kvaddr) {
+ dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+ pd->port_rcvhdrtail_kvaddr,
+ pd->port_rcvhdrqtailaddr_phys);
+ pd->port_rcvhdrtail_kvaddr = NULL;
+ }
+ }
+ if (pd->port_port && pd->port_rcvegrbuf) {
+ unsigned e;
+
+ for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
+ void *base = pd->port_rcvegrbuf[e];
+ size_t size = pd->port_rcvegrbuf_size;
+
+ ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
+ "chunk %u/%u\n", base,
+ (unsigned long) size,
+ e, pd->port_rcvegrbuf_chunks);
+ dma_free_coherent(&dd->pcidev->dev, size,
+ base, pd->port_rcvegrbuf_phys[e]);
+ }
+ kfree(pd->port_rcvegrbuf);
+ pd->port_rcvegrbuf = NULL;
+ kfree(pd->port_rcvegrbuf_phys);
+ pd->port_rcvegrbuf_phys = NULL;
+ pd->port_rcvegrbuf_chunks = 0;
+ } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
+ unsigned e;
+ struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
+
+ dd->ipath_port0_skbinfo = NULL;
+ ipath_cdbg(VERBOSE, "free closed port %d "
+ "ipath_port0_skbinfo @ %p\n", pd->port_port,
+ skbinfo);
+ for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++)
+ if (skbinfo[e].skb) {
+ pci_unmap_single(dd->pcidev, skbinfo[e].phys,
+ dd->ipath_ibmaxlen,
+ PCI_DMA_FROMDEVICE);
+ dev_kfree_skb(skbinfo[e].skb);
+ }
+ vfree(skbinfo);
+ }
+ kfree(pd->port_tid_pg_list);
+ vfree(pd->subport_uregbase);
+ vfree(pd->subport_rcvegrbuf);
+ vfree(pd->subport_rcvhdr_base);
+ kfree(pd);
+}
+
+static int __init infinipath_init(void)
+{
+ int ret;
+
+ if (ipath_debug & __IPATH_DBG)
+ printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
+
+ /*
+ * These must be called before the driver is registered with
+ * the PCI subsystem.
+ */
+ idr_init(&unit_table);
+ if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
+ printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n");
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ ret = pci_register_driver(&ipath_driver);
+ if (ret < 0) {
+ printk(KERN_ERR IPATH_DRV_NAME
+ ": Unable to register driver: error %d\n", -ret);
+ goto bail_unit;
+ }
+
+ ret = ipath_init_ipathfs();
+ if (ret < 0) {
+ printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
+ "ipathfs: error %d\n", -ret);
+ goto bail_pci;
+ }
+
+ goto bail;
+
+bail_pci:
+ pci_unregister_driver(&ipath_driver);
+
+bail_unit:
+ idr_destroy(&unit_table);
+
+bail:
+ return ret;
+}
+
+static void __exit infinipath_cleanup(void)
+{
+ ipath_exit_ipathfs();
+
+ ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
+ pci_unregister_driver(&ipath_driver);
+
+ idr_destroy(&unit_table);
+}
+
+/**
+ * ipath_reset_device - reset the chip if possible
+ * @unit: the device to reset
+ *
+ * Whether or not reset is successful, we attempt to re-initialize the chip
+ * (that is, much like a driver unload/reload). We clear the INITTED flag
+ * so that the various entry points will fail until we reinitialize. For
+ * now, we only allow this if no user ports are open that use chip resources
+ */
+int ipath_reset_device(int unit)
+{
+ int ret, i;
+ struct ipath_devdata *dd = ipath_lookup(unit);
+
+ if (!dd) {
+ ret = -ENODEV;
+ goto bail;
+ }
+
+ if (atomic_read(&dd->ipath_led_override_timer_active)) {
+ /* Need to stop LED timer, _then_ shut off LEDs */
+ del_timer_sync(&dd->ipath_led_override_timer);
+ atomic_set(&dd->ipath_led_override_timer_active, 0);
+ }
+
+ /* Shut off LEDs after we are sure timer is not running */
+ dd->ipath_led_override = LED_OVER_BOTH_OFF;
+ dd->ipath_f_setextled(dd, 0, 0);
+
+ dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
+
+ if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
+ dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
+ "not initialized or not present\n", unit);
+ ret = -ENXIO;
+ goto bail;
+ }
+
+ if (dd->ipath_pd)
+ for (i = 1; i < dd->ipath_cfgports; i++) {
+ if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) {
+ ipath_dbg("unit %u port %d is in use "
+ "(PID %u cmd %s), can't reset\n",
+ unit, i,
+ pid_nr(dd->ipath_pd[i]->port_pid),
+ dd->ipath_pd[i]->port_comm);
+ ret = -EBUSY;
+ goto bail;
+ }
+ }
+
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+ teardown_sdma(dd);
+
+ dd->ipath_flags &= ~IPATH_INITTED;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
+ ret = dd->ipath_f_reset(dd);
+ if (ret == 1) {
+ ipath_dbg("Reinitializing unit %u after reset attempt\n",
+ unit);
+ ret = ipath_init_chip(dd, 1);
+ } else
+ ret = -EAGAIN;
+ if (ret)
+ ipath_dev_err(dd, "Reinitialize unit %u after "
+ "reset failed with %d\n", unit, ret);
+ else
+ dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
+ "resetting\n", unit);
+
+bail:
+ return ret;
+}
+
+/*
+ * send a signal to all the processes that have the driver open
+ * through the normal interfaces (i.e., everything other than the diags
+ * interface). Returns the number of signalled processes.
+ */
+static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
+{
+ int i, sub, any = 0;
+ struct pid *pid;
+
+ if (!dd->ipath_pd)
+ return 0;
+ for (i = 1; i < dd->ipath_cfgports; i++) {
+ if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
+ continue;
+ pid = dd->ipath_pd[i]->port_pid;
+ if (!pid)
+ continue;
+
+ dev_info(&dd->pcidev->dev, "context %d in use "
+ "(PID %u), sending signal %d\n",
+ i, pid_nr(pid), sig);
+ kill_pid(pid, sig, 1);
+ any++;
+ for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
+ pid = dd->ipath_pd[i]->port_subpid[sub];
+ if (!pid)
+ continue;
+ dev_info(&dd->pcidev->dev, "sub-context "
+ "%d:%d in use (PID %u), sending "
+ "signal %d\n", i, sub, pid_nr(pid), sig);
+ kill_pid(pid, sig, 1);
+ any++;
+ }
+ }
+ return any;
+}
+
+static void ipath_hol_signal_down(struct ipath_devdata *dd)
+{
+ if (ipath_signal_procs(dd, SIGSTOP))
+ ipath_dbg("Stopped some processes\n");
+ ipath_cancel_sends(dd, 1);
+}
+
+
+static void ipath_hol_signal_up(struct ipath_devdata *dd)
+{
+ if (ipath_signal_procs(dd, SIGCONT))
+ ipath_dbg("Continued some processes\n");
+}
+
+/*
+ * Link is down: stop any user processes and flush pending sends to
+ * prevent HoL blocking, then start the HoL timer that periodically
+ * continues and then stops the processes, so they can detect the
+ * link going down if they want, and do something about it.
+ * Timer may already be running, so use __mod_timer, not add_timer.
+ */
+void ipath_hol_down(struct ipath_devdata *dd)
+{
+ dd->ipath_hol_state = IPATH_HOL_DOWN;
+ ipath_hol_signal_down(dd);
+ dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
+ dd->ipath_hol_timer.expires = jiffies +
+ msecs_to_jiffies(ipath_hol_timeout_ms);
+ __mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
+}
+
+/*
+ * link is up, continue any user processes, and ensure timer
+ * is a nop, if running. Let timer keep running, if set; it
+ * will nop when it sees the link is up
+ */
+void ipath_hol_up(struct ipath_devdata *dd)
+{
+ ipath_hol_signal_up(dd);
+ dd->ipath_hol_state = IPATH_HOL_UP;
+}
+
+/*
+ * toggle the running/not running state of user processes
+ * to prevent HoL blocking on chip resources, but still allow
+ * user processes to do link down special case handling.
+ * Should only be called via the timer
+ */
+void ipath_hol_event(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+
+ if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
+ && dd->ipath_hol_state != IPATH_HOL_UP) {
+ dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
+ ipath_dbg("Stopping processes\n");
+ ipath_hol_signal_down(dd);
+ } else { /* may do "extra" if also in ipath_hol_up() */
+ dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
+ ipath_dbg("Continuing processes\n");
+ ipath_hol_signal_up(dd);
+ }
+ if (dd->ipath_hol_state == IPATH_HOL_UP)
+ ipath_dbg("link's up, don't resched timer\n");
+ else {
+ dd->ipath_hol_timer.expires = jiffies +
+ msecs_to_jiffies(ipath_hol_timeout_ms);
+ __mod_timer(&dd->ipath_hol_timer,
+ dd->ipath_hol_timer.expires);
+ }
+}
+
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
+{
+ u64 val;
+
+ if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK)
+ return -1;
+ if (dd->ipath_rx_pol_inv != new_pol_inv) {
+ dd->ipath_rx_pol_inv = new_pol_inv;
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+ INFINIPATH_XGXS_RX_POL_SHIFT);
+ val |= ((u64)dd->ipath_rx_pol_inv) <<
+ INFINIPATH_XGXS_RX_POL_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+ }
+ return 0;
+}
+
+/*
+ * Disable and enable the armlaunch error. Used for PIO bandwidth testing on
+ * the 7220, which is count-based, rather than trigger-based. Safe for the
+ * driver check, since it's at init. Not completely safe when used for
+ * user-mode checking, since some error checking can be lost, but not
+ * particularly risky, and only has problematic side-effects in the face of
+ * very buggy user code. There is no reference counting, but that's also
+ * fine, given the intended use.
+ */
+void ipath_enable_armlaunch(struct ipath_devdata *dd)
+{
+ dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
+ INFINIPATH_E_SPIOARMLAUNCH);
+ dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+ dd->ipath_errormask);
+}
+
+void ipath_disable_armlaunch(struct ipath_devdata *dd)
+{
+ /* so don't re-enable if already set */
+ dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;
+ dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+ dd->ipath_errormask);
+}
+
+module_init(infinipath_init);
+module_exit(infinipath_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
new file mode 100644
index 0000000..dc37277
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -0,0 +1,1183 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
+#include "ipath_kernel.h"
+
+/*
+ * InfiniPath I2C driver for a serial eeprom. This is not a generic
+ * I2C interface. For a start, the device we're using (Atmel AT24C11)
+ * doesn't work like a regular I2C device. It looks like one
+ * electrically, but not logically. Normal I2C devices have a single
+ * 7-bit or 10-bit I2C address that they respond to. Valid 7-bit
+ * addresses range from 0x03 to 0x77. Addresses 0x00 to 0x02 and 0x78
+ * to 0x7F are special reserved addresses (e.g. 0x00 is the "general
+ * call" address.) The Atmel device, on the other hand, responds to ALL
+ * 7-bit addresses. It's designed to be the only device on a given I2C
+ * bus. A 7-bit address corresponds to the memory address within the
+ * Atmel device itself.
+ *
+ * Also, the timing requirements mean more than simple software
+ * bitbanging is needed, with readbacks from the chip to ensure timing
+ * (a simple udelay is not enough).
+ *
+ * This all means that accessing the device is specialized enough
+ * that using the standard kernel I2C bitbanging interface would be
+ * impossible. For example, the core I2C eeprom driver expects to find
+ * a device at one or more of a limited set of addresses only. It doesn't
+ * allow writing to an eeprom. It also doesn't provide any means of
+ * accessing eeprom contents from within the kernel, only via sysfs.
+ */
+
+/* Added functionality for IBA7220-based cards */
+#define IPATH_EEPROM_DEV_V1 0xA0
+#define IPATH_EEPROM_DEV_V2 0xA2
+#define IPATH_TEMP_DEV 0x98
+#define IPATH_BAD_DEV (IPATH_EEPROM_DEV_V2+2)
+#define IPATH_NO_DEV (0xFF)
+
+/*
+ * The number of I2C chains is proliferating. The table below brings
+ * some order to the madness. The basic principle is that the
+ * table is scanned from the top, and a "probe" is made to the
+ * device probe_dev. If that succeeds, the chain is considered
+ * to be of that type, and dd->i2c_chain_type is set to the index+1
+ * of the entry.
+ * The +1 is so static initialization can mean "unknown, do probe."
+ */
+static struct i2c_chain_desc {
+ u8 probe_dev; /* If seen at probe, chain is this type */
+ u8 eeprom_dev; /* Dev addr (if any) for EEPROM */
+ u8 temp_dev; /* Dev Addr (if any) for Temp-sense */
+} i2c_chains[] = {
+ { IPATH_BAD_DEV, IPATH_NO_DEV, IPATH_NO_DEV }, /* pre-iba7220 bds */
+ { IPATH_EEPROM_DEV_V1, IPATH_EEPROM_DEV_V1, IPATH_TEMP_DEV}, /* V1 */
+ { IPATH_EEPROM_DEV_V2, IPATH_EEPROM_DEV_V2, IPATH_TEMP_DEV}, /* V2 */
+ { IPATH_NO_DEV }
+};
+
+enum i2c_type {
+ i2c_line_scl = 0,
+ i2c_line_sda
+};
+
+enum i2c_state {
+ i2c_line_low = 0,
+ i2c_line_high
+};
+
+#define READ_CMD 1
+#define WRITE_CMD 0
+
+/**
+ * i2c_gpio_set - set a GPIO line
+ * @dd: the infinipath device
+ * @line: the line to set
+ * @new_line_state: the state to set
+ *
+ * Returns 0 if the line was set to the new state successfully, non-zero
+ * on error.
+ */
+static int i2c_gpio_set(struct ipath_devdata *dd,
+ enum i2c_type line,
+ enum i2c_state new_line_state)
+{
+ u64 out_mask, dir_mask, *gpioval;
+ unsigned long flags = 0;
+
+ gpioval = &dd->ipath_gpio_out;
+
+ if (line == i2c_line_scl) {
+ dir_mask = dd->ipath_gpio_scl;
+ out_mask = (1UL << dd->ipath_gpio_scl_num);
+ } else {
+ dir_mask = dd->ipath_gpio_sda;
+ out_mask = (1UL << dd->ipath_gpio_sda_num);
+ }
+
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+ if (new_line_state == i2c_line_high) {
+ /* tri-state the output rather than force high */
+ dd->ipath_extctrl &= ~dir_mask;
+ } else {
+ /* config line to be an output */
+ dd->ipath_extctrl |= dir_mask;
+ }
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
+
+ /* set output as well (no real verify) */
+ if (new_line_state == i2c_line_high)
+ *gpioval |= out_mask;
+ else
+ *gpioval &= ~out_mask;
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
+
+ return 0;
+}
+
+/**
+ * i2c_gpio_get - get a GPIO line state
+ * @dd: the infinipath device
+ * @line: the line to get
+ * @curr_statep: where to put the line state
+ *
+ * Returns 0 if the line state was read successfully, non-zero
+ * on error. *curr_statep is not set on error.
+ */
+static int i2c_gpio_get(struct ipath_devdata *dd,
+ enum i2c_type line,
+ enum i2c_state *curr_statep)
+{
+ u64 read_val, mask;
+ int ret;
+ unsigned long flags = 0;
+
+ /* check args */
+ if (curr_statep == NULL) {
+ ret = 1;
+ goto bail;
+ }
+
+ /* config line to be an input */
+ if (line == i2c_line_scl)
+ mask = dd->ipath_gpio_scl;
+ else
+ mask = dd->ipath_gpio_sda;
+
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+ dd->ipath_extctrl &= ~mask;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
+ /*
+ * Below is very unlikely to reflect true input state if Output
+ * Enable actually changed.
+ */
+ read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
+
+ if (read_val & mask)
+ *curr_statep = i2c_line_high;
+ else
+ *curr_statep = i2c_line_low;
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * i2c_wait_for_writes - wait for a write
+ * @dd: the infinipath device
+ *
+ * We use this instead of udelay directly, so we can make sure
+ * that previous register writes have been flushed all the way
+ * to the chip. Since we are delaying anyway, the cost doesn't
+ * hurt, and it makes the bit twiddling more regular.
+ */
+static void i2c_wait_for_writes(struct ipath_devdata *dd)
+{
+ (void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ rmb();
+}
+
+static void scl_out(struct ipath_devdata *dd, u8 bit)
+{
+ udelay(1);
+ i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
+
+ i2c_wait_for_writes(dd);
+}
+
+static void sda_out(struct ipath_devdata *dd, u8 bit)
+{
+ i2c_gpio_set(dd, i2c_line_sda, bit ? i2c_line_high : i2c_line_low);
+
+ i2c_wait_for_writes(dd);
+}
+
+static u8 sda_in(struct ipath_devdata *dd, int wait)
+{
+ enum i2c_state bit;
+
+ if (i2c_gpio_get(dd, i2c_line_sda, &bit))
+ ipath_dbg("get bit failed!\n");
+
+ if (wait)
+ i2c_wait_for_writes(dd);
+
+ return bit == i2c_line_high ? 1U : 0;
+}
+
+/**
+ * i2c_ackrcv - see if ack following write is true
+ * @dd: the infinipath device
+ */
+static int i2c_ackrcv(struct ipath_devdata *dd)
+{
+ u8 ack_received;
+
+ /* AT ENTRY SCL = LOW */
+ /* change direction, ignore data */
+ ack_received = sda_in(dd, 1);
+ scl_out(dd, i2c_line_high);
+ ack_received = sda_in(dd, 1) == 0;
+ scl_out(dd, i2c_line_low);
+ return ack_received;
+}
+
+/**
+ * rd_byte - read a byte, leaving ACK, STOP, etc up to caller
+ * @dd: the infinipath device
+ *
+ * Returns byte shifted out of device
+ */
+static int rd_byte(struct ipath_devdata *dd)
+{
+ int bit_cntr, data;
+
+ data = 0;
+
+ for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
+ data <<= 1;
+ scl_out(dd, i2c_line_high);
+ data |= sda_in(dd, 0);
+ scl_out(dd, i2c_line_low);
+ }
+ return data;
+}
+
+/**
+ * wr_byte - write a byte, one bit at a time
+ * @dd: the infinipath device
+ * @data: the byte to write
+ *
+ * Returns 0 if we got the following ack, otherwise 1
+ */
+static int wr_byte(struct ipath_devdata *dd, u8 data)
+{
+ int bit_cntr;
+ u8 bit;
+
+ for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
+ bit = (data >> bit_cntr) & 1;
+ sda_out(dd, bit);
+ scl_out(dd, i2c_line_high);
+ scl_out(dd, i2c_line_low);
+ }
+ return (!i2c_ackrcv(dd)) ? 1 : 0;
+}
+
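+/*
+ * send_ack - drive SDA low for one clock pulse to acknowledge a byte
+ * just read from the device, then release SDA again.
+ */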
+static void send_ack(struct ipath_devdata *dd)
+{
+ sda_out(dd, i2c_line_low);
+ scl_out(dd, i2c_line_high);
+ scl_out(dd, i2c_line_low);
+ sda_out(dd, i2c_line_high);
+}
+
+/**
+ * i2c_startcmd - transmit the start condition, followed by address/cmd
+ * @dd: the infinipath device
+ * @offset_dir: device address (or shifted offset, for legacy parts) with R/W bit
+ *
+ * (start condition: data and clock both driven high, then data driven
+ * low while the clock is still high)
+ */
+static int i2c_startcmd(struct ipath_devdata *dd, u8 offset_dir)
+{
+ int res;
+
+ /* issue start sequence */
+ sda_out(dd, i2c_line_high);
+ scl_out(dd, i2c_line_high);
+ sda_out(dd, i2c_line_low);
+ scl_out(dd, i2c_line_low);
+
+ /* issue length and direction byte */
+ res = wr_byte(dd, offset_dir);
+
+ if (res)
+ ipath_cdbg(VERBOSE, "No ack to complete start\n");
+
+ return res;
+}
+
+/**
+ * stop_cmd - transmit the stop condition
+ * @dd: the infinipath device
+ *
+ * (stop condition: data and clock both driven low, clock raised, then
+ * data raised while the clock is high)
+ */
+static void stop_cmd(struct ipath_devdata *dd)
+{
+ scl_out(dd, i2c_line_low);
+ sda_out(dd, i2c_line_low);
+ scl_out(dd, i2c_line_high);
+ sda_out(dd, i2c_line_high);
+ udelay(2);
+}
+
+/**
+ * eeprom_reset - reset I2C communication
+ * @dd: the infinipath device
+ */
+
+static int eeprom_reset(struct ipath_devdata *dd)
+{
+ int clock_cycles_left = 9;
+ u64 *gpioval = &dd->ipath_gpio_out;
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+ /* Make sure shadows are consistent */
+ dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
+ *gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
+
+ ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
+ "is %llx\n", (unsigned long long) *gpioval);
+
+ /*
+ * This is to get the i2c into a known state, by first going low,
+ * then tristate sda (and then tristate scl as first thing
+ * in loop)
+ */
+ scl_out(dd, i2c_line_low);
+ sda_out(dd, i2c_line_high);
+
+ /* Clock up to 9 cycles looking for SDA hi, then issue START and STOP */
+ while (clock_cycles_left--) {
+ scl_out(dd, i2c_line_high);
+
+ /* SDA seen high, issue START by dropping it while SCL high */
+ if (sda_in(dd, 0)) {
+ sda_out(dd, i2c_line_low);
+ scl_out(dd, i2c_line_low);
+ /* ATMEL spec says must be followed by STOP. */
+ scl_out(dd, i2c_line_high);
+ sda_out(dd, i2c_line_high);
+ ret = 0;
+ goto bail;
+ }
+
+ scl_out(dd, i2c_line_low);
+ }
+
+ ret = 1;
+
+bail:
+ return ret;
+}
+
+/*
+ * Probe for I2C device at specified address. Returns 0 for "success"
+ * to match rest of this file.
+ * Leave bus in "reasonable" state for further commands.
+ */
+static int i2c_probe(struct ipath_devdata *dd, int devaddr)
+{
+ int ret = 0;
+
+ ret = eeprom_reset(dd);
+ if (ret) {
+ ipath_dev_err(dd, "Failed reset probing device 0x%02X\n",
+ devaddr);
+ return ret;
+ }
+ /*
+ * Reset no longer leaves bus in start condition, so normal
+ * i2c_startcmd() will do.
+ */
+ ret = i2c_startcmd(dd, devaddr | READ_CMD);
+ if (ret)
+ ipath_cdbg(VERBOSE, "Failed startcmd for device 0x%02X\n",
+ devaddr);
+ else {
+ /*
+ * Device did respond. Complete a single-byte read, because some
+ * devices apparently cannot handle STOP immediately after they
+ * ACK the start-cmd.
+ */
+ int data;
+ data = rd_byte(dd);
+ stop_cmd(dd);
+ ipath_cdbg(VERBOSE, "Response from device 0x%02X\n", devaddr);
+ }
+ return ret;
+}
+
+/*
+ * Returns the "i2c type". This is a pointer to a struct that describes
+ * the I2C chain on this board. To minimize impact on struct ipath_devdata,
+ * the (small integer) index into the table is actually memoized, rather
+ * then the pointer.
+ * Memoization is because the type is determined on the first call per chip.
+ * An alternative would be to move type determination to early
+ * init code.
+ */
+static struct i2c_chain_desc *ipath_i2c_type(struct ipath_devdata *dd)
+{
+ int idx;
+
+ /* Get memoized index, from previous successful probes */
+ idx = dd->ipath_i2c_chain_type - 1;
+ if (idx >= 0 && idx < (ARRAY_SIZE(i2c_chains) - 1))
+ goto done;
+
+ idx = 0;
+ while (i2c_chains[idx].probe_dev != IPATH_NO_DEV) {
+ /* if probe succeeds, this is type */
+ if (!i2c_probe(dd, i2c_chains[idx].probe_dev))
+ break;
+ ++idx;
+ }
+
+ /*
+ * Old EEPROM (first entry) may require a reset after probe,
+ * rather than being able to "start" after "stop"
+ */
+ if (idx == 0)
+ eeprom_reset(dd);
+
+ if (i2c_chains[idx].probe_dev == IPATH_NO_DEV)
+ idx = -1;
+ else
+ dd->ipath_i2c_chain_type = idx + 1;
+done:
+ return (idx >= 0) ? i2c_chains + idx : NULL;
+}
+
+static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
+ u8 eeprom_offset, void *buffer, int len)
+{
+ int ret;
+ struct i2c_chain_desc *icd;
+ u8 *bp = buffer;
+
+ ret = 1;
+ icd = ipath_i2c_type(dd);
+ if (!icd)
+ goto bail;
+
+ if (icd->eeprom_dev == IPATH_NO_DEV) {
+ /* legacy not-really-I2C */
+ ipath_cdbg(VERBOSE, "Start command only address\n");
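+ /*
+ * On pre-IBA7220 boards the 7-bit "device address" field is really
+ * the memory offset within the part (see the comment at the top of
+ * this file), so the byte sent with the start condition is just the
+ * shifted offset plus the R/W bit.
+ */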
+ eeprom_offset = (eeprom_offset << 1) | READ_CMD;
+ ret = i2c_startcmd(dd, eeprom_offset);
+ } else {
+ /* Actual I2C */
+ ipath_cdbg(VERBOSE, "Start command uses devaddr\n");
+ if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
+ ipath_dbg("Failed EEPROM startcmd\n");
+ stop_cmd(dd);
+ ret = 1;
+ goto bail;
+ }
+ ret = wr_byte(dd, eeprom_offset);
+ stop_cmd(dd);
+ if (ret) {
+ ipath_dev_err(dd, "Failed to write EEPROM address\n");
+ ret = 1;
+ goto bail;
+ }
+ ret = i2c_startcmd(dd, icd->eeprom_dev | READ_CMD);
+ }
+ if (ret) {
+ ipath_dbg("Failed startcmd for dev %02X\n", icd->eeprom_dev);
+ stop_cmd(dd);
+ ret = 1;
+ goto bail;
+ }
+
+ /*
+ * eeprom keeps clocking data out as long as we ack, automatically
+ * incrementing the address.
+ */
+ while (len-- > 0) {
+ /* get and store data */
+ *bp++ = rd_byte(dd);
+ /* send ack if not the last byte */
+ if (len)
+ send_ack(dd);
+ }
+
+ stop_cmd(dd);
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+static int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
+ const void *buffer, int len)
+{
+ int sub_len;
+ const u8 *bp = buffer;
+ int max_wait_time, i;
+ int ret;
+ struct i2c_chain_desc *icd;
+
+ ret = 1;
+ icd = ipath_i2c_type(dd);
+ if (!icd)
+ goto bail;
+
+ while (len > 0) {
+ if (icd->eeprom_dev == IPATH_NO_DEV) {
+ if (i2c_startcmd(dd,
+ (eeprom_offset << 1) | WRITE_CMD)) {
+ ipath_dbg("Failed to start cmd offset %u\n",
+ eeprom_offset);
+ goto failed_write;
+ }
+ } else {
+ /* Real I2C */
+ if (i2c_startcmd(dd, icd->eeprom_dev | WRITE_CMD)) {
+ ipath_dbg("Failed EEPROM startcmd\n");
+ goto failed_write;
+ }
+ ret = wr_byte(dd, eeprom_offset);
+ if (ret) {
+ ipath_dev_err(dd, "Failed to write EEPROM "
+ "address\n");
+ goto failed_write;
+ }
+ }
+
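+ /*
+ * Write at most 4 data bytes per start/stop cycle; the device is
+ * then polled below until its internal write cycle completes before
+ * the next chunk is sent.
+ */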
+ sub_len = min(len, 4);
+ eeprom_offset += sub_len;
+ len -= sub_len;
+
+ for (i = 0; i < sub_len; i++) {
+ if (wr_byte(dd, *bp++)) {
+ ipath_dbg("no ack after byte %u/%u (%u "
+ "total remain)\n", i, sub_len,
+ len + sub_len - i);
+ goto failed_write;
+ }
+ }
+
+ stop_cmd(dd);
+
+ /*
+ * Wait for the write to complete, by waiting for a successful
+ * read (the chip replies with a zero after the write cmd
+ * completes, and before it writes to the eeprom).
+ * The startcmd for the read will fail the ack until
+ * the writes have completed. We do this inline to avoid
+ * the debug prints that are in the real read routine
+ * if the startcmd fails.
+ * We also use the proper device address, so it doesn't matter
+ * whether we have a real eeprom_dev; legacy parts respond to
+ * any address.
+ */
+ max_wait_time = 100;
+ while (i2c_startcmd(dd, icd->eeprom_dev | READ_CMD)) {
+ stop_cmd(dd);
+ if (!--max_wait_time) {
+ ipath_dbg("Did not get successful read to "
+ "complete write\n");
+ goto failed_write;
+ }
+ }
+ /* now read (and ignore) the resulting byte */
+ rd_byte(dd);
+ stop_cmd(dd);
+ }
+
+ ret = 0;
+ goto bail;
+
+failed_write:
+ stop_cmd(dd);
+ ret = 1;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_eeprom_read - receives bytes from the eeprom via I2C
+ * @dd: the infinipath device
+ * @eeprom_offset: address to read from
+ * @buffer: where to store result
+ * @len: number of bytes to receive
+ */
+int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
+ void *buff, int len)
+{
+ int ret;
+
+ ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
+ if (!ret) {
+ ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
+ mutex_unlock(&dd->ipath_eep_lock);
+ }
+
+ return ret;
+}
+
+/**
+ * ipath_eeprom_write - writes data to the eeprom via I2C
+ * @dd: the infinipath device
+ * @eeprom_offset: where to place data
+ * @buffer: data to write
+ * @len: number of bytes to write
+ */
+int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
+ const void *buff, int len)
+{
+ int ret;
+
+ ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
+ if (!ret) {
+ ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
+ mutex_unlock(&dd->ipath_eep_lock);
+ }
+
+ return ret;
+}
+
+static u8 flash_csum(struct ipath_flash *ifp, int adjust)
+{
+ u8 *ip = (u8 *) ifp;
+ u8 csum = 0, len;
+
+ /*
+ * Limit length checksummed to max length of actual data.
+ * Checksum of erased eeprom will still be bad, but we avoid
+ * reading past the end of the buffer we were passed.
+ */
+ len = ifp->if_length;
+ if (len > sizeof(struct ipath_flash))
+ len = sizeof(struct ipath_flash);
+ while (len--)
+ csum += *ip++;
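+ /*
+ * The stored checksum byte was included in the sum above; back it
+ * out, then take the one's complement, so that a correct image
+ * (including if_csum) sums to 0xFF.
+ */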
+ csum -= ifp->if_csum;
+ csum = ~csum;
+ if (adjust)
+ ifp->if_csum = csum;
+
+ return csum;
+}
+
+/**
+ * ipath_get_eeprom_info - get the GUID and related info from the i2c eeprom
+ * @dd: the infinipath device
+ *
+ * We have the capability to use the ipath_nguid field, and get
+ * the guid from the first chip's flash, to use for all of them.
+ */
+void ipath_get_eeprom_info(struct ipath_devdata *dd)
+{
+ void *buf;
+ struct ipath_flash *ifp;
+ __be64 guid;
+ int len, eep_stat;
+ u8 csum, *bguid;
+ int t = dd->ipath_unit;
+ struct ipath_devdata *dd0 = ipath_lookup(0);
+
+ if (t && dd0->ipath_nguid > 1 && t <= dd0->ipath_nguid) {
+ u8 oguid;
+ dd->ipath_guid = dd0->ipath_guid;
+ bguid = (u8 *) & dd->ipath_guid;
+
+ oguid = bguid[7];
+ bguid[7] += t;
+ if (oguid > bguid[7]) {
+ if (bguid[6] == 0xff) {
+ if (bguid[5] == 0xff) {
+ ipath_dev_err(
+ dd,
+ "Can't set %s GUID from "
+ "base, wraps to OUI!\n",
+ ipath_get_unit_name(t));
+ dd->ipath_guid = 0;
+ goto bail;
+ }
+ bguid[5]++;
+ }
+ bguid[6]++;
+ }
+ dd->ipath_nguid = 1;
+
+ ipath_dbg("nguid %u, so adding %u to device 0 guid, "
+ "for %llx\n",
+ dd0->ipath_nguid, t,
+ (unsigned long long) be64_to_cpu(dd->ipath_guid));
+ goto bail;
+ }
+
+ /*
+ * read full flash, not just currently used part, since it may have
+ * been written with a newer definition
+ */
+ len = sizeof(struct ipath_flash);
+ buf = vmalloc(len);
+ if (!buf) {
+ ipath_dev_err(dd, "Couldn't allocate memory to read %u "
+ "bytes from eeprom for GUID\n", len);
+ goto bail;
+ }
+
+ mutex_lock(&dd->ipath_eep_lock);
+ eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
+ mutex_unlock(&dd->ipath_eep_lock);
+
+ if (eep_stat) {
+ ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
+ goto done;
+ }
+ ifp = (struct ipath_flash *)buf;
+
+ csum = flash_csum(ifp, 0);
+ if (csum != ifp->if_csum) {
+ dev_info(&dd->pcidev->dev, "Bad I2C flash checksum: "
+ "0x%x, not 0x%x\n", csum, ifp->if_csum);
+ goto done;
+ }
+ if (*(__be64 *) ifp->if_guid == 0ULL ||
+ *(__be64 *) ifp->if_guid == __constant_cpu_to_be64(-1LL)) {
+ ipath_dev_err(dd, "Invalid GUID %llx from flash; "
+ "ignoring\n",
+ *(unsigned long long *) ifp->if_guid);
+ /* don't allow GUID if all 0 or all 1's */
+ goto done;
+ }
+
+ /* complain, but allow it */
+ if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
+ dev_info(&dd->pcidev->dev, "Warning, GUID %llx is "
+ "default, probably not correct!\n",
+ *(unsigned long long *) ifp->if_guid);
+
+ bguid = ifp->if_guid;
+ if (!bguid[0] && !bguid[1] && !bguid[2]) {
+ /* original incorrect GUID format in flash; fix in
+ * core copy, by shifting up 2 octets; don't need to
+ * change top octet, since both it and shifted are
+ * 0.. */
+ bguid[1] = bguid[3];
+ bguid[2] = bguid[4];
+ bguid[3] = bguid[4] = 0;
+ guid = *(__be64 *) ifp->if_guid;
+ ipath_cdbg(VERBOSE, "Old GUID format in flash, top 3 zero, "
+ "shifting 2 octets\n");
+ } else
+ guid = *(__be64 *) ifp->if_guid;
+ dd->ipath_guid = guid;
+ dd->ipath_nguid = ifp->if_numguid;
+ /*
+ * Things are slightly complicated by the desire to transparently
+ * support both the Pathscale 10-digit serial number and the QLogic
+ * 13-character version.
+ */
+ if ((ifp->if_fversion > 1) && ifp->if_sprefix[0]
+ && ((u8 *)ifp->if_sprefix)[0] != 0xFF) {
+ /* This board has a Serial-prefix, which is stored
+ * elsewhere for backward-compatibility.
+ */
+ char *snp = dd->ipath_serial;
+ memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix);
+ snp[sizeof ifp->if_sprefix] = '\0';
+ len = strlen(snp);
+ snp += len;
+ len = (sizeof dd->ipath_serial) - len;
+ if (len > sizeof ifp->if_serial) {
+ len = sizeof ifp->if_serial;
+ }
+ memcpy(snp, ifp->if_serial, len);
+ } else
+ memcpy(dd->ipath_serial, ifp->if_serial,
+ sizeof ifp->if_serial);
+ if (!strstr(ifp->if_comment, "Tested successfully"))
+ ipath_dev_err(dd, "Board SN %s did not pass functional "
+ "test: %s\n", dd->ipath_serial,
+ ifp->if_comment);
+
+ ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
+ (unsigned long long) be64_to_cpu(dd->ipath_guid));
+
+ memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
+ /*
+ * Power-on (actually "active") hours are kept as little-endian value
+ * in EEPROM, but as seconds in a (possibly as small as 24-bit)
+ * atomic_t while running.
+ */
+ atomic_set(&dd->ipath_active_time, 0);
+ dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
+
+done:
+ vfree(buf);
+
+bail:;
+}
+
+/**
+ * ipath_update_eeprom_log - copy active-time and error counters to eeprom
+ * @dd: the infinipath device
+ *
+ * Although the time is kept as seconds in the ipath_devdata struct, it is
+ * rounded to hours for re-write, as we have only 16 bits in EEPROM.
+ * First-cut code reads whole (expected) struct ipath_flash, modifies,
+ * re-writes. Future direction: read/write only what we need, assuming
+ * that the EEPROM had to have been "good enough" for driver init, and
+ * if not, we aren't making it worse.
+ *
+ */
+
+int ipath_update_eeprom_log(struct ipath_devdata *dd)
+{
+ void *buf;
+ struct ipath_flash *ifp;
+ int len, hi_water;
+ uint32_t new_time, new_hrs;
+ u8 csum;
+ int ret, idx;
+ unsigned long flags;
+
+ /* first, check if we actually need to do anything. */
+ ret = 0;
+ for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+ if (dd->ipath_eep_st_new_errs[idx]) {
+ ret = 1;
+ break;
+ }
+ }
+ new_time = atomic_read(&dd->ipath_active_time);
+
+ if (ret == 0 && new_time < 3600)
+ return 0;
+
+ /*
+ * The quick-check above determined that there is something worthy
+ * of logging, so get the current contents and take a more detailed look.
+ * Read the full flash, not just the currently used part, since it may
+ * have been written with a newer definition.
+ */
+ len = sizeof(struct ipath_flash);
+ buf = vmalloc(len);
+ ret = 1;
+ if (!buf) {
+ ipath_dev_err(dd, "Couldn't allocate memory to read %u "
+ "bytes from eeprom for logging\n", len);
+ goto bail;
+ }
+
+ /* Grab semaphore and read current EEPROM. If we get an
+ * error, let go, but if not, keep it until we finish write.
+ */
+ ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
+ if (ret) {
+ ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
+ goto free_bail;
+ }
+ ret = ipath_eeprom_internal_read(dd, 0, buf, len);
+ if (ret) {
+ mutex_unlock(&dd->ipath_eep_lock);
+ ipath_dev_err(dd, "Unable to read EEPROM for logging\n");
+ goto free_bail;
+ }
+ ifp = (struct ipath_flash *)buf;
+
+ csum = flash_csum(ifp, 0);
+ if (csum != ifp->if_csum) {
+ mutex_unlock(&dd->ipath_eep_lock);
+ ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
+ csum, ifp->if_csum);
+ ret = 1;
+ goto free_bail;
+ }
+ hi_water = 0;
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+ int new_val = dd->ipath_eep_st_new_errs[idx];
+ if (new_val) {
+ /*
+ * If we have seen any errors, add to the EEPROM values.
+ * We need to saturate at 0xFF (255), and we would also
+ * need to adjust the checksum if we were trying to
+ * minimize EEPROM traffic.
+ * Note that we add to the actual current count in EEPROM,
+ * in case it was altered while we were running.
+ */
+ new_val += ifp->if_errcntp[idx];
+ if (new_val > 0xFF)
+ new_val = 0xFF;
+ if (ifp->if_errcntp[idx] != new_val) {
+ ifp->if_errcntp[idx] = new_val;
+ hi_water = offsetof(struct ipath_flash,
+ if_errcntp) + idx;
+ }
+ /*
+ * update our shadow (used to minimize EEPROM
+ * traffic), to match what we are about to write.
+ */
+ dd->ipath_eep_st_errs[idx] = new_val;
+ dd->ipath_eep_st_new_errs[idx] = 0;
+ }
+ }
+ /*
+ * now update active-time. We would like to round to the nearest hour
+ * but unless atomic_t are sure to be proper signed ints we cannot,
+ * because we need to account for what we "transfer" to EEPROM and
+ * if we log an hour at 31 minutes, then we would need to set
+ * active_time to -29 to accurately count the _next_ hour.
+ */
+ if (new_time >= 3600) {
+ new_hrs = new_time / 3600;
+ atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
+ new_hrs += dd->ipath_eep_hrs;
+ if (new_hrs > 0xFFFF)
+ new_hrs = 0xFFFF;
+ dd->ipath_eep_hrs = new_hrs;
+ if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
+ ifp->if_powerhour[0] = new_hrs & 0xFF;
+ hi_water = offsetof(struct ipath_flash, if_powerhour);
+ }
+ if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
+ ifp->if_powerhour[1] = new_hrs >> 8;
+ hi_water = offsetof(struct ipath_flash, if_powerhour)
+ + 1;
+ }
+ }
+ /*
+ * There is a tiny possibility that we could somehow fail to write
+ * the EEPROM after updating our shadows, but problems from holding
+ * the spinlock too long are a much bigger issue.
+ */
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+ if (hi_water) {
+ /* we made some change to the data, update cksum and write */
+ csum = flash_csum(ifp, 1);
+ ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
+ }
+ mutex_unlock(&dd->ipath_eep_lock);
+ if (ret)
+ ipath_dev_err(dd, "Failed updating EEPROM\n");
+
+free_bail:
+ vfree(buf);
+bail:
+ return ret;
+
+}
+
+/**
+ * ipath_inc_eeprom_err - increment one of the four error counters
+ * that are logged to EEPROM.
+ * @dd: the infinipath device
+ * @eidx: 0..3, the counter to increment
+ * @incr: how much to add
+ *
+ * Each counter is 8-bits, and saturates at 255 (0xFF). They
+ * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
+ * is called, but it can only be called in a context that allows sleep.
+ * This function can be called even at interrupt level.
+ */
+
+void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
+{
+ uint new_val;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
+ if (new_val > 255)
+ new_val = 255;
+ dd->ipath_eep_st_new_errs[eidx] = new_val;
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+ return;
+}
+
+static int ipath_tempsense_internal_read(struct ipath_devdata *dd, u8 regnum)
+{
+ int ret;
+ struct i2c_chain_desc *icd;
+
+ ret = -ENOENT;
+
+ icd = ipath_i2c_type(dd);
+ if (!icd)
+ goto bail;
+
+ if (icd->temp_dev == IPATH_NO_DEV) {
+ /* tempsense only exists on new, real-I2C boards */
+ ret = -ENXIO;
+ goto bail;
+ }
+
+ if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
+ ipath_dbg("Failed tempsense startcmd\n");
+ stop_cmd(dd);
+ ret = -ENXIO;
+ goto bail;
+ }
+ ret = wr_byte(dd, regnum);
+ stop_cmd(dd);
+ if (ret) {
+ ipath_dev_err(dd, "Failed tempsense WR command %02X\n",
+ regnum);
+ ret = -ENXIO;
+ goto bail;
+ }
+ if (i2c_startcmd(dd, icd->temp_dev | READ_CMD)) {
+ ipath_dbg("Failed tempsense RD startcmd\n");
+ stop_cmd(dd);
+ ret = -ENXIO;
+ goto bail;
+ }
+ /*
+ * We can only clock out one byte per command, sensibly
+ */
+ ret = rd_byte(dd);
+ stop_cmd(dd);
+
+bail:
+ return ret;
+}
+
+#define VALID_TS_RD_REG_MASK 0xBF
+
+/**
+ * ipath_tempsense_read - read register of temp sensor via I2C
+ * @dd: the infinipath device
+ * @regnum: register to read from
+ *
+ * returns reg contents (0..255) or < 0 for error
+ */
+int ipath_tempsense_read(struct ipath_devdata *dd, u8 regnum)
+{
+ int ret;
+
+ if (regnum > 7)
+ return -EINVAL;
+
+ /* return a bogus value for (the one) register we do not have */
+ if (!((1 << regnum) & VALID_TS_RD_REG_MASK))
+ return 0;
+
+ ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
+ if (!ret) {
+ ret = ipath_tempsense_internal_read(dd, regnum);
+ mutex_unlock(&dd->ipath_eep_lock);
+ }
+
+ /*
+ * There are three possibilities here:
+ * ret is actual value (0..255)
+ * ret is -ENXIO or -EINVAL from code in this file
+ * ret is -EINTR from mutex_lock_interruptible.
+ */
+ return ret;
+}
+
+static int ipath_tempsense_internal_write(struct ipath_devdata *dd,
+ u8 regnum, u8 data)
+{
+ int ret = -ENOENT;
+ struct i2c_chain_desc *icd;
+
+ icd = ipath_i2c_type(dd);
+ if (!icd)
+ goto bail;
+
+ if (icd->temp_dev == IPATH_NO_DEV) {
+ /* tempsense only exists on new, real-I2C boards */
+ ret = -ENXIO;
+ goto bail;
+ }
+ if (i2c_startcmd(dd, icd->temp_dev | WRITE_CMD)) {
+ ipath_dbg("Failed tempsense startcmd\n");
+ stop_cmd(dd);
+ ret = -ENXIO;
+ goto bail;
+ }
+ ret = wr_byte(dd, regnum);
+ if (ret) {
+ stop_cmd(dd);
+ ipath_dev_err(dd, "Failed to write tempsense command %02X\n",
+ regnum);
+ ret = -ENXIO;
+ goto bail;
+ }
+ ret = wr_byte(dd, data);
+ stop_cmd(dd);
+ ret = i2c_startcmd(dd, icd->temp_dev | READ_CMD);
+ if (ret) {
+ ipath_dev_err(dd, "Failed tempsense data wrt to %02X\n",
+ regnum);
+ ret = -ENXIO;
+ }
+
+bail:
+ return ret;
+}
+
+#define VALID_TS_WR_REG_MASK ((1 << 9) | (1 << 0xB) | (1 << 0xD))
+
+/**
+ * ipath_tempsense_write - write register of temp sensor via I2C
+ * @dd: the infinipath device
+ * @regnum: register to write
+ * @data: data to write
+ *
+ * returns 0 for success or < 0 for error
+ */
+int ipath_tempsense_write(struct ipath_devdata *dd, u8 regnum, u8 data)
+{
+ int ret;
+
+ if (regnum > 15 || !((1 << regnum) & VALID_TS_WR_REG_MASK))
+ return -EINVAL;
+
+ ret = mutex_lock_interruptible(&dd->ipath_eep_lock);
+ if (!ret) {
+ ret = ipath_tempsense_internal_write(dd, regnum, data);
+ mutex_unlock(&dd->ipath_eep_lock);
+ }
+
+ /*
+ * There are three possibilities here:
+ * ret is 0 for success
+ * ret is -ENXIO or -EINVAL from code in this file
+ * ret is -EINTR from mutex_lock_interruptible.
+ */
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
new file mode 100644
index 0000000..1af1f3a
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -0,0 +1,2613 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/poll.h>
+#include <linux/cdev.h>
+#include <linux/swap.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/jiffies.h>
+#include <linux/smp_lock.h>
+#include <asm/pgtable.h>
+
+#include "ipath_kernel.h"
+#include "ipath_common.h"
+#include "ipath_user_sdma.h"
+
+static int ipath_open(struct inode *, struct file *);
+static int ipath_close(struct inode *, struct file *);
+static ssize_t ipath_write(struct file *, const char __user *, size_t,
+ loff_t *);
+static ssize_t ipath_writev(struct kiocb *, const struct iovec *,
+ unsigned long , loff_t);
+static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
+static int ipath_mmap(struct file *, struct vm_area_struct *);
+
+static const struct file_operations ipath_file_ops = {
+ .owner = THIS_MODULE,
+ .write = ipath_write,
+ .aio_write = ipath_writev,
+ .open = ipath_open,
+ .release = ipath_close,
+ .poll = ipath_poll,
+ .mmap = ipath_mmap
+};
+
+/*
+ * Convert kernel virtual addresses to physical addresses so they don't
+ * potentially conflict with the chip addresses used as mmap offsets.
+ * It doesn't really matter what mmap offset we use as long as we can
+ * interpret it correctly.
+ */
+static u64 cvt_kvaddr(void *p)
+{
+ struct page *page;
+ u64 paddr = 0;
+
+ page = vmalloc_to_page(p);
+ if (page)
+ paddr = page_to_pfn(page) << PAGE_SHIFT;
+
+ return paddr;
+}
+
+static int ipath_get_base_info(struct file *fp,
+ void __user *ubase, size_t ubase_size)
+{
+ struct ipath_portdata *pd = port_fp(fp);
+ int ret = 0;
+ struct ipath_base_info *kinfo = NULL;
+ struct ipath_devdata *dd = pd->port_dd;
+ unsigned subport_cnt;
+ int shared, master;
+ size_t sz;
+
+ subport_cnt = pd->port_subport_cnt;
+ if (!subport_cnt) {
+ shared = 0;
+ master = 0;
+ subport_cnt = 1;
+ } else {
+ shared = 1;
+ master = !subport_fp(fp);
+ }
+
+ sz = sizeof(*kinfo);
+ /* If port sharing is not requested, allow the old size structure */
+ if (!shared)
+ sz -= 7 * sizeof(u64);
+ if (ubase_size < sz) {
+ ipath_cdbg(PROC,
+ "Base size %zu, need %zu (version mismatch?)\n",
+ ubase_size, sz);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
+ if (kinfo == NULL) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ ret = dd->ipath_f_get_base_info(pd, kinfo);
+ if (ret < 0)
+ goto bail;
+
+ kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt;
+ kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize;
+ kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt;
+ kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize;
+ /*
+ * have to mmap whole thing
+ */
+ kinfo->spi_rcv_egrbuftotlen =
+ pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
+ kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
+ kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
+ pd->port_rcvegrbuf_chunks;
+ kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
+ if (master)
+ kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
+ /*
+ * for this use, may be ipath_cfgports summed over all chips that
+ * are are configured and present
+ * are configured and present
+ kinfo->spi_nports = dd->ipath_cfgports;
+ /* unit (chip/board) our port is on */
+ kinfo->spi_unit = dd->ipath_unit;
+ /* for now, only a single page */
+ kinfo->spi_tid_maxsize = PAGE_SIZE;
+
+ /*
+ * Doing this per port, and based on the skip value, etc. This has
+ * to be the actual buffer size, since the protocol code treats it
+ * as an array.
+ *
+ * These have to be set to user addresses in the user code via mmap.
+ * These values are used on return to user code for the mmap target
+ * addresses only. For 32 bit, same 44 bit address problem, so use
+ * the physical address, not virtual. Before 2.6.11, using the
+ * page_address() macro worked, but in 2.6.11, even that returns the
+ * full 64 bit address (upper bits all 1's). So far, using the
+ * physical addresses (or chip offsets, for chip mapping) works, but
+ * no doubt some future kernel release will change that, and we'll be
+ * on to yet another method of dealing with this.
+ */
+ kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
+ kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
+ kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
+ kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
+ kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
+ (void *) dd->ipath_statusp -
+ (void *) dd->ipath_pioavailregs_dma;
+ if (!shared) {
+ kinfo->spi_piocnt = pd->port_piocnt;
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs;
+ kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
+ dd->ipath_ureg_align * pd->port_port;
+ } else if (master) {
+ kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
+ (pd->port_piocnt % subport_cnt);
+ /* Master's PIO buffers are after all the slave's */
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+ dd->ipath_palign *
+ (pd->port_piocnt - kinfo->spi_piocnt);
+ } else {
+ unsigned slave = subport_fp(fp) - 1;
+
+ kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
+ kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+ dd->ipath_palign * kinfo->spi_piocnt * slave;
+ }
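+ /*
+ * Example with hypothetical numbers: port_piocnt = 66 and 4 subports
+ * gives each slave 16 PIO buffers and the master 18 (16 plus the
+ * remainder of 2), with the master's buffers laid out after the
+ * slaves' at offset ipath_palign * (66 - 18).
+ */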
+
+ if (shared) {
+ kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
+ dd->ipath_ureg_align * pd->port_port;
+ kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
+ kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
+ kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
+
+ kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
+ PAGE_SIZE * subport_fp(fp));
+
+ kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * subport_fp(fp));
+ kinfo->spi_rcvhdr_tailaddr = 0;
+ kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
+ pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
+ subport_fp(fp));
+
+ kinfo->spi_subport_uregbase =
+ cvt_kvaddr(pd->subport_uregbase);
+ kinfo->spi_subport_rcvegrbuf =
+ cvt_kvaddr(pd->subport_rcvegrbuf);
+ kinfo->spi_subport_rcvhdr_base =
+ cvt_kvaddr(pd->subport_rcvhdr_base);
+ ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
+ kinfo->spi_port, kinfo->spi_runtime_flags,
+ (unsigned long long) kinfo->spi_subport_uregbase,
+ (unsigned long long) kinfo->spi_subport_rcvegrbuf,
+ (unsigned long long) kinfo->spi_subport_rcvhdr_base);
+ }
+
+ kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
+ dd->ipath_palign;
+ kinfo->spi_pioalign = dd->ipath_palign;
+
+ kinfo->spi_qpair = IPATH_KD_QP;
+ /*
+ * user mode PIO buffers are always 2KB, even when 4KB can
+ * be received, and sent via the kernel; this is ibmaxlen
+ * for 2K MTU.
+ */
+ kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
+ kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */
+ kinfo->spi_port = pd->port_port;
+ kinfo->spi_subport = subport_fp(fp);
+ kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
+ kinfo->spi_hw_version = dd->ipath_revision;
+
+ if (master) {
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
+ }
+
+ sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
+ if (copy_to_user(ubase, kinfo, sz))
+ ret = -EFAULT;
+
+bail:
+ kfree(kinfo);
+ return ret;
+}
+
+/**
+ * ipath_tid_update - update a port TID
+ * @pd: the port
+ * @fp: the ipath device file
+ * @ti: the TID information
+ *
+ * The new implementation as of Oct 2004 is that the driver assigns
+ * the tid and returns it to the caller. To make it easier to
+ * catch bugs, and to reduce search time, we keep a cursor for
+ * each port, walking the shadow tid array to find one that's not
+ * in use.
+ *
+ * For now, if we can't allocate the full list, we fail, although
+ * in the long run, we'll allocate as many as we can, and the
+ * caller will deal with that by trying the remaining pages later.
+ * That means that when we fail, we have to mark the tids as not in
+ * use again, in our shadow copy.
+ *
+ * It's up to the caller to free the tids when they are done.
+ * We'll unlock the pages as they free them.
+ *
+ * Also, right now we are locking one page at a time, but since
+ * the intended use of this routine is for a single group of
+ * virtually contiguous pages, that should change to improve
+ * performance.
+ */
+static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
+ const struct ipath_tid_info *ti)
+{
+ int ret = 0, ntids;
+ u32 tid, porttid, cnt, i, tidcnt, tidoff;
+ u16 *tidlist;
+ struct ipath_devdata *dd = pd->port_dd;
+ u64 physaddr;
+ unsigned long vaddr;
+ u64 __iomem *tidbase;
+ unsigned long tidmap[8];
+ struct page **pagep = NULL;
+ unsigned subport = subport_fp(fp);
+
+ if (!dd->ipath_pageshadow) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ cnt = ti->tidcnt;
+ if (!cnt) {
+ ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n",
+ (unsigned long long) ti->tidlist);
+ /*
+ * Should we treat this as success? Likely a bug.
+ */
+ ret = -EFAULT;
+ goto done;
+ }
+ porttid = pd->port_port * dd->ipath_rcvtidcnt;
+ if (!pd->port_subport_cnt) {
+ tidcnt = dd->ipath_rcvtidcnt;
+ tid = pd->port_tidcursor;
+ tidoff = 0;
+ } else if (!subport) {
+ tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+ (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+ tidoff = dd->ipath_rcvtidcnt - tidcnt;
+ porttid += tidoff;
+ tid = tidcursor_fp(fp);
+ } else {
+ tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+ tidoff = tidcnt * (subport - 1);
+ porttid += tidoff;
+ tid = tidcursor_fp(fp);
+ }
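+ /*
+ * Example with hypothetical numbers: rcvtidcnt = 64 and 3 subports
+ * gives the master (subport 0) 22 TIDs (21 plus the remainder of 1)
+ * starting at offset 42, while slaves 1 and 2 get 21 TIDs each at
+ * offsets 0 and 21.
+ */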
+ if (cnt > tidcnt) {
+ /* make sure it all fits in port_tid_pg_list */
+ dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
+ "TIDs, only trying max (%u)\n", cnt, tidcnt);
+ cnt = tidcnt;
+ }
+ pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
+ tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
+
+ memset(tidmap, 0, sizeof(tidmap));
+ /* before decrement; chip actual # */
+ ntids = tidcnt;
+ tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
+ dd->ipath_rcvtidbase +
+ porttid * sizeof(*tidbase));
+
+ ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n",
+ pd->port_port, cnt, tid, tidbase);
+
+ /* virtual address of first page in transfer */
+ vaddr = ti->tidvaddr;
+ if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
+ cnt * PAGE_SIZE)) {
+ ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n",
+ (void *)vaddr, cnt);
+ ret = -EFAULT;
+ goto done;
+ }
+ ret = ipath_get_user_pages(vaddr, cnt, pagep);
+ if (ret) {
+ if (ret == -EBUSY) {
+ ipath_dbg("Failed to lock addr %p, %u pages "
+ "(already locked)\n",
+ (void *) vaddr, cnt);
+ /*
+ * for now, continue, and see what happens but with
+ * the new implementation, this should never happen,
+ * unless perhaps the user has mpin'ed the pages
+ * themselves (something we need to test)
+ */
+ ret = 0;
+ } else {
+ dev_info(&dd->pcidev->dev,
+ "Failed to lock addr %p, %u pages: "
+ "errno %d\n", (void *) vaddr, cnt, -ret);
+ goto done;
+ }
+ }
+ for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
+ for (; ntids--; tid++) {
+ if (tid == tidcnt)
+ tid = 0;
+ if (!dd->ipath_pageshadow[porttid + tid])
+ break;
+ }
+ if (ntids < 0) {
+ /*
+ * oops, wrapped all the way through their TIDs,
+ * and didn't have enough free; see comments at
+ * start of routine
+ */
+ ipath_dbg("Not enough free TIDs for %u pages "
+ "(index %d), failing\n", cnt, i);
+ i--; /* last tidlist[i] not filled in */
+ ret = -ENOMEM;
+ break;
+ }
+ tidlist[i] = tid + tidoff;
+ ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
+ "vaddr %lx\n", i, tid + tidoff, vaddr);
+ /* we "know" system pages and TID pages are same size */
+ dd->ipath_pageshadow[porttid + tid] = pagep[i];
+ dd->ipath_physshadow[porttid + tid] = ipath_map_page(
+ dd->pcidev, pagep[i], 0, PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+ /*
+ * don't need atomic or it's overhead
+ */
+ __set_bit(tid, tidmap);
+ physaddr = dd->ipath_physshadow[porttid + tid];
+ ipath_stats.sps_pagelocks++;
+ ipath_cdbg(VERBOSE,
+ "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
+ tid, vaddr, (unsigned long long) physaddr,
+ pagep[i]);
+ dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
+ physaddr);
+ /*
+ * don't check this tid in ipath_pageshadow, since we
+ * just filled it in; start with the next one.
+ */
+ tid++;
+ }
+
+ if (ret) {
+ u32 limit;
+ cleanup:
+ /* jump here if copy out of updated info failed... */
+ ipath_dbg("After failure (ret=%d), undo %d of %d entries\n",
+ -ret, i, cnt);
+ /* same code that's in ipath_free_tid() */
+ limit = sizeof(tidmap) * BITS_PER_BYTE;
+ if (limit > tidcnt)
+ /* just in case size changes in future */
+ limit = tidcnt;
+ tid = find_first_bit((const unsigned long *)tidmap, limit);
+ for (; tid < limit; tid++) {
+ if (!test_bit(tid, tidmap))
+ continue;
+ if (dd->ipath_pageshadow[porttid + tid]) {
+ ipath_cdbg(VERBOSE, "Freeing TID %u\n",
+ tid);
+ dd->ipath_f_put_tid(dd, &tidbase[tid],
+ RCVHQ_RCV_TYPE_EXPECTED,
+ dd->ipath_tidinvalid);
+ pci_unmap_page(dd->pcidev,
+ dd->ipath_physshadow[porttid + tid],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ dd->ipath_pageshadow[porttid + tid] = NULL;
+ ipath_stats.sps_pageunlocks++;
+ }
+ }
+ ipath_release_user_pages(pagep, cnt);
+ } else {
+ /*
+ * Copy the updated array, with ipath_tid's filled in, back
+ * to user. Since we did the copy in already, this "should
+ * never fail". If it does, we have to clean up...
+ */
+ if (copy_to_user((void __user *)
+ (unsigned long) ti->tidlist,
+ tidlist, cnt * sizeof(*tidlist))) {
+ ret = -EFAULT;
+ goto cleanup;
+ }
+ if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
+ tidmap, sizeof tidmap)) {
+ ret = -EFAULT;
+ goto cleanup;
+ }
+ if (tid == tidcnt)
+ tid = 0;
+ if (!pd->port_subport_cnt)
+ pd->port_tidcursor = tid;
+ else
+ tidcursor_fp(fp) = tid;
+ }
+
+done:
+ if (ret)
+ ipath_dbg("Failed to map %u TID pages, failing with %d\n",
+ ti->tidcnt, -ret);
+ return ret;
+}
+
+/**
+ * ipath_tid_free - free a port TID
+ * @pd: the port
+ * @subport: the subport
+ * @ti: the TID info
+ *
+ * right now we are unlocking one page at a time, but since
+ * the intended use of this routine is for a single group of
+ * virtually contiguous pages, that should change to improve
+ * performance. We check that the TID is in range for this port
+ * but otherwise don't check validity; if user has an error and
+ * frees the wrong tid, it's only their own data that can thereby
+ * be corrupted. We do check that the TID was in use, for sanity.
+ * We always use our idea of the saved address, not the address that
+ * they pass in to us.
+ */
+
+static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
+ const struct ipath_tid_info *ti)
+{
+ int ret = 0;
+ u32 tid, porttid, cnt, limit, tidcnt;
+ struct ipath_devdata *dd = pd->port_dd;
+ u64 __iomem *tidbase;
+ unsigned long tidmap[8];
+
+ if (!dd->ipath_pageshadow) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
+ sizeof tidmap)) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ porttid = pd->port_port * dd->ipath_rcvtidcnt;
+ if (!pd->port_subport_cnt)
+ tidcnt = dd->ipath_rcvtidcnt;
+ else if (!subport) {
+ tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+ (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+ porttid += dd->ipath_rcvtidcnt - tidcnt;
+ } else {
+ tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+ porttid += tidcnt * (subport - 1);
+ }
+ tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
+ dd->ipath_rcvtidbase +
+ porttid * sizeof(*tidbase));
+
+ limit = sizeof(tidmap) * BITS_PER_BYTE;
+ if (limit > tidcnt)
+ /* just in case size changes in future */
+ limit = tidcnt;
+ tid = find_first_bit(tidmap, limit);
+ ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) "
+ "set is %d, porttid %u\n", pd->port_port, ti->tidcnt,
+ limit, tid, porttid);
+ for (cnt = 0; tid < limit; tid++) {
+ /*
+ * small optimization; if we detect a run of 3 or so without
+ * any set, use find_first_bit again. That's mainly to
+ * accelerate the case where we wrapped, so we have some at
+ * the beginning, and some at the end, and a big gap
+ * in the middle.
+ */
+ if (!test_bit(tid, tidmap))
+ continue;
+ cnt++;
+ if (dd->ipath_pageshadow[porttid + tid]) {
+ struct page *p;
+ p = dd->ipath_pageshadow[porttid + tid];
+ dd->ipath_pageshadow[porttid + tid] = NULL;
+ ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
+ pid_nr(pd->port_pid), tid);
+ dd->ipath_f_put_tid(dd, &tidbase[tid],
+ RCVHQ_RCV_TYPE_EXPECTED,
+ dd->ipath_tidinvalid);
+ pci_unmap_page(dd->pcidev,
+ dd->ipath_physshadow[porttid + tid],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ ipath_release_user_pages(&p, 1);
+ ipath_stats.sps_pageunlocks++;
+ } else
+ ipath_dbg("Unused tid %u, ignoring\n", tid);
+ }
+ if (cnt != ti->tidcnt)
+ ipath_dbg("passed in tidcnt %d, only %d bits set in map\n",
+ ti->tidcnt, cnt);
+done:
+ if (ret)
+ ipath_dbg("Failed to unmap %u TID pages, failing with %d\n",
+ ti->tidcnt, -ret);
+ return ret;
+}
+
+/**
+ * ipath_set_part_key - set a partition key
+ * @pd: the port
+ * @key: the key
+ *
+ * We can have up to 4 active at a time (other than the default, which is
+ * always allowed). This is somewhat tricky, since multiple ports may set
+ * the same key, so we reference count them, and clean up at exit. All 4
+ * partition keys are packed into a single infinipath register. It's an
+ * error for a process to set the same pkey multiple times. We provide no
+ * mechanism to de-allocate a pkey at this time, we may eventually need to
+ * do that. I've used the atomic operations, and no locking, and only make
+ * a single pass through what's available. This should be more than
+ * adequate for some time. I'll think about spinlocks or the like if and as
+ * it's necessary.
+ */
+static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
+{
+ struct ipath_devdata *dd = pd->port_dd;
+ int i, any = 0, pidx = -1;
+ u16 lkey = key & 0x7FFF;
+ int ret;
+
+ if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) {
+ /* nothing to do; this key always valid */
+ ret = 0;
+ goto bail;
+ }
+
+ ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys "
+ "%hx:%x %hx:%x %hx:%x %hx:%x\n",
+ pd->port_port, key, dd->ipath_pkeys[0],
+ atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
+ atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
+ atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
+ atomic_read(&dd->ipath_pkeyrefs[3]));
+
+ if (!lkey) {
+ ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n",
+ pd->port_port);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ /*
+ * Set the full membership bit, because it has to be
+ * set in the register or the packet, and it seems
+ * cleaner to set in the register than to force all
+ * callers to set it. (see bug 4331)
+ */
+ key |= 0x8000;
+
+ for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
+ if (!pd->port_pkeys[i] && pidx == -1)
+ pidx = i;
+ if (pd->port_pkeys[i] == key) {
+ ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
+ "(%x) more than once\n",
+ pd->port_port, key);
+ ret = -EEXIST;
+ goto bail;
+ }
+ }
+ if (pidx == -1) {
+ ipath_dbg("All pkeys for port %u already in use, "
+ "can't set %x\n", pd->port_port, key);
+ ret = -EBUSY;
+ goto bail;
+ }
+ for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+ if (!dd->ipath_pkeys[i]) {
+ any++;
+ continue;
+ }
+ if (dd->ipath_pkeys[i] == key) {
+ atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];
+
+ if (atomic_inc_return(pkrefs) > 1) {
+ pd->port_pkeys[pidx] = key;
+ ipath_cdbg(VERBOSE, "p%u set key %x "
+ "matches #%d, count now %d\n",
+ pd->port_port, key, i,
+ atomic_read(pkrefs));
+ ret = 0;
+ goto bail;
+ } else {
+ /*
+ * lost race, decrement count, catch below
+ */
+ atomic_dec(pkrefs);
+ ipath_cdbg(VERBOSE, "Lost race, count was "
+ "0, after dec, it's %d\n",
+ atomic_read(pkrefs));
+ any++;
+ }
+ }
+ if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
+ /*
+ * It makes no sense to have both the limited and
+ * full membership PKEY set at the same time since
+ * the unlimited one will disable the limited one.
+ */
+ ret = -EEXIST;
+ goto bail;
+ }
+ }
+ if (!any) {
+ ipath_dbg("port %u, all pkeys already in use, "
+ "can't set %x\n", pd->port_port, key);
+ ret = -EBUSY;
+ goto bail;
+ }
+ for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+ if (!dd->ipath_pkeys[i] &&
+ atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
+ u64 pkey;
+
+ /* for ipathstats, etc. */
+ ipath_stats.sps_pkeys[i] = lkey;
+ pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
+ pkey =
+ (u64) dd->ipath_pkeys[0] |
+ ((u64) dd->ipath_pkeys[1] << 16) |
+ ((u64) dd->ipath_pkeys[2] << 32) |
+ ((u64) dd->ipath_pkeys[3] << 48);
+ ipath_cdbg(PROC, "p%u set key %x in #%d, "
+ "portidx %d, new pkey reg %llx\n",
+ pd->port_port, key, i, pidx,
+ (unsigned long long) pkey);
+ ipath_write_kreg(
+ dd, dd->ipath_kregs->kr_partitionkey, pkey);
+
+ ret = 0;
+ goto bail;
+ }
+ }
+ ipath_dbg("port %u, all pkeys already in use 2nd pass, "
+ "can't set %x\n", pd->port_port, key);
+ ret = -EBUSY;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_manage_rcvq - manage a port's receive queue
+ * @pd: the port
+ * @subport: the subport
+ * @start_stop: action to carry out
+ *
+ * start_stop == 0 disables receive on the port, for use in queue
+ * overflow conditions. start_stop == 1 re-enables, to be used to
+ * re-init the software copy of the head register.
+ */
+static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
+ int start_stop)
+{
+ struct ipath_devdata *dd = pd->port_dd;
+
+ ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
+ start_stop ? "en" : "dis", dd->ipath_unit,
+ pd->port_port, subport);
+ if (subport)
+ goto bail;
+ /* atomically clear receive enable port. */
+ if (start_stop) {
+ /*
+ * On enable, force in-memory copy of the tail register to
+ * 0, so that protocol code doesn't have to worry about
+ * whether or not the chip has yet updated the in-memory
+ * copy or not on return from the system call. The chip
+ * always resets its tail register back to 0 on a
+ * transition from disabled to enabled. This could cause a
+ * problem if software was broken, and did the enable w/o
+ * the disable, but eventually the in-memory copy will be
+ * updated and correct itself, even in the face of software
+ * bugs.
+ */
+ if (pd->port_rcvhdrtail_kvaddr)
+ ipath_clear_rcvhdrtail(pd);
+ set_bit(dd->ipath_r_portenable_shift + pd->port_port,
+ &dd->ipath_rcvctrl);
+ } else
+ clear_bit(dd->ipath_r_portenable_shift + pd->port_port,
+ &dd->ipath_rcvctrl);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+ /* now be sure chip saw it before we return */
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ if (start_stop) {
+ /*
+ * And try to be sure that tail reg update has happened too.
+ * This should in theory interlock with the RXE changes to
+ * the tail register. Don't assign it to the tail register
+ * in memory copy, since we could overwrite an update by the
+ * chip if we did.
+ */
+ ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
+ }
+ /* always; new head should be equal to new tail; see above */
+bail:
+ return 0;
+}
+
+static void ipath_clean_part_key(struct ipath_portdata *pd,
+ struct ipath_devdata *dd)
+{
+ int i, j, pchanged = 0;
+ u64 oldpkey;
+
+ /* for debugging only */
+ oldpkey = (u64) dd->ipath_pkeys[0] |
+ ((u64) dd->ipath_pkeys[1] << 16) |
+ ((u64) dd->ipath_pkeys[2] << 32) |
+ ((u64) dd->ipath_pkeys[3] << 48);
+
+ for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
+ if (!pd->port_pkeys[i])
+ continue;
+ ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
+ pd->port_pkeys[i]);
+ for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
+ /* check for match independent of the global bit */
+ if ((dd->ipath_pkeys[j] & 0x7fff) !=
+ (pd->port_pkeys[i] & 0x7fff))
+ continue;
+ if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
+ ipath_cdbg(VERBOSE, "p%u clear key "
+ "%x matches #%d\n",
+ pd->port_port,
+ pd->port_pkeys[i], j);
+ ipath_stats.sps_pkeys[j] =
+ dd->ipath_pkeys[j] = 0;
+ pchanged++;
+ }
+ else ipath_cdbg(
+ VERBOSE, "p%u key %x matches #%d, "
+ "but ref still %d\n", pd->port_port,
+ pd->port_pkeys[i], j,
+ atomic_read(&dd->ipath_pkeyrefs[j]));
+ break;
+ }
+ pd->port_pkeys[i] = 0;
+ }
+ if (pchanged) {
+ u64 pkey = (u64) dd->ipath_pkeys[0] |
+ ((u64) dd->ipath_pkeys[1] << 16) |
+ ((u64) dd->ipath_pkeys[2] << 32) |
+ ((u64) dd->ipath_pkeys[3] << 48);
+ ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
+ "new pkey reg %llx\n", pd->port_port,
+ (unsigned long long) oldpkey,
+ (unsigned long long) pkey);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
+ pkey);
+ }
+}
+
+/*
+ * Initialize the port data with the receive buffer sizes
+ * so this can be done while the master port is locked.
+ * Otherwise, there is a race with a slave opening the port
+ * and seeing these fields uninitialized.
+ */
+static void init_user_egr_sizes(struct ipath_portdata *pd)
+{
+ struct ipath_devdata *dd = pd->port_dd;
+ unsigned egrperchunk, egrcnt, size;
+
+ /*
+ * to avoid wasting a lot of memory, we allocate 32KB chunks of
+ * physically contiguous memory, advance through it until used up
+ * and then allocate more. Of course, we need memory to store those
+ * extra pointers, now. Started out with 256KB, but under heavy
+ * memory pressure (creating large files and then copying them over
+ * NFS while doing lots of MPI jobs), we hit some allocation
+ * failures, even though we can sleep... (2.6.10) Still get
+ * failures at 64K. 32K is the lowest we can go without wasting
+ * additional memory.
+ */
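+ /*
+ * Example, assuming a (hypothetical) 2KB eager buffer size: each
+ * 32KB chunk then holds 16 buffers, so a port with 2048 eager
+ * buffers needs 128 such chunks.
+ */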
+ size = 0x8000;
+ egrperchunk = size / dd->ipath_rcvegrbufsize;
+ egrcnt = dd->ipath_rcvegrcnt;
+ pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
+ pd->port_rcvegrbufs_perchunk = egrperchunk;
+ pd->port_rcvegrbuf_size = size;
+}
+
+/**
+ * ipath_create_user_egr - allocate eager TID buffers
+ * @pd: the port to allocate TID buffers for
+ *
+ * This routine is now quite different for user and kernel, because
+ * the kernel uses skb's, for the accelerated network performance.
+ * This is the user port version.
+ *
+ * Allocate the eager TID buffers and program them into infinipath.
+ * They are no longer completely contiguous; we do multiple allocation
+ * calls.
+ */
+static int ipath_create_user_egr(struct ipath_portdata *pd)
+{
+ struct ipath_devdata *dd = pd->port_dd;
+ unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
+ size_t size;
+ int ret;
+ gfp_t gfp_flags;
+
+ /*
+ * GFP_USER, but without GFP_FS, so buffer cache can be
+ * coalesced (we hope); otherwise, even at order 4,
+ * heavy filesystem activity makes these fail, and we can
+ * use compound pages.
+ */
+ gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;
+
+ egrcnt = dd->ipath_rcvegrcnt;
+ /* TID number offset for this port */
+ egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt;
+ egrsize = dd->ipath_rcvegrbufsize;
+ ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
+ "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
+
+ chunk = pd->port_rcvegrbuf_chunks;
+ egrperchunk = pd->port_rcvegrbufs_perchunk;
+ size = pd->port_rcvegrbuf_size;
+ pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]),
+ GFP_KERNEL);
+ if (!pd->port_rcvegrbuf) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ pd->port_rcvegrbuf_phys =
+ kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]),
+ GFP_KERNEL);
+ if (!pd->port_rcvegrbuf_phys) {
+ ret = -ENOMEM;
+ goto bail_rcvegrbuf;
+ }
+ for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
+
+ pd->port_rcvegrbuf[e] = dma_alloc_coherent(
+ &dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
+ gfp_flags);
+
+ if (!pd->port_rcvegrbuf[e]) {
+ ret = -ENOMEM;
+ goto bail_rcvegrbuf_phys;
+ }
+ }
+
+ pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];
+
+ for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
+ dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
+ unsigned i;
+
+ for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
+ dd->ipath_f_put_tid(dd, e + egroff +
+ (u64 __iomem *)
+ ((char __iomem *)
+ dd->ipath_kregbase +
+ dd->ipath_rcvegrbase),
+ RCVHQ_RCV_TYPE_EAGER, pa);
+ pa += egrsize;
+ }
+ cond_resched(); /* don't hog the cpu */
+ }
+
+ ret = 0;
+ goto bail;
+
+bail_rcvegrbuf_phys:
+ for (e = 0; e < pd->port_rcvegrbuf_chunks &&
+ pd->port_rcvegrbuf[e]; e++) {
+ dma_free_coherent(&dd->pcidev->dev, size,
+ pd->port_rcvegrbuf[e],
+ pd->port_rcvegrbuf_phys[e]);
+
+ }
+ kfree(pd->port_rcvegrbuf_phys);
+ pd->port_rcvegrbuf_phys = NULL;
+bail_rcvegrbuf:
+ kfree(pd->port_rcvegrbuf);
+ pd->port_rcvegrbuf = NULL;
+bail:
+ return ret;
+}
+
+
+/* common code for the mappings on dma_alloc_coherent mem */
+static int ipath_mmap_mem(struct vm_area_struct *vma,
+ struct ipath_portdata *pd, unsigned len, int write_ok,
+ void *kvaddr, char *what)
+{
+ struct ipath_devdata *dd = pd->port_dd;
+ unsigned long pfn;
+ int ret;
+
+ if ((vma->vm_end - vma->vm_start) > len) {
+ dev_info(&dd->pcidev->dev,
+ "FAIL on %s: len %lx > %x\n", what,
+ vma->vm_end - vma->vm_start, len);
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ if (!write_ok) {
+ if (vma->vm_flags & VM_WRITE) {
+ dev_info(&dd->pcidev->dev,
+ "%s must be mapped readonly\n", what);
+ ret = -EPERM;
+ goto bail;
+ }
+
+ /* don't allow them to later change with mprotect */
+ vma->vm_flags &= ~VM_MAYWRITE;
+ }
+
+ pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
+ ret = remap_pfn_range(vma, vma->vm_start, pfn,
+ len, vma->vm_page_prot);
+ if (ret)
+ dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
+ "bytes r%c failed: %d\n", what, pd->port_port,
+ pfn, len, write_ok ? 'w' : 'o', ret);
+ else
+ ipath_cdbg(VERBOSE, "%s port%u mmapped %lx, %x bytes "
+ "r%c\n", what, pd->port_port, pfn, len,
+ write_ok ? 'w' : 'o');
+bail:
+ return ret;
+}
+
+static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
+ u64 ureg)
+{
+ unsigned long phys;
+ int ret;
+
+ /*
+ * This is real hardware, so use io_remap. This is the mechanism
+ * for the user process to update the head registers for their port
+ * in the chip.
+ */
+ if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
+ dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
+ "%lx > PAGE\n", vma->vm_end - vma->vm_start);
+ ret = -EFAULT;
+ } else {
+ phys = dd->ipath_physaddr + ureg;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ phys >> PAGE_SHIFT,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+ }
+ return ret;
+}
+
+static int mmap_piobufs(struct vm_area_struct *vma,
+ struct ipath_devdata *dd,
+ struct ipath_portdata *pd,
+ unsigned piobufs, unsigned piocnt)
+{
+ unsigned long phys;
+ int ret;
+
+ /*
+ * When we map the PIO buffers in the chip, we want to map them as
+ * writeonly, no read possible. This prevents access to previous
+ * process data, and catches users who might try to read the i/o
+ * space due to a bug.
+ */
+ if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
+ dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
+ "reqlen %lx > PAGE\n",
+ vma->vm_end - vma->vm_start);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ phys = dd->ipath_physaddr + piobufs;
+
+#if defined(__powerpc__)
+ /* There isn't a generic way to specify writethrough mappings */
+ pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+ pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
+ pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
+#endif
+
+ /*
+ * don't allow the mapping to later be changed to readable with
+ * mprotect (it is normally not mapped readable in the first place)
+ */
+ vma->vm_flags &= ~VM_MAYREAD;
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
+
+ ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+bail:
+ return ret;
+}
+
+static int mmap_rcvegrbufs(struct vm_area_struct *vma,
+ struct ipath_portdata *pd)
+{
+ struct ipath_devdata *dd = pd->port_dd;
+ unsigned long start, size;
+ size_t total_size, i;
+ unsigned long pfn;
+ int ret;
+
+ size = pd->port_rcvegrbuf_size;
+ total_size = pd->port_rcvegrbuf_chunks * size;
+ if ((vma->vm_end - vma->vm_start) > total_size) {
+ dev_info(&dd->pcidev->dev, "FAIL on egr bufs: "
+ "reqlen %lx > actual %lx\n",
+ vma->vm_end - vma->vm_start,
+ (unsigned long) total_size);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ if (vma->vm_flags & VM_WRITE) {
+ dev_info(&dd->pcidev->dev, "Can't map eager buffers as "
+ "writable (flags=%lx)\n", vma->vm_flags);
+ ret = -EPERM;
+ goto bail;
+ }
+ /* don't allow them to later change to writeable with mprotect */
+ vma->vm_flags &= ~VM_MAYWRITE;
+
+ start = vma->vm_start;
+
+ for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
+ pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
+ ret = remap_pfn_range(vma, start, pfn, size,
+ vma->vm_page_prot);
+ if (ret < 0)
+ goto bail;
+ }
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/*
+ * ipath_file_vma_fault - handle a VMA page fault.
+ */
+static int ipath_file_vma_fault(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ struct page *page;
+
+ page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
+ if (!page)
+ return VM_FAULT_SIGBUS;
+ get_page(page);
+ vmf->page = page;
+
+ return 0;
+}
+
+static struct vm_operations_struct ipath_file_vm_ops = {
+ .fault = ipath_file_vma_fault,
+};
+
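+/*
+ * Map one of the shared-port kernel virtual buffers.  Returns 1 if the
+ * mapping was handled here, 0 if the port is not shared or the address
+ * is not one of the subport buffers (so the caller falls back to the
+ * physical-address mappings), or a negative errno on failure.
+ */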
+static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
+ struct ipath_portdata *pd, unsigned subport)
+{
+ unsigned long len;
+ struct ipath_devdata *dd;
+ void *addr;
+ size_t size;
+ int ret = 0;
+
+ /* If the port is not shared, all addresses should be physical */
+ if (!pd->port_subport_cnt)
+ goto bail;
+
+ dd = pd->port_dd;
+ size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
+
+ /*
+ * Each process has all the subport uregbase, rcvhdrq, and
+ * rcvegrbufs mmapped - as an array for all the processes,
+ * and also separately for this process.
+ */
+ if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
+ addr = pd->subport_uregbase;
+ size = PAGE_SIZE * pd->port_subport_cnt;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
+ addr = pd->subport_rcvhdr_base;
+ size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
+ addr = pd->subport_rcvegrbuf;
+ size *= pd->port_subport_cnt;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
+ PAGE_SIZE * subport)) {
+ addr = pd->subport_uregbase + PAGE_SIZE * subport;
+ size = PAGE_SIZE;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * subport)) {
+ addr = pd->subport_rcvhdr_base +
+ pd->port_rcvhdrq_size * subport;
+ size = pd->port_rcvhdrq_size;
+ } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
+ size * subport)) {
+ addr = pd->subport_rcvegrbuf + size * subport;
+ /* rcvegrbufs are read-only on the slave */
+ if (vma->vm_flags & VM_WRITE) {
+ dev_info(&dd->pcidev->dev,
+ "Can't map eager buffers as "
+ "writable (flags=%lx)\n", vma->vm_flags);
+ ret = -EPERM;
+ goto bail;
+ }
+ /*
+ * Don't allow permission to later change to writeable
+ * with mprotect.
+ */
+ vma->vm_flags &= ~VM_MAYWRITE;
+ } else {
+ goto bail;
+ }
+ len = vma->vm_end - vma->vm_start;
+ if (len > size) {
+ ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
+ vma->vm_ops = &ipath_file_vm_ops;
+ vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+ ret = 1;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_mmap - mmap various structures into user space
+ * @fp: the file pointer
+ * @vma: the VM area
+ *
+ * We use this to have a shared buffer between the kernel and the user code
+ * for the rcvhdr queue, egr buffers, and the per-port user regs and pio
+ * buffers in the chip. We have the open and close entries so we can bump
+ * the ref count and keep the driver from being unloaded while still mapped.
+ */
+static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+ struct ipath_portdata *pd;
+ struct ipath_devdata *dd;
+ u64 pgaddr, ureg;
+ unsigned piobufs, piocnt;
+ int ret;
+
+ pd = port_fp(fp);
+ if (!pd) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ dd = pd->port_dd;
+
+ /*
+ * This maps the shared buffers set up by ipath_do_user_init() into
+ * the user process. The address referred to by vm_pgoff is the
+ * file offset passed via mmap(). For shared ports, this is the
+ * kernel vmalloc() address of the pages to share with the master.
+ * For non-shared or master ports, this is a physical address.
+ * We only do one mmap for each space mapped.
+ */
+ pgaddr = vma->vm_pgoff << PAGE_SHIFT;
+
+ /*
+ * Check for 0 in case one of the allocations failed, but user
+ * called mmap anyway.
+ */
+ if (!pgaddr) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
+ (unsigned long long) pgaddr, vma->vm_start,
+ vma->vm_end - vma->vm_start, dd->ipath_unit,
+ pd->port_port, subport_fp(fp));
+
+ /*
+ * Physical addresses must fit in 40 bits for our hardware.
+ * Check for kernel virtual addresses first; anything else must
+ * match a HW or memory address.
+ */
+ ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
+ if (ret) {
+ if (ret > 0)
+ ret = 0;
+ goto bail;
+ }
+
+ ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
+ if (!pd->port_subport_cnt) {
+ /* port is not shared */
+ piocnt = pd->port_piocnt;
+ piobufs = pd->port_piobufs;
+ } else if (!subport_fp(fp)) {
+ /* caller is the master */
+ piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
+ (pd->port_piocnt % pd->port_subport_cnt);
+ piobufs = pd->port_piobufs +
+ dd->ipath_palign * (pd->port_piocnt - piocnt);
+ } else {
+ unsigned slave = subport_fp(fp) - 1;
+
+ /* caller is a slave */
+ piocnt = pd->port_piocnt / pd->port_subport_cnt;
+ piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
+ }
+
+ if (pgaddr == ureg)
+ ret = mmap_ureg(vma, dd, ureg);
+ else if (pgaddr == piobufs)
+ ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
+ else if (pgaddr == dd->ipath_pioavailregs_phys)
+ /* in-memory copy of pioavail registers */
+ ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
+ (void *) dd->ipath_pioavailregs_dma,
+ "pioavail registers");
+ else if (pgaddr == pd->port_rcvegr_phys)
+ ret = mmap_rcvegrbufs(vma, pd);
+ else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
+ /*
+ * The rcvhdrq itself; readonly except on HT (so have
+ * to allow writable mapping), multiple pages, contiguous
+ * from an i/o perspective.
+ */
+ ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
+ pd->port_rcvhdrq,
+ "rcvhdrq");
+ else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
+ /* in-memory copy of rcvhdrq tail register */
+ ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
+ pd->port_rcvhdrtail_kvaddr,
+ "rcvhdrq tail");
+ else
+ ret = -EINVAL;
+
+ vma->vm_private_data = NULL;
+
+ if (ret < 0)
+ dev_info(&dd->pcidev->dev,
+ "Failure %d on off %llx len %lx\n",
+ -ret, (unsigned long long)pgaddr,
+ vma->vm_end - vma->vm_start);
+bail:
+ return ret;
+}
+
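+/*
+ * Report POLLIN when overflow polling is enabled and the header-queue-full
+ * count has advanced since the last time this port was polled.
+ */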
+static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
+{
+ unsigned pollflag = 0;
+
+ if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) &&
+ pd->port_hdrqfull != pd->port_hdrqfull_poll) {
+ pollflag |= POLLIN | POLLRDNORM;
+ pd->port_hdrqfull_poll = pd->port_hdrqfull;
+ }
+
+ return pollflag;
+}
+
+static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
+ struct file *fp,
+ struct poll_table_struct *pt)
+{
+ unsigned pollflag = 0;
+ struct ipath_devdata *dd;
+
+ dd = pd->port_dd;
+
+ /* variable access in ipath_poll_hdrqfull() needs this */
+ rmb();
+ pollflag = ipath_poll_hdrqfull(pd);
+
+ if (pd->port_urgent != pd->port_urgent_poll) {
+ pollflag |= POLLIN | POLLRDNORM;
+ pd->port_urgent_poll = pd->port_urgent;
+ }
+
+ if (!pollflag) {
+ /* this saves a spin_lock/unlock in interrupt handler... */
+ set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
+ /* flush waiting flag so don't miss an event... */
+ wmb();
+ poll_wait(fp, &pd->port_wait, pt);
+ }
+
+ return pollflag;
+}
+
+static unsigned int ipath_poll_next(struct ipath_portdata *pd,
+ struct file *fp,
+ struct poll_table_struct *pt)
+{
+ u32 head;
+ u32 tail;
+ unsigned pollflag = 0;
+ struct ipath_devdata *dd;
+
+ dd = pd->port_dd;
+
+ /* variable access in ipath_poll_hdrqfull() needs this */
+ rmb();
+ pollflag = ipath_poll_hdrqfull(pd);
+
+ head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
+ if (pd->port_rcvhdrtail_kvaddr)
+ tail = ipath_get_rcvhdrtail(pd);
+ else
+ tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
+
+ if (head != tail)
+ pollflag |= POLLIN | POLLRDNORM;
+ else {
+ /* this saves a spin_lock/unlock in interrupt handler */
+ set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
+ /* flush waiting flag so we don't miss an event */
+ wmb();
+
+ set_bit(pd->port_port + dd->ipath_r_intravail_shift,
+ &dd->ipath_rcvctrl);
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+
+ if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
+ ipath_write_ureg(dd, ur_rcvhdrhead,
+ dd->ipath_rhdrhead_intr_off | head,
+ pd->port_port);
+
+ poll_wait(fp, &pd->port_wait, pt);
+ }
+
+ return pollflag;
+}
+
+static unsigned int ipath_poll(struct file *fp,
+ struct poll_table_struct *pt)
+{
+ struct ipath_portdata *pd;
+ unsigned pollflag;
+
+ pd = port_fp(fp);
+ if (!pd)
+ pollflag = 0;
+ else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
+ pollflag = ipath_poll_urgent(pd, fp, pt);
+ else
+ pollflag = ipath_poll_next(pd, fp, pt);
+
+ return pollflag;
+}
+
+static int ipath_supports_subports(int user_swmajor, int user_swminor)
+{
+ /* no subport implementation prior to software version 1.3 */
+ return (user_swmajor > 1) || (user_swminor >= 3);
+}
+
+static int ipath_compatible_subports(int user_swmajor, int user_swminor)
+{
+ /* this code is written long-hand for clarity */
+ if (IPATH_USER_SWMAJOR != user_swmajor) {
+ /* no promise of compatibility if major mismatch */
+ return 0;
+ }
+ if (IPATH_USER_SWMAJOR == 1) {
+ switch (IPATH_USER_SWMINOR) {
+ case 0:
+ case 1:
+ case 2:
+ /* no subport implementation so cannot be compatible */
+ return 0;
+ case 3:
+ /* 3 is only compatible with itself */
+ return user_swminor == 3;
+ default:
+ /* >= 4 are compatible (or are expected to be) */
+ return user_swminor >= 4;
+ }
+ }
+ /* make no promises yet for future major versions */
+ return 0;
+}
+
+static int init_subports(struct ipath_devdata *dd,
+ struct ipath_portdata *pd,
+ const struct ipath_user_info *uinfo)
+{
+ int ret = 0;
+ unsigned num_subports;
+ size_t size;
+
+ /*
+ * If the user is requesting zero subports,
+ * skip the subport allocation.
+ */
+ if (uinfo->spu_subport_cnt <= 0)
+ goto bail;
+
+ /* Self-consistency check for ipath_compatible_subports() */
+ if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
+ !ipath_compatible_subports(IPATH_USER_SWMAJOR,
+ IPATH_USER_SWMINOR)) {
+ dev_info(&dd->pcidev->dev,
+ "Inconsistent ipath_compatible_subports()\n");
+ goto bail;
+ }
+
+ /* Check for subport compatibility */
+ if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
+ uinfo->spu_userversion & 0xffff)) {
+ dev_info(&dd->pcidev->dev,
+ "Mismatched user version (%d.%d) and driver "
+ "version (%d.%d) while port sharing. Ensure "
+ "that driver and library are from the same "
+ "release.\n",
+ (int) (uinfo->spu_userversion >> 16),
+ (int) (uinfo->spu_userversion & 0xffff),
+ IPATH_USER_SWMAJOR,
+ IPATH_USER_SWMINOR);
+ goto bail;
+ }
+ if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ num_subports = uinfo->spu_subport_cnt;
+ pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports);
+ if (!pd->subport_uregbase) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ /* Note: pd->port_rcvhdrq_size isn't initialized yet. */
+ size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
+ sizeof(u32), PAGE_SIZE) * num_subports;
+ pd->subport_rcvhdr_base = vmalloc(size);
+ if (!pd->subport_rcvhdr_base) {
+ ret = -ENOMEM;
+ goto bail_ureg;
+ }
+
+ pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
+ pd->port_rcvegrbuf_size *
+ num_subports);
+ if (!pd->subport_rcvegrbuf) {
+ ret = -ENOMEM;
+ goto bail_rhdr;
+ }
+
+ pd->port_subport_cnt = uinfo->spu_subport_cnt;
+ pd->port_subport_id = uinfo->spu_subport_id;
+ pd->active_slaves = 1;
+ set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
+ memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports);
+ memset(pd->subport_rcvhdr_base, 0, size);
+ memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks *
+ pd->port_rcvegrbuf_size *
+ num_subports);
+ goto bail;
+
+bail_rhdr:
+ vfree(pd->subport_rcvhdr_base);
+bail_ureg:
+ vfree(pd->subport_uregbase);
+ pd->subport_uregbase = NULL;
+bail:
+ return ret;
+}
+
+static int try_alloc_port(struct ipath_devdata *dd, int port,
+ struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ struct ipath_portdata *pd;
+ int ret;
+
+ if (!(pd = dd->ipath_pd[port])) {
+ void *ptmp;
+
+ pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
+
+ /*
+ * Allocate memory for use in ipath_tid_update() just once
+ * at open, not per call. Reduces cost of expected send
+ * setup.
+ */
+ ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
+ dd->ipath_rcvtidcnt * sizeof(struct page **),
+ GFP_KERNEL);
+ if (!pd || !ptmp) {
+ ipath_dev_err(dd, "Unable to allocate portdata "
+ "memory, failing open\n");
+ ret = -ENOMEM;
+ kfree(pd);
+ kfree(ptmp);
+ goto bail;
+ }
+ dd->ipath_pd[port] = pd;
+ dd->ipath_pd[port]->port_port = port;
+ dd->ipath_pd[port]->port_dd = dd;
+ dd->ipath_pd[port]->port_tid_pg_list = ptmp;
+ init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
+ }
+ if (!pd->port_cnt) {
+ pd->userversion = uinfo->spu_userversion;
+ init_user_egr_sizes(pd);
+ if ((ret = init_subports(dd, pd, uinfo)) != 0)
+ goto bail;
+ ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
+ current->comm, current->pid, dd->ipath_unit,
+ port);
+ pd->port_cnt = 1;
+ port_fp(fp) = pd;
+ pd->port_pid = get_pid(task_pid(current));
+ strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
+ ipath_stats.sps_ports++;
+ ret = 0;
+ } else
+ ret = -EBUSY;
+
+bail:
+ return ret;
+}
+
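+/*
+ * A unit is usable only if it is present, has its registers mapped, has
+ * been assigned a LID, and is not disabled or in a down/unknown link state.
+ */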
+static inline int usable(struct ipath_devdata *dd)
+{
+ return dd &&
+ (dd->ipath_flags & IPATH_PRESENT) &&
+ dd->ipath_kregbase &&
+ dd->ipath_lid &&
+ !(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED
+ | IPATH_LINKUNK));
+}
+
+static int find_free_port(int unit, struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ struct ipath_devdata *dd = ipath_lookup(unit);
+ int ret, i;
+
+ if (!dd) {
+ ret = -ENODEV;
+ goto bail;
+ }
+
+ if (!usable(dd)) {
+ ret = -ENETDOWN;
+ goto bail;
+ }
+
+ for (i = 1; i < dd->ipath_cfgports; i++) {
+ ret = try_alloc_port(dd, i, fp, uinfo);
+ if (ret != -EBUSY)
+ goto bail;
+ }
+ ret = -EBUSY;
+
+bail:
+ return ret;
+}
+
+static int find_best_unit(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int ret = 0, i, prefunit = -1, devmax;
+ int maxofallports, npresent, nup;
+ int ndev;
+
+ devmax = ipath_count_units(&npresent, &nup, &maxofallports);
+
+ /*
+ * This code is present to allow a knowledgeable person to
+ * specify the layout of processes to processors before opening
+ * this driver, and then we'll assign the process to the "closest"
+ * InfiniPath chip to that processor (we assume reasonable connectivity,
+ * for now). This code assumes that if affinity has been set
+ * before this point, that at most one cpu is set; for now this
+ * is reasonable. I check for both cpus_empty() and cpus_full(),
+ * in case some kernel variant sets none of the bits when no
+ * affinity is set. 2.6.11 and 12 kernels have all present
+ * cpus set. Some day we'll have to fix it up further to handle
+ * a cpu subset. This algorithm fails for two HT chips connected
+ * in tunnel fashion. Eventually this needs real topology
+ * information. There may be some issues with dual core numbering
+ * as well. This needs more work prior to release.
+ */
+ if (!cpus_empty(current->cpus_allowed) &&
+ !cpus_full(current->cpus_allowed)) {
+ int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
+ for (i = 0; i < ncpus; i++)
+ if (cpu_isset(i, current->cpus_allowed)) {
+ ipath_cdbg(PROC, "%s[%u] affinity set for "
+ "cpu %d/%d\n", current->comm,
+ current->pid, i, ncpus);
+ curcpu = i;
+ nset++;
+ }
+ if (curcpu != -1 && nset != ncpus) {
+ if (npresent) {
+ prefunit = curcpu / (ncpus / npresent);
+ ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, "
+ "%d cpus/chip, select unit %d\n",
+ current->comm, current->pid,
+ npresent, ncpus, ncpus / npresent,
+ prefunit);
+ }
+ }
+ }
+
+ /*
+ * user ports start at 1, kernel port is 0
+ * For now, we do round-robin access across all chips
+ */
+
+ if (prefunit != -1)
+ devmax = prefunit + 1;
+recheck:
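+ /*
+ * Scan port index in the outer loop and unit in the inner loop, so
+ * the first free port on each usable unit is tried before the second
+ * port on any unit; this spreads processes across chips.
+ */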
+ for (i = 1; i < maxofallports; i++) {
+ for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
+ ndev++) {
+ struct ipath_devdata *dd = ipath_lookup(ndev);
+
+ if (!usable(dd))
+ continue; /* can't use this unit */
+ if (i >= dd->ipath_cfgports)
+ /*
+ * Maxed out on users of this unit. Try
+ * next.
+ */
+ continue;
+ ret = try_alloc_port(dd, i, fp, uinfo);
+ if (!ret)
+ goto done;
+ }
+ }
+
+ if (npresent) {
+ if (nup == 0) {
+ ret = -ENETDOWN;
+ ipath_dbg("No ports available (none initialized "
+ "and ready)\n");
+ } else {
+ if (prefunit > 0) {
+ /* if started above 0, retry from 0 */
+ ipath_cdbg(PROC,
+ "%s[%u] no ports on prefunit "
+ "%d, clear and re-check\n",
+ current->comm, current->pid,
+ prefunit);
+ devmax = ipath_count_units(NULL, NULL,
+ NULL);
+ prefunit = -1;
+ goto recheck;
+ }
+ ret = -EBUSY;
+ ipath_dbg("No ports available\n");
+ }
+ } else {
+ ret = -ENXIO;
+ ipath_dbg("No boards found\n");
+ }
+
+done:
+ return ret;
+}
+
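+/*
+ * Look for an already-open master port with a matching subport_id to
+ * share.  Returns 1 if one was joined, 0 if none was found (the caller
+ * then allocates its own port), or a negative errno if the sharing
+ * parameters don't match the master's.
+ */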
+static int find_shared_port(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int devmax, ndev, i;
+ int ret = 0;
+
+ devmax = ipath_count_units(NULL, NULL, NULL);
+
+ for (ndev = 0; ndev < devmax; ndev++) {
+ struct ipath_devdata *dd = ipath_lookup(ndev);
+
+ if (!usable(dd))
+ continue;
+ for (i = 1; i < dd->ipath_cfgports; i++) {
+ struct ipath_portdata *pd = dd->ipath_pd[i];
+
+ /* Skip ports which are not yet open */
+ if (!pd || !pd->port_cnt)
+ continue;
+ /* Skip port if it doesn't match the requested one */
+ if (pd->port_subport_id != uinfo->spu_subport_id)
+ continue;
+ /* Verify the sharing process matches the master */
+ if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
+ pd->userversion != uinfo->spu_userversion ||
+ pd->port_cnt >= pd->port_subport_cnt) {
+ ret = -EINVAL;
+ goto done;
+ }
+ port_fp(fp) = pd;
+ subport_fp(fp) = pd->port_cnt++;
+ pd->port_subpid[subport_fp(fp)] =
+ get_pid(task_pid(current));
+ tidcursor_fp(fp) = 0;
+ pd->active_slaves |= 1 << subport_fp(fp);
+ ipath_cdbg(PROC,
+ "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
+ current->comm, current->pid,
+ subport_fp(fp),
+ pd->port_comm, pid_nr(pd->port_pid),
+ dd->ipath_unit, pd->port_port);
+ ret = 1;
+ goto done;
+ }
+ }
+
+done:
+ return ret;
+}
+
+static int ipath_open(struct inode *in, struct file *fp)
+{
+ /* The real work is performed later in ipath_assign_port() */
+ cycle_kernel_lock();
+ fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
+ return fp->private_data ? 0 : -ENOMEM;
+}
+
+/* Get port early, so can set affinity prior to memory allocation */
+static int ipath_assign_port(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int ret;
+ int i_minor;
+ unsigned swmajor, swminor;
+
+ /* Check to be sure we haven't already initialized this file */
+ if (port_fp(fp)) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* for now, if major version is different, bail */
+ swmajor = uinfo->spu_userversion >> 16;
+ if (swmajor != IPATH_USER_SWMAJOR) {
+ ipath_dbg("User major version %d not same as driver "
+ "major %d\n", uinfo->spu_userversion >> 16,
+ IPATH_USER_SWMAJOR);
+ ret = -ENODEV;
+ goto done;
+ }
+
+ swminor = uinfo->spu_userversion & 0xffff;
+ if (swminor != IPATH_USER_SWMINOR)
+ ipath_dbg("User minor version %d not same as driver "
+ "minor %d\n", swminor, IPATH_USER_SWMINOR);
+
+ mutex_lock(&ipath_mutex);
+
+ if (ipath_compatible_subports(swmajor, swminor) &&
+ uinfo->spu_subport_cnt &&
+ (ret = find_shared_port(fp, uinfo))) {
+ if (ret > 0)
+ ret = 0;
+ goto done_chk_sdma;
+ }
+
+ i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE;
+ ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
+ (long)fp->f_path.dentry->d_inode->i_rdev, i_minor);
+
+ if (i_minor)
+ ret = find_free_port(i_minor - 1, fp, uinfo);
+ else
+ ret = find_best_unit(fp, uinfo);
+
+done_chk_sdma:
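+ /* If a port was assigned (or shared), also create the per-file user SDMA queue. */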
+ if (!ret) {
+ struct ipath_filedata *fd = fp->private_data;
+ const struct ipath_portdata *pd = fd->pd;
+ const struct ipath_devdata *dd = pd->port_dd;
+
+ fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
+ dd->ipath_unit,
+ pd->port_port,
+ fd->subport);
+
+ if (!fd->pq)
+ ret = -ENOMEM;
+ }
+
+ mutex_unlock(&ipath_mutex);
+
+done:
+ return ret;
+}
+
+
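+/*
+ * Per-port setup done for IPATH_CMD_USER_INIT: assign this port's PIO
+ * buffers, allocate its rcvhdrq and eager buffers, then enable receive.
+ * Subports just wait here until the master has finished this.
+ */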
+static int ipath_do_user_init(struct file *fp,
+ const struct ipath_user_info *uinfo)
+{
+ int ret;
+ struct ipath_portdata *pd = port_fp(fp);
+ struct ipath_devdata *dd;
+ u32 head32;
+
+ /* Subports don't need to initialize anything since master did it. */
+ if (subport_fp(fp)) {
+ ret = wait_event_interruptible(pd->port_wait,
+ !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
+ goto done;
+ }
+
+ dd = pd->port_dd;
+
+ if (uinfo->spu_rcvhdrsize) {
+ ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
+ if (ret)
+ goto done;
+ }
+
+ /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
+
+ /* some ports may get extra buffers, calculate that here */
+ if (pd->port_port <= dd->ipath_ports_extrabuf)
+ pd->port_piocnt = dd->ipath_pbufsport + 1;
+ else
+ pd->port_piocnt = dd->ipath_pbufsport;
+
+ /* for right now, kernel piobufs are at end, so port 1 is at 0 */
+ if (pd->port_port <= dd->ipath_ports_extrabuf)
+ pd->port_pio_base = (dd->ipath_pbufsport + 1)
+ * (pd->port_port - 1);
+ else
+ pd->port_pio_base = dd->ipath_ports_extrabuf +
+ dd->ipath_pbufsport * (pd->port_port - 1);
+ pd->port_piobufs = dd->ipath_piobufbase +
+ pd->port_pio_base * dd->ipath_palign;
+ ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
+ " first pio %u\n", pd->port_port, pd->port_piobufs,
+ pd->port_piocnt, pd->port_pio_base);
+ ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
+
+ /*
+ * Now allocate the rcvhdr Q and eager TIDs; skip the TID
+ * array for the time being. If pd->port_port > chip-supported,
+ * we will someday need extra logic here to handle the overflow
+ * through port 0.
+ */
+ ret = ipath_create_rcvhdrq(dd, pd);
+ if (!ret)
+ ret = ipath_create_user_egr(pd);
+ if (ret)
+ goto done;
+
+ /*
+ * set the eager head register for this port to the current values
+ * of the tail pointers, since we don't know if they were
+ * updated on last use of the port.
+ */
+ head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
+ ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
+ pd->port_lastrcvhdrqtail = -1;
+ ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
+ pd->port_port, head32);
+ pd->port_tidcursor = 0; /* start at beginning after open */
+
+ /* initialize poll variables... */
+ pd->port_urgent = 0;
+ pd->port_urgent_poll = 0;
+ pd->port_hdrqfull_poll = pd->port_hdrqfull;
+
+ /*
+ * Now enable the port for receive.
+ * For chips that DMA the tail register to memory when it changes
+ * (when the update bit transitions from 0 to 1), we turn tail
+ * updates off and then back on. This will (very briefly) affect
+ * any other open ports, but the duration is very short, and
+ * therefore isn't an issue. We explicitly set the in-memory tail
+ * copy to 0 beforehand, so we don't have to wait to be sure the
+ * DMA update has happened (the chip resets head/tail to 0 on the
+ * transition to enable).
+ */
+ set_bit(dd->ipath_r_portenable_shift + pd->port_port,
+ &dd->ipath_rcvctrl);
+ if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
+ if (pd->port_rcvhdrtail_kvaddr)
+ ipath_clear_rcvhdrtail(pd);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl &
+ ~(1ULL << dd->ipath_r_tailupd_shift));
+ }
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+ /* Notify any waiting slaves */
+ if (pd->port_subport_cnt) {
+ clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
+ wake_up(&pd->port_wait);
+ }
+done:
+ return ret;
+}
+
+/**
+ * unlock_expected_tids - unlock any expected TID entries the port still had in use
+ * @pd: port
+ *
+ * We don't actually update the chip here, because we do a bulk update
+ * below, using ipath_f_clear_tids.
+ */
+static void unlock_expected_tids(struct ipath_portdata *pd)
+{
+ struct ipath_devdata *dd = pd->port_dd;
+ int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt;
+ int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt;
+
+ ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
+ pd->port_port);
+ for (i = port_tidbase; i < maxtid; i++) {
+ struct page *ps = dd->ipath_pageshadow[i];
+
+ if (!ps)
+ continue;
+
+ dd->ipath_pageshadow[i] = NULL;
+ pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ ipath_release_user_pages_on_close(&ps, 1);
+ cnt++;
+ ipath_stats.sps_pageunlocks++;
+ }
+ if (cnt)
+ ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n",
+ pd->port_port, cnt);
+
+ if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks)
+ ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n",
+ (unsigned long long) ipath_stats.sps_pagelocks,
+ (unsigned long long)
+ ipath_stats.sps_pageunlocks);
+}
+
+static int ipath_close(struct inode *in, struct file *fp)
+{
+ int ret = 0;
+ struct ipath_filedata *fd;
+ struct ipath_portdata *pd;
+ struct ipath_devdata *dd;
+ unsigned port;
+
+ ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
+ (long)in->i_rdev, fp->private_data);
+
+ mutex_lock(&ipath_mutex);
+
+ fd = (struct ipath_filedata *) fp->private_data;
+ fp->private_data = NULL;
+ pd = fd->pd;
+ if (!pd) {
+ mutex_unlock(&ipath_mutex);
+ goto bail;
+ }
+
+ dd = pd->port_dd;
+
+ /* drain user sdma queue */
+ ipath_user_sdma_queue_drain(dd, fd->pq);
+ ipath_user_sdma_queue_destroy(fd->pq);
+
+ if (--pd->port_cnt) {
+ /*
+ * XXX If the master closes the port before the slave(s),
+ * revoke the mmap for the eager receive queue so
+ * the slave(s) don't wait for receive data forever.
+ */
+ pd->active_slaves &= ~(1 << fd->subport);
+ put_pid(pd->port_subpid[fd->subport]);
+ pd->port_subpid[fd->subport] = NULL;
+ mutex_unlock(&ipath_mutex);
+ goto bail;
+ }
+ port = pd->port_port;
+
+ if (pd->port_hdrqfull) {
+ ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
+ "during run\n", pd->port_comm, pid_nr(pd->port_pid),
+ pd->port_hdrqfull);
+ pd->port_hdrqfull = 0;
+ }
+
+ if (pd->port_rcvwait_to || pd->port_piowait_to
+ || pd->port_rcvnowait || pd->port_pionowait) {
+ ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; "
+ "%u rcv %u, pio already\n",
+ pd->port_port, pd->port_rcvwait_to,
+ pd->port_piowait_to, pd->port_rcvnowait,
+ pd->port_pionowait);
+ pd->port_rcvwait_to = pd->port_piowait_to =
+ pd->port_rcvnowait = pd->port_pionowait = 0;
+ }
+ if (pd->port_flag) {
+ ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n",
+ pd->port_port, pd->port_flag);
+ pd->port_flag = 0;
+ }
+
+ if (dd->ipath_kregbase) {
+ /* atomically clear this port's receive-enable and intr-avail bits */
+ clear_bit(dd->ipath_r_portenable_shift + port,
+ &dd->ipath_rcvctrl);
+ clear_bit(pd->port_port + dd->ipath_r_intravail_shift,
+ &dd->ipath_rcvctrl);
+ ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+ /* and read back from chip to be sure that nothing
+ * else is in flight when we do the rest */
+ (void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+
+ /* clean up the pkeys for this port user */
+ ipath_clean_part_key(pd, dd);
+ /*
+ * Be paranoid, and never write 0's to these; just use an
+ * unused part of the port 0 tail page. Of course,
+ * rcvhdraddr points to a large chunk of memory, so this
+ * could still trash things, but at least it won't trash
+ * page 0, and by disabling the port, it should stop "soon",
+ * even if a packet or two is already in flight after we
+ * disabled the port.
+ */
+ ipath_write_kreg_port(dd,
+ dd->ipath_kregs->kr_rcvhdrtailaddr, port,
+ dd->ipath_dummy_hdrq_phys);
+ ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
+ pd->port_port, dd->ipath_dummy_hdrq_phys);
+
+ ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
+ ipath_chg_pioavailkernel(dd, pd->port_pio_base,
+ pd->port_piocnt, 1);
+
+ dd->ipath_f_clear_tids(dd, pd->port_port);
+
+ if (dd->ipath_pageshadow)
+ unlock_expected_tids(pd);
+ ipath_stats.sps_ports--;
+ ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
+ pd->port_comm, pid_nr(pd->port_pid),
+ dd->ipath_unit, port);
+ }
+
+ put_pid(pd->port_pid);
+ pd->port_pid = NULL;
+ dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
+ mutex_unlock(&ipath_mutex);
+ ipath_free_pddata(dd, pd); /* after releasing the mutex */
+
+bail:
+ kfree(fd);
+ return ret;
+}
+
+static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
+ struct ipath_port_info __user *uinfo)
+{
+ struct ipath_port_info info;
+ int nup;
+ int ret;
+ size_t sz;
+
+ (void) ipath_count_units(NULL, &nup, NULL);
+ info.num_active = nup;
+ info.unit = pd->port_dd->ipath_unit;
+ info.port = pd->port_port;
+ info.subport = subport;
+ /* Don't return new fields if old library opened the port. */
+ if (ipath_supports_subports(pd->userversion >> 16,
+ pd->userversion & 0xffff)) {
+ /* Number of user ports available for this device. */
+ info.num_ports = pd->port_dd->ipath_cfgports - 1;
+ info.num_subports = pd->port_subport_cnt;
+ sz = sizeof(info);
+ } else
+ sz = sizeof(info) - 2 * sizeof(u16);
+
+ if (copy_to_user(uinfo, &info, sz)) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+static int ipath_get_slave_info(struct ipath_portdata *pd,
+ void __user *slave_mask_addr)
+{
+ int ret = 0;
+
+ if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
+ ret = -EFAULT;
+ return ret;
+}
+
+static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
+ u32 __user *inflightp)
+{
+ const u32 val = ipath_user_sdma_inflight_counter(pq);
+
+ if (put_user(val, inflightp))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int ipath_sdma_get_complete(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ u32 __user *completep)
+{
+ u32 val;
+ int err;
+
+ err = ipath_user_sdma_make_progress(dd, pq);
+ if (err < 0)
+ return err;
+
+ val = ipath_user_sdma_complete_counter(pq);
+ if (put_user(val, completep))
+ return -EFAULT;
+
+ return 0;
+}
+
+static ssize_t ipath_write(struct file *fp, const char __user *data,
+ size_t count, loff_t *off)
+{
+ const struct ipath_cmd __user *ucmd;
+ struct ipath_portdata *pd;
+ const void __user *src;
+ size_t consumed, copy;
+ struct ipath_cmd cmd;
+ ssize_t ret = 0;
+ void *dest;
+
+ if (count < sizeof(cmd.type)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ ucmd = (const struct ipath_cmd __user *) data;
+
+ if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ consumed = sizeof(cmd.type);
+
+ switch (cmd.type) {
+ case IPATH_CMD_ASSIGN_PORT:
+ case __IPATH_CMD_USER_INIT:
+ case IPATH_CMD_USER_INIT:
+ copy = sizeof(cmd.cmd.user_info);
+ dest = &cmd.cmd.user_info;
+ src = &ucmd->cmd.user_info;
+ break;
+ case IPATH_CMD_RECV_CTRL:
+ copy = sizeof(cmd.cmd.recv_ctrl);
+ dest = &cmd.cmd.recv_ctrl;
+ src = &ucmd->cmd.recv_ctrl;
+ break;
+ case IPATH_CMD_PORT_INFO:
+ copy = sizeof(cmd.cmd.port_info);
+ dest = &cmd.cmd.port_info;
+ src = &ucmd->cmd.port_info;
+ break;
+ case IPATH_CMD_TID_UPDATE:
+ case IPATH_CMD_TID_FREE:
+ copy = sizeof(cmd.cmd.tid_info);
+ dest = &cmd.cmd.tid_info;
+ src = &ucmd->cmd.tid_info;
+ break;
+ case IPATH_CMD_SET_PART_KEY:
+ copy = sizeof(cmd.cmd.part_key);
+ dest = &cmd.cmd.part_key;
+ src = &ucmd->cmd.part_key;
+ break;
+ case __IPATH_CMD_SLAVE_INFO:
+ copy = sizeof(cmd.cmd.slave_mask_addr);
+ dest = &cmd.cmd.slave_mask_addr;
+ src = &ucmd->cmd.slave_mask_addr;
+ break;
+ case IPATH_CMD_PIOAVAILUPD: /* force an update of PIOAvail reg */
+ copy = 0;
+ src = NULL;
+ dest = NULL;
+ break;
+ case IPATH_CMD_POLL_TYPE:
+ copy = sizeof(cmd.cmd.poll_type);
+ dest = &cmd.cmd.poll_type;
+ src = &ucmd->cmd.poll_type;
+ break;
+ case IPATH_CMD_ARMLAUNCH_CTRL:
+ copy = sizeof(cmd.cmd.armlaunch_ctrl);
+ dest = &cmd.cmd.armlaunch_ctrl;
+ src = &ucmd->cmd.armlaunch_ctrl;
+ break;
+ case IPATH_CMD_SDMA_INFLIGHT:
+ copy = sizeof(cmd.cmd.sdma_inflight);
+ dest = &cmd.cmd.sdma_inflight;
+ src = &ucmd->cmd.sdma_inflight;
+ break;
+ case IPATH_CMD_SDMA_COMPLETE:
+ copy = sizeof(cmd.cmd.sdma_complete);
+ dest = &cmd.cmd.sdma_complete;
+ src = &ucmd->cmd.sdma_complete;
+ break;
+ default:
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ if (copy) {
+ if ((count - consumed) < copy) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ if (copy_from_user(dest, src, copy)) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ consumed += copy;
+ }
+
+ pd = port_fp(fp);
+ if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
+ cmd.type != IPATH_CMD_ASSIGN_PORT) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
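+ /* Payload copied in and port validated; now dispatch the command. */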
+ switch (cmd.type) {
+ case IPATH_CMD_ASSIGN_PORT:
+ ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+ if (ret)
+ goto bail;
+ break;
+ case __IPATH_CMD_USER_INIT:
+ /* backwards compatibility, get port first */
+ ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+ if (ret)
+ goto bail;
+ /* and fall through to current version. */
+ case IPATH_CMD_USER_INIT:
+ ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
+ if (ret)
+ goto bail;
+ ret = ipath_get_base_info(
+ fp, (void __user *) (unsigned long)
+ cmd.cmd.user_info.spu_base_info,
+ cmd.cmd.user_info.spu_base_info_size);
+ break;
+ case IPATH_CMD_RECV_CTRL:
+ ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
+ break;
+ case IPATH_CMD_PORT_INFO:
+ ret = ipath_port_info(pd, subport_fp(fp),
+ (struct ipath_port_info __user *)
+ (unsigned long) cmd.cmd.port_info);
+ break;
+ case IPATH_CMD_TID_UPDATE:
+ ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
+ break;
+ case IPATH_CMD_TID_FREE:
+ ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
+ break;
+ case IPATH_CMD_SET_PART_KEY:
+ ret = ipath_set_part_key(pd, cmd.cmd.part_key);
+ break;
+ case __IPATH_CMD_SLAVE_INFO:
+ ret = ipath_get_slave_info(pd,
+ (void __user *) (unsigned long)
+ cmd.cmd.slave_mask_addr);
+ break;
+ case IPATH_CMD_PIOAVAILUPD:
+ ipath_force_pio_avail_update(pd->port_dd);
+ break;
+ case IPATH_CMD_POLL_TYPE:
+ pd->poll_type = cmd.cmd.poll_type;
+ break;
+ case IPATH_CMD_ARMLAUNCH_CTRL:
+ if (cmd.cmd.armlaunch_ctrl)
+ ipath_enable_armlaunch(pd->port_dd);
+ else
+ ipath_disable_armlaunch(pd->port_dd);
+ break;
+ case IPATH_CMD_SDMA_INFLIGHT:
+ ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
+ (u32 __user *) (unsigned long)
+ cmd.cmd.sdma_inflight);
+ break;
+ case IPATH_CMD_SDMA_COMPLETE:
+ ret = ipath_sdma_get_complete(pd->port_dd,
+ user_sdma_queue_fp(fp),
+ (u32 __user *) (unsigned long)
+ cmd.cmd.sdma_complete);
+ break;
+ }
+
+ if (ret >= 0)
+ ret = consumed;
+
+bail:
+ return ret;
+}
+
+static ssize_t ipath_writev(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long dim, loff_t off)
+{
+ struct file *filp = iocb->ki_filp;
+ struct ipath_filedata *fp = filp->private_data;
+ struct ipath_portdata *pd = port_fp(filp);
+ struct ipath_user_sdma_queue *pq = fp->pq;
+
+ if (!dim)
+ return -EINVAL;
+
+ return ipath_user_sdma_writev(pd->port_dd, pq, iov, dim);
+}
+
+static struct class *ipath_class;
+
+static int init_cdev(int minor, char *name, const struct file_operations *fops,
+ struct cdev **cdevp, struct device **devp)
+{
+ const dev_t dev = MKDEV(IPATH_MAJOR, minor);
+ struct cdev *cdev = NULL;
+ struct device *device = NULL;
+ int ret;
+
+ cdev = cdev_alloc();
+ if (!cdev) {
+ printk(KERN_ERR IPATH_DRV_NAME
+ ": Could not allocate cdev for minor %d, %s\n",
+ minor, name);
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ cdev->owner = THIS_MODULE;
+ cdev->ops = fops;
+ kobject_set_name(&cdev->kobj, name);
+
+ ret = cdev_add(cdev, dev, 1);
+ if (ret < 0) {
+ printk(KERN_ERR IPATH_DRV_NAME
+ ": Could not add cdev for minor %d, %s (err %d)\n",
+ minor, name, -ret);
+ goto err_cdev;
+ }
+
+ device = device_create(ipath_class, NULL, dev, NULL, name);
+
+ if (IS_ERR(device)) {
+ ret = PTR_ERR(device);
+ printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
+ "device for minor %d, %s (err %d)\n",
+ minor, name, -ret);
+ goto err_cdev;
+ }
+
+ goto done;
+
+err_cdev:
+ cdev_del(cdev);
+ cdev = NULL;
+
+done:
+ if (ret >= 0) {
+ *cdevp = cdev;
+ *devp = device;
+ } else {
+ *cdevp = NULL;
+ *devp = NULL;
+ }
+
+ return ret;
+}
+
+int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
+ struct cdev **cdevp, struct device **devp)
+{
+ return init_cdev(minor, name, fops, cdevp, devp);
+}
+
+static void cleanup_cdev(struct cdev **cdevp,
+ struct device **devp)
+{
+ struct device *dev = *devp;
+
+ if (dev) {
+ device_unregister(dev);
+ *devp = NULL;
+ }
+
+ if (*cdevp) {
+ cdev_del(*cdevp);
+ *cdevp = NULL;
+ }
+}
+
+void ipath_cdev_cleanup(struct cdev **cdevp,
+ struct device **devp)
+{
+ cleanup_cdev(cdevp, devp);
+}
+
+static struct cdev *wildcard_cdev;
+static struct device *wildcard_dev;
+
+static const dev_t dev = MKDEV(IPATH_MAJOR, 0);
+
+static int user_init(void)
+{
+ int ret;
+
+ ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME);
+ if (ret < 0) {
+ printk(KERN_ERR IPATH_DRV_NAME ": Could not register "
+ "chrdev region (err %d)\n", -ret);
+ goto done;
+ }
+
+ ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME);
+
+ if (IS_ERR(ipath_class)) {
+ ret = PTR_ERR(ipath_class);
+ printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
+ "device class (err %d)\n", -ret);
+ goto bail;
+ }
+
+ goto done;
+bail:
+ unregister_chrdev_region(dev, IPATH_NMINORS);
+done:
+ return ret;
+}
+
+static void user_cleanup(void)
+{
+ if (ipath_class) {
+ class_destroy(ipath_class);
+ ipath_class = NULL;
+ }
+
+ unregister_chrdev_region(dev, IPATH_NMINORS);
+}
+
+static atomic_t user_count = ATOMIC_INIT(0);
+static atomic_t user_setup = ATOMIC_INIT(0);
+
+int ipath_user_add(struct ipath_devdata *dd)
+{
+ char name[10];
+ int ret;
+
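+ /* The first unit added also sets up the chrdev region, device class and wildcard minor. */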
+ if (atomic_inc_return(&user_count) == 1) {
+ ret = user_init();
+ if (ret < 0) {
+ ipath_dev_err(dd, "Unable to set up user support: "
+ "error %d\n", -ret);
+ goto bail;
+ }
+ ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
+ &wildcard_dev);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Could not create wildcard "
+ "minor: error %d\n", -ret);
+ goto bail_user;
+ }
+
+ atomic_set(&user_setup, 1);
+ }
+
+ snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);
+
+ ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
+ &dd->user_cdev, &dd->user_dev);
+ if (ret < 0)
+ ipath_dev_err(dd, "Could not create user minor %d, %s\n",
+ dd->ipath_unit + 1, name);
+
+ goto bail;
+
+bail_user:
+ user_cleanup();
+bail:
+ return ret;
+}
+
+void ipath_user_remove(struct ipath_devdata *dd)
+{
+ cleanup_cdev(&dd->user_cdev, &dd->user_dev);
+
+ if (atomic_dec_return(&user_count) == 0) {
+ if (atomic_read(&user_setup) == 0)
+ goto bail;
+
+ cleanup_cdev(&wildcard_cdev, &wildcard_dev);
+ user_cleanup();
+
+ atomic_set(&user_setup, 0);
+ }
+bail:
+ return;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
new file mode 100644
index 0000000..8bb5170
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -0,0 +1,426 @@
+/*
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/namei.h>
+
+#include "ipath_kernel.h"
+
+#define IPATHFS_MAGIC 0x726a77
+
+static struct super_block *ipath_super;
+
+static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
+ int mode, const struct file_operations *fops,
+ void *data)
+{
+ int error;
+ struct inode *inode = new_inode(dir->i_sb);
+
+ if (!inode) {
+ error = -EPERM;
+ goto bail;
+ }
+
+ inode->i_mode = mode;
+ inode->i_uid = 0;
+ inode->i_gid = 0;
+ inode->i_blocks = 0;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_private = data;
+ if ((mode & S_IFMT) == S_IFDIR) {
+ inode->i_op = &simple_dir_inode_operations;
+ inc_nlink(inode);
+ inc_nlink(dir);
+ }
+
+ inode->i_fop = fops;
+
+ d_instantiate(dentry, inode);
+ error = 0;
+
+bail:
+ return error;
+}
+
+static int create_file(const char *name, mode_t mode,
+ struct dentry *parent, struct dentry **dentry,
+ const struct file_operations *fops, void *data)
+{
+ int error;
+
+ *dentry = NULL;
+ mutex_lock(&parent->d_inode->i_mutex);
+ *dentry = lookup_one_len(name, parent, strlen(name));
+ if (!IS_ERR(*dentry))
+ error = ipathfs_mknod(parent->d_inode, *dentry,
+ mode, fops, data);
+ else
+ error = PTR_ERR(*dentry);
+ mutex_unlock(&parent->d_inode->i_mutex);
+
+ return error;
+}
+
+static ssize_t atomic_stats_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return simple_read_from_buffer(buf, count, ppos, &ipath_stats,
+ sizeof ipath_stats);
+}
+
+static const struct file_operations atomic_stats_ops = {
+ .read = atomic_stats_read,
+};
+
+static ssize_t atomic_counters_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct infinipath_counters counters;
+ struct ipath_devdata *dd;
+
+ dd = file->f_path.dentry->d_inode->i_private;
+ dd->ipath_f_read_counters(dd, &counters);
+
+ return simple_read_from_buffer(buf, count, ppos, &counters,
+ sizeof counters);
+}
+
+static const struct file_operations atomic_counters_ops = {
+ .read = atomic_counters_read,
+};
+
+static ssize_t flash_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct ipath_devdata *dd;
+ ssize_t ret;
+ loff_t pos;
+ char *tmp;
+
+ pos = *ppos;
+
+ if (pos < 0) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ if (pos >= sizeof(struct ipath_flash)) {
+ ret = 0;
+ goto bail;
+ }
+
+ if (count > sizeof(struct ipath_flash) - pos)
+ count = sizeof(struct ipath_flash) - pos;
+
+ tmp = kmalloc(count, GFP_KERNEL);
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ dd = file->f_path.dentry->d_inode->i_private;
+ if (ipath_eeprom_read(dd, pos, tmp, count)) {
+ ipath_dev_err(dd, "failed to read from flash\n");
+ ret = -ENXIO;
+ goto bail_tmp;
+ }
+
+ if (copy_to_user(buf, tmp, count)) {
+ ret = -EFAULT;
+ goto bail_tmp;
+ }
+
+ *ppos = pos + count;
+ ret = count;
+
+bail_tmp:
+ kfree(tmp);
+
+bail:
+ return ret;
+}
+
+static ssize_t flash_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct ipath_devdata *dd;
+ ssize_t ret;
+ loff_t pos;
+ char *tmp;
+
+ pos = *ppos;
+
+ if (pos != 0) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ if (count != sizeof(struct ipath_flash)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ tmp = kmalloc(count, GFP_KERNEL);
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ if (copy_from_user(tmp, buf, count)) {
+ ret = -EFAULT;
+ goto bail_tmp;
+ }
+
+ dd = file->f_path.dentry->d_inode->i_private;
+ if (ipath_eeprom_write(dd, pos, tmp, count)) {
+ ret = -ENXIO;
+ ipath_dev_err(dd, "failed to write to flash\n");
+ goto bail_tmp;
+ }
+
+ *ppos = pos + count;
+ ret = count;
+
+bail_tmp:
+ kfree(tmp);
+
+bail:
+ return ret;
+}
+
+static const struct file_operations flash_ops = {
+ .read = flash_read,
+ .write = flash_write,
+};
+
+static int create_device_files(struct super_block *sb,
+ struct ipath_devdata *dd)
+{
+ struct dentry *dir, *tmp;
+ char unit[10];
+ int ret;
+
+ snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
+ ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
+ &simple_dir_operations, dd);
+ if (ret) {
+ printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
+ goto bail;
+ }
+
+ ret = create_file("atomic_counters", S_IFREG|S_IRUGO, dir, &tmp,
+ &atomic_counters_ops, dd);
+ if (ret) {
+ printk(KERN_ERR "create_file(%s/atomic_counters) "
+ "failed: %d\n", unit, ret);
+ goto bail;
+ }
+
+ ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
+ &flash_ops, dd);
+ if (ret) {
+ printk(KERN_ERR "create_file(%s/flash) "
+ "failed: %d\n", unit, ret);
+ goto bail;
+ }
+
+bail:
+ return ret;
+}
+
+static int remove_file(struct dentry *parent, char *name)
+{
+ struct dentry *tmp;
+ int ret;
+
+ tmp = lookup_one_len(name, parent, strlen(name));
+
+ if (IS_ERR(tmp)) {
+ ret = PTR_ERR(tmp);
+ goto bail;
+ }
+
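+ /* Drop the dentry from the dcache (under dcache_lock/d_lock), then unlink it. */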
+ spin_lock(&dcache_lock);
+ spin_lock(&tmp->d_lock);
+ if (!(d_unhashed(tmp) && tmp->d_inode)) {
+ dget_locked(tmp);
+ __d_drop(tmp);
+ spin_unlock(&tmp->d_lock);
+ spin_unlock(&dcache_lock);
+ simple_unlink(parent->d_inode, tmp);
+ } else {
+ spin_unlock(&tmp->d_lock);
+ spin_unlock(&dcache_lock);
+ }
+
+ ret = 0;
+bail:
+ /*
+ * We don't expect clients to care about the return value, but
+ * it's there if they need it.
+ */
+ return ret;
+}
+
+static int remove_device_files(struct super_block *sb,
+ struct ipath_devdata *dd)
+{
+ struct dentry *dir, *root;
+ char unit[10];
+ int ret;
+
+ root = dget(sb->s_root);
+ mutex_lock(&root->d_inode->i_mutex);
+ snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
+ dir = lookup_one_len(unit, root, strlen(unit));
+
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
+ printk(KERN_ERR "Lookup of %s failed\n", unit);
+ goto bail;
+ }
+
+ remove_file(dir, "flash");
+ remove_file(dir, "atomic_counters");
+ d_delete(dir);
+ ret = simple_rmdir(root->d_inode, dir);
+
+bail:
+ mutex_unlock(&root->d_inode->i_mutex);
+ dput(root);
+ return ret;
+}
+
+static int ipathfs_fill_super(struct super_block *sb, void *data,
+ int silent)
+{
+ struct ipath_devdata *dd, *tmp;
+ unsigned long flags;
+ int ret;
+
+ static struct tree_descr files[] = {
+ [2] = {"atomic_stats", &atomic_stats_ops, S_IRUGO},
+ {""},
+ };
+
+ ret = simple_fill_super(sb, IPATHFS_MAGIC, files);
+ if (ret) {
+ printk(KERN_ERR "simple_fill_super failed: %d\n", ret);
+ goto bail;
+ }
+
+ spin_lock_irqsave(&ipath_devs_lock, flags);
+
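+ /* create_device_files() can sleep, so drop the spinlock around the call. */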
+ list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
+ spin_unlock_irqrestore(&ipath_devs_lock, flags);
+ ret = create_device_files(sb, dd);
+ if (ret) {
+ deactivate_super(sb);
+ goto bail;
+ }
+ spin_lock_irqsave(&ipath_devs_lock, flags);
+ }
+
+ spin_unlock_irqrestore(&ipath_devs_lock, flags);
+
+bail:
+ return ret;
+}
+
+static int ipathfs_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data, struct vfsmount *mnt)
+{
+ int ret = get_sb_single(fs_type, flags, data,
+ ipathfs_fill_super, mnt);
+ if (ret >= 0)
+ ipath_super = mnt->mnt_sb;
+ return ret;
+}
+
+static void ipathfs_kill_super(struct super_block *s)
+{
+ kill_litter_super(s);
+ ipath_super = NULL;
+}
+
+int ipathfs_add_device(struct ipath_devdata *dd)
+{
+ int ret;
+
+ if (ipath_super == NULL) {
+ ret = 0;
+ goto bail;
+ }
+
+ ret = create_device_files(ipath_super, dd);
+
+bail:
+ return ret;
+}
+
+int ipathfs_remove_device(struct ipath_devdata *dd)
+{
+ int ret;
+
+ if (ipath_super == NULL) {
+ ret = 0;
+ goto bail;
+ }
+
+ ret = remove_device_files(ipath_super, dd);
+
+bail:
+ return ret;
+}
+
+static struct file_system_type ipathfs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "ipathfs",
+ .get_sb = ipathfs_get_sb,
+ .kill_sb = ipathfs_kill_super,
+};
+
+int __init ipath_init_ipathfs(void)
+{
+ return register_filesystem(&ipathfs_fs_type);
+}
+
+void __exit ipath_exit_ipathfs(void)
+{
+ unregister_filesystem(&ipathfs_fs_type);
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
new file mode 100644
index 0000000..02831ad
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -0,0 +1,1941 @@
+/*
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file contains all of the code that is specific to the InfiniPath
+ * HT chip.
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/htirq.h>
+#include <rdma/ib_verbs.h>
+
+#include "ipath_kernel.h"
+#include "ipath_registers.h"
+
+static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64);
+
+
+/*
+ * This lists the InfiniPath registers, in the actual chip layout.
+ * This structure should never be directly accessed.
+ *
+ * The names are in InterCap form because they're taken straight from
+ * the chip specification. Since they're only used in this file, they
+ * don't pollute the rest of the source.
+ */
+
+struct _infinipath_do_not_use_kernel_regs {
+ unsigned long long Revision;
+ unsigned long long Control;
+ unsigned long long PageAlign;
+ unsigned long long PortCnt;
+ unsigned long long DebugPortSelect;
+ unsigned long long DebugPort;
+ unsigned long long SendRegBase;
+ unsigned long long UserRegBase;
+ unsigned long long CounterRegBase;
+ unsigned long long Scratch;
+ unsigned long long ReservedMisc1;
+ unsigned long long InterruptConfig;
+ unsigned long long IntBlocked;
+ unsigned long long IntMask;
+ unsigned long long IntStatus;
+ unsigned long long IntClear;
+ unsigned long long ErrorMask;
+ unsigned long long ErrorStatus;
+ unsigned long long ErrorClear;
+ unsigned long long HwErrMask;
+ unsigned long long HwErrStatus;
+ unsigned long long HwErrClear;
+ unsigned long long HwDiagCtrl;
+ unsigned long long MDIO;
+ unsigned long long IBCStatus;
+ unsigned long long IBCCtrl;
+ unsigned long long ExtStatus;
+ unsigned long long ExtCtrl;
+ unsigned long long GPIOOut;
+ unsigned long long GPIOMask;
+ unsigned long long GPIOStatus;
+ unsigned long long GPIOClear;
+ unsigned long long RcvCtrl;
+ unsigned long long RcvBTHQP;
+ unsigned long long RcvHdrSize;
+ unsigned long long RcvHdrCnt;
+ unsigned long long RcvHdrEntSize;
+ unsigned long long RcvTIDBase;
+ unsigned long long RcvTIDCnt;
+ unsigned long long RcvEgrBase;
+ unsigned long long RcvEgrCnt;
+ unsigned long long RcvBufBase;
+ unsigned long long RcvBufSize;
+ unsigned long long RxIntMemBase;
+ unsigned long long RxIntMemSize;
+ unsigned long long RcvPartitionKey;
+ unsigned long long ReservedRcv[10];
+ unsigned long long SendCtrl;
+ unsigned long long SendPIOBufBase;
+ unsigned long long SendPIOSize;
+ unsigned long long SendPIOBufCnt;
+ unsigned long long SendPIOAvailAddr;
+ unsigned long long TxIntMemBase;
+ unsigned long long TxIntMemSize;
+ unsigned long long ReservedSend[9];
+ unsigned long long SendBufferError;
+ unsigned long long SendBufferErrorCONT1;
+ unsigned long long SendBufferErrorCONT2;
+ unsigned long long SendBufferErrorCONT3;
+ unsigned long long ReservedSBE[4];
+ unsigned long long RcvHdrAddr0;
+ unsigned long long RcvHdrAddr1;
+ unsigned long long RcvHdrAddr2;
+ unsigned long long RcvHdrAddr3;
+ unsigned long long RcvHdrAddr4;
+ unsigned long long RcvHdrAddr5;
+ unsigned long long RcvHdrAddr6;
+ unsigned long long RcvHdrAddr7;
+ unsigned long long RcvHdrAddr8;
+ unsigned long long ReservedRHA[7];
+ unsigned long long RcvHdrTailAddr0;
+ unsigned long long RcvHdrTailAddr1;
+ unsigned long long RcvHdrTailAddr2;
+ unsigned long long RcvHdrTailAddr3;
+ unsigned long long RcvHdrTailAddr4;
+ unsigned long long RcvHdrTailAddr5;
+ unsigned long long RcvHdrTailAddr6;
+ unsigned long long RcvHdrTailAddr7;
+ unsigned long long RcvHdrTailAddr8;
+ unsigned long long ReservedRHTA[7];
+ unsigned long long Sync; /* Software only */
+ unsigned long long Dump; /* Software only */
+ unsigned long long SimVer; /* Software only */
+ unsigned long long ReservedSW[5];
+ unsigned long long SerdesConfig0;
+ unsigned long long SerdesConfig1;
+ unsigned long long SerdesStatus;
+ unsigned long long XGXSConfig;
+ unsigned long long ReservedSW2[4];
+};
+
+struct _infinipath_do_not_use_counters {
+ __u64 LBIntCnt;
+ __u64 LBFlowStallCnt;
+ __u64 Reserved1;
+ __u64 TxUnsupVLErrCnt;
+ __u64 TxDataPktCnt;
+ __u64 TxFlowPktCnt;
+ __u64 TxDwordCnt;
+ __u64 TxLenErrCnt;
+ __u64 TxMaxMinLenErrCnt;
+ __u64 TxUnderrunCnt;
+ __u64 TxFlowStallCnt;
+ __u64 TxDroppedPktCnt;
+ __u64 RxDroppedPktCnt;
+ __u64 RxDataPktCnt;
+ __u64 RxFlowPktCnt;
+ __u64 RxDwordCnt;
+ __u64 RxLenErrCnt;
+ __u64 RxMaxMinLenErrCnt;
+ __u64 RxICRCErrCnt;
+ __u64 RxVCRCErrCnt;
+ __u64 RxFlowCtrlErrCnt;
+ __u64 RxBadFormatCnt;
+ __u64 RxLinkProblemCnt;
+ __u64 RxEBPCnt;
+ __u64 RxLPCRCErrCnt;
+ __u64 RxBufOvflCnt;
+ __u64 RxTIDFullErrCnt;
+ __u64 RxTIDValidErrCnt;
+ __u64 RxPKeyMismatchCnt;
+ __u64 RxP0HdrEgrOvflCnt;
+ __u64 RxP1HdrEgrOvflCnt;
+ __u64 RxP2HdrEgrOvflCnt;
+ __u64 RxP3HdrEgrOvflCnt;
+ __u64 RxP4HdrEgrOvflCnt;
+ __u64 RxP5HdrEgrOvflCnt;
+ __u64 RxP6HdrEgrOvflCnt;
+ __u64 RxP7HdrEgrOvflCnt;
+ __u64 RxP8HdrEgrOvflCnt;
+ __u64 Reserved6;
+ __u64 Reserved7;
+ __u64 IBStatusChangeCnt;
+ __u64 IBLinkErrRecoveryCnt;
+ __u64 IBLinkDownedCnt;
+ __u64 IBSymbolErrCnt;
+};
+
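+/*
+ * The offsets below are indices in units of 64-bit registers (not bytes),
+ * computed from the layout structs above, so they can be used directly
+ * as kernel register / counter register indices.
+ */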
+#define IPATH_KREG_OFFSET(field) (offsetof( \
+ struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
+#define IPATH_CREG_OFFSET(field) (offsetof( \
+ struct _infinipath_do_not_use_counters, field) / sizeof(u64))
+
+static const struct ipath_kregs ipath_ht_kregs = {
+ .kr_control = IPATH_KREG_OFFSET(Control),
+ .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
+ .kr_debugport = IPATH_KREG_OFFSET(DebugPort),
+ .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
+ .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
+ .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
+ .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
+ .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
+ .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
+ .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
+ .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
+ .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
+ .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
+ .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
+ .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
+ .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
+ .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
+ .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
+ .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
+ .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
+ .kr_intclear = IPATH_KREG_OFFSET(IntClear),
+ .kr_interruptconfig = IPATH_KREG_OFFSET(InterruptConfig),
+ .kr_intmask = IPATH_KREG_OFFSET(IntMask),
+ .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
+ .kr_mdio = IPATH_KREG_OFFSET(MDIO),
+ .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
+ .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
+ .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
+ .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
+ .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
+ .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
+ .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
+ .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
+ .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
+ .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
+ .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
+ .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
+ .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
+ .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
+ .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
+ .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
+ .kr_revision = IPATH_KREG_OFFSET(Revision),
+ .kr_scratch = IPATH_KREG_OFFSET(Scratch),
+ .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
+ .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
+ .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
+ .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
+ .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
+ .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
+ .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
+ .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
+ .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
+ .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
+ .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
+ .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
+ .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
+ .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
+	/*
+	 * These should not be used directly via ipath_write_kreg64();
+	 * use them with ipath_write_kreg64_port().
+	 */
+ .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
+ .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0)
+};
+
+static const struct ipath_cregs ipath_ht_cregs = {
+ .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
+ .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
+ .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
+ .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
+ .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
+ .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
+ .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
+ .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
+ .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
+ .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
+ .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
+ .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
+ /* calc from Reg_CounterRegBase + offset */
+ .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
+ .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
+ .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
+ .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
+ .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
+ .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
+ .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
+ .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
+ .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
+ .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
+ .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
+ .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
+ .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
+ .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
+ .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
+ .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
+ .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
+ .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
+ .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
+ .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
+ .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
+};
+
+/* kr_intstatus, kr_intclear, kr_intmask bits */
+#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
+#define INFINIPATH_I_RCVURG_SHIFT 0
+#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
+#define INFINIPATH_I_RCVAVAIL_SHIFT 12
+
+/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
+#define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
+#define INFINIPATH_HWE_HTCMEMPARITYERR_MASK 0x3FFFFFULL
+#define INFINIPATH_HWE_HTCLNKABYTE0CRCERR 0x0000000000800000ULL
+#define INFINIPATH_HWE_HTCLNKABYTE1CRCERR 0x0000000001000000ULL
+#define INFINIPATH_HWE_HTCLNKBBYTE0CRCERR 0x0000000002000000ULL
+#define INFINIPATH_HWE_HTCLNKBBYTE1CRCERR 0x0000000004000000ULL
+#define INFINIPATH_HWE_HTCMISCERR4 0x0000000008000000ULL
+#define INFINIPATH_HWE_HTCMISCERR5 0x0000000010000000ULL
+#define INFINIPATH_HWE_HTCMISCERR6 0x0000000020000000ULL
+#define INFINIPATH_HWE_HTCMISCERR7 0x0000000040000000ULL
+#define INFINIPATH_HWE_HTCBUSTREQPARITYERR 0x0000000080000000ULL
+#define INFINIPATH_HWE_HTCBUSTRESPPARITYERR 0x0000000100000000ULL
+#define INFINIPATH_HWE_HTCBUSIREQPARITYERR 0x0000000200000000ULL
+#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL
+#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL
+#define INFINIPATH_HWE_HTBPLL_FBSLIP 0x0200000000000000ULL
+#define INFINIPATH_HWE_HTBPLL_RFSLIP 0x0400000000000000ULL
+#define INFINIPATH_HWE_HTAPLL_FBSLIP 0x0800000000000000ULL
+#define INFINIPATH_HWE_HTAPLL_RFSLIP 0x1000000000000000ULL
+#define INFINIPATH_HWE_SERDESPLLFAILED 0x2000000000000000ULL
+
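+/* kr_ibcstatus link state field positions that are specific to this chip */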
+#define IBA6110_IBCS_LINKTRAININGSTATE_MASK 0xf
+#define IBA6110_IBCS_LINKSTATE_SHIFT 4
+
+/* kr_extstatus bits */
+#define INFINIPATH_EXTS_FREQSEL 0x2
+#define INFINIPATH_EXTS_SERDESSEL 0x4
+#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
+#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000
+
+
+/* TID entries (memory), HT-only */
+#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
+#define INFINIPATH_RT_VALID 0x8000000000000000ULL
+#define INFINIPATH_RT_ADDR_SHIFT 0
+#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
+#define INFINIPATH_RT_BUFSIZE_SHIFT 48
+
+#define INFINIPATH_R_INTRAVAIL_SHIFT 16
+#define INFINIPATH_R_TAILUPD_SHIFT 31
+
+/* kr_xgxsconfig bits */
+#define INFINIPATH_XGXS_RESET 0x7ULL
+
+/*
+ * masks and bits that are different in different chips, or present only
+ * in one
+ */
+static const ipath_err_t infinipath_hwe_htcmemparityerr_mask =
+ INFINIPATH_HWE_HTCMEMPARITYERR_MASK;
+static const ipath_err_t infinipath_hwe_htcmemparityerr_shift =
+ INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT;
+
+static const ipath_err_t infinipath_hwe_htclnkabyte0crcerr =
+ INFINIPATH_HWE_HTCLNKABYTE0CRCERR;
+static const ipath_err_t infinipath_hwe_htclnkabyte1crcerr =
+ INFINIPATH_HWE_HTCLNKABYTE1CRCERR;
+static const ipath_err_t infinipath_hwe_htclnkbbyte0crcerr =
+ INFINIPATH_HWE_HTCLNKBBYTE0CRCERR;
+static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr =
+ INFINIPATH_HWE_HTCLNKBBYTE1CRCERR;
+
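+/* GPIO pin numbers and bit positions for the SDA/SCL two-wire serial lines */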
+#define _IPATH_GPIO_SDA_NUM 1
+#define _IPATH_GPIO_SCL_NUM 0
+
+#define IPATH_GPIO_SDA \
+ (1ULL << (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+#define IPATH_GPIO_SCL \
+ (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+
+/* keep the code below somewhat more readable; not used elsewhere */
+#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \
+ infinipath_hwe_htclnkabyte1crcerr)
+#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr | \
+ infinipath_hwe_htclnkbbyte1crcerr)
+#define _IPATH_HTLANE0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \
+ infinipath_hwe_htclnkbbyte0crcerr)
+#define _IPATH_HTLANE1_CRCBITS (infinipath_hwe_htclnkabyte1crcerr | \
+ infinipath_hwe_htclnkbbyte1crcerr)
+
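+/*
+ * Decode the HT link/lane CRC error bits from the hardware error status
+ * and append a description to msg; CRC bits for a link's upper byte lane
+ * are ignored when that link is in 8-bit mode.
+ */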
+static void hwerr_crcbits(struct ipath_devdata *dd, ipath_err_t hwerrs,
+ char *msg, size_t msgl)
+{
+ char bitsmsg[64];
+ ipath_err_t crcbits = hwerrs &
+ (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS);
+ /* don't check if 8bit HT */
+ if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
+ crcbits &= ~infinipath_hwe_htclnkabyte1crcerr;
+ /* don't check if 8bit HT */
+ if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
+ crcbits &= ~infinipath_hwe_htclnkbbyte1crcerr;
+ /*
+	 * we'll want to ignore link errors on a link that is
+	 * not in use, if any.  For now, complain about both.
+ */
+ if (crcbits) {
+ u16 ctrl0, ctrl1;
+ snprintf(bitsmsg, sizeof bitsmsg,
+ "[HT%s lane %s CRC (%llx); powercycle to completely clear]",
+ !(crcbits & _IPATH_HTLINK1_CRCBITS) ?
+ "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
+ ? "1 (B)" : "0+1 (A+B)"),
+ !(crcbits & _IPATH_HTLANE1_CRCBITS) ? "0"
+ : (!(crcbits & _IPATH_HTLANE0_CRCBITS) ? "1" :
+ "0+1"), (unsigned long long) crcbits);
+ strlcat(msg, bitsmsg, msgl);
+
+ /*
+ * print extra info for debugging. slave/primary
+ * config word 4, 8 (link control 0, 1)
+ */
+
+ if (pci_read_config_word(dd->pcidev,
+ dd->ipath_ht_slave_off + 0x4,
+ &ctrl0))
+ dev_info(&dd->pcidev->dev, "Couldn't read "
+ "linkctrl0 of slave/primary "
+ "config block\n");
+ else if (!(ctrl0 & 1 << 6))
+ /* not if EOC bit set */
+ ipath_dbg("HT linkctrl0 0x%x%s%s\n", ctrl0,
+ ((ctrl0 >> 8) & 7) ? " CRC" : "",
+ ((ctrl0 >> 4) & 1) ? "linkfail" :
+ "");
+ if (pci_read_config_word(dd->pcidev,
+ dd->ipath_ht_slave_off + 0x8,
+ &ctrl1))
+ dev_info(&dd->pcidev->dev, "Couldn't read "
+ "linkctrl1 of slave/primary "
+ "config block\n");
+ else if (!(ctrl1 & 1 << 6))
+ /* not if EOC bit set */
+ ipath_dbg("HT linkctrl1 0x%x%s%s\n", ctrl1,
+ ((ctrl1 >> 8) & 7) ? " CRC" : "",
+ ((ctrl1 >> 4) & 1) ? "linkfail" :
+ "");
+
+ /* disable until driver reloaded */
+ dd->ipath_hwerrmask &= ~crcbits;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask);
+ ipath_dbg("HT crc errs: %s\n", msg);
+ } else
+ ipath_dbg("ignoring HT crc errors 0x%llx, "
+ "not in use\n", (unsigned long long)
+ (hwerrs & (_IPATH_HTLINK0_CRCBITS |
+ _IPATH_HTLINK1_CRCBITS)));
+}
+
+/* 6110 specific hardware errors... */
+static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
+ INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
+ INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
+ INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
+ INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
+ INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
+ INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+ INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+};
+
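+/* memory parity error bits that are treated as recoverable below */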
+#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
+#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
+ << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
+
+static void ipath_ht_txe_recover(struct ipath_devdata *dd)
+{
+ ++ipath_stats.sps_txeparity;
+ dev_info(&dd->pcidev->dev,
+ "Recovering from TXE PIO parity error\n");
+}
+
+
+/**
+ * ipath_ht_handle_hwerrors - display hardware errors.
+ * @dd: the infinipath device
+ * @msg: the output buffer
+ * @msgl: the size of the output buffer
+ *
+ * Most hardware errors are catastrophic, but for right now we'll
+ * print them and continue.  We reuse the same message buffer as
+ * ipath_handle_errors() to avoid excessive stack usage.
+ */
+static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
+ size_t msgl)
+{
+ ipath_err_t hwerrs;
+ u32 bits, ctrl;
+ int isfatal = 0;
+ char bitsmsg[64];
+ int log_idx;
+
+ hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
+
+ if (!hwerrs) {
+ ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
+		/*
+		 * Better than printing confusing messages.
+		 * This seems to be related to clearing the CRC error, or
+		 * the PLL error during init.
+		 */
+ goto bail;
+ } else if (hwerrs == -1LL) {
+ ipath_dev_err(dd, "Read of hardware error status failed "
+ "(all bits set); ignoring\n");
+ goto bail;
+ }
+ ipath_stats.sps_hwerrs++;
+
+ /* Always clear the error status register, except MEMBISTFAIL,
+ * regardless of whether we continue or stop using the chip.
+ * We want that set so we know it failed, even across driver reload.
+ * We'll still ignore it in the hwerrmask. We do this partly for
+ * diagnostics, but also for support */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+ hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
+
+ hwerrs &= dd->ipath_hwerrmask;
+
+ /* We log some errors to EEPROM, check if we have any of those. */
+ for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
+ if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
+ ipath_inc_eeprom_err(dd, log_idx, 1);
+
+ /*
+ * make sure we get this much out, unless told to be quiet,
+ * it's a parity error we may recover from,
+ * or it's occurred within the last 5 seconds
+ */
+ if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
+ RXE_EAGER_PARITY)) ||
+ (ipath_debug & __IPATH_VERBDBG))
+ dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
+ "(cleared)\n", (unsigned long long) hwerrs);
+ dd->ipath_lasthwerror |= hwerrs;
+
+ if (hwerrs & ~dd->ipath_hwe_bitsextant)
+ ipath_dev_err(dd, "hwerror interrupt with unknown errors "
+ "%llx set\n", (unsigned long long)
+ (hwerrs & ~dd->ipath_hwe_bitsextant));
+
+ ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
+ if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
+		/*
+		 * Parity errors in send memory are recoverable: just
+		 * cancel the send (if indicated in sendbuffererror),
+		 * count the occurrence, unfreeze (if no other handled
+		 * hardware error bits are set), and continue.  They can
+		 * occur if a processor speculative read is done to the PIO
+		 * buffer while we are sending a packet, for example.
+		 */
+ if (hwerrs & TXE_PIO_PARITY) {
+ ipath_ht_txe_recover(dd);
+ hwerrs &= ~TXE_PIO_PARITY;
+ }
+
+ if (!hwerrs) {
+ ipath_dbg("Clearing freezemode on ignored or "
+ "recovered hardware error\n");
+ ipath_clear_freeze(dd);
+ }
+ }
+
+ *msg = '\0';
+
+	/*
+	 * We may someday want to decode which bits map to which
+	 * functional area, for parity errors, etc.
+	 */
+ if (hwerrs & (infinipath_hwe_htcmemparityerr_mask
+ << INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT)) {
+ bits = (u32) ((hwerrs >>
+ INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) &
+ INFINIPATH_HWE_HTCMEMPARITYERR_MASK);
+ snprintf(bitsmsg, sizeof bitsmsg, "[HTC Parity Errs %x] ",
+ bits);
+ strlcat(msg, bitsmsg, msgl);
+ }
+
+ ipath_format_hwerrors(hwerrs,
+ ipath_6110_hwerror_msgs,
+ sizeof(ipath_6110_hwerror_msgs) /
+ sizeof(ipath_6110_hwerror_msgs[0]),
+ msg, msgl);
+
+ if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
+ hwerr_crcbits(dd, hwerrs, msg, msgl);
+
+ if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
+ strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
+ msgl);
+ /* ignore from now on, so disable until driver reloaded */
+ dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask);
+ }
+#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \
+ INFINIPATH_HWE_COREPLL_RFSLIP | \
+ INFINIPATH_HWE_HTBPLL_FBSLIP | \
+ INFINIPATH_HWE_HTBPLL_RFSLIP | \
+ INFINIPATH_HWE_HTAPLL_FBSLIP | \
+ INFINIPATH_HWE_HTAPLL_RFSLIP)
+
+ if (hwerrs & _IPATH_PLL_FAIL) {
+ snprintf(bitsmsg, sizeof bitsmsg,
+ "[PLL failed (%llx), InfiniPath hardware unusable]",
+ (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
+ strlcat(msg, bitsmsg, msgl);
+ /* ignore from now on, so disable until driver reloaded */
+ dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask);
+ }
+
+ if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
+ /*
+		 * If it occurs, it is left masked since the external
+		 * interface is unused
+ */
+ dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask);
+ }
+
+ if (hwerrs) {
+		/*
+		 * If any bits are set that we aren't ignoring, only make
+		 * the complaint once, in case it's stuck or recurring and
+		 * we get here multiple times.  Force the link down so the
+		 * switch knows, and so the LEDs are turned off.
+		 */
+ if (dd->ipath_flags & IPATH_INITTED) {
+ ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+ ipath_setup_ht_setextled(dd,
+ INFINIPATH_IBCS_L_STATE_DOWN,
+ INFINIPATH_IBCS_LT_STATE_DISABLED);
+ ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+ "mode), no longer usable, SN %.16s\n",
+ dd->ipath_serial);
+ isfatal = 1;
+ }
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+ /* mark as having had error */
+ *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+ /*
+ * mark as not usable, at a minimum until driver
+ * is reloaded, probably until reboot, since no
+ * other reset is possible.
+ */
+ dd->ipath_flags &= ~IPATH_INITTED;
+ }
+ else
+ *msg = 0; /* recovered from all of them */
+ if (*msg)
+ ipath_dev_err(dd, "%s hardware error\n", msg);
+ if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
+ /*
+ * for status file; if no trailing brace is copied,
+ * we'll know it was truncated.
+ */
+ snprintf(dd->ipath_freezemsg,
+ dd->ipath_freezelen, "{%s}", msg);
+
+bail:;
+}
+
+/**
+ * ipath_ht_boardname - fill in the board name
+ * @dd: the infinipath device
+ * @name: the output buffer
+ * @namelen: the size of the output buffer
+ *
+ * fill in the board name, based on the board revision register
+ */
+static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
+ size_t namelen)
+{
+ char *n = NULL;
+ u8 boardrev = dd->ipath_boardrev;
+ int ret = 0;
+
+ switch (boardrev) {
+ case 5:
+ /*
+ * original production board; two production levels, with
+ * different serial number ranges. See ipath_ht_early_init() for
+ * case where we enable IPATH_GPIO_INTR for later serial # range.
+ * Original 112* serial number is no longer supported.
+ */
+ n = "InfiniPath_QHT7040";
+ break;
+ case 7:
+ /* small form factor production board */
+ n = "InfiniPath_QHT7140";
+ break;
+ default: /* don't know, just print the number */
+ ipath_dev_err(dd, "Don't yet know about board "
+ "with ID %u\n", boardrev);
+ snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
+ boardrev);
+ break;
+ }
+ if (n)
+ snprintf(name, namelen, "%s", n);
+
+ if (ret) {
+ ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
+ goto bail;
+ }
+ if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
+ dd->ipath_minrev > 4)) {
+ /*
+ * This version of the driver only supports Rev 3.2 - 3.4
+ */
+ ipath_dev_err(dd,
+ "Unsupported InfiniPath hardware revision %u.%u!\n",
+ dd->ipath_majrev, dd->ipath_minrev);
+ ret = 1;
+ goto bail;
+ }
+ /*
+ * pkt/word counters are 32 bit, and therefore wrap fast enough
+ * that we snapshot them from a timer, and maintain 64 bit shadow
+ * copies
+ */
+ dd->ipath_flags |= IPATH_32BITCOUNTERS;
+ dd->ipath_flags |= IPATH_GPIO_INTR;
+ if (dd->ipath_lbus_speed != 800)
+ ipath_dev_err(dd,
+ "Incorrectly configured for HT @ %uMHz\n",
+ dd->ipath_lbus_speed);
+
+ /*
+ * set here, not in ipath_init_*_funcs because we have to do
+ * it after we can read chip registers.
+ */
+ dd->ipath_ureg_align =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
+
+bail:
+ return ret;
+}
+
+static void ipath_check_htlink(struct ipath_devdata *dd)
+{
+ u8 linkerr, link_off, i;
+
+ for (i = 0; i < 2; i++) {
+ link_off = dd->ipath_ht_slave_off + i * 4 + 0xd;
+ if (pci_read_config_byte(dd->pcidev, link_off, &linkerr))
+ dev_info(&dd->pcidev->dev, "Couldn't read "
+ "linkerror%d of HT slave/primary block\n",
+ i);
+ else if (linkerr & 0xf0) {
+			ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
+				   "clearing\n", i, linkerr >> 4);
+ /*
+ * writing the linkerr bits that are set should
+ * clear them
+ */
+ if (pci_write_config_byte(dd->pcidev, link_off,
+ linkerr))
+ ipath_dbg("Failed write to clear HT "
+ "linkerror%d\n", i);
+ if (pci_read_config_byte(dd->pcidev, link_off,
+ &linkerr))
+ dev_info(&dd->pcidev->dev,
+ "Couldn't reread linkerror%d of "
+ "HT slave/primary block\n", i);
+ else if (linkerr & 0xf0)
+ dev_info(&dd->pcidev->dev,
+ "HT linkerror%d bits 0x%x "
+ "couldn't be cleared\n",
+ i, linkerr >> 4);
+ }
+ }
+}
+
+static int ipath_setup_ht_reset(struct ipath_devdata *dd)
+{
+ ipath_dbg("No reset possible for this InfiniPath hardware\n");
+ return 0;
+}
+
+#define HT_INTR_DISC_CONFIG 0x80 /* HT interrupt and discovery cap */
+#define HT_INTR_REG_INDEX 2 /* intconfig requires indirect accesses */
+
+/*
+ * Bits 13-15 of command==0 indicate the slave/primary block. Clear any
+ * HT CRC errors. We only bother to do this at load time, because it's
+ * OK if it happened before we were loaded (first time after boot/reset),
+ * but any time after that, it's fatal anyway. Also, we must not check
+ * for upper byte errors if we are in 8 bit mode, so figure out our
+ * width. For now, at least, also complain if it's 8 bit.
+ */
+static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
+ int pos, u8 cap_type)
+{
+ u8 linkwidth = 0, linkerr, link_a_b_off, link_off;
+ u16 linkctrl = 0;
+ int i;
+
+ dd->ipath_ht_slave_off = pos;
+ /* command word, master_host bit */
+ /* master host || slave */
+ if ((cap_type >> 2) & 1)
+ link_a_b_off = 4;
+ else
+ link_a_b_off = 0;
+ ipath_cdbg(VERBOSE, "HT%u (Link %c) connected to processor\n",
+ link_a_b_off ? 1 : 0,
+ link_a_b_off ? 'B' : 'A');
+
+ link_a_b_off += pos;
+
+ /*
+ * check both link control registers; clear both HT CRC sets if
+ * necessary.
+ */
+ for (i = 0; i < 2; i++) {
+ link_off = pos + i * 4 + 0x4;
+ if (pci_read_config_word(pdev, link_off, &linkctrl))
+ ipath_dev_err(dd, "Couldn't read HT link control%d "
+ "register\n", i);
+ else if (linkctrl & (0xf << 8)) {
+ ipath_cdbg(VERBOSE, "Clear linkctrl%d CRC Error "
+ "bits %x\n", i, linkctrl & (0xf << 8));
+ /*
+ * now write them back to clear the error.
+ */
+ pci_write_config_word(pdev, link_off,
+ linkctrl & (0xf << 8));
+ }
+ }
+
+ /*
+ * As with HT CRC bits, same for protocol errors that might occur
+ * during boot.
+ */
+ for (i = 0; i < 2; i++) {
+ link_off = pos + i * 4 + 0xd;
+ if (pci_read_config_byte(pdev, link_off, &linkerr))
+ dev_info(&pdev->dev, "Couldn't read linkerror%d "
+ "of HT slave/primary block\n", i);
+ else if (linkerr & 0xf0) {
+			ipath_cdbg(VERBOSE, "HT linkerr%d bits 0x%x set, "
+				   "clearing\n", i, linkerr >> 4);
+ /*
+ * writing the linkerr bits that are set will clear
+ * them
+ */
+ if (pci_write_config_byte
+ (pdev, link_off, linkerr))
+ ipath_dbg("Failed write to clear HT "
+ "linkerror%d\n", i);
+ if (pci_read_config_byte(pdev, link_off, &linkerr))
+ dev_info(&pdev->dev, "Couldn't reread "
+ "linkerror%d of HT slave/primary "
+ "block\n", i);
+ else if (linkerr & 0xf0)
+ dev_info(&pdev->dev, "HT linkerror%d bits "
+ "0x%x couldn't be cleared\n",
+ i, linkerr >> 4);
+ }
+ }
+
+ /*
+ * this is just for our link to the host, not devices connected
+ * through tunnel.
+ */
+
+ if (pci_read_config_byte(pdev, link_a_b_off + 7, &linkwidth))
+ ipath_dev_err(dd, "Couldn't read HT link width "
+ "config register\n");
+ else {
+ u32 width;
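+		/*
+		 * Decode the 3-bit link width field into a width in bits;
+		 * unrecognized values are assumed to mean 8 bits.
+		 */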
+ switch (linkwidth & 7) {
+ case 5:
+ width = 4;
+ break;
+ case 4:
+ width = 2;
+ break;
+ case 3:
+ width = 32;
+ break;
+ case 1:
+ width = 16;
+ break;
+ case 0:
+ default: /* if wrong, assume 8 bit */
+ width = 8;
+ break;
+ }
+
+ dd->ipath_lbus_width = width;
+
+ if (linkwidth != 0x11) {
+ ipath_dev_err(dd, "Not configured for 16 bit HT "
+ "(%x)\n", linkwidth);
+ if (!(linkwidth & 0xf)) {
+ ipath_dbg("Will ignore HT lane1 errors\n");
+ dd->ipath_flags |= IPATH_8BIT_IN_HT0;
+ }
+ }
+ }
+
+ /*
+ * this is just for our link to the host, not devices connected
+ * through tunnel.
+ */
+ if (pci_read_config_byte(pdev, link_a_b_off + 0xd, &linkwidth))
+ ipath_dev_err(dd, "Couldn't read HT link frequency "
+ "config register\n");
+ else {
+ u32 speed;
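+		/*
+		 * Decode the link frequency field into MHz; reserved and
+		 * vendor-specific encodings are treated as 200MHz.
+		 */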
+ switch (linkwidth & 0xf) {
+ case 6:
+ speed = 1000;
+ break;
+ case 5:
+ speed = 800;
+ break;
+ case 4:
+ speed = 600;
+ break;
+ case 3:
+ speed = 500;
+ break;
+ case 2:
+ speed = 400;
+ break;
+ case 1:
+ speed = 300;
+ break;
+ default:
+ /*
+ * assume reserved and vendor-specific are 200...
+ */
+ case 0:
+ speed = 200;
+ break;
+ }
+ dd->ipath_lbus_speed = speed;
+ }
+
+ snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
+ "HyperTransport,%uMHz,x%u\n",
+ dd->ipath_lbus_speed,
+ dd->ipath_lbus_width);
+}
+
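+/* program the chip's interrupt address register, once the HT IRQ is known */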
+static int ipath_ht_intconfig(struct ipath_devdata *dd)
+{
+ int ret;
+
+ if (dd->ipath_intconfig) {
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_interruptconfig,
+ dd->ipath_intconfig); /* interrupt address */
+ ret = 0;
+ } else {
+ ipath_dev_err(dd, "No interrupts enabled, couldn't setup "
+ "interrupt address\n");
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static void ipath_ht_irq_update(struct pci_dev *dev, int irq,
+ struct ht_irq_msg *msg)
+{
+ struct ipath_devdata *dd = pci_get_drvdata(dev);
+ u64 prev_intconfig = dd->ipath_intconfig;
+
+ dd->ipath_intconfig = msg->address_lo;
+ dd->ipath_intconfig |= ((u64) msg->address_hi) << 32;
+
+ /*
+ * If the previous value of dd->ipath_intconfig is zero, we're
+ * getting configured for the first time, and must not program the
+ * intconfig register here (it will be programmed later, when the
+ * hardware is ready). Otherwise, we should.
+ */
+ if (prev_intconfig)
+ ipath_ht_intconfig(dd);
+}
+
+/**
+ * ipath_setup_ht_config - setup the interruptconfig register
+ * @dd: the infinipath device
+ * @pdev: the PCI device
+ *
+ * setup the interruptconfig register from the HT config info.
+ * Also clear CRC errors in HT linkcontrol, if necessary.
+ * This is done only for the real hardware. It is done before
+ * chip address space is initted, so can't touch infinipath registers
+ */
+static int ipath_setup_ht_config(struct ipath_devdata *dd,
+ struct pci_dev *pdev)
+{
+ int pos, ret;
+
+ ret = __ht_create_irq(pdev, 0, ipath_ht_irq_update);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Couldn't create interrupt handler: "
+ "err %d\n", ret);
+ goto bail;
+ }
+ dd->ipath_irq = ret;
+ ret = 0;
+
+ /*
+ * Handle clearing CRC errors in linkctrl register if necessary. We
+ * do this early, before we ever enable errors or hardware errors,
+ * mostly to avoid causing the chip to enter freeze mode.
+ */
+ pos = pci_find_capability(pdev, PCI_CAP_ID_HT);
+ if (!pos) {
+ ipath_dev_err(dd, "Couldn't find HyperTransport "
+ "capability; no interrupts\n");
+ ret = -ENODEV;
+ goto bail;
+ }
+ do {
+ u8 cap_type;
+
+ /*
+ * The HT capability type byte is 3 bytes after the
+ * capability byte.
+ */
+ if (pci_read_config_byte(pdev, pos + 3, &cap_type)) {
+ dev_info(&pdev->dev, "Couldn't read config "
+ "command @ %d\n", pos);
+ continue;
+ }
+ if (!(cap_type & 0xE0))
+ slave_or_pri_blk(dd, pdev, pos, cap_type);
+ } while ((pos = pci_find_next_capability(pdev, pos,
+ PCI_CAP_ID_HT)));
+
+ dd->ipath_flags |= IPATH_SWAP_PIOBUFS;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_setup_ht_cleanup - clean up any per-chip chip-specific stuff
+ * @dd: the infinipath device
+ *
+ * Called during driver unload.
+ * This is currently a nop for the HT chip, not for all chips
+ */
+static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
+{
+}
+
+/**
+ * ipath_setup_ht_setextled - set the state of the two external LEDs
+ * @dd: the infinipath device
+ * @lst: the L state
+ * @ltst: the LT state
+ *
+ * Set the state of the two external LEDs, to indicate physical and
+ * logical state of IB link. For this chip (at least with recommended
+ * board pinouts), LED1 is Green (physical state), and LED2 is Yellow
+ * (logical state)
+ *
+ * Note: We try to match the Mellanox HCA LED behavior as best
+ * we can. Green indicates physical link state is OK (something is
+ * plugged in, and we can train).
+ * Amber indicates the link is logically up (ACTIVE).
+ * Mellanox further blinks the amber LED to indicate data packet
+ * activity, but we have no hardware support for that, so it would
+ * require waking up every 10-20 msecs and checking the counters
+ * on the chip, and then turning the LED off if appropriate. That's
+ * visible overhead, so not something we will do.
+ *
+ */
+static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
+ u64 lst, u64 ltst)
+{
+ u64 extctl;
+ unsigned long flags = 0;
+
+ /* the diags use the LED to indicate diag info, so we leave
+ * the external LED alone when the diags are running */
+ if (ipath_diag_inuse)
+ return;
+
+	/* Allow override of LED display, e.g. for locating a system in a rack */
+ if (dd->ipath_led_override) {
+ ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
+ ? INFINIPATH_IBCS_LT_STATE_LINKUP
+ : INFINIPATH_IBCS_LT_STATE_DISABLED;
+ lst = (dd->ipath_led_override & IPATH_LED_LOG)
+ ? INFINIPATH_IBCS_L_STATE_ACTIVE
+ : INFINIPATH_IBCS_L_STATE_DOWN;
+ }
+
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+ /*
+ * start by setting both LED control bits to off, then turn
+ * on the appropriate bit(s).
+ */
+ if (dd->ipath_boardrev == 8) { /* LS/X-1 uses different pins */
+ /*
+ * major difference is that INFINIPATH_EXTC_LEDGBLERR_OFF
+ * is inverted, because it is normally used to indicate
+ * a hardware fault at reset, if there were errors
+ */
+ extctl = (dd->ipath_extctrl & ~INFINIPATH_EXTC_LEDGBLOK_ON)
+ | INFINIPATH_EXTC_LEDGBLERR_OFF;
+ if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
+ extctl &= ~INFINIPATH_EXTC_LEDGBLERR_OFF;
+ if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
+ extctl |= INFINIPATH_EXTC_LEDGBLOK_ON;
+ }
+ else {
+ extctl = dd->ipath_extctrl &
+ ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
+ INFINIPATH_EXTC_LED2PRIPORT_ON);
+ if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
+ extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
+ if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
+ extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
+ }
+ dd->ipath_extctrl = extctl;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
+}
+
+static void ipath_init_ht_variables(struct ipath_devdata *dd)
+{
+ /*
+ * setup the register offsets, since they are different for each
+ * chip
+ */
+ dd->ipath_kregs = &ipath_ht_kregs;
+ dd->ipath_cregs = &ipath_ht_cregs;
+
+ dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+ dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+ dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+ dd->ipath_gpio_scl = IPATH_GPIO_SCL;
+
+ /*
+ * Fill in data for field-values that change in newer chips.
+ * We dynamically specify only the mask for LINKTRAININGSTATE
+ * and only the shift for LINKSTATE, as they are the only ones
+ * that change. Also precalculate the 3 link states of interest
+ * and the combined mask.
+ */
+ dd->ibcs_ls_shift = IBA6110_IBCS_LINKSTATE_SHIFT;
+ dd->ibcs_lts_mask = IBA6110_IBCS_LINKTRAININGSTATE_MASK;
+ dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
+ dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
+ dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+ (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
+ dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+ (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
+ dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+ (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
+
+ /*
+ * Fill in data for ibcc field-values that change in newer chips.
+ * We dynamically specify only the mask for LINKINITCMD
+ * and only the shift for LINKCMD and MAXPKTLEN, as they are
+ * the only ones that change.
+ */
+ dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK;
+ dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT;
+ dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
+
+ /* Fill in shifts for RcvCtrl. */
+ dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
+ dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT;
+ dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT;
+ dd->ipath_r_portcfg_shift = 0; /* Not on IBA6110 */
+
+ dd->ipath_i_bitsextant =
+ (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
+ (INFINIPATH_I_RCVAVAIL_MASK <<
+ INFINIPATH_I_RCVAVAIL_SHIFT) |
+ INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
+ INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
+
+ dd->ipath_e_bitsextant =
+ INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
+ INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
+ INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
+ INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
+ INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
+ INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
+ INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+ INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
+ INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
+ INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
+ INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
+ INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
+ INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
+ INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
+ INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
+ INFINIPATH_E_HARDWARE;
+
+ dd->ipath_hwe_bitsextant =
+ (INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
+ (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
+ (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
+ INFINIPATH_HWE_HTCLNKABYTE0CRCERR |
+ INFINIPATH_HWE_HTCLNKABYTE1CRCERR |
+ INFINIPATH_HWE_HTCLNKBBYTE0CRCERR |
+ INFINIPATH_HWE_HTCLNKBBYTE1CRCERR |
+ INFINIPATH_HWE_HTCMISCERR4 |
+ INFINIPATH_HWE_HTCMISCERR5 | INFINIPATH_HWE_HTCMISCERR6 |
+ INFINIPATH_HWE_HTCMISCERR7 |
+ INFINIPATH_HWE_HTCBUSTREQPARITYERR |
+ INFINIPATH_HWE_HTCBUSTRESPPARITYERR |
+ INFINIPATH_HWE_HTCBUSIREQPARITYERR |
+ INFINIPATH_HWE_RXDSYNCMEMPARITYERR |
+ INFINIPATH_HWE_MEMBISTFAILED |
+ INFINIPATH_HWE_COREPLL_FBSLIP |
+ INFINIPATH_HWE_COREPLL_RFSLIP |
+ INFINIPATH_HWE_HTBPLL_FBSLIP |
+ INFINIPATH_HWE_HTBPLL_RFSLIP |
+ INFINIPATH_HWE_HTAPLL_FBSLIP |
+ INFINIPATH_HWE_HTAPLL_RFSLIP |
+ INFINIPATH_HWE_SERDESPLLFAILED |
+ INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
+ INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
+
+ dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+ dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+ dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
+ dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
+
+ /*
+ * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+ * 2 is Some Misc, 3 is reserved for future.
+ */
+ dd->ipath_eep_st_masks[0].hwerrs_to_log =
+ INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
+
+ dd->ipath_eep_st_masks[1].hwerrs_to_log =
+ INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
+
+ dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
+
+ dd->delay_mult = 2; /* SDR, 4X, can't change */
+
+ dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
+ dd->ipath_link_speed_supported = IPATH_IB_SDR;
+ dd->ipath_link_width_enabled = IB_WIDTH_4X;
+ dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
+ /* these can't change for this chip, so set once */
+ dd->ipath_link_width_active = dd->ipath_link_width_enabled;
+ dd->ipath_link_speed_active = dd->ipath_link_speed_enabled;
+}
+
+/**
+ * ipath_ht_init_hwerrors - enable hardware errors
+ * @dd: the infinipath device
+ *
+ * now that we have finished initializing everything that might reasonably
+ * cause a hardware error, and cleared those error bits as they occur,
+ * we can enable hardware errors in the mask (potentially enabling
+ * freeze mode), and enable hardware errors as errors (along with
+ * everything else) in errormask
+ */
+static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
+{
+ ipath_err_t val;
+ u64 extsval;
+
+ extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
+
+ if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
+ ipath_dev_err(dd, "MemBIST did not complete!\n");
+ if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
+ ipath_dbg("MemBIST corrected\n");
+
+ ipath_check_htlink(dd);
+
+	/*
+	 * barring bugs, all hwerrors become interrupts; start with all
+	 * of them enabled, then mask off specific bits below
+	 */
+ val = -1LL;
+ /* don't look at crc lane1 if 8 bit */
+ if (dd->ipath_flags & IPATH_8BIT_IN_HT0)
+ val &= ~infinipath_hwe_htclnkabyte1crcerr;
+ /* don't look at crc lane1 if 8 bit */
+ if (dd->ipath_flags & IPATH_8BIT_IN_HT1)
+ val &= ~infinipath_hwe_htclnkbbyte1crcerr;
+
+ /*
+ * disable RXDSYNCMEMPARITY because external serdes is unused,
+ * and therefore the logic will never be used or initialized,
+ * and uninitialized state will normally result in this error
+	 * being asserted. Similarly for the external serdes pll
+ * lock signal.
+ */
+ val &= ~(INFINIPATH_HWE_SERDESPLLFAILED |
+ INFINIPATH_HWE_RXDSYNCMEMPARITYERR);
+
+ /*
+ * Disable MISCERR4 because of an inversion in the HT core
+ * logic checking for errors that cause this bit to be set.
+ * The errata can also cause the protocol error bit to be set
+ * in the HT config space linkerror register(s).
+ */
+ val &= ~INFINIPATH_HWE_HTCMISCERR4;
+
+ /*
+ * PLL ignored because unused MDIO interface has a logic problem
+ */
+ if (dd->ipath_boardrev == 4 || dd->ipath_boardrev == 9)
+ val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
+ dd->ipath_hwerrmask = val;
+}
+
+
+
+
+/**
+ * ipath_ht_bringup_serdes - bring up the serdes
+ * @dd: the infinipath device
+ */
+static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
+{
+ u64 val, config1;
+ int ret = 0, change = 0;
+
+ ipath_dbg("Trying to bringup serdes\n");
+
+ if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
+ INFINIPATH_HWE_SERDESPLLFAILED)
+ {
+ ipath_dbg("At start, serdes PLL failed bit set in "
+ "hwerrstatus, clearing and continuing\n");
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+ INFINIPATH_HWE_SERDESPLLFAILED);
+ }
+
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
+ config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
+
+ ipath_cdbg(VERBOSE, "Initial serdes status is config0=%llx "
+ "config1=%llx, sstatus=%llx xgxs %llx\n",
+ (unsigned long long) val, (unsigned long long) config1,
+ (unsigned long long)
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
+ (unsigned long long)
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
+
+ /* force reset on */
+ val |= INFINIPATH_SERDC0_RESET_PLL
+ /* | INFINIPATH_SERDC0_RESET_MASK */
+ ;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
+ udelay(15); /* need pll reset set at least for a bit */
+
+ if (val & INFINIPATH_SERDC0_RESET_PLL) {
+ u64 val2 = val &= ~INFINIPATH_SERDC0_RESET_PLL;
+ /* set lane resets, and tx idle, during pll reset */
+ val2 |= INFINIPATH_SERDC0_RESET_MASK |
+ INFINIPATH_SERDC0_TXIDLE;
+ ipath_cdbg(VERBOSE, "Clearing serdes PLL reset (writing "
+ "%llx)\n", (unsigned long long) val2);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
+ val2);
+ /*
+ * be sure chip saw it
+ */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ /*
+ * need pll reset clear at least 11 usec before lane
+ * resets cleared; give it a few more
+ */
+ udelay(15);
+ val = val2; /* for check below */
+ }
+
+ if (val & (INFINIPATH_SERDC0_RESET_PLL |
+ INFINIPATH_SERDC0_RESET_MASK |
+ INFINIPATH_SERDC0_TXIDLE)) {
+ val &= ~(INFINIPATH_SERDC0_RESET_PLL |
+ INFINIPATH_SERDC0_RESET_MASK |
+ INFINIPATH_SERDC0_TXIDLE);
+ /* clear them */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0,
+ val);
+ }
+
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ if (val & INFINIPATH_XGXS_RESET) {
+ /* normally true after boot */
+ val &= ~INFINIPATH_XGXS_RESET;
+ change = 1;
+ }
+ if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+ INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
+ /* need to compensate for Tx inversion in partner */
+ val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+ INFINIPATH_XGXS_RX_POL_SHIFT);
+ val |= dd->ipath_rx_pol_inv <<
+ INFINIPATH_XGXS_RX_POL_SHIFT;
+ change = 1;
+ }
+ if (change)
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
+
+ /* clear current and de-emphasis bits */
+ config1 &= ~0x0ffffffff00ULL;
+ /* set current to 20ma */
+ config1 |= 0x00000000000ULL;
+ /* set de-emphasis to -5.68dB */
+ config1 |= 0x0cccc000000ULL;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
+
+ ipath_cdbg(VERBOSE, "After setup: serdes status is config0=%llx "
+ "config1=%llx, sstatus=%llx xgxs %llx\n",
+ (unsigned long long) val, (unsigned long long) config1,
+ (unsigned long long)
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
+ (unsigned long long)
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
+
+ return ret; /* for now, say we always succeeded */
+}
+
+/**
+ * ipath_ht_quiet_serdes - set serdes to txidle
+ * @dd: the infinipath device
+ *
+ * Called when the driver is being unloaded
+ */
+static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
+{
+ u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
+
+ val |= INFINIPATH_SERDC0_TXIDLE;
+ ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
+ (unsigned long long) val);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
+}
+
+/**
+ * ipath_ht_put_tid - write a TID to the chip
+ * @dd: the infinipath device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
+ * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for special locking etc.
+ * It's used both for the full cleanup on exit and for the normal
+ * setup and teardown.
+ */
+static void ipath_ht_put_tid(struct ipath_devdata *dd,
+ u64 __iomem *tidptr, u32 type,
+ unsigned long pa)
+{
+ if (!dd->ipath_kregbase)
+ return;
+
+ if (pa != dd->ipath_tidinvalid) {
+ if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
+ dev_info(&dd->pcidev->dev,
+ "physaddr %lx has more than "
+ "40 bits, using only 40!!!\n", pa);
+ pa &= INFINIPATH_RT_ADDR_MASK;
+ }
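+		/*
+		 * Eager TIDs use the precomputed template (buffer size and
+		 * valid bit); expected TIDs encode a fixed full-page length.
+		 */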
+ if (type == RCVHQ_RCV_TYPE_EAGER)
+ pa |= dd->ipath_tidtemplate;
+ else {
+ /* in words (fixed, full page). */
+ u64 lenvalid = PAGE_SIZE >> 2;
+ lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT;
+ pa |= lenvalid | INFINIPATH_RT_VALID;
+ }
+ }
+
+ writeq(pa, tidptr);
+}
+
+
+/**
+ * ipath_ht_clear_tids - clear all TID entries for a port, expected and eager
+ * @dd: the infinipath device
+ * @port: the port
+ *
+ * Used from ipath_close(), and at chip initialization.
+ */
+static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
+{
+ u64 __iomem *tidbase;
+ int i;
+
+ if (!dd->ipath_kregbase)
+ return;
+
+ ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
+
+ /*
+ * need to invalidate all of the expected TID entries for this
+ * port, so we don't have valid entries that might somehow get
+ * used (early in next use of this port, or through some bug)
+ */
+ tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
+ dd->ipath_rcvtidbase +
+ port * dd->ipath_rcvtidcnt *
+ sizeof(*tidbase));
+ for (i = 0; i < dd->ipath_rcvtidcnt; i++)
+ ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+ dd->ipath_tidinvalid);
+
+ tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
+ dd->ipath_rcvegrbase +
+ port * dd->ipath_rcvegrcnt *
+ sizeof(*tidbase));
+
+ for (i = 0; i < dd->ipath_rcvegrcnt; i++)
+ ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
+ dd->ipath_tidinvalid);
+}
+
+/**
+ * ipath_ht_tidtemplate - setup constants for TID updates
+ * @dd: the infinipath device
+ *
+ * We set up values that we use a lot, to avoid recalculating them each time
+ */
+static void ipath_ht_tidtemplate(struct ipath_devdata *dd)
+{
+ dd->ipath_tidtemplate = dd->ipath_ibmaxlen >> 2;
+ dd->ipath_tidtemplate <<= INFINIPATH_RT_BUFSIZE_SHIFT;
+ dd->ipath_tidtemplate |= INFINIPATH_RT_VALID;
+
+ /*
+ * work around chip errata bug 7358, by marking invalid tids
+ * as having max length
+ */
+ dd->ipath_tidinvalid = (-1LL & INFINIPATH_RT_BUFSIZE_MASK) <<
+ INFINIPATH_RT_BUFSIZE_SHIFT;
+}
+
+static int ipath_ht_early_init(struct ipath_devdata *dd)
+{
+ u32 __iomem *piobuf;
+ u32 pioincr, val32;
+ int i;
+
+ /*
+ * one cache line; long IB headers will spill over into received
+ * buffer
+ */
+ dd->ipath_rcvhdrentsize = 16;
+ dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
+
+ /*
+ * For HT, we allocate a somewhat overly large eager buffer,
+ * such that we can guarantee that we can receive the largest
+ * packet that we can send out. To truly support a 4KB MTU,
+ * we need to bump this to a large value. To date, other than
+ * testing, we have never encountered an HCA that can really
+ * send 4KB MTU packets, so we do not handle that (we'll get
+	 * error interrupts if we ever see one).
+ */
+ dd->ipath_rcvegrbufsize = dd->ipath_piosize2k;
+
+ /*
+ * the min() check here is currently a nop, but it may not
+ * always be, depending on just how we do ipath_rcvegrbufsize
+ */
+ dd->ipath_ibmaxlen = min(dd->ipath_piosize2k,
+ dd->ipath_rcvegrbufsize);
+ dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
+ ipath_ht_tidtemplate(dd);
+
+ /*
+ * zero all the TID entries at startup. We do this for sanity,
+ * in case of a previous driver crash of some kind, and also
+ * because the chip powers up with these memories in an unknown
+ * state. Use portcnt, not cfgports, since this is for the
+ * full chip, not for current (possibly different) configuration
+ * value.
+ * Chip Errata bug 6447
+ */
+ for (val32 = 0; val32 < dd->ipath_portcnt; val32++)
+ ipath_ht_clear_tids(dd, val32);
+
+ /*
+ * write the pbc of each buffer, to be sure it's initialized, then
+ * cancel all the buffers, and also abort any packets that might
+ * have been in flight for some reason (the latter is for driver
+ * unload/reload, but isn't a bad idea at first init). PIO send
+ * isn't enabled at this point, so there is no danger of sending
+ * these out on the wire.
+ * Chip Errata bug 6610
+ */
+ piobuf = (u32 __iomem *) (((char __iomem *)(dd->ipath_kregbase)) +
+ dd->ipath_piobufbase);
+ pioincr = dd->ipath_palign / sizeof(*piobuf);
+ for (i = 0; i < dd->ipath_piobcnt2k; i++) {
+ /*
+ * reasonable word count, just to init pbc
+ */
+ writel(16, piobuf);
+ piobuf += pioincr;
+ }
+
+ ipath_get_eeprom_info(dd);
+ if (dd->ipath_boardrev == 5) {
+ /*
+ * Later production QHT7040 has same changes as QHT7140, so
+ * can use GPIO interrupts. They have serial #'s starting
+ * with 128, rather than 112.
+ */
+ if (dd->ipath_serial[0] == '1' &&
+ dd->ipath_serial[1] == '2' &&
+ dd->ipath_serial[2] == '8')
+ dd->ipath_flags |= IPATH_GPIO_INTR;
+ else {
+ ipath_dev_err(dd, "Unsupported InfiniPath board "
+ "(serial number %.16s)!\n",
+ dd->ipath_serial);
+ return 1;
+ }
+ }
+
+ if (dd->ipath_minrev >= 4) {
+ /* Rev4+ reports extra errors via internal GPIO pins */
+ dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
+ dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+ dd->ipath_gpio_mask);
+ }
+
+ return 0;
+}
+
+
+/**
+ * ipath_ht_get_base_info - set chip-specific flags for user code
+ * @pd: the infinipath port data
+ * @kbase: ipath_base_info pointer
+ *
+ * We set the HT flag because the different bandwidth of HyperTransport
+ * vs PCIe can affect some user packet algorithms.
+ */
+static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
+{
+ struct ipath_base_info *kinfo = kbase;
+
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
+ IPATH_RUNTIME_PIO_REGSWAPPED;
+
+ if (pd->port_dd->ipath_minrev < 4)
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY;
+
+ return 0;
+}
+
+static void ipath_ht_free_irq(struct ipath_devdata *dd)
+{
+ free_irq(dd->ipath_irq, dd);
+ ht_destroy_irq(dd->ipath_irq);
+ dd->ipath_irq = 0;
+ dd->ipath_intconfig = 0;
+}
+
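+/* the message header follows the 64-bit receive header flags (rhf) word */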
+static struct ipath_message_header *
+ipath_ht_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
+{
+ return (struct ipath_message_header *)
+ &rhf_addr[sizeof(u64) / sizeof(u32)];
+}
+
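+/* port count is fixed by the chip; ignore cfgports and read it from hardware */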
+static void ipath_ht_config_ports(struct ipath_devdata *dd, ushort cfgports)
+{
+ dd->ipath_portcnt =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
+ dd->ipath_p0_rcvegrcnt =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
+}
+
+static void ipath_ht_read_counters(struct ipath_devdata *dd,
+ struct infinipath_counters *cntrs)
+{
+ cntrs->LBIntCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt));
+ cntrs->LBFlowStallCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt));
+ cntrs->TxSDmaDescCnt = 0;
+ cntrs->TxUnsupVLErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt));
+ cntrs->TxDataPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt));
+ cntrs->TxFlowPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt));
+ cntrs->TxDwordCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt));
+ cntrs->TxLenErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt));
+ cntrs->TxMaxMinLenErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt));
+ cntrs->TxUnderrunCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt));
+ cntrs->TxFlowStallCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt));
+ cntrs->TxDroppedPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt));
+ cntrs->RxDroppedPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt));
+ cntrs->RxDataPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt));
+ cntrs->RxFlowPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt));
+ cntrs->RxDwordCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt));
+ cntrs->RxLenErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt));
+ cntrs->RxMaxMinLenErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt));
+ cntrs->RxICRCErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt));
+ cntrs->RxVCRCErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt));
+ cntrs->RxFlowCtrlErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt));
+ cntrs->RxBadFormatCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt));
+ cntrs->RxLinkProblemCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt));
+ cntrs->RxEBPCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt));
+ cntrs->RxLPCRCErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt));
+ cntrs->RxBufOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt));
+ cntrs->RxTIDFullErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt));
+ cntrs->RxTIDValidErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt));
+ cntrs->RxPKeyMismatchCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt));
+ cntrs->RxP0HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt));
+ cntrs->RxP1HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt));
+ cntrs->RxP2HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt));
+ cntrs->RxP3HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt));
+ cntrs->RxP4HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt));
+ cntrs->RxP5HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP5HdrEgrOvflCnt));
+ cntrs->RxP6HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP6HdrEgrOvflCnt));
+ cntrs->RxP7HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP7HdrEgrOvflCnt));
+ cntrs->RxP8HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP8HdrEgrOvflCnt));
+ cntrs->RxP9HdrEgrOvflCnt = 0;
+ cntrs->RxP10HdrEgrOvflCnt = 0;
+ cntrs->RxP11HdrEgrOvflCnt = 0;
+ cntrs->RxP12HdrEgrOvflCnt = 0;
+ cntrs->RxP13HdrEgrOvflCnt = 0;
+ cntrs->RxP14HdrEgrOvflCnt = 0;
+ cntrs->RxP15HdrEgrOvflCnt = 0;
+ cntrs->RxP16HdrEgrOvflCnt = 0;
+ cntrs->IBStatusChangeCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt));
+ cntrs->IBLinkErrRecoveryCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt));
+ cntrs->IBLinkDownedCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt));
+ cntrs->IBSymbolErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt));
+ cntrs->RxVL15DroppedPktCnt = 0;
+ cntrs->RxOtherLocalPhyErrCnt = 0;
+ cntrs->PcieRetryBufDiagQwordCnt = 0;
+ cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs;
+ cntrs->LocalLinkIntegrityErrCnt =
+ (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
+ dd->ipath_lli_errs : dd->ipath_lli_errors;
+ cntrs->RxVlErrCnt = 0;
+ cntrs->RxDlidFltrCnt = 0;
+}
+
+
+/* no interrupt fallback for these chips */
+static int ipath_ht_nointr_fallback(struct ipath_devdata *dd)
+{
+ return 0;
+}
+
+
+/*
+ * reset the XGXS (between serdes and IBC). Slightly less intrusive
+ * than resetting the IBC or external link state, and useful in some
+ * cases to cause some retraining. To do this right, we reset IBC
+ * as well.
+ */
+static void ipath_ht_xgxs_reset(struct ipath_devdata *dd)
+{
+ u64 val, prev_val;
+
+ prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ val = prev_val | INFINIPATH_XGXS_RESET;
+ prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+}
+
+
+static int ipath_ht_get_ib_cfg(struct ipath_devdata *dd, int which)
+{
+ int ret;
+
+ switch (which) {
+ case IPATH_IB_CFG_LWID:
+ ret = dd->ipath_link_width_active;
+ break;
+ case IPATH_IB_CFG_SPD:
+ ret = dd->ipath_link_speed_active;
+ break;
+ case IPATH_IB_CFG_LWID_ENB:
+ ret = dd->ipath_link_width_enabled;
+ break;
+ case IPATH_IB_CFG_SPD_ENB:
+ ret = dd->ipath_link_speed_enabled;
+ break;
+ default:
+ ret = -ENOTSUPP;
+ break;
+ }
+ return ret;
+}
+
+
+/* we assume range checking is already done, if needed */
+static int ipath_ht_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
+{
+ int ret = 0;
+
+ if (which == IPATH_IB_CFG_LWID_ENB)
+ dd->ipath_link_width_enabled = val;
+ else if (which == IPATH_IB_CFG_SPD_ENB)
+ dd->ipath_link_speed_enabled = val;
+ else
+ ret = -ENOTSUPP;
+ return ret;
+}
+
+
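+/* nothing to configure for jint on this chip; this is a no-op */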
+static void ipath_ht_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
+{
+}
+
+
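+/* called on IB link up/down; just update the external LEDs to match the new state */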
+static int ipath_ht_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
+{
+ ipath_setup_ht_setextled(dd, ipath_ib_linkstate(dd, ibcs),
+ ipath_ib_linktrstate(dd, ibcs));
+ return 0;
+}
+
+
+/**
+ * ipath_init_iba6110_funcs - set up the chip-specific function pointers
+ * @dd: the infinipath device
+ *
+ * This is global, and is called directly at init to set up the
+ * chip-specific function pointers for later use.
+ */
+void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
+{
+ dd->ipath_f_intrsetup = ipath_ht_intconfig;
+ dd->ipath_f_bus = ipath_setup_ht_config;
+ dd->ipath_f_reset = ipath_setup_ht_reset;
+ dd->ipath_f_get_boardname = ipath_ht_boardname;
+ dd->ipath_f_init_hwerrors = ipath_ht_init_hwerrors;
+ dd->ipath_f_early_init = ipath_ht_early_init;
+ dd->ipath_f_handle_hwerrors = ipath_ht_handle_hwerrors;
+ dd->ipath_f_quiet_serdes = ipath_ht_quiet_serdes;
+ dd->ipath_f_bringup_serdes = ipath_ht_bringup_serdes;
+ dd->ipath_f_clear_tids = ipath_ht_clear_tids;
+ dd->ipath_f_put_tid = ipath_ht_put_tid;
+ dd->ipath_f_cleanup = ipath_setup_ht_cleanup;
+ dd->ipath_f_setextled = ipath_setup_ht_setextled;
+ dd->ipath_f_get_base_info = ipath_ht_get_base_info;
+ dd->ipath_f_free_irq = ipath_ht_free_irq;
+ dd->ipath_f_tidtemplate = ipath_ht_tidtemplate;
+ dd->ipath_f_intr_fallback = ipath_ht_nointr_fallback;
+ dd->ipath_f_get_msgheader = ipath_ht_get_msgheader;
+ dd->ipath_f_config_ports = ipath_ht_config_ports;
+ dd->ipath_f_read_counters = ipath_ht_read_counters;
+ dd->ipath_f_xgxs_reset = ipath_ht_xgxs_reset;
+ dd->ipath_f_get_ib_cfg = ipath_ht_get_ib_cfg;
+ dd->ipath_f_set_ib_cfg = ipath_ht_set_ib_cfg;
+ dd->ipath_f_config_jint = ipath_ht_config_jint;
+ dd->ipath_f_ib_updown = ipath_ht_ib_updown;
+
+ /*
+ * initialize chip-specific variables
+ */
+ ipath_init_ht_variables(dd);
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
new file mode 100644
index 0000000..421cc2a
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -0,0 +1,1801 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*
+ * This file contains all of the code that is specific to the
+ * InfiniPath PCIe chip.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <rdma/ib_verbs.h>
+
+#include "ipath_kernel.h"
+#include "ipath_registers.h"
+
+static void ipath_setup_pe_setextled(struct ipath_devdata *, u64, u64);
+
+/*
+ * This file contains all the chip-specific register information and
+ * access functions for the QLogic InfiniPath PCI-Express chip.
+ *
+ * This lists the InfiniPath registers, in the actual chip layout.
+ * This structure should never be directly accessed.
+ */
+struct _infinipath_do_not_use_kernel_regs {
+ unsigned long long Revision;
+ unsigned long long Control;
+ unsigned long long PageAlign;
+ unsigned long long PortCnt;
+ unsigned long long DebugPortSelect;
+ unsigned long long Reserved0;
+ unsigned long long SendRegBase;
+ unsigned long long UserRegBase;
+ unsigned long long CounterRegBase;
+ unsigned long long Scratch;
+ unsigned long long Reserved1;
+ unsigned long long Reserved2;
+ unsigned long long IntBlocked;
+ unsigned long long IntMask;
+ unsigned long long IntStatus;
+ unsigned long long IntClear;
+ unsigned long long ErrorMask;
+ unsigned long long ErrorStatus;
+ unsigned long long ErrorClear;
+ unsigned long long HwErrMask;
+ unsigned long long HwErrStatus;
+ unsigned long long HwErrClear;
+ unsigned long long HwDiagCtrl;
+ unsigned long long MDIO;
+ unsigned long long IBCStatus;
+ unsigned long long IBCCtrl;
+ unsigned long long ExtStatus;
+ unsigned long long ExtCtrl;
+ unsigned long long GPIOOut;
+ unsigned long long GPIOMask;
+ unsigned long long GPIOStatus;
+ unsigned long long GPIOClear;
+ unsigned long long RcvCtrl;
+ unsigned long long RcvBTHQP;
+ unsigned long long RcvHdrSize;
+ unsigned long long RcvHdrCnt;
+ unsigned long long RcvHdrEntSize;
+ unsigned long long RcvTIDBase;
+ unsigned long long RcvTIDCnt;
+ unsigned long long RcvEgrBase;
+ unsigned long long RcvEgrCnt;
+ unsigned long long RcvBufBase;
+ unsigned long long RcvBufSize;
+ unsigned long long RxIntMemBase;
+ unsigned long long RxIntMemSize;
+ unsigned long long RcvPartitionKey;
+ unsigned long long Reserved3;
+ unsigned long long RcvPktLEDCnt;
+ unsigned long long Reserved4[8];
+ unsigned long long SendCtrl;
+ unsigned long long SendPIOBufBase;
+ unsigned long long SendPIOSize;
+ unsigned long long SendPIOBufCnt;
+ unsigned long long SendPIOAvailAddr;
+ unsigned long long TxIntMemBase;
+ unsigned long long TxIntMemSize;
+ unsigned long long Reserved5;
+ unsigned long long PCIeRBufTestReg0;
+ unsigned long long PCIeRBufTestReg1;
+ unsigned long long Reserved51[6];
+ unsigned long long SendBufferError;
+ unsigned long long SendBufferErrorCONT1;
+ unsigned long long Reserved6SBE[6];
+ unsigned long long RcvHdrAddr0;
+ unsigned long long RcvHdrAddr1;
+ unsigned long long RcvHdrAddr2;
+ unsigned long long RcvHdrAddr3;
+ unsigned long long RcvHdrAddr4;
+ unsigned long long Reserved7RHA[11];
+ unsigned long long RcvHdrTailAddr0;
+ unsigned long long RcvHdrTailAddr1;
+ unsigned long long RcvHdrTailAddr2;
+ unsigned long long RcvHdrTailAddr3;
+ unsigned long long RcvHdrTailAddr4;
+ unsigned long long Reserved8RHTA[11];
+ unsigned long long Reserved9SW[8];
+ unsigned long long SerdesConfig0;
+ unsigned long long SerdesConfig1;
+ unsigned long long SerdesStatus;
+ unsigned long long XGXSConfig;
+ unsigned long long IBPLLCfg;
+ unsigned long long Reserved10SW2[3];
+ unsigned long long PCIEQ0SerdesConfig0;
+ unsigned long long PCIEQ0SerdesConfig1;
+ unsigned long long PCIEQ0SerdesStatus;
+ unsigned long long Reserved11;
+ unsigned long long PCIEQ1SerdesConfig0;
+ unsigned long long PCIEQ1SerdesConfig1;
+ unsigned long long PCIEQ1SerdesStatus;
+ unsigned long long Reserved12;
+};
+
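+/*
+ * Counter registers, in the actual chip layout; like the register
+ * struct above, this should never be accessed directly.
+ */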
+struct _infinipath_do_not_use_counters {
+ __u64 LBIntCnt;
+ __u64 LBFlowStallCnt;
+ __u64 Reserved1;
+ __u64 TxUnsupVLErrCnt;
+ __u64 TxDataPktCnt;
+ __u64 TxFlowPktCnt;
+ __u64 TxDwordCnt;
+ __u64 TxLenErrCnt;
+ __u64 TxMaxMinLenErrCnt;
+ __u64 TxUnderrunCnt;
+ __u64 TxFlowStallCnt;
+ __u64 TxDroppedPktCnt;
+ __u64 RxDroppedPktCnt;
+ __u64 RxDataPktCnt;
+ __u64 RxFlowPktCnt;
+ __u64 RxDwordCnt;
+ __u64 RxLenErrCnt;
+ __u64 RxMaxMinLenErrCnt;
+ __u64 RxICRCErrCnt;
+ __u64 RxVCRCErrCnt;
+ __u64 RxFlowCtrlErrCnt;
+ __u64 RxBadFormatCnt;
+ __u64 RxLinkProblemCnt;
+ __u64 RxEBPCnt;
+ __u64 RxLPCRCErrCnt;
+ __u64 RxBufOvflCnt;
+ __u64 RxTIDFullErrCnt;
+ __u64 RxTIDValidErrCnt;
+ __u64 RxPKeyMismatchCnt;
+ __u64 RxP0HdrEgrOvflCnt;
+ __u64 RxP1HdrEgrOvflCnt;
+ __u64 RxP2HdrEgrOvflCnt;
+ __u64 RxP3HdrEgrOvflCnt;
+ __u64 RxP4HdrEgrOvflCnt;
+ __u64 RxP5HdrEgrOvflCnt;
+ __u64 RxP6HdrEgrOvflCnt;
+ __u64 RxP7HdrEgrOvflCnt;
+ __u64 RxP8HdrEgrOvflCnt;
+ __u64 Reserved6;
+ __u64 Reserved7;
+ __u64 IBStatusChangeCnt;
+ __u64 IBLinkErrRecoveryCnt;
+ __u64 IBLinkDownedCnt;
+ __u64 IBSymbolErrCnt;
+};
+
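+/* convert a field in the layout structs above into a 64-bit register-word offset */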
+#define IPATH_KREG_OFFSET(field) (offsetof( \
+ struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
+#define IPATH_CREG_OFFSET(field) (offsetof( \
+ struct _infinipath_do_not_use_counters, field) / sizeof(u64))
+
+static const struct ipath_kregs ipath_pe_kregs = {
+ .kr_control = IPATH_KREG_OFFSET(Control),
+ .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
+ .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
+ .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
+ .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
+ .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
+ .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
+ .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
+ .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
+ .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
+ .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
+ .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
+ .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
+ .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
+ .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
+ .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
+ .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
+ .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
+ .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
+ .kr_intclear = IPATH_KREG_OFFSET(IntClear),
+ .kr_intmask = IPATH_KREG_OFFSET(IntMask),
+ .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
+ .kr_mdio = IPATH_KREG_OFFSET(MDIO),
+ .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
+ .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
+ .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
+ .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
+ .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
+ .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
+ .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
+ .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
+ .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
+ .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
+ .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
+ .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
+ .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
+ .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
+ .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
+ .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
+ .kr_revision = IPATH_KREG_OFFSET(Revision),
+ .kr_scratch = IPATH_KREG_OFFSET(Scratch),
+ .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
+ .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
+ .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
+ .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
+ .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
+ .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
+ .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
+ .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
+ .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
+ .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
+ .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
+ .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
+ .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
+ .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
+ .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg),
+
+ /*
+	 * These should not be used directly via ipath_write_kreg64();
+	 * use them with ipath_write_kreg64_port() instead.
+ */
+ .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
+ .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
+
+ /* The rcvpktled register controls one of the debug port signals, so
+ * a packet activity LED can be connected to it. */
+ .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
+ .kr_pcierbuftestreg0 = IPATH_KREG_OFFSET(PCIeRBufTestReg0),
+ .kr_pcierbuftestreg1 = IPATH_KREG_OFFSET(PCIeRBufTestReg1),
+ .kr_pcieq0serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig0),
+ .kr_pcieq0serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig1),
+ .kr_pcieq0serdesstatus = IPATH_KREG_OFFSET(PCIEQ0SerdesStatus),
+ .kr_pcieq1serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig0),
+ .kr_pcieq1serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig1),
+ .kr_pcieq1serdesstatus = IPATH_KREG_OFFSET(PCIEQ1SerdesStatus)
+};
+
+static const struct ipath_cregs ipath_pe_cregs = {
+ .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
+ .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
+ .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
+ .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
+ .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
+ .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
+ .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
+ .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
+ .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
+ .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
+ .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
+ .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
+ .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
+ .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
+ .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
+ .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
+ .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
+ .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
+ .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
+ .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
+ .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
+ .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
+ .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
+ .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
+ .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
+ .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
+ .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
+ .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
+ .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
+ .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
+ .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
+ .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
+ .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
+};
+
+/* kr_control bits */
+#define INFINIPATH_C_RESET 1U
+
+/* kr_intstatus, kr_intclear, kr_intmask bits */
+#define INFINIPATH_I_RCVURG_MASK ((1U<<5)-1)
+#define INFINIPATH_I_RCVURG_SHIFT 0
+#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<5)-1)
+#define INFINIPATH_I_RCVAVAIL_SHIFT 12
+
+/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
+#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL
+#define INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT 0
+#define INFINIPATH_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL
+#define INFINIPATH_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL
+#define INFINIPATH_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL
+#define INFINIPATH_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL
+#define INFINIPATH_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL
+#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL
+#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL
+#define INFINIPATH_HWE_PCIE1PLLFAILED 0x0400000000000000ULL
+#define INFINIPATH_HWE_PCIE0PLLFAILED 0x0800000000000000ULL
+#define INFINIPATH_HWE_SERDESPLLFAILED 0x1000000000000000ULL
+
+#define IBA6120_IBCS_LINKTRAININGSTATE_MASK 0xf
+#define IBA6120_IBCS_LINKSTATE_SHIFT 4
+
+/* kr_extstatus bits */
+#define INFINIPATH_EXTS_FREQSEL 0x2
+#define INFINIPATH_EXTS_SERDESSEL 0x4
+#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
+#define INFINIPATH_EXTS_MEMBIST_FOUND 0x0000000000008000
+
+/* kr_xgxsconfig bits */
+#define INFINIPATH_XGXS_RESET 0x5ULL
+
+#define _IPATH_GPIO_SDA_NUM 1
+#define _IPATH_GPIO_SCL_NUM 0
+
+#define IPATH_GPIO_SDA (1ULL << \
+ (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+#define IPATH_GPIO_SCL (1ULL << \
+ (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+
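+/* receive TID entry layout: buffer size code in bits 31:29, physical address in the low 29 bits */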
+#define INFINIPATH_RT_BUFSIZE_MASK 0xe0000000ULL
+#define INFINIPATH_RT_BUFSIZE_SHIFTVAL(tid) \
+ ((((tid) & INFINIPATH_RT_BUFSIZE_MASK) >> 29) + 11 - 1)
+#define INFINIPATH_RT_BUFSIZE(tid) (1 << INFINIPATH_RT_BUFSIZE_SHIFTVAL(tid))
+#define INFINIPATH_RT_IS_VALID(tid) \
+ (((tid) & INFINIPATH_RT_BUFSIZE_MASK) && \
+ ((((tid) & INFINIPATH_RT_BUFSIZE_MASK) != INFINIPATH_RT_BUFSIZE_MASK)))
+#define INFINIPATH_RT_ADDR_MASK 0x1FFFFFFFULL /* 29 bits valid */
+#define INFINIPATH_RT_ADDR_SHIFT 10
+
+#define INFINIPATH_R_INTRAVAIL_SHIFT 16
+#define INFINIPATH_R_TAILUPD_SHIFT 31
+
+/* 6120 specific hardware errors... */
+static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
+ INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"),
+ /*
+	 * In practice, it's unlikely that we'll see PCIe PLL, bus parity,
+	 * or memory parity error failures, because most likely we
+ * won't be able to talk to the core of the chip. Nonetheless, we
+ * might see them, if they are in parts of the PCIe core that aren't
+ * essential.
+ */
+ INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"),
+ INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"),
+ INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+ INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+};
+
+#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
+#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
+ << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
+
+static void ipath_pe_put_tid_2(struct ipath_devdata *, u64 __iomem *,
+ u32, unsigned long);
+
+/*
+ * On platforms using this chip, and not having ordered WC stores, we
+ * can get TXE parity errors due to speculative reads to the PIO buffers,
+ * and this, due to a chip bug, can result in (many) false parity error
+ * reports. So it's a debug print on those, and an info print on systems
+ * where the speculative reads don't occur.
+ */
+static void ipath_pe_txe_recover(struct ipath_devdata *dd)
+{
+ if (ipath_unordered_wc())
+ ipath_dbg("Recovering from TXE PIO parity error\n");
+ else {
+ ++ipath_stats.sps_txeparity;
+ dev_info(&dd->pcidev->dev,
+ "Recovering from TXE PIO parity error\n");
+ }
+}
+
+
+/**
+ * ipath_pe_handle_hwerrors - display hardware errors.
+ * @dd: the infinipath device
+ * @msg: the output buffer
+ * @msgl: the size of the output buffer
+ *
+ * Most hardware errors are catastrophic, but for right now,
+ * we'll print them and continue. We reuse the same message buffer
+ * as ipath_handle_errors() to avoid excessive stack use.
+ */
+static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
+ size_t msgl)
+{
+ ipath_err_t hwerrs;
+ u32 bits, ctrl;
+ int isfatal = 0;
+ char bitsmsg[64];
+ int log_idx;
+
+ hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
+ if (!hwerrs) {
+ /*
+		 * Better than printing confusing messages; this seems to be
+		 * related to clearing the CRC error, or the PLL error, during
+		 * init.
+ */
+ ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
+ return;
+ } else if (hwerrs == ~0ULL) {
+ ipath_dev_err(dd, "Read of hardware error status failed "
+ "(all bits set); ignoring\n");
+ return;
+ }
+ ipath_stats.sps_hwerrs++;
+
+ /* Always clear the error status register, except MEMBISTFAIL,
+ * regardless of whether we continue or stop using the chip.
+ * We want that set so we know it failed, even across driver reload.
+ * We'll still ignore it in the hwerrmask. We do this partly for
+	 * diagnostics, but also for support. */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+ hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
+
+ hwerrs &= dd->ipath_hwerrmask;
+
+ /* We log some errors to EEPROM, check if we have any of those. */
+ for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
+ if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
+ ipath_inc_eeprom_err(dd, log_idx, 1);
+
+ /*
+ * make sure we get this much out, unless told to be quiet,
+ * or it's occurred within the last 5 seconds
+ */
+ if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
+ RXE_EAGER_PARITY)) ||
+ (ipath_debug & __IPATH_VERBDBG))
+ dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
+ "(cleared)\n", (unsigned long long) hwerrs);
+ dd->ipath_lasthwerror |= hwerrs;
+
+ if (hwerrs & ~dd->ipath_hwe_bitsextant)
+ ipath_dev_err(dd, "hwerror interrupt with unknown errors "
+ "%llx set\n", (unsigned long long)
+ (hwerrs & ~dd->ipath_hwe_bitsextant));
+
+ ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
+ if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
+ /*
+ * parity errors in send memory are recoverable,
+		 * just cancel the send (if indicated in sendbuffererror),
+ * count the occurrence, unfreeze (if no other handled
+ * hardware error bits are set), and continue. They can
+ * occur if a processor speculative read is done to the PIO
+ * buffer while we are sending a packet, for example.
+ */
+ if (hwerrs & TXE_PIO_PARITY) {
+ ipath_pe_txe_recover(dd);
+ hwerrs &= ~TXE_PIO_PARITY;
+ }
+ if (!hwerrs) {
+ static u32 freeze_cnt;
+
+ freeze_cnt++;
+ ipath_dbg("Clearing freezemode on ignored or recovered "
+ "hardware error (%u)\n", freeze_cnt);
+ ipath_clear_freeze(dd);
+ }
+ }
+
+ *msg = '\0';
+
+ if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
+ strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
+ msgl);
+ /* ignore from now on, so disable until driver reloaded */
+ *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+ dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask);
+ }
+
+ ipath_format_hwerrors(hwerrs,
+ ipath_6120_hwerror_msgs,
+ sizeof(ipath_6120_hwerror_msgs)/
+ sizeof(ipath_6120_hwerror_msgs[0]),
+ msg, msgl);
+
+ if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
+ << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
+ bits = (u32) ((hwerrs >>
+ INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) &
+ INFINIPATH_HWE_PCIEMEMPARITYERR_MASK);
+ snprintf(bitsmsg, sizeof bitsmsg,
+ "[PCIe Mem Parity Errs %x] ", bits);
+ strlcat(msg, bitsmsg, msgl);
+ }
+
+#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \
+ INFINIPATH_HWE_COREPLL_RFSLIP )
+
+ if (hwerrs & _IPATH_PLL_FAIL) {
+ snprintf(bitsmsg, sizeof bitsmsg,
+ "[PLL failed (%llx), InfiniPath hardware unusable]",
+ (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
+ strlcat(msg, bitsmsg, msgl);
+ /* ignore from now on, so disable until driver reloaded */
+ dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask);
+ }
+
+ if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
+ /*
+ * If it occurs, it is left masked since the external
+ * interface is unused
+ */
+ dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask);
+ }
+
+ if (hwerrs) {
+ /*
+		 * if any bits are set that we aren't ignoring; only
+ * make the complaint once, in case it's stuck
+ * or recurring, and we get here multiple
+ * times.
+ */
+ ipath_dev_err(dd, "%s hardware error\n", msg);
+ if (dd->ipath_flags & IPATH_INITTED) {
+ ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+ ipath_setup_pe_setextled(dd,
+ INFINIPATH_IBCS_L_STATE_DOWN,
+ INFINIPATH_IBCS_LT_STATE_DISABLED);
+ ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+ "mode), no longer usable, SN %.16s\n",
+ dd->ipath_serial);
+ isfatal = 1;
+ }
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+ /* mark as having had error */
+ *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+ /*
+ * mark as not usable, at a minimum until driver
+ * is reloaded, probably until reboot, since no
+ * other reset is possible.
+ */
+ dd->ipath_flags &= ~IPATH_INITTED;
+ } else
+ *msg = 0; /* recovered from all of them */
+
+ if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg && msg) {
+ /*
+		 * for the /sys status file; if no trailing brace is copied,
+ * we'll know it was truncated.
+ */
+ snprintf(dd->ipath_freezemsg, dd->ipath_freezelen,
+ "{%s}", msg);
+ }
+}
+
+/**
+ * ipath_pe_boardname - fill in the board name
+ * @dd: the infinipath device
+ * @name: the output buffer
+ * @namelen: the size of the output buffer
+ *
+ * info is based on the board revision register
+ */
+static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
+ size_t namelen)
+{
+ char *n = NULL;
+ u8 boardrev = dd->ipath_boardrev;
+ int ret;
+
+ switch (boardrev) {
+ case 0:
+ n = "InfiniPath_Emulation";
+ break;
+ case 1:
+ n = "InfiniPath_QLE7140-Bringup";
+ break;
+ case 2:
+ n = "InfiniPath_QLE7140";
+ break;
+ case 3:
+ n = "InfiniPath_QMI7140";
+ break;
+ case 4:
+ n = "InfiniPath_QEM7140";
+ break;
+ case 5:
+ n = "InfiniPath_QMH7140";
+ break;
+ case 6:
+ n = "InfiniPath_QLE7142";
+ break;
+ default:
+ ipath_dev_err(dd,
+ "Don't yet know about board with ID %u\n",
+ boardrev);
+ snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
+ boardrev);
+ break;
+ }
+ if (n)
+ snprintf(name, namelen, "%s", n);
+
+	if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev > 2) {
+ ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n",
+ dd->ipath_majrev, dd->ipath_minrev);
+ ret = 1;
+ } else {
+ ret = 0;
+ if (dd->ipath_minrev >= 2)
+ dd->ipath_f_put_tid = ipath_pe_put_tid_2;
+ }
+
+ /*
+ * set here, not in ipath_init_*_funcs because we have to do
+ * it after we can read chip registers.
+ */
+ dd->ipath_ureg_align =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
+
+ return ret;
+}
+
+/**
+ * ipath_pe_init_hwerrors - enable hardware errors
+ * @dd: the infinipath device
+ *
+ * now that we have finished initializing everything that might reasonably
+ * cause a hardware error, and cleared those error bits as they occur,
+ * we can enable hardware errors in the mask (potentially enabling
+ * freeze mode), and enable hardware errors as errors (along with
+ * everything else) in errormask
+ */
+static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
+{
+ ipath_err_t val;
+ u64 extsval;
+
+ extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
+
+ if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
+ ipath_dev_err(dd, "MemBIST did not complete!\n");
+ if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND)
+ ipath_dbg("MemBIST corrected\n");
+
+	val = ~0ULL;	/* barring bugs, all hwerrors become interrupts */
+
+	if (!dd->ipath_boardrev)	/* no PLL for Emulator */
+ val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
+
+ if (dd->ipath_minrev < 2) {
+ /* workaround bug 9460 in internal interface bus parity
+ * checking. Fixed (HW bug 9490) in Rev2.
+ */
+ val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
+ }
+ dd->ipath_hwerrmask = val;
+}
+
+/**
+ * ipath_pe_bringup_serdes - bring up the serdes
+ * @dd: the infinipath device
+ */
+static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
+{
+ u64 val, config1, prev_val;
+ int ret = 0;
+
+ ipath_dbg("Trying to bringup serdes\n");
+
+ if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
+ INFINIPATH_HWE_SERDESPLLFAILED) {
+ ipath_dbg("At start, serdes PLL failed bit set "
+ "in hwerrstatus, clearing and continuing\n");
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+ INFINIPATH_HWE_SERDESPLLFAILED);
+ }
+
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
+ config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
+
+ ipath_cdbg(VERBOSE, "SerDes status config0=%llx config1=%llx, "
+ "xgxsconfig %llx\n", (unsigned long long) val,
+ (unsigned long long) config1, (unsigned long long)
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
+
+ /*
+ * Force reset on, also set rxdetect enable. Must do before reading
+ * serdesstatus at least for simulation, or some of the bits in
+ * serdes status will come back as undefined and cause simulation
+ * failures
+ */
+ val |= INFINIPATH_SERDC0_RESET_PLL | INFINIPATH_SERDC0_RXDETECT_EN
+ | INFINIPATH_SERDC0_L1PWR_DN;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
+ /* be sure chip saw it */
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ udelay(5); /* need pll reset set at least for a bit */
+ /*
+ * after PLL is reset, set the per-lane Resets and TxIdle and
+ * clear the PLL reset and rxdetect (to get falling edge).
+ * Leave L1PWR bits set (permanently)
+ */
+ val &= ~(INFINIPATH_SERDC0_RXDETECT_EN | INFINIPATH_SERDC0_RESET_PLL
+ | INFINIPATH_SERDC0_L1PWR_DN);
+ val |= INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE;
+ ipath_cdbg(VERBOSE, "Clearing pll reset and setting lane resets "
+ "and txidle (%llx)\n", (unsigned long long) val);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
+ /* be sure chip saw it */
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ /* need PLL reset clear for at least 11 usec before lane
+ * resets cleared; give it a few more to be sure */
+ udelay(15);
+ val &= ~(INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE);
+
+ ipath_cdbg(VERBOSE, "Clearing lane resets and txidle "
+ "(writing %llx)\n", (unsigned long long) val);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
+ /* be sure chip saw it */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ prev_val = val;
+ if (val & INFINIPATH_XGXS_RESET)
+ val &= ~INFINIPATH_XGXS_RESET;
+ if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
+	     INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv) {
+ /* need to compensate for Tx inversion in partner */
+ val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
+ INFINIPATH_XGXS_RX_POL_SHIFT);
+ val |= dd->ipath_rx_pol_inv <<
+ INFINIPATH_XGXS_RX_POL_SHIFT;
+ }
+ if (val != prev_val)
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
+
+ /* clear current and de-emphasis bits */
+ config1 &= ~0x0ffffffff00ULL;
+ /* set current to 20ma */
+ config1 |= 0x00000000000ULL;
+ /* set de-emphasis to -5.68dB */
+ config1 |= 0x0cccc000000ULL;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
+
+ ipath_cdbg(VERBOSE, "done: SerDes status config0=%llx "
+ "config1=%llx, sstatus=%llx xgxs=%llx\n",
+ (unsigned long long) val, (unsigned long long) config1,
+ (unsigned long long)
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
+ (unsigned long long)
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
+
+ return ret;
+}
+
+/**
+ * ipath_pe_quiet_serdes - set serdes to txidle
+ * @dd: the infinipath device
+ * Called when driver is being unloaded
+ */
+static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
+{
+ u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
+
+ val |= INFINIPATH_SERDC0_TXIDLE;
+ ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
+ (unsigned long long) val);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
+}
+
+static int ipath_pe_intconfig(struct ipath_devdata *dd)
+{
+ u32 chiprev;
+
+ /*
+ * If the chip supports added error indication via GPIO pins,
+ * enable interrupts on those bits so the interrupt routine
+ * can count the events. Also set flag so interrupt routine
+ * can know they are expected.
+ */
+ chiprev = dd->ipath_revision >> INFINIPATH_R_CHIPREVMINOR_SHIFT;
+ if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) {
+ /* Rev2+ reports extra errors via internal GPIO pins */
+ dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
+ dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+ dd->ipath_gpio_mask);
+ }
+ return 0;
+}
+
+/**
+ * ipath_setup_pe_setextled - set the state of the two external LEDs
+ * @dd: the infinipath device
+ * @lst: the L state
+ * @ltst: the LT state
+ *
+ * These LEDs indicate the physical and logical state of the IB link.
+ * For this chip (at least with recommended board pinouts), LED1
+ * is Yellow (logical state) and LED2 is Green (physical state).
+ *
+ * Note: We try to match the Mellanox HCA LED behavior as best
+ * we can. Green indicates physical link state is OK (something is
+ * plugged in, and we can train).
+ * Amber indicates the link is logically up (ACTIVE).
+ * Mellanox further blinks the amber LED to indicate data packet
+ * activity, but we have no hardware support for that, so it would
+ * require waking up every 10-20 msecs and checking the counters
+ * on the chip, and then turning the LED off if appropriate. That's
+ * visible overhead, so not something we will do.
+ *
+ */
+static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
+ u64 ltst)
+{
+ u64 extctl;
+ unsigned long flags = 0;
+
+ /* the diags use the LED to indicate diag info, so we leave
+ * the external LED alone when the diags are running */
+ if (ipath_diag_inuse)
+ return;
+
+ /* Allow override of LED display for, e.g. Locating system in rack */
+ if (dd->ipath_led_override) {
+ ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
+ ? INFINIPATH_IBCS_LT_STATE_LINKUP
+ : INFINIPATH_IBCS_LT_STATE_DISABLED;
+ lst = (dd->ipath_led_override & IPATH_LED_LOG)
+ ? INFINIPATH_IBCS_L_STATE_ACTIVE
+ : INFINIPATH_IBCS_L_STATE_DOWN;
+ }
+
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+ extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
+ INFINIPATH_EXTC_LED2PRIPORT_ON);
+
+ if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
+ extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
+ if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
+ extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
+ dd->ipath_extctrl = extctl;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
+}
+
+/**
+ * ipath_setup_pe_cleanup - clean up any per-chip chip-specific stuff
+ * @dd: the infinipath device
+ *
+ * This is called during driver unload.
+ * We do the pci_disable_msi here, not in generic code, because it
+ * isn't used for the HT chips. If we do end up needing pci_enable_msi
+ * at some point in the future for HT, we'll move the call back
+ * into the main init_one code.
+ */
+static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
+{
+ dd->ipath_msi_lo = 0; /* just in case unload fails */
+ pci_disable_msi(dd->pcidev);
+}
+
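+/*
+ * Read PCIe link status and record the negotiated link width and
+ * speed; this chip expects a Gen1 (2500MHz) x8 link.
+ */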
+static void ipath_6120_pcie_params(struct ipath_devdata *dd)
+{
+ u16 linkstat, speed;
+ int pos;
+
+ pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP);
+ if (!pos) {
+ ipath_dev_err(dd, "Can't find PCI Express capability!\n");
+ goto bail;
+ }
+
+ pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA,
+ &linkstat);
+ /*
+	 * speed is bits 0-3, link width is bits 4-8;
+ * no defines for them in headers
+ */
+ speed = linkstat & 0xf;
+ linkstat >>= 4;
+ linkstat &= 0x1f;
+ dd->ipath_lbus_width = linkstat;
+
+ switch (speed) {
+ case 1:
+ dd->ipath_lbus_speed = 2500; /* Gen1, 2.5GHz */
+ break;
+ case 2:
+		dd->ipath_lbus_speed = 5000; /* Gen2, 5GHz */
+ break;
+ default: /* not defined, assume gen1 */
+ dd->ipath_lbus_speed = 2500;
+ break;
+ }
+
+ if (linkstat < 8)
+ ipath_dev_err(dd,
+ "PCIe width %u (x8 HCA), performance reduced\n",
+ linkstat);
+ else
+ ipath_cdbg(VERBOSE, "PCIe speed %u width %u (x8 HCA)\n",
+ dd->ipath_lbus_speed, linkstat);
+
+ if (speed != 1)
+ ipath_dev_err(dd,
+ "PCIe linkspeed %u is incorrect; "
+ "should be 1 (2500)!\n", speed);
+bail:
+ /* fill in string, even on errors */
+ snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
+ "PCIe,%uMHz,x%u\n",
+ dd->ipath_lbus_speed,
+ dd->ipath_lbus_width);
+
+ return;
+}
+
+/**
+ * ipath_setup_pe_config - setup PCIe config related stuff
+ * @dd: the infinipath device
+ * @pdev: the PCI device
+ *
+ * The pci_enable_msi() call will fail on systems with MSI quirks
+ * such as those with AMD8131, even if the device of interest is not
+ * attached to that device (in the 2.6.13 - 2.6.15 kernels, at least;
+ * fixed late in 2.6.16).
+ * All that can be done is to edit the kernel source to remove the quirk
+ * check until that is fixed.
+ * We do not need to call enable_msi() for our HyperTransport chip,
+ * even though it uses MSI, and we want to avoid the quirk warning,
+ * so we call enable_msi only for PCIe. If we do end up needing
+ * pci_enable_msi at some point in the future for HT, we'll move the
+ * call back into the main init_one code.
+ * We save the msi lo and hi values, so we can restore them after
+ * chip reset (the kernel PCI infrastructure doesn't yet handle that
+ * correctly).
+ */
+static int ipath_setup_pe_config(struct ipath_devdata *dd,
+ struct pci_dev *pdev)
+{
+ int pos, ret;
+
+ dd->ipath_msi_lo = 0; /* used as a flag during reset processing */
+ ret = pci_enable_msi(dd->pcidev);
+ if (ret)
+ ipath_dev_err(dd, "pci_enable_msi failed: %d, "
+ "interrupts may not work\n", ret);
+ /* continue even if it fails, we may still be OK... */
+ dd->ipath_irq = pdev->irq;
+
+ if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) {
+ u16 control;
+ pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
+ &dd->ipath_msi_lo);
+ pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
+ &dd->ipath_msi_hi);
+ pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
+ &control);
+ /* now save the data (vector) info */
+ pci_read_config_word(dd->pcidev,
+ pos + ((control & PCI_MSI_FLAGS_64BIT)
+ ? 12 : 8),
+ &dd->ipath_msi_data);
+ ipath_cdbg(VERBOSE, "Read msi data 0x%x from config offset "
+ "0x%x, control=0x%x\n", dd->ipath_msi_data,
+ pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
+ control);
+ /* we save the cachelinesize also, although it doesn't
+ * really matter */
+ pci_read_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
+ &dd->ipath_pci_cacheline);
+ } else
+ ipath_dev_err(dd, "Can't find MSI capability, "
+ "can't save MSI settings for reset\n");
+
+ ipath_6120_pcie_params(dd);
+
+ dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
+ dd->ipath_link_speed_supported = IPATH_IB_SDR;
+ dd->ipath_link_width_enabled = IB_WIDTH_4X;
+ dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
+ /* these can't change for this chip, so set once */
+ dd->ipath_link_width_active = dd->ipath_link_width_enabled;
+ dd->ipath_link_speed_active = dd->ipath_link_speed_enabled;
+ return 0;
+}
+
+static void ipath_init_pe_variables(struct ipath_devdata *dd)
+{
+ /*
+ * setup the register offsets, since they are different for each
+ * chip
+ */
+ dd->ipath_kregs = &ipath_pe_kregs;
+ dd->ipath_cregs = &ipath_pe_cregs;
+
+ /*
+ * bits for selecting i2c direction and values,
+ * used for I2C serial flash
+ */
+ dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+ dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+ dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+ dd->ipath_gpio_scl = IPATH_GPIO_SCL;
+
+ /*
+ * Fill in data for field-values that change in newer chips.
+ * We dynamically specify only the mask for LINKTRAININGSTATE
+ * and only the shift for LINKSTATE, as they are the only ones
+ * that change. Also precalculate the 3 link states of interest
+ * and the combined mask.
+ */
+ dd->ibcs_ls_shift = IBA6120_IBCS_LINKSTATE_SHIFT;
+ dd->ibcs_lts_mask = IBA6120_IBCS_LINKTRAININGSTATE_MASK;
+ dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
+ dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
+ dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+ (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
+ dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+ (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
+ dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+ (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
+
+ /*
+ * Fill in data for ibcc field-values that change in newer chips.
+ * We dynamically specify only the mask for LINKINITCMD
+ * and only the shift for LINKCMD and MAXPKTLEN, as they are
+ * the only ones that change.
+ */
+ dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK;
+ dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT;
+ dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
+
+ /* Fill in shifts for RcvCtrl. */
+ dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
+ dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT;
+ dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT;
+ dd->ipath_r_portcfg_shift = 0; /* Not on IBA6120 */
+
+ /* variables for sanity checking interrupt and errors */
+ dd->ipath_hwe_bitsextant =
+ (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
+ (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
+ (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
+ INFINIPATH_HWE_PCIE1PLLFAILED |
+ INFINIPATH_HWE_PCIE0PLLFAILED |
+ INFINIPATH_HWE_PCIEPOISONEDTLP |
+ INFINIPATH_HWE_PCIECPLTIMEOUT |
+ INFINIPATH_HWE_PCIEBUSPARITYXTLH |
+ INFINIPATH_HWE_PCIEBUSPARITYXADM |
+ INFINIPATH_HWE_PCIEBUSPARITYRADM |
+ INFINIPATH_HWE_MEMBISTFAILED |
+ INFINIPATH_HWE_COREPLL_FBSLIP |
+ INFINIPATH_HWE_COREPLL_RFSLIP |
+ INFINIPATH_HWE_SERDESPLLFAILED |
+ INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
+ INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
+ dd->ipath_i_bitsextant =
+ (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
+ (INFINIPATH_I_RCVAVAIL_MASK <<
+ INFINIPATH_I_RCVAVAIL_SHIFT) |
+ INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
+ INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
+ dd->ipath_e_bitsextant =
+ INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
+ INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
+ INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
+ INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
+ INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
+ INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
+ INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+ INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
+ INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
+ INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
+ INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
+ INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
+ INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
+ INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
+ INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
+ INFINIPATH_E_HARDWARE;
+
+ dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+ dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+ dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
+ dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
+
+ /*
+ * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+ * 2 is Some Misc, 3 is reserved for future.
+ */
+ dd->ipath_eep_st_masks[0].hwerrs_to_log =
+ INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
+
+ /* Ignore errors in PIO/PBC on systems with unordered write-combining */
+ if (ipath_unordered_wc())
+ dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY;
+
+ dd->ipath_eep_st_masks[1].hwerrs_to_log =
+ INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
+
+ dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
+ dd->delay_mult = 2; /* SDR, 4X, can't change */
+}
+
+/* setup the MSI stuff again after a reset. I'd like to just call
+ * pci_enable_msi() and request_irq() again, but when I do that,
+ * the MSI enable bit doesn't get set in the command word, and
+ * we switch to a different interrupt vector, which is confusing,
+ * so I instead just do it all inline. Perhaps we can somehow tie this
+ * into the PCIe hotplug support at some point.
+ * Note, because I'm doing it all here, I don't call pci_disable_msi()
+ * or free_irq() at the start of ipath_setup_pe_reset().
+ */
+static int ipath_reinit_msi(struct ipath_devdata *dd)
+{
+ int pos;
+ u16 control;
+ int ret;
+
+ if (!dd->ipath_msi_lo) {
+ dev_info(&dd->pcidev->dev, "Can't restore MSI config, "
+ "initial setup failed?\n");
+ ret = 0;
+ goto bail;
+ }
+
+ if (!(pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) {
+ ipath_dev_err(dd, "Can't find MSI capability, "
+ "can't restore MSI settings\n");
+ ret = 0;
+ goto bail;
+ }
+ ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
+ dd->ipath_msi_lo, pos + PCI_MSI_ADDRESS_LO);
+ pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
+ dd->ipath_msi_lo);
+ ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
+ dd->ipath_msi_hi, pos + PCI_MSI_ADDRESS_HI);
+ pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
+ dd->ipath_msi_hi);
+ pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control);
+ if (!(control & PCI_MSI_FLAGS_ENABLE)) {
+ ipath_cdbg(VERBOSE, "MSI control at off %x was %x, "
+ "setting MSI enable (%x)\n", pos + PCI_MSI_FLAGS,
+ control, control | PCI_MSI_FLAGS_ENABLE);
+ control |= PCI_MSI_FLAGS_ENABLE;
+ pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
+ control);
+ }
+ /* now rewrite the data (vector) info */
+ pci_write_config_word(dd->pcidev, pos +
+ ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
+ dd->ipath_msi_data);
+ /* we restore the cachelinesize also, although it doesn't really
+ * matter */
+ pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
+ dd->ipath_pci_cacheline);
+ /* and now set the pci master bit again */
+ pci_set_master(dd->pcidev);
+ ret = 1;
+
+bail:
+ return ret;
+}
+
+/* This routine sleeps, so it can only be called from user context, not
+ * from interrupt context. If we need interrupt context, we can split
+ * it into two routines.
+ */
+static int ipath_setup_pe_reset(struct ipath_devdata *dd)
+{
+ u64 val;
+ int i;
+ int ret;
+ u16 cmdval;
+
+ pci_read_config_word(dd->pcidev, PCI_COMMAND, &cmdval);
+
+ /* Use ERROR so it shows up in logs, etc. */
+ ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
+ /* keep chip from being accessed in a few places */
+ dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT);
+ val = dd->ipath_control | INFINIPATH_C_RESET;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control, val);
+ mb();
+
+ for (i = 1; i <= 5; i++) {
+ int r;
+ /* allow MBIST, etc. to complete; longer on each retry.
+ * We sometimes get machine checks from bus timeout if no
+ * response, so for now, make it *really* long.
+ */
+ msleep(1000 + (1 + i) * 2000);
+ if ((r =
+ pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
+ dd->ipath_pcibar0)))
+ ipath_dev_err(dd, "rewrite of BAR0 failed: %d\n",
+ r);
+ if ((r =
+ pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
+ dd->ipath_pcibar1)))
+ ipath_dev_err(dd, "rewrite of BAR1 failed: %d\n",
+ r);
+ /* now re-enable memory access */
+ pci_write_config_word(dd->pcidev, PCI_COMMAND, cmdval);
+ if ((r = pci_enable_device(dd->pcidev)))
+ ipath_dev_err(dd, "pci_enable_device failed after "
+ "reset: %d\n", r);
+ /*
+ * whether it fully enabled or not, mark as present,
+ * again (but not INITTED)
+ */
+ dd->ipath_flags |= IPATH_PRESENT;
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
+ if (val == dd->ipath_revision) {
+ ipath_cdbg(VERBOSE, "Got matching revision "
+ "register %llx on try %d\n",
+ (unsigned long long) val, i);
+ ret = ipath_reinit_msi(dd);
+ goto bail;
+ }
+ /* Probably getting -1 back */
+ ipath_dbg("Didn't get expected revision register, "
+ "got %llx, try %d\n", (unsigned long long) val,
+ i + 1);
+ }
+ ret = 0; /* failed */
+
+bail:
+ if (ret)
+ ipath_6120_pcie_params(dd);
+ return ret;
+}
+
+/**
+ * ipath_pe_put_tid - write a TID in chip
+ * @dd: the infinipath device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
+ * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for special locking etc.
+ * It's used for both the full cleanup on exit, as well as the normal
+ * setup and teardown.
+ */
+static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
+ u32 type, unsigned long pa)
+{
+ u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
+ unsigned long flags = 0; /* keep gcc quiet */
+ int tidx;
+ spinlock_t *tidlockp;
+
+ if (!dd->ipath_kregbase)
+ return;
+
+ if (pa != dd->ipath_tidinvalid) {
+ if (pa & ((1U << 11) - 1)) {
+ dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
+ "not 2KB aligned!\n", pa);
+ return;
+ }
+ pa >>= 11;
+ /* paranoia check */
+ if (pa & ~INFINIPATH_RT_ADDR_MASK)
+ ipath_dev_err(dd,
+ "BUG: Physical page address 0x%lx "
+ "has bits set in 31-29\n", pa);
+
+ if (type == RCVHQ_RCV_TYPE_EAGER)
+ pa |= dd->ipath_tidtemplate;
+ else /* for now, always full 4KB page */
+ pa |= 2 << 29;
+ }
+
+ /*
+ * Workaround chip bug 9437 by writing the scratch register
+ * before and after the TID, and with an io write barrier.
+ * We use a spinlock around the writes, so they can't intermix
+ * with other TID (eager or expected) writes (the chip bug
+ * is triggered by back to back TID writes). Unfortunately, this
+ * call can be done from interrupt level for the port 0 eager TIDs,
+ * so we have to use irqsave locks.
+ */
+ /*
+ * Assumes tidptr always > ipath_egrtidbase
+ * if type == RCVHQ_RCV_TYPE_EAGER.
+ */
+ tidx = tidptr - dd->ipath_egrtidbase;
+
+ tidlockp = (type == RCVHQ_RCV_TYPE_EAGER && tidx < dd->ipath_rcvegrcnt)
+ ? &dd->ipath_kernel_tid_lock : &dd->ipath_user_tid_lock;
+ spin_lock_irqsave(tidlockp, flags);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeeddeaf);
+ writel(pa, tidp32);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xdeadbeef);
+ mmiowb();
+ spin_unlock_irqrestore(tidlockp, flags);
+}
+
+/**
+ * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
+ * @dd: the infinipath device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
+ * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for selection of the
+ * appropriate "flavor". The static calls in cleanup just use the
+ * revision-agnostic form, as they are not performance critical.
+ */
+static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr,
+ u32 type, unsigned long pa)
+{
+ u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
+ u32 tidx;
+
+ if (!dd->ipath_kregbase)
+ return;
+
+ if (pa != dd->ipath_tidinvalid) {
+ if (pa & ((1U << 11) - 1)) {
+ dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
+ "not 2KB aligned!\n", pa);
+ return;
+ }
+ pa >>= 11;
+ /* paranoia check */
+ if (pa & ~INFINIPATH_RT_ADDR_MASK)
+ ipath_dev_err(dd,
+ "BUG: Physical page address 0x%lx "
+ "has bits set in 31-29\n", pa);
+
+ if (type == RCVHQ_RCV_TYPE_EAGER)
+ pa |= dd->ipath_tidtemplate;
+ else /* for now, always full 4KB page */
+ pa |= 2 << 29;
+ }
+ tidx = tidptr - dd->ipath_egrtidbase;
+ writel(pa, tidp32);
+ mmiowb();
+}
+
+
+/**
+ * ipath_pe_clear_tids - clear all TID entries for a port, expected and eager
+ * @dd: the infinipath device
+ * @port: the port
+ *
+ * clear all TID entries for a port, expected and eager.
+ * Used from ipath_close(). On this chip, TIDs are only 32 bits,
+ * not 64, but they are still on 64 bit boundaries, so tidbase
+ * is declared as u64 * for the pointer math, even though we write 32 bits.
+ */
+static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
+{
+ u64 __iomem *tidbase;
+ unsigned long tidinv;
+ int i;
+
+ if (!dd->ipath_kregbase)
+ return;
+
+ ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
+
+ tidinv = dd->ipath_tidinvalid;
+ tidbase = (u64 __iomem *)
+ ((char __iomem *)(dd->ipath_kregbase) +
+ dd->ipath_rcvtidbase +
+ port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
+
+ for (i = 0; i < dd->ipath_rcvtidcnt; i++)
+ dd->ipath_f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+ tidinv);
+
+ tidbase = (u64 __iomem *)
+ ((char __iomem *)(dd->ipath_kregbase) +
+ dd->ipath_rcvegrbase +
+ port * dd->ipath_rcvegrcnt * sizeof(*tidbase));
+
+ for (i = 0; i < dd->ipath_rcvegrcnt; i++)
+ dd->ipath_f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
+ tidinv);
+}
+
+/**
+ * ipath_pe_tidtemplate - setup constants for TID updates
+ * @dd: the infinipath device
+ *
+ * We setup stuff that we use a lot, to avoid calculating each time
+ */
+static void ipath_pe_tidtemplate(struct ipath_devdata *dd)
+{
+ u32 egrsize = dd->ipath_rcvegrbufsize;
+
+ /* For now, we always allocate 4KB buffers (at init) so we can
+ * receive max size packets. We may want a module parameter to
+	 * specify 2KB or 4KB and/or make it per port instead of per device
+ * for those who want to reduce memory footprint. Note that the
+ * ipath_rcvhdrentsize size must be large enough to hold the largest
+ * IB header (currently 96 bytes) that we expect to handle (plus of
+ * course the 2 dwords of RHF).
+ */
+ if (egrsize == 2048)
+ dd->ipath_tidtemplate = 1U << 29;
+ else if (egrsize == 4096)
+ dd->ipath_tidtemplate = 2U << 29;
+ else {
+ egrsize = 4096;
+ dev_info(&dd->pcidev->dev, "BUG: unsupported egrbufsize "
+ "%u, using %u\n", dd->ipath_rcvegrbufsize,
+ egrsize);
+ dd->ipath_tidtemplate = 2U << 29;
+ }
+ dd->ipath_tidinvalid = 0;
+}
+
+static int ipath_pe_early_init(struct ipath_devdata *dd)
+{
+ dd->ipath_flags |= IPATH_4BYTE_TID;
+ if (ipath_unordered_wc())
+ dd->ipath_flags |= IPATH_PIO_FLUSH_WC;
+
+ /*
+ * For openfabrics, we need to be able to handle an IB header of
+	 * 24 dwords. The HT chip has arbitrarily sized receive buffers, so we
+	 * made them the same size as the PIO buffers. This chip does not
+	 * handle arbitrary size buffers, so we need the header large enough
+	 * to handle the largest IB header, but still have room for a 2KB MTU
+ * standard IB packet.
+ */
+ dd->ipath_rcvhdrentsize = 24;
+ dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
+ dd->ipath_rhf_offset = 0;
+ dd->ipath_egrtidbase = (u64 __iomem *)
+ ((char __iomem *) dd->ipath_kregbase + dd->ipath_rcvegrbase);
+
+ dd->ipath_rcvegrbufsize = ipath_mtu4096 ? 4096 : 2048;
+ /*
+ * the min() check here is currently a nop, but it may not always
+ * be, depending on just how we do ipath_rcvegrbufsize
+ */
+ dd->ipath_ibmaxlen = min(ipath_mtu4096 ? dd->ipath_piosize4k :
+ dd->ipath_piosize2k,
+ dd->ipath_rcvegrbufsize +
+ (dd->ipath_rcvhdrentsize << 2));
+ dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
+
+ /*
+ * We can request a receive interrupt for 1 or
+ * more packets from current offset. For now, we set this
+ * up for a single packet.
+ */
+ dd->ipath_rhdrhead_intr_off = 1ULL<<32;
+
+ ipath_get_eeprom_info(dd);
+
+ return 0;
+}
+
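+/*
+ * Default (weak) version: assume write-combining stores are ordered;
+ * platforms where they are not provide their own implementation.
+ */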
+int __attribute__((weak)) ipath_unordered_wc(void)
+{
+ return 0;
+}
+
+/**
+ * ipath_init_pe_get_base_info - set chip-specific flags for user code
+ * @pd: the infinipath port
+ * @kbase: ipath_base_info pointer
+ *
+ * We set the PCIE flag because the lower bandwidth on PCIe vs
+ * HyperTransport can affect some user packet algorithms.
+ */
+static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
+{
+ struct ipath_base_info *kinfo = kbase;
+ struct ipath_devdata *dd;
+
+ if (ipath_unordered_wc()) {
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER;
+ ipath_cdbg(PROC, "Intel processor, forcing WC order\n");
+	} else
+		ipath_cdbg(PROC, "Not Intel processor, WC ordered\n");
+
+ if (pd == NULL)
+ goto done;
+
+ dd = pd->port_dd;
+
+done:
+ kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE |
+ IPATH_RUNTIME_FORCE_PIOAVAIL | IPATH_RUNTIME_PIO_REGSWAPPED;
+ return 0;
+}
+
+static void ipath_pe_free_irq(struct ipath_devdata *dd)
+{
+ free_irq(dd->ipath_irq, dd);
+ dd->ipath_irq = 0;
+}
+
+
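+/* the message header starts two dwords (one 64-bit RHF) past rhf_addr */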
+static struct ipath_message_header *
+ipath_pe_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
+{
+ return (struct ipath_message_header *)
+ &rhf_addr[sizeof(u64) / sizeof(u32)];
+}
+
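+/* the port count is fixed by the chip; the requested cfgports is not used here */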
+static void ipath_pe_config_ports(struct ipath_devdata *dd, ushort cfgports)
+{
+ dd->ipath_portcnt =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
+ dd->ipath_p0_rcvegrcnt =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
+}
+
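+/* snapshot all chip counters; counters this chip does not implement are reported as zero */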
+static void ipath_pe_read_counters(struct ipath_devdata *dd,
+ struct infinipath_counters *cntrs)
+{
+ cntrs->LBIntCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt));
+ cntrs->LBFlowStallCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt));
+ cntrs->TxSDmaDescCnt = 0;
+ cntrs->TxUnsupVLErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt));
+ cntrs->TxDataPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt));
+ cntrs->TxFlowPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt));
+ cntrs->TxDwordCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt));
+ cntrs->TxLenErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt));
+ cntrs->TxMaxMinLenErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt));
+ cntrs->TxUnderrunCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt));
+ cntrs->TxFlowStallCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt));
+ cntrs->TxDroppedPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt));
+ cntrs->RxDroppedPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt));
+ cntrs->RxDataPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt));
+ cntrs->RxFlowPktCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt));
+ cntrs->RxDwordCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt));
+ cntrs->RxLenErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt));
+ cntrs->RxMaxMinLenErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt));
+ cntrs->RxICRCErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt));
+ cntrs->RxVCRCErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt));
+ cntrs->RxFlowCtrlErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt));
+ cntrs->RxBadFormatCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt));
+ cntrs->RxLinkProblemCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt));
+ cntrs->RxEBPCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt));
+ cntrs->RxLPCRCErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt));
+ cntrs->RxBufOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt));
+ cntrs->RxTIDFullErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt));
+ cntrs->RxTIDValidErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt));
+ cntrs->RxPKeyMismatchCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt));
+ cntrs->RxP0HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt));
+ cntrs->RxP1HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt));
+ cntrs->RxP2HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt));
+ cntrs->RxP3HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt));
+ cntrs->RxP4HdrEgrOvflCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt));
+ cntrs->RxP5HdrEgrOvflCnt = 0;
+ cntrs->RxP6HdrEgrOvflCnt = 0;
+ cntrs->RxP7HdrEgrOvflCnt = 0;
+ cntrs->RxP8HdrEgrOvflCnt = 0;
+ cntrs->RxP9HdrEgrOvflCnt = 0;
+ cntrs->RxP10HdrEgrOvflCnt = 0;
+ cntrs->RxP11HdrEgrOvflCnt = 0;
+ cntrs->RxP12HdrEgrOvflCnt = 0;
+ cntrs->RxP13HdrEgrOvflCnt = 0;
+ cntrs->RxP14HdrEgrOvflCnt = 0;
+ cntrs->RxP15HdrEgrOvflCnt = 0;
+ cntrs->RxP16HdrEgrOvflCnt = 0;
+ cntrs->IBStatusChangeCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt));
+ cntrs->IBLinkErrRecoveryCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt));
+ cntrs->IBLinkDownedCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt));
+ cntrs->IBSymbolErrCnt =
+ ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt));
+ cntrs->RxVL15DroppedPktCnt = 0;
+ cntrs->RxOtherLocalPhyErrCnt = 0;
+ cntrs->PcieRetryBufDiagQwordCnt = 0;
+ cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs;
+ cntrs->LocalLinkIntegrityErrCnt = dd->ipath_lli_errs;
+ cntrs->RxVlErrCnt = 0;
+ cntrs->RxDlidFltrCnt = 0;
+}
+
+
+/* no interrupt fallback for these chips */
+static int ipath_pe_nointr_fallback(struct ipath_devdata *dd)
+{
+ return 0;
+}
+
+
+/*
+ * Reset the XGXS (between the serdes and the IBC). Slightly less intrusive
+ * than resetting the IBC or external link state, and useful in some
+ * cases to cause some retraining. To do this right, we reset IBC
+ * as well.
+ */
+static void ipath_pe_xgxs_reset(struct ipath_devdata *dd)
+{
+ u64 val, prev_val;
+
+ prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ val = prev_val | INFINIPATH_XGXS_RESET;
+ prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+}
+
+
+static int ipath_pe_get_ib_cfg(struct ipath_devdata *dd, int which)
+{
+ int ret;
+
+ switch (which) {
+ case IPATH_IB_CFG_LWID:
+ ret = dd->ipath_link_width_active;
+ break;
+ case IPATH_IB_CFG_SPD:
+ ret = dd->ipath_link_speed_active;
+ break;
+ case IPATH_IB_CFG_LWID_ENB:
+ ret = dd->ipath_link_width_enabled;
+ break;
+ case IPATH_IB_CFG_SPD_ENB:
+ ret = dd->ipath_link_speed_enabled;
+ break;
+ default:
+ ret = -ENOTSUPP;
+ break;
+ }
+ return ret;
+}
+
+
+/* we assume range checking is already done, if needed */
+static int ipath_pe_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
+{
+ int ret = 0;
+
+ if (which == IPATH_IB_CFG_LWID_ENB)
+ dd->ipath_link_width_enabled = val;
+ else if (which == IPATH_IB_CFG_SPD_ENB)
+ dd->ipath_link_speed_enabled = val;
+ else
+ ret = -ENOTSUPP;
+ return ret;
+}
+
+static void ipath_pe_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
+{
+}
+
+
+static int ipath_pe_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
+{
+ ipath_setup_pe_setextled(dd, ipath_ib_linkstate(dd, ibcs),
+ ipath_ib_linktrstate(dd, ibcs));
+ return 0;
+}
+
+
+/**
+ * ipath_init_iba6120_funcs - set up the chip-specific function pointers
+ * @dd: the infinipath device
+ *
+ * This is global, and is called directly at init to set up the
+ * chip-specific function pointers for later use.
+ */
+void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
+{
+ dd->ipath_f_intrsetup = ipath_pe_intconfig;
+ dd->ipath_f_bus = ipath_setup_pe_config;
+ dd->ipath_f_reset = ipath_setup_pe_reset;
+ dd->ipath_f_get_boardname = ipath_pe_boardname;
+ dd->ipath_f_init_hwerrors = ipath_pe_init_hwerrors;
+ dd->ipath_f_early_init = ipath_pe_early_init;
+ dd->ipath_f_handle_hwerrors = ipath_pe_handle_hwerrors;
+ dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes;
+ dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
+ dd->ipath_f_clear_tids = ipath_pe_clear_tids;
+ /*
+ * _f_put_tid may get changed after we read the chip revision,
+ * but we start with the safe version for all revs
+ */
+ dd->ipath_f_put_tid = ipath_pe_put_tid;
+ dd->ipath_f_cleanup = ipath_setup_pe_cleanup;
+ dd->ipath_f_setextled = ipath_setup_pe_setextled;
+ dd->ipath_f_get_base_info = ipath_pe_get_base_info;
+ dd->ipath_f_free_irq = ipath_pe_free_irq;
+ dd->ipath_f_tidtemplate = ipath_pe_tidtemplate;
+ dd->ipath_f_intr_fallback = ipath_pe_nointr_fallback;
+ dd->ipath_f_xgxs_reset = ipath_pe_xgxs_reset;
+ dd->ipath_f_get_msgheader = ipath_pe_get_msgheader;
+ dd->ipath_f_config_ports = ipath_pe_config_ports;
+ dd->ipath_f_read_counters = ipath_pe_read_counters;
+ dd->ipath_f_get_ib_cfg = ipath_pe_get_ib_cfg;
+ dd->ipath_f_set_ib_cfg = ipath_pe_set_ib_cfg;
+ dd->ipath_f_config_jint = ipath_pe_config_jint;
+ dd->ipath_f_ib_updown = ipath_pe_ib_updown;
+
+
+ /* initialize chip-specific variables */
+ ipath_init_pe_variables(dd);
+}
+
diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c
new file mode 100644
index 0000000..9839e20
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c
@@ -0,0 +1,2559 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*
+ * This file contains all of the code that is specific to the
+ * InfiniPath 7220 chip (except that specific to the SerDes)
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <rdma/ib_verbs.h>
+
+#include "ipath_kernel.h"
+#include "ipath_registers.h"
+#include "ipath_7220.h"
+
+static void ipath_setup_7220_setextled(struct ipath_devdata *, u64, u64);
+
+static unsigned ipath_compat_ddr_negotiate = 1;
+
+module_param_named(compat_ddr_negotiate, ipath_compat_ddr_negotiate, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(compat_ddr_negotiate,
+ "Attempt pre-IBTA 1.2 DDR speed negotiation");
+
+static unsigned ipath_sdma_fetch_arb = 1;
+module_param_named(fetch_arb, ipath_sdma_fetch_arb, uint, S_IRUGO);
+MODULE_PARM_DESC(fetch_arb, "IBA7220: change SDMA descriptor arbitration");
+
+/*
+ * This file contains almost all the chip-specific register information and
+ * access functions for the QLogic InfiniPath 7220 PCI-Express chip, with the
+ * exception of SerDes support, which is in ipath_sd7220.c.
+ *
+ * This lists the InfiniPath registers, in the actual chip layout.
+ * This structure should never be directly accessed.
+ */
+struct _infinipath_do_not_use_kernel_regs {
+ unsigned long long Revision;
+ unsigned long long Control;
+ unsigned long long PageAlign;
+ unsigned long long PortCnt;
+ unsigned long long DebugPortSelect;
+ unsigned long long DebugSigsIntSel; /* was Reserved0;*/
+ unsigned long long SendRegBase;
+ unsigned long long UserRegBase;
+ unsigned long long CounterRegBase;
+ unsigned long long Scratch;
+ unsigned long long EEPROMAddrCmd; /* was Reserved1; */
+ unsigned long long EEPROMData; /* was Reserved2; */
+ unsigned long long IntBlocked;
+ unsigned long long IntMask;
+ unsigned long long IntStatus;
+ unsigned long long IntClear;
+ unsigned long long ErrorMask;
+ unsigned long long ErrorStatus;
+ unsigned long long ErrorClear;
+ unsigned long long HwErrMask;
+ unsigned long long HwErrStatus;
+ unsigned long long HwErrClear;
+ unsigned long long HwDiagCtrl;
+ unsigned long long MDIO;
+ unsigned long long IBCStatus;
+ unsigned long long IBCCtrl;
+ unsigned long long ExtStatus;
+ unsigned long long ExtCtrl;
+ unsigned long long GPIOOut;
+ unsigned long long GPIOMask;
+ unsigned long long GPIOStatus;
+ unsigned long long GPIOClear;
+ unsigned long long RcvCtrl;
+ unsigned long long RcvBTHQP;
+ unsigned long long RcvHdrSize;
+ unsigned long long RcvHdrCnt;
+ unsigned long long RcvHdrEntSize;
+ unsigned long long RcvTIDBase;
+ unsigned long long RcvTIDCnt;
+ unsigned long long RcvEgrBase;
+ unsigned long long RcvEgrCnt;
+ unsigned long long RcvBufBase;
+ unsigned long long RcvBufSize;
+ unsigned long long RxIntMemBase;
+ unsigned long long RxIntMemSize;
+ unsigned long long RcvPartitionKey;
+ unsigned long long RcvQPMulticastPort;
+ unsigned long long RcvPktLEDCnt;
+ unsigned long long IBCDDRCtrl;
+ unsigned long long HRTBT_GUID;
+ unsigned long long IB_SDTEST_IF_TX;
+ unsigned long long IB_SDTEST_IF_RX;
+ unsigned long long IBCDDRCtrl2;
+ unsigned long long IBCDDRStatus;
+ unsigned long long JIntReload;
+ unsigned long long IBNCModeCtrl;
+ unsigned long long SendCtrl;
+ unsigned long long SendBufBase;
+ unsigned long long SendBufSize;
+ unsigned long long SendBufCnt;
+ unsigned long long SendAvailAddr;
+ unsigned long long TxIntMemBase;
+ unsigned long long TxIntMemSize;
+ unsigned long long SendDmaBase;
+ unsigned long long SendDmaLenGen;
+ unsigned long long SendDmaTail;
+ unsigned long long SendDmaHead;
+ unsigned long long SendDmaHeadAddr;
+ unsigned long long SendDmaBufMask0;
+ unsigned long long SendDmaBufMask1;
+ unsigned long long SendDmaBufMask2;
+ unsigned long long SendDmaStatus;
+ unsigned long long SendBufferError;
+ unsigned long long SendBufferErrorCONT1;
+ unsigned long long SendBufErr2; /* was Reserved6SBE[0/6] */
+ unsigned long long Reserved6L[2];
+ unsigned long long AvailUpdCount;
+ unsigned long long RcvHdrAddr0;
+ unsigned long long RcvHdrAddrs[16]; /* Why enumerate? */
+ unsigned long long Reserved7hdtl; /* Align next to 300 */
+ unsigned long long RcvHdrTailAddr0; /* 300, like others */
+ unsigned long long RcvHdrTailAddrs[16];
+ unsigned long long Reserved9SW[7]; /* was [8]; we have 17 ports */
+ unsigned long long IbsdEpbAccCtl; /* IB Serdes EPB access control */
+ unsigned long long IbsdEpbTransReg; /* IB Serdes EPB Transaction */
+ unsigned long long Reserved10sds; /* was SerdesStatus on */
+ unsigned long long XGXSConfig;
+ unsigned long long IBSerDesCtrl; /* Was IBPLLCfg on Monty */
+ unsigned long long EEPCtlStat; /* for "boot" EEPROM/FLASH */
+ unsigned long long EEPAddrCmd;
+ unsigned long long EEPData;
+ unsigned long long PcieEpbAccCtl;
+ unsigned long long PcieEpbTransCtl;
+ unsigned long long EfuseCtl; /* E-Fuse control */
+ unsigned long long EfuseData[4];
+ unsigned long long ProcMon;
+ /* this chip moves following two from previous 200, 208 */
+ unsigned long long PCIeRBufTestReg0;
+ unsigned long long PCIeRBufTestReg1;
+ /* added for this chip */
+ unsigned long long PCIeRBufTestReg2;
+ unsigned long long PCIeRBufTestReg3;
+ /* added for this chip, debug only */
+ unsigned long long SPC_JTAG_ACCESS_REG;
+ unsigned long long LAControlReg;
+ unsigned long long GPIODebugSelReg;
+ unsigned long long DebugPortValueReg;
+ /* added for this chip, DMA */
+ unsigned long long SendDmaBufUsed[3];
+ unsigned long long SendDmaReqTagUsed;
+ /*
+ * added for this chip, EFUSE: note that these program 64-bit
+ * words 2 and 3 */
+ unsigned long long efuse_pgm_data[2];
+ unsigned long long Reserved11LAalign[10]; /* Skip 4B0..4F8 */
+ /* we have 30 regs for DDS and RXEQ in IB SERDES */
+ unsigned long long SerDesDDSRXEQ[30];
+ unsigned long long Reserved12LAalign[2]; /* Skip 5F0, 5F8 */
+ /* added for LA debug support */
+ unsigned long long LAMemory[32];
+};
+
+struct _infinipath_do_not_use_counters {
+ __u64 LBIntCnt;
+ __u64 LBFlowStallCnt;
+ __u64 TxSDmaDescCnt; /* was Reserved1 */
+ __u64 TxUnsupVLErrCnt;
+ __u64 TxDataPktCnt;
+ __u64 TxFlowPktCnt;
+ __u64 TxDwordCnt;
+ __u64 TxLenErrCnt;
+ __u64 TxMaxMinLenErrCnt;
+ __u64 TxUnderrunCnt;
+ __u64 TxFlowStallCnt;
+ __u64 TxDroppedPktCnt;
+ __u64 RxDroppedPktCnt;
+ __u64 RxDataPktCnt;
+ __u64 RxFlowPktCnt;
+ __u64 RxDwordCnt;
+ __u64 RxLenErrCnt;
+ __u64 RxMaxMinLenErrCnt;
+ __u64 RxICRCErrCnt;
+ __u64 RxVCRCErrCnt;
+ __u64 RxFlowCtrlErrCnt;
+ __u64 RxBadFormatCnt;
+ __u64 RxLinkProblemCnt;
+ __u64 RxEBPCnt;
+ __u64 RxLPCRCErrCnt;
+ __u64 RxBufOvflCnt;
+ __u64 RxTIDFullErrCnt;
+ __u64 RxTIDValidErrCnt;
+ __u64 RxPKeyMismatchCnt;
+ __u64 RxP0HdrEgrOvflCnt;
+ __u64 RxP1HdrEgrOvflCnt;
+ __u64 RxP2HdrEgrOvflCnt;
+ __u64 RxP3HdrEgrOvflCnt;
+ __u64 RxP4HdrEgrOvflCnt;
+ __u64 RxP5HdrEgrOvflCnt;
+ __u64 RxP6HdrEgrOvflCnt;
+ __u64 RxP7HdrEgrOvflCnt;
+ __u64 RxP8HdrEgrOvflCnt;
+ __u64 RxP9HdrEgrOvflCnt; /* was Reserved6 */
+ __u64 RxP10HdrEgrOvflCnt; /* was Reserved7 */
+ __u64 RxP11HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP12HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP13HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP14HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP15HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 RxP16HdrEgrOvflCnt; /* new for IBA7220 */
+ __u64 IBStatusChangeCnt;
+ __u64 IBLinkErrRecoveryCnt;
+ __u64 IBLinkDownedCnt;
+ __u64 IBSymbolErrCnt;
+ /* The following are new for IBA7220 */
+ __u64 RxVL15DroppedPktCnt;
+ __u64 RxOtherLocalPhyErrCnt;
+ __u64 PcieRetryBufDiagQwordCnt;
+ __u64 ExcessBufferOvflCnt;
+ __u64 LocalLinkIntegrityErrCnt;
+ __u64 RxVlErrCnt;
+ __u64 RxDlidFltrCnt;
+ __u64 Reserved8[7];
+ __u64 PSStat;
+ __u64 PSStart;
+ __u64 PSInterval;
+ __u64 PSRcvDataCount;
+ __u64 PSRcvPktsCount;
+ __u64 PSXmitDataCount;
+ __u64 PSXmitPktsCount;
+ __u64 PSXmitWaitCount;
+};
+
+#define IPATH_KREG_OFFSET(field) (offsetof( \
+ struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
+#define IPATH_CREG_OFFSET(field) (offsetof( \
+ struct _infinipath_do_not_use_counters, field) / sizeof(u64))
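+/*
+ * For illustration: offsets are in 64-bit-word units, so e.g.
+ * IPATH_CREG_OFFSET(LBIntCnt) is 0 and IPATH_CREG_OFFSET(LBFlowStallCnt)
+ * is 1 in the counter layout above.
+ */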
+
+static const struct ipath_kregs ipath_7220_kregs = {
+ .kr_control = IPATH_KREG_OFFSET(Control),
+ .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
+ .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
+ .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
+ .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
+ .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
+ .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
+ .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
+ .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
+ .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
+ .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
+ .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
+ .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
+ .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
+ .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
+ .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
+ .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
+ .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
+ .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
+ .kr_intclear = IPATH_KREG_OFFSET(IntClear),
+ .kr_intmask = IPATH_KREG_OFFSET(IntMask),
+ .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
+ .kr_mdio = IPATH_KREG_OFFSET(MDIO),
+ .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
+ .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
+ .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
+ .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
+ .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
+ .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
+ .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
+ .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
+ .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
+ .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
+ .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
+ .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
+ .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
+ .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
+ .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
+ .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
+ .kr_revision = IPATH_KREG_OFFSET(Revision),
+ .kr_scratch = IPATH_KREG_OFFSET(Scratch),
+ .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
+ .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
+ .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendAvailAddr),
+ .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendBufBase),
+ .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendBufCnt),
+ .kr_sendpiosize = IPATH_KREG_OFFSET(SendBufSize),
+ .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
+ .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
+ .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
+ .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
+
+ .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
+
+ /* send dma related regs */
+ .kr_senddmabase = IPATH_KREG_OFFSET(SendDmaBase),
+ .kr_senddmalengen = IPATH_KREG_OFFSET(SendDmaLenGen),
+ .kr_senddmatail = IPATH_KREG_OFFSET(SendDmaTail),
+ .kr_senddmahead = IPATH_KREG_OFFSET(SendDmaHead),
+ .kr_senddmaheadaddr = IPATH_KREG_OFFSET(SendDmaHeadAddr),
+ .kr_senddmabufmask0 = IPATH_KREG_OFFSET(SendDmaBufMask0),
+ .kr_senddmabufmask1 = IPATH_KREG_OFFSET(SendDmaBufMask1),
+ .kr_senddmabufmask2 = IPATH_KREG_OFFSET(SendDmaBufMask2),
+ .kr_senddmastatus = IPATH_KREG_OFFSET(SendDmaStatus),
+
+ /* SerDes related regs */
+ .kr_ibserdesctrl = IPATH_KREG_OFFSET(IBSerDesCtrl),
+ .kr_ib_epbacc = IPATH_KREG_OFFSET(IbsdEpbAccCtl),
+ .kr_ib_epbtrans = IPATH_KREG_OFFSET(IbsdEpbTransReg),
+ .kr_pcie_epbacc = IPATH_KREG_OFFSET(PcieEpbAccCtl),
+ .kr_pcie_epbtrans = IPATH_KREG_OFFSET(PcieEpbTransCtl),
+ .kr_ib_ddsrxeq = IPATH_KREG_OFFSET(SerDesDDSRXEQ),
+
+ /*
+ * These should not be used directly via ipath_read_kreg64(),
+ * use them with ipath_read_kreg64_port()
+ */
+ .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
+ .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
+
+ /*
+ * The rcvpktled register controls one of the debug port signals, so
+ * a packet activity LED can be connected to it.
+ */
+ .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
+ .kr_pcierbuftestreg0 = IPATH_KREG_OFFSET(PCIeRBufTestReg0),
+ .kr_pcierbuftestreg1 = IPATH_KREG_OFFSET(PCIeRBufTestReg1),
+
+ .kr_hrtbt_guid = IPATH_KREG_OFFSET(HRTBT_GUID),
+ .kr_ibcddrctrl = IPATH_KREG_OFFSET(IBCDDRCtrl),
+ .kr_ibcddrstatus = IPATH_KREG_OFFSET(IBCDDRStatus),
+ .kr_jintreload = IPATH_KREG_OFFSET(JIntReload)
+};
+
+static const struct ipath_cregs ipath_7220_cregs = {
+ .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
+ .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
+ .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
+ .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
+ .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
+ .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
+ .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
+ .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
+ .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
+ .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
+ .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
+ .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
+ .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
+ .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
+ .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
+ .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
+ .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
+ .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
+ .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
+ .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
+ .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
+ .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
+ .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
+ .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
+ .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
+ .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
+ .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
+ .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
+ .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
+ .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
+ .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
+ .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
+ .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt),
+ .cr_vl15droppedpktcnt = IPATH_CREG_OFFSET(RxVL15DroppedPktCnt),
+ .cr_rxotherlocalphyerrcnt =
+ IPATH_CREG_OFFSET(RxOtherLocalPhyErrCnt),
+ .cr_excessbufferovflcnt = IPATH_CREG_OFFSET(ExcessBufferOvflCnt),
+ .cr_locallinkintegrityerrcnt =
+ IPATH_CREG_OFFSET(LocalLinkIntegrityErrCnt),
+ .cr_rxvlerrcnt = IPATH_CREG_OFFSET(RxVlErrCnt),
+ .cr_rxdlidfltrcnt = IPATH_CREG_OFFSET(RxDlidFltrCnt),
+ .cr_psstat = IPATH_CREG_OFFSET(PSStat),
+ .cr_psstart = IPATH_CREG_OFFSET(PSStart),
+ .cr_psinterval = IPATH_CREG_OFFSET(PSInterval),
+ .cr_psrcvdatacount = IPATH_CREG_OFFSET(PSRcvDataCount),
+ .cr_psrcvpktscount = IPATH_CREG_OFFSET(PSRcvPktsCount),
+ .cr_psxmitdatacount = IPATH_CREG_OFFSET(PSXmitDataCount),
+ .cr_psxmitpktscount = IPATH_CREG_OFFSET(PSXmitPktsCount),
+ .cr_psxmitwaitcount = IPATH_CREG_OFFSET(PSXmitWaitCount),
+};
+
+/* kr_control bits */
+#define INFINIPATH_C_RESET (1U<<7)
+
+/* kr_intstatus, kr_intclear, kr_intmask bits */
+#define INFINIPATH_I_RCVURG_MASK ((1ULL<<17)-1)
+#define INFINIPATH_I_RCVURG_SHIFT 32
+#define INFINIPATH_I_RCVAVAIL_MASK ((1ULL<<17)-1)
+#define INFINIPATH_I_RCVAVAIL_SHIFT 0
+#define INFINIPATH_I_SERDESTRIMDONE (1ULL<<27)
+
+/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
+#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x00000000000000ffULL
+#define INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT 0
+#define INFINIPATH_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL
+#define INFINIPATH_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL
+#define INFINIPATH_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL
+#define INFINIPATH_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL
+#define INFINIPATH_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL
+#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL
+#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL
+#define INFINIPATH_HWE_PCIE1PLLFAILED 0x0400000000000000ULL
+#define INFINIPATH_HWE_PCIE0PLLFAILED 0x0800000000000000ULL
+#define INFINIPATH_HWE_SERDESPLLFAILED 0x1000000000000000ULL
+/* specific to this chip */
+#define INFINIPATH_HWE_PCIECPLDATAQUEUEERR 0x0000000000000040ULL
+#define INFINIPATH_HWE_PCIECPLHDRQUEUEERR 0x0000000000000080ULL
+#define INFINIPATH_HWE_SDMAMEMREADERR 0x0000000010000000ULL
+#define INFINIPATH_HWE_CLK_UC_PLLNOTLOCKED 0x2000000000000000ULL
+#define INFINIPATH_HWE_PCIESERDESQ0PCLKNOTDETECT 0x0100000000000000ULL
+#define INFINIPATH_HWE_PCIESERDESQ1PCLKNOTDETECT 0x0200000000000000ULL
+#define INFINIPATH_HWE_PCIESERDESQ2PCLKNOTDETECT 0x0400000000000000ULL
+#define INFINIPATH_HWE_PCIESERDESQ3PCLKNOTDETECT 0x0800000000000000ULL
+#define INFINIPATH_HWE_DDSRXEQMEMORYPARITYERR 0x0000008000000000ULL
+#define INFINIPATH_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL
+#define INFINIPATH_HWE_PCIE_UC_OCT0MEMORYPARITYERR 0x0000001000000000ULL
+#define INFINIPATH_HWE_PCIE_UC_OCT1MEMORYPARITYERR 0x0000002000000000ULL
+
+#define IBA7220_IBCS_LINKTRAININGSTATE_MASK 0x1F
+#define IBA7220_IBCS_LINKSTATE_SHIFT 5
+#define IBA7220_IBCS_LINKSPEED_SHIFT 8
+#define IBA7220_IBCS_LINKWIDTH_SHIFT 9
+
+#define IBA7220_IBCC_LINKINITCMD_MASK 0x7ULL
+#define IBA7220_IBCC_LINKCMD_SHIFT 19
+#define IBA7220_IBCC_MAXPKTLEN_SHIFT 21
+
+/* kr_ibcddrctrl bits */
+#define IBA7220_IBC_DLIDLMC_MASK 0xFFFFFFFFUL
+#define IBA7220_IBC_DLIDLMC_SHIFT 32
+#define IBA7220_IBC_HRTBT_MASK 3
+#define IBA7220_IBC_HRTBT_SHIFT 16
+#define IBA7220_IBC_HRTBT_ENB 0x10000UL
+#define IBA7220_IBC_LANE_REV_SUPPORTED (1<<8)
+#define IBA7220_IBC_LREV_MASK 1
+#define IBA7220_IBC_LREV_SHIFT 8
+#define IBA7220_IBC_RXPOL_MASK 1
+#define IBA7220_IBC_RXPOL_SHIFT 7
+#define IBA7220_IBC_WIDTH_SHIFT 5
+#define IBA7220_IBC_WIDTH_MASK 0x3
+#define IBA7220_IBC_WIDTH_1X_ONLY (0<<IBA7220_IBC_WIDTH_SHIFT)
+#define IBA7220_IBC_WIDTH_4X_ONLY (1<<IBA7220_IBC_WIDTH_SHIFT)
+#define IBA7220_IBC_WIDTH_AUTONEG (2<<IBA7220_IBC_WIDTH_SHIFT)
+#define IBA7220_IBC_SPEED_AUTONEG (1<<1)
+#define IBA7220_IBC_SPEED_SDR (1<<2)
+#define IBA7220_IBC_SPEED_DDR (1<<3)
+#define IBA7220_IBC_SPEED_AUTONEG_MASK (0x7<<1)
+#define IBA7220_IBC_IBTA_1_2_MASK (1)
+
+/* kr_ibcddrstatus */
+/* link latency shift is 0, don't bother defining */
+#define IBA7220_DDRSTAT_LINKLAT_MASK 0x3ffffff
+
+/* kr_extstatus bits */
+#define INFINIPATH_EXTS_FREQSEL 0x2
+#define INFINIPATH_EXTS_SERDESSEL 0x4
+#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
+#define INFINIPATH_EXTS_MEMBIST_DISABLED 0x0000000000008000
+
+/* kr_xgxsconfig bits */
+#define INFINIPATH_XGXS_RESET 0x5ULL
+#define INFINIPATH_XGXS_FC_SAFE (1ULL<<63)
+
+/* kr_rcvpktledcnt */
+#define IBA7220_LEDBLINK_ON_SHIFT 32 /* 4ns period on after packet */
+#define IBA7220_LEDBLINK_OFF_SHIFT 0 /* 4ns period off before next on */
+
+#define _IPATH_GPIO_SDA_NUM 1
+#define _IPATH_GPIO_SCL_NUM 0
+
+#define IPATH_GPIO_SDA (1ULL << \
+ (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+#define IPATH_GPIO_SCL (1ULL << \
+ (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
+
+#define IBA7220_R_INTRAVAIL_SHIFT 17
+#define IBA7220_R_TAILUPD_SHIFT 35
+#define IBA7220_R_PORTCFG_SHIFT 36
+
+#define INFINIPATH_JINT_PACKETSHIFT 16
+#define INFINIPATH_JINT_DEFAULT_IDLE_TICKS 0
+#define INFINIPATH_JINT_DEFAULT_MAX_PACKETS 0
+
+#define IBA7220_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */
+
+/*
+ * The size bits give us 2^N, in KB units. 0 marks the entry as invalid,
+ * and 7 is reserved. We currently use only 2KB and 4KB.
+ */
+#define IBA7220_TID_SZ_SHIFT 37 /* shift to 3bit size selector */
+#define IBA7220_TID_SZ_2K (1UL<<IBA7220_TID_SZ_SHIFT) /* 2KB */
+#define IBA7220_TID_SZ_4K (2UL<<IBA7220_TID_SZ_SHIFT) /* 4KB */
+#define IBA7220_TID_PA_SHIFT 11U /* TID addr in chip stored w/o low bits */
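+/*
+ * Illustrative sketch only (an assumption, since the put_tid code is not
+ * shown here): a 2KB expected-TID entry would presumably be formed as
+ * (physaddr >> IBA7220_TID_PA_SHIFT) | IBA7220_TID_SZ_2K, with the low
+ * 11 address bits dropped.
+ */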
+
+#define IPATH_AUTONEG_TRIES 5 /* sequential retries to negotiate DDR */
+
+static char int_type[16] = "auto";
+module_param_string(interrupt_type, int_type, sizeof(int_type), 0444);
+MODULE_PARM_DESC(int_type, " interrupt_type=auto|force_msi|force_intx");
+
+/* packet rate matching delay; chip has support */
+static u8 rate_to_delay[2][2] = {
+ /* 1x, 4x */
+ { 8, 2 }, /* SDR */
+ { 4, 1 } /* DDR */
+};
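+/*
+ * Presumably indexed as rate_to_delay[speed][width], with speed 0 = SDR,
+ * 1 = DDR and width 0 = 1x, 1 = 4x, per the row/column comments above.
+ */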
+
+/* 7220 specific hardware errors... */
+static const struct ipath_hwerror_msgs ipath_7220_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
+ INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"),
+ /*
+ * In practice, it's unlikely that we'll see PCIe PLL, or bus
+ * parity or memory parity error failures, because most likely we
+ * won't be able to talk to the core of the chip. Nonetheless, we
+ * might see them, if they are in parts of the PCIe core that aren't
+ * essential.
+ */
+ INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"),
+ INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"),
+ INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"),
+ INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+ INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+ INFINIPATH_HWE_MSG(PCIECPLDATAQUEUEERR, "PCIe cpl header queue"),
+ INFINIPATH_HWE_MSG(PCIECPLHDRQUEUEERR, "PCIe cpl data queue"),
+ INFINIPATH_HWE_MSG(SDMAMEMREADERR, "Send DMA memory read"),
+ INFINIPATH_HWE_MSG(CLK_UC_PLLNOTLOCKED, "uC PLL clock not locked"),
+ INFINIPATH_HWE_MSG(PCIESERDESQ0PCLKNOTDETECT,
+ "PCIe serdes Q0 no clock"),
+ INFINIPATH_HWE_MSG(PCIESERDESQ1PCLKNOTDETECT,
+ "PCIe serdes Q1 no clock"),
+ INFINIPATH_HWE_MSG(PCIESERDESQ2PCLKNOTDETECT,
+ "PCIe serdes Q2 no clock"),
+ INFINIPATH_HWE_MSG(PCIESERDESQ3PCLKNOTDETECT,
+ "PCIe serdes Q3 no clock"),
+ INFINIPATH_HWE_MSG(DDSRXEQMEMORYPARITYERR,
+ "DDS RXEQ memory parity"),
+ INFINIPATH_HWE_MSG(IB_UC_MEMORYPARITYERR, "IB uC memory parity"),
+ INFINIPATH_HWE_MSG(PCIE_UC_OCT0MEMORYPARITYERR,
+ "PCIe uC oct0 memory parity"),
+ INFINIPATH_HWE_MSG(PCIE_UC_OCT1MEMORYPARITYERR,
+ "PCIe uC oct1 memory parity"),
+};
+
+static void autoneg_work(struct work_struct *);
+
+/*
+ * The offset is different for different configured port numbers, since
+ * port 0 is fixed in size, but others can vary. Make it a function to
+ * make the issue more obvious.
+ */
+static inline u32 port_egrtid_idx(struct ipath_devdata *dd, unsigned port)
+{
+ return port ? dd->ipath_p0_rcvegrcnt +
+ (port-1) * dd->ipath_rcvegrcnt : 0;
+}
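+/*
+ * Worked example (numbers are illustrative only): if p0_rcvegrcnt were
+ * 2048 and rcvegrcnt were 512, port 0 would start at index 0, port 1 at
+ * 2048, port 2 at 2560, and so on.
+ */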
+
+static void ipath_7220_txe_recover(struct ipath_devdata *dd)
+{
+ ++ipath_stats.sps_txeparity;
+
+ dev_info(&dd->pcidev->dev,
+ "Recovering from TXE PIO parity error\n");
+ ipath_disarm_senderrbufs(dd);
+}
+
+
+/**
+ * ipath_7220_handle_hwerrors - display hardware errors.
+ * @dd: the infinipath device
+ * @msg: the output buffer
+ * @msgl: the size of the output buffer
+ *
+ * Most hardware errors are catastrophic, but for right now,
+ * we'll print them and continue. We reuse the same message buffer as
+ * ipath_handle_errors() to avoid excessive stack usage.
+ */
+static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
+ size_t msgl)
+{
+ ipath_err_t hwerrs;
+ u32 bits, ctrl;
+ int isfatal = 0;
+ char bitsmsg[64];
+ int log_idx;
+
+ hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
+ if (!hwerrs) {
+ /*
+ * Better than printing confusing messages.
+ * This seems to be related to clearing the crc error, or
+ * the pll error during init.
+ */
+ ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
+ goto bail;
+ } else if (hwerrs == ~0ULL) {
+ ipath_dev_err(dd, "Read of hardware error status failed "
+ "(all bits set); ignoring\n");
+ goto bail;
+ }
+ ipath_stats.sps_hwerrs++;
+
+ /*
+ * Always clear the error status register, except MEMBISTFAIL,
+ * regardless of whether we continue or stop using the chip.
+ * We want that set so we know it failed, even across driver reload.
+ * We'll still ignore it in the hwerrmask. We do this partly for
+ * diagnostics, but also for support.
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+ hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
+
+ hwerrs &= dd->ipath_hwerrmask;
+
+ /* We log some errors to EEPROM, check if we have any of those. */
+ for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
+ if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
+ ipath_inc_eeprom_err(dd, log_idx, 1);
+ /*
+ * Make sure we get this much out, unless told to be quiet,
+ * or it's occurred within the last 5 seconds.
+ */
+ if ((hwerrs & ~(dd->ipath_lasthwerror |
+ ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
+ (ipath_debug & __IPATH_VERBDBG))
+ dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
+ "(cleared)\n", (unsigned long long) hwerrs);
+ dd->ipath_lasthwerror |= hwerrs;
+
+ if (hwerrs & ~dd->ipath_hwe_bitsextant)
+ ipath_dev_err(dd, "hwerror interrupt with unknown errors "
+ "%llx set\n", (unsigned long long)
+ (hwerrs & ~dd->ipath_hwe_bitsextant));
+
+ if (hwerrs & INFINIPATH_HWE_IB_UC_MEMORYPARITYERR)
+ ipath_sd7220_clr_ibpar(dd);
+
+ ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
+ if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
+ /*
+ * Parity errors in send memory are recoverable by h/w
+ * just do housekeeping, exit freeze mode and continue.
+ */
+ if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+ ipath_7220_txe_recover(dd);
+ hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+ }
+ if (hwerrs) {
+ /*
+ * If any bits are set that we aren't ignoring, only make the
+ * complaint once, in case it's stuck or recurring and we
+ * get here multiple times.
+ * Force link down, so switch knows, and
+ * LEDs are turned off.
+ */
+ if (dd->ipath_flags & IPATH_INITTED) {
+ ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+ ipath_setup_7220_setextled(dd,
+ INFINIPATH_IBCS_L_STATE_DOWN,
+ INFINIPATH_IBCS_LT_STATE_DISABLED);
+ ipath_dev_err(dd, "Fatal Hardware Error "
+ "(freeze mode), no longer"
+ " usable, SN %.16s\n",
+ dd->ipath_serial);
+ isfatal = 1;
+ }
+ /*
+ * Mark as having had an error for driver, and also
+ * for /sys and status word mapped to user programs.
+ * This marks unit as not usable, until reset.
+ */
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+ *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+ dd->ipath_flags &= ~IPATH_INITTED;
+ } else {
+ ipath_dbg("Clearing freezemode on ignored or "
+ "recovered hardware error\n");
+ ipath_clear_freeze(dd);
+ }
+ }
+
+ *msg = '\0';
+
+ if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
+ strlcat(msg, "[Memory BIST test failed, "
+ "InfiniPath hardware unusable]", msgl);
+ /* ignore from now on, so disable until driver reloaded */
+ *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+ dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask);
+ }
+
+ ipath_format_hwerrors(hwerrs,
+ ipath_7220_hwerror_msgs,
+ ARRAY_SIZE(ipath_7220_hwerror_msgs),
+ msg, msgl);
+
+ if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
+ << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
+ bits = (u32) ((hwerrs >>
+ INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) &
+ INFINIPATH_HWE_PCIEMEMPARITYERR_MASK);
+ snprintf(bitsmsg, sizeof bitsmsg,
+ "[PCIe Mem Parity Errs %x] ", bits);
+ strlcat(msg, bitsmsg, msgl);
+ }
+
+#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \
+ INFINIPATH_HWE_COREPLL_RFSLIP)
+
+ if (hwerrs & _IPATH_PLL_FAIL) {
+ snprintf(bitsmsg, sizeof bitsmsg,
+ "[PLL failed (%llx), InfiniPath hardware unusable]",
+ (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
+ strlcat(msg, bitsmsg, msgl);
+ /* ignore from now on, so disable until driver reloaded */
+ dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask);
+ }
+
+ if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
+ /*
+ * If it occurs, it is left masked since the external
+ * interface is unused.
+ */
+ dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask);
+ }
+
+ ipath_dev_err(dd, "%s hardware error\n", msg);
+ /*
+ * For the /sys status file. If no trailing } is copied, we'll
+ * know it was truncated.
+ */
+ if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
+ snprintf(dd->ipath_freezemsg, dd->ipath_freezelen,
+ "{%s}", msg);
+bail:;
+}
+
+/**
+ * ipath_7220_boardname - fill in the board name
+ * @dd: the infinipath device
+ * @name: the output buffer
+ * @namelen: the size of the output buffer
+ *
+ * info is based on the board revision register
+ */
+static int ipath_7220_boardname(struct ipath_devdata *dd, char *name,
+ size_t namelen)
+{
+ char *n = NULL;
+ u8 boardrev = dd->ipath_boardrev;
+ int ret;
+
+ if (boardrev == 15) {
+ /*
+ * Emulator sometimes comes up all-ones, rather than zero.
+ */
+ boardrev = 0;
+ dd->ipath_boardrev = boardrev;
+ }
+ switch (boardrev) {
+ case 0:
+ n = "InfiniPath_7220_Emulation";
+ break;
+ case 1:
+ n = "InfiniPath_QLE7240";
+ break;
+ case 2:
+ n = "InfiniPath_QLE7280";
+ break;
+ case 3:
+ n = "InfiniPath_QLE7242";
+ break;
+ case 4:
+ n = "InfiniPath_QEM7240";
+ break;
+ case 5:
+ n = "InfiniPath_QMI7240";
+ break;
+ case 6:
+ n = "InfiniPath_QMI7264";
+ break;
+ case 7:
+ n = "InfiniPath_QMH7240";
+ break;
+ case 8:
+ n = "InfiniPath_QME7240";
+ break;
+ case 9:
+ n = "InfiniPath_QLE7250";
+ break;
+ case 10:
+ n = "InfiniPath_QLE7290";
+ break;
+ case 11:
+ n = "InfiniPath_QEM7250";
+ break;
+ case 12:
+ n = "InfiniPath_QLE-Bringup";
+ break;
+ default:
+ ipath_dev_err(dd,
+ "Don't yet know about board with ID %u\n",
+ boardrev);
+ snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
+ boardrev);
+ break;
+ }
+ if (n)
+ snprintf(name, namelen, "%s", n);
+
+ if (dd->ipath_majrev != 5 || !dd->ipath_minrev ||
+ dd->ipath_minrev > 2) {
+ ipath_dev_err(dd, "Unsupported InfiniPath hardware "
+ "revision %u.%u!\n",
+ dd->ipath_majrev, dd->ipath_minrev);
+ ret = 1;
+ } else if (dd->ipath_minrev == 1 &&
+ !(dd->ipath_flags & IPATH_INITTED)) {
+ /* Rev1 chips are prototypes. Complain at init, but allow use */
+ ipath_dev_err(dd, "Unsupported hardware "
+ "revision %u.%u, Contact support@qlogic.com\n",
+ dd->ipath_majrev, dd->ipath_minrev);
+ ret = 0;
+ } else
+ ret = 0;
+
+ /*
+ * Set here not in ipath_init_*_funcs because we have to do
+ * it after we can read chip registers.
+ */
+ dd->ipath_ureg_align = 0x10000; /* 64KB alignment */
+
+ return ret;
+}
+
+/**
+ * ipath_7220_init_hwerrors - enable hardware errors
+ * @dd: the infinipath device
+ *
+ * Now that we have finished initializing everything that might reasonably
+ * cause a hardware error, and cleared those error bits as they occur,
+ * we can enable hardware errors in the mask (potentially enabling
+ * freeze mode), and enable hardware errors as errors (along with
+ * everything else) in errormask
+ */
+static void ipath_7220_init_hwerrors(struct ipath_devdata *dd)
+{
+ ipath_err_t val;
+ u64 extsval;
+
+ extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
+
+ if (!(extsval & (INFINIPATH_EXTS_MEMBIST_ENDTEST |
+ INFINIPATH_EXTS_MEMBIST_DISABLED)))
+ ipath_dev_err(dd, "MemBIST did not complete!\n");
+ if (extsval & INFINIPATH_EXTS_MEMBIST_DISABLED)
+ dev_info(&dd->pcidev->dev, "MemBIST is disabled.\n");
+
+ val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */
+
+ if (!dd->ipath_boardrev) /* no PLL for Emulator */
+ val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
+
+ if (dd->ipath_minrev == 1)
+ val &= ~(1ULL << 42); /* TXE LaunchFIFO Parity rev1 issue */
+
+ val &= ~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR;
+ dd->ipath_hwerrmask = val;
+
+ /*
+ * Special trigger "error" is for debugging purposes. It
+ * works around a processor/chipset problem. The error
+ * interrupt allows us to count occurrences, but we don't
+ * want to pay the overhead for normal use. Emulation only.
+ */
+ if (!dd->ipath_boardrev)
+ dd->ipath_maskederrs = INFINIPATH_E_SENDSPECIALTRIGGER;
+}
+
+/*
+ * All detailed interaction with the SerDes has been moved to ipath_sd7220.c
+ *
+ * The portion of IBA7220-specific bringup_serdes() that actually deals with
+ * registers and memory within the SerDes itself is ipath_sd7220_init().
+ */
+
+/**
+ * ipath_7220_bringup_serdes - bring up the serdes
+ * @dd: the infinipath device
+ */
+static int ipath_7220_bringup_serdes(struct ipath_devdata *dd)
+{
+ int ret = 0;
+ u64 val, prev_val, guid;
+ int was_reset; /* Note whether uC was reset */
+
+ ipath_dbg("Trying to bringup serdes\n");
+
+ if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
+ INFINIPATH_HWE_SERDESPLLFAILED) {
+ ipath_dbg("At start, serdes PLL failed bit set "
+ "in hwerrstatus, clearing and continuing\n");
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+ INFINIPATH_HWE_SERDESPLLFAILED);
+ }
+
+ if (!dd->ipath_ibcddrctrl) {
+ /* not on re-init after reset */
+ dd->ipath_ibcddrctrl =
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcddrctrl);
+
+ if (dd->ipath_link_speed_enabled ==
+ (IPATH_IB_SDR | IPATH_IB_DDR))
+ dd->ipath_ibcddrctrl |=
+ IBA7220_IBC_SPEED_AUTONEG_MASK |
+ IBA7220_IBC_IBTA_1_2_MASK;
+ else
+ dd->ipath_ibcddrctrl |=
+ dd->ipath_link_speed_enabled == IPATH_IB_DDR
+ ? IBA7220_IBC_SPEED_DDR :
+ IBA7220_IBC_SPEED_SDR;
+ if ((dd->ipath_link_width_enabled & (IB_WIDTH_1X |
+ IB_WIDTH_4X)) == (IB_WIDTH_1X | IB_WIDTH_4X))
+ dd->ipath_ibcddrctrl |= IBA7220_IBC_WIDTH_AUTONEG;
+ else
+ dd->ipath_ibcddrctrl |=
+ dd->ipath_link_width_enabled == IB_WIDTH_4X
+ ? IBA7220_IBC_WIDTH_4X_ONLY :
+ IBA7220_IBC_WIDTH_1X_ONLY;
+
+ /* always enable these on driver reload, not sticky */
+ dd->ipath_ibcddrctrl |=
+ IBA7220_IBC_RXPOL_MASK << IBA7220_IBC_RXPOL_SHIFT;
+ dd->ipath_ibcddrctrl |=
+ IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT;
+ /*
+ * automatic lane reversal detection for receive
+ * doesn't work correctly in rev 1, so disable it
+ * on that rev, otherwise enable (disabling not
+ * sticky across reload for >rev1)
+ */
+ if (dd->ipath_minrev == 1)
+ dd->ipath_ibcddrctrl &=
+ ~IBA7220_IBC_LANE_REV_SUPPORTED;
+ else
+ dd->ipath_ibcddrctrl |=
+ IBA7220_IBC_LANE_REV_SUPPORTED;
+ }
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl,
+ dd->ipath_ibcddrctrl);
+
+ ipath_write_kreg(dd, IPATH_KREG_OFFSET(IBNCModeCtrl), 0ULL);
+
+ /* IBA7220 has SERDES MPU reset in D0 of what _was_ IBPLLCfg */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibserdesctrl);
+ /* remember if uC was in Reset or not, for dactrim */
+ was_reset = (val & 1);
+ ipath_cdbg(VERBOSE, "IBReset %s xgxsconfig %llx\n",
+ was_reset ? "Asserted" : "Negated", (unsigned long long)
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
+
+ if (dd->ipath_boardrev) {
+ /*
+ * Hardware is not emulator, and may have been reset. Init it.
+ * Below will release reset, but needs to know if chip was
+ * originally in reset, to only trim DACs on first time
+ * after chip reset or powercycle (not driver reload)
+ */
+ ret = ipath_sd7220_init(dd, was_reset);
+ }
+
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ prev_val = val;
+ val |= INFINIPATH_XGXS_FC_SAFE;
+ if (val != prev_val) {
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ }
+ if (val & INFINIPATH_XGXS_RESET)
+ val &= ~INFINIPATH_XGXS_RESET;
+ if (val != prev_val)
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+
+ ipath_cdbg(VERBOSE, "done: xgxs=%llx from %llx\n",
+ (unsigned long long)
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig),
+ (unsigned long long) prev_val);
+
+ guid = be64_to_cpu(dd->ipath_guid);
+
+ if (!guid) {
+ /* have to have something, so use likely unique tsc */
+ guid = get_cycles();
+ ipath_dbg("No GUID for heartbeat, faking %llx\n",
+ (unsigned long long)guid);
+ } else
+ ipath_cdbg(VERBOSE, "Wrote %llX to HRTBT_GUID\n",
+ (unsigned long long) guid);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hrtbt_guid, guid);
+ return ret;
+}
+
+static void ipath_7220_config_jint(struct ipath_devdata *dd,
+ u16 idle_ticks, u16 max_packets)
+{
+
+ /*
+ * We can request a receive interrupt for 1 or more packets
+ * from current offset.
+ */
+ if (idle_ticks == 0 || max_packets == 0)
+ /* interrupt after one packet if no mitigation */
+ dd->ipath_rhdrhead_intr_off =
+ 1ULL << IBA7220_HDRHEAD_PKTINT_SHIFT;
+ else
+ /* Turn off RcvHdrHead interrupts if using mitigation */
+ dd->ipath_rhdrhead_intr_off = 0ULL;
+
+ /* refresh kernel RcvHdrHead registers... */
+ ipath_write_ureg(dd, ur_rcvhdrhead,
+ dd->ipath_rhdrhead_intr_off |
+ dd->ipath_pd[0]->port_head, 0);
+
+ dd->ipath_jint_max_packets = max_packets;
+ dd->ipath_jint_idle_ticks = idle_ticks;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_jintreload,
+ ((u64) max_packets << INFINIPATH_JINT_PACKETSHIFT) |
+ idle_ticks);
+}
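+/*
+ * Usage sketch: ipath_7220_config_jint(dd, 0, 0) turns mitigation off
+ * (interrupt per packet); nonzero idle_ticks and max_packets program
+ * JIntReload for coalesced receive interrupts instead.
+ */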
+
+/**
+ * ipath_7220_quiet_serdes - set serdes to txidle
+ * @dd: the infinipath device
+ * Called when driver is being unloaded
+ */
+static void ipath_7220_quiet_serdes(struct ipath_devdata *dd)
+{
+ u64 val;
+ dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG;
+ wake_up(&dd->ipath_autoneg_wait);
+ cancel_delayed_work(&dd->ipath_autoneg_work);
+ flush_scheduled_work();
+ ipath_shutdown_relock_poll(dd);
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ val |= INFINIPATH_XGXS_RESET;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+}
+
+static int ipath_7220_intconfig(struct ipath_devdata *dd)
+{
+ ipath_7220_config_jint(dd, dd->ipath_jint_idle_ticks,
+ dd->ipath_jint_max_packets);
+ return 0;
+}
+
+/**
+ * ipath_setup_7220_setextled - set the state of the two external LEDs
+ * @dd: the infinipath device
+ * @lst: the L state
+ * @ltst: the LT state
+ *
+ * These LEDs indicate the physical and logical state of IB link.
+ * For this chip (at least with recommended board pinouts), LED1
+ * is Yellow (logical state) and LED2 is Green (physical state),
+ *
+ * Note: We try to match the Mellanox HCA LED behavior as best
+ * we can. Green indicates physical link state is OK (something is
+ * plugged in, and we can train).
+ * Amber indicates the link is logically up (ACTIVE).
+ * Mellanox further blinks the amber LED to indicate data packet
+ * activity, but we have no hardware support for that, so it would
+ * require waking up every 10-20 msecs and checking the counters
+ * on the chip, and then turning the LED off if appropriate. That's
+ * visible overhead, so not something we will do.
+ *
+ */
+static void ipath_setup_7220_setextled(struct ipath_devdata *dd, u64 lst,
+ u64 ltst)
+{
+ u64 extctl, ledblink = 0;
+ unsigned long flags = 0;
+
+ /* the diags use the LED to indicate diag info, so we leave
+ * the external LED alone when the diags are running */
+ if (ipath_diag_inuse)
+ return;
+
+ /* Allow override of LED display for, e.g. Locating system in rack */
+ if (dd->ipath_led_override) {
+ ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
+ ? INFINIPATH_IBCS_LT_STATE_LINKUP
+ : INFINIPATH_IBCS_LT_STATE_DISABLED;
+ lst = (dd->ipath_led_override & IPATH_LED_LOG)
+ ? INFINIPATH_IBCS_L_STATE_ACTIVE
+ : INFINIPATH_IBCS_L_STATE_DOWN;
+ }
+
+ spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+ extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
+ INFINIPATH_EXTC_LED2PRIPORT_ON);
+ if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP) {
+ extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
+ /*
+ * Counts are in chip clock (4ns) periods.
+ * This is roughly 1/15 sec (66.6 ms) on,
+ * 3/16 sec (187.5 ms) off, with packets rcvd.
+ */
+ ledblink = ((66600*1000UL/4) << IBA7220_LEDBLINK_ON_SHIFT)
+ | ((187500*1000UL/4) << IBA7220_LEDBLINK_OFF_SHIFT);
+ }
+ if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
+ extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
+ dd->ipath_extctrl = extctl;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
+ spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
+
+ if (ledblink) /* blink the LED on packet receive */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvpktledcnt,
+ ledblink);
+}
+
+/*
+ * Similar to pci_intx(pdev, 1), except that we make sure
+ * msi is off...
+ */
+static void ipath_enable_intx(struct pci_dev *pdev)
+{
+ u16 cw, new;
+ int pos;
+
+ /* first, turn on INTx */
+ pci_read_config_word(pdev, PCI_COMMAND, &cw);
+ new = cw & ~PCI_COMMAND_INTX_DISABLE;
+ if (new != cw)
+ pci_write_config_word(pdev, PCI_COMMAND, new);
+
+ /* then turn off MSI */
+ pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+ if (pos) {
+ pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw);
+ new = cw & ~PCI_MSI_FLAGS_ENABLE;
+ if (new != cw)
+ pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new);
+ }
+}
+
+static int ipath_msi_enabled(struct pci_dev *pdev)
+{
+ int pos, ret = 0;
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+ if (pos) {
+ u16 cw;
+
+ pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw);
+ ret = !!(cw & PCI_MSI_FLAGS_ENABLE);
+ }
+ return ret;
+}
+
+/*
+ * Disable the MSI interrupt if enabled, and clear the flag. The
+ * flag is used primarily for the fallback to INTx, but is also
+ * used as a marker during reinit after reset.
+ */
+static void ipath_7220_nomsi(struct ipath_devdata *dd)
+{
+ dd->ipath_msi_lo = 0;
+
+ if (ipath_msi_enabled(dd->pcidev)) {
+ /*
+ * free, but don't zero; later kernels require
+ * it be freed before disable_msi, so the intx
+ * setup has to request it again.
+ */
+ if (dd->ipath_irq)
+ free_irq(dd->ipath_irq, dd);
+ pci_disable_msi(dd->pcidev);
+ }
+}
+
+/**
+ * ipath_setup_7220_cleanup - clean up any chip-specific stuff
+ * @dd: the infinipath device
+ *
+ * Nothing but msi interrupt cleanup for now.
+ *
+ * This is called during driver unload.
+ */
+static void ipath_setup_7220_cleanup(struct ipath_devdata *dd)
+{
+ ipath_7220_nomsi(dd);
+}
+
+
+static void ipath_7220_pcie_params(struct ipath_devdata *dd, u32 boardrev)
+{
+ u16 linkstat, minwidth, speed;
+ int pos;
+
+ pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP);
+ if (!pos) {
+ ipath_dev_err(dd, "Can't find PCI Express capability!\n");
+ goto bail;
+ }
+
+ pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA,
+ &linkstat);
+ /*
+ * speed is bits 0-3, link width is bits 4-8;
+ * no defines for them in headers
+ */
+ speed = linkstat & 0xf;
+ linkstat >>= 4;
+ linkstat &= 0x1f;
+ dd->ipath_lbus_width = linkstat;
+ switch (boardrev) {
+ case 0:
+ case 2:
+ case 10:
+ case 12:
+ minwidth = 16; /* x16 capable boards */
+ break;
+ default:
+ minwidth = 8; /* x8 capable boards */
+ break;
+ }
+
+ switch (speed) {
+ case 1:
+ dd->ipath_lbus_speed = 2500; /* Gen1, 2.5GHz */
+ break;
+ case 2:
+ dd->ipath_lbus_speed = 5000; /* Gen2, 5GHz */
+ break;
+ default: /* not defined, assume gen1 */
+ dd->ipath_lbus_speed = 2500;
+ break;
+ }
+
+ if (linkstat < minwidth)
+ ipath_dev_err(dd,
+ "PCIe width %u (x%u HCA), performance "
+ "reduced\n", linkstat, minwidth);
+ else
+ ipath_cdbg(VERBOSE, "PCIe speed %u width %u (x%u HCA)\n",
+ dd->ipath_lbus_speed, linkstat, minwidth);
+
+ if (speed != 1)
+ ipath_dev_err(dd,
+ "PCIe linkspeed %u is incorrect; "
+ "should be 1 (2500)!\n", speed);
+
+bail:
+ /* fill in string, even on errors */
+ snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
+ "PCIe,%uMHz,x%u\n",
+ dd->ipath_lbus_speed,
+ dd->ipath_lbus_width);
+ return;
+}
+
+
+/**
+ * ipath_setup_7220_config - setup PCIe config related stuff
+ * @dd: the infinipath device
+ * @pdev: the PCI device
+ *
+ * The pci_enable_msi() call will fail on systems with MSI quirks
+ * such as those with AMD8131, even if the device of interest is not
+ * attached to that device (in the 2.6.13 - 2.6.15 kernels, at least; fixed
+ * late in 2.6.16).
+ * All that can be done is to edit the kernel source to remove the quirk
+ * check until that is fixed.
+ * We do not need to call enable_msi() for our HyperTransport chip,
+ * even though it uses MSI, and we want to avoid the quirk warning,
+ * so we call enable_msi only for PCIe. If we do end up needing
+ * pci_enable_msi at some point in the future for HT, we'll move the
+ * call back into the main init_one code.
+ * We save the msi lo and hi values, so we can restore them after
+ * chip reset (the kernel PCI infrastructure doesn't yet handle that
+ * correctly).
+ */
+static int ipath_setup_7220_config(struct ipath_devdata *dd,
+ struct pci_dev *pdev)
+{
+ int pos, ret = -1;
+ u32 boardrev;
+
+ dd->ipath_msi_lo = 0; /* used as a flag during reset processing */
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+ if (!strcmp(int_type, "force_msi") || !strcmp(int_type, "auto"))
+ ret = pci_enable_msi(pdev);
+ if (ret) {
+ if (!strcmp(int_type, "force_msi")) {
+ ipath_dev_err(dd, "pci_enable_msi failed: %d, "
+ "force_msi is on, so not continuing.\n",
+ ret);
+ return ret;
+ }
+
+ ipath_enable_intx(pdev);
+ if (!strcmp(int_type, "auto"))
+ ipath_dev_err(dd, "pci_enable_msi failed: %d, "
+ "falling back to INTx\n", ret);
+ } else if (pos) {
+ u16 control;
+ pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO,
+ &dd->ipath_msi_lo);
+ pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI,
+ &dd->ipath_msi_hi);
+ pci_read_config_word(pdev, pos + PCI_MSI_FLAGS,
+ &control);
+ /* now save the data (vector) info */
+ pci_read_config_word(pdev,
+ pos + ((control & PCI_MSI_FLAGS_64BIT)
+ ? PCI_MSI_DATA_64 :
+ PCI_MSI_DATA_32),
+ &dd->ipath_msi_data);
+ } else
+ ipath_dev_err(dd, "Can't find MSI capability, "
+ "can't save MSI settings for reset\n");
+
+ dd->ipath_irq = pdev->irq;
+
+ /*
+ * We save the cachelinesize also, although it doesn't
+ * really matter.
+ */
+ pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE,
+ &dd->ipath_pci_cacheline);
+
+ /*
+ * This function is called early, so ipath_boardrev is not set yet. We
+ * can't use ipath_read_kreg64() yet (too early in init), so use readq().
+ */
+ boardrev = (readq(&dd->ipath_kregbase[dd->ipath_kregs->kr_revision])
+ >> INFINIPATH_R_BOARDID_SHIFT) & INFINIPATH_R_BOARDID_MASK;
+
+ ipath_7220_pcie_params(dd, boardrev);
+
+ dd->ipath_flags |= IPATH_NODMA_RTAIL | IPATH_HAS_SEND_DMA |
+ IPATH_HAS_PBC_CNT | IPATH_HAS_THRESH_UPDATE;
+ dd->ipath_pioupd_thresh = 4U; /* set default update threshold */
+ return 0;
+}
+
+static void ipath_init_7220_variables(struct ipath_devdata *dd)
+{
+ /*
+ * setup the register offsets, since they are different for each
+ * chip
+ */
+ dd->ipath_kregs = &ipath_7220_kregs;
+ dd->ipath_cregs = &ipath_7220_cregs;
+
+ /*
+ * bits for selecting i2c direction and values,
+ * used for I2C serial flash
+ */
+ dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+ dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+ dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+ dd->ipath_gpio_scl = IPATH_GPIO_SCL;
+
+ /*
+ * Fill in data for field-values that change in IBA7220.
+ * We dynamically specify only the mask for LINKTRAININGSTATE
+ * and only the shift for LINKSTATE, as they are the only ones
+ * that change. Also precalculate the 3 link states of interest
+ * and the combined mask.
+ */
+ dd->ibcs_ls_shift = IBA7220_IBCS_LINKSTATE_SHIFT;
+ dd->ibcs_lts_mask = IBA7220_IBCS_LINKTRAININGSTATE_MASK;
+ dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
+ dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
+ dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+ (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
+ dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+ (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
+ dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
+ (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
+
+ /*
+ * Fill in data for ibcc field-values that change in IBA7220.
+ * We dynamically specify only the mask for LINKINITCMD
+ * and only the shift for LINKCMD and MAXPKTLEN, as they are
+ * the only ones that change.
+ */
+ dd->ibcc_lic_mask = IBA7220_IBCC_LINKINITCMD_MASK;
+ dd->ibcc_lc_shift = IBA7220_IBCC_LINKCMD_SHIFT;
+ dd->ibcc_mpl_shift = IBA7220_IBCC_MAXPKTLEN_SHIFT;
+
+ /* Fill in shifts for RcvCtrl. */
+ dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
+ dd->ipath_r_intravail_shift = IBA7220_R_INTRAVAIL_SHIFT;
+ dd->ipath_r_tailupd_shift = IBA7220_R_TAILUPD_SHIFT;
+ dd->ipath_r_portcfg_shift = IBA7220_R_PORTCFG_SHIFT;
+
+ /* variables for sanity checking interrupt and errors */
+ dd->ipath_hwe_bitsextant =
+ (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
+ (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
+ (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
+ INFINIPATH_HWE_PCIE1PLLFAILED |
+ INFINIPATH_HWE_PCIE0PLLFAILED |
+ INFINIPATH_HWE_PCIEPOISONEDTLP |
+ INFINIPATH_HWE_PCIECPLTIMEOUT |
+ INFINIPATH_HWE_PCIEBUSPARITYXTLH |
+ INFINIPATH_HWE_PCIEBUSPARITYXADM |
+ INFINIPATH_HWE_PCIEBUSPARITYRADM |
+ INFINIPATH_HWE_MEMBISTFAILED |
+ INFINIPATH_HWE_COREPLL_FBSLIP |
+ INFINIPATH_HWE_COREPLL_RFSLIP |
+ INFINIPATH_HWE_SERDESPLLFAILED |
+ INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
+ INFINIPATH_HWE_IBCBUSFRSPCPARITYERR |
+ INFINIPATH_HWE_PCIECPLDATAQUEUEERR |
+ INFINIPATH_HWE_PCIECPLHDRQUEUEERR |
+ INFINIPATH_HWE_SDMAMEMREADERR |
+ INFINIPATH_HWE_CLK_UC_PLLNOTLOCKED |
+ INFINIPATH_HWE_PCIESERDESQ0PCLKNOTDETECT |
+ INFINIPATH_HWE_PCIESERDESQ1PCLKNOTDETECT |
+ INFINIPATH_HWE_PCIESERDESQ2PCLKNOTDETECT |
+ INFINIPATH_HWE_PCIESERDESQ3PCLKNOTDETECT |
+ INFINIPATH_HWE_DDSRXEQMEMORYPARITYERR |
+ INFINIPATH_HWE_IB_UC_MEMORYPARITYERR |
+ INFINIPATH_HWE_PCIE_UC_OCT0MEMORYPARITYERR |
+ INFINIPATH_HWE_PCIE_UC_OCT1MEMORYPARITYERR;
+ dd->ipath_i_bitsextant =
+ INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED |
+ (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
+ (INFINIPATH_I_RCVAVAIL_MASK <<
+ INFINIPATH_I_RCVAVAIL_SHIFT) |
+ INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
+ INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO |
+ INFINIPATH_I_JINT | INFINIPATH_I_SERDESTRIMDONE;
+ dd->ipath_e_bitsextant =
+ INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
+ INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
+ INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
+ INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
+ INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
+ INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
+ INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+ INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
+ INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
+ INFINIPATH_E_SENDSPECIALTRIGGER |
+ INFINIPATH_E_SDMADISABLED | INFINIPATH_E_SMINPKTLEN |
+ INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNDERRUN |
+ INFINIPATH_E_SPKTLEN | INFINIPATH_E_SDROPPEDSMPPKT |
+ INFINIPATH_E_SDROPPEDDATAPKT |
+ INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
+ INFINIPATH_E_SUNSUPVL | INFINIPATH_E_SENDBUFMISUSE |
+ INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND |
+ INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE |
+ INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG |
+ INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW |
+ INFINIPATH_E_SDMAUNEXPDATA |
+ INFINIPATH_E_IBSTATUSCHANGED | INFINIPATH_E_INVALIDADDR |
+ INFINIPATH_E_RESET | INFINIPATH_E_HARDWARE |
+ INFINIPATH_E_SDMADESCADDRMISALIGN |
+ INFINIPATH_E_INVALIDEEPCMD;
+
+ dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+ dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+ dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
+ dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
+ dd->ipath_flags |= IPATH_INTREG_64 | IPATH_HAS_MULT_IB_SPEED
+ | IPATH_HAS_LINK_LATENCY;
+
+ /*
+ * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+ * 2 is Some Misc, 3 is reserved for future.
+ */
+ dd->ipath_eep_st_masks[0].hwerrs_to_log =
+ INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
+
+ dd->ipath_eep_st_masks[1].hwerrs_to_log =
+ INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
+
+ dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
+
+ ipath_linkrecovery = 0;
+
+ init_waitqueue_head(&dd->ipath_autoneg_wait);
+ INIT_DELAYED_WORK(&dd->ipath_autoneg_work, autoneg_work);
+
+ dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
+ dd->ipath_link_speed_supported = IPATH_IB_SDR | IPATH_IB_DDR;
+
+ dd->ipath_link_width_enabled = dd->ipath_link_width_supported;
+ dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
+ /*
+ * set the initial values to reasonable default, will be set
+ * for real when link is up.
+ */
+ dd->ipath_link_width_active = IB_WIDTH_4X;
+ dd->ipath_link_speed_active = IPATH_IB_SDR;
+ dd->delay_mult = rate_to_delay[0][1];
+}
+
+
+/*
+ * Setup the MSI stuff again after a reset. I'd like to just call
+ * pci_enable_msi() and request_irq() again, but when I do that,
+ * the MSI enable bit doesn't get set in the command word, and
+ * we switch to a different interrupt vector, which is confusing,
+ * so I instead just do it all inline. Perhaps we can somehow tie this
+ * into the PCIe hotplug support at some point.
+ * Note, because I'm doing it all here, I don't call pci_disable_msi()
+ * or free_irq() at the start of ipath_setup_7220_reset().
+ */
+static int ipath_reinit_msi(struct ipath_devdata *dd)
+{
+ int ret = 0;
+
+ int pos;
+ u16 control;
+ if (!dd->ipath_msi_lo) /* Using intX, or init problem */
+ goto bail;
+
+ pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
+ if (!pos) {
+ ipath_dev_err(dd, "Can't find MSI capability, "
+ "can't restore MSI settings\n");
+ goto bail;
+ }
+ ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
+ dd->ipath_msi_lo, pos + PCI_MSI_ADDRESS_LO);
+ pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
+ dd->ipath_msi_lo);
+ ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
+ dd->ipath_msi_hi, pos + PCI_MSI_ADDRESS_HI);
+ pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
+ dd->ipath_msi_hi);
+ pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control);
+ if (!(control & PCI_MSI_FLAGS_ENABLE)) {
+ ipath_cdbg(VERBOSE, "MSI control at off %x was %x, "
+ "setting MSI enable (%x)\n", pos + PCI_MSI_FLAGS,
+ control, control | PCI_MSI_FLAGS_ENABLE);
+ control |= PCI_MSI_FLAGS_ENABLE;
+ pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
+ control);
+ }
+ /* now rewrite the data (vector) info */
+ pci_write_config_word(dd->pcidev, pos +
+ ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
+ dd->ipath_msi_data);
+ ret = 1;
+
+bail:
+ if (!ret) {
+ ipath_dbg("Using INTx, MSI disabled or not configured\n");
+ ipath_enable_intx(dd->pcidev);
+ ret = 1;
+ }
+ /*
+ * We restore the cachelinesize also, although it doesn't really
+ * matter.
+ */
+ pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
+ dd->ipath_pci_cacheline);
+ /* and now set the pci master bit again */
+ pci_set_master(dd->pcidev);
+
+ return ret;
+}
+
+/*
+ * This routine sleeps, so it can only be called from user context, not
+ * from interrupt context. If we need interrupt context, we can split
+ * it into two routines.
+ */
+static int ipath_setup_7220_reset(struct ipath_devdata *dd)
+{
+ u64 val;
+ int i;
+ int ret;
+ u16 cmdval;
+
+ pci_read_config_word(dd->pcidev, PCI_COMMAND, &cmdval);
+
+ /* Use dev_err so it shows up in logs, etc. */
+ ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
+
+ /* keep chip from being accessed in a few places */
+ dd->ipath_flags &= ~(IPATH_INITTED | IPATH_PRESENT);
+ val = dd->ipath_control | INFINIPATH_C_RESET;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control, val);
+ mb();
+
+ for (i = 1; i <= 5; i++) {
+ int r;
+
+ /*
+ * Allow MBIST, etc. to complete; longer on each retry.
+ * We sometimes get machine checks from bus timeout if no
+ * response, so for now, make it *really* long.
+ */
+ msleep(1000 + (1 + i) * 2000);
+ r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
+ dd->ipath_pcibar0);
+ if (r)
+ ipath_dev_err(dd, "rewrite of BAR0 failed: %d\n", r);
+ r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
+ dd->ipath_pcibar1);
+ if (r)
+ ipath_dev_err(dd, "rewrite of BAR1 failed: %d\n", r);
+ /* now re-enable memory access */
+ pci_write_config_word(dd->pcidev, PCI_COMMAND, cmdval);
+ r = pci_enable_device(dd->pcidev);
+ if (r)
+ ipath_dev_err(dd, "pci_enable_device failed after "
+ "reset: %d\n", r);
+ /*
+ * whether it fully enabled or not, mark as present,
+ * again (but not INITTED)
+ */
+ dd->ipath_flags |= IPATH_PRESENT;
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
+ if (val == dd->ipath_revision) {
+ ipath_cdbg(VERBOSE, "Got matching revision "
+ "register %llx on try %d\n",
+ (unsigned long long) val, i);
+ ret = ipath_reinit_msi(dd);
+ goto bail;
+ }
+ /* Probably getting -1 back */
+ ipath_dbg("Didn't get expected revision register, "
+ "got %llx, try %d\n", (unsigned long long) val,
+ i + 1);
+ }
+ ret = 0; /* failed */
+
+bail:
+ if (ret)
+ ipath_7220_pcie_params(dd, dd->ipath_boardrev);
+
+ return ret;
+}
+
+/**
+ * ipath_7220_put_tid - write a TID to the chip
+ * @dd: the infinipath device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @type: RCVHQ_RCV_TYPE_EAGER or RCVHQ_RCV_TYPE_EXPECTED
+ * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for selection of the
+ * appropriate "flavor". The static calls in cleanup just use the
+ * revision-agnostic form, as they are not performance critical.
+ */
+static void ipath_7220_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
+ u32 type, unsigned long pa)
+{
+ if (pa != dd->ipath_tidinvalid) {
+ u64 chippa = pa >> IBA7220_TID_PA_SHIFT;
+
+ /* paranoia checks */
+ if (pa != (chippa << IBA7220_TID_PA_SHIFT)) {
+ dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
+ "not 2KB aligned!\n", pa);
+ return;
+ }
+ if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) {
+ ipath_dev_err(dd,
+ "BUG: Physical page address 0x%lx "
+ "larger than supported\n", pa);
+ return;
+ }
+
+ if (type == RCVHQ_RCV_TYPE_EAGER)
+ chippa |= dd->ipath_tidtemplate;
+ else /* for now, always full 4KB page */
+ chippa |= IBA7220_TID_SZ_4K;
+ writeq(chippa, tidptr);
+ } else
+ writeq(pa, tidptr);
+ mmiowb();
+}
+
+/**
+ * ipath_7220_clear_tids - clear all TID entries for a port, expected and eager
+ * @dd: the infinipath device
+ * @port: the port
+ *
+ * clear all TID entries for a port, expected and eager.
+ * Used from ipath_close(). On this chip, TIDs are only 32 bits,
+ * not 64, but they are still on 64 bit boundaries, so tidbase
+ * is declared as u64 * for the pointer math, even though we write 32 bits
+ */
+static void ipath_7220_clear_tids(struct ipath_devdata *dd, unsigned port)
+{
+ u64 __iomem *tidbase;
+ unsigned long tidinv;
+ int i;
+
+ if (!dd->ipath_kregbase)
+ return;
+
+ ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
+
+ tidinv = dd->ipath_tidinvalid;
+ tidbase = (u64 __iomem *)
+ ((char __iomem *)(dd->ipath_kregbase) +
+ dd->ipath_rcvtidbase +
+ port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
+
+ for (i = 0; i < dd->ipath_rcvtidcnt; i++)
+ ipath_7220_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+ tidinv);
+
+ tidbase = (u64 __iomem *)
+ ((char __iomem *)(dd->ipath_kregbase) +
+ dd->ipath_rcvegrbase + port_egrtid_idx(dd, port)
+ * sizeof(*tidbase));
+
+ for (i = port ? dd->ipath_rcvegrcnt : dd->ipath_p0_rcvegrcnt; i; i--)
+ ipath_7220_put_tid(dd, &tidbase[i-1], RCVHQ_RCV_TYPE_EAGER,
+ tidinv);
+}
+
+/**
+ * ipath_7220_tidtemplate - setup constants for TID updates
+ * @dd: the infinipath device
+ *
+ * We setup stuff that we use a lot, to avoid calculating each time
+ */
+static void ipath_7220_tidtemplate(struct ipath_devdata *dd)
+{
+ /* For now, we always allocate 4KB buffers (at init) so we can
+ * receive max size packets. We may want a module parameter to
+	 * specify 2KB or 4KB and/or make it per port instead of per device
+ * for those who want to reduce memory footprint. Note that the
+ * ipath_rcvhdrentsize size must be large enough to hold the largest
+ * IB header (currently 96 bytes) that we expect to handle (plus of
+ * course the 2 dwords of RHF).
+ */
+ if (dd->ipath_rcvegrbufsize == 2048)
+ dd->ipath_tidtemplate = IBA7220_TID_SZ_2K;
+ else if (dd->ipath_rcvegrbufsize == 4096)
+ dd->ipath_tidtemplate = IBA7220_TID_SZ_4K;
+ else {
+ dev_info(&dd->pcidev->dev, "BUG: unsupported egrbufsize "
+ "%u, using %u\n", dd->ipath_rcvegrbufsize,
+ 4096);
+ dd->ipath_tidtemplate = IBA7220_TID_SZ_4K;
+ }
+ dd->ipath_tidinvalid = 0;
+}
+
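+/*
+ * Early chip-specific initialization: validate the interrupt_type module
+ * parameter, set the SDMA fetch arbitration bit (Control[4]) on later
+ * revisions, size the receive header entries and eager buffers, program
+ * the JINT defaults, read the EEPROM, and start and check the ProcMon
+ * counter.
+ */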
+static int ipath_7220_early_init(struct ipath_devdata *dd)
+{
+ u32 i, s;
+
+ if (strcmp(int_type, "auto") &&
+ strcmp(int_type, "force_msi") &&
+ strcmp(int_type, "force_intx")) {
+ ipath_dev_err(dd, "Invalid interrupt_type: '%s', expecting "
+ "auto, force_msi or force_intx\n", int_type);
+ return -EINVAL;
+ }
+
+ /*
+ * Control[4] has been added to change the arbitration within
+ * the SDMA engine between favoring data fetches over descriptor
+ * fetches. ipath_sdma_fetch_arb==0 gives data fetches priority.
+ */
+ if (ipath_sdma_fetch_arb && (dd->ipath_minrev > 1))
+ dd->ipath_control |= 1<<4;
+
+ dd->ipath_flags |= IPATH_4BYTE_TID;
+
+ /*
+ * For openfabrics, we need to be able to handle an IB header of
+	 * 24 dwords. The HT chip has arbitrarily sized receive buffers, so we
+ * made them the same size as the PIO buffers. This chip does not
+ * handle arbitrary size buffers, so we need the header large enough
+ * to handle largest IB header, but still have room for a 2KB MTU
+ * standard IB packet.
+ */
+ dd->ipath_rcvhdrentsize = 24;
+ dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
+ dd->ipath_rhf_offset =
+ dd->ipath_rcvhdrentsize - sizeof(u64) / sizeof(u32);
+
+ dd->ipath_rcvegrbufsize = ipath_mtu4096 ? 4096 : 2048;
+ /*
+ * the min() check here is currently a nop, but it may not always
+ * be, depending on just how we do ipath_rcvegrbufsize
+ */
+ dd->ipath_ibmaxlen = min(ipath_mtu4096 ? dd->ipath_piosize4k :
+ dd->ipath_piosize2k,
+ dd->ipath_rcvegrbufsize +
+ (dd->ipath_rcvhdrentsize << 2));
+ dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
+
+ ipath_7220_config_jint(dd, INFINIPATH_JINT_DEFAULT_IDLE_TICKS,
+ INFINIPATH_JINT_DEFAULT_MAX_PACKETS);
+
+ if (dd->ipath_boardrev) /* no eeprom on emulator */
+ ipath_get_eeprom_info(dd);
+
+ /* start of code to check and print procmon */
+ s = ipath_read_kreg32(dd, IPATH_KREG_OFFSET(ProcMon));
+ s &= ~(1U<<31); /* clear done bit */
+ s |= 1U<<14; /* clear counter (write 1 to clear) */
+ ipath_write_kreg(dd, IPATH_KREG_OFFSET(ProcMon), s);
+ /* make sure clear_counter low long enough before start */
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+
+ s &= ~(1U<<14); /* allow counter to count (before starting) */
+ ipath_write_kreg(dd, IPATH_KREG_OFFSET(ProcMon), s);
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ s = ipath_read_kreg32(dd, IPATH_KREG_OFFSET(ProcMon));
+
+ s |= 1U<<15; /* start the counter */
+ s &= ~(1U<<31); /* clear done bit */
+ s &= ~0x7ffU; /* clear frequency bits */
+ s |= 0xe29; /* set frequency bits, in case cleared */
+ ipath_write_kreg(dd, IPATH_KREG_OFFSET(ProcMon), s);
+
+ s = 0;
+ for (i = 500; i > 0 && !(s&(1ULL<<31)); i--) {
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ s = ipath_read_kreg32(dd, IPATH_KREG_OFFSET(ProcMon));
+ }
+ if (!(s&(1U<<31)))
+ ipath_dev_err(dd, "ProcMon register not valid: 0x%x\n", s);
+ else
+ ipath_dbg("ProcMon=0x%x, count=0x%x\n", s, (s>>16)&0x1ff);
+
+ return 0;
+}
+
+/**
+ * ipath_7220_get_base_info - set chip-specific flags for user code
+ * @pd: the infinipath port
+ * @kbase: ipath_base_info pointer
+ *
+ * We set the PCIE flag because the lower bandwidth on PCIe vs
+ * HyperTransport can affect some user packet algorithms.
+ */
+static int ipath_7220_get_base_info(struct ipath_portdata *pd, void *kbase)
+{
+ struct ipath_base_info *kinfo = kbase;
+
+ kinfo->spi_runtime_flags |=
+ IPATH_RUNTIME_PCIE | IPATH_RUNTIME_NODMA_RTAIL |
+ IPATH_RUNTIME_SDMA;
+
+ return 0;
+}
+
+static void ipath_7220_free_irq(struct ipath_devdata *dd)
+{
+ free_irq(dd->ipath_irq, dd);
+ dd->ipath_irq = 0;
+}
+
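+/*
+ * Given the address of the RHF (receive header flags) word for a packet,
+ * back up to the start of the receive header entry and apply the offset
+ * from the RHF to return a pointer to the message header itself.
+ */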
+static struct ipath_message_header *
+ipath_7220_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
+{
+ u32 offset = ipath_hdrget_offset(rhf_addr);
+
+ return (struct ipath_message_header *)
+ (rhf_addr - dd->ipath_rhf_offset + offset);
+}
+
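+/*
+ * Choose how many ports to use. If the cfgports module parameter is zero,
+ * auto-size from the number of online CPUs (5, 9, or the chip maximum);
+ * otherwise honor it up to the chip maximum. The 5/9/17 port choice also
+ * selects how many eager TIDs each port gets, via the PORTCFG field of
+ * RcvCtrl.
+ */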
+static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports)
+{
+ u32 nchipports;
+
+ nchipports = ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
+ if (!cfgports) {
+ int ncpus = num_online_cpus();
+
+ if (ncpus <= 4)
+ dd->ipath_portcnt = 5;
+ else if (ncpus <= 8)
+ dd->ipath_portcnt = 9;
+ if (dd->ipath_portcnt)
+ ipath_dbg("Auto-configured for %u ports, %d cpus "
+ "online\n", dd->ipath_portcnt, ncpus);
+ } else if (cfgports <= nchipports)
+ dd->ipath_portcnt = cfgports;
+ if (!dd->ipath_portcnt) /* none of the above, set to max */
+ dd->ipath_portcnt = nchipports;
+ /*
+ * chip can be configured for 5, 9, or 17 ports, and choice
+ * affects number of eager TIDs per port (1K, 2K, 4K).
+ */
+ if (dd->ipath_portcnt > 9)
+ dd->ipath_rcvctrl |= 2ULL << IBA7220_R_PORTCFG_SHIFT;
+ else if (dd->ipath_portcnt > 5)
+ dd->ipath_rcvctrl |= 1ULL << IBA7220_R_PORTCFG_SHIFT;
+ /* else configure for default 5 receive ports */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+ dd->ipath_p0_rcvegrcnt = 2048; /* always */
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+ dd->ipath_pioreserved = 3; /* kpiobufs used for PIO */
+}
+
+
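+/*
+ * Return an IB configuration value: either a cached software value (link
+ * width/speed, enabled or active), the link latency from the DDR status
+ * register, or a field extracted from the IBC DDR control shadow using the
+ * per-field shift and right-justified mask.
+ */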
+static int ipath_7220_get_ib_cfg(struct ipath_devdata *dd, int which)
+{
+ int lsb, ret = 0;
+ u64 maskr; /* right-justified mask */
+
+ switch (which) {
+ case IPATH_IB_CFG_HRTBT: /* Get Heartbeat off/enable/auto */
+ lsb = IBA7220_IBC_HRTBT_SHIFT;
+ maskr = IBA7220_IBC_HRTBT_MASK;
+ break;
+
+ case IPATH_IB_CFG_LWID_ENB: /* Get allowed Link-width */
+ ret = dd->ipath_link_width_enabled;
+ goto done;
+
+ case IPATH_IB_CFG_LWID: /* Get currently active Link-width */
+ ret = dd->ipath_link_width_active;
+ goto done;
+
+ case IPATH_IB_CFG_SPD_ENB: /* Get allowed Link speeds */
+ ret = dd->ipath_link_speed_enabled;
+ goto done;
+
+ case IPATH_IB_CFG_SPD: /* Get current Link spd */
+ ret = dd->ipath_link_speed_active;
+ goto done;
+
+ case IPATH_IB_CFG_RXPOL_ENB: /* Get Auto-RX-polarity enable */
+ lsb = IBA7220_IBC_RXPOL_SHIFT;
+ maskr = IBA7220_IBC_RXPOL_MASK;
+ break;
+
+ case IPATH_IB_CFG_LREV_ENB: /* Get Auto-Lane-reversal enable */
+ lsb = IBA7220_IBC_LREV_SHIFT;
+ maskr = IBA7220_IBC_LREV_MASK;
+ break;
+
+ case IPATH_IB_CFG_LINKLATENCY:
+ ret = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcddrstatus)
+ & IBA7220_DDRSTAT_LINKLAT_MASK;
+ goto done;
+
+ default:
+ ret = -ENOTSUPP;
+ goto done;
+ }
+ ret = (int)((dd->ipath_ibcddrctrl >> lsb) & maskr);
+done:
+ return ret;
+}
+
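+/*
+ * Set an IB configuration value by updating the IBC DDR control shadow and
+ * register. Link width and speed changes are only written while the link
+ * is down (otherwise they take effect on the next link change); in that
+ * case IPATH_IB_FORCE_NOTIFY is also set so the ib_updown handler runs on
+ * the next status change interrupt.
+ */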
+static int ipath_7220_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
+{
+ int lsb, ret = 0, setforce = 0;
+ u64 maskr; /* right-justified mask */
+
+ switch (which) {
+ case IPATH_IB_CFG_LIDLMC:
+ /*
+ * Set LID and LMC. Combined to avoid possible hazard
+ * caller puts LMC in 16MSbits, DLID in 16LSbits of val
+ */
+ lsb = IBA7220_IBC_DLIDLMC_SHIFT;
+ maskr = IBA7220_IBC_DLIDLMC_MASK;
+ break;
+
+ case IPATH_IB_CFG_HRTBT: /* set Heartbeat off/enable/auto */
+ if (val & IPATH_IB_HRTBT_ON &&
+ (dd->ipath_flags & IPATH_NO_HRTBT))
+ goto bail;
+ lsb = IBA7220_IBC_HRTBT_SHIFT;
+ maskr = IBA7220_IBC_HRTBT_MASK;
+ break;
+
+ case IPATH_IB_CFG_LWID_ENB: /* set allowed Link-width */
+ /*
+ * As with speed, only write the actual register if
+ * the link is currently down, otherwise takes effect
+ * on next link change.
+ */
+ dd->ipath_link_width_enabled = val;
+ if ((dd->ipath_flags & (IPATH_LINKDOWN|IPATH_LINKINIT)) !=
+ IPATH_LINKDOWN)
+ goto bail;
+ /*
+ * We set the IPATH_IB_FORCE_NOTIFY bit so updown
+ * will get called because we want update
+ * link_width_active, and the change may not take
+ * effect for some time (if we are in POLL), so this
+ * flag will force the updown routine to be called
+ * on the next ibstatuschange down interrupt, even
+		 * if it's not a down->up transition.
+ */
+ val--; /* convert from IB to chip */
+ maskr = IBA7220_IBC_WIDTH_MASK;
+ lsb = IBA7220_IBC_WIDTH_SHIFT;
+ setforce = 1;
+ dd->ipath_flags |= IPATH_IB_FORCE_NOTIFY;
+ break;
+
+ case IPATH_IB_CFG_SPD_ENB: /* set allowed Link speeds */
+ /*
+ * If we turn off IB1.2, need to preset SerDes defaults,
+ * but not right now. Set a flag for the next time
+ * we command the link down. As with width, only write the
+ * actual register if the link is currently down, otherwise
+ * takes effect on next link change. Since setting is being
+		 * explicitly requested (via MAD or sysfs), clear autoneg
+ * failure status if speed autoneg is enabled.
+ */
+ dd->ipath_link_speed_enabled = val;
+ if (dd->ipath_ibcddrctrl & IBA7220_IBC_IBTA_1_2_MASK &&
+ !(val & (val - 1)))
+ dd->ipath_presets_needed = 1;
+ if ((dd->ipath_flags & (IPATH_LINKDOWN|IPATH_LINKINIT)) !=
+ IPATH_LINKDOWN)
+ goto bail;
+ /*
+ * We set the IPATH_IB_FORCE_NOTIFY bit so updown
+ * will get called because we want update
+ * link_speed_active, and the change may not take
+ * effect for some time (if we are in POLL), so this
+ * flag will force the updown routine to be called
+ * on the next ibstatuschange down interrupt, even
+		 * if it's not a down->up transition. When setting
+ * speed autoneg, clear AUTONEG_FAILED.
+ */
+ if (val == (IPATH_IB_SDR | IPATH_IB_DDR)) {
+ val = IBA7220_IBC_SPEED_AUTONEG_MASK |
+ IBA7220_IBC_IBTA_1_2_MASK;
+ dd->ipath_flags &= ~IPATH_IB_AUTONEG_FAILED;
+ } else
+ val = val == IPATH_IB_DDR ? IBA7220_IBC_SPEED_DDR
+ : IBA7220_IBC_SPEED_SDR;
+ maskr = IBA7220_IBC_SPEED_AUTONEG_MASK |
+ IBA7220_IBC_IBTA_1_2_MASK;
+ lsb = 0; /* speed bits are low bits */
+ setforce = 1;
+ break;
+
+ case IPATH_IB_CFG_RXPOL_ENB: /* set Auto-RX-polarity enable */
+ lsb = IBA7220_IBC_RXPOL_SHIFT;
+ maskr = IBA7220_IBC_RXPOL_MASK;
+ break;
+
+ case IPATH_IB_CFG_LREV_ENB: /* set Auto-Lane-reversal enable */
+ lsb = IBA7220_IBC_LREV_SHIFT;
+ maskr = IBA7220_IBC_LREV_MASK;
+ break;
+
+ default:
+ ret = -ENOTSUPP;
+ goto bail;
+ }
+ dd->ipath_ibcddrctrl &= ~(maskr << lsb);
+ dd->ipath_ibcddrctrl |= (((u64) val & maskr) << lsb);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl,
+ dd->ipath_ibcddrctrl);
+ if (setforce)
+ dd->ipath_flags |= IPATH_IB_FORCE_NOTIFY;
+bail:
+ return ret;
+}
+
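+/*
+ * Snapshot every chip counter into the caller's infinipath_counters
+ * struct, treating it as a flat array of 64-bit counters.
+ */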
+static void ipath_7220_read_counters(struct ipath_devdata *dd,
+ struct infinipath_counters *cntrs)
+{
+ u64 *counters = (u64 *) cntrs;
+ int i;
+
+ for (i = 0; i < sizeof(*cntrs) / sizeof(u64); i++)
+ counters[i] = ipath_snap_cntr(dd, i);
+}
+
+/* if we are using MSI, try to fall back to INTx */
+static int ipath_7220_intr_fallback(struct ipath_devdata *dd)
+{
+ if (dd->ipath_msi_lo) {
+ dev_info(&dd->pcidev->dev, "MSI interrupt not detected,"
+ " trying INTx interrupts\n");
+ ipath_7220_nomsi(dd);
+ ipath_enable_intx(dd->pcidev);
+ /*
+ * some newer kernels require free_irq before disable_msi,
+ * and irq can be changed during disable and intx enable
+		 * and we therefore need to use the pcidev->irq value,
+ * not our saved MSI value.
+ */
+ dd->ipath_irq = dd->pcidev->irq;
+ if (request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
+ IPATH_DRV_NAME, dd))
+ ipath_dev_err(dd,
+ "Could not re-request_irq for INTx\n");
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * reset the XGXS (between serdes and IBC). Slightly less intrusive
+ * than resetting the IBC or external link state, and useful in some
+ * cases to cause some retraining. To do this right, we reset IBC
+ * as well.
+ */
+static void ipath_7220_xgxs_reset(struct ipath_devdata *dd)
+{
+ u64 val, prev_val;
+
+ prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+ val = prev_val | INFINIPATH_XGXS_RESET;
+ prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+}
+
+
+/* Still needs cleanup, too much hardwired stuff */
+static void autoneg_send(struct ipath_devdata *dd,
+ u32 *hdr, u32 dcnt, u32 *data)
+{
+ int i;
+ u64 cnt;
+ u32 __iomem *piobuf;
+ u32 pnum;
+
+ i = 0;
+	cnt = 7 + dcnt + 1; /* 7 dword header, dcnt dwords of data, 1 dword icrc */
+ while (!(piobuf = ipath_getpiobuf(dd, cnt, &pnum))) {
+ if (i++ > 15) {
+ ipath_dbg("Couldn't get pio buffer for send\n");
+ return;
+ }
+ udelay(2);
+ }
+ if (dd->ipath_flags&IPATH_HAS_PBC_CNT)
+ cnt |= 0x80000000UL<<32; /* mark as VL15 */
+ writeq(cnt, piobuf);
+ ipath_flush_wc();
+ __iowrite32_copy(piobuf + 2, hdr, 7);
+ __iowrite32_copy(piobuf + 9, data, dcnt);
+ ipath_flush_wc();
+}
+
+/*
+ * _start packet gets sent twice at start, _done gets sent twice at end
+ */
+static void ipath_autoneg_send(struct ipath_devdata *dd, int which)
+{
+ static u32 swapped;
+ u32 dw, i, hcnt, dcnt, *data;
+ static u32 hdr[7] = { 0xf002ffff, 0x48ffff, 0x6400abba };
+ static u32 madpayload_start[0x40] = {
+ 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0,
+ 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x1, 0x1388, 0x15e, 0x1, /* rest 0's */
+ };
+ static u32 madpayload_done[0x40] = {
+ 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0,
+ 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x40000001, 0x1388, 0x15e, /* rest 0's */
+ };
+ dcnt = ARRAY_SIZE(madpayload_start);
+ hcnt = ARRAY_SIZE(hdr);
+ if (!swapped) {
+ /* for maintainability, do it at runtime */
+ for (i = 0; i < hcnt; i++) {
+ dw = (__force u32) cpu_to_be32(hdr[i]);
+ hdr[i] = dw;
+ }
+ for (i = 0; i < dcnt; i++) {
+ dw = (__force u32) cpu_to_be32(madpayload_start[i]);
+ madpayload_start[i] = dw;
+ dw = (__force u32) cpu_to_be32(madpayload_done[i]);
+ madpayload_done[i] = dw;
+ }
+ swapped = 1;
+ }
+
+ data = which ? madpayload_done : madpayload_start;
+ ipath_cdbg(PKT, "Sending %s special MADs\n", which?"done":"start");
+
+ autoneg_send(dd, hdr, dcnt, data);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ udelay(2);
+ autoneg_send(dd, hdr, dcnt, data);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ udelay(2);
+}
+
+
+
+/*
+ * Do the absolute minimum to cause an IB speed change, and make it
+ * ready, but don't actually trigger the change. The caller will
+ * do that when ready (if link is in Polling training state, it will
+ * happen immediately, otherwise when link next goes down)
+ *
+ * This routine should only be used as part of the DDR autonegotiation
+ * code for devices that are not compliant with IB 1.2 (or code that
+ * fixes things up for same).
+ *
+ * When the link has gone down and autoneg is enabled, or autoneg has
+ * failed and we give up until next time, we set both speeds, and
+ * then we want IBTA enabled as well as "use max enabled speed".
+ */
+static void set_speed_fast(struct ipath_devdata *dd, u32 speed)
+{
+ dd->ipath_ibcddrctrl &= ~(IBA7220_IBC_SPEED_AUTONEG_MASK |
+ IBA7220_IBC_IBTA_1_2_MASK |
+ (IBA7220_IBC_WIDTH_MASK << IBA7220_IBC_WIDTH_SHIFT));
+
+ if (speed == (IPATH_IB_SDR | IPATH_IB_DDR))
+ dd->ipath_ibcddrctrl |= IBA7220_IBC_SPEED_AUTONEG_MASK |
+ IBA7220_IBC_IBTA_1_2_MASK;
+ else
+ dd->ipath_ibcddrctrl |= speed == IPATH_IB_DDR ?
+ IBA7220_IBC_SPEED_DDR : IBA7220_IBC_SPEED_SDR;
+
+ /*
+ * Convert from IB-style 1 = 1x, 2 = 4x, 3 = auto
+ * to chip-centric 0 = 1x, 1 = 4x, 2 = auto
+ */
+ dd->ipath_ibcddrctrl |= (u64)(dd->ipath_link_width_enabled - 1) <<
+ IBA7220_IBC_WIDTH_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl,
+ dd->ipath_ibcddrctrl);
+ ipath_cdbg(VERBOSE, "setup for IB speed (%x) done\n", speed);
+}
+
+
+/*
+ * this routine is only used when we are not talking to another
+ * IB 1.2-compliant device that we think can do DDR.
+ * (This includes all existing switch chips as of Oct 2007.)
+ * 1.2-compliant devices go directly to DDR prior to reaching INIT
+ */
+static void try_auto_neg(struct ipath_devdata *dd)
+{
+ /*
+ * required for older non-IB1.2 DDR switches. Newer
+ * non-IB-compliant switches don't need it, but so far,
+ * aren't bothered by it either. "Magic constant"
+ */
+ ipath_write_kreg(dd, IPATH_KREG_OFFSET(IBNCModeCtrl),
+ 0x3b9dc07);
+ dd->ipath_flags |= IPATH_IB_AUTONEG_INPROG;
+ ipath_autoneg_send(dd, 0);
+ set_speed_fast(dd, IPATH_IB_DDR);
+ ipath_toggle_rclkrls(dd);
+ /* 2 msec is minimum length of a poll cycle */
+ schedule_delayed_work(&dd->ipath_autoneg_work,
+ msecs_to_jiffies(2));
+}
+
+
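+/*
+ * Chip-specific handling of IB link state changes. Records the active link
+ * width and speed from IBCStatus; on link down restores the SerDes presets
+ * (unless autoneg is in progress); on link up drives the DDR
+ * auto-negotiation state machine and the 1X-width retry workaround.
+ * Returns 1 when no further generic IB status change processing is needed.
+ */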
+static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
+{
+ int ret = 0;
+ u32 ltstate = ipath_ib_linkstate(dd, ibcs);
+
+ dd->ipath_link_width_active =
+ ((ibcs >> IBA7220_IBCS_LINKWIDTH_SHIFT) & 1) ?
+ IB_WIDTH_4X : IB_WIDTH_1X;
+ dd->ipath_link_speed_active =
+ ((ibcs >> IBA7220_IBCS_LINKSPEED_SHIFT) & 1) ?
+ IPATH_IB_DDR : IPATH_IB_SDR;
+
+ if (!ibup) {
+ /*
+ * when link goes down we don't want aeq running, so it
+		 * won't interfere with IBC training, etc., and we need
+ * to go back to the static SerDes preset values
+ */
+ if (dd->ipath_x1_fix_tries &&
+ ltstate <= INFINIPATH_IBCS_LT_STATE_SLEEPQUIET &&
+ ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP)
+ dd->ipath_x1_fix_tries = 0;
+ if (!(dd->ipath_flags & (IPATH_IB_AUTONEG_FAILED |
+ IPATH_IB_AUTONEG_INPROG)))
+ set_speed_fast(dd, dd->ipath_link_speed_enabled);
+ if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)) {
+ ipath_cdbg(VERBOSE, "Setting RXEQ defaults\n");
+ ipath_sd7220_presets(dd);
+ }
+		/* this might be better in ipath_sd7220_presets() */
+ ipath_set_relock_poll(dd, ibup);
+ } else {
+ if (ipath_compat_ddr_negotiate &&
+ !(dd->ipath_flags & (IPATH_IB_AUTONEG_FAILED |
+ IPATH_IB_AUTONEG_INPROG)) &&
+ dd->ipath_link_speed_active == IPATH_IB_SDR &&
+ (dd->ipath_link_speed_enabled &
+ (IPATH_IB_DDR | IPATH_IB_SDR)) ==
+ (IPATH_IB_DDR | IPATH_IB_SDR) &&
+ dd->ipath_autoneg_tries < IPATH_AUTONEG_TRIES) {
+ /* we are SDR, and DDR auto-negotiation enabled */
+ ++dd->ipath_autoneg_tries;
+ ipath_dbg("DDR negotiation try, %u/%u\n",
+ dd->ipath_autoneg_tries,
+ IPATH_AUTONEG_TRIES);
+ try_auto_neg(dd);
+ ret = 1; /* no other IB status change processing */
+ } else if ((dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)
+ && dd->ipath_link_speed_active == IPATH_IB_SDR) {
+ ipath_autoneg_send(dd, 1);
+ set_speed_fast(dd, IPATH_IB_DDR);
+ udelay(2);
+ ipath_toggle_rclkrls(dd);
+ ret = 1; /* no other IB status change processing */
+ } else {
+ if ((dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
+ (dd->ipath_link_speed_active & IPATH_IB_DDR)) {
+ ipath_dbg("Got to INIT with DDR autoneg\n");
+ dd->ipath_flags &= ~(IPATH_IB_AUTONEG_INPROG
+ | IPATH_IB_AUTONEG_FAILED);
+ dd->ipath_autoneg_tries = 0;
+ /* re-enable SDR, for next link down */
+ set_speed_fast(dd,
+ dd->ipath_link_speed_enabled);
+ wake_up(&dd->ipath_autoneg_wait);
+ } else if (dd->ipath_flags & IPATH_IB_AUTONEG_FAILED) {
+ /*
+ * clear autoneg failure flag, and do setup
+ * so we'll try next time link goes down and
+ * back to INIT (possibly connected to different
+ * device).
+ */
+ ipath_dbg("INIT %sDR after autoneg failure\n",
+ (dd->ipath_link_speed_active &
+ IPATH_IB_DDR) ? "D" : "S");
+ dd->ipath_flags &= ~IPATH_IB_AUTONEG_FAILED;
+ dd->ipath_ibcddrctrl |=
+ IBA7220_IBC_IBTA_1_2_MASK;
+ ipath_write_kreg(dd,
+ IPATH_KREG_OFFSET(IBNCModeCtrl), 0);
+ }
+ }
+ /*
+ * if we are in 1X, and are in autoneg width, it
+ * could be due to an xgxs problem, so if we haven't
+ * already tried, try twice to get to 4X; if we
+ * tried, and couldn't, report it, since it will
+ * probably not be what is desired.
+ */
+ if ((dd->ipath_link_width_enabled & (IB_WIDTH_1X |
+ IB_WIDTH_4X)) == (IB_WIDTH_1X | IB_WIDTH_4X)
+ && dd->ipath_link_width_active == IB_WIDTH_1X
+ && dd->ipath_x1_fix_tries < 3) {
+ if (++dd->ipath_x1_fix_tries == 3)
+ dev_info(&dd->pcidev->dev,
+ "IB link is in 1X mode\n");
+ else {
+ ipath_cdbg(VERBOSE, "IB 1X in "
+ "auto-width, try %u to be "
+ "sure it's really 1X; "
+ "ltstate %u\n",
+ dd->ipath_x1_fix_tries,
+ ltstate);
+ dd->ipath_f_xgxs_reset(dd);
+ ret = 1; /* skip other processing */
+ }
+ }
+
+ if (!ret) {
+ dd->delay_mult = rate_to_delay
+ [(ibcs >> IBA7220_IBCS_LINKSPEED_SHIFT) & 1]
+ [(ibcs >> IBA7220_IBCS_LINKWIDTH_SHIFT) & 1];
+
+ ipath_set_relock_poll(dd, ibup);
+ }
+ }
+
+ if (!ret)
+ ipath_setup_7220_setextled(dd, ipath_ib_linkstate(dd, ibcs),
+ ltstate);
+ return ret;
+}
+
+
+/*
+ * Handle the empirically determined mechanism for auto-negotiation
+ * of DDR speed with switches.
+ */
+static void autoneg_work(struct work_struct *work)
+{
+ struct ipath_devdata *dd;
+ u64 startms;
+ u32 lastlts, i;
+
+ dd = container_of(work, struct ipath_devdata,
+ ipath_autoneg_work.work);
+
+ startms = jiffies_to_msecs(jiffies);
+
+ /*
+ * busy wait for this first part, it should be at most a
+ * few hundred usec, since we scheduled ourselves for 2msec.
+ */
+ for (i = 0; i < 25; i++) {
+ lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
+ if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
+ ipath_set_linkstate(dd, IPATH_IB_LINKDOWN_DISABLE);
+ break;
+ }
+ udelay(100);
+ }
+
+ if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG))
+ goto done; /* we got there early or told to stop */
+
+ /* we expect this to timeout */
+ if (wait_event_timeout(dd->ipath_autoneg_wait,
+ !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG),
+ msecs_to_jiffies(90)))
+ goto done;
+
+ ipath_toggle_rclkrls(dd);
+
+ /* we expect this to timeout */
+ if (wait_event_timeout(dd->ipath_autoneg_wait,
+ !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG),
+ msecs_to_jiffies(1700)))
+ goto done;
+
+ set_speed_fast(dd, IPATH_IB_SDR);
+ ipath_toggle_rclkrls(dd);
+
+ /*
+ * wait up to 250 msec for link to train and get to INIT at DDR;
+ * this should terminate early
+ */
+ wait_event_timeout(dd->ipath_autoneg_wait,
+ !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG),
+ msecs_to_jiffies(250));
+done:
+ if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
+ ipath_dbg("Did not get to DDR INIT (%x) after %Lu msecs\n",
+ ipath_ib_state(dd, dd->ipath_lastibcstat),
+ (unsigned long long) jiffies_to_msecs(jiffies)-startms);
+ dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG;
+ if (dd->ipath_autoneg_tries == IPATH_AUTONEG_TRIES) {
+ dd->ipath_flags |= IPATH_IB_AUTONEG_FAILED;
+ ipath_dbg("Giving up on DDR until next IB "
+ "link Down\n");
+ dd->ipath_autoneg_tries = 0;
+ }
+ set_speed_fast(dd, dd->ipath_link_speed_enabled);
+ }
+}
+
+
+/**
+ * ipath_init_iba7220_funcs - set up the chip-specific function pointers
+ * @dd: the infinipath device
+ *
+ * This is global, and is called directly at init to set up the
+ * chip-specific function pointers for later use.
+ */
+void ipath_init_iba7220_funcs(struct ipath_devdata *dd)
+{
+ dd->ipath_f_intrsetup = ipath_7220_intconfig;
+ dd->ipath_f_bus = ipath_setup_7220_config;
+ dd->ipath_f_reset = ipath_setup_7220_reset;
+ dd->ipath_f_get_boardname = ipath_7220_boardname;
+ dd->ipath_f_init_hwerrors = ipath_7220_init_hwerrors;
+ dd->ipath_f_early_init = ipath_7220_early_init;
+ dd->ipath_f_handle_hwerrors = ipath_7220_handle_hwerrors;
+ dd->ipath_f_quiet_serdes = ipath_7220_quiet_serdes;
+ dd->ipath_f_bringup_serdes = ipath_7220_bringup_serdes;
+ dd->ipath_f_clear_tids = ipath_7220_clear_tids;
+ dd->ipath_f_put_tid = ipath_7220_put_tid;
+ dd->ipath_f_cleanup = ipath_setup_7220_cleanup;
+ dd->ipath_f_setextled = ipath_setup_7220_setextled;
+ dd->ipath_f_get_base_info = ipath_7220_get_base_info;
+ dd->ipath_f_free_irq = ipath_7220_free_irq;
+ dd->ipath_f_tidtemplate = ipath_7220_tidtemplate;
+ dd->ipath_f_intr_fallback = ipath_7220_intr_fallback;
+ dd->ipath_f_xgxs_reset = ipath_7220_xgxs_reset;
+ dd->ipath_f_get_ib_cfg = ipath_7220_get_ib_cfg;
+ dd->ipath_f_set_ib_cfg = ipath_7220_set_ib_cfg;
+ dd->ipath_f_config_jint = ipath_7220_config_jint;
+ dd->ipath_f_config_ports = ipath_7220_config_ports;
+ dd->ipath_f_read_counters = ipath_7220_read_counters;
+ dd->ipath_f_get_msgheader = ipath_7220_get_msgheader;
+ dd->ipath_f_ib_updown = ipath_7220_ib_updown;
+
+ /* initialize chip-specific variables */
+ ipath_init_7220_variables(dd);
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
new file mode 100644
index 0000000..3e5baa4
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -0,0 +1,1075 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+
+#include "ipath_kernel.h"
+#include "ipath_common.h"
+
+/*
+ * min buffers we want to have per port, after driver
+ */
+#define IPATH_MIN_USER_PORT_BUFCNT 7
+
+/*
+ * Number of ports we are configured to use (to allow for more pio
+ * buffers per port, etc.) Zero means use chip value.
+ */
+static ushort ipath_cfgports;
+
+module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
+MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
+
+/*
+ * Number of buffers reserved for driver (verbs and layered drivers.)
+ * Initialized based on number of PIO buffers if not set via module interface.
+ * The problem with this is that it's global, but we'll use different
+ * numbers for different chip types.
+ */
+static ushort ipath_kpiobufs;
+
+static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp);
+
+module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_ushort,
+ &ipath_kpiobufs, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
+
+/**
+ * create_port0_egr - allocate the eager TID buffers
+ * @dd: the infinipath device
+ *
+ * This code is now quite different for user and kernel, because
+ * the kernel uses skbs for accelerated network performance.
+ * This is the kernel (port0) version.
+ *
+ * Allocate the eager TID buffers and program them into infinipath.
+ * We use the network layer alloc_skb() allocator to allocate the
+ * memory, and either use the buffers as is for things like verbs
+ * packets, or pass the buffers up to the ipath layered driver and
+ * thence the network layer, replacing them as we do so (see
+ * ipath_rcv_layer()).
+ */
+static int create_port0_egr(struct ipath_devdata *dd)
+{
+ unsigned e, egrcnt;
+ struct ipath_skbinfo *skbinfo;
+ int ret;
+
+ egrcnt = dd->ipath_p0_rcvegrcnt;
+
+ skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
+ if (skbinfo == NULL) {
+ ipath_dev_err(dd, "allocation error for eager TID "
+ "skb array\n");
+ ret = -ENOMEM;
+ goto bail;
+ }
+ for (e = 0; e < egrcnt; e++) {
+ /*
+ * This is a bit tricky in that we allocate extra
+ * space for 2 bytes of the 14 byte ethernet header.
+ * These two bytes are passed in the ipath header so
+ * the rest of the data is word aligned. We allocate
+ * 4 bytes so that the data buffer stays word aligned.
+ * See ipath_kreceive() for more details.
+ */
+ skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
+ if (!skbinfo[e].skb) {
+ ipath_dev_err(dd, "SKB allocation error for "
+ "eager TID %u\n", e);
+ while (e != 0)
+ dev_kfree_skb(skbinfo[--e].skb);
+ vfree(skbinfo);
+ ret = -ENOMEM;
+ goto bail;
+ }
+ }
+ /*
+ * After loop above, so we can test non-NULL to see if ready
+ * to use at receive, etc.
+ */
+ dd->ipath_port0_skbinfo = skbinfo;
+
+ for (e = 0; e < egrcnt; e++) {
+ dd->ipath_port0_skbinfo[e].phys =
+ ipath_map_single(dd->pcidev,
+ dd->ipath_port0_skbinfo[e].skb->data,
+ dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
+ dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
+ ((char __iomem *) dd->ipath_kregbase +
+ dd->ipath_rcvegrbase),
+ RCVHQ_RCV_TYPE_EAGER,
+ dd->ipath_port0_skbinfo[e].phys);
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
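+/*
+ * First-time link bringup: hold the IB link controller (IBC) in reset,
+ * program IBCCtrl (max packet length, flow control watermark and period,
+ * error thresholds, credit scale) with LINKINITCMD left at DISABLE, bring
+ * up the SerDes, and only then enable the IBC.
+ */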
+static int bringup_link(struct ipath_devdata *dd)
+{
+ u64 val, ibc;
+ int ret = 0;
+
+ /* hold IBC in reset */
+ dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+
+ /*
+ * set initial max size pkt IBC will send, including ICRC; it's the
+ * PIO buffer size in dwords, less 1; also see ipath_set_mtu()
+ */
+ val = (dd->ipath_ibmaxlen >> 2) + 1;
+ ibc = val << dd->ibcc_mpl_shift;
+
+ /* flowcontrolwatermark is in units of KBytes */
+ ibc |= 0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT;
+ /*
+ * How often flowctrl sent. More or less in usecs; balance against
+ * watermark value, so that in theory senders always get a flow
+ * control update in time to not let the IB link go idle.
+ */
+ ibc |= 0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT;
+ /* max error tolerance */
+ ibc |= 0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
+	/* use "real" buffer space for IB credit flow control */
+	ibc |= 4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT;
+	/* overrun error threshold */
+	ibc |= 0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
+ /* initially come up waiting for TS1, without sending anything. */
+ dd->ipath_ibcctrl = ibc;
+ /*
+ * Want to start out with both LINKCMD and LINKINITCMD in NOP
+ * (0 and 0). Don't put linkinitcmd in ipath_ibcctrl, want that
+ * to stay a NOP. Flag that we are disabled, for the (unlikely)
+ * case that some recovery path is trying to bring the link up
+ * before we are ready.
+ */
+ ibc |= INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
+ INFINIPATH_IBCC_LINKINITCMD_SHIFT;
+ dd->ipath_flags |= IPATH_IB_LINK_DISABLED;
+ ipath_cdbg(VERBOSE, "Writing 0x%llx to ibcctrl\n",
+ (unsigned long long) ibc);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, ibc);
+
+	/* be sure chip saw it */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+
+ ret = dd->ipath_f_bringup_serdes(dd);
+
+ if (ret)
+ dev_info(&dd->pcidev->dev, "Could not initialize SerDes, "
+ "not usable\n");
+ else {
+ /* enable IBC */
+ dd->ipath_control |= INFINIPATH_C_LINKENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+ }
+
+ return ret;
+}
+
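+/*
+ * Allocate and minimally initialize the portdata structure for port 0,
+ * the kernel port: port count 1, the default partition key, and an
+ * initial sequence count of 1.
+ */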
+static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd)
+{
+ struct ipath_portdata *pd = NULL;
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (pd) {
+ pd->port_dd = dd;
+ pd->port_cnt = 1;
+ /* The port 0 pkey table is used by the layer interface. */
+ pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY;
+ pd->port_seq_cnt = 1;
+ }
+ return pd;
+}
+
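+/*
+ * One-time (non-reset) initialization: set up the locks, pick the number
+ * of ports to configure, allocate the portdata array and port 0, and read
+ * the chip's TID, eager buffer, and PIO buffer geometry into the devdata
+ * shadow fields.
+ */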
+static int init_chip_first(struct ipath_devdata *dd)
+{
+ struct ipath_portdata *pd;
+ int ret = 0;
+ u64 val;
+
+ spin_lock_init(&dd->ipath_kernel_tid_lock);
+ spin_lock_init(&dd->ipath_user_tid_lock);
+ spin_lock_init(&dd->ipath_sendctrl_lock);
+ spin_lock_init(&dd->ipath_sdma_lock);
+ spin_lock_init(&dd->ipath_gpio_lock);
+ spin_lock_init(&dd->ipath_eep_st_lock);
+ spin_lock_init(&dd->ipath_sdepb_lock);
+ mutex_init(&dd->ipath_eep_lock);
+
+ /*
+ * skip cfgports stuff because we are not allocating memory,
+ * and we don't want problems if the portcnt changed due to
+ * cfgports. We do still check and report a difference, if
+ * not same (should be impossible).
+ */
+ dd->ipath_f_config_ports(dd, ipath_cfgports);
+ if (!ipath_cfgports)
+ dd->ipath_cfgports = dd->ipath_portcnt;
+ else if (ipath_cfgports <= dd->ipath_portcnt) {
+ dd->ipath_cfgports = ipath_cfgports;
+ ipath_dbg("Configured to use %u ports out of %u in chip\n",
+ dd->ipath_cfgports, ipath_read_kreg32(dd,
+ dd->ipath_kregs->kr_portcnt));
+ } else {
+ dd->ipath_cfgports = dd->ipath_portcnt;
+		ipath_dbg("Tried to configure to use %u ports; chip "
+ "only supports %u\n", ipath_cfgports,
+ ipath_read_kreg32(dd,
+ dd->ipath_kregs->kr_portcnt));
+ }
+ /*
+ * Allocate full portcnt array, rather than just cfgports, because
+ * cleanup iterates across all possible ports.
+ */
+ dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt,
+ GFP_KERNEL);
+
+ if (!dd->ipath_pd) {
+ ipath_dev_err(dd, "Unable to allocate portdata array, "
+ "failing\n");
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ pd = create_portdata0(dd);
+ if (!pd) {
+ ipath_dev_err(dd, "Unable to allocate portdata for port "
+ "0, failing\n");
+ ret = -ENOMEM;
+ goto done;
+ }
+ dd->ipath_pd[0] = pd;
+
+ dd->ipath_rcvtidcnt =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
+ dd->ipath_rcvtidbase =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
+ dd->ipath_rcvegrcnt =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
+ dd->ipath_rcvegrbase =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
+ dd->ipath_palign =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
+ dd->ipath_piobufbase =
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufbase);
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
+ dd->ipath_piosize2k = val & ~0U;
+ dd->ipath_piosize4k = val >> 32;
+ if (dd->ipath_piosize4k == 0 && ipath_mtu4096)
+ ipath_mtu4096 = 0; /* 4KB not supported by this chip */
+ dd->ipath_ibmtu = ipath_mtu4096 ? 4096 : 2048;
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
+ dd->ipath_piobcnt2k = val & ~0U;
+ dd->ipath_piobcnt4k = val >> 32;
+ dd->ipath_pio2kbase =
+ (u32 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
+ (dd->ipath_piobufbase & 0xffffffff));
+ if (dd->ipath_piobcnt4k) {
+ dd->ipath_pio4kbase = (u32 __iomem *)
+ (((char __iomem *) dd->ipath_kregbase) +
+ (dd->ipath_piobufbase >> 32));
+ /*
+ * 4K buffers take 2 pages; we use roundup just to be
+ * paranoid; we calculate it once here, rather than on
+		 * every buf allocate
+ */
+ dd->ipath_4kalign = ALIGN(dd->ipath_piosize4k,
+ dd->ipath_palign);
+ ipath_dbg("%u 2k(%x) piobufs @ %p, %u 4k(%x) @ %p "
+ "(%x aligned)\n",
+ dd->ipath_piobcnt2k, dd->ipath_piosize2k,
+ dd->ipath_pio2kbase, dd->ipath_piobcnt4k,
+ dd->ipath_piosize4k, dd->ipath_pio4kbase,
+ dd->ipath_4kalign);
+	} else
+		ipath_dbg("%u 2k piobufs @ %p\n",
+			  dd->ipath_piobcnt2k, dd->ipath_pio2kbase);
+
+done:
+ return ret;
+}
+
+/**
+ * init_chip_reset - re-initialize after a reset, or enable
+ * @dd: the infinipath device
+ *
+ * sanity check at least some of the values after reset, and
+ * ensure no receive or transmit (explicitly, in case reset
+ * failed)
+ */
+static int init_chip_reset(struct ipath_devdata *dd)
+{
+ u32 rtmp;
+ int i;
+ unsigned long flags;
+
+ /*
+ * ensure chip does no sends or receives, tail updates, or
+ * pioavail updates while we re-initialize
+ */
+ dd->ipath_rcvctrl &= ~(1ULL << dd->ipath_r_tailupd_shift);
+ for (i = 0; i < dd->ipath_portcnt; i++) {
+ clear_bit(dd->ipath_r_portenable_shift + i,
+ &dd->ipath_rcvctrl);
+ clear_bit(dd->ipath_r_intravail_shift + i,
+ &dd->ipath_rcvctrl);
+ }
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl = 0U; /* no sdma, etc */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
+
+ rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt);
+ if (rtmp != dd->ipath_rcvtidcnt)
+ dev_info(&dd->pcidev->dev, "tidcnt was %u before "
+ "reset, now %u, using original\n",
+ dd->ipath_rcvtidcnt, rtmp);
+ rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidbase);
+ if (rtmp != dd->ipath_rcvtidbase)
+ dev_info(&dd->pcidev->dev, "tidbase was %u before "
+ "reset, now %u, using original\n",
+ dd->ipath_rcvtidbase, rtmp);
+ rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
+ if (rtmp != dd->ipath_rcvegrcnt)
+ dev_info(&dd->pcidev->dev, "egrcnt was %u before "
+ "reset, now %u, using original\n",
+ dd->ipath_rcvegrcnt, rtmp);
+ rtmp = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrbase);
+ if (rtmp != dd->ipath_rcvegrbase)
+ dev_info(&dd->pcidev->dev, "egrbase was %u before "
+ "reset, now %u, using original\n",
+ dd->ipath_rcvegrbase, rtmp);
+
+ return 0;
+}
+
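+/*
+ * Allocate the DMA-coherent page that the chip writes PIO-available
+ * updates into, and carve the area following the shadow registers into
+ * the status word and the freeze message buffer visible to applications.
+ */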
+static int init_pioavailregs(struct ipath_devdata *dd)
+{
+ int ret;
+
+ dd->ipath_pioavailregs_dma = dma_alloc_coherent(
+ &dd->pcidev->dev, PAGE_SIZE, &dd->ipath_pioavailregs_phys,
+ GFP_KERNEL);
+ if (!dd->ipath_pioavailregs_dma) {
+ ipath_dev_err(dd, "failed to allocate PIOavail reg area "
+ "in memory\n");
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ /*
+ * we really want L2 cache aligned, but for current CPUs of
+ * interest, they are the same.
+ */
+ dd->ipath_statusp = (u64 *)
+ ((char *)dd->ipath_pioavailregs_dma +
+ ((2 * L1_CACHE_BYTES +
+ dd->ipath_pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES));
+ /* copy the current value now that it's really allocated */
+ *dd->ipath_statusp = dd->_ipath_status;
+ /*
+ * setup buffer to hold freeze msg, accessible to apps,
+ * following statusp
+ */
+ dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1];
+ /* and its length */
+ dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]);
+
+ ret = 0;
+
+done:
+ return ret;
+}
+
+/**
+ * init_shadow_tids - allocate the shadow TID array
+ * @dd: the infinipath device
+ *
+ * allocate the shadow TID array, so we can ipath_munlock previous
+ * entries. It may make more sense to move the pageshadow to the
+ * port data structure, so we only allocate memory for ports actually
+ * in use, since we are at 8K per port now.
+ */
+static void init_shadow_tids(struct ipath_devdata *dd)
+{
+ struct page **pages;
+ dma_addr_t *addrs;
+
+ pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ sizeof(struct page *));
+ if (!pages) {
+ ipath_dev_err(dd, "failed to allocate shadow page * "
+ "array, no expected sends!\n");
+ dd->ipath_pageshadow = NULL;
+ return;
+ }
+
+ addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ sizeof(dma_addr_t));
+ if (!addrs) {
+ ipath_dev_err(dd, "failed to allocate shadow dma handle "
+ "array, no expected sends!\n");
+ vfree(dd->ipath_pageshadow);
+ dd->ipath_pageshadow = NULL;
+ return;
+ }
+
+ memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ sizeof(struct page *));
+
+ dd->ipath_pageshadow = pages;
+ dd->ipath_physshadow = addrs;
+}
+
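+/*
+ * Final enable: turn on PIO send and PIOavail updates to memory, enable
+ * receive and receive interrupts for the kernel port, mark the device
+ * INITTED, sync the head/tail index shadows, and seed the PIO-available
+ * shadow from the DMA'ed copies.
+ */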
+static void enable_chip(struct ipath_devdata *dd, int reinit)
+{
+ u32 val;
+ u64 rcvmask;
+ unsigned long flags;
+ int i;
+
+ if (!reinit)
+ init_waitqueue_head(&ipath_state_wait);
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ /* Enable PIO send, and update of PIOavail regs to memory. */
+ dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE |
+ INFINIPATH_S_PIOBUFAVAILUPD;
+
+ /*
+ * Set the PIO avail update threshold to host memory
+ * on chips that support it.
+ */
+ if (dd->ipath_pioupd_thresh)
+ dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
+ << INFINIPATH_S_UPDTHRESH_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ /*
+ * Enable kernel ports' receive and receive interrupt.
+ * Other ports done as user opens and inits them.
+ */
+ rcvmask = 1ULL;
+ dd->ipath_rcvctrl |= (rcvmask << dd->ipath_r_portenable_shift) |
+ (rcvmask << dd->ipath_r_intravail_shift);
+ if (!(dd->ipath_flags & IPATH_NODMA_RTAIL))
+ dd->ipath_rcvctrl |= (1ULL << dd->ipath_r_tailupd_shift);
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+
+ /*
+ * now ready for use. this should be cleared whenever we
+ * detect a reset, or initiate one.
+ */
+ dd->ipath_flags |= IPATH_INITTED;
+
+ /*
+ * Init our shadow copies of head from tail values,
+ * and write head values to match.
+ */
+ val = ipath_read_ureg32(dd, ur_rcvegrindextail, 0);
+ ipath_write_ureg(dd, ur_rcvegrindexhead, val, 0);
+
+ /* Initialize so we interrupt on next packet received */
+ ipath_write_ureg(dd, ur_rcvhdrhead,
+ dd->ipath_rhdrhead_intr_off |
+ dd->ipath_pd[0]->port_head, 0);
+
+ /*
+ * by now pioavail updates to memory should have occurred, so
+ * copy them into our working/shadow registers; this is in
+ * case something went wrong with abort, but mostly to get the
+ * initial values of the generation bit correct.
+ */
+ for (i = 0; i < dd->ipath_pioavregs; i++) {
+ __le64 pioavail;
+
+ /*
+ * Chip Errata bug 6641; even and odd qwords>3 are swapped.
+ */
+ if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
+ pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
+ else
+ pioavail = dd->ipath_pioavailregs_dma[i];
+ /*
+ * don't need to worry about ipath_pioavailkernel here
+ * because we will call ipath_chg_pioavailkernel() later
+ * in initialization, to busy out buffers as needed
+ */
+ dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
+ }
+ /* can get counters, stats, etc. */
+ dd->ipath_flags |= IPATH_PRESENT;
+}
+
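+/*
+ * Common pre-init housekeeping: read and sanity-check the revision and
+ * register-base registers, clear the diag control register and the
+ * initial reset error, format the board version string, and then do
+ * either first-time init or post-reset re-init.
+ */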
+static int init_housekeeping(struct ipath_devdata *dd, int reinit)
+{
+ char boardn[40];
+ int ret = 0;
+
+ /*
+ * have to clear shadow copies of registers at init that are
+ * not otherwise set here, or all kinds of bizarre things
+ * happen with driver on chip reset
+ */
+ dd->ipath_rcvhdrsize = 0;
+
+ /*
+ * Don't clear ipath_flags as 8bit mode was set before
+ * entering this func. However, we do set the linkstate to
+ * unknown, so we can watch for a transition.
+ * PRESENT is set because we want register reads to work,
+ * and the kernel infrastructure saw it in config space;
+ * We clear it if we have failures.
+ */
+ dd->ipath_flags |= IPATH_LINKUNK | IPATH_PRESENT;
+ dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED |
+ IPATH_LINKDOWN | IPATH_LINKINIT);
+
+ ipath_cdbg(VERBOSE, "Try to read spc chip revision\n");
+ dd->ipath_revision =
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
+
+ /*
+ * set up fundamental info we need to use the chip; we assume
+ * if the revision reg and these regs are OK, we don't need to
+ * special case the rest
+ */
+ dd->ipath_sregbase =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_sendregbase);
+ dd->ipath_cregbase =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_counterregbase);
+ dd->ipath_uregbase =
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_userregbase);
+ ipath_cdbg(VERBOSE, "ipath_kregbase %p, sendbase %x usrbase %x, "
+ "cntrbase %x\n", dd->ipath_kregbase, dd->ipath_sregbase,
+ dd->ipath_uregbase, dd->ipath_cregbase);
+ if ((dd->ipath_revision & 0xffffffff) == 0xffffffff
+ || (dd->ipath_sregbase & 0xffffffff) == 0xffffffff
+ || (dd->ipath_cregbase & 0xffffffff) == 0xffffffff
+ || (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) {
+ ipath_dev_err(dd, "Register read failures from chip, "
+ "giving up initialization\n");
+ dd->ipath_flags &= ~IPATH_PRESENT;
+ ret = -ENODEV;
+ goto done;
+ }
+
+
+ /* clear diagctrl register, in case diags were running and crashed */
+ ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
+
+ /* clear the initial reset flag, in case first driver load */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
+ INFINIPATH_E_RESET);
+
+ ipath_cdbg(VERBOSE, "Revision %llx (PCI %x)\n",
+ (unsigned long long) dd->ipath_revision,
+ dd->ipath_pcirev);
+
+ if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) &
+ INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) {
+ ipath_dev_err(dd, "Driver only handles version %d, "
+			      "chip swversion is %d (%llx), failing\n",
+ IPATH_CHIP_SWVERSION,
+ (int)(dd->ipath_revision >>
+ INFINIPATH_R_SOFTWARE_SHIFT) &
+ INFINIPATH_R_SOFTWARE_MASK,
+ (unsigned long long) dd->ipath_revision);
+ ret = -ENOSYS;
+ goto done;
+ }
+ dd->ipath_majrev = (u8) ((dd->ipath_revision >>
+ INFINIPATH_R_CHIPREVMAJOR_SHIFT) &
+ INFINIPATH_R_CHIPREVMAJOR_MASK);
+ dd->ipath_minrev = (u8) ((dd->ipath_revision >>
+ INFINIPATH_R_CHIPREVMINOR_SHIFT) &
+ INFINIPATH_R_CHIPREVMINOR_MASK);
+ dd->ipath_boardrev = (u8) ((dd->ipath_revision >>
+ INFINIPATH_R_BOARDID_SHIFT) &
+ INFINIPATH_R_BOARDID_MASK);
+
+ ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn);
+
+ snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
+ "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
+ "SW Compat %u\n",
+ IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
+ (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
+ INFINIPATH_R_ARCH_MASK,
+ dd->ipath_majrev, dd->ipath_minrev, dd->ipath_pcirev,
+ (unsigned)(dd->ipath_revision >>
+ INFINIPATH_R_SOFTWARE_SHIFT) &
+ INFINIPATH_R_SOFTWARE_MASK);
+
+ ipath_dbg("%s", dd->ipath_boardversion);
+
+ if (ret)
+ goto done;
+
+ if (reinit)
+ ret = init_chip_reset(dd);
+ else
+ ret = init_chip_first(dd);
+
+done:
+ return ret;
+}
+
+static void verify_interrupt(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
+
+ if (!dd)
+ return; /* being torn down */
+
+ /*
+ * If we don't have any interrupts, let the user know and
+ * don't bother checking again.
+ */
+ if (dd->ipath_int_counter == 0) {
+ if (!dd->ipath_f_intr_fallback(dd))
+ dev_err(&dd->pcidev->dev, "No interrupts detected, "
+ "not usable.\n");
+ else /* re-arm the timer to see if fallback works */
+ mod_timer(&dd->ipath_intrchk_timer, jiffies + HZ/2);
+ } else
+ ipath_cdbg(VERBOSE, "%u interrupts at timer check\n",
+ dd->ipath_int_counter);
+}
+
+/**
+ * ipath_init_chip - do the actual initialization sequence on the chip
+ * @dd: the infinipath device
+ * @reinit: reinitializing, so don't allocate new memory
+ *
+ * Do the actual initialization sequence on the chip. This is done
+ * both from the init routine called from the PCI infrastructure, and
+ * when we reset the chip, or detect that it was reset internally,
+ * or it's administratively re-enabled.
+ *
+ * Memory allocation here and in called routines is only done in
+ * the first case (reinit == 0). We have to be careful, because even
+ * without memory allocation, we need to re-write all the chip registers
+ * TIDs, etc. after the reset or enable has completed.
+ */
+int ipath_init_chip(struct ipath_devdata *dd, int reinit)
+{
+ int ret = 0;
+ u32 kpiobufs, defkbufs;
+ u32 piobufs, uports;
+ u64 val;
+ struct ipath_portdata *pd;
+ gfp_t gfp_flags = GFP_USER | __GFP_COMP;
+
+ ret = init_housekeeping(dd, reinit);
+ if (ret)
+ goto done;
+
+ /*
+ * we ignore most issues after reporting them, but have to specially
+ * handle hardware-disabled chips.
+ */
+ if (ret == 2) {
+ /* unique error, known to ipath_init_one */
+ ret = -EPERM;
+ goto done;
+ }
+
+ /*
+ * We could bump this to allow for full rcvegrcnt + rcvtidcnt,
+ * but then it no longer fits nicely in a power of two, and since
+ * we now use routines that backend onto __get_free_pages, the
+ * rest would be wasted.
+ */
+ dd->ipath_rcvhdrcnt = max(dd->ipath_p0_rcvegrcnt, dd->ipath_rcvegrcnt);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrcnt,
+ dd->ipath_rcvhdrcnt);
+
+ /*
+ * Set up the shadow copies of the piobufavail registers,
+ * which we compare against the chip registers for now, and
+ * the in memory DMA'ed copies of the registers. This has to
+ * be done early, before we calculate lastport, etc.
+ */
+ piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ /*
+ * calc number of pioavail registers, and save it; we have 2
+ * bits per buffer.
+ */
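+ /* at 2 bits per buffer, each 64-bit register covers 32 buffers */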
+ dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
+ / (sizeof(u64) * BITS_PER_BYTE / 2);
+ uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
+ if (piobufs > 144)
+ defkbufs = 32 + dd->ipath_pioreserved;
+ else
+ defkbufs = 16 + dd->ipath_pioreserved;
+
+ if (ipath_kpiobufs && (ipath_kpiobufs +
+ (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
+ int i = (int) piobufs -
+ (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
+ if (i < 1)
+ i = 1;
+ dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
+ "%d for kernel leaves too few for %d user ports "
+ "(%d each); using %u\n", ipath_kpiobufs,
+ piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
+ /*
+ * shouldn't change ipath_kpiobufs, because could be
+ * different for different devices...
+ */
+ kpiobufs = i;
+ } else if (ipath_kpiobufs)
+ kpiobufs = ipath_kpiobufs;
+ else
+ kpiobufs = defkbufs;
+ dd->ipath_lastport_piobuf = piobufs - kpiobufs;
+ dd->ipath_pbufsport =
+ uports ? dd->ipath_lastport_piobuf / uports : 0;
+ /* if not an even divisor, some user ports get extra buffers */
+ dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
+ (dd->ipath_pbufsport * uports);
+ if (dd->ipath_ports_extrabuf)
+ ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
+ "ports <= %u\n", dd->ipath_pbufsport,
+ dd->ipath_ports_extrabuf);
+ dd->ipath_lastpioindex = 0;
+ dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
+ /* ipath_pioavailshadow initialized earlier */
+ ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
+ "each for %u user ports\n", kpiobufs,
+ piobufs, dd->ipath_pbufsport, uports);
+ ret = dd->ipath_f_early_init(dd);
+ if (ret) {
+ ipath_dev_err(dd, "Early initialization failure\n");
+ goto done;
+ }
+
+ /*
+ * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
+ * done after early_init.
+ */
+ dd->ipath_hdrqlast =
+ dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrentsize,
+ dd->ipath_rcvhdrentsize);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
+ dd->ipath_rcvhdrsize);
+
+ if (!reinit) {
+ ret = init_pioavailregs(dd);
+ init_shadow_tids(dd);
+ if (ret)
+ goto done;
+ }
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
+ dd->ipath_pioavailregs_phys);
+
+ /*
+ * this is to detect s/w errors, which the h/w works around by
+ * ignoring the low 6 bits of the address if it wasn't aligned.
+ */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpioavailaddr);
+ if (val != dd->ipath_pioavailregs_phys) {
+ ipath_dev_err(dd, "Catastrophic software error, "
+ "SendPIOAvailAddr written as %lx, "
+ "read back as %llx\n",
+ (unsigned long) dd->ipath_pioavailregs_phys,
+ (unsigned long long) val);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP);
+
+ /*
+ * make sure we are not in freeze, and PIO send enabled, so
+ * writes to pbc happen
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, 0ULL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+ ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
+
+ /*
+ * do this before the error clears, since we expect serdes PLL
+ * errors during this, the first time after reset
+ */
+ if (bringup_link(dd)) {
+ dev_info(&dd->pcidev->dev, "Failed to bringup IB link\n");
+ ret = -ENETDOWN;
+ goto done;
+ }
+
+ /*
+ * clear any "expected" hwerrs from reset and/or initialization
+ * clear any that aren't enabled (at least this once), and then
+ * set the enable mask
+ */
+ dd->ipath_f_init_hwerrors(dd);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+ ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask);
+
+ /* clear all */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
+ /* enable errors that are masked, at least this first time. */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+ ~dd->ipath_maskederrs);
+ dd->ipath_maskederrs = 0; /* don't re-enable ignored in timer */
+ dd->ipath_errormask =
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
+ /* clear any interrupts up to this point (ints still not enabled) */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
+
+ dd->ipath_f_tidtemplate(dd);
+
+ /*
+ * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing
+ * re-init, the simplest way to handle this is to free
+ * existing, and re-allocate.
+ * Need to re-create rest of port 0 portdata as well.
+ */
+ pd = dd->ipath_pd[0];
+ if (reinit) {
+ struct ipath_portdata *npd;
+
+ /*
+ * Alloc and init new ipath_portdata for port0,
+ * Then free old pd. Could lead to fragmentation, but also
+ * makes later support for hot-swap easier.
+ */
+ npd = create_portdata0(dd);
+ if (npd) {
+ ipath_free_pddata(dd, pd);
+ dd->ipath_pd[0] = npd;
+ pd = npd;
+ } else {
+ ipath_dev_err(dd, "Unable to allocate portdata"
+ " for port 0, failing\n");
+ ret = -ENOMEM;
+ goto done;
+ }
+ }
+ ret = ipath_create_rcvhdrq(dd, pd);
+ if (!ret)
+ ret = create_port0_egr(dd);
+ if (ret) {
+ ipath_dev_err(dd, "failed to allocate kernel port's "
+ "rcvhdrq and/or egr bufs\n");
+ goto done;
+ } else
+ enable_chip(dd, reinit);
+
+ /* after enable_chip, so pioavailshadow setup */
+ ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
+
+ /*
+ * Cancel any possible active sends from early driver load.
+ * Follows early_init because some chips have to initialize
+ * PIO buffers in early_init to avoid false parity errors.
+ * After enable and ipath_chg_pioavailkernel so we can safely
+ * enable pioavail updates and PIOENABLE; packets are now
+ * ready to go out.
+ */
+ ipath_cancel_sends(dd, 1);
+
+ if (!reinit) {
+ /*
+ * Used when we close a port, for DMA already in flight
+ * at close.
+ */
+ dd->ipath_dummy_hdrq = dma_alloc_coherent(
+ &dd->pcidev->dev, dd->ipath_pd[0]->port_rcvhdrq_size,
+ &dd->ipath_dummy_hdrq_phys,
+ gfp_flags);
+ if (!dd->ipath_dummy_hdrq) {
+ dev_info(&dd->pcidev->dev,
+ "Couldn't allocate 0x%lx bytes for dummy hdrq\n",
+ dd->ipath_pd[0]->port_rcvhdrq_size);
+ /* fallback to just 0'ing */
+ dd->ipath_dummy_hdrq_phys = 0UL;
+ }
+ }
+
+ /*
+ * cause retrigger of pending interrupts ignored during init,
+ * even if we had errors
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
+
+ if (!dd->ipath_stats_timer_active) {
+ /*
+ * first init, or after an admin disable/enable
+ * set up stats retrieval timer, even if we had errors
+ * in last portion of setup
+ */
+ init_timer(&dd->ipath_stats_timer);
+ dd->ipath_stats_timer.function = ipath_get_faststats;
+ dd->ipath_stats_timer.data = (unsigned long) dd;
+ /* every 5 seconds; */
+ dd->ipath_stats_timer.expires = jiffies + 5 * HZ;
+ /* takes ~16 seconds to overflow at full IB 4x bandwidth */
+ add_timer(&dd->ipath_stats_timer);
+ dd->ipath_stats_timer_active = 1;
+ }
+
+ /* Set up SendDMA if chip supports it */
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+ ret = setup_sdma(dd);
+
+ /* Set up HoL state */
+ init_timer(&dd->ipath_hol_timer);
+ dd->ipath_hol_timer.function = ipath_hol_event;
+ dd->ipath_hol_timer.data = (unsigned long)dd;
+ dd->ipath_hol_state = IPATH_HOL_UP;
+
+done:
+ if (!ret) {
+ *dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
+ if (!dd->ipath_f_intrsetup(dd)) {
+ /* now we can enable all interrupts from the chip */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
+ -1LL);
+ /* force re-interrupt of any pending interrupts. */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear,
+ 0ULL);
+ /* chip is usable; mark it as initialized */
+ *dd->ipath_statusp |= IPATH_STATUS_INITTED;
+
+ /*
+ * setup to verify we get an interrupt, and fallback
+ * to an alternate if necessary and possible
+ */
+ if (!reinit) {
+ init_timer(&dd->ipath_intrchk_timer);
+ dd->ipath_intrchk_timer.function =
+ verify_interrupt;
+ dd->ipath_intrchk_timer.data =
+ (unsigned long) dd;
+ }
+ dd->ipath_intrchk_timer.expires = jiffies + HZ/2;
+ add_timer(&dd->ipath_intrchk_timer);
+ } else
+ ipath_dev_err(dd, "No interrupts enabled, couldn't "
+ "setup interrupt address\n");
+
+ if (dd->ipath_cfgports > ipath_stats.sps_nports)
+ /*
+ * sps_nports is a global, so, we set it to
+ * the highest number of ports of any of the
+ * chips we find; we never decrement it, at
+ * least for now. Since this might have changed
+ * over disable/enable or prior to reset, always
+ * do the check and potentially adjust.
+ */
+ ipath_stats.sps_nports = dd->ipath_cfgports;
+ } else
+ ipath_dbg("Failed (%d) to initialize chip\n", ret);
+
+ /* if ret is non-zero, we probably should do some cleanup
+ here... */
+ return ret;
+}
+
+static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
+{
+ struct ipath_devdata *dd;
+ unsigned long flags;
+ unsigned short val;
+ int ret;
+
+ ret = ipath_parse_ushort(str, &val);
+
+ spin_lock_irqsave(&ipath_devs_lock, flags);
+
+ if (ret < 0)
+ goto bail;
+
+ if (val == 0) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
+ if (dd->ipath_kregbase)
+ continue;
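+ /* make sure enough PIO buffers remain for the user ports */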
+ if (val > (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k -
+ (dd->ipath_cfgports *
+ IPATH_MIN_USER_PORT_BUFCNT)))
+ {
+ ipath_dev_err(
+ dd,
+ "Allocating %d PIO bufs for kernel leaves "
+ "too few for %d user ports (%d each)\n",
+ val, dd->ipath_cfgports - 1,
+ IPATH_MIN_USER_PORT_BUFCNT);
+ ret = -EINVAL;
+ goto bail;
+ }
+ dd->ipath_lastport_piobuf =
+ dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
+ }
+
+ ipath_kpiobufs = val;
+ ret = 0;
+bail:
+ spin_unlock_irqrestore(&ipath_devs_lock, flags);
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
new file mode 100644
index 0000000..6c21b4b
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -0,0 +1,1273 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "ipath_kernel.h"
+#include "ipath_verbs.h"
+#include "ipath_common.h"
+
+
+/*
+ * Called when we might have an error that is specific to a particular
+ * PIO buffer, and may need to cancel that buffer, so it can be re-used.
+ */
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
+{
+ u32 piobcnt;
+ unsigned long sbuf[4];
+ /*
+ * it's possible that sendbuffererror could have bits set; might
+ * have already done this as a result of hardware error handling
+ */
+ piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ /* read these before writing errorclear */
+ sbuf[0] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror);
+ sbuf[1] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror + 1);
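+ /*
+ * each 64-bit register carries one bit per buffer, so registers
+ * 2 and 3 are only read on chips with more than 128 buffers
+ */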
+ if (piobcnt > 128)
+ sbuf[2] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror + 2);
+ if (piobcnt > 192)
+ sbuf[3] = ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_sendbuffererror + 3);
+ else
+ sbuf[3] = 0;
+
+ if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
+ int i;
+ if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
+ dd->ipath_lastcancel > jiffies) {
+ __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
+ "SendbufErrs %lx %lx", sbuf[0],
+ sbuf[1]);
+ if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
+ printk(" %lx %lx ", sbuf[2], sbuf[3]);
+ printk("\n");
+ }
+
+ for (i = 0; i < piobcnt; i++)
+ if (test_bit(i, sbuf))
+ ipath_disarm_piobufs(dd, i, 1);
+ /* ignore armlaunch errs for a bit */
+ dd->ipath_lastcancel = jiffies+3;
+ }
+}
+
+
+/* These are all rcv-related errors which we want to count for stats */
+#define E_SUM_PKTERRS \
+ (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
+ INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
+ INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
+ INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
+ INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
+ INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)
+
+/* These are all send-related errors which we want to count for stats */
+#define E_SUM_ERRS \
+ (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
+ INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
+ INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
+ INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
+ INFINIPATH_E_INVALIDADDR)
+
+/*
+ * this is similar to E_SUM_ERRS, but we can't ignore armlaunch or
+ * errors not related to freeze and cancelling buffers. We can't ignore
+ * armlaunch because we could get more while still cleaning up, and we
+ * need to cancel those as they happen.
+ */
+#define E_SPKT_ERRS_IGNORE \
+ (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
+ INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
+ INFINIPATH_E_SPKTLEN)
+
+/*
+ * these are errors that can occur when the link changes state while
+ * a packet is being sent or received. This doesn't cover things
+ * like EBP or VCRC that can result from the link changing state while
+ * a packet is being sent, so we receive a "known bad" packet.
+ */
+#define E_SUM_LINK_PKTERRS \
+ (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
+ INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
+ INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \
+ INFINIPATH_E_RUNEXPCHAR)
+
+static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
+{
+ u64 ignore_this_time = 0;
+
+ ipath_disarm_senderrbufs(dd);
+ if ((errs & E_SUM_LINK_PKTERRS) &&
+ !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+ /*
+ * This can happen when SMA is trying to bring the link
+ * up, but the IB link changes state at the "wrong" time.
+ * The IB logic then complains that the packet isn't
+ * valid. We don't want to confuse people, so we just
+ * don't print them, except at debug
+ */
+ ipath_dbg("Ignoring packet errors %llx, because link not "
+ "ACTIVE\n", (unsigned long long) errs);
+ ignore_this_time = errs & E_SUM_LINK_PKTERRS;
+ }
+
+ return ignore_this_time;
+}
+
+/* generic hw error messages... */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
+ { \
+ .mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \
+ INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \
+ .msg = "TXE " #a " Memory Parity" \
+ }
+#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
+ { \
+ .mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \
+ INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \
+ .msg = "RXE " #a " Memory Parity" \
+ }
+
+static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
+ INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
+ INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
+
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
+ INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
+
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
+ INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
+};
+
+/**
+ * ipath_format_hwmsg - format a single hwerror message
+ * @msg: message buffer
+ * @msgl: length of message buffer
+ * @hwmsg: message to add to message buffer
+ */
+static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
+{
+ strlcat(msg, "[", msgl);
+ strlcat(msg, hwmsg, msgl);
+ strlcat(msg, "]", msgl);
+}
+
+/**
+ * ipath_format_hwerrors - format hardware error messages for display
+ * @hwerrs: hardware errors bit vector
+ * @hwerrmsgs: hardware error descriptions
+ * @nhwerrmsgs: number of hwerrmsgs
+ * @msg: message buffer
+ * @msgl: message buffer length
+ */
+void ipath_format_hwerrors(u64 hwerrs,
+ const struct ipath_hwerror_msgs *hwerrmsgs,
+ size_t nhwerrmsgs,
+ char *msg, size_t msgl)
+{
+ int i;
+ const int glen =
+ sizeof(ipath_generic_hwerror_msgs) /
+ sizeof(ipath_generic_hwerror_msgs[0]);
+
+ for (i = 0; i < glen; i++) {
+ if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
+ ipath_format_hwmsg(msg, msgl,
+ ipath_generic_hwerror_msgs[i].msg);
+ }
+ }
+
+ for (i = 0; i < nhwerrmsgs; i++) {
+ if (hwerrs & hwerrmsgs[i].mask) {
+ ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
+ }
+ }
+}
+
+/* return the strings for the most common link states */
+static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
+{
+ char *ret;
+ u32 state;
+
+ state = ipath_ib_state(dd, ibcs);
+ if (state == dd->ib_init)
+ ret = "Init";
+ else if (state == dd->ib_arm)
+ ret = "Arm";
+ else if (state == dd->ib_active)
+ ret = "Active";
+ else
+ ret = "Down";
+ return ret;
+}
+
+void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
+{
+ struct ib_event event;
+
+ event.device = &dd->verbs_dev->ibdev;
+ event.element.port_num = 1;
+ event.event = ev;
+ ib_dispatch_event(&event);
+}
+
+static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
+ ipath_err_t errs)
+{
+ u32 ltstate, lstate, ibstate, lastlstate;
+ u32 init = dd->ib_init;
+ u32 arm = dd->ib_arm;
+ u32 active = dd->ib_active;
+ const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+
+ lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
+ ibstate = ipath_ib_state(dd, ibcs);
+ /* linkstate at last interrupt */
+ lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
+ ltstate = ipath_ib_linktrstate(dd, ibcs); /* link training state */
+
+ /*
+ * Since going into a recovery state causes the link state to go
+ * down and since recovery is transitory, it is better if we "miss"
+ * ever seeing the link training state go into recovery (i.e.,
+ * ignore this transition for link state special handling purposes)
+ * without even updating ipath_lastibcstat.
+ */
+ if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) ||
+ (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) ||
+ (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE))
+ goto done;
+
+ /*
+ * if linkstate transitions into INIT from any of the various down
+ * states, or if it transitions from any of the up (INIT or better)
+ * states into any of the down states (except link recovery), then
+ * call the chip-specific code to take appropriate actions.
+ */
+ if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
+ lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
+ /* transitioned to UP */
+ if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
+ /* link came up, so we must no longer be disabled */
+ dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
+ ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
+ goto skip_ibchange; /* chip-code handled */
+ }
+ } else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
+ (dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
+ ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT &&
+ ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
+ int handled;
+ handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
+ dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
+ if (handled) {
+ ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
+ goto skip_ibchange; /* chip-code handled */
+ }
+ }
+
+ /*
+ * Significant enough to always print and get into logs, if it was
+ * unexpected. If it was a requested state change, we'll have
+ * already cleared the flags, so we won't print this warning
+ */
+ if ((ibstate != arm && ibstate != active) &&
+ (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
+ dev_info(&dd->pcidev->dev, "Link state changed from %s "
+ "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
+ "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
+ }
+
+ if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
+ ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
+ u32 lastlts;
+ lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
+ /*
+ * Ignore cycling back and forth from Polling.Active to
+ * Polling.Quiet while waiting for the other end of the link
+ * to come up, except to try and decide if we are connected
+ * to a live IB device or not. We will cycle back and
+ * forth between them if no cable is plugged in, the other
+ * device is powered off or disabled, etc.
+ */
+ if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
+ lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
+ if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
+ (++dd->ipath_ibpollcnt == 40)) {
+ dd->ipath_flags |= IPATH_NOCABLE;
+ *dd->ipath_statusp |=
+ IPATH_STATUS_IB_NOCABLE;
+ ipath_cdbg(LINKVERB, "Set NOCABLE\n");
+ }
+ ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
+ ipath_ibcstatus_str[ltstate], ibstate);
+ goto skip_ibchange;
+ }
+ }
+
+ dd->ipath_ibpollcnt = 0; /* not poll*, now */
+ ipath_stats.sps_iblink++;
+
+ if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) {
+ u64 linkrecov;
+ linkrecov = ipath_snap_cntr(dd,
+ dd->ipath_cregs->cr_iblinkerrrecovcnt);
+ if (linkrecov != dd->ipath_lastlinkrecov) {
+ ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n",
+ (unsigned long long) ibcs,
+ ib_linkstate(dd, ibcs),
+ ipath_ibcstatus_str[ltstate],
+ (unsigned long long) linkrecov);
+ /* and no more until active again */
+ dd->ipath_lastlinkrecov = 0;
+ ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
+ goto skip_ibchange;
+ }
+ }
+
+ if (ibstate == init || ibstate == arm || ibstate == active) {
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
+ if (ibstate == init || ibstate == arm) {
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+ if (dd->ipath_flags & IPATH_LINKACTIVE)
+ signal_ib_event(dd, IB_EVENT_PORT_ERR);
+ }
+ if (ibstate == arm) {
+ dd->ipath_flags |= IPATH_LINKARMED;
+ dd->ipath_flags &= ~(IPATH_LINKUNK |
+ IPATH_LINKINIT | IPATH_LINKDOWN |
+ IPATH_LINKACTIVE | IPATH_NOCABLE);
+ ipath_hol_down(dd);
+ } else if (ibstate == init) {
+ /*
+ * set INIT and DOWN. Down is checked by
+ * most of the other code, but INIT is
+ * useful to know in a few places.
+ */
+ dd->ipath_flags |= IPATH_LINKINIT |
+ IPATH_LINKDOWN;
+ dd->ipath_flags &= ~(IPATH_LINKUNK |
+ IPATH_LINKARMED | IPATH_LINKACTIVE |
+ IPATH_NOCABLE);
+ ipath_hol_down(dd);
+ } else { /* active */
+ dd->ipath_lastlinkrecov = ipath_snap_cntr(dd,
+ dd->ipath_cregs->cr_iblinkerrrecovcnt);
+ *dd->ipath_statusp |=
+ IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
+ dd->ipath_flags |= IPATH_LINKACTIVE;
+ dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
+ | IPATH_LINKDOWN | IPATH_LINKARMED |
+ IPATH_NOCABLE);
+ if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+ ipath_restart_sdma(dd);
+ signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
+ /* LED active not handled in chip _f_updown */
+ dd->ipath_f_setextled(dd, lstate, ltstate);
+ ipath_hol_up(dd);
+ }
+
+ /*
+ * print after we've already done the work, so as not to
+ * delay the state changes and notifications, for debugging
+ */
+ if (lstate == lastlstate)
+ ipath_cdbg(LINKVERB, "Unchanged from last: %s "
+ "(%x)\n", ib_linkstate(dd, ibcs), ibstate);
+ else
+ ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
+ dd->ipath_unit, ib_linkstate(dd, ibcs),
+ ipath_ibcstatus_str[ltstate], ibstate);
+ } else { /* down */
+ if (dd->ipath_flags & IPATH_LINKACTIVE)
+ signal_ib_event(dd, IB_EVENT_PORT_ERR);
+ dd->ipath_flags |= IPATH_LINKDOWN;
+ dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
+ | IPATH_LINKACTIVE |
+ IPATH_LINKARMED);
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+ dd->ipath_lli_counter = 0;
+
+ if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
+ ipath_cdbg(VERBOSE, "Unit %u link state down "
+ "(state 0x%x), from %s\n",
+ dd->ipath_unit, lstate,
+ ib_linkstate(dd, dd->ipath_lastibcstat));
+ else
+ ipath_cdbg(LINKVERB, "Unit %u link state changed "
+ "to %s (0x%x) from down (%x)\n",
+ dd->ipath_unit,
+ ipath_ibcstatus_str[ltstate],
+ ibstate, lastlstate);
+ }
+
+skip_ibchange:
+ dd->ipath_lastibcstat = ibcs;
+done:
+ return;
+}
+
+static void handle_supp_msgs(struct ipath_devdata *dd,
+ unsigned supp_msgs, char *msg, u32 msgsz)
+{
+ /*
+ * Print the message unless it's ibc status change only, which
+ * happens so often we never want to count it.
+ */
+ if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
+ int iserr;
+ ipath_err_t mask;
+ iserr = ipath_decode_err(dd, msg, msgsz,
+ dd->ipath_lasterror &
+ ~INFINIPATH_E_IBSTATUSCHANGED);
+
+ mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+ INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED;
+
+ /* if we're in debug, then don't mask SDMADISABLED msgs */
+ if (ipath_debug & __IPATH_DBG)
+ mask &= ~INFINIPATH_E_SDMADISABLED;
+
+ if (dd->ipath_lasterror & ~mask)
+ ipath_dev_err(dd, "Suppressed %u messages for "
+ "fast-repeating errors (%s) (%llx)\n",
+ supp_msgs, msg,
+ (unsigned long long)
+ dd->ipath_lasterror);
+ else {
+ /*
+ * rcvegrfull and rcvhdrqfull are "normal", for some
+ * types of processes (mostly benchmarks) that send
+ * huge numbers of messages, while not processing
+ * them. So only complain about these at debug
+ * level.
+ */
+ if (iserr)
+ ipath_dbg("Suppressed %u messages for %s\n",
+ supp_msgs, msg);
+ else
+ ipath_cdbg(ERRPKT,
+ "Suppressed %u messages for %s\n",
+ supp_msgs, msg);
+ }
+ }
+}
+
+static unsigned handle_frequent_errors(struct ipath_devdata *dd,
+ ipath_err_t errs, char *msg,
+ u32 msgsz, int *noprint)
+{
+ unsigned long nc;
+ static unsigned long nextmsg_time;
+ static unsigned nmsgs, supp_msgs;
+
+ /*
+ * Throttle back "fast" messages to no more than 10 per 5 seconds.
+ * This isn't perfect, but it's a reasonable heuristic. If we get
+ * more than 10, give a 6x longer delay.
+ */
+ nc = jiffies;
+ if (nmsgs > 10) {
+ if (time_before(nc, nextmsg_time)) {
+ *noprint = 1;
+ if (!supp_msgs++)
+ nextmsg_time = nc + HZ * 3;
+ } else if (supp_msgs) {
+ handle_supp_msgs(dd, supp_msgs, msg, msgsz);
+ supp_msgs = 0;
+ nmsgs = 0;
+ }
+ } else if (!nmsgs++ || time_after(nc, nextmsg_time))
+ nextmsg_time = nc + HZ / 2;
+
+ return supp_msgs;
+}
+
+static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs)
+{
+ unsigned long flags;
+ int expected;
+
+ if (ipath_debug & __IPATH_DBG) {
+ char msg[128];
+ ipath_decode_err(dd, msg, sizeof msg, errs &
+ INFINIPATH_E_SDMAERRS);
+ ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg);
+ }
+ if (ipath_debug & __IPATH_VERBDBG) {
+ unsigned long tl, hd, status, lengen;
+ tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
+ hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
+ status = ipath_read_kreg64(dd,
+ dd->ipath_kregs->kr_senddmastatus);
+ lengen = ipath_read_kreg64(dd,
+ dd->ipath_kregs->kr_senddmalengen);
+ ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx "
+ "lengen 0x%lx\n", tl, hd, status, lengen);
+ }
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+ expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ if (!expected)
+ ipath_cancel_sends(dd, 1);
+}
+
+static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat)
+{
+ unsigned long flags;
+ int expected;
+
+ if ((istat & INFINIPATH_I_SDMAINT) &&
+ !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ ipath_sdma_intr(dd);
+
+ if (istat & INFINIPATH_I_SDMADISABLED) {
+ expected = test_bit(IPATH_SDMA_ABORTING,
+ &dd->ipath_sdma_status);
+ ipath_dbg("%s SDmaDisabled intr\n",
+ expected ? "expected" : "unexpected");
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ if (!expected)
+ ipath_cancel_sends(dd, 1);
+ if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
+ }
+}
+
+static int handle_hdrq_full(struct ipath_devdata *dd)
+{
+ int chkerrpkts = 0;
+ u32 hd, tl;
+ u32 i;
+
+ ipath_stats.sps_hdrqfull++;
+ for (i = 0; i < dd->ipath_cfgports; i++) {
+ struct ipath_portdata *pd = dd->ipath_pd[i];
+
+ if (i == 0) {
+ /*
+ * For kernel receive queues, we just want to know
+ * if there are packets in the queue that we can
+ * process.
+ */
+ if (pd->port_head != ipath_get_hdrqtail(pd))
+ chkerrpkts |= 1 << i;
+ continue;
+ }
+
+ /* Skip if user context is not open */
+ if (!pd || !pd->port_cnt)
+ continue;
+
+ /* Don't report the same point multiple times. */
+ if (dd->ipath_flags & IPATH_NODMA_RTAIL)
+ tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i);
+ else
+ tl = ipath_get_rcvhdrtail(pd);
+ if (tl == pd->port_lastrcvhdrqtail)
+ continue;
+
+ hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i);
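+ /* full when the tail has advanced to just behind the head,
+ * including the wraparound case */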
+ if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) {
+ pd->port_lastrcvhdrqtail = tl;
+ pd->port_hdrqfull++;
+ /* flush hdrqfull so that poll() sees it */
+ wmb();
+ wake_up_interruptible(&pd->port_wait);
+ }
+ }
+
+ return chkerrpkts;
+}
+
+static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
+{
+ char msg[128];
+ u64 ignore_this_time = 0;
+ u64 iserr = 0;
+ int chkerrpkts = 0, noprint = 0;
+ unsigned supp_msgs;
+ int log_idx;
+
+ /*
+ * don't report errors that are masked, either at init
+ * (not set in ipath_errormask), or temporarily (set in
+ * ipath_maskederrs)
+ */
+ errs &= dd->ipath_errormask & ~dd->ipath_maskederrs;
+
+ supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg,
+ &noprint);
+
+ /* do these first, they are most important */
+ if (errs & INFINIPATH_E_HARDWARE) {
+ /* reuse same msg buf */
+ dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
+ } else {
+ u64 mask;
+ for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
+ mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
+ if (errs & mask)
+ ipath_inc_eeprom_err(dd, log_idx, 1);
+ }
+ }
+
+ if (errs & INFINIPATH_E_SDMAERRS)
+ handle_sdma_errors(dd, errs);
+
+ if (!noprint && (errs & ~dd->ipath_e_bitsextant))
+ ipath_dev_err(dd, "error interrupt with unknown errors "
+ "%llx set\n", (unsigned long long)
+ (errs & ~dd->ipath_e_bitsextant));
+
+ if (errs & E_SUM_ERRS)
+ ignore_this_time = handle_e_sum_errs(dd, errs);
+ else if ((errs & E_SUM_LINK_PKTERRS) &&
+ !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+ /*
+ * This can happen when SMA is trying to bring the link
+ * up, but the IB link changes state at the "wrong" time.
+ * The IB logic then complains that the packet isn't
+ * valid. We don't want to confuse people, so we just
+ * don't print them, except at debug
+ */
+ ipath_dbg("Ignoring packet errors %llx, because link not "
+ "ACTIVE\n", (unsigned long long) errs);
+ ignore_this_time = errs & E_SUM_LINK_PKTERRS;
+ }
+
+ if (supp_msgs == 250000) {
+ int s_iserr;
+ /*
+ * It's not entirely reasonable to assume that the errors set
+ * in the last clear period are all responsible for the
+ * problem, but the alternative is to assume it's the only
+ * ones on this particular interrupt, which also isn't great
+ */
+ dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
+
+ dd->ipath_errormask &= ~dd->ipath_maskederrs;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+ dd->ipath_errormask);
+ s_iserr = ipath_decode_err(dd, msg, sizeof msg,
+ dd->ipath_maskederrs);
+
+ if (dd->ipath_maskederrs &
+ ~(INFINIPATH_E_RRCVEGRFULL |
+ INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
+ ipath_dev_err(dd, "Temporarily disabling "
+ "error(s) %llx reporting; too frequent (%s)\n",
+ (unsigned long long) dd->ipath_maskederrs,
+ msg);
+ else {
+ /*
+ * rcvegrfull and rcvhdrqfull are "normal",
+ * for some types of processes (mostly benchmarks)
+ * that send huge numbers of messages, while not
+ * processing them. So only complain about
+ * these at debug level.
+ */
+ if (s_iserr)
+ ipath_dbg("Temporarily disabling reporting "
+ "too frequent queue full errors (%s)\n",
+ msg);
+ else
+ ipath_cdbg(ERRPKT,
+ "Temporarily disabling reporting too"
+ " frequent packet errors (%s)\n",
+ msg);
+ }
+
+ /*
+ * Re-enable the masked errors after around 3 minutes, in
+ * ipath_get_faststats(). If we have a series of fast
+ * repeating but different errors, the interval will keep
+ * stretching out, but that's OK, as that's pretty
+ * catastrophic.
+ */
+ dd->ipath_unmasktime = jiffies + HZ * 180;
+ }
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
+ if (ignore_this_time)
+ errs &= ~ignore_this_time;
+ if (errs & ~dd->ipath_lasterror) {
+ errs &= ~dd->ipath_lasterror;
+ /* never suppress duplicate hwerrors or ibstatuschange */
+ dd->ipath_lasterror |= errs &
+ ~(INFINIPATH_E_HARDWARE |
+ INFINIPATH_E_IBSTATUSCHANGED);
+ }
+
+ if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) {
+ dd->ipath_spectriggerhit++;
+ ipath_dbg("%lu special trigger hits\n",
+ dd->ipath_spectriggerhit);
+ }
+
+ /* likely due to cancel; so suppress message unless verbose */
+ if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
+ dd->ipath_lastcancel > jiffies) {
+ /* armlaunch takes precedence; it often causes both. */
+ ipath_cdbg(VERBOSE,
+ "Suppressed %s error (%llx) after sendbuf cancel\n",
+ (errs & INFINIPATH_E_SPIOARMLAUNCH) ?
+ "armlaunch" : "sendpktlen", (unsigned long long)errs);
+ errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
+ }
+
+ if (!errs)
+ return 0;
+
+ if (!noprint) {
+ ipath_err_t mask;
+ /*
+ * The ones we mask off are handled specially below
+ * or above. Also mask SDMADISABLED by default as it
+ * is too chatty.
+ */
+ mask = INFINIPATH_E_IBSTATUSCHANGED |
+ INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+ INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED;
+
+ /* if we're in debug, then don't mask SDMADISABLED msgs */
+ if (ipath_debug & __IPATH_DBG)
+ mask &= ~INFINIPATH_E_SDMADISABLED;
+
+ ipath_decode_err(dd, msg, sizeof msg, errs & ~mask);
+ } else
+ /* so we don't need if (!noprint) at strlcat's below */
+ *msg = 0;
+
+ if (errs & E_SUM_PKTERRS) {
+ ipath_stats.sps_pkterrs++;
+ chkerrpkts = 1;
+ }
+ if (errs & E_SUM_ERRS)
+ ipath_stats.sps_errs++;
+
+ if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
+ ipath_stats.sps_crcerrs++;
+ chkerrpkts = 1;
+ }
+ iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
+
+
+ /*
+ * We don't want to print these two as they happen, or we can make
+ * the situation even worse, because it takes so long to print
+ * messages to serial consoles. Kernel ports get printed from
+ * fast_stats, no more than every 5 seconds, user ports get printed
+ * on close
+ */
+ if (errs & INFINIPATH_E_RRCVHDRFULL)
+ chkerrpkts |= handle_hdrq_full(dd);
+ if (errs & INFINIPATH_E_RRCVEGRFULL) {
+ struct ipath_portdata *pd = dd->ipath_pd[0];
+
+ /*
+ * since this is of less importance and not likely to
+ * happen without also getting hdrfull, only count
+ * occurrences; don't check each port (or even the kernel
+ * vs user)
+ */
+ ipath_stats.sps_etidfull++;
+ if (pd->port_head != ipath_get_hdrqtail(pd))
+ chkerrpkts |= 1;
+ }
+
+ /*
+ * do this before IBSTATUSCHANGED, in case both bits set in a single
+ * interrupt; we want the STATUSCHANGE to "win", so we do our
+ * internal copy of state machine correctly
+ */
+ if (errs & INFINIPATH_E_RIBLOSTLINK) {
+ /*
+ * force through block below
+ */
+ errs |= INFINIPATH_E_IBSTATUSCHANGED;
+ ipath_stats.sps_iblink++;
+ dd->ipath_flags |= IPATH_LINKDOWN;
+ dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
+ | IPATH_LINKARMED | IPATH_LINKACTIVE);
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+
+ ipath_dbg("Lost link, link now down (%s)\n",
+ ipath_ibcstatus_str[ipath_read_kreg64(dd,
+ dd->ipath_kregs->kr_ibcstatus) & 0xf]);
+ }
+ if (errs & INFINIPATH_E_IBSTATUSCHANGED)
+ handle_e_ibstatuschanged(dd, errs);
+
+ if (errs & INFINIPATH_E_RESET) {
+ if (!noprint)
+ ipath_dev_err(dd, "Got reset, requires re-init "
+ "(unload and reload driver)\n");
+ dd->ipath_flags &= ~IPATH_INITTED; /* needs re-init */
+ /* mark as having had error */
+ *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
+ }
+
+ if (!noprint && *msg) {
+ if (iserr)
+ ipath_dev_err(dd, "%s error\n", msg);
+ }
+ if (dd->ipath_state_wanted & dd->ipath_flags) {
+ ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
+ "waking\n", dd->ipath_state_wanted,
+ dd->ipath_flags);
+ wake_up_interruptible(&ipath_state_wait);
+ }
+
+ return chkerrpkts;
+}
+
+/*
+ * try to cleanup as much as possible for anything that might have gone
+ * wrong while in freeze mode, such as pio buffers being written by user
+ * processes (causing armlaunch), send errors due to going into freeze mode,
+ * etc., and try to avoid causing extra interrupts while doing so.
+ * Forcibly update the in-memory pioavail register copies after cleanup
+ * because the chip won't do it while in freeze mode (the register values
+ * themselves are kept correct).
+ * Make sure that we don't lose any important interrupts by using the chip
+ * feature that says that writing 0 to a bit in *clear that is set in
+ * *status will cause an interrupt to be generated again (if allowed by
+ * the *mask value).
+ */
+void ipath_clear_freeze(struct ipath_devdata *dd)
+{
+ /* disable error interrupts, to avoid confusion */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
+
+ /* also disable interrupts; errormask is sometimes overwritten */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
+
+ ipath_cancel_sends(dd, 1);
+
+ /* clear the freeze, and be sure chip saw it */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+
+ /* force in-memory update now we are out of freeze */
+ ipath_force_pio_avail_update(dd);
+
+ /*
+ * force new interrupt if any hwerr, error or interrupt bits are
+ * still set, and clear "safe" send packet errors related to freeze
+ * and cancelling sends. Re-enable error interrupts before possible
+ * force of re-interrupt on pending interrupts.
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
+ E_SPKT_ERRS_IGNORE);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+ dd->ipath_errormask);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
+}
+
+
+/* this is separate to allow for better optimization of ipath_intr() */
+
+static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp)
+{
+ /*
+ * sometimes happen during driver init and unload, don't want
+ * to process any interrupts at that point
+ */
+
+ /* this is just a bandaid, not a fix, if something goes badly
+ * wrong */
+ if (++*unexpectp > 100) {
+ if (++*unexpectp > 105) {
+ /*
+ * ok, we must be taking somebody else's interrupts,
+ * due to a messed up mptable and/or PIRQ table, so
+ * unregister the interrupt. We've seen this during
+ * linuxbios development work, and it may happen in
+ * the future again.
+ */
+ if (dd->pcidev && dd->ipath_irq) {
+ ipath_dev_err(dd, "Now %u unexpected "
+ "interrupts, unregistering "
+ "interrupt handler\n",
+ *unexpectp);
+ ipath_dbg("free_irq of irq %d\n",
+ dd->ipath_irq);
+ dd->ipath_f_free_irq(dd);
+ }
+ }
+ if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) {
+ ipath_dev_err(dd, "%u unexpected interrupts, "
+ "disabling interrupts completely\n",
+ *unexpectp);
+ /*
+ * disable all interrupts, something is very wrong
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
+ 0ULL);
+ }
+ } else if (*unexpectp > 1)
+ ipath_dbg("Interrupt when not ready, should not happen, "
+ "ignoring\n");
+}
+
+static noinline void ipath_bad_regread(struct ipath_devdata *dd)
+{
+ static int allbits;
+
+ /* separate routine, for better optimization of ipath_intr() */
+
+ /*
+ * We print the message and disable interrupts, in hope of
+ * having a better chance of debugging the problem.
+ */
+ ipath_dev_err(dd,
+ "Read of interrupt status failed (all bits set)\n");
+ if (allbits++) {
+ /* disable all interrupts, something is very wrong */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
+ if (allbits == 2) {
+ ipath_dev_err(dd, "Still bad interrupt status, "
+ "unregistering interrupt\n");
+ dd->ipath_f_free_irq(dd);
+ } else if (allbits > 2) {
+ if ((allbits % 10000) == 0)
+ printk(".");
+ } else
+ ipath_dev_err(dd, "Disabling interrupts, "
+ "multiple errors\n");
+ }
+}
+
+static void handle_layer_pioavail(struct ipath_devdata *dd)
+{
+ unsigned long flags;
+ int ret;
+
+ ret = ipath_ib_piobufavail(dd->verbs_dev);
+ if (ret > 0)
+ goto set;
+
+ return;
+set:
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+}
+
+/*
+ * Handle receive interrupts for user ports; this means a user
+ * process was waiting for a packet to arrive, and didn't want
+ * to poll
+ */
+static void handle_urcv(struct ipath_devdata *dd, u64 istat)
+{
+ u64 portr;
+ int i;
+ int rcvdint = 0;
+
+ /*
+ * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and
+ * test_and_clear_bit(IPATH_PORT_WAITING_URG) below
+ * would both like timely updates of the bits so that
+ * we don't pass them by unnecessarily. the rmb()
+ * here ensures that we see them promptly -- the
+ * corresponding wmb()'s are in ipath_poll_urgent()
+ * and ipath_poll_next()...
+ */
+ rmb();
+ portr = ((istat >> dd->ipath_i_rcvavail_shift) &
+ dd->ipath_i_rcvavail_mask) |
+ ((istat >> dd->ipath_i_rcvurg_shift) &
+ dd->ipath_i_rcvurg_mask);
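+ /* portr now has one bit per port with receive or urgent work pending */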
+ for (i = 1; i < dd->ipath_cfgports; i++) {
+ struct ipath_portdata *pd = dd->ipath_pd[i];
+
+ if (portr & (1 << i) && pd && pd->port_cnt) {
+ if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
+ &pd->port_flag)) {
+ clear_bit(i + dd->ipath_r_intravail_shift,
+ &dd->ipath_rcvctrl);
+ wake_up_interruptible(&pd->port_wait);
+ rcvdint = 1;
+ } else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
+ &pd->port_flag)) {
+ pd->port_urgent++;
+ wake_up_interruptible(&pd->port_wait);
+ }
+ }
+ }
+ if (rcvdint) {
+ /* only want to take one interrupt, so turn off the rcv
+ * interrupt for all the ports that had rcv_waiting set
+ * (but never for the kernel port)
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+ }
+}
+
+irqreturn_t ipath_intr(int irq, void *data)
+{
+ struct ipath_devdata *dd = data;
+ u64 istat, chk0rcv = 0;
+ ipath_err_t estat = 0;
+ irqreturn_t ret;
+ static unsigned unexpected = 0;
+ u64 kportrbits;
+
+ ipath_stats.sps_ints++;
+
+ if (dd->ipath_int_counter != (u32) -1)
+ dd->ipath_int_counter++;
+
+ if (!(dd->ipath_flags & IPATH_PRESENT)) {
+ /*
+ * This return value is not great, but we do not want the
+ * interrupt core code to remove our interrupt handler
+ * because we don't appear to be handling an interrupt
+ * during a chip reset.
+ */
+ return IRQ_HANDLED;
+ }
+
+ /*
+ * this needs to be flags&initted, not statusp, so we keep
+ * taking interrupts even after link goes down, etc.
+ * Also, we *must* clear the interrupt at some point, or we won't
+ * take it again, which can be real bad for errors, etc...
+ */
+
+ if (!(dd->ipath_flags & IPATH_INITTED)) {
+ ipath_bad_intr(dd, &unexpected);
+ ret = IRQ_NONE;
+ goto bail;
+ }
+
+ istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus);
+
+ if (unlikely(!istat)) {
+ ipath_stats.sps_nullintr++;
+ ret = IRQ_NONE; /* not our interrupt, or already handled */
+ goto bail;
+ }
+ if (unlikely(istat == -1)) {
+ ipath_bad_regread(dd);
+ /* don't know if it was our interrupt or not */
+ ret = IRQ_NONE;
+ goto bail;
+ }
+
+ if (unexpected)
+ unexpected = 0;
+
+ if (unlikely(istat & ~dd->ipath_i_bitsextant))
+ ipath_dev_err(dd,
+ "interrupt with unknown interrupts %Lx set\n",
+ (unsigned long long)
+ istat & ~dd->ipath_i_bitsextant);
+ else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
+ ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n",
+ (unsigned long long) istat);
+
+ if (istat & INFINIPATH_I_ERROR) {
+ ipath_stats.sps_errints++;
+ estat = ipath_read_kreg64(dd,
+ dd->ipath_kregs->kr_errorstatus);
+ if (!estat)
+ dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
+ "but no error bits set!\n",
+ (unsigned long long) istat);
+ else if (estat == -1LL)
+ /*
+ * should we try clearing all, or hope next read
+ * works?
+ */
+ ipath_dev_err(dd, "Read of error status failed "
+ "(all bits set); ignoring\n");
+ else
+ chk0rcv |= handle_errors(dd, estat);
+ }
+
+ if (istat & INFINIPATH_I_GPIO) {
+ /*
+ * GPIO interrupts fall in two broad classes:
+ * GPIO_2 indicates (on some HT4xx boards) that a packet
+ * has arrived for Port 0. Checking for this
+ * is controlled by flag IPATH_GPIO_INTR.
+ * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
+ * errors that we need to count. Checking for this
+ * is controlled by flag IPATH_GPIO_ERRINTRS.
+ */
+ u32 gpiostatus;
+ u32 to_clear = 0;
+
+ gpiostatus = ipath_read_kreg32(
+ dd, dd->ipath_kregs->kr_gpio_status);
+ /* First the error-counter case. */
+ if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
+ (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
+ /* want to clear the bits we see asserted. */
+ to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
+
+ /*
+ * Count appropriately, clear bits out of our copy,
+ * as they have been "handled".
+ */
+ if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
+ ipath_dbg("FlowCtl on UnsupVL\n");
+ dd->ipath_rxfc_unsupvl_errs++;
+ }
+ if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
+ ipath_dbg("Overrun Threshold exceeded\n");
+ dd->ipath_overrun_thresh_errs++;
+ }
+ if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
+ ipath_dbg("Local Link Integrity error\n");
+ dd->ipath_lli_errs++;
+ }
+ gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
+ }
+ /* Now the Port0 Receive case */
+ if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
+ (dd->ipath_flags & IPATH_GPIO_INTR)) {
+ /*
+ * GPIO status bit 2 is set, and we expected it.
+ * clear it and indicate it in chk0rcv.
+ * This probably only happens if a Port0 pkt
+ * arrives at _just_ the wrong time, and we
+ * handle that by setting chk0rcv;
+ */
+ to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
+ gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
+ chk0rcv = 1;
+ }
+ if (gpiostatus) {
+ /*
+ * Some unexpected bits remain. If they could have
+ * caused the interrupt, complain and clear.
+ * To avoid repetition of this condition, also clear
+ * the mask. It is almost certainly due to error.
+ */
+ const u32 mask = (u32) dd->ipath_gpio_mask;
+
+ if (mask & gpiostatus) {
+ ipath_dbg("Unexpected GPIO IRQ bits %x\n",
+ gpiostatus & mask);
+ to_clear |= (gpiostatus & mask);
+ dd->ipath_gpio_mask &= ~(gpiostatus & mask);
+ ipath_write_kreg(dd,
+ dd->ipath_kregs->kr_gpio_mask,
+ dd->ipath_gpio_mask);
+ }
+ }
+ if (to_clear) {
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
+ (u64) to_clear);
+ }
+ }
+
+ /*
+ * Clear the interrupt bits we found set, unless they are receive
+ * related, in which case we already cleared them above, and don't
+ * want to clear them again, because we might lose an interrupt.
+ * Clear it early, so we "know" know the chip will have seen this by
+ * the time we process the queue, and will re-interrupt if necessary.
+ * The processor itself won't take the interrupt again until we return.
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);
+
+ /*
+ * Handle kernel receive queues before checking for pio buffers
+ * available since receives can overflow; piobuf waiters can afford
+ * a few extra cycles, since they were waiting anyway, and users
+ * waiting for receive are at the bottom.
+ */
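+ /* bit 0 of each receive field corresponds to the kernel (port 0) */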
+ kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) |
+ (1ULL << dd->ipath_i_rcvurg_shift);
+ if (chk0rcv || (istat & kportrbits)) {
+ istat &= ~kportrbits;
+ ipath_kreceive(dd->ipath_pd[0]);
+ }
+
+ if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) |
+ (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift)))
+ handle_urcv(dd, istat);
+
+ if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED))
+ handle_sdma_intr(dd, istat);
+
+ if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ /* always process; sdma verbs uses PIO for acks and VL15 */
+ handle_layer_pioavail(dd);
+ }
+
+ ret = IRQ_HANDLED;
+
+bail:
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
new file mode 100644
index 0000000..0bd8bcb
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -0,0 +1,1365 @@
+#ifndef _IPATH_KERNEL_H
+#define _IPATH_KERNEL_H
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This header file is the base header file for infinipath kernel code
+ * ipath_user.h serves a similar purpose for user code.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+#include <asm/io.h>
+#include <rdma/ib_verbs.h>
+
+#include "ipath_common.h"
+#include "ipath_debug.h"
+#include "ipath_registers.h"
+
+/* only s/w major version of InfiniPath we can handle */
+#define IPATH_CHIP_VERS_MAJ 2U
+
+/* don't care about this except printing */
+#define IPATH_CHIP_VERS_MIN 0U
+
+/* temporary, maybe always */
+extern struct infinipath_stats ipath_stats;
+
+#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
+/*
+ * First-cut criterion for "device is active" is
+ * two thousand dwords combined Tx, Rx traffic per
+ * 5-second interval. SMA packets are 64 dwords,
+ * and occur "a few per second", presumably each way.
+ */
+#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
+/*
+ * Struct used to indicate which errors are logged in each of the
+ * error-counters that are logged to EEPROM. A counter is incremented
+ * _once_ (saturating at 255) for each event with any bits set in
+ * the error or hwerror register masks below.
+ */
+#define IPATH_EEP_LOG_CNT (4)
+struct ipath_eep_log_mask {
+ u64 errs_to_log;
+ u64 hwerrs_to_log;
+};
+
+struct ipath_portdata {
+ void **port_rcvegrbuf;
+ dma_addr_t *port_rcvegrbuf_phys;
+ /* rcvhdrq base, needs mmap before useful */
+ void *port_rcvhdrq;
+ /* kernel virtual address where hdrqtail is updated */
+ void *port_rcvhdrtail_kvaddr;
+ /*
+ * temp buffer for expected send setup, allocated at open, instead
+ * of each setup call
+ */
+ void *port_tid_pg_list;
+ /* when waiting for rcv or pioavail */
+ wait_queue_head_t port_wait;
+ /*
+ * rcvegr bufs base, physical, must fit
+ * in 44 bits (so 32 bit programs' mmap64 of 44 bits works)
+ */
+ dma_addr_t port_rcvegr_phys;
+ /* mmap of hdrq, must fit in 44 bits */
+ dma_addr_t port_rcvhdrq_phys;
+ dma_addr_t port_rcvhdrqtailaddr_phys;
+ /*
+ * number of opens (including slave subports) on this instance
+ * (ignoring forks, dup, etc. for now)
+ */
+ int port_cnt;
+ /*
+ * how much space to leave at start of eager TID entries for
+ * protocol use, on each TID
+ */
+ /* instead of calculating it */
+ unsigned port_port;
+ /* non-zero if port is being shared. */
+ u16 port_subport_cnt;
+ /* id of this subport within a shared port */
+ u16 port_subport_id;
+ /* number of pio bufs for this port (all procs, if shared) */
+ u32 port_piocnt;
+ /* first pio buffer for this port */
+ u32 port_pio_base;
+ /* chip offset of PIO buffers for this port */
+ u32 port_piobufs;
+ /* how many alloc_pages() chunks in port_rcvegrbuf_pages */
+ u32 port_rcvegrbuf_chunks;
+ /* how many egrbufs per chunk */
+ u32 port_rcvegrbufs_perchunk;
+ /* order for port_rcvegrbuf_pages */
+ size_t port_rcvegrbuf_size;
+ /* rcvhdrq size (for freeing) */
+ size_t port_rcvhdrq_size;
+ /* next expected TID to check when looking for free */
+ u32 port_tidcursor;
+ /* port state flags (IPATH_PORT_* bit offsets below) */
+ unsigned long port_flag;
+ /* what happened */
+ unsigned long int_flag;
+ /* WAIT_RCV that timed out, no interrupt */
+ u32 port_rcvwait_to;
+ /* WAIT_PIO that timed out, no interrupt */
+ u32 port_piowait_to;
+ /* WAIT_RCV already happened, no wait */
+ u32 port_rcvnowait;
+ /* WAIT_PIO already happened, no wait */
+ u32 port_pionowait;
+ /* total number of rcvhdrqfull errors */
+ u32 port_hdrqfull;
+ /*
+ * Used to suppress multiple instances of same
+ * port staying stuck at same point.
+ */
+ u32 port_lastrcvhdrqtail;
+ /* saved total number of rcvhdrqfull errors for poll edge trigger */
+ u32 port_hdrqfull_poll;
+ /* total number of polled urgent packets */
+ u32 port_urgent;
+ /* saved total number of polled urgent packets for poll edge trigger */
+ u32 port_urgent_poll;
+ /* pid of process using this port */
+ struct pid *port_pid;
+ struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
+ /* same size as task_struct .comm[] */
+ char port_comm[16];
+ /* pkeys set by this use of this port */
+ u16 port_pkeys[4];
+ /* so file ops can get at unit */
+ struct ipath_devdata *port_dd;
+ /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
+ void *subport_uregbase;
+ /* An array of pages for the eager receive buffers * N */
+ void *subport_rcvegrbuf;
+ /* An array of pages for the eager header queue entries * N */
+ void *subport_rcvhdr_base;
+ /* The version of the library which opened this port */
+ u32 userversion;
+ /* Bitmask of active slaves */
+ u32 active_slaves;
+ /* Type of packets or conditions we want to poll for */
+ u16 poll_type;
+ /* port rcvhdrq head offset */
+ u32 port_head;
+ /* receive packet sequence counter */
+ u32 port_seq_cnt;
+};
+
+struct sk_buff;
+struct ipath_sge_state;
+struct ipath_verbs_txreq;
+
+/*
+ * control information for layered drivers
+ */
+struct _ipath_layer {
+ void *l_arg;
+};
+
+struct ipath_skbinfo {
+ struct sk_buff *skb;
+ dma_addr_t phys;
+};
+
+struct ipath_sdma_txreq {
+ int flags;
+ int sg_count;
+ union {
+ struct scatterlist *sg;
+ void *map_addr;
+ };
+ void (*callback)(void *, int);
+ void *callback_cookie;
+ int callback_status;
+ u16 start_idx; /* sdma private */
+ u16 next_descq_idx; /* sdma private */
+ struct list_head list; /* sdma private */
+};
+
+struct ipath_sdma_desc {
+ __le64 qw[2];
+};
+
+#define IPATH_SDMA_TXREQ_F_USELARGEBUF 0x1
+#define IPATH_SDMA_TXREQ_F_HEADTOHOST 0x2
+#define IPATH_SDMA_TXREQ_F_INTREQ 0x4
+#define IPATH_SDMA_TXREQ_F_FREEBUF 0x8
+#define IPATH_SDMA_TXREQ_F_FREEDESC 0x10
+#define IPATH_SDMA_TXREQ_F_VL15 0x20
+
+#define IPATH_SDMA_TXREQ_S_OK 0
+#define IPATH_SDMA_TXREQ_S_SENDERROR 1
+#define IPATH_SDMA_TXREQ_S_ABORTED 2
+#define IPATH_SDMA_TXREQ_S_SHUTDOWN 3
+
+#define IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG (1ull << 63)
+#define IPATH_SDMA_STATUS_ABORT_IN_PROG (1ull << 62)
+#define IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE (1ull << 61)
+#define IPATH_SDMA_STATUS_SCB_EMPTY (1ull << 30)
+
+/* max dwords in small buffer packet */
+#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2)
+
+/*
+ * Possible IB config parameters for ipath_f_get/set_ib_cfg()
+ */
+#define IPATH_IB_CFG_LIDLMC 0 /* Get/set LID (LS16b) and Mask (MS16b) */
+#define IPATH_IB_CFG_HRTBT 1 /* Get/set Heartbeat off/enable/auto */
+#define IPATH_IB_HRTBT_ON 3 /* Heartbeat enabled, sent every 100msec */
+#define IPATH_IB_HRTBT_OFF 0 /* Heartbeat off */
+#define IPATH_IB_CFG_LWID_ENB 2 /* Get/set allowed Link-width */
+#define IPATH_IB_CFG_LWID 3 /* Get currently active Link-width */
+#define IPATH_IB_CFG_SPD_ENB 4 /* Get/set allowed Link speeds */
+#define IPATH_IB_CFG_SPD 5 /* Get current Link spd */
+#define IPATH_IB_CFG_RXPOL_ENB 6 /* Get/set Auto-RX-polarity enable */
+#define IPATH_IB_CFG_LREV_ENB 7 /* Get/set Auto-Lane-reversal enable */
+#define IPATH_IB_CFG_LINKLATENCY 8 /* Get link round-trip latency */
+
+
+struct ipath_devdata {
+ struct list_head ipath_list;
+
+ struct ipath_kregs const *ipath_kregs;
+ struct ipath_cregs const *ipath_cregs;
+
+ /* mem-mapped pointer to base of chip regs */
+ u64 __iomem *ipath_kregbase;
+ /* end of mem-mapped chip space; range checking */
+ u64 __iomem *ipath_kregend;
+ /* physical address of chip for io_remap, etc. */
+ unsigned long ipath_physaddr;
+ /* base of memory alloced for ipath_kregbase, for free */
+ u64 *ipath_kregalloc;
+ /* ipath_cfgports pointers */
+ struct ipath_portdata **ipath_pd;
+ /* sk_buffs used by port 0 eager receive queue */
+ struct ipath_skbinfo *ipath_port0_skbinfo;
+ /* kvirt address of 1st 2k pio buffer */
+ void __iomem *ipath_pio2kbase;
+ /* kvirt address of 1st 4k pio buffer */
+ void __iomem *ipath_pio4kbase;
+ /*
+ * points to area where PIOavail registers will be DMA'ed.
+ * Has to be on a page of its own, because the page will be
+ * mapped into user program space. This copy is *ONLY* ever
+ * written by DMA, not by the driver! Need a copy per device
+ * when we get to multiple devices
+ */
+ volatile __le64 *ipath_pioavailregs_dma;
+ /* physical address where updates occur */
+ dma_addr_t ipath_pioavailregs_phys;
+ struct _ipath_layer ipath_layer;
+ /* setup intr */
+ int (*ipath_f_intrsetup)(struct ipath_devdata *);
+ /* fallback to alternate interrupt type if possible */
+ int (*ipath_f_intr_fallback)(struct ipath_devdata *);
+ /* setup on-chip bus config */
+ int (*ipath_f_bus)(struct ipath_devdata *, struct pci_dev *);
+ /* hard reset chip */
+ int (*ipath_f_reset)(struct ipath_devdata *);
+ int (*ipath_f_get_boardname)(struct ipath_devdata *, char *,
+ size_t);
+ void (*ipath_f_init_hwerrors)(struct ipath_devdata *);
+ void (*ipath_f_handle_hwerrors)(struct ipath_devdata *, char *,
+ size_t);
+ void (*ipath_f_quiet_serdes)(struct ipath_devdata *);
+ int (*ipath_f_bringup_serdes)(struct ipath_devdata *);
+ int (*ipath_f_early_init)(struct ipath_devdata *);
+ void (*ipath_f_clear_tids)(struct ipath_devdata *, unsigned);
+ void (*ipath_f_put_tid)(struct ipath_devdata *, u64 __iomem*,
+ u32, unsigned long);
+ void (*ipath_f_tidtemplate)(struct ipath_devdata *);
+ void (*ipath_f_cleanup)(struct ipath_devdata *);
+ void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
+ /* fill out chip-specific fields */
+ int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
+ /* free irq */
+ void (*ipath_f_free_irq)(struct ipath_devdata *);
+ struct ipath_message_header *(*ipath_f_get_msgheader)
+ (struct ipath_devdata *, __le32 *);
+ void (*ipath_f_config_ports)(struct ipath_devdata *, ushort);
+ int (*ipath_f_get_ib_cfg)(struct ipath_devdata *, int);
+ int (*ipath_f_set_ib_cfg)(struct ipath_devdata *, int, u32);
+ void (*ipath_f_config_jint)(struct ipath_devdata *, u16 , u16);
+ void (*ipath_f_read_counters)(struct ipath_devdata *,
+ struct infinipath_counters *);
+ void (*ipath_f_xgxs_reset)(struct ipath_devdata *);
+ /* per chip actions needed for IB Link up/down changes */
+ int (*ipath_f_ib_updown)(struct ipath_devdata *, int, u64);
+
+ unsigned ipath_lastegr_idx;
+ struct ipath_ibdev *verbs_dev;
+ struct timer_list verbs_timer;
+ /* total dwords sent (summed from counter) */
+ u64 ipath_sword;
+ /* total dwords rcvd (summed from counter) */
+ u64 ipath_rword;
+ /* total packets sent (summed from counter) */
+ u64 ipath_spkts;
+ /* total packets rcvd (summed from counter) */
+ u64 ipath_rpkts;
+ /* ipath_statusp initially points to this. */
+ u64 _ipath_status;
+ /* GUID for this interface, in network order */
+ __be64 ipath_guid;
+ /*
+ * aggregate of error bits reported since last cleared, for
+ * limiting of error reporting
+ */
+ ipath_err_t ipath_lasterror;
+ /*
+ * aggregate of error bits reported since last cleared, for
+ * limiting of hwerror reporting
+ */
+ ipath_err_t ipath_lasthwerror;
+ /* errors masked because they occur too fast */
+ ipath_err_t ipath_maskederrs;
+ u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */
+ /* time in jiffies at which to re-enable maskederrs */
+ unsigned long ipath_unmasktime;
+ /* count of egrfull errors, combined for all ports */
+ u64 ipath_last_tidfull;
+ /* for ipath_qcheck() */
+ u64 ipath_lastport0rcv_cnt;
+ /* template for writing TIDs */
+ u64 ipath_tidtemplate;
+ /* value to write to free TIDs */
+ u64 ipath_tidinvalid;
+ /* IBA6120 rcv interrupt setup */
+ u64 ipath_rhdrhead_intr_off;
+
+ /* size of memory at ipath_kregbase */
+ u32 ipath_kregsize;
+ /* number of registers used for pioavail */
+ u32 ipath_pioavregs;
+ /* IPATH_POLL, etc. */
+ u32 ipath_flags;
+ /* ipath_flags driver is waiting for */
+ u32 ipath_state_wanted;
+ /* last buffer for user use; first buffer for kernel use is at
+ * this index. */
+ u32 ipath_lastport_piobuf;
+ /* is a stats timer active */
+ u32 ipath_stats_timer_active;
+ /* number of interrupts for this device -- saturates... */
+ u32 ipath_int_counter;
+ /* dwords sent read from counter */
+ u32 ipath_lastsword;
+ /* dwords received read from counter */
+ u32 ipath_lastrword;
+ /* sent packets read from counter */
+ u32 ipath_lastspkts;
+ /* received packets read from counter */
+ u32 ipath_lastrpkts;
+ /* pio bufs allocated per port */
+ u32 ipath_pbufsport;
+ /* if remainder on bufs/port, ports < extrabuf get 1 extra */
+ u32 ipath_ports_extrabuf;
+ u32 ipath_pioupd_thresh; /* update threshold, some chips */
+ /*
+ * number of ports configured as max; zero is set to number chip
+ * supports, less gives more pio bufs/port, etc.
+ */
+ u32 ipath_cfgports;
+ /* count of port 0 hdrqfull errors */
+ u32 ipath_p0_hdrqfull;
+ /* port 0 number of receive eager buffers */
+ u32 ipath_p0_rcvegrcnt;
+
+ /*
+ * index of last piobuffer we used. Speeds up searching, by
+ * starting at this point. It doesn't matter if multiple CPUs use
+ * and update it; the last updater's write is the only one that
+ * matters. Whenever it wraps, we update shadow copies. Need a
+ * copy per device when we get to multiple devices.
+ */
+ u32 ipath_lastpioindex;
+ u32 ipath_lastpioindexl;
+ /* max length of freezemsg */
+ u32 ipath_freezelen;
+ /*
+ * consecutive times we wanted a PIO buffer but were unable to
+ * get one
+ */
+ u32 ipath_consec_nopiobuf;
+ /*
+ * hint that we should update ipath_pioavailshadow before
+ * looking for a PIO buffer
+ */
+ u32 ipath_upd_pio_shadow;
+ /* so we can rewrite it after a chip reset */
+ u32 ipath_pcibar0;
+ /* so we can rewrite it after a chip reset */
+ u32 ipath_pcibar1;
+ u32 ipath_x1_fix_tries;
+ u32 ipath_autoneg_tries;
+ u32 serdes_first_init_done;
+
+ struct ipath_relock {
+ atomic_t ipath_relock_timer_active;
+ struct timer_list ipath_relock_timer;
+ unsigned int ipath_relock_interval; /* in jiffies */
+ } ipath_relock_singleton;
+
+ /* interrupt number */
+ int ipath_irq;
+ /* HT/PCI Vendor ID (here for NodeInfo) */
+ u16 ipath_vendorid;
+ /* HT/PCI Device ID (here for NodeInfo) */
+ u16 ipath_deviceid;
+ /* offset in HT config space of slave/primary interface block */
+ u8 ipath_ht_slave_off;
+ /* for write combining settings */
+ unsigned long ipath_wc_cookie;
+ unsigned long ipath_wc_base;
+ unsigned long ipath_wc_len;
+ /* ref count for each pkey */
+ atomic_t ipath_pkeyrefs[4];
+ /* shadow copy of struct page *'s for exp tid pages */
+ struct page **ipath_pageshadow;
+ /* shadow copy of dma handles for exp tid pages */
+ dma_addr_t *ipath_physshadow;
+ u64 __iomem *ipath_egrtidbase;
+ /* lock to workaround chip bug 9437 and others */
+ spinlock_t ipath_kernel_tid_lock;
+ spinlock_t ipath_user_tid_lock;
+ spinlock_t ipath_sendctrl_lock;
+
+ /*
+ * IPATH_STATUS_*,
+ * this address is mapped readonly into user processes so they can
+ * get status cheaply, whenever they want.
+ */
+ u64 *ipath_statusp;
+ /* freeze msg if hw error put chip in freeze */
+ char *ipath_freezemsg;
+ /* pci access data structure */
+ struct pci_dev *pcidev;
+ struct cdev *user_cdev;
+ struct cdev *diag_cdev;
+ struct device *user_dev;
+ struct device *diag_dev;
+ /* timer used to prevent stats overflow, error throttling, etc. */
+ struct timer_list ipath_stats_timer;
+ /* timer to verify interrupts work, and fallback if possible */
+ struct timer_list ipath_intrchk_timer;
+ void *ipath_dummy_hdrq; /* used after port close */
+ dma_addr_t ipath_dummy_hdrq_phys;
+
+ /* SendDMA related entries */
+ spinlock_t ipath_sdma_lock;
+ unsigned long ipath_sdma_status;
+ unsigned long ipath_sdma_abort_jiffies;
+ unsigned long ipath_sdma_abort_intr_timeout;
+ unsigned long ipath_sdma_buf_jiffies;
+ struct ipath_sdma_desc *ipath_sdma_descq;
+ u64 ipath_sdma_descq_added;
+ u64 ipath_sdma_descq_removed;
+ int ipath_sdma_desc_nreserved;
+ u16 ipath_sdma_descq_cnt;
+ u16 ipath_sdma_descq_tail;
+ u16 ipath_sdma_descq_head;
+ u16 ipath_sdma_next_intr;
+ u16 ipath_sdma_reset_wait;
+ u8 ipath_sdma_generation;
+ struct tasklet_struct ipath_sdma_abort_task;
+ struct tasklet_struct ipath_sdma_notify_task;
+ struct list_head ipath_sdma_activelist;
+ struct list_head ipath_sdma_notifylist;
+ atomic_t ipath_sdma_vl15_count;
+ struct timer_list ipath_sdma_vl15_timer;
+
+ dma_addr_t ipath_sdma_descq_phys;
+ volatile __le64 *ipath_sdma_head_dma;
+ dma_addr_t ipath_sdma_head_phys;
+
+ unsigned long ipath_ureg_align; /* user register alignment */
+
+ struct delayed_work ipath_autoneg_work;
+ wait_queue_head_t ipath_autoneg_wait;
+
+ /* HoL blocking / user app forward-progress state */
+ unsigned ipath_hol_state;
+ unsigned ipath_hol_next;
+ struct timer_list ipath_hol_timer;
+
+ /*
+ * Shadow copies of registers; size indicates read access size.
+ * Most of them are readonly, but some are write-only registers,
+ * where we manipulate the bits in the shadow copy, and then write
+ * the shadow copy to infinipath.
+ *
+ * We deliberately make most of these 32 bits, since they have
+ * restricted range. For any that we read, we want to generate 32
+ * bit accesses, since Opteron will generate 2 separate 32 bit HT
+ * transactions for a 64 bit read, and we want to avoid unnecessary
+ * HT transactions.
+ */
+
+ /* This is the 64 bit group */
+
+ /*
+ * shadow of pioavail, check to be sure it's large enough at
+ * init time.
+ */
+ unsigned long ipath_pioavailshadow[8];
+ /* bitmap of send buffers available for the kernel to use with PIO. */
+ unsigned long ipath_pioavailkernel[8];
+ /* shadow of kr_gpio_out, for rmw ops */
+ u64 ipath_gpio_out;
+ /* shadow the gpio mask register */
+ u64 ipath_gpio_mask;
+ /* shadow the gpio output enable, etc... */
+ u64 ipath_extctrl;
+ /* kr_revision shadow */
+ u64 ipath_revision;
+ /*
+ * shadow of ibcctrl, for interrupt handling of link changes,
+ * etc.
+ */
+ u64 ipath_ibcctrl;
+ /*
+ * last ibcstatus, to suppress "duplicate" status change messages,
+ * mostly from 2 to 3
+ */
+ u64 ipath_lastibcstat;
+ /* hwerrmask shadow */
+ ipath_err_t ipath_hwerrmask;
+ ipath_err_t ipath_errormask; /* errormask shadow */
+ /* interrupt config reg shadow */
+ u64 ipath_intconfig;
+ /* kr_sendpiobufbase value */
+ u64 ipath_piobufbase;
+ /* kr_ibcddrctrl shadow */
+ u64 ipath_ibcddrctrl;
+
+ /* these are the "32 bit" regs */
+
+ /*
+ * number of GUIDs in the flash for this interface; may need some
+ * rethinking for setting on other ifaces
+ */
+ u32 ipath_nguid;
+ /*
+ * the following two are 32-bit bitmasks, but {test,clear,set}_bit
+ * all expect bit fields to be "unsigned long"
+ */
+ /* shadow kr_rcvctrl */
+ unsigned long ipath_rcvctrl;
+ /* shadow kr_sendctrl */
+ unsigned long ipath_sendctrl;
+ /* to not count armlaunch after cancel */
+ unsigned long ipath_lastcancel;
+ /* count cases where special trigger was needed (double write) */
+ unsigned long ipath_spectriggerhit;
+
+ /* value we put in kr_rcvhdrcnt */
+ u32 ipath_rcvhdrcnt;
+ /* value we put in kr_rcvhdrsize */
+ u32 ipath_rcvhdrsize;
+ /* value we put in kr_rcvhdrentsize */
+ u32 ipath_rcvhdrentsize;
+ /* offset of last entry in rcvhdrq */
+ u32 ipath_hdrqlast;
+ /* kr_portcnt value */
+ u32 ipath_portcnt;
+ /* kr_pagealign value */
+ u32 ipath_palign;
+ /* number of "2KB" PIO buffers */
+ u32 ipath_piobcnt2k;
+ /* size in bytes of "2KB" PIO buffers */
+ u32 ipath_piosize2k;
+ /* number of "4KB" PIO buffers */
+ u32 ipath_piobcnt4k;
+ /* size in bytes of "4KB" PIO buffers */
+ u32 ipath_piosize4k;
+ u32 ipath_pioreserved; /* reserved special-inkernel; */
+ /* kr_rcvegrbase value */
+ u32 ipath_rcvegrbase;
+ /* kr_rcvegrcnt value */
+ u32 ipath_rcvegrcnt;
+ /* kr_rcvtidbase value */
+ u32 ipath_rcvtidbase;
+ /* kr_rcvtidcnt value */
+ u32 ipath_rcvtidcnt;
+ /* kr_sendregbase */
+ u32 ipath_sregbase;
+ /* kr_userregbase */
+ u32 ipath_uregbase;
+ /* kr_counterregbase */
+ u32 ipath_cregbase;
+ /* shadow the control register contents */
+ u32 ipath_control;
+ /* PCI revision register (HTC rev on FPGA) */
+ u32 ipath_pcirev;
+
+ /* chip address space used by 4k pio buffers */
+ u32 ipath_4kalign;
+ /* The MTU programmed for this unit */
+ u32 ipath_ibmtu;
+ /*
+ * The max size IB packet, including IB headers, that we can send.
+ * Starts same as ipath_piosize, but is affected when ibmtu is
+ * changed, or by size of eager buffers
+ */
+ u32 ipath_ibmaxlen;
+ /*
+ * ibmaxlen at init time, limited by chip and by receive buffer
+ * size. Not changed after init.
+ */
+ u32 ipath_init_ibmaxlen;
+ /* size of each rcvegrbuffer */
+ u32 ipath_rcvegrbufsize;
+ /* localbus width (1, 2,4,8,16,32) from config space */
+ u32 ipath_lbus_width;
+ /* localbus speed (HT: 200,400,800,1000; PCIe 2500) */
+ u32 ipath_lbus_speed;
+ /*
+ * number of sequential ibcstatus changes for polling active/quiet
+ * (i.e., link not coming up).
+ */
+ u32 ipath_ibpollcnt;
+ /* low and high portions of MSI capability/vector */
+ u32 ipath_msi_lo;
+ /* saved after PCIe init for restore after reset */
+ u32 ipath_msi_hi;
+ /* MSI data (vector) saved for restore */
+ u16 ipath_msi_data;
+ /* MLID programmed for this instance */
+ u16 ipath_mlid;
+ /* LID programmed for this instance */
+ u16 ipath_lid;
+ /* list of pkeys programmed; 0 if not set */
+ u16 ipath_pkeys[4];
+ /*
+ * ASCII serial number, from flash, large enough for original
+ * all digit strings, and longer QLogic serial number format
+ */
+ u8 ipath_serial[16];
+ /* human readable board version */
+ u8 ipath_boardversion[96];
+ u8 ipath_lbus_info[32]; /* human readable localbus info */
+ /* chip major rev, from ipath_revision */
+ u8 ipath_majrev;
+ /* chip minor rev, from ipath_revision */
+ u8 ipath_minrev;
+ /* board rev, from ipath_revision */
+ u8 ipath_boardrev;
+ /* saved for restore after reset */
+ u8 ipath_pci_cacheline;
+ /* LID mask control */
+ u8 ipath_lmc;
+ /* link width supported */
+ u8 ipath_link_width_supported;
+ /* link speed supported */
+ u8 ipath_link_speed_supported;
+ u8 ipath_link_width_enabled;
+ u8 ipath_link_speed_enabled;
+ u8 ipath_link_width_active;
+ u8 ipath_link_speed_active;
+ /* Rx Polarity inversion (compensate for ~tx on partner) */
+ u8 ipath_rx_pol_inv;
+
+ u8 ipath_r_portenable_shift;
+ u8 ipath_r_intravail_shift;
+ u8 ipath_r_tailupd_shift;
+ u8 ipath_r_portcfg_shift;
+
+ /* unit # of this chip, if present */
+ int ipath_unit;
+
+ /* local link integrity counter */
+ u32 ipath_lli_counter;
+ /* local link integrity errors */
+ u32 ipath_lli_errors;
+ /*
+ * Above counts only cases where _successive_ LocalLinkIntegrity
+ * errors were seen in the receive headers of kern-packets.
+ * Below are the three (monotonically increasing) counters
+ * maintained via GPIO interrupts on iba6120-rev2.
+ */
+ u32 ipath_rxfc_unsupvl_errs;
+ u32 ipath_overrun_thresh_errs;
+ u32 ipath_lli_errs;
+
+ /*
+ * Not all devices managed by a driver instance are the same
+ * type, so these fields must be per-device.
+ */
+ u64 ipath_i_bitsextant;
+ ipath_err_t ipath_e_bitsextant;
+ ipath_err_t ipath_hwe_bitsextant;
+
+ /*
+ * Below should be computable from number of ports,
+ * since they are never modified.
+ */
+ u64 ipath_i_rcvavail_mask;
+ u64 ipath_i_rcvurg_mask;
+ u16 ipath_i_rcvurg_shift;
+ u16 ipath_i_rcvavail_shift;
+
+ /*
+ * Register bits for selecting i2c direction and values, used for
+ * I2C serial flash.
+ */
+ u8 ipath_gpio_sda_num;
+ u8 ipath_gpio_scl_num;
+ u8 ipath_i2c_chain_type;
+ u64 ipath_gpio_sda;
+ u64 ipath_gpio_scl;
+
+ /* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */
+ spinlock_t ipath_gpio_lock;
+
+ /*
+ * IB link and linktraining states and masks that vary per chip in
+ * some way. Set at init, to avoid each IB status change interrupt
+ */
+ u8 ibcs_ls_shift;
+ u8 ibcs_lts_mask;
+ u32 ibcs_mask;
+ u32 ib_init;
+ u32 ib_arm;
+ u32 ib_active;
+
+ u16 ipath_rhf_offset; /* offset of RHF within receive header entry */
+
+ /*
+ * shift/mask for linkcmd, linkinitcmd, maxpktlen in ibccontrol
+ * reg. Changes for IBA7220
+ */
+ u8 ibcc_lic_mask; /* LinkInitCmd */
+ u8 ibcc_lc_shift; /* LinkCmd */
+ u8 ibcc_mpl_shift; /* Maxpktlen */
+
+ u8 delay_mult;
+
+ /* used to override LED behavior */
+ u8 ipath_led_override; /* Substituted for normal value, if non-zero */
+ u16 ipath_led_override_timeoff; /* delta to next timer event */
+ u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */
+ u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */
+ atomic_t ipath_led_override_timer_active;
+ /* Used to flash LEDs in override mode */
+ struct timer_list ipath_led_override_timer;
+
+ /* Support (including locks) for EEPROM logging of errors and time */
+ /* control access to actual counters, timer */
+ spinlock_t ipath_eep_st_lock;
+ /* control high-level access to EEPROM */
+ struct mutex ipath_eep_lock;
+ /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
+ uint64_t ipath_traffic_wds;
+ /* active time is kept in seconds, but logged in hours */
+ atomic_t ipath_active_time;
+ /* Below are nominal shadow of EEPROM, new since last EEPROM update */
+ uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
+ uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
+ uint16_t ipath_eep_hrs;
+ /*
+ * masks for which bits of errs, hwerrs that cause
+ * each of the counters to increment.
+ */
+ struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
+
+ /* interrupt mitigation reload register info */
+ u16 ipath_jint_idle_ticks; /* idle clock ticks */
+ u16 ipath_jint_max_packets; /* max packets across all ports */
+
+ /*
+ * lock for access to SerDes, and flags to sequence preset
+ * versus steady-state. 7220-only at the moment.
+ */
+ spinlock_t ipath_sdepb_lock;
+ u8 ipath_presets_needed; /* Set if presets to be restored next DOWN */
+};
+
+/* ipath_hol_state values (stopping/starting user proc, send flushing) */
+#define IPATH_HOL_UP 0
+#define IPATH_HOL_DOWN 1
+/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */
+#define IPATH_HOL_DOWNSTOP 0
+#define IPATH_HOL_DOWNCONT 1
+
+/* bit positions for sdma_status */
+#define IPATH_SDMA_ABORTING 0
+#define IPATH_SDMA_DISARMED 1
+#define IPATH_SDMA_DISABLED 2
+#define IPATH_SDMA_LAYERBUF 3
+#define IPATH_SDMA_RUNNING 30
+#define IPATH_SDMA_SHUTDOWN 31
+
+/* bit combinations that correspond to abort states */
+#define IPATH_SDMA_ABORT_NONE 0
+#define IPATH_SDMA_ABORT_ABORTING (1UL << IPATH_SDMA_ABORTING)
+#define IPATH_SDMA_ABORT_DISARMED ((1UL << IPATH_SDMA_ABORTING) | \
+ (1UL << IPATH_SDMA_DISARMED))
+#define IPATH_SDMA_ABORT_DISABLED ((1UL << IPATH_SDMA_ABORTING) | \
+ (1UL << IPATH_SDMA_DISABLED))
+#define IPATH_SDMA_ABORT_ABORTED ((1UL << IPATH_SDMA_ABORTING) | \
+ (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
+#define IPATH_SDMA_ABORT_MASK ((1UL<<IPATH_SDMA_ABORTING) | \
+ (1UL << IPATH_SDMA_DISARMED) | (1UL << IPATH_SDMA_DISABLED))
+
+#define IPATH_SDMA_BUF_NONE 0
+#define IPATH_SDMA_BUF_MASK (1UL<<IPATH_SDMA_LAYERBUF)
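+
+/*
+ * Illustrative sketch only (compiled out): the combinations above are
+ * compared against dd->ipath_sdma_status under IPATH_SDMA_ABORT_MASK.
+ * The helper name below is made up.
+ */
+#if 0
+static int example_sdma_fully_aborted(const struct ipath_devdata *dd)
+{
+ return (dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK) ==
+ IPATH_SDMA_ABORT_ABORTED;
+}
+#endif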
+
+/* Private data for file operations */
+struct ipath_filedata {
+ struct ipath_portdata *pd;
+ unsigned subport;
+ unsigned tidcursor;
+ struct ipath_user_sdma_queue *pq;
+};
+extern struct list_head ipath_dev_list;
+extern spinlock_t ipath_devs_lock;
+extern struct ipath_devdata *ipath_lookup(int unit);
+
+int ipath_init_chip(struct ipath_devdata *, int);
+int ipath_enable_wc(struct ipath_devdata *dd);
+void ipath_disable_wc(struct ipath_devdata *dd);
+int ipath_count_units(int *npresentp, int *nupp, int *maxportsp);
+void ipath_shutdown_device(struct ipath_devdata *);
+void ipath_clear_freeze(struct ipath_devdata *);
+
+struct file_operations;
+int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
+ struct cdev **cdevp, struct device **devp);
+void ipath_cdev_cleanup(struct cdev **cdevp,
+ struct device **devp);
+
+int ipath_diag_add(struct ipath_devdata *);
+void ipath_diag_remove(struct ipath_devdata *);
+
+extern wait_queue_head_t ipath_state_wait;
+
+int ipath_user_add(struct ipath_devdata *dd);
+void ipath_user_remove(struct ipath_devdata *dd);
+
+struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
+
+extern int ipath_diag_inuse;
+
+irqreturn_t ipath_intr(int irq, void *devid);
+int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
+ ipath_err_t err);
+#if __IPATH_INFO || __IPATH_DBG
+extern const char *ipath_ibcstatus_str[];
+#endif
+
+/* clean up any per-chip chip-specific stuff */
+void ipath_chip_cleanup(struct ipath_devdata *);
+/* clean up any chip type-specific stuff */
+void ipath_chip_done(void);
+
+/* check to see if we have to force ordering for write combining */
+int ipath_unordered_wc(void);
+
+void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
+ unsigned cnt);
+void ipath_cancel_sends(struct ipath_devdata *, int);
+
+int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
+void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
+
+int ipath_parse_ushort(const char *str, unsigned short *valp);
+
+void ipath_kreceive(struct ipath_portdata *);
+int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
+int ipath_reset_device(int);
+void ipath_get_faststats(unsigned long);
+int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
+int ipath_set_linkstate(struct ipath_devdata *, u8);
+int ipath_set_mtu(struct ipath_devdata *, u16);
+int ipath_set_lid(struct ipath_devdata *, u32, u8);
+int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
+void ipath_enable_armlaunch(struct ipath_devdata *);
+void ipath_disable_armlaunch(struct ipath_devdata *);
+void ipath_hol_down(struct ipath_devdata *);
+void ipath_hol_up(struct ipath_devdata *);
+void ipath_hol_event(unsigned long);
+void ipath_toggle_rclkrls(struct ipath_devdata *);
+void ipath_sd7220_clr_ibpar(struct ipath_devdata *);
+void ipath_set_relock_poll(struct ipath_devdata *, int);
+void ipath_shutdown_relock_poll(struct ipath_devdata *);
+
+/* for use in system calls, where we want to know device type, etc. */
+#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
+#define subport_fp(fp) \
+ ((struct ipath_filedata *)(fp)->private_data)->subport
+#define tidcursor_fp(fp) \
+ ((struct ipath_filedata *)(fp)->private_data)->tidcursor
+#define user_sdma_queue_fp(fp) \
+ ((struct ipath_filedata *)(fp)->private_data)->pq
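+
+/*
+ * Illustrative usage only: these accessors assume file->private_data
+ * points at a struct ipath_filedata, e.g. inside a file operation:
+ *
+ *   struct ipath_portdata *pd = port_fp(fp);
+ *   unsigned subport = subport_fp(fp);
+ */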
+
+/*
+ * values for ipath_flags
+ */
+ /* chip can report link latency (IB 1.2) */
+#define IPATH_HAS_LINK_LATENCY 0x1
+ /* The chip is up and initted */
+#define IPATH_INITTED 0x2
+ /* set if any user code has set kr_rcvhdrsize */
+#define IPATH_RCVHDRSZ_SET 0x4
+ /* The chip is present and valid for accesses */
+#define IPATH_PRESENT 0x8
+ /* HT link0 is only 8 bits wide, ignore upper byte crc
+ * errors, etc. */
+#define IPATH_8BIT_IN_HT0 0x10
+ /* HT link1 is only 8 bits wide, ignore upper byte crc
+ * errors, etc. */
+#define IPATH_8BIT_IN_HT1 0x20
+ /* The link is down */
+#define IPATH_LINKDOWN 0x40
+ /* The link level is up (0x11) */
+#define IPATH_LINKINIT 0x80
+ /* The link is in the armed (0x21) state */
+#define IPATH_LINKARMED 0x100
+ /* The link is in the active (0x31) state */
+#define IPATH_LINKACTIVE 0x200
+ /* link current state is unknown */
+#define IPATH_LINKUNK 0x400
+ /* Write combining flush needed for PIO */
+#define IPATH_PIO_FLUSH_WC 0x1000
+ /* chip does not DMA the rcvhdrq tail; poll RHF sequence numbers instead */
+#define IPATH_NODMA_RTAIL 0x2000
+ /* no IB cable, or no device on IB cable */
+#define IPATH_NOCABLE 0x4000
+ /* Supports port zero per packet receive interrupts via
+ * GPIO */
+#define IPATH_GPIO_INTR 0x8000
+ /* uses the coded 4byte TID, not 8 byte */
+#define IPATH_4BYTE_TID 0x10000
+ /* packet/word counters are 32 bit, else those 4 counters
+ * are 64bit */
+#define IPATH_32BITCOUNTERS 0x20000
+ /* Interrupt register is 64 bits */
+#define IPATH_INTREG_64 0x40000
+ /* can miss port0 rx interrupts */
+#define IPATH_DISABLED 0x80000 /* administratively disabled */
+ /* Use GPIO interrupts for new counters */
+#define IPATH_GPIO_ERRINTRS 0x100000
+#define IPATH_SWAP_PIOBUFS 0x200000
+ /* Supports Send DMA */
+#define IPATH_HAS_SEND_DMA 0x400000
+ /* Supports Send Count (not just word count) in PBC */
+#define IPATH_HAS_PBC_CNT 0x800000
+ /* Suppress heartbeat, even if turning off loopback */
+#define IPATH_NO_HRTBT 0x1000000
+#define IPATH_HAS_THRESH_UPDATE 0x4000000
+#define IPATH_HAS_MULT_IB_SPEED 0x8000000
+#define IPATH_IB_AUTONEG_INPROG 0x10000000
+#define IPATH_IB_AUTONEG_FAILED 0x20000000
+ /* Linkdown-disable intentionally, Do not attempt to bring up */
+#define IPATH_IB_LINK_DISABLED 0x40000000
+#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */
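+
+/*
+ * Illustrative usage only: the values above are simple bit flags in
+ * dd->ipath_flags, tested and combined the usual way, e.g.:
+ *
+ *   if (dd->ipath_flags & IPATH_LINKACTIVE)
+ *           ... link is in the active state ...
+ */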
+
+/* Bits in GPIO for the added interrupts */
+#define IPATH_GPIO_PORT0_BIT 2
+#define IPATH_GPIO_RXUVL_BIT 3
+#define IPATH_GPIO_OVRUN_BIT 4
+#define IPATH_GPIO_LLI_BIT 5
+#define IPATH_GPIO_ERRINTR_MASK 0x38
+
+/* portdata flag bit offsets */
+ /* waiting for a packet to arrive */
+#define IPATH_PORT_WAITING_RCV 2
+ /* master has not finished initializing */
+#define IPATH_PORT_MASTER_UNINIT 4
+ /* waiting for an urgent packet to arrive */
+#define IPATH_PORT_WAITING_URG 5
+
+/* free up any allocated data at closes */
+void ipath_free_data(struct ipath_portdata *dd);
+u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *);
+void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
+ unsigned len, int avail);
+void ipath_init_iba7220_funcs(struct ipath_devdata *);
+void ipath_init_iba6120_funcs(struct ipath_devdata *);
+void ipath_init_iba6110_funcs(struct ipath_devdata *);
+void ipath_get_eeprom_info(struct ipath_devdata *);
+int ipath_update_eeprom_log(struct ipath_devdata *dd);
+void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
+u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
+void ipath_disarm_senderrbufs(struct ipath_devdata *);
+void ipath_force_pio_avail_update(struct ipath_devdata *);
+void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
+
+/*
+ * Set LED override, only the two LSBs have "public" meaning, but
+ * any non-zero value substitutes them for the Link and LinkTrain
+ * LED states.
+ */
+#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
+#define IPATH_LED_LOG 2 /* Logical (link) YELLOW LED */
+void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
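+
+/*
+ * Illustrative usage only (the value shown is just an example): forcing
+ * both LEDs to the "on" state would be
+ *
+ *   ipath_set_led_override(dd, IPATH_LED_PHYS | IPATH_LED_LOG);
+ */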
+
+/* send dma routines */
+int setup_sdma(struct ipath_devdata *);
+void teardown_sdma(struct ipath_devdata *);
+void ipath_restart_sdma(struct ipath_devdata *);
+void ipath_sdma_intr(struct ipath_devdata *);
+int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *,
+ u32, struct ipath_verbs_txreq *);
+/* ipath_sdma_lock should be locked before calling this. */
+int ipath_sdma_make_progress(struct ipath_devdata *dd);
+
+/* must be called under ipath_sdma_lock */
+static inline u16 ipath_sdma_descq_freecnt(const struct ipath_devdata *dd)
+{
+ return dd->ipath_sdma_descq_cnt -
+ (dd->ipath_sdma_descq_added - dd->ipath_sdma_descq_removed) -
+ 1 - dd->ipath_sdma_desc_nreserved;
+}
+
+static inline void ipath_sdma_desc_reserve(struct ipath_devdata *dd, u16 cnt)
+{
+ dd->ipath_sdma_desc_nreserved += cnt;
+}
+
+static inline void ipath_sdma_desc_unreserve(struct ipath_devdata *dd, u16 cnt)
+{
+ dd->ipath_sdma_desc_nreserved -= cnt;
+}
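+
+/*
+ * Illustrative sketch only (compiled out): a caller is expected to take
+ * ipath_sdma_lock, check the free count, and reserve descriptors before
+ * queueing a request. Everything except the three helpers above is made up.
+ */
+#if 0
+static int example_sdma_reserve(struct ipath_devdata *dd, u16 ndesc)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ if (ipath_sdma_descq_freecnt(dd) >= ndesc)
+ ipath_sdma_desc_reserve(dd, ndesc);
+ else
+ ret = -EBUSY;
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ return ret;
+}
+#endif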
+
+/*
+ * number of words used for protocol header if not set by ipath_userinit()
+ */
+#define IPATH_DFLT_RCVHDRSIZE 9
+
+int ipath_get_user_pages(unsigned long, size_t, struct page **);
+void ipath_release_user_pages(struct page **, size_t);
+void ipath_release_user_pages_on_close(struct page **, size_t);
+int ipath_eeprom_read(struct ipath_devdata *, u8, void *, int);
+int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int);
+int ipath_tempsense_read(struct ipath_devdata *, u8 regnum);
+int ipath_tempsense_write(struct ipath_devdata *, u8 regnum, u8 data);
+
+/* these are used for the registers that vary with port */
+void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg,
+ unsigned, u64);
+
+/*
+ * We could have a single register get/put routine that takes a group type,
+ * but this is somewhat clearer and cleaner. It also gives us some error
+ * checking. 64 bit register reads should always work, but are inefficient
+ * on opteron (the northbridge always generates 2 separate HT 32 bit reads),
+ * so we use kreg32 wherever possible. User register and counter register
+ * reads are always 32 bit reads, so only one form of those routines.
+ */
+
+/*
+ * At the moment, none of the s-registers are writable, so no
+ * ipath_write_sreg().
+ */
+
+/**
+ * ipath_read_ureg32 - read 32-bit virtualized per-port register
+ * @dd: device
+ * @regno: register number
+ * @port: port number
+ *
+ * Return the contents of a register that is virtualized to be per port.
+ * Returns 0 on errors (not distinguishable from valid contents at
+ * runtime; we may add a separate error variable at some point).
+ */
+static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd,
+ ipath_ureg regno, int port)
+{
+ if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
+ return 0;
+
+ return readl(regno + (u64 __iomem *)
+ (dd->ipath_uregbase +
+ (char __iomem *)dd->ipath_kregbase +
+ dd->ipath_ureg_align * port));
+}
+
+/**
+ * ipath_write_ureg - write virtualized per-port register
+ * @dd: device
+ * @regno: register number
+ * @value: value
+ * @port: port
+ *
+ * Write the contents of a register that is virtualized to be per port.
+ */
+static inline void ipath_write_ureg(const struct ipath_devdata *dd,
+ ipath_ureg regno, u64 value, int port)
+{
+ u64 __iomem *ubase = (u64 __iomem *)
+ (dd->ipath_uregbase + (char __iomem *) dd->ipath_kregbase +
+ dd->ipath_ureg_align * port);
+ if (dd->ipath_kregbase)
+ writeq(value, &ubase[regno]);
+}
+
+static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd,
+ ipath_kreg regno)
+{
+ if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
+ return -1;
+ return readl((u32 __iomem *) &dd->ipath_kregbase[regno]);
+}
+
+static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd,
+ ipath_kreg regno)
+{
+ if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
+ return -1;
+
+ return readq(&dd->ipath_kregbase[regno]);
+}
+
+static inline void ipath_write_kreg(const struct ipath_devdata *dd,
+ ipath_kreg regno, u64 value)
+{
+ if (dd->ipath_kregbase)
+ writeq(value, &dd->ipath_kregbase[regno]);
+}
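+
+/*
+ * Illustrative usage only: write-only control registers are updated by
+ * modifying the shadow copy and writing the whole shadow back, as the
+ * ibcctrl handling in ipath_mad.c does:
+ *
+ *   dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+ *   ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, dd->ipath_ibcctrl);
+ */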
+
+static inline u64 ipath_read_creg(const struct ipath_devdata *dd,
+ ipath_creg regno)
+{
+ if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
+ return 0;
+
+ return readq(regno + (u64 __iomem *)
+ (dd->ipath_cregbase +
+ (char __iomem *)dd->ipath_kregbase));
+}
+
+static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
+ ipath_creg regno)
+{
+ if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
+ return 0;
+ return readl(regno + (u64 __iomem *)
+ (dd->ipath_cregbase +
+ (char __iomem *)dd->ipath_kregbase));
+}
+
+static inline void ipath_write_creg(const struct ipath_devdata *dd,
+ ipath_creg regno, u64 value)
+{
+ if (dd->ipath_kregbase)
+ writeq(value, regno + (u64 __iomem *)
+ (dd->ipath_cregbase +
+ (char __iomem *)dd->ipath_kregbase));
+}
+
+static inline void ipath_clear_rcvhdrtail(const struct ipath_portdata *pd)
+{
+ *((u64 *) pd->port_rcvhdrtail_kvaddr) = 0ULL;
+}
+
+static inline u32 ipath_get_rcvhdrtail(const struct ipath_portdata *pd)
+{
+ return (u32) le64_to_cpu(*((volatile __le64 *)
+ pd->port_rcvhdrtail_kvaddr));
+}
+
+static inline u32 ipath_get_hdrqtail(const struct ipath_portdata *pd)
+{
+ const struct ipath_devdata *dd = pd->port_dd;
+ u32 hdrqtail;
+
+ if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
+ __le32 *rhf_addr;
+ u32 seq;
+
+ rhf_addr = (__le32 *) pd->port_rcvhdrq +
+ pd->port_head + dd->ipath_rhf_offset;
+ seq = ipath_hdrget_seq(rhf_addr);
+ hdrqtail = pd->port_head;
+ if (seq == pd->port_seq_cnt)
+ hdrqtail++;
+ } else
+ hdrqtail = ipath_get_rcvhdrtail(pd);
+
+ return hdrqtail;
+}
+
+static inline u64 ipath_read_ireg(const struct ipath_devdata *dd, ipath_kreg r)
+{
+ return (dd->ipath_flags & IPATH_INTREG_64) ?
+ ipath_read_kreg64(dd, r) : ipath_read_kreg32(dd, r);
+}
+
+/*
+ * from contents of IBCStatus (or a saved copy), return linkstate.
+ * ACTIVE_DEFER is reported as ACTIVE, because we treat them the same
+ * everywhere anyway (and they should be, for almost all purposes).
+ */
+static inline u32 ipath_ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
+{
+ u32 state = (u32)(ibcs >> dd->ibcs_ls_shift) &
+ INFINIPATH_IBCS_LINKSTATE_MASK;
+ if (state == INFINIPATH_IBCS_L_STATE_ACT_DEFER)
+ state = INFINIPATH_IBCS_L_STATE_ACTIVE;
+ return state;
+}
+
+/* from contents of IBCStatus (or a saved copy), return linktrainingstate */
+static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs)
+{
+ return (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
+ dd->ibcs_lts_mask;
+}
+
+/*
+ * from contents of IBCStatus (or a saved copy), return the logical link
+ * state: the combination of link state and linktraining state (down,
+ * active, init, arm, etc.)
+ */
+static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs)
+{
+ u32 ibs;
+ ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
+ dd->ibcs_lts_mask;
+ ibs |= (u32)(ibcs &
+ (INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift));
+ return ibs;
+}
+
+/*
+ * sysfs interface.
+ */
+
+struct device_driver;
+
+extern const char ib_ipath_version[];
+
+extern struct attribute_group *ipath_driver_attr_groups[];
+
+int ipath_device_create_group(struct device *, struct ipath_devdata *);
+void ipath_device_remove_group(struct device *, struct ipath_devdata *);
+int ipath_expose_reset(struct device *);
+
+int ipath_init_ipathfs(void);
+void ipath_exit_ipathfs(void);
+int ipathfs_add_device(struct ipath_devdata *);
+int ipathfs_remove_device(struct ipath_devdata *);
+
+/*
+ * dma_addr wrappers - all 0's invalid for hw
+ */
+dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
+ size_t, int);
+dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
+const char *ipath_get_unit_name(int unit);
+
+/*
+ * Flush write combining store buffers (if present) and perform a write
+ * barrier.
+ */
+#if defined(CONFIG_X86_64)
+#define ipath_flush_wc() asm volatile("sfence" ::: "memory")
+#else
+#define ipath_flush_wc() wmb()
+#endif
+
+extern unsigned ipath_debug; /* debugging bit mask */
+extern unsigned ipath_linkrecovery;
+extern unsigned ipath_mtu4096;
+extern struct mutex ipath_mutex;
+
+#define IPATH_DRV_NAME "ib_ipath"
+#define IPATH_MAJOR 233
+#define IPATH_USER_MINOR_BASE 0
+#define IPATH_DIAGPKT_MINOR 127
+#define IPATH_DIAG_MINOR_BASE 129
+#define IPATH_NMINORS 255
+
+#define ipath_dev_err(dd,fmt,...) \
+ do { \
+ const struct ipath_devdata *__dd = (dd); \
+ if (__dd->pcidev) \
+ dev_err(&__dd->pcidev->dev, "%s: " fmt, \
+ ipath_get_unit_name(__dd->ipath_unit), \
+ ##__VA_ARGS__); \
+ else \
+ printk(KERN_ERR IPATH_DRV_NAME ": %s: " fmt, \
+ ipath_get_unit_name(__dd->ipath_unit), \
+ ##__VA_ARGS__); \
+ } while (0)
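+
+/*
+ * Illustrative usage only (the message is made up): ipath_dev_err() takes
+ * a printk-style format and prefixes it with the unit name, e.g.
+ *
+ *   ipath_dev_err(dd, "failed to allocate rcvhdrq\n");
+ */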
+
+#if _IPATH_DEBUGGING
+
+# define __IPATH_DBG_WHICH(which,fmt,...) \
+ do { \
+ if (unlikely(ipath_debug & (which))) \
+ printk(KERN_DEBUG IPATH_DRV_NAME ": %s: " fmt, \
+ __func__,##__VA_ARGS__); \
+ } while(0)
+
+# define ipath_dbg(fmt,...) \
+ __IPATH_DBG_WHICH(__IPATH_DBG,fmt,##__VA_ARGS__)
+# define ipath_cdbg(which,fmt,...) \
+ __IPATH_DBG_WHICH(__IPATH_##which##DBG,fmt,##__VA_ARGS__)
+
+#else /* ! _IPATH_DEBUGGING */
+
+# define ipath_dbg(fmt,...)
+# define ipath_cdbg(which,fmt,...)
+
+#endif /* _IPATH_DEBUGGING */
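+
+/*
+ * Illustrative usage only (the message is made up): ipath_dbg() prints
+ * only when the __IPATH_DBG bit is set in the ipath_debug mask, e.g.
+ *
+ *   ipath_dbg("unit %u link state changed\n", dd->ipath_unit);
+ */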
+
+/*
+ * this is used for formatting hw error messages...
+ */
+struct ipath_hwerror_msgs {
+ u64 mask;
+ const char *msg;
+};
+
+#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
+
+/* in ipath_intr.c... */
+void ipath_format_hwerrors(u64 hwerrs,
+ const struct ipath_hwerror_msgs *hwerrmsgs,
+ size_t nhwerrmsgs,
+ char *msg, size_t lmsg);
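+
+/*
+ * Illustrative sketch only: per-chip code builds a table with the
+ * INFINIPATH_HWE_MSG() initializer and passes it to ipath_format_hwerrors().
+ * The error name below is a placeholder, not a real hardware error bit.
+ *
+ *   static const struct ipath_hwerror_msgs example_msgs[] = {
+ *           INFINIPATH_HWE_MSG(SOMEERR, "Some hardware error"),
+ *   };
+ *   ipath_format_hwerrors(hwerrs, example_msgs, ARRAY_SIZE(example_msgs),
+ *                         msg, sizeof(msg));
+ */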
+
+#endif /* _IPATH_KERNEL_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
new file mode 100644
index 0000000..8f32b17
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <asm/io.h>
+
+#include "ipath_verbs.h"
+#include "ipath_kernel.h"
+
+/**
+ * ipath_alloc_lkey - allocate an lkey
+ * @rkt: lkey table in which to allocate the lkey
+ * @mr: memory region that this lkey protects
+ *
+ * Returns 1 if successful, otherwise returns 0.
+ */
+
+int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
+{
+ unsigned long flags;
+ u32 r;
+ u32 n;
+ int ret;
+
+ spin_lock_irqsave(&rkt->lock, flags);
+
+ /* Find the next available LKEY */
+ r = n = rkt->next;
+ for (;;) {
+ if (rkt->table[r] == NULL)
+ break;
+ r = (r + 1) & (rkt->max - 1);
+ if (r == n) {
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ ipath_dbg("LKEY table full\n");
+ ret = 0;
+ goto bail;
+ }
+ }
+ rkt->next = (r + 1) & (rkt->max - 1);
+ /*
+ * Make sure lkey is never zero, which is reserved to indicate an
+ * unrestricted LKEY.
+ */
+ rkt->gen++;
+ mr->lkey = (r << (32 - ib_ipath_lkey_table_size)) |
+ ((((1 << (24 - ib_ipath_lkey_table_size)) - 1) & rkt->gen)
+ << 8);
+ if (mr->lkey == 0) {
+ mr->lkey |= 1 << 8;
+ rkt->gen++;
+ }
+ rkt->table[r] = mr;
+ spin_unlock_irqrestore(&rkt->lock, flags);
+
+ ret = 1;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_free_lkey - free an lkey
+ * @rkt: table from which to free the lkey
+ * @lkey: lkey id to free
+ */
+void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey)
+{
+ unsigned long flags;
+ u32 r;
+
+ if (lkey == 0)
+ return;
+ r = lkey >> (32 - ib_ipath_lkey_table_size);
+ spin_lock_irqsave(&rkt->lock, flags);
+ rkt->table[r] = NULL;
+ spin_unlock_irqrestore(&rkt->lock, flags);
+}
+
+/**
+ * ipath_lkey_ok - check IB SGE for validity and initialize
+ * @qp: queue pair whose PD and lkey table the SGE is checked against
+ * @isge: outgoing internal SGE
+ * @sge: SGE to check
+ * @acc: access flags
+ *
+ * Return 1 if valid and successful, otherwise returns 0.
+ *
+ * Check the IB SGE for validity and initialize our internal version
+ * of it.
+ */
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
+ struct ib_sge *sge, int acc)
+{
+ struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
+ struct ipath_mregion *mr;
+ unsigned n, m;
+ size_t off;
+ int ret;
+
+ /*
+ * We use LKEY == zero for kernel virtual addresses
+ * (see ipath_get_dma_mr and ipath_dma.c).
+ */
+ if (sge->lkey == 0) {
+ struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+ if (pd->user) {
+ ret = 0;
+ goto bail;
+ }
+ isge->mr = NULL;
+ isge->vaddr = (void *) sge->addr;
+ isge->length = sge->length;
+ isge->sge_length = sge->length;
+ ret = 1;
+ goto bail;
+ }
+ mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
+ if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
+ qp->ibqp.pd != mr->pd)) {
+ ret = 0;
+ goto bail;
+ }
+
+ off = sge->addr - mr->user_base;
+ if (unlikely(sge->addr < mr->user_base ||
+ off + sge->length > mr->length ||
+ (mr->access_flags & acc) != acc)) {
+ ret = 0;
+ goto bail;
+ }
+
+ off += mr->offset;
+ m = 0;
+ n = 0;
+ while (off >= mr->map[m]->segs[n].length) {
+ off -= mr->map[m]->segs[n].length;
+ n++;
+ if (n >= IPATH_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+ isge->mr = mr;
+ isge->vaddr = mr->map[m]->segs[n].vaddr + off;
+ isge->length = mr->map[m]->segs[n].length - off;
+ isge->sge_length = sge->length;
+ isge->m = m;
+ isge->n = n;
+
+ ret = 1;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_rkey_ok - check the IB virtual address, length, and RKEY
+ * @qp: queue pair whose PD and rkey table the request is checked against
+ * @ss: SGE state
+ * @len: length of data
+ * @vaddr: virtual address to place data
+ * @rkey: rkey to check
+ * @acc: access flags
+ *
+ * Return 1 if successful, otherwise 0.
+ */
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
+ u32 len, u64 vaddr, u32 rkey, int acc)
+{
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ struct ipath_lkey_table *rkt = &dev->lk_table;
+ struct ipath_sge *sge = &ss->sge;
+ struct ipath_mregion *mr;
+ unsigned n, m;
+ size_t off;
+ int ret;
+
+ /*
+ * We use RKEY == zero for kernel virtual addresses
+ * (see ipath_get_dma_mr and ipath_dma.c).
+ */
+ if (rkey == 0) {
+ struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
+
+ if (pd->user) {
+ ret = 0;
+ goto bail;
+ }
+ sge->mr = NULL;
+ sge->vaddr = (void *) vaddr;
+ sge->length = len;
+ sge->sge_length = len;
+ ss->sg_list = NULL;
+ ss->num_sge = 1;
+ ret = 1;
+ goto bail;
+ }
+
+ mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
+ if (unlikely(mr == NULL || mr->lkey != rkey ||
+ qp->ibqp.pd != mr->pd)) {
+ ret = 0;
+ goto bail;
+ }
+
+ off = vaddr - mr->iova;
+ if (unlikely(vaddr < mr->iova || off + len > mr->length ||
+ (mr->access_flags & acc) == 0)) {
+ ret = 0;
+ goto bail;
+ }
+
+ off += mr->offset;
+ m = 0;
+ n = 0;
+ while (off >= mr->map[m]->segs[n].length) {
+ off -= mr->map[m]->segs[n].length;
+ n++;
+ if (n >= IPATH_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+ sge->mr = mr;
+ sge->vaddr = mr->map[m]->segs[n].vaddr + off;
+ sge->length = mr->map[m]->segs[n].length - off;
+ sge->sge_length = len;
+ sge->m = m;
+ sge->n = n;
+ ss->sg_list = NULL;
+ ss->num_sge = 1;
+
+ ret = 1;
+
+bail:
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
new file mode 100644
index 0000000..be4fc9a
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -0,0 +1,1638 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_smi.h>
+
+#include "ipath_kernel.h"
+#include "ipath_verbs.h"
+#include "ipath_common.h"
+
+#define IB_SMP_UNSUP_VERSION __constant_htons(0x0004)
+#define IB_SMP_UNSUP_METHOD __constant_htons(0x0008)
+#define IB_SMP_UNSUP_METH_ATTR __constant_htons(0x000C)
+#define IB_SMP_INVALID_FIELD __constant_htons(0x001C)
+
+static int reply(struct ib_smp *smp)
+{
+ /*
+ * The verbs framework will handle the directed/LID route
+ * packet changes.
+ */
+ smp->method = IB_MGMT_METHOD_GET_RESP;
+ if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ smp->status |= IB_SMP_DIRECTION;
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+static int recv_subn_get_nodedescription(struct ib_smp *smp,
+ struct ib_device *ibdev)
+{
+ if (smp->attr_mod)
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ strncpy(smp->data, ibdev->node_desc, sizeof(smp->data));
+
+ return reply(smp);
+}
+
+struct nodeinfo {
+ u8 base_version;
+ u8 class_version;
+ u8 node_type;
+ u8 num_ports;
+ __be64 sys_guid;
+ __be64 node_guid;
+ __be64 port_guid;
+ __be16 partition_cap;
+ __be16 device_id;
+ __be32 revision;
+ u8 local_port_num;
+ u8 vendor_id[3];
+} __attribute__ ((packed));
+
+static int recv_subn_get_nodeinfo(struct ib_smp *smp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct nodeinfo *nip = (struct nodeinfo *)&smp->data;
+ struct ipath_devdata *dd = to_idev(ibdev)->dd;
+ u32 vendor, majrev, minrev;
+
+ /* GUID 0 is illegal */
+ if (smp->attr_mod || (dd->ipath_guid == 0))
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ nip->base_version = 1;
+ nip->class_version = 1;
+ nip->node_type = 1; /* channel adapter */
+ /*
+ * XXX The num_ports value will need a layer function to get
+ * the value if we ever have more than one IB port on a chip.
+ * We will also need to get the GUID for the port.
+ */
+ nip->num_ports = ibdev->phys_port_cnt;
+ /* This is already in network order */
+ nip->sys_guid = to_idev(ibdev)->sys_image_guid;
+ nip->node_guid = dd->ipath_guid;
+ nip->port_guid = dd->ipath_guid;
+ nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
+ nip->device_id = cpu_to_be16(dd->ipath_deviceid);
+ majrev = dd->ipath_majrev;
+ minrev = dd->ipath_minrev;
+ nip->revision = cpu_to_be32((majrev << 16) | minrev);
+ nip->local_port_num = port;
+ vendor = dd->ipath_vendorid;
+ nip->vendor_id[0] = IPATH_SRC_OUI_1;
+ nip->vendor_id[1] = IPATH_SRC_OUI_2;
+ nip->vendor_id[2] = IPATH_SRC_OUI_3;
+
+ return reply(smp);
+}
+
+static int recv_subn_get_guidinfo(struct ib_smp *smp,
+ struct ib_device *ibdev)
+{
+ u32 startgx = 8 * be32_to_cpu(smp->attr_mod);
+ __be64 *p = (__be64 *) smp->data;
+
+ /* 32 blocks of 8 64-bit GUIDs per block */
+
+ memset(smp->data, 0, sizeof(smp->data));
+
+ /*
+ * We only support one GUID for now. If this changes, the
+ * portinfo.guid_cap field needs to be updated too.
+ */
+ if (startgx == 0) {
+ __be64 g = to_idev(ibdev)->dd->ipath_guid;
+ if (g == 0)
+ /* GUID 0 is illegal */
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else
+ /* The first is a copy of the read-only HW GUID. */
+ *p = g;
+ } else
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ return reply(smp);
+}
+
+static void set_link_width_enabled(struct ipath_devdata *dd, u32 w)
+{
+ (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, w);
+}
+
+static void set_link_speed_enabled(struct ipath_devdata *dd, u32 s)
+{
+ (void) dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, s);
+}
+
+static int get_overrunthreshold(struct ipath_devdata *dd)
+{
+ return (dd->ipath_ibcctrl >>
+ INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+}
+
+/**
+ * set_overrunthreshold - set the overrun threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
+{
+ unsigned v;
+
+ v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
+ if (v != n) {
+ dd->ipath_ibcctrl &=
+ ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
+ INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
+ dd->ipath_ibcctrl |=
+ (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ }
+ return 0;
+}
+
+static int get_phyerrthreshold(struct ipath_devdata *dd)
+{
+ return (dd->ipath_ibcctrl >>
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+}
+
+/**
+ * set_phyerrthreshold - set the physical error threshold
+ * @dd: the infinipath device
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
+{
+ unsigned v;
+
+ v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
+ if (v != n) {
+ dd->ipath_ibcctrl &=
+ ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
+ INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
+ dd->ipath_ibcctrl |=
+ (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ }
+ return 0;
+}
+
+/**
+ * get_linkdowndefaultstate - get the default linkdown state
+ * @dd: the infinipath device
+ *
+ * Returns zero if the default is POLL, 1 if the default is SLEEP.
+ */
+static int get_linkdowndefaultstate(struct ipath_devdata *dd)
+{
+ return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
+}
+
+static int recv_subn_get_portinfo(struct ib_smp *smp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ipath_ibdev *dev;
+ struct ipath_devdata *dd;
+ struct ib_port_info *pip = (struct ib_port_info *)smp->data;
+ u16 lid;
+ u8 ibcstat;
+ u8 mtu;
+ int ret;
+
+ if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ ret = reply(smp);
+ goto bail;
+ }
+
+ dev = to_idev(ibdev);
+ dd = dev->dd;
+
+ /* Clear all fields. Only set the non-zero fields. */
+ memset(smp->data, 0, sizeof(smp->data));
+
+ /* Only return the mkey if the protection field allows it. */
+ if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey ||
+ dev->mkeyprot == 0)
+ pip->mkey = dev->mkey;
+ pip->gid_prefix = dev->gid_prefix;
+ lid = dd->ipath_lid;
+ pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
+ pip->sm_lid = cpu_to_be16(dev->sm_lid);
+ pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
+ /* pip->diag_code; */
+ pip->mkey_lease_period = cpu_to_be16(dev->mkey_lease_period);
+ pip->local_port_num = port;
+ pip->link_width_enabled = dd->ipath_link_width_enabled;
+ pip->link_width_supported = dd->ipath_link_width_supported;
+ pip->link_width_active = dd->ipath_link_width_active;
+ pip->linkspeed_portstate = dd->ipath_link_speed_supported << 4;
+ ibcstat = dd->ipath_lastibcstat;
+ /* map LinkState to IB portinfo values. */
+ pip->linkspeed_portstate |= ipath_ib_linkstate(dd, ibcstat) + 1;
+
+ pip->portphysstate_linkdown =
+ (ipath_cvt_physportstate[ibcstat & dd->ibcs_lts_mask] << 4) |
+ (get_linkdowndefaultstate(dd) ? 1 : 2);
+ pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dd->ipath_lmc;
+ pip->linkspeedactive_enabled = (dd->ipath_link_speed_active << 4) |
+ dd->ipath_link_speed_enabled;
+ switch (dd->ipath_ibmtu) {
+ case 4096:
+ mtu = IB_MTU_4096;
+ break;
+ case 2048:
+ mtu = IB_MTU_2048;
+ break;
+ case 1024:
+ mtu = IB_MTU_1024;
+ break;
+ case 512:
+ mtu = IB_MTU_512;
+ break;
+ case 256:
+ mtu = IB_MTU_256;
+ break;
+ default: /* oops, something is wrong */
+ mtu = IB_MTU_2048;
+ break;
+ }
+ pip->neighbormtu_mastersmsl = (mtu << 4) | dev->sm_sl;
+ pip->vlcap_inittype = 0x10; /* VLCap = VL0, InitType = 0 */
+ pip->vl_high_limit = dev->vl_high_limit;
+ /* pip->vl_arb_high_cap; // only one VL */
+ /* pip->vl_arb_low_cap; // only one VL */
+ /* InitTypeReply = 0 */
+ /* our mtu cap depends on whether 4K MTU enabled or not */
+ pip->inittypereply_mtucap = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
+ /* HCAs ignore VLStallCount and HOQLife */
+ /* pip->vlstallcnt_hoqlife; */
+ pip->operationalvl_pei_peo_fpi_fpo = 0x10; /* OVLs = 1 */
+ pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
+ /* P_KeyViolations are counted by hardware. */
+ pip->pkey_violations =
+ cpu_to_be16((ipath_get_cr_errpkey(dd) -
+ dev->z_pkey_violations) & 0xFFFF);
+ pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
+ /* Only the hardware GUID is supported for now */
+ pip->guid_cap = 1;
+ pip->clientrereg_resv_subnetto = dev->subnet_timeout;
+ /* 32.768 usec. response time (guessing) */
+ pip->resv_resptimevalue = 3;
+ pip->localphyerrors_overrunerrors =
+ (get_phyerrthreshold(dd) << 4) |
+ get_overrunthreshold(dd);
+ /* pip->max_credit_hint; */
+ if (dev->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
+ u32 v;
+
+ v = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LINKLATENCY);
+ pip->link_roundtrip_latency[0] = v >> 16;
+ pip->link_roundtrip_latency[1] = v >> 8;
+ pip->link_roundtrip_latency[2] = v;
+ }
+
+ ret = reply(smp);
+
+bail:
+ return ret;
+}
+
+/**
+ * get_pkeys - return the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the pkey table is placed here
+ */
+static int get_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+{
+ struct ipath_portdata *pd = dd->ipath_pd[0];
+
+ memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
+
+ return 0;
+}
+
+static int recv_subn_get_pkeytable(struct ib_smp *smp,
+ struct ib_device *ibdev)
+{
+ u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
+ u16 *p = (u16 *) smp->data;
+ __be16 *q = (__be16 *) smp->data;
+
+ /* 64 blocks of 32 16-bit P_Key entries */
+
+ memset(smp->data, 0, sizeof(smp->data));
+ if (startpx == 0) {
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ unsigned i, n = ipath_get_npkeys(dev->dd);
+
+ get_pkeys(dev->dd, p);
+
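+ /*
+ * p and q alias smp->data, so the table is converted
+ * from host order to big-endian in place.
+ */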
+ for (i = 0; i < n; i++)
+ q[i] = cpu_to_be16(p[i]);
+ } else
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ return reply(smp);
+}
+
+static int recv_subn_set_guidinfo(struct ib_smp *smp,
+ struct ib_device *ibdev)
+{
+ /* The only GUID we support is the first read-only entry. */
+ return recv_subn_get_guidinfo(smp, ibdev);
+}
+
+/**
+ * set_linkdowndefaultstate - set the default linkdown state
+ * @dd: the infinipath device
+ * @sleep: the new state
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
+{
+ if (sleep)
+ dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+ else
+ dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl);
+ return 0;
+}
+
+/**
+ * recv_subn_set_portinfo - set port information
+ * @smp: the incoming SM packet
+ * @ibdev: the infiniband device
+ * @port: the port on the device
+ *
+ * Set Portinfo (see ch. 14.2.5.6).
+ */
+static int recv_subn_set_portinfo(struct ib_smp *smp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_port_info *pip = (struct ib_port_info *)smp->data;
+ struct ib_event event;
+ struct ipath_ibdev *dev;
+ struct ipath_devdata *dd;
+ char clientrereg = 0;
+ u16 lid, smlid;
+ u8 lwe;
+ u8 lse;
+ u8 state;
+ u16 lstate;
+ u32 mtu;
+ int ret, ore;
+
+ if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
+ goto err;
+
+ dev = to_idev(ibdev);
+ dd = dev->dd;
+ event.device = ibdev;
+ event.element.port_num = port;
+
+ dev->mkey = pip->mkey;
+ dev->gid_prefix = pip->gid_prefix;
+ dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
+
+ lid = be16_to_cpu(pip->lid);
+ if (dd->ipath_lid != lid ||
+ dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) {
+ /* Must be a valid unicast LID address. */
+ if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
+ goto err;
+ ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7);
+ event.event = IB_EVENT_LID_CHANGE;
+ ib_dispatch_event(&event);
+ }
+
+ smlid = be16_to_cpu(pip->sm_lid);
+ if (smlid != dev->sm_lid) {
+ /* Must be a valid unicast LID address. */
+ if (smlid == 0 || smlid >= IPATH_MULTICAST_LID_BASE)
+ goto err;
+ dev->sm_lid = smlid;
+ event.event = IB_EVENT_SM_CHANGE;
+ ib_dispatch_event(&event);
+ }
+
+ /* Allow 1x or 4x to be set (see 14.2.6.6). */
+ lwe = pip->link_width_enabled;
+ if (lwe) {
+ if (lwe == 0xFF)
+ lwe = dd->ipath_link_width_supported;
+ else if (lwe >= 16 || (lwe & ~dd->ipath_link_width_supported))
+ goto err;
+ set_link_width_enabled(dd, lwe);
+ }
+
+ /* Allow 2.5 or 5.0 Gb/s. */
+ lse = pip->linkspeedactive_enabled & 0xF;
+ if (lse) {
+ if (lse == 15)
+ lse = dd->ipath_link_speed_supported;
+ else if (lse >= 8 || (lse & ~dd->ipath_link_speed_supported))
+ goto err;
+ set_link_speed_enabled(dd, lse);
+ }
+
+ /* Set link down default state. */
+ switch (pip->portphysstate_linkdown & 0xF) {
+ case 0: /* NOP */
+ break;
+ case 1: /* SLEEP */
+ if (set_linkdowndefaultstate(dd, 1))
+ goto err;
+ break;
+ case 2: /* POLL */
+ if (set_linkdowndefaultstate(dd, 0))
+ goto err;
+ break;
+ default:
+ goto err;
+ }
+
+ dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
+ dev->vl_high_limit = pip->vl_high_limit;
+
+ switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) {
+ case IB_MTU_256:
+ mtu = 256;
+ break;
+ case IB_MTU_512:
+ mtu = 512;
+ break;
+ case IB_MTU_1024:
+ mtu = 1024;
+ break;
+ case IB_MTU_2048:
+ mtu = 2048;
+ break;
+ case IB_MTU_4096:
+ if (!ipath_mtu4096)
+ goto err;
+ mtu = 4096;
+ break;
+ default:
+ /* XXX We have already partially updated our state! */
+ goto err;
+ }
+ ipath_set_mtu(dd, mtu);
+
+ dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
+
+ /* We only support VL0 */
+ if (((pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF) > 1)
+ goto err;
+
+ if (pip->mkey_violations == 0)
+ dev->mkey_violations = 0;
+
+ /*
+ * The hardware counter can't be reset, so snapshot the current
+ * value and subtract it from later readings.
+ */
+ if (pip->pkey_violations == 0)
+ dev->z_pkey_violations = ipath_get_cr_errpkey(dd);
+
+ if (pip->qkey_violations == 0)
+ dev->qkey_violations = 0;
+
+ ore = pip->localphyerrors_overrunerrors;
+ if (set_phyerrthreshold(dd, (ore >> 4) & 0xF))
+ goto err;
+
+ if (set_overrunthreshold(dd, (ore & 0xF)))
+ goto err;
+
+ dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
+
+ if (pip->clientrereg_resv_subnetto & 0x80) {
+ clientrereg = 1;
+ event.event = IB_EVENT_CLIENT_REREGISTER;
+ ib_dispatch_event(&event);
+ }
+
+ /*
+ * Do the port state change now that the other link parameters
+ * have been set.
+ * Changing the port physical state only makes sense if the link
+ * is down or is being set to down.
+ */
+ state = pip->linkspeed_portstate & 0xF;
+ lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
+ if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
+ goto err;
+
+ /*
+ * Only state changes of DOWN, ARM, and ACTIVE are valid
+ * and must be in the correct state to take effect (see 7.2.6).
+ */
+ switch (state) {
+ case IB_PORT_NOP:
+ if (lstate == 0)
+ break;
+ /* FALLTHROUGH */
+ case IB_PORT_DOWN:
+ if (lstate == 0)
+ lstate = IPATH_IB_LINKDOWN_ONLY;
+ else if (lstate == 1)
+ lstate = IPATH_IB_LINKDOWN_SLEEP;
+ else if (lstate == 2)
+ lstate = IPATH_IB_LINKDOWN;
+ else if (lstate == 3)
+ lstate = IPATH_IB_LINKDOWN_DISABLE;
+ else
+ goto err;
+ ipath_set_linkstate(dd, lstate);
+ if (lstate == IPATH_IB_LINKDOWN_DISABLE) {
+ ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+ goto done;
+ }
+ ipath_wait_linkstate(dd, IPATH_LINKINIT | IPATH_LINKARMED |
+ IPATH_LINKACTIVE, 1000);
+ break;
+ case IB_PORT_ARMED:
+ ipath_set_linkstate(dd, IPATH_IB_LINKARM);
+ break;
+ case IB_PORT_ACTIVE:
+ ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
+ break;
+ default:
+ /* XXX We have already partially updated our state! */
+ goto err;
+ }
+
+ ret = recv_subn_get_portinfo(smp, ibdev, port);
+
+ if (clientrereg)
+ pip->clientrereg_resv_subnetto |= 0x80;
+
+ goto done;
+
+err:
+ smp->status |= IB_SMP_INVALID_FIELD;
+ ret = recv_subn_get_portinfo(smp, ibdev, port);
+
+done:
+ return ret;
+}
+
+/**
+ * rm_pkey - decrement the reference count for the given PKEY
+ * @dd: the infinipath device
+ * @key: the PKEY index
+ *
+ * Return true if this was the last reference and the hardware table entry
+ * needs to be changed.
+ */
+static int rm_pkey(struct ipath_devdata *dd, u16 key)
+{
+ int i;
+ int ret;
+
+ for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+ if (dd->ipath_pkeys[i] != key)
+ continue;
+ if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
+ dd->ipath_pkeys[i] = 0;
+ ret = 1;
+ goto bail;
+ }
+ break;
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * add_pkey - add the given PKEY to the hardware table
+ * @dd: the infinipath device
+ * @key: the PKEY
+ *
+ * Return an error code if unable to add the entry, zero if no change,
+ * or 1 if the hardware PKEY register needs to be updated.
+ */
+static int add_pkey(struct ipath_devdata *dd, u16 key)
+{
+ int i;
+ u16 lkey = key & 0x7FFF;
+ int any = 0;
+ int ret;
+
+ if (lkey == 0x7FFF) {
+ ret = 0;
+ goto bail;
+ }
+
+ /* Look for an empty slot or a matching PKEY. */
+ for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+ if (!dd->ipath_pkeys[i]) {
+ any++;
+ continue;
+ }
+ /* If it matches exactly, try to increment the ref count */
+ if (dd->ipath_pkeys[i] == key) {
+ if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
+ ret = 0;
+ goto bail;
+ }
+ /* Lost the race. Look for an empty slot below. */
+ atomic_dec(&dd->ipath_pkeyrefs[i]);
+ any++;
+ }
+ /*
+ * It makes no sense to have both the limited and unlimited
+ * PKEY set at the same time since the unlimited one will
+ * disable the limited one.
+ */
+ if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
+ ret = -EEXIST;
+ goto bail;
+ }
+ }
+ if (!any) {
+ ret = -EBUSY;
+ goto bail;
+ }
+ for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
+ if (!dd->ipath_pkeys[i] &&
+ atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
+ /* for ipathstats, etc. */
+ ipath_stats.sps_pkeys[i] = lkey;
+ dd->ipath_pkeys[i] = key;
+ ret = 1;
+ goto bail;
+ }
+ }
+ ret = -EBUSY;
+
+bail:
+ return ret;
+}
+
+/**
+ * set_pkeys - set the PKEY table for port 0
+ * @dd: the infinipath device
+ * @pkeys: the PKEY table
+ */
+static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+{
+ struct ipath_portdata *pd;
+ int i;
+ int changed = 0;
+
+ pd = dd->ipath_pd[0];
+
+ for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
+ u16 key = pkeys[i];
+ u16 okey = pd->port_pkeys[i];
+
+ if (key == okey)
+ continue;
+ /*
+ * The value of this PKEY table entry is changing.
+ * Remove the old entry in the hardware's array of PKEYs.
+ */
+ if (okey & 0x7FFF)
+ changed |= rm_pkey(dd, okey);
+ if (key & 0x7FFF) {
+ int ret = add_pkey(dd, key);
+
+ if (ret < 0)
+ key = 0;
+ else
+ changed |= ret;
+ }
+ pd->port_pkeys[i] = key;
+ }
+ if (changed) {
+ u64 pkey;
+
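+ /*
+ * The four PKEY slots pack into one 64-bit partition
+ * key register, 16 bits each.
+ */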
+ pkey = (u64) dd->ipath_pkeys[0] |
+ ((u64) dd->ipath_pkeys[1] << 16) |
+ ((u64) dd->ipath_pkeys[2] << 32) |
+ ((u64) dd->ipath_pkeys[3] << 48);
+ ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
+ (unsigned long long) pkey);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
+ pkey);
+ }
+ return 0;
+}
+
+static int recv_subn_set_pkeytable(struct ib_smp *smp,
+ struct ib_device *ibdev)
+{
+ u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
+ __be16 *p = (__be16 *) smp->data;
+ u16 *q = (u16 *) smp->data;
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ unsigned i, n = ipath_get_npkeys(dev->dd);
+
+ for (i = 0; i < n; i++)
+ q[i] = be16_to_cpu(p[i]);
+
+ if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ return recv_subn_get_pkeytable(smp, ibdev);
+}
+
+#define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001)
+#define IB_PMA_PORT_SAMPLES_CONTROL __constant_htons(0x0010)
+#define IB_PMA_PORT_SAMPLES_RESULT __constant_htons(0x0011)
+#define IB_PMA_PORT_COUNTERS __constant_htons(0x0012)
+#define IB_PMA_PORT_COUNTERS_EXT __constant_htons(0x001D)
+#define IB_PMA_PORT_SAMPLES_RESULT_EXT __constant_htons(0x001E)
+
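+/*
+ * Layout of a performance management (PMA) MAD: the common MAD header
+ * followed by 40 reserved bytes and 192 bytes of attribute data.
+ */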
+struct ib_perf {
+ u8 base_version;
+ u8 mgmt_class;
+ u8 class_version;
+ u8 method;
+ __be16 status;
+ __be16 unused;
+ __be64 tid;
+ __be16 attr_id;
+ __be16 resv;
+ __be32 attr_mod;
+ u8 reserved[40];
+ u8 data[192];
+} __attribute__ ((packed));
+
+struct ib_pma_classportinfo {
+ u8 base_version;
+ u8 class_version;
+ __be16 cap_mask;
+ u8 reserved[3];
+ u8 resp_time_value; /* only lower 5 bits */
+ union ib_gid redirect_gid;
+ __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */
+ __be16 redirect_lid;
+ __be16 redirect_pkey;
+ __be32 redirect_qp; /* only lower 24 bits */
+ __be32 redirect_qkey;
+ union ib_gid trap_gid;
+ __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */
+ __be16 trap_lid;
+ __be16 trap_pkey;
+ __be32 trap_hl_qp; /* 8, 24 bits respectively */
+ __be32 trap_qkey;
+} __attribute__ ((packed));
+
+struct ib_pma_portsamplescontrol {
+ u8 opcode;
+ u8 port_select;
+ u8 tick;
+ u8 counter_width; /* only lower 3 bits */
+ __be32 counter_mask0_9; /* 2, 10 * 3, bits */
+ __be16 counter_mask10_14; /* 1, 5 * 3, bits */
+ u8 sample_mechanisms;
+ u8 sample_status; /* only lower 2 bits */
+ __be64 option_mask;
+ __be64 vendor_mask;
+ __be32 sample_start;
+ __be32 sample_interval;
+ __be16 tag;
+ __be16 counter_select[15];
+} __attribute__ ((packed));
+
+struct ib_pma_portsamplesresult {
+ __be16 tag;
+ __be16 sample_status; /* only lower 2 bits */
+ __be32 counter[15];
+} __attribute__ ((packed));
+
+struct ib_pma_portsamplesresult_ext {
+ __be16 tag;
+ __be16 sample_status; /* only lower 2 bits */
+ __be32 extended_width; /* only upper 2 bits */
+ __be64 counter[15];
+} __attribute__ ((packed));
+
+struct ib_pma_portcounters {
+ u8 reserved;
+ u8 port_select;
+ __be16 counter_select;
+ __be16 symbol_error_counter;
+ u8 link_error_recovery_counter;
+ u8 link_downed_counter;
+ __be16 port_rcv_errors;
+ __be16 port_rcv_remphys_errors;
+ __be16 port_rcv_switch_relay_errors;
+ __be16 port_xmit_discards;
+ u8 port_xmit_constraint_errors;
+ u8 port_rcv_constraint_errors;
+ u8 reserved1;
+ u8 lli_ebor_errors; /* 4, 4, bits */
+ __be16 reserved2;
+ __be16 vl15_dropped;
+ __be32 port_xmit_data;
+ __be32 port_rcv_data;
+ __be32 port_xmit_packets;
+ __be32 port_rcv_packets;
+} __attribute__ ((packed));
+
+#define IB_PMA_SEL_SYMBOL_ERROR __constant_htons(0x0001)
+#define IB_PMA_SEL_LINK_ERROR_RECOVERY __constant_htons(0x0002)
+#define IB_PMA_SEL_LINK_DOWNED __constant_htons(0x0004)
+#define IB_PMA_SEL_PORT_RCV_ERRORS __constant_htons(0x0008)
+#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS __constant_htons(0x0010)
+#define IB_PMA_SEL_PORT_XMIT_DISCARDS __constant_htons(0x0040)
+#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS __constant_htons(0x0200)
+#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS __constant_htons(0x0400)
+#define IB_PMA_SEL_PORT_VL15_DROPPED __constant_htons(0x0800)
+#define IB_PMA_SEL_PORT_XMIT_DATA __constant_htons(0x1000)
+#define IB_PMA_SEL_PORT_RCV_DATA __constant_htons(0x2000)
+#define IB_PMA_SEL_PORT_XMIT_PACKETS __constant_htons(0x4000)
+#define IB_PMA_SEL_PORT_RCV_PACKETS __constant_htons(0x8000)
+
+struct ib_pma_portcounters_ext {
+ u8 reserved;
+ u8 port_select;
+ __be16 counter_select;
+ __be32 reserved1;
+ __be64 port_xmit_data;
+ __be64 port_rcv_data;
+ __be64 port_xmit_packets;
+ __be64 port_rcv_packets;
+ __be64 port_unicast_xmit_packets;
+ __be64 port_unicast_rcv_packets;
+ __be64 port_multicast_xmit_packets;
+ __be64 port_multicast_rcv_packets;
+} __attribute__ ((packed));
+
+#define IB_PMA_SELX_PORT_XMIT_DATA __constant_htons(0x0001)
+#define IB_PMA_SELX_PORT_RCV_DATA __constant_htons(0x0002)
+#define IB_PMA_SELX_PORT_XMIT_PACKETS __constant_htons(0x0004)
+#define IB_PMA_SELX_PORT_RCV_PACKETS __constant_htons(0x0008)
+#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS __constant_htons(0x0010)
+#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS __constant_htons(0x0020)
+#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS __constant_htons(0x0040)
+#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS __constant_htons(0x0080)
+
+static int recv_pma_get_classportinfo(struct ib_perf *pmp)
+{
+ struct ib_pma_classportinfo *p =
+ (struct ib_pma_classportinfo *)pmp->data;
+
+ memset(pmp->data, 0, sizeof(pmp->data));
+
+ if (pmp->attr_mod != 0)
+ pmp->status |= IB_SMP_INVALID_FIELD;
+
+ /* Indicate AllPortSelect is valid (only one port anyway) */
+ p->cap_mask = __constant_cpu_to_be16(1 << 8);
+ p->base_version = 1;
+ p->class_version = 1;
+ /*
+ * Expected response time is 4.096 usec. * 2^18 == 1.073741824
+ * sec.
+ */
+ p->resp_time_value = 18;
+
+ return reply((struct ib_smp *) pmp);
+}
+
+/*
+ * The PortSamplesControl.CounterMasks field is an array of 3-bit fields
+ * that specify the N'th counter's capabilities. See ch. 16.1.3.2.
+ * We support 5 counters which only count the mandatory quantities.
+ */
+#define COUNTER_MASK(q, n) ((q) << ((9 - (n)) * 3))
+#define COUNTER_MASK0_9 \
+ __constant_cpu_to_be32(COUNTER_MASK(1, 0) | \
+ COUNTER_MASK(1, 1) | \
+ COUNTER_MASK(1, 2) | \
+ COUNTER_MASK(1, 3) | \
+ COUNTER_MASK(1, 4))
+
+static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portsamplescontrol *p =
+ (struct ib_pma_portsamplescontrol *)pmp->data;
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_cregs const *crp = dev->dd->ipath_cregs;
+ unsigned long flags;
+ u8 port_select = p->port_select;
+
+ memset(pmp->data, 0, sizeof(pmp->data));
+
+ p->port_select = port_select;
+ if (pmp->attr_mod != 0 ||
+ (port_select != port && port_select != 0xFF))
+ pmp->status |= IB_SMP_INVALID_FIELD;
+ /*
+ * Ticks are 10x the link transfer period, which for 2.5 Gb/s is
+ * 4 nsec: 0 == 4 nsec, 1 == 8 nsec, ..., 255 == 1020 nsec. Sample
+ * intervals are counted in ticks. Since we use Linux timers, which
+ * count in jiffies, we can't sample for less than 1000 ticks if
+ * HZ == 1000 (4000 ticks if HZ is 250). link_speed_active returns
+ * 2 for DDR and 1 for SDR, so set the tick to 1 for DDR and 0 for
+ * SDR on chips that have hardware support for delaying packets.
+ */
+ if (crp->cr_psstat)
+ p->tick = dev->dd->ipath_link_speed_active - 1;
+ else
+ p->tick = 250; /* 1 usec. */
+ p->counter_width = 4; /* 32 bit counters */
+ p->counter_mask0_9 = COUNTER_MASK0_9;
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ if (crp->cr_psstat)
+ p->sample_status = ipath_read_creg32(dev->dd, crp->cr_psstat);
+ else
+ p->sample_status = dev->pma_sample_status;
+ p->sample_start = cpu_to_be32(dev->pma_sample_start);
+ p->sample_interval = cpu_to_be32(dev->pma_sample_interval);
+ p->tag = cpu_to_be16(dev->pma_tag);
+ p->counter_select[0] = dev->pma_counter_select[0];
+ p->counter_select[1] = dev->pma_counter_select[1];
+ p->counter_select[2] = dev->pma_counter_select[2];
+ p->counter_select[3] = dev->pma_counter_select[3];
+ p->counter_select[4] = dev->pma_counter_select[4];
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+
+ return reply((struct ib_smp *) pmp);
+}
+
+static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portsamplescontrol *p =
+ (struct ib_pma_portsamplescontrol *)pmp->data;
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_cregs const *crp = dev->dd->ipath_cregs;
+ unsigned long flags;
+ u8 status;
+ int ret;
+
+ if (pmp->attr_mod != 0 ||
+ (p->port_select != port && p->port_select != 0xFF)) {
+ pmp->status |= IB_SMP_INVALID_FIELD;
+ ret = reply((struct ib_smp *) pmp);
+ goto bail;
+ }
+
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ if (crp->cr_psstat)
+ status = ipath_read_creg32(dev->dd, crp->cr_psstat);
+ else
+ status = dev->pma_sample_status;
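+ /*
+ * Only reprogram the sample registers once the previous
+ * sample has completed.
+ */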
+ if (status == IB_PMA_SAMPLE_STATUS_DONE) {
+ dev->pma_sample_start = be32_to_cpu(p->sample_start);
+ dev->pma_sample_interval = be32_to_cpu(p->sample_interval);
+ dev->pma_tag = be16_to_cpu(p->tag);
+ dev->pma_counter_select[0] = p->counter_select[0];
+ dev->pma_counter_select[1] = p->counter_select[1];
+ dev->pma_counter_select[2] = p->counter_select[2];
+ dev->pma_counter_select[3] = p->counter_select[3];
+ dev->pma_counter_select[4] = p->counter_select[4];
+ if (crp->cr_psstat) {
+ ipath_write_creg(dev->dd, crp->cr_psinterval,
+ dev->pma_sample_interval);
+ ipath_write_creg(dev->dd, crp->cr_psstart,
+ dev->pma_sample_start);
+ } else
+ dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED;
+ }
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+
+ ret = recv_pma_get_portsamplescontrol(pmp, ibdev, port);
+
+bail:
+ return ret;
+}
+
+static u64 get_counter(struct ipath_ibdev *dev,
+ struct ipath_cregs const *crp,
+ __be16 sel)
+{
+ u64 ret;
+
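+ /*
+ * Prefer the chip's dedicated sample counter when present;
+ * otherwise fall back to the software-maintained count.
+ */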
+ switch (sel) {
+ case IB_PMA_PORT_XMIT_DATA:
+ ret = (crp->cr_psxmitdatacount) ?
+ ipath_read_creg32(dev->dd, crp->cr_psxmitdatacount) :
+ dev->ipath_sword;
+ break;
+ case IB_PMA_PORT_RCV_DATA:
+ ret = (crp->cr_psrcvdatacount) ?
+ ipath_read_creg32(dev->dd, crp->cr_psrcvdatacount) :
+ dev->ipath_rword;
+ break;
+ case IB_PMA_PORT_XMIT_PKTS:
+ ret = (crp->cr_psxmitpktscount) ?
+ ipath_read_creg32(dev->dd, crp->cr_psxmitpktscount) :
+ dev->ipath_spkts;
+ break;
+ case IB_PMA_PORT_RCV_PKTS:
+ ret = (crp->cr_psrcvpktscount) ?
+ ipath_read_creg32(dev->dd, crp->cr_psrcvpktscount) :
+ dev->ipath_rpkts;
+ break;
+ case IB_PMA_PORT_XMIT_WAIT:
+ ret = (crp->cr_psxmitwaitcount) ?
+ ipath_read_creg32(dev->dd, crp->cr_psxmitwaitcount) :
+ dev->ipath_xmit_wait;
+ break;
+ default:
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int recv_pma_get_portsamplesresult(struct ib_perf *pmp,
+ struct ib_device *ibdev)
+{
+ struct ib_pma_portsamplesresult *p =
+ (struct ib_pma_portsamplesresult *)pmp->data;
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_cregs const *crp = dev->dd->ipath_cregs;
+ u8 status;
+ int i;
+
+ memset(pmp->data, 0, sizeof(pmp->data));
+ p->tag = cpu_to_be16(dev->pma_tag);
+ if (crp->cr_psstat)
+ status = ipath_read_creg32(dev->dd, crp->cr_psstat);
+ else
+ status = dev->pma_sample_status;
+ p->sample_status = cpu_to_be16(status);
+ for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
+ p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
+ cpu_to_be32(
+ get_counter(dev, crp, dev->pma_counter_select[i]));
+
+ return reply((struct ib_smp *) pmp);
+}
+
+static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp,
+ struct ib_device *ibdev)
+{
+ struct ib_pma_portsamplesresult_ext *p =
+ (struct ib_pma_portsamplesresult_ext *)pmp->data;
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_cregs const *crp = dev->dd->ipath_cregs;
+ u8 status;
+ int i;
+
+ memset(pmp->data, 0, sizeof(pmp->data));
+ p->tag = cpu_to_be16(dev->pma_tag);
+ if (crp->cr_psstat)
+ status = ipath_read_creg32(dev->dd, crp->cr_psstat);
+ else
+ status = dev->pma_sample_status;
+ p->sample_status = cpu_to_be16(status);
+ /* 64 bits */
+ p->extended_width = __constant_cpu_to_be32(0x80000000);
+ for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
+ p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
+ cpu_to_be64(
+ get_counter(dev, crp, dev->pma_counter_select[i]));
+
+ return reply((struct ib_smp *) pmp);
+}
+
+static int recv_pma_get_portcounters(struct ib_perf *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
+ pmp->data;
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_verbs_counters cntrs;
+ u8 port_select = p->port_select;
+
+ ipath_get_counters(dev->dd, &cntrs);
+
+ /* Adjust counters for any resets done. */
+ cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
+ cntrs.link_error_recovery_counter -=
+ dev->z_link_error_recovery_counter;
+ cntrs.link_downed_counter -= dev->z_link_downed_counter;
+ cntrs.port_rcv_errors += dev->rcv_errors;
+ cntrs.port_rcv_errors -= dev->z_port_rcv_errors;
+ cntrs.port_rcv_remphys_errors -= dev->z_port_rcv_remphys_errors;
+ cntrs.port_xmit_discards -= dev->z_port_xmit_discards;
+ cntrs.port_xmit_data -= dev->z_port_xmit_data;
+ cntrs.port_rcv_data -= dev->z_port_rcv_data;
+ cntrs.port_xmit_packets -= dev->z_port_xmit_packets;
+ cntrs.port_rcv_packets -= dev->z_port_rcv_packets;
+ cntrs.local_link_integrity_errors -=
+ dev->z_local_link_integrity_errors;
+ cntrs.excessive_buffer_overrun_errors -=
+ dev->z_excessive_buffer_overrun_errors;
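+ /*
+ * VL15 drops combine the hardware count with drops
+ * noted in software (n_vl15_dropped).
+ */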
+ cntrs.vl15_dropped -= dev->z_vl15_dropped;
+ cntrs.vl15_dropped += dev->n_vl15_dropped;
+
+ memset(pmp->data, 0, sizeof(pmp->data));
+
+ p->port_select = port_select;
+ if (pmp->attr_mod != 0 ||
+ (port_select != port && port_select != 0xFF))
+ pmp->status |= IB_SMP_INVALID_FIELD;
+
+ if (cntrs.symbol_error_counter > 0xFFFFUL)
+ p->symbol_error_counter = __constant_cpu_to_be16(0xFFFF);
+ else
+ p->symbol_error_counter =
+ cpu_to_be16((u16)cntrs.symbol_error_counter);
+ if (cntrs.link_error_recovery_counter > 0xFFUL)
+ p->link_error_recovery_counter = 0xFF;
+ else
+ p->link_error_recovery_counter =
+ (u8)cntrs.link_error_recovery_counter;
+ if (cntrs.link_downed_counter > 0xFFUL)
+ p->link_downed_counter = 0xFF;
+ else
+ p->link_downed_counter = (u8)cntrs.link_downed_counter;
+ if (cntrs.port_rcv_errors > 0xFFFFUL)
+ p->port_rcv_errors = __constant_cpu_to_be16(0xFFFF);
+ else
+ p->port_rcv_errors =
+ cpu_to_be16((u16) cntrs.port_rcv_errors);
+ if (cntrs.port_rcv_remphys_errors > 0xFFFFUL)
+ p->port_rcv_remphys_errors = __constant_cpu_to_be16(0xFFFF);
+ else
+ p->port_rcv_remphys_errors =
+ cpu_to_be16((u16)cntrs.port_rcv_remphys_errors);
+ if (cntrs.port_xmit_discards > 0xFFFFUL)
+ p->port_xmit_discards = __constant_cpu_to_be16(0xFFFF);
+ else
+ p->port_xmit_discards =
+ cpu_to_be16((u16)cntrs.port_xmit_discards);
+ if (cntrs.local_link_integrity_errors > 0xFUL)
+ cntrs.local_link_integrity_errors = 0xFUL;
+ if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
+ cntrs.excessive_buffer_overrun_errors = 0xFUL;
+ p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) |
+ cntrs.excessive_buffer_overrun_errors;
+ if (cntrs.vl15_dropped > 0xFFFFUL)
+ p->vl15_dropped = __constant_cpu_to_be16(0xFFFF);
+ else
+ p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped);
+ if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
+ p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF);
+ else
+ p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data);
+ if (cntrs.port_rcv_data > 0xFFFFFFFFUL)
+ p->port_rcv_data = __constant_cpu_to_be32(0xFFFFFFFF);
+ else
+ p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data);
+ if (cntrs.port_xmit_packets > 0xFFFFFFFFUL)
+ p->port_xmit_packets = __constant_cpu_to_be32(0xFFFFFFFF);
+ else
+ p->port_xmit_packets =
+ cpu_to_be32((u32)cntrs.port_xmit_packets);
+ if (cntrs.port_rcv_packets > 0xFFFFFFFFUL)
+ p->port_rcv_packets = __constant_cpu_to_be32(0xFFFFFFFF);
+ else
+ p->port_rcv_packets =
+ cpu_to_be32((u32) cntrs.port_rcv_packets);
+
+ return reply((struct ib_smp *) pmp);
+}
+
+static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portcounters_ext *p =
+ (struct ib_pma_portcounters_ext *)pmp->data;
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ u64 swords, rwords, spkts, rpkts, xwait;
+ u8 port_select = p->port_select;
+
+ ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+ &rpkts, &xwait);
+
+ /* Adjust counters for any resets done. */
+ swords -= dev->z_port_xmit_data;
+ rwords -= dev->z_port_rcv_data;
+ spkts -= dev->z_port_xmit_packets;
+ rpkts -= dev->z_port_rcv_packets;
+
+ memset(pmp->data, 0, sizeof(pmp->data));
+
+ p->port_select = port_select;
+ if (pmp->attr_mod != 0 ||
+ (port_select != port && port_select != 0xFF))
+ pmp->status |= IB_SMP_INVALID_FIELD;
+
+ p->port_xmit_data = cpu_to_be64(swords);
+ p->port_rcv_data = cpu_to_be64(rwords);
+ p->port_xmit_packets = cpu_to_be64(spkts);
+ p->port_rcv_packets = cpu_to_be64(rpkts);
+ p->port_unicast_xmit_packets = cpu_to_be64(dev->n_unicast_xmit);
+ p->port_unicast_rcv_packets = cpu_to_be64(dev->n_unicast_rcv);
+ p->port_multicast_xmit_packets = cpu_to_be64(dev->n_multicast_xmit);
+ p->port_multicast_rcv_packets = cpu_to_be64(dev->n_multicast_rcv);
+
+ return reply((struct ib_smp *) pmp);
+}
+
+static int recv_pma_set_portcounters(struct ib_perf *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
+ pmp->data;
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_verbs_counters cntrs;
+
+ /*
+ * Since the HW doesn't support clearing counters, we save the
+ * current count and subtract it from future responses.
+ */
+ ipath_get_counters(dev->dd, &cntrs);
+
+ if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
+ dev->z_symbol_error_counter = cntrs.symbol_error_counter;
+
+ if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY)
+ dev->z_link_error_recovery_counter =
+ cntrs.link_error_recovery_counter;
+
+ if (p->counter_select & IB_PMA_SEL_LINK_DOWNED)
+ dev->z_link_downed_counter = cntrs.link_downed_counter;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS)
+ dev->z_port_rcv_errors =
+ cntrs.port_rcv_errors + dev->rcv_errors;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS)
+ dev->z_port_rcv_remphys_errors =
+ cntrs.port_rcv_remphys_errors;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS)
+ dev->z_port_xmit_discards = cntrs.port_xmit_discards;
+
+ if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS)
+ dev->z_local_link_integrity_errors =
+ cntrs.local_link_integrity_errors;
+
+ if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS)
+ dev->z_excessive_buffer_overrun_errors =
+ cntrs.excessive_buffer_overrun_errors;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
+ dev->n_vl15_dropped = 0;
+ dev->z_vl15_dropped = cntrs.vl15_dropped;
+ }
+
+ if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
+ dev->z_port_xmit_data = cntrs.port_xmit_data;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA)
+ dev->z_port_rcv_data = cntrs.port_rcv_data;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS)
+ dev->z_port_xmit_packets = cntrs.port_xmit_packets;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS)
+ dev->z_port_rcv_packets = cntrs.port_rcv_packets;
+
+ return recv_pma_get_portcounters(pmp, ibdev, port);
+}
+
+static int recv_pma_set_portcounters_ext(struct ib_perf *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
+ pmp->data;
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ u64 swords, rwords, spkts, rpkts, xwait;
+
+ ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
+ &rpkts, &xwait);
+
+ if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
+ dev->z_port_xmit_data = swords;
+
+ if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA)
+ dev->z_port_rcv_data = rwords;
+
+ if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS)
+ dev->z_port_xmit_packets = spkts;
+
+ if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
+ dev->z_port_rcv_packets = rpkts;
+
+ if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
+ dev->n_unicast_xmit = 0;
+
+ if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
+ dev->n_unicast_rcv = 0;
+
+ if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
+ dev->n_multicast_xmit = 0;
+
+ if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
+ dev->n_multicast_rcv = 0;
+
+ return recv_pma_get_portcounters_ext(pmp, ibdev, port);
+}
+
+static int process_subn(struct ib_device *ibdev, int mad_flags,
+ u8 port_num, struct ib_mad *in_mad,
+ struct ib_mad *out_mad)
+{
+ struct ib_smp *smp = (struct ib_smp *)out_mad;
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ int ret;
+
+ *out_mad = *in_mad;
+ if (smp->class_version != 1) {
+ smp->status |= IB_SMP_UNSUP_VERSION;
+ ret = reply(smp);
+ goto bail;
+ }
+
+ /* Is the mkey in the process of expiring? */
+ if (dev->mkey_lease_timeout &&
+ time_after_eq(jiffies, dev->mkey_lease_timeout)) {
+ /* Clear timeout and mkey protection field. */
+ dev->mkey_lease_timeout = 0;
+ dev->mkeyprot = 0;
+ }
+
+ /*
+ * M_Key checking depends on Portinfo:M_Key_protect_bits:
+ * a Set always requires a matching M_Key, and a Get does
+ * too when the protect bits are 2 or 3.
+ */
+ if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && dev->mkey != 0 &&
+ dev->mkey != smp->mkey &&
+ (smp->method == IB_MGMT_METHOD_SET ||
+ (smp->method == IB_MGMT_METHOD_GET &&
+ dev->mkeyprot >= 2))) {
+ if (dev->mkey_violations != 0xFFFF)
+ ++dev->mkey_violations;
+ if (dev->mkey_lease_timeout ||
+ dev->mkey_lease_period == 0) {
+ ret = IB_MAD_RESULT_SUCCESS |
+ IB_MAD_RESULT_CONSUMED;
+ goto bail;
+ }
+ dev->mkey_lease_timeout = jiffies +
+ dev->mkey_lease_period * HZ;
+ /* Future: Generate a trap notice. */
+ ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+ goto bail;
+ } else if (dev->mkey_lease_timeout)
+ dev->mkey_lease_timeout = 0;
+
+ switch (smp->method) {
+ case IB_MGMT_METHOD_GET:
+ switch (smp->attr_id) {
+ case IB_SMP_ATTR_NODE_DESC:
+ ret = recv_subn_get_nodedescription(smp, ibdev);
+ goto bail;
+ case IB_SMP_ATTR_NODE_INFO:
+ ret = recv_subn_get_nodeinfo(smp, ibdev, port_num);
+ goto bail;
+ case IB_SMP_ATTR_GUID_INFO:
+ ret = recv_subn_get_guidinfo(smp, ibdev);
+ goto bail;
+ case IB_SMP_ATTR_PORT_INFO:
+ ret = recv_subn_get_portinfo(smp, ibdev, port_num);
+ goto bail;
+ case IB_SMP_ATTR_PKEY_TABLE:
+ ret = recv_subn_get_pkeytable(smp, ibdev);
+ goto bail;
+ case IB_SMP_ATTR_SM_INFO:
+ if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
+ ret = IB_MAD_RESULT_SUCCESS |
+ IB_MAD_RESULT_CONSUMED;
+ goto bail;
+ }
+ if (dev->port_cap_flags & IB_PORT_SM) {
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+ }
+ /* FALLTHROUGH */
+ default:
+ smp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply(smp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_SET:
+ switch (smp->attr_id) {
+ case IB_SMP_ATTR_GUID_INFO:
+ ret = recv_subn_set_guidinfo(smp, ibdev);
+ goto bail;
+ case IB_SMP_ATTR_PORT_INFO:
+ ret = recv_subn_set_portinfo(smp, ibdev, port_num);
+ goto bail;
+ case IB_SMP_ATTR_PKEY_TABLE:
+ ret = recv_subn_set_pkeytable(smp, ibdev);
+ goto bail;
+ case IB_SMP_ATTR_SM_INFO:
+ if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
+ ret = IB_MAD_RESULT_SUCCESS |
+ IB_MAD_RESULT_CONSUMED;
+ goto bail;
+ }
+ if (dev->port_cap_flags & IB_PORT_SM) {
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+ }
+ /* FALLTHROUGH */
+ default:
+ smp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply(smp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_TRAP:
+ case IB_MGMT_METHOD_REPORT:
+ case IB_MGMT_METHOD_REPORT_RESP:
+ case IB_MGMT_METHOD_TRAP_REPRESS:
+ case IB_MGMT_METHOD_GET_RESP:
+ /*
+ * The ib_mad module will call us to process responses
+ * before checking for other consumers.
+ * Just tell the caller to process it normally.
+ */
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+ default:
+ smp->status |= IB_SMP_UNSUP_METHOD;
+ ret = reply(smp);
+ }
+
+bail:
+ return ret;
+}
+
+static int process_perf(struct ib_device *ibdev, u8 port_num,
+ struct ib_mad *in_mad,
+ struct ib_mad *out_mad)
+{
+ struct ib_perf *pmp = (struct ib_perf *)out_mad;
+ int ret;
+
+ *out_mad = *in_mad;
+ if (pmp->class_version != 1) {
+ pmp->status |= IB_SMP_UNSUP_VERSION;
+ ret = reply((struct ib_smp *) pmp);
+ goto bail;
+ }
+
+ switch (pmp->method) {
+ case IB_MGMT_METHOD_GET:
+ switch (pmp->attr_id) {
+ case IB_PMA_CLASS_PORT_INFO:
+ ret = recv_pma_get_classportinfo(pmp);
+ goto bail;
+ case IB_PMA_PORT_SAMPLES_CONTROL:
+ ret = recv_pma_get_portsamplescontrol(pmp, ibdev,
+ port_num);
+ goto bail;
+ case IB_PMA_PORT_SAMPLES_RESULT:
+ ret = recv_pma_get_portsamplesresult(pmp, ibdev);
+ goto bail;
+ case IB_PMA_PORT_SAMPLES_RESULT_EXT:
+ ret = recv_pma_get_portsamplesresult_ext(pmp,
+ ibdev);
+ goto bail;
+ case IB_PMA_PORT_COUNTERS:
+ ret = recv_pma_get_portcounters(pmp, ibdev,
+ port_num);
+ goto bail;
+ case IB_PMA_PORT_COUNTERS_EXT:
+ ret = recv_pma_get_portcounters_ext(pmp, ibdev,
+ port_num);
+ goto bail;
+ default:
+ pmp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_smp *) pmp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_SET:
+ switch (pmp->attr_id) {
+ case IB_PMA_PORT_SAMPLES_CONTROL:
+ ret = recv_pma_set_portsamplescontrol(pmp, ibdev,
+ port_num);
+ goto bail;
+ case IB_PMA_PORT_COUNTERS:
+ ret = recv_pma_set_portcounters(pmp, ibdev,
+ port_num);
+ goto bail;
+ case IB_PMA_PORT_COUNTERS_EXT:
+ ret = recv_pma_set_portcounters_ext(pmp, ibdev,
+ port_num);
+ goto bail;
+ default:
+ pmp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_smp *) pmp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_GET_RESP:
+ /*
+ * The ib_mad module will call us to process responses
+ * before checking for other consumers.
+ * Just tell the caller to process it normally.
+ */
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+ default:
+ pmp->status |= IB_SMP_UNSUP_METHOD;
+ ret = reply((struct ib_smp *) pmp);
+ }
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_process_mad - process an incoming MAD packet
+ * @ibdev: the infiniband device this packet came in on
+ * @mad_flags: MAD flags
+ * @port_num: the port number this packet came in on
+ * @in_wc: the work completion entry for this packet
+ * @in_grh: the global route header for this packet
+ * @in_mad: the incoming MAD
+ * @out_mad: any outgoing MAD reply
+ *
+ * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
+ * interested in processing.
+ *
+ * Note that the verbs framework has already done the MAD sanity checks,
+ * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
+ * MADs.
+ *
+ * This is called by the ib_mad module.
+ */
+int ipath_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ int ret;
+
+ switch (in_mad->mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ ret = process_subn(ibdev, mad_flags, port_num,
+ in_mad, out_mad);
+ goto bail;
+ case IB_MGMT_CLASS_PERF_MGMT:
+ ret = process_perf(ibdev, port_num, in_mad, out_mad);
+ goto bail;
+ default:
+ ret = IB_MAD_RESULT_SUCCESS;
+ }
+
+bail:
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c
new file mode 100644
index 0000000..fa830e2
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_mmap.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <asm/pgtable.h>
+
+#include "ipath_verbs.h"
+
+/**
+ * ipath_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct ipath_mmap_info
+ */
+void ipath_release_mmap_info(struct kref *ref)
+{
+ struct ipath_mmap_info *ip =
+ container_of(ref, struct ipath_mmap_info, ref);
+ struct ipath_ibdev *dev = to_idev(ip->context->device);
+
+ spin_lock_irq(&dev->pending_lock);
+ list_del(&ip->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+
+ vfree(ip->obj);
+ kfree(ip);
+}
+
+/*
+ * open and close keep track of how many times the memory region
+ * (CQ, QP, or SRQ buffer) is mapped, to avoid releasing it prematurely.
+ */
+static void ipath_vma_open(struct vm_area_struct *vma)
+{
+ struct ipath_mmap_info *ip = vma->vm_private_data;
+
+ kref_get(&ip->ref);
+}
+
+static void ipath_vma_close(struct vm_area_struct *vma)
+{
+ struct ipath_mmap_info *ip = vma->vm_private_data;
+
+ kref_put(&ip->ref, ipath_release_mmap_info);
+}
+
+static struct vm_operations_struct ipath_vm_ops = {
+ .open = ipath_vma_open,
+ .close = ipath_vma_close,
+};
+
+/**
+ * ipath_mmap - create a new mmap region
+ * @context: the IB user context of the process making the mmap() call
+ * @vma: the VMA to be initialized
+ *
+ * Return zero if the mmap is OK. Otherwise, return an errno.
+ */
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct ipath_ibdev *dev = to_idev(context->device);
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long size = vma->vm_end - vma->vm_start;
+ struct ipath_mmap_info *ip, *pp;
+ int ret = -EINVAL;
+
+ /*
+ * Search the device's list of objects waiting for a mmap call.
+ * Normally, this list is very short since a call to create a
+ * CQ, QP, or SRQ is soon followed by a call to mmap().
+ */
+ spin_lock_irq(&dev->pending_lock);
+ list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
+ pending_mmaps) {
+ /* Only the creator is allowed to mmap the object */
+ if (context != ip->context || (__u64) offset != ip->offset)
+ continue;
+ /* Don't allow a mmap larger than the object. */
+ if (size > ip->size)
+ break;
+
+ list_del_init(&ip->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+
+ ret = remap_vmalloc_range(vma, ip->obj, 0);
+ if (ret)
+ goto done;
+ vma->vm_ops = &ipath_vm_ops;
+ vma->vm_private_data = ip;
+ ipath_vma_open(vma);
+ goto done;
+ }
+ spin_unlock_irq(&dev->pending_lock);
+done:
+ return ret;
+}
+
+/*
+ * Allocate information for ipath_mmap
+ */
+struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
+ u32 size,
+ struct ib_ucontext *context,
+ void *obj)
+{
+ struct ipath_mmap_info *ip;
+
+ ip = kmalloc(sizeof *ip, GFP_KERNEL);
+ if (!ip)
+ goto bail;
+
+ size = PAGE_ALIGN(size);
+
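+ /*
+ * Hand out a unique, page-aligned offset; userspace passes it
+ * back as the mmap() offset so ipath_mmap() can find this object.
+ */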
+ spin_lock_irq(&dev->mmap_offset_lock);
+ if (dev->mmap_offset == 0)
+ dev->mmap_offset = PAGE_SIZE;
+ ip->offset = dev->mmap_offset;
+ dev->mmap_offset += size;
+ spin_unlock_irq(&dev->mmap_offset_lock);
+
+ INIT_LIST_HEAD(&ip->pending_mmaps);
+ ip->size = size;
+ ip->context = context;
+ ip->obj = obj;
+ kref_init(&ip->ref);
+
+bail:
+ return ip;
+}
+
+void ipath_update_mmap_info(struct ipath_ibdev *dev,
+ struct ipath_mmap_info *ip,
+ u32 size, void *obj)
+{
+ size = PAGE_ALIGN(size);
+
+ spin_lock_irq(&dev->mmap_offset_lock);
+ if (dev->mmap_offset == 0)
+ dev->mmap_offset = PAGE_SIZE;
+ ip->offset = dev->mmap_offset;
+ dev->mmap_offset += size;
+ spin_unlock_irq(&dev->mmap_offset_lock);
+
+ ip->size = size;
+ ip->obj = obj;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
new file mode 100644
index 0000000..9d343b7
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_smi.h>
+
+#include "ipath_verbs.h"
+
+/* Fast memory region */
+struct ipath_fmr {
+ struct ib_fmr ibfmr;
+ u8 page_shift;
+ struct ipath_mregion mr; /* must be last */
+};
+
+static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
+{
+ return container_of(ibfmr, struct ipath_fmr, ibfmr);
+}
+
+/**
+ * ipath_get_dma_mr - get a DMA memory region
+ * @pd: protection domain for this memory region
+ * @acc: access flags
+ *
+ * Returns the memory region on success, otherwise returns an errno.
+ * Note that all DMA addresses should be created via the
+ * struct ib_dma_mapping_ops functions (see ipath_dma.c).
+ */
+struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct ipath_mr *mr;
+ struct ib_mr *ret;
+
+ mr = kzalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ mr->mr.access_flags = acc;
+ ret = &mr->ibmr;
+
+bail:
+ return ret;
+}
+
+static struct ipath_mr *alloc_mr(int count,
+ struct ipath_lkey_table *lk_table)
+{
+ struct ipath_mr *mr;
+ int m, i = 0;
+
+ /* Allocate struct plus pointers to first level page tables. */
+ m = (count + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
+ mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
+ if (!mr)
+ goto done;
+
+ /* Allocate first level page tables. */
+ for (; i < m; i++) {
+ mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
+ if (!mr->mr.map[i])
+ goto bail;
+ }
+ mr->mr.mapsz = m;
+
+ /*
+ * ib_reg_phys_mr() will initialize mr->ibmr except for
+ * lkey and rkey.
+ */
+ if (!ipath_alloc_lkey(lk_table, &mr->mr))
+ goto bail;
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
+
+ goto done;
+
+bail:
+ while (i) {
+ i--;
+ kfree(mr->mr.map[i]);
+ }
+ kfree(mr);
+ mr = NULL;
+
+done:
+ return mr;
+}
+
+/**
+ * ipath_reg_phys_mr - register a physical memory region
+ * @pd: protection domain for this memory region
+ * @buffer_list: pointer to the list of physical buffers to register
+ * @num_phys_buf: the number of physical buffers to register
+ * @acc: access flags for this memory region
+ * @iova_start: the starting address passed over IB which maps to this MR
+ *
+ * Returns the memory region on success, otherwise returns an errno.
+ */
+struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf, int acc, u64 *iova_start)
+{
+ struct ipath_mr *mr;
+ int n, m, i;
+ struct ib_mr *ret;
+
+ mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
+ if (mr == NULL) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ mr->mr.pd = pd;
+ mr->mr.user_base = *iova_start;
+ mr->mr.iova = *iova_start;
+ mr->mr.length = 0;
+ mr->mr.offset = 0;
+ mr->mr.access_flags = acc;
+ mr->mr.max_segs = num_phys_buf;
+ mr->umem = NULL;
+
+ m = 0;
+ n = 0;
+ for (i = 0; i < num_phys_buf; i++) {
+ mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
+ mr->mr.map[m]->segs[n].length = buffer_list[i].size;
+ mr->mr.length += buffer_list[i].size;
+ n++;
+ if (n == IPATH_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+
+ ret = &mr->ibmr;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_reg_user_mr - register a userspace memory region
+ * @pd: protection domain for this memory region
+ * @start: starting userspace address
+ * @length: length of region to register
+ * @virt_addr: virtual address to use (from HCA's point of view)
+ * @mr_access_flags: access flags for this memory region
+ * @udata: unused by the InfiniPath driver
+ *
+ * Returns the memory region on success, otherwise returns an errno.
+ */
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata)
+{
+ struct ipath_mr *mr;
+ struct ib_umem *umem;
+ struct ib_umem_chunk *chunk;
+ int n, m, i;
+ struct ib_mr *ret;
+
+ if (length == 0) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
+ umem = ib_umem_get(pd->uobject->context, start, length,
+ mr_access_flags, 0);
+ if (IS_ERR(umem))
+ return (void *) umem;
+
+ n = 0;
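+ /* Count the umem pages so alloc_mr() can size the segment arrays. */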
+ list_for_each_entry(chunk, &umem->chunk_list, list)
+ n += chunk->nents;
+
+ mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
+ if (!mr) {
+ ret = ERR_PTR(-ENOMEM);
+ ib_umem_release(umem);
+ goto bail;
+ }
+
+ mr->mr.pd = pd;
+ mr->mr.user_base = start;
+ mr->mr.iova = virt_addr;
+ mr->mr.length = length;
+ mr->mr.offset = umem->offset;
+ mr->mr.access_flags = mr_access_flags;
+ mr->mr.max_segs = n;
+ mr->umem = umem;
+
+ m = 0;
+ n = 0;
+ list_for_each_entry(chunk, &umem->chunk_list, list) {
+ for (i = 0; i < chunk->nents; i++) {
+ void *vaddr;
+
+ vaddr = page_address(sg_page(&chunk->page_list[i]));
+ if (!vaddr) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ mr->mr.map[m]->segs[n].vaddr = vaddr;
+ mr->mr.map[m]->segs[n].length = umem->page_size;
+ n++;
+ if (n == IPATH_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+ }
+ ret = &mr->ibmr;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_dereg_mr - unregister and free a memory region
+ * @ibmr: the memory region to free
+ *
+ * Returns 0 on success.
+ *
+ * Note that this is called to free MRs created by ipath_get_dma_mr()
+ * or ipath_reg_user_mr().
+ */
+int ipath_dereg_mr(struct ib_mr *ibmr)
+{
+ struct ipath_mr *mr = to_imr(ibmr);
+ int i;
+
+ ipath_free_lkey(&to_idev(ibmr->device)->lk_table, ibmr->lkey);
+ i = mr->mr.mapsz;
+ while (i) {
+ i--;
+ kfree(mr->mr.map[i]);
+ }
+
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
+ kfree(mr);
+ return 0;
+}
+
+/**
+ * ipath_alloc_fmr - allocate a fast memory region
+ * @pd: the protection domain for this memory region
+ * @mr_access_flags: access flags for this memory region
+ * @fmr_attr: fast memory region attributes
+ *
+ * Returns the memory region on success, otherwise returns an errno.
+ */
+struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr)
+{
+ struct ipath_fmr *fmr;
+ int m, i = 0;
+ struct ib_fmr *ret;
+
+ /* Allocate struct plus pointers to first level page tables. */
+ m = (fmr_attr->max_pages + IPATH_SEGSZ - 1) / IPATH_SEGSZ;
+ fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
+ if (!fmr)
+ goto bail;
+
+ /* Allocate first level page tables. */
+ for (; i < m; i++) {
+ fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
+ GFP_KERNEL);
+ if (!fmr->mr.map[i])
+ goto bail;
+ }
+ fmr->mr.mapsz = m;
+
+ /*
+ * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
+ * rkey.
+ */
+ if (!ipath_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
+ goto bail;
+ fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mr.lkey;
+ /*
+ * Resources are allocated but no valid mapping (RKEY can't be
+ * used).
+ */
+ fmr->mr.pd = pd;
+ fmr->mr.user_base = 0;
+ fmr->mr.iova = 0;
+ fmr->mr.length = 0;
+ fmr->mr.offset = 0;
+ fmr->mr.access_flags = mr_access_flags;
+ fmr->mr.max_segs = fmr_attr->max_pages;
+ fmr->page_shift = fmr_attr->page_shift;
+
+ ret = &fmr->ibfmr;
+ goto done;
+
+bail:
+ while (i)
+ kfree(fmr->mr.map[--i]);
+ kfree(fmr);
+ ret = ERR_PTR(-ENOMEM);
+
+done:
+ return ret;
+}
+
+/**
+ * ipath_map_phys_fmr - set up a fast memory region
+ * @ibfmr: the fast memory region to set up
+ * @page_list: the list of pages to associate with the fast memory region
+ * @list_len: the number of pages to associate with the fast memory region
+ * @iova: the virtual address of the start of the fast memory region
+ *
+ * This may be called from interrupt context.
+ */
+int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova)
+{
+ struct ipath_fmr *fmr = to_ifmr(ibfmr);
+ struct ipath_lkey_table *rkt;
+ unsigned long flags;
+ int m, n, i;
+ u32 ps;
+ int ret;
+
+ if (list_len > fmr->mr.max_segs) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ rkt = &to_idev(ibfmr->device)->lk_table;
+ spin_lock_irqsave(&rkt->lock, flags);
+ fmr->mr.user_base = iova;
+ fmr->mr.iova = iova;
+ ps = 1 << fmr->page_shift;
+ fmr->mr.length = list_len * ps;
+ m = 0;
+ n = 0;
+ for (i = 0; i < list_len; i++) {
+ fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
+ fmr->mr.map[m]->segs[n].length = ps;
+ if (++n == IPATH_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_unmap_fmr - unmap fast memory regions
+ * @fmr_list: the list of fast memory regions to unmap
+ *
+ * Returns 0 on success.
+ */
+int ipath_unmap_fmr(struct list_head *fmr_list)
+{
+ struct ipath_fmr *fmr;
+ struct ipath_lkey_table *rkt;
+ unsigned long flags;
+
+ list_for_each_entry(fmr, fmr_list, ibfmr.list) {
+ rkt = &to_idev(fmr->ibfmr.device)->lk_table;
+ spin_lock_irqsave(&rkt->lock, flags);
+ fmr->mr.user_base = 0;
+ fmr->mr.iova = 0;
+ fmr->mr.length = 0;
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ }
+ return 0;
+}
+
+/**
+ * ipath_dealloc_fmr - deallocate a fast memory region
+ * @ibfmr: the fast memory region to deallocate
+ *
+ * Returns 0 on success.
+ */
+int ipath_dealloc_fmr(struct ib_fmr *ibfmr)
+{
+ struct ipath_fmr *fmr = to_ifmr(ibfmr);
+ int i;
+
+ ipath_free_lkey(&to_idev(ibfmr->device)->lk_table, ibfmr->lkey);
+ i = fmr->mr.mapsz;
+ while (i)
+ kfree(fmr->mr.map[--i]);
+ kfree(fmr);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
new file mode 100644
index 0000000..4715911
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -0,0 +1,1062 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+
+#include "ipath_verbs.h"
+#include "ipath_kernel.h"
+
+#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE)
+#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
+#define mk_qpn(qpt, map, off) (((map) - (qpt)->map) * BITS_PER_PAGE + \
+ (off))
+#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
+ BITS_PER_PAGE, off)
+
+/*
+ * Convert the AETH credit code into the number of credits.
+ */
+static u32 credit_table[31] = {
+ 0, /* 0 */
+ 1, /* 1 */
+ 2, /* 2 */
+ 3, /* 3 */
+ 4, /* 4 */
+ 6, /* 5 */
+ 8, /* 6 */
+ 12, /* 7 */
+ 16, /* 8 */
+ 24, /* 9 */
+ 32, /* A */
+ 48, /* B */
+ 64, /* C */
+ 96, /* D */
+ 128, /* E */
+ 192, /* F */
+ 256, /* 10 */
+ 384, /* 11 */
+ 512, /* 12 */
+ 768, /* 13 */
+ 1024, /* 14 */
+ 1536, /* 15 */
+ 2048, /* 16 */
+ 3072, /* 17 */
+ 4096, /* 18 */
+ 6144, /* 19 */
+ 8192, /* 1A */
+ 12288, /* 1B */
+ 16384, /* 1C */
+ 24576, /* 1D */
+ 32768 /* 1E */
+};
+
+
+static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map)
+{
+ unsigned long page = get_zeroed_page(GFP_KERNEL);
+ unsigned long flags;
+
+ /*
+ * Free the page if someone raced with us installing it.
+ */
+
+ spin_lock_irqsave(&qpt->lock, flags);
+ if (map->page)
+ free_page(page);
+ else
+ map->page = (void *)page;
+ spin_unlock_irqrestore(&qpt->lock, flags);
+}
+
+
+static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type)
+{
+ u32 i, offset, max_scan, qpn;
+ struct qpn_map *map;
+ u32 ret = -1;
+
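+ /* QPN 0 and 1 are reserved for the SMI and GSI, respectively. */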
+ if (type == IB_QPT_SMI)
+ ret = 0;
+ else if (type == IB_QPT_GSI)
+ ret = 1;
+
+ if (ret != -1) {
+ map = &qpt->map[0];
+ if (unlikely(!map->page)) {
+ get_map_page(qpt, map);
+ if (unlikely(!map->page)) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ }
+ if (!test_and_set_bit(ret, map->page))
+ atomic_dec(&map->n_free);
+ else
+ ret = -EBUSY;
+ goto bail;
+ }
+
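+ /*
+ * Scan the QPN bitmap for a free entry, starting just past the
+ * last QPN allocated and wrapping back to 2 when necessary.
+ */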
+ qpn = qpt->last + 1;
+ if (qpn >= QPN_MAX)
+ qpn = 2;
+ offset = qpn & BITS_PER_PAGE_MASK;
+ map = &qpt->map[qpn / BITS_PER_PAGE];
+ max_scan = qpt->nmaps - !offset;
+ for (i = 0;;) {
+ if (unlikely(!map->page)) {
+ get_map_page(qpt, map);
+ if (unlikely(!map->page))
+ break;
+ }
+ if (likely(atomic_read(&map->n_free))) {
+ do {
+ if (!test_and_set_bit(offset, map->page)) {
+ atomic_dec(&map->n_free);
+ qpt->last = qpn;
+ ret = qpn;
+ goto bail;
+ }
+ offset = find_next_offset(map, offset);
+ qpn = mk_qpn(qpt, map, offset);
+ /*
+ * This test differs from alloc_pidmap().
+ * If find_next_offset() does find a zero
+ * bit, we don't need to check for QPN
+ * wrapping around past our starting QPN.
+ * We just need to be sure we don't loop
+ * forever.
+ */
+ } while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
+ }
+ /*
+ * In order to keep the number of pages allocated to a
+ * minimum, we scan all the existing pages before increasing
+ * the size of the bitmap table.
+ */
+ if (++i > max_scan) {
+ if (qpt->nmaps == QPNMAP_ENTRIES)
+ break;
+ map = &qpt->map[qpt->nmaps++];
+ offset = 0;
+ } else if (map < &qpt->map[qpt->nmaps]) {
+ ++map;
+ offset = 0;
+ } else {
+ map = &qpt->map[0];
+ offset = 2;
+ }
+ qpn = mk_qpn(qpt, map, offset);
+ }
+
+ ret = -ENOMEM;
+
+bail:
+ return ret;
+}
+
+static void free_qpn(struct ipath_qp_table *qpt, u32 qpn)
+{
+ struct qpn_map *map;
+
+ map = qpt->map + qpn / BITS_PER_PAGE;
+ if (map->page)
+ clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
+ atomic_inc(&map->n_free);
+}
+
+/**
+ * ipath_alloc_qpn - allocate a QP number
+ * @qpt: the QP table
+ * @qp: the QP
+ * @type: the QP type (IB_QPT_SMI and IB_QPT_GSI are special)
+ *
+ * Allocate the next available QPN and put the QP into the hash table.
+ * The hash table holds a reference to the QP.
+ */
+static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp,
+ enum ib_qp_type type)
+{
+ unsigned long flags;
+ int ret;
+
+ ret = alloc_qpn(qpt, type);
+ if (ret < 0)
+ goto bail;
+ qp->ibqp.qp_num = ret;
+
+ /* Add the QP to the hash table. */
+ spin_lock_irqsave(&qpt->lock, flags);
+
+ ret %= qpt->max;
+ qp->next = qpt->table[ret];
+ qpt->table[ret] = qp;
+ atomic_inc(&qp->refcount);
+
+ spin_unlock_irqrestore(&qpt->lock, flags);
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_free_qp - remove a QP from the QP table
+ * @qpt: the QP table
+ * @qp: the QP to remove
+ *
+ * Remove the QP from the table so it can't be found asynchronously by
+ * the receive interrupt routine.
+ */
+static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
+{
+ struct ipath_qp *q, **qpp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qpt->lock, flags);
+
+ /* Remove QP from the hash table. */
+ qpp = &qpt->table[qp->ibqp.qp_num % qpt->max];
+ for (; (q = *qpp) != NULL; qpp = &q->next) {
+ if (q == qp) {
+ *qpp = qp->next;
+ qp->next = NULL;
+ atomic_dec(&qp->refcount);
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&qpt->lock, flags);
+}
+
+/**
+ * ipath_free_all_qps - check for QPs still in use
+ * @qpt: the QP table to empty
+ *
+ * There should not be any QPs still in use.
+ * Free the QPN bitmap pages and return the number of QPs found.
+ */
+unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
+{
+ unsigned long flags;
+ struct ipath_qp *qp;
+ u32 n, qp_inuse = 0;
+
+ spin_lock_irqsave(&qpt->lock, flags);
+ for (n = 0; n < qpt->max; n++) {
+ qp = qpt->table[n];
+ qpt->table[n] = NULL;
+
+ for (; qp; qp = qp->next)
+ qp_inuse++;
+ }
+ spin_unlock_irqrestore(&qpt->lock, flags);
+
+ for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
+ if (qpt->map[n].page)
+ free_page((unsigned long) qpt->map[n].page);
+ return qp_inuse;
+}
+
+/**
+ * ipath_lookup_qpn - return the QP with the given QPN
+ * @qpt: the QP table
+ * @qpn: the QP number to look up
+ *
+ * The caller is responsible for decrementing the QP reference count
+ * when done.
+ */
+struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn)
+{
+ unsigned long flags;
+ struct ipath_qp *qp;
+
+ spin_lock_irqsave(&qpt->lock, flags);
+
+ for (qp = qpt->table[qpn % qpt->max]; qp; qp = qp->next) {
+ if (qp->ibqp.qp_num == qpn) {
+ atomic_inc(&qp->refcount);
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&qpt->lock, flags);
+ return qp;
+}
+
+/**
+ * ipath_reset_qp - initialize the QP state to the reset state
+ * @qp: the QP to reset
+ * @type: the QP type
+ */
+static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
+{
+ qp->remote_qpn = 0;
+ qp->qkey = 0;
+ qp->qp_access_flags = 0;
+ atomic_set(&qp->s_dma_busy, 0);
+ qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
+ qp->s_hdrwords = 0;
+ qp->s_wqe = NULL;
+ qp->s_pkt_delay = 0;
+ qp->s_draining = 0;
+ qp->s_psn = 0;
+ qp->r_psn = 0;
+ qp->r_msn = 0;
+ if (type == IB_QPT_RC) {
+ qp->s_state = IB_OPCODE_RC_SEND_LAST;
+ qp->r_state = IB_OPCODE_RC_SEND_LAST;
+ } else {
+ qp->s_state = IB_OPCODE_UC_SEND_LAST;
+ qp->r_state = IB_OPCODE_UC_SEND_LAST;
+ }
+ qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
+ qp->r_nak_state = 0;
+ qp->r_aflags = 0;
+ qp->r_flags = 0;
+ qp->s_rnr_timeout = 0;
+ qp->s_head = 0;
+ qp->s_tail = 0;
+ qp->s_cur = 0;
+ qp->s_last = 0;
+ qp->s_ssn = 1;
+ qp->s_lsn = 0;
+ memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+ qp->r_head_ack_queue = 0;
+ qp->s_tail_ack_queue = 0;
+ qp->s_num_rd_atomic = 0;
+ if (qp->r_rq.wq) {
+ qp->r_rq.wq->head = 0;
+ qp->r_rq.wq->tail = 0;
+ }
+}
+
+/**
+ * ipath_error_qp - put a QP into the error state
+ * @qp: the QP to put into the error state
+ * @err: the receive completion error to signal if a RWQE is active
+ *
+ * Flushes both send and receive work queues.
+ * Returns true if last WQE event should be generated.
+ * The QP s_lock should be held and interrupts disabled.
+ * If we are already in error state, just return.
+ */
+
+int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
+{
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ struct ib_wc wc;
+ int ret = 0;
+
+ if (qp->state == IB_QPS_ERR)
+ goto bail;
+
+ qp->state = IB_QPS_ERR;
+
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->timerwait))
+ list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
+ spin_unlock(&dev->pending_lock);
+
+ /* Schedule the sending tasklet to drain the send work queue. */
+ if (qp->s_last != qp->s_head)
+ ipath_schedule_send(qp);
+
+ memset(&wc, 0, sizeof(wc));
+ wc.qp = &qp->ibqp;
+ wc.opcode = IB_WC_RECV;
+
+ if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
+ wc.wr_id = qp->r_wr_id;
+ wc.status = err;
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ }
+ wc.status = IB_WC_WR_FLUSH_ERR;
+
+ if (qp->r_rq.wq) {
+ struct ipath_rwq *wq;
+ u32 head;
+ u32 tail;
+
+ spin_lock(&qp->r_rq.lock);
+
+ /* sanity check pointers before trusting them */
+ wq = qp->r_rq.wq;
+ head = wq->head;
+ if (head >= qp->r_rq.size)
+ head = 0;
+ tail = wq->tail;
+ if (tail >= qp->r_rq.size)
+ tail = 0;
+ while (tail != head) {
+ wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
+ if (++tail >= qp->r_rq.size)
+ tail = 0;
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ }
+ wq->tail = tail;
+
+ spin_unlock(&qp->r_rq.lock);
+ } else if (qp->ibqp.event_handler)
+ ret = 1;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_modify_qp - modify the attributes of a queue pair
+ * @ibqp: the queue pair whose attributes we're modifying
+ * @attr: the new attributes
+ * @attr_mask: the mask of attributes to modify
+ * @udata: user data for ipathverbs.so
+ *
+ * Returns 0 on success, otherwise returns an errno.
+ */
+int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct ipath_ibdev *dev = to_idev(ibqp->device);
+ struct ipath_qp *qp = to_iqp(ibqp);
+ enum ib_qp_state cur_state, new_state;
+ int lastwqe = 0;
+ int ret;
+
+ spin_lock_irq(&qp->s_lock);
+
+ cur_state = attr_mask & IB_QP_CUR_STATE ?
+ attr->cur_qp_state : qp->state;
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask))
+ goto inval;
+
+ if (attr_mask & IB_QP_AV) {
+ if (attr->ah_attr.dlid == 0 ||
+ attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
+ goto inval;
+
+ if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
+ (attr->ah_attr.grh.sgid_index > 1))
+ goto inval;
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX)
+ if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
+ goto inval;
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER)
+ if (attr->min_rnr_timer > 31)
+ goto inval;
+
+ if (attr_mask & IB_QP_PORT)
+ if (attr->port_num == 0 ||
+ attr->port_num > ibqp->device->phys_port_cnt)
+ goto inval;
+
+ /*
+ * Don't allow invalid path MTU values, or values greater than 2048
+ * unless we are configured for a 4KB MTU.
+ */
+ if ((attr_mask & IB_QP_PATH_MTU) &&
+ (ib_mtu_enum_to_int(attr->path_mtu) == -1 ||
+ (attr->path_mtu > IB_MTU_2048 && !ipath_mtu4096)))
+ goto inval;
+
+ if (attr_mask & IB_QP_PATH_MIG_STATE)
+ if (attr->path_mig_state != IB_MIG_MIGRATED &&
+ attr->path_mig_state != IB_MIG_REARM)
+ goto inval;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC)
+ goto inval;
+
+ switch (new_state) {
+ case IB_QPS_RESET:
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->timerwait))
+ list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ spin_unlock_irq(&qp->s_lock);
+ /* Stop the sending tasklet */
+ tasklet_kill(&qp->s_task);
+ wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+ spin_lock_irq(&qp->s_lock);
+ }
+ ipath_reset_qp(qp, ibqp->qp_type);
+ break;
+
+ case IB_QPS_SQD:
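+ /* Still draining if sent requests (s_last..s_cur) await completion. */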
+ qp->s_draining = qp->s_last != qp->s_cur;
+ qp->state = new_state;
+ break;
+
+ case IB_QPS_SQE:
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ goto inval;
+ qp->state = new_state;
+ break;
+
+ case IB_QPS_ERR:
+ lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ break;
+
+ default:
+ qp->state = new_state;
+ break;
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX)
+ qp->s_pkey_index = attr->pkey_index;
+
+ if (attr_mask & IB_QP_DEST_QPN)
+ qp->remote_qpn = attr->dest_qp_num;
+
+ if (attr_mask & IB_QP_SQ_PSN) {
+ qp->s_psn = qp->s_next_psn = attr->sq_psn;
+ qp->s_last_psn = qp->s_next_psn - 1;
+ }
+
+ if (attr_mask & IB_QP_RQ_PSN)
+ qp->r_psn = attr->rq_psn;
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ qp->qp_access_flags = attr->qp_access_flags;
+
+ if (attr_mask & IB_QP_AV) {
+ qp->remote_ah_attr = attr->ah_attr;
+ qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate);
+ }
+
+ if (attr_mask & IB_QP_PATH_MTU)
+ qp->path_mtu = attr->path_mtu;
+
+ if (attr_mask & IB_QP_RETRY_CNT)
+ qp->s_retry = qp->s_retry_cnt = attr->retry_cnt;
+
+ if (attr_mask & IB_QP_RNR_RETRY) {
+ qp->s_rnr_retry = attr->rnr_retry;
+ if (qp->s_rnr_retry > 7)
+ qp->s_rnr_retry = 7;
+ qp->s_rnr_retry_cnt = qp->s_rnr_retry;
+ }
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER)
+ qp->r_min_rnr_timer = attr->min_rnr_timer;
+
+ if (attr_mask & IB_QP_TIMEOUT)
+ qp->timeout = attr->timeout;
+
+ if (attr_mask & IB_QP_QKEY)
+ qp->qkey = attr->qkey;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+ qp->s_max_rd_atomic = attr->max_rd_atomic;
+
+ spin_unlock_irq(&qp->s_lock);
+
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ ret = 0;
+ goto bail;
+
+inval:
+ spin_unlock_irq(&qp->s_lock);
+ ret = -EINVAL;
+
+bail:
+ return ret;
+}
+
+int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+ struct ipath_qp *qp = to_iqp(ibqp);
+
+ attr->qp_state = qp->state;
+ attr->cur_qp_state = attr->qp_state;
+ attr->path_mtu = qp->path_mtu;
+ attr->path_mig_state = 0;
+ attr->qkey = qp->qkey;
+ attr->rq_psn = qp->r_psn;
+ attr->sq_psn = qp->s_next_psn;
+ attr->dest_qp_num = qp->remote_qpn;
+ attr->qp_access_flags = qp->qp_access_flags;
+ attr->cap.max_send_wr = qp->s_size - 1;
+ attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
+ attr->cap.max_send_sge = qp->s_max_sge;
+ attr->cap.max_recv_sge = qp->r_rq.max_sge;
+ attr->cap.max_inline_data = 0;
+ attr->ah_attr = qp->remote_ah_attr;
+ memset(&attr->alt_ah_attr, 0, sizeof(attr->alt_ah_attr));
+ attr->pkey_index = qp->s_pkey_index;
+ attr->alt_pkey_index = 0;
+ attr->en_sqd_async_notify = 0;
+ attr->sq_draining = qp->s_draining;
+ attr->max_rd_atomic = qp->s_max_rd_atomic;
+ attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
+ attr->min_rnr_timer = qp->r_min_rnr_timer;
+ attr->port_num = 1;
+ attr->timeout = qp->timeout;
+ attr->retry_cnt = qp->s_retry_cnt;
+ attr->rnr_retry = qp->s_rnr_retry_cnt;
+ attr->alt_port_num = 0;
+ attr->alt_timeout = 0;
+
+ init_attr->event_handler = qp->ibqp.event_handler;
+ init_attr->qp_context = qp->ibqp.qp_context;
+ init_attr->send_cq = qp->ibqp.send_cq;
+ init_attr->recv_cq = qp->ibqp.recv_cq;
+ init_attr->srq = qp->ibqp.srq;
+ init_attr->cap = attr->cap;
+ if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR)
+ init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+ else
+ init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr->qp_type = qp->ibqp.qp_type;
+ init_attr->port_num = 1;
+ return 0;
+}
+
+/**
+ * ipath_compute_aeth - compute the AETH (syndrome + MSN)
+ * @qp: the queue pair to compute the AETH for
+ *
+ * Returns the AETH.
+ */
+__be32 ipath_compute_aeth(struct ipath_qp *qp)
+{
+ u32 aeth = qp->r_msn & IPATH_MSN_MASK;
+
+ if (qp->ibqp.srq) {
+ /*
+ * Shared receive queues don't generate credits.
+ * Set the credit field to the invalid value.
+ */
+ aeth |= IPATH_AETH_CREDIT_INVAL << IPATH_AETH_CREDIT_SHIFT;
+ } else {
+ u32 min, max, x;
+ u32 credits;
+ struct ipath_rwq *wq = qp->r_rq.wq;
+ u32 head;
+ u32 tail;
+
+ /* sanity check pointers before trusting them */
+ head = wq->head;
+ if (head >= qp->r_rq.size)
+ head = 0;
+ tail = wq->tail;
+ if (tail >= qp->r_rq.size)
+ tail = 0;
+ /*
+ * Compute the number of credits available (RWQEs).
+ * XXX Not holding the r_rq.lock here so there is a small
+ * chance that the pair of reads are not atomic.
+ */
+ credits = head - tail;
+ if ((int)credits < 0)
+ credits += qp->r_rq.size;
+ /*
+ * Binary search the credit table to find the code to
+ * use.
+ */
+ min = 0;
+ max = 31;
+ for (;;) {
+ x = (min + max) / 2;
+ if (credit_table[x] == credits)
+ break;
+ if (credit_table[x] > credits)
+ max = x;
+ else if (min == x)
+ break;
+ else
+ min = x;
+ }
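+ /*
+ * x is now the largest credit code whose table value does not
+ * exceed the number of available credits.
+ */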
+ aeth |= x << IPATH_AETH_CREDIT_SHIFT;
+ }
+ return cpu_to_be32(aeth);
+}
+
+/**
+ * ipath_create_qp - create a queue pair for a device
+ * @ibpd: the protection domain whose device we create the queue pair for
+ * @init_attr: the attributes of the queue pair
+ * @udata: unused by InfiniPath
+ *
+ * Returns the queue pair on success, otherwise returns an errno.
+ *
+ * Called by the ib_create_qp() core verbs function.
+ */
+struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ipath_qp *qp;
+ int err;
+ struct ipath_swqe *swq = NULL;
+ struct ipath_ibdev *dev;
+ size_t sz;
+ struct ib_qp *ret;
+
+ if (init_attr->create_flags) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
+ if (init_attr->cap.max_send_sge > ib_ipath_max_sges ||
+ init_attr->cap.max_send_wr > ib_ipath_max_qp_wrs) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
+ /* Check receive queue parameters if no SRQ is specified. */
+ if (!init_attr->srq) {
+ if (init_attr->cap.max_recv_sge > ib_ipath_max_sges ||
+ init_attr->cap.max_recv_wr > ib_ipath_max_qp_wrs) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ if (init_attr->cap.max_send_sge +
+ init_attr->cap.max_send_wr +
+ init_attr->cap.max_recv_sge +
+ init_attr->cap.max_recv_wr == 0) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ }
+
+ switch (init_attr->qp_type) {
+ case IB_QPT_UC:
+ case IB_QPT_RC:
+ case IB_QPT_UD:
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
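+ /* Each send WQE is a struct ipath_swqe followed by max_send_sge SGEs. */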
+ sz = sizeof(struct ipath_sge) *
+ init_attr->cap.max_send_sge +
+ sizeof(struct ipath_swqe);
+ swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
+ if (swq == NULL) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+ sz = sizeof(*qp);
+ if (init_attr->srq) {
+ struct ipath_srq *srq = to_isrq(init_attr->srq);
+
+ sz += sizeof(*qp->r_sg_list) *
+ srq->rq.max_sge;
+ } else
+ sz += sizeof(*qp->r_sg_list) *
+ init_attr->cap.max_recv_sge;
+ qp = kmalloc(sz, GFP_KERNEL);
+ if (!qp) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_swq;
+ }
+ if (init_attr->srq) {
+ sz = 0;
+ qp->r_rq.size = 0;
+ qp->r_rq.max_sge = 0;
+ qp->r_rq.wq = NULL;
+ init_attr->cap.max_recv_wr = 0;
+ init_attr->cap.max_recv_sge = 0;
+ } else {
+ qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
+ qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
+ sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
+ sizeof(struct ipath_rwqe);
+ qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
+ qp->r_rq.size * sz);
+ if (!qp->r_rq.wq) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_qp;
+ }
+ }
+
+ /*
+ * ib_create_qp() will initialize qp->ibqp
+ * except for qp->ibqp.qp_num.
+ */
+ spin_lock_init(&qp->s_lock);
+ spin_lock_init(&qp->r_rq.lock);
+ atomic_set(&qp->refcount, 0);
+ init_waitqueue_head(&qp->wait);
+ init_waitqueue_head(&qp->wait_dma);
+ tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
+ INIT_LIST_HEAD(&qp->piowait);
+ INIT_LIST_HEAD(&qp->timerwait);
+ qp->state = IB_QPS_RESET;
+ qp->s_wq = swq;
+ qp->s_size = init_attr->cap.max_send_wr + 1;
+ qp->s_max_sge = init_attr->cap.max_send_sge;
+ if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
+ qp->s_flags = IPATH_S_SIGNAL_REQ_WR;
+ else
+ qp->s_flags = 0;
+ dev = to_idev(ibpd->device);
+ err = ipath_alloc_qpn(&dev->qp_table, qp,
+ init_attr->qp_type);
+ if (err) {
+ ret = ERR_PTR(err);
+ vfree(qp->r_rq.wq);
+ goto bail_qp;
+ }
+ qp->ip = NULL;
+ qp->s_tx = NULL;
+ ipath_reset_qp(qp, init_attr->qp_type);
+ break;
+
+ default:
+ /* Don't support raw QPs */
+ ret = ERR_PTR(-ENOSYS);
+ goto bail;
+ }
+
+ init_attr->cap.max_inline_data = 0;
+
+ /*
+ * Return the address of the RWQ as the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ if (!qp->r_rq.wq) {
+ __u64 offset = 0;
+
+ err = ib_copy_to_udata(udata, &offset,
+ sizeof(offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
+ }
+ } else {
+ u32 s = sizeof(struct ipath_rwq) +
+ qp->r_rq.size * sz;
+
+ qp->ip =
+ ipath_create_mmap_info(dev, s,
+ ibpd->uobject->context,
+ qp->r_rq.wq);
+ if (!qp->ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ err = ib_copy_to_udata(udata, &(qp->ip->offset),
+ sizeof(qp->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
+ }
+ }
+ }
+
+ spin_lock(&dev->n_qps_lock);
+ if (dev->n_qps_allocated == ib_ipath_max_qps) {
+ spin_unlock(&dev->n_qps_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ dev->n_qps_allocated++;
+ spin_unlock(&dev->n_qps_lock);
+
+ if (qp->ip) {
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
+ ret = &qp->ibqp;
+ goto bail;
+
+bail_ip:
+ if (qp->ip)
+ kref_put(&qp->ip->ref, ipath_release_mmap_info);
+ else
+ vfree(qp->r_rq.wq);
+ ipath_free_qp(&dev->qp_table, qp);
+ free_qpn(&dev->qp_table, qp->ibqp.qp_num);
+bail_qp:
+ kfree(qp);
+bail_swq:
+ vfree(swq);
+bail:
+ return ret;
+}
+
+/**
+ * ipath_destroy_qp - destroy a queue pair
+ * @ibqp: the queue pair to destroy
+ *
+ * Returns 0 on success.
+ *
+ * Note that this can be called while the QP is actively sending or
+ * receiving!
+ */
+int ipath_destroy_qp(struct ib_qp *ibqp)
+{
+ struct ipath_qp *qp = to_iqp(ibqp);
+ struct ipath_ibdev *dev = to_idev(ibqp->device);
+
+ /* Make sure HW and driver activity is stopped. */
+ spin_lock_irq(&qp->s_lock);
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->timerwait))
+ list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ spin_unlock_irq(&qp->s_lock);
+ /* Stop the sending tasklet */
+ tasklet_kill(&qp->s_task);
+ wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+ } else
+ spin_unlock_irq(&qp->s_lock);
+
+ ipath_free_qp(&dev->qp_table, qp);
+
+ if (qp->s_tx) {
+ atomic_dec(&qp->refcount);
+ if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
+ kfree(qp->s_tx->txreq.map_addr);
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
+ spin_unlock_irq(&dev->pending_lock);
+ qp->s_tx = NULL;
+ }
+
+ wait_event(qp->wait, !atomic_read(&qp->refcount));
+
+ /* all user's cleaned up, mark it available */
+ free_qpn(&dev->qp_table, qp->ibqp.qp_num);
+ spin_lock(&dev->n_qps_lock);
+ dev->n_qps_allocated--;
+ spin_unlock(&dev->n_qps_lock);
+
+ if (qp->ip)
+ kref_put(&qp->ip->ref, ipath_release_mmap_info);
+ else
+ vfree(qp->r_rq.wq);
+ vfree(qp->s_wq);
+ kfree(qp);
+ return 0;
+}
+
+/**
+ * ipath_init_qp_table - initialize the QP table for a device
+ * @idev: the device whose QP table we're initializing
+ * @size: the size of the QP table
+ *
+ * Returns 0 on success, otherwise returns an errno.
+ */
+int ipath_init_qp_table(struct ipath_ibdev *idev, int size)
+{
+ int i;
+ int ret;
+
+ idev->qp_table.last = 1; /* QPN 0 and 1 are special. */
+ idev->qp_table.max = size;
+ idev->qp_table.nmaps = 1;
+ idev->qp_table.table = kzalloc(size * sizeof(*idev->qp_table.table),
+ GFP_KERNEL);
+ if (idev->qp_table.table == NULL) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(idev->qp_table.map); i++) {
+ atomic_set(&idev->qp_table.map[i].n_free, BITS_PER_PAGE);
+ idev->qp_table.map[i].page = NULL;
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_get_credit - update the send credit state from an incoming AETH
+ * @qp: the QP whose credits to update
+ * @aeth: the Acknowledge Extended Transport Header
+ *
+ * The QP s_lock should be held.
+ */
+void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
+{
+ u32 credit = (aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK;
+
+ /*
+ * If the credit is invalid, we can send
+ * as many packets as we like. Otherwise, we have to
+ * honor the credit field.
+ */
+ if (credit == IPATH_AETH_CREDIT_INVAL)
+ qp->s_lsn = (u32) -1;
+ else if (qp->s_lsn != (u32) -1) {
+ /* Compute new LSN (i.e., MSN + credit) */
+ credit = (aeth + credit_table[credit]) & IPATH_MSN_MASK;
+ if (ipath_cmp24(credit, qp->s_lsn) > 0)
+ qp->s_lsn = credit;
+ }
+
+ /* Restart sending if it was blocked due to lack of credits. */
+ if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
+ qp->s_cur != qp->s_head &&
+ (qp->s_lsn == (u32) -1 ||
+ ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
+ qp->s_lsn + 1) <= 0))
+ ipath_schedule_send(qp);
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
new file mode 100644
index 0000000..7b93cda
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -0,0 +1,1970 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/io.h>
+
+#include "ipath_verbs.h"
+#include "ipath_kernel.h"
+
+/* cut down ridiculously long IB macro names */
+#define OP(x) IB_OPCODE_RC_##x
+
+static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
+ u32 psn, u32 pmtu)
+{
+ u32 len;
+
+ len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
+ ss->sge = wqe->sg_list[0];
+ ss->sg_list = wqe->sg_list + 1;
+ ss->num_sge = wqe->wr.num_sge;
+ ipath_skip_sge(ss, len);
+ return wqe->length - len;
+}
+
+/**
+ * ipath_init_restart - initialize the qp->s_sge after a restart
+ * @qp: the QP whose SGE we're restarting
+ * @wqe: the work queue to initialize the QP's SGE from
+ *
+ * The QP s_lock should be held and interrupts disabled.
+ */
+static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
+{
+ struct ipath_ibdev *dev;
+
+ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
+ ib_mtu_enum_to_int(qp->path_mtu));
+ dev = to_idev(qp->ibqp.device);
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->timerwait))
+ list_add_tail(&qp->timerwait,
+ &dev->pending[dev->pending_index]);
+ spin_unlock(&dev->pending_lock);
+}
+
+/**
+ * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
+ * @dev: a pointer to the ipath device
+ * @qp: a pointer to the QP
+ * @ohdr: a pointer to the IB header being constructed
+ * @pmtu: the path MTU
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ * Note that we are in the responder's side of the QP context.
+ * Note the QP s_lock must be held.
+ */
+static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
+ struct ipath_other_headers *ohdr, u32 pmtu)
+{
+ struct ipath_ack_entry *e;
+ u32 hwords;
+ u32 len;
+ u32 bth0;
+ u32 bth2;
+
+ /* Don't send an ACK if we aren't supposed to. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto bail;
+
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+
+ switch (qp->s_ack_state) {
+ case OP(RDMA_READ_RESPONSE_LAST):
+ case OP(RDMA_READ_RESPONSE_ONLY):
+ case OP(ATOMIC_ACKNOWLEDGE):
+ /*
+ * We can increment the tail pointer now that the last
+ * response has been sent instead of only being
+ * constructed.
+ */
+ if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
+ qp->s_tail_ack_queue = 0;
+ /* FALLTHROUGH */
+ case OP(SEND_ONLY):
+ case OP(ACKNOWLEDGE):
+ /* Check for no next entry in the queue. */
+ if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
+ if (qp->s_flags & IPATH_S_ACK_PENDING)
+ goto normal;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ goto bail;
+ }
+
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ if (e->opcode == OP(RDMA_READ_REQUEST)) {
+ /* Copy SGE state in case we need to resend */
+ qp->s_ack_rdma_sge = e->rdma_sge;
+ qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ len = e->rdma_sge.sge.sge_length;
+ if (len > pmtu) {
+ len = pmtu;
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
+ } else {
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+ e->sent = 1;
+ }
+ ohdr->u.aeth = ipath_compute_aeth(qp);
+ hwords++;
+ qp->s_ack_rdma_psn = e->psn;
+ bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
+ } else {
+ /* COMPARE_SWAP or FETCH_ADD */
+ qp->s_cur_sge = NULL;
+ len = 0;
+ qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+ ohdr->u.at.aeth = ipath_compute_aeth(qp);
+ ohdr->u.at.atomic_ack_eth[0] =
+ cpu_to_be32(e->atomic_data >> 32);
+ ohdr->u.at.atomic_ack_eth[1] =
+ cpu_to_be32(e->atomic_data);
+ hwords += sizeof(ohdr->u.at) / sizeof(u32);
+ bth2 = e->psn;
+ e->sent = 1;
+ }
+ bth0 = qp->s_ack_state << 24;
+ break;
+
+ case OP(RDMA_READ_RESPONSE_FIRST):
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(RDMA_READ_RESPONSE_MIDDLE):
+ len = qp->s_ack_rdma_sge.sge.sge_length;
+ if (len > pmtu)
+ len = pmtu;
+ else {
+ ohdr->u.aeth = ipath_compute_aeth(qp);
+ hwords++;
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+ qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
+ }
+ bth0 = qp->s_ack_state << 24;
+ bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
+ break;
+
+ default:
+ normal:
+ /*
+ * Send a regular ACK.
+ * Set the s_ack_state so we wait until after sending
+ * the ACK before setting s_ack_state to ACKNOWLEDGE
+ * (see above).
+ */
+ qp->s_ack_state = OP(SEND_ONLY);
+ qp->s_flags &= ~IPATH_S_ACK_PENDING;
+ qp->s_cur_sge = NULL;
+ if (qp->s_nak_state)
+ ohdr->u.aeth =
+ cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
+ (qp->s_nak_state <<
+ IPATH_AETH_CREDIT_SHIFT));
+ else
+ ohdr->u.aeth = ipath_compute_aeth(qp);
+ hwords++;
+ len = 0;
+ bth0 = OP(ACKNOWLEDGE) << 24;
+ bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
+ }
+ qp->s_hdrwords = hwords;
+ qp->s_cur_size = len;
+ ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
+ return 1;
+
+bail:
+ return 0;
+}
+
+/**
+ * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
+ * @qp: a pointer to the QP
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ */
+int ipath_make_rc_req(struct ipath_qp *qp)
+{
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ struct ipath_other_headers *ohdr;
+ struct ipath_sge_state *ss;
+ struct ipath_swqe *wqe;
+ u32 hwords;
+ u32 len;
+ u32 bth0;
+ u32 bth2;
+ u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ char newreq;
+ unsigned long flags;
+ int ret = 0;
+
+ ohdr = &qp->s_hdr.u.oth;
+ if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ ohdr = &qp->s_hdr.u.l.oth;
+
+ /*
+ * The lock is needed to synchronize between the sending tasklet,
+ * the receive interrupt handler, and timeout resends.
+ */
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Sending responses has higher priority than sending requests. */
+ if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
+ (qp->s_flags & IPATH_S_ACK_PENDING) ||
+ qp->s_ack_state != OP(ACKNOWLEDGE)) &&
+ ipath_make_rc_ack(dev, qp, ohdr, pmtu))
+ goto done;
+
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
+ }
+
+ /* Leave BUSY set until RNR timeout. */
+ if (qp->s_rnr_timeout) {
+ qp->s_flags |= IPATH_S_WAITING;
+ goto bail;
+ }
+
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+ bth0 = 1 << 22; /* Set M bit */
+
+ /* Send a request. */
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ switch (qp->s_state) {
+ default:
+ if (!(ib_ipath_state_ops[qp->state] &
+ IPATH_PROCESS_NEXT_SEND_OK))
+ goto bail;
+ /*
+ * Resend an old request or start a new one.
+ *
+ * We keep track of the current SWQE so that
+ * we don't reset the "furthest progress" state
+ * if we need to back up.
+ */
+ newreq = 0;
+ if (qp->s_cur == qp->s_tail) {
+ /* Check if send work queue is empty. */
+ if (qp->s_tail == qp->s_head)
+ goto bail;
+ /*
+ * If a fence is requested, wait for previous
+ * RDMA read and atomic operations to finish.
+ */
+ if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
+ qp->s_num_rd_atomic) {
+ qp->s_flags |= IPATH_S_FENCE_PENDING;
+ goto bail;
+ }
+ wqe->psn = qp->s_next_psn;
+ newreq = 1;
+ }
+ /*
+ * Note that we have to be careful not to modify the
+ * original work request since we may need to resend
+ * it.
+ */
+ len = wqe->length;
+ ss = &qp->s_sge;
+ bth2 = 0;
+ switch (wqe->wr.opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ /* If no credit, return. */
+ if (qp->s_lsn != (u32) -1 &&
+ ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
+ goto bail;
+ }
+ wqe->lpsn = wqe->psn;
+ if (len > pmtu) {
+ wqe->lpsn += (len - 1) / pmtu;
+ qp->s_state = OP(SEND_FIRST);
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND)
+ qp->s_state = OP(SEND_ONLY);
+ else {
+ qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ }
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= 1 << 23;
+ bth2 = 1 << 31; /* Request ACK. */
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ if (newreq && qp->s_lsn != (u32) -1)
+ qp->s_lsn++;
+ /* FALLTHROUGH */
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ /* If no credit, return. */
+ if (qp->s_lsn != (u32) -1 &&
+ ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
+ goto bail;
+ }
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(len);
+ hwords += sizeof(struct ib_reth) / sizeof(u32);
+ wqe->lpsn = wqe->psn;
+ if (len > pmtu) {
+ wqe->lpsn += (len - 1) / pmtu;
+ qp->s_state = OP(RDMA_WRITE_FIRST);
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ qp->s_state = OP(RDMA_WRITE_ONLY);
+ else {
+ qp->s_state =
+ OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
+ /* Immediate data comes after RETH */
+ ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= 1 << 23;
+ }
+ bth2 = 1 << 31; /* Request ACK. */
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case IB_WR_RDMA_READ:
+ /*
+ * Don't allow more operations to be started
+ * than the QP limits allow.
+ */
+ if (newreq) {
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= IPATH_S_RDMAR_PENDING;
+ goto bail;
+ }
+ qp->s_num_rd_atomic++;
+ if (qp->s_lsn != (u32) -1)
+ qp->s_lsn++;
+ /*
+ * Adjust s_next_psn to count the
+ * expected number of responses.
+ */
+ if (len > pmtu)
+ qp->s_next_psn += (len - 1) / pmtu;
+ wqe->lpsn = qp->s_next_psn++;
+ }
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(len);
+ qp->s_state = OP(RDMA_READ_REQUEST);
+ hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
+ ss = NULL;
+ len = 0;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ /*
+ * Don't allow more operations to be started
+ * than the QP limits allow.
+ */
+ if (newreq) {
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= IPATH_S_RDMAR_PENDING;
+ goto bail;
+ }
+ qp->s_num_rd_atomic++;
+ if (qp->s_lsn != (u32) -1)
+ qp->s_lsn++;
+ wqe->lpsn = wqe->psn;
+ }
+ if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ qp->s_state = OP(COMPARE_SWAP);
+ ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+ wqe->wr.wr.atomic.swap);
+ ohdr->u.atomic_eth.compare_data = cpu_to_be64(
+ wqe->wr.wr.atomic.compare_add);
+ } else {
+ qp->s_state = OP(FETCH_ADD);
+ ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+ wqe->wr.wr.atomic.compare_add);
+ ohdr->u.atomic_eth.compare_data = 0;
+ }
+ ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
+ wqe->wr.wr.atomic.remote_addr >> 32);
+ ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
+ wqe->wr.wr.atomic.remote_addr);
+ ohdr->u.atomic_eth.rkey = cpu_to_be32(
+ wqe->wr.wr.atomic.rkey);
+ hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
+ ss = NULL;
+ len = 0;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ default:
+ goto bail;
+ }
+ qp->s_sge.sge = wqe->sg_list[0];
+ qp->s_sge.sg_list = wqe->sg_list + 1;
+ qp->s_sge.num_sge = wqe->wr.num_sge;
+ qp->s_len = wqe->length;
+ if (newreq) {
+ qp->s_tail++;
+ if (qp->s_tail >= qp->s_size)
+ qp->s_tail = 0;
+ }
+ bth2 |= qp->s_psn & IPATH_PSN_MASK;
+ if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ qp->s_psn = wqe->lpsn + 1;
+ else {
+ qp->s_psn++;
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+ qp->s_next_psn = qp->s_psn;
+ }
+ /*
+ * Put the QP on the pending list so lost ACKs will cause
+ * a retry. More than one request can be pending so the
+ * QP may already be on the dev->pending list.
+ */
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->timerwait))
+ list_add_tail(&qp->timerwait,
+ &dev->pending[dev->pending_index]);
+ spin_unlock(&dev->pending_lock);
+ break;
+
+ case OP(RDMA_READ_RESPONSE_FIRST):
+ /*
+ * This case can only happen if a send is restarted.
+ * See ipath_restart_rc().
+ */
+ ipath_init_restart(qp, wqe);
+ /* FALLTHROUGH */
+ case OP(SEND_FIRST):
+ qp->s_state = OP(SEND_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(SEND_MIDDLE):
+ bth2 = qp->s_psn++ & IPATH_PSN_MASK;
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+ qp->s_next_psn = qp->s_psn;
+ ss = &qp->s_sge;
+ len = qp->s_len;
+ if (len > pmtu) {
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND)
+ qp->s_state = OP(SEND_LAST);
+ else {
+ qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ }
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= 1 << 23;
+ bth2 |= 1 << 31; /* Request ACK. */
+ qp->s_cur++;
+ if (qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case OP(RDMA_READ_RESPONSE_LAST):
+ /*
+ * This case can only happen if a RDMA write is restarted.
+ * See ipath_restart_rc().
+ */
+ ipath_init_restart(qp, wqe);
+ /* FALLTHROUGH */
+ case OP(RDMA_WRITE_FIRST):
+ qp->s_state = OP(RDMA_WRITE_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(RDMA_WRITE_MIDDLE):
+ bth2 = qp->s_psn++ & IPATH_PSN_MASK;
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+ qp->s_next_psn = qp->s_psn;
+ ss = &qp->s_sge;
+ len = qp->s_len;
+ if (len > pmtu) {
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ qp->s_state = OP(RDMA_WRITE_LAST);
+ else {
+ qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= 1 << 23;
+ }
+ bth2 |= 1 << 31; /* Request ACK. */
+ qp->s_cur++;
+ if (qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case OP(RDMA_READ_RESPONSE_MIDDLE):
+ /*
+ * This case can only happen if a RDMA read is restarted.
+ * See ipath_restart_rc().
+ */
+ ipath_init_restart(qp, wqe);
+ len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
+ qp->s_state = OP(RDMA_READ_REQUEST);
+ hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
+ bth2 = qp->s_psn++ & IPATH_PSN_MASK;
+ if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+ qp->s_next_psn = qp->s_psn;
+ ss = NULL;
+ len = 0;
+ qp->s_cur++;
+ if (qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+ }
+ if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
+ bth2 |= 1 << 31; /* Request ACK. */
+ qp->s_len -= len;
+ qp->s_hdrwords = hwords;
+ qp->s_cur_sge = ss;
+ qp->s_cur_size = len;
+ ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
+done:
+ ret = 1;
+ goto unlock;
+
+bail:
+ qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
+
+/**
+ * send_rc_ack - Construct an ACK packet and send it
+ * @qp: a pointer to the QP
+ *
+ * This is called from ipath_rc_rcv() and only uses the receive
+ * side QP state.
+ * Note that RDMA reads and atomics are handled in the
+ * send side QP state and tasklet.
+ */
+static void send_rc_ack(struct ipath_qp *qp)
+{
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ struct ipath_devdata *dd;
+ u16 lrh0;
+ u32 bth0;
+ u32 hwords;
+ u32 __iomem *piobuf;
+ struct ipath_ib_header hdr;
+ struct ipath_other_headers *ohdr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
+ if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
+ (qp->s_flags & IPATH_S_ACK_PENDING) ||
+ qp->s_ack_state != OP(ACKNOWLEDGE))
+ goto queue_ack;
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Don't try to send ACKs if the link isn't ACTIVE */
+ dd = dev->dd;
+ if (!(dd->ipath_flags & IPATH_LINKACTIVE))
+ goto done;
+
+ piobuf = ipath_getpiobuf(dd, 0, NULL);
+ if (!piobuf) {
+ /*
+ * We are out of PIO buffers at the moment.
+ * Pass responsibility for sending the ACK to the
+ * send tasklet so that when a PIO buffer becomes
+ * available, the ACK is sent ahead of other outgoing
+ * packets.
+ */
+ spin_lock_irqsave(&qp->s_lock, flags);
+ goto queue_ack;
+ }
+
+ /* Construct the header. */
+ ohdr = &hdr.u.oth;
+ lrh0 = IPATH_LRH_BTH;
+ /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
+ hwords = 6;
+ if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ hwords += ipath_make_grh(dev, &hdr.u.l.grh,
+ &qp->remote_ah_attr.grh,
+ hwords, 0);
+ ohdr = &hdr.u.l.oth;
+ lrh0 = IPATH_LRH_GRH;
+ }
+ /* read pkey_index w/o lock (it's atomic) */
+ bth0 = ipath_get_pkey(dd, qp->s_pkey_index) |
+ (OP(ACKNOWLEDGE) << 24) | (1 << 22);
+ if (qp->r_nak_state)
+ ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
+ (qp->r_nak_state <<
+ IPATH_AETH_CREDIT_SHIFT));
+ else
+ ohdr->u.aeth = ipath_compute_aeth(qp);
+ lrh0 |= qp->remote_ah_attr.sl << 4;
+ hdr.lrh[0] = cpu_to_be16(lrh0);
+ hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
+ hdr.lrh[3] = cpu_to_be16(dd->ipath_lid |
+ qp->remote_ah_attr.src_path_bits);
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
+ ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
+
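+ /* Write the PIO buffer control word (packet length), then copy in the header. */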
+ writeq(hwords + 1, piobuf);
+
+ if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
+ u32 *hdrp = (u32 *) &hdr;
+
+ ipath_flush_wc();
+ __iowrite32_copy(piobuf + 2, hdrp, hwords - 1);
+ ipath_flush_wc();
+ __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
+ } else
+ __iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords);
+
+ ipath_flush_wc();
+
+ dev->n_unicast_xmit++;
+ goto done;
+
+queue_ack:
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
+ dev->n_rc_qacks++;
+ qp->s_flags |= IPATH_S_ACK_PENDING;
+ qp->s_nak_state = qp->r_nak_state;
+ qp->s_ack_psn = qp->r_ack_psn;
+
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+done:
+ return;
+}
+
+/**
+ * reset_psn - reset the QP state to send starting from PSN
+ * @qp: the QP
+ * @psn: the packet sequence number to restart at
+ *
+ * This is called from ipath_rc_rcv() to process an incoming RC ACK
+ * for the given QP.
+ * Called at interrupt level with the QP s_lock held.
+ */
+static void reset_psn(struct ipath_qp *qp, u32 psn)
+{
+ u32 n = qp->s_last;
+ struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
+ u32 opcode;
+
+ qp->s_cur = n;
+
+ /*
+ * If we are starting the request from the beginning,
+ * let the normal send code handle initialization.
+ */
+ if (ipath_cmp24(psn, wqe->psn) <= 0) {
+ qp->s_state = OP(SEND_LAST);
+ goto done;
+ }
+
+ /* Find the work request opcode corresponding to the given PSN. */
+ opcode = wqe->wr.opcode;
+ for (;;) {
+ int diff;
+
+ if (++n == qp->s_size)
+ n = 0;
+ if (n == qp->s_tail)
+ break;
+ wqe = get_swqe_ptr(qp, n);
+ diff = ipath_cmp24(psn, wqe->psn);
+ if (diff < 0)
+ break;
+ qp->s_cur = n;
+ /*
+ * If we are starting the request from the beginning,
+ * let the normal send code handle initialization.
+ */
+ if (diff == 0) {
+ qp->s_state = OP(SEND_LAST);
+ goto done;
+ }
+ opcode = wqe->wr.opcode;
+ }
+
+ /*
+ * Set the state to restart in the middle of a request.
+ * Don't change the s_sge, s_cur_sge, or s_cur_size.
+ * See ipath_make_rc_req().
+ */
+ switch (opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
+ break;
+
+ case IB_WR_RDMA_READ:
+ qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
+ break;
+
+ default:
+ /*
+ * This case shouldn't happen since there is
+ * only one PSN per request.
+ */
+ qp->s_state = OP(SEND_LAST);
+ }
+done:
+ qp->s_psn = psn;
+}
+
+/**
+ * ipath_restart_rc - back up requester to resend the last un-ACKed request
+ * @qp: the QP to restart
+ * @psn: packet sequence number for the request
+ *
+ * The QP s_lock should be held and interrupts disabled.
+ */
+void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
+{
+ struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
+ struct ipath_ibdev *dev;
+
+ if (qp->s_retry == 0) {
+ ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ goto bail;
+ }
+ qp->s_retry--;
+
+ /*
+ * Remove the QP from the timeout queue.
+ * Note: it may already have been removed by ipath_ib_timer().
+ */
+ dev = to_idev(qp->ibqp.device);
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->timerwait))
+ list_del_init(&qp->timerwait);
+ if (!list_empty(&qp->piowait))
+ list_del_init(&qp->piowait);
+ spin_unlock(&dev->pending_lock);
+
+ if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ dev->n_rc_resends++;
+ else
+ dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
+
+ reset_psn(qp, psn);
+ ipath_schedule_send(qp);
+
+bail:
+ return;
+}
+
+static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
+{
+ qp->s_last_psn = psn;
+}
+
+/**
+ * do_rc_ack - process an incoming RC ACK
+ * @qp: the QP the ACK came in on
+ * @aeth: the Acknowledge Extended Transport Header from the packet
+ * @psn: the packet sequence number of the ACK
+ * @opcode: the opcode of the incoming response packet
+ * @val: the value returned in an atomic acknowledge, if any
+ *
+ * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
+ * for the given QP.
+ * Called at interrupt level with the QP s_lock held and interrupts disabled.
+ * Returns 1 if OK, 0 if current operation should be aborted (NAK).
+ */
+static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
+ u64 val)
+{
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ struct ib_wc wc;
+ enum ib_wc_status status;
+ struct ipath_swqe *wqe;
+ int ret = 0;
+ u32 ack_psn;
+ int diff;
+
+ /*
+ * Remove the QP from the timeout queue (or RNR timeout queue).
+ * If ipath_ib_timer() has already removed it,
+ * it's OK since we hold the QP s_lock and ipath_restart_rc()
+ * just won't find anything to restart if we ACK everything.
+ */
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->timerwait))
+ list_del_init(&qp->timerwait);
+ spin_unlock(&dev->pending_lock);
+
+ /*
+ * Note that NAKs implicitly ACK outstanding SEND and RDMA write
+ * requests and implicitly NAK RDMA read and atomic requests issued
+ * before the NAK'ed request. The MSN won't include the NAK'ed
+ * request but will include any ACK'ed requests.
+ */
+ ack_psn = psn;
+ if (aeth >> 29)
+ ack_psn--;
+ wqe = get_swqe_ptr(qp, qp->s_last);
+
+ /*
+ * The MSN might be for a later WQE than the PSN indicates so
+ * only complete WQEs that the PSN finishes.
+ */
+ while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
+ /*
+ * RDMA_READ_RESPONSE_ONLY is a special case since
+ * we want to generate completion events for everything
+ * before the RDMA read, copy the data, then generate
+ * the completion for the read.
+ */
+ if (wqe->wr.opcode == IB_WR_RDMA_READ &&
+ opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
+ diff == 0) {
+ ret = 1;
+ goto bail;
+ }
+ /*
+ * If this request is a RDMA read or atomic, and the ACK is
+ * for a later operation, this ACK NAKs the RDMA read or
+ * atomic. In other words, only a RDMA_READ_LAST or ONLY
+ * can ACK a RDMA read and likewise for atomic ops. Note
+ * that the NAK case can only happen if relaxed ordering is
+ * used and requests are sent after an RDMA read or atomic
+ * is sent but before the response is received.
+ */
+ if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
+ (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
+ ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
+ (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
+ /*
+ * The last valid PSN seen is the previous
+ * request's.
+ */
+ update_last_psn(qp, wqe->psn - 1);
+ /* Retry this request. */
+ ipath_restart_rc(qp, wqe->psn);
+ /*
+ * No need to process the ACK/NAK since we are
+ * restarting an earlier request.
+ */
+ goto bail;
+ }
+ if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ *(u64 *) wqe->sg_list[0].vaddr = val;
+ if (qp->s_num_rd_atomic &&
+ (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
+ qp->s_num_rd_atomic--;
+ /* Restart sending task if fence is complete */
+ if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
+ !qp->s_num_rd_atomic) ||
+ qp->s_flags & IPATH_S_RDMAR_PENDING)
+ ipath_schedule_send(qp);
+ }
+ /* Post a send completion queue entry if requested. */
+ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
+ (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
+ memset(&wc, 0, sizeof wc);
+ wc.wr_id = wqe->wr.wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
+ wc.byte_len = wqe->length;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.slid = qp->remote_ah_attr.dlid;
+ wc.sl = qp->remote_ah_attr.sl;
+ ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+ }
+ qp->s_retry = qp->s_retry_cnt;
+ /*
+ * If we are completing a request which is in the process of
+ * being resent, we can stop resending it since we know the
+ * responder has already seen it.
+ */
+ if (qp->s_last == qp->s_cur) {
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ qp->s_last = qp->s_cur;
+ if (qp->s_last == qp->s_tail)
+ break;
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ qp->s_state = OP(SEND_LAST);
+ qp->s_psn = wqe->psn;
+ } else {
+ if (++qp->s_last >= qp->s_size)
+ qp->s_last = 0;
+ if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
+ qp->s_draining = 0;
+ if (qp->s_last == qp->s_tail)
+ break;
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ }
+ }
+
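+ /* The top three bits of the AETH distinguish ACK (0), RNR NAK (1), and NAK (3). */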
+ switch (aeth >> 29) {
+ case 0: /* ACK */
+ dev->n_rc_acks++;
+ /* If this is a partial ACK, reset the retransmit timer. */
+ if (qp->s_last != qp->s_tail) {
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->timerwait))
+ list_add_tail(&qp->timerwait,
+ &dev->pending[dev->pending_index]);
+ spin_unlock(&dev->pending_lock);
+ /*
+ * If we get a partial ACK for a resent operation,
+ * we can stop resending the earlier packets and
+ * continue with the next packet the receiver wants.
+ */
+ if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+ reset_psn(qp, psn + 1);
+ ipath_schedule_send(qp);
+ }
+ } else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
+ qp->s_state = OP(SEND_LAST);
+ qp->s_psn = psn + 1;
+ }
+ ipath_get_credit(qp, aeth);
+ qp->s_rnr_retry = qp->s_rnr_retry_cnt;
+ qp->s_retry = qp->s_retry_cnt;
+ update_last_psn(qp, psn);
+ ret = 1;
+ goto bail;
+
+ case 1: /* RNR NAK */
+ dev->n_rnr_naks++;
+ if (qp->s_last == qp->s_tail)
+ goto bail;
+ if (qp->s_rnr_retry == 0) {
+ status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto class_b;
+ }
+ if (qp->s_rnr_retry_cnt < 7)
+ qp->s_rnr_retry--;
+
+ /* The last valid PSN is the previous PSN. */
+ update_last_psn(qp, psn - 1);
+
+ if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ dev->n_rc_resends++;
+ else
+ dev->n_rc_resends +=
+ (qp->s_psn - psn) & IPATH_PSN_MASK;
+
+ reset_psn(qp, psn);
+
+ qp->s_rnr_timeout =
+ ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
+ IPATH_AETH_CREDIT_MASK];
+ ipath_insert_rnr_queue(qp);
+ ipath_schedule_send(qp);
+ goto bail;
+
+ case 3: /* NAK */
+ if (qp->s_last == qp->s_tail)
+ goto bail;
+ /* The last valid PSN is the previous PSN. */
+ update_last_psn(qp, psn - 1);
+ switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
+ IPATH_AETH_CREDIT_MASK) {
+ case 0: /* PSN sequence error */
+ dev->n_seq_naks++;
+ /*
+ * Back up to the responder's expected PSN.
+ * Note that we might get a NAK in the middle of an
+ * RDMA READ response which terminates the RDMA
+ * READ.
+ */
+ ipath_restart_rc(qp, psn);
+ break;
+
+ case 1: /* Invalid Request */
+ status = IB_WC_REM_INV_REQ_ERR;
+ dev->n_other_naks++;
+ goto class_b;
+
+ case 2: /* Remote Access Error */
+ status = IB_WC_REM_ACCESS_ERR;
+ dev->n_other_naks++;
+ goto class_b;
+
+ case 3: /* Remote Operation Error */
+ status = IB_WC_REM_OP_ERR;
+ dev->n_other_naks++;
+ class_b:
+ ipath_send_complete(qp, wqe, status);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ break;
+
+ default:
+ /* Ignore other reserved NAK error codes */
+ goto reserved;
+ }
+ qp->s_rnr_retry = qp->s_rnr_retry_cnt;
+ goto bail;
+
+ default: /* 2: reserved */
+ reserved:
+ /* Ignore reserved NAK codes. */
+ goto bail;
+ }
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_rc_rcv_resp - process an incoming RC response packet
+ * @dev: the device this packet came in on
+ * @ohdr: the other headers for this packet
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP for this packet
+ * @opcode: the opcode for this packet
+ * @psn: the packet sequence number for this packet
+ * @hdrsize: the header length
+ * @pmtu: the path MTU
+ * @header_in_data: true if part of the header data is in the data buffer
+ *
+ * This is called from ipath_rc_rcv() to process an incoming RC response
+ * packet for the given QP.
+ * Called at interrupt level.
+ */
+static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
+ struct ipath_other_headers *ohdr,
+ void *data, u32 tlen,
+ struct ipath_qp *qp,
+ u32 opcode,
+ u32 psn, u32 hdrsize, u32 pmtu,
+ int header_in_data)
+{
+ struct ipath_swqe *wqe;
+ enum ib_wc_status status;
+ unsigned long flags;
+ int diff;
+ u32 pad;
+ u32 aeth;
+ u64 val;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Double check we can process this now that we hold the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto ack_done;
+
+ /* Ignore invalid responses. */
+ if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
+ goto ack_done;
+
+ /* Ignore duplicate responses. */
+ diff = ipath_cmp24(psn, qp->s_last_psn);
+ if (unlikely(diff <= 0)) {
+ /* Update credits for "ghost" ACKs */
+ if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
+ if (!header_in_data)
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ else {
+ aeth = be32_to_cpu(((__be32 *) data)[0]);
+ data += sizeof(__be32);
+ }
+ if ((aeth >> 29) == 0)
+ ipath_get_credit(qp, aeth);
+ }
+ goto ack_done;
+ }
+
+ if (unlikely(qp->s_last == qp->s_tail))
+ goto ack_done;
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ status = IB_WC_SUCCESS;
+
+ switch (opcode) {
+ case OP(ACKNOWLEDGE):
+ case OP(ATOMIC_ACKNOWLEDGE):
+ case OP(RDMA_READ_RESPONSE_FIRST):
+ if (!header_in_data)
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ else {
+ aeth = be32_to_cpu(((__be32 *) data)[0]);
+ data += sizeof(__be32);
+ }
+ if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
+ if (!header_in_data) {
+ __be32 *p = ohdr->u.at.atomic_ack_eth;
+
+ val = ((u64) be32_to_cpu(p[0]) << 32) |
+ be32_to_cpu(p[1]);
+ } else
+ val = be64_to_cpu(((__be64 *) data)[0]);
+ } else
+ val = 0;
+ if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
+ opcode != OP(RDMA_READ_RESPONSE_FIRST))
+ goto ack_done;
+ hdrsize += 4;
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
+ /*
+ * If this is a response to a resent RDMA read, we
+ * have to be careful to copy the data to the right
+ * location.
+ */
+ qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+ wqe, psn, pmtu);
+ goto read_middle;
+
+ case OP(RDMA_READ_RESPONSE_MIDDLE):
+ /* no AETH, no ACK */
+ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
+ dev->n_rdma_seq++;
+ if (qp->r_flags & IPATH_R_RDMAR_SEQ)
+ goto ack_done;
+ qp->r_flags |= IPATH_R_RDMAR_SEQ;
+ ipath_restart_rc(qp, qp->s_last_psn + 1);
+ goto ack_done;
+ }
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ read_middle:
+ if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ goto ack_len_err;
+ if (unlikely(pmtu >= qp->s_rdma_read_len))
+ goto ack_len_err;
+
+ /* We got a response so update the timeout. */
+ spin_lock(&dev->pending_lock);
+ if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
+ list_move_tail(&qp->timerwait,
+ &dev->pending[dev->pending_index]);
+ spin_unlock(&dev->pending_lock);
+
+ if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
+ qp->s_retry = qp->s_retry_cnt;
+
+ /*
+ * Update the RDMA receive state but do the copy w/o
+ * holding the locks and blocking interrupts.
+ */
+ qp->s_rdma_read_len -= pmtu;
+ update_last_psn(qp, psn);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
+ goto bail;
+
+ case OP(RDMA_READ_RESPONSE_ONLY):
+ if (!header_in_data)
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ else
+ aeth = be32_to_cpu(((__be32 *) data)[0]);
+ if (!do_rc_ack(qp, aeth, psn, opcode, 0))
+ goto ack_done;
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /*
+ * Check that the data size is >= 0 && <= pmtu.
+ * Remember to account for the AETH header (4) and
+ * ICRC (4).
+ */
+ if (unlikely(tlen < (hdrsize + pad + 8)))
+ goto ack_len_err;
+ /*
+ * If this is a response to a resent RDMA read, we
+ * have to be careful to copy the data to the right
+ * location.
+ */
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+ wqe, psn, pmtu);
+ goto read_last;
+
+ case OP(RDMA_READ_RESPONSE_LAST):
+ /* ACKs READ req. */
+ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
+ dev->n_rdma_seq++;
+ if (qp->r_flags & IPATH_R_RDMAR_SEQ)
+ goto ack_done;
+ qp->r_flags |= IPATH_R_RDMAR_SEQ;
+ ipath_restart_rc(qp, qp->s_last_psn + 1);
+ goto ack_done;
+ }
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /*
+ * Check that the data size is >= 1 && <= pmtu.
+ * Remember to account for the AETH header (4) and
+ * ICRC (4).
+ */
+ if (unlikely(tlen <= (hdrsize + pad + 8)))
+ goto ack_len_err;
+ read_last:
+ tlen -= hdrsize + pad + 8;
+ if (unlikely(tlen != qp->s_rdma_read_len))
+ goto ack_len_err;
+ if (!header_in_data)
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ else {
+ aeth = be32_to_cpu(((__be32 *) data)[0]);
+ data += sizeof(__be32);
+ }
+ ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
+ (void) do_rc_ack(qp, aeth, psn,
+ OP(RDMA_READ_RESPONSE_LAST), 0);
+ goto ack_done;
+ }
+
+ack_op_err:
+ status = IB_WC_LOC_QP_OP_ERR;
+ goto ack_err;
+
+ack_len_err:
+ status = IB_WC_LOC_LEN_ERR;
+ack_err:
+ ipath_send_complete(qp, wqe, status);
+ ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ack_done:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+bail:
+ return;
+}
+
+/**
+ * ipath_rc_rcv_error - process an incoming duplicate or error RC packet
+ * @dev: the device this packet came in on
+ * @ohdr: the other headers for this packet
+ * @data: the packet data
+ * @qp: the QP for this packet
+ * @opcode: the opcode for this packet
+ * @psn: the packet sequence number for this packet
+ * @diff: the difference between the PSN and the expected PSN
+ * @header_in_data: true if part of the header data is in the data buffer
+ *
+ * This is called from ipath_rc_rcv() to process an unexpected
+ * incoming RC packet for the given QP.
+ * Called at interrupt level.
+ * Return 1 if no more processing is needed; otherwise return 0 to
+ * schedule a response to be sent.
+ */
+static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
+ struct ipath_other_headers *ohdr,
+ void *data,
+ struct ipath_qp *qp,
+ u32 opcode,
+ u32 psn,
+ int diff,
+ int header_in_data)
+{
+ struct ipath_ack_entry *e;
+ u8 i, prev;
+ int old_req;
+ unsigned long flags;
+
+ if (diff > 0) {
+ /*
+ * Packet sequence error.
+ * A NAK will ACK earlier sends and RDMA writes.
+ * Don't queue the NAK if we already sent one.
+ */
+ if (!qp->r_nak_state) {
+ qp->r_nak_state = IB_NAK_PSN_ERROR;
+ /* Use the expected PSN. */
+ qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
+ }
+ goto done;
+ }
+
+ /*
+ * Handle a duplicate request. Don't re-execute SEND, RDMA
+ * write or atomic op. Don't NAK errors, just silently drop
+ * the duplicate request. Note that r_sge, r_len, and
+ * r_rcv_len may be in use so don't modify them.
+ *
+ * We are supposed to ACK the earliest duplicate PSN but we
+ * can coalesce an outstanding duplicate ACK. We have to
+ * send the earliest so that RDMA reads can be restarted at
+ * the requester's expected PSN.
+ *
+ * First, find where this duplicate PSN falls within the
+ * ACKs previously sent.
+ */
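+ /*
+ * For example (hypothetical PSNs): if the queue holds requests
+ * that started at PSNs 100, 110 and 120, a duplicate with PSN
+ * 115 stops the scan below at the entry whose PSN is 110, which
+ * is then replayed from the appropriate offset.
+ */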
+ psn &= IPATH_PSN_MASK;
+ e = NULL;
+ old_req = 1;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this now that we hold the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock_done;
+
+ for (i = qp->r_head_ack_queue; ; i = prev) {
+ if (i == qp->s_tail_ack_queue)
+ old_req = 0;
+ if (i)
+ prev = i - 1;
+ else
+ prev = IPATH_MAX_RDMA_ATOMIC;
+ if (prev == qp->r_head_ack_queue) {
+ e = NULL;
+ break;
+ }
+ e = &qp->s_ack_queue[prev];
+ if (!e->opcode) {
+ e = NULL;
+ break;
+ }
+ if (ipath_cmp24(psn, e->psn) >= 0) {
+ if (prev == qp->s_tail_ack_queue)
+ old_req = 0;
+ break;
+ }
+ }
+ switch (opcode) {
+ case OP(RDMA_READ_REQUEST): {
+ struct ib_reth *reth;
+ u32 offset;
+ u32 len;
+
+ /*
+ * If we didn't find the RDMA read request in the ack queue,
+ * or the send tasklet is already backed up to send an
+ * earlier entry, we can ignore this request.
+ */
+ if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
+ goto unlock_done;
+ /* RETH comes after BTH */
+ if (!header_in_data)
+ reth = &ohdr->u.rc.reth;
+ else {
+ reth = (struct ib_reth *)data;
+ data += sizeof(*reth);
+ }
+ /*
+ * Address range must be a subset of the original
+ * request and start on pmtu boundaries.
+ * We reuse the old ack_queue slot since the requester
+ * should not back up and request an earlier PSN for the
+ * same request.
+ */
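+ /*
+ * Each PSN covers one pmtu of data, so a duplicate that starts
+ * N PSNs into the original request restarts N * path_mtu bytes
+ * into the saved SGE (hypothetical values: N = 2 with a
+ * 2048-byte MTU gives a 4096-byte offset).
+ */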
+ offset = ((psn - e->psn) & IPATH_PSN_MASK) *
+ ib_mtu_enum_to_int(qp->path_mtu);
+ len = be32_to_cpu(reth->length);
+ if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
+ goto unlock_done;
+ if (len != 0) {
+ u32 rkey = be32_to_cpu(reth->rkey);
+ u64 vaddr = be64_to_cpu(reth->vaddr);
+ int ok;
+
+ ok = ipath_rkey_ok(qp, &e->rdma_sge,
+ len, vaddr, rkey,
+ IB_ACCESS_REMOTE_READ);
+ if (unlikely(!ok))
+ goto unlock_done;
+ } else {
+ e->rdma_sge.sg_list = NULL;
+ e->rdma_sge.num_sge = 0;
+ e->rdma_sge.sge.mr = NULL;
+ e->rdma_sge.sge.vaddr = NULL;
+ e->rdma_sge.sge.length = 0;
+ e->rdma_sge.sge.sge_length = 0;
+ }
+ e->psn = psn;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_tail_ack_queue = prev;
+ break;
+ }
+
+ case OP(COMPARE_SWAP):
+ case OP(FETCH_ADD): {
+ /*
+ * If we didn't find the atomic request in the ack queue
+ * or the send tasklet is already backed up to send an
+ * earlier entry, we can ignore this request.
+ */
+ if (!e || e->opcode != (u8) opcode || old_req)
+ goto unlock_done;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_tail_ack_queue = prev;
+ break;
+ }
+
+ default:
+ if (old_req)
+ goto unlock_done;
+ /*
+ * Resend the most recent ACK if this request is
+ * after all the previous RDMA reads and atomics.
+ */
+ if (i == qp->r_head_ack_queue) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ qp->r_nak_state = 0;
+ qp->r_ack_psn = qp->r_psn - 1;
+ goto send_ack;
+ }
+ /*
+ * Try to send a simple ACK to work around a Mellanox bug
+ * which doesn't accept a RDMA read response or atomic
+ * response as an ACK for earlier SENDs or RDMA writes.
+ */
+ if (qp->r_head_ack_queue == qp->s_tail_ack_queue &&
+ !(qp->s_flags & IPATH_S_ACK_PENDING) &&
+ qp->s_ack_state == OP(ACKNOWLEDGE)) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ qp->r_nak_state = 0;
+ qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
+ goto send_ack;
+ }
+ /*
+ * Resend the RDMA read or atomic op which
+ * ACKs this duplicate request.
+ */
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_tail_ack_queue = i;
+ break;
+ }
+ qp->r_nak_state = 0;
+ ipath_schedule_send(qp);
+
+unlock_done:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+done:
+ return 1;
+
+send_ack:
+ return 0;
+}
+
+void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
+{
+ unsigned long flags;
+ int lastwqe;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ lastwqe = ipath_error_qp(qp, err);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+}
+
+static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
+{
+ unsigned next;
+
+ next = n + 1;
+ if (next > IPATH_MAX_RDMA_ATOMIC)
+ next = 0;
+ if (n == qp->s_tail_ack_queue) {
+ qp->s_tail_ack_queue = next;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ }
+}
+
+/**
+ * ipath_rc_rcv - process an incoming RC packet
+ * @dev: the device this packet came in on
+ * @hdr: the header of this packet
+ * @has_grh: true if the header has a GRH
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP for this packet
+ *
+ * This is called from ipath_qp_rcv() to process an incoming RC packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
+{
+ struct ipath_other_headers *ohdr;
+ u32 opcode;
+ u32 hdrsize;
+ u32 psn;
+ u32 pad;
+ struct ib_wc wc;
+ u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ int diff;
+ struct ib_reth *reth;
+ int header_in_data;
+ unsigned long flags;
+
+ /* Validate the SLID. See Ch. 9.6.1.5 */
+ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+ goto done;
+
+ /* Check for GRH */
+ if (!has_grh) {
+ ohdr = &hdr->u.oth;
+ hdrsize = 8 + 12; /* LRH + BTH */
+ psn = be32_to_cpu(ohdr->bth[2]);
+ header_in_data = 0;
+ } else {
+ ohdr = &hdr->u.l.oth;
+ hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
+ /*
+ * The header with GRH is 60 bytes and the core driver sets
+ * the eager header buffer size to 56 bytes so the last 4
+ * bytes of the BTH header (PSN) are in the data buffer.
+ */
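+ /*
+ * That is, 8 (LRH) + 40 (GRH) + 12 (BTH) = 60 bytes, 4 more
+ * than the 56-byte eager buffer, so bth[2] (the PSN) has to be
+ * read from the start of the data buffer instead.
+ */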
+ header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
+ if (header_in_data) {
+ psn = be32_to_cpu(((__be32 *) data)[0]);
+ data += sizeof(__be32);
+ } else
+ psn = be32_to_cpu(ohdr->bth[2]);
+ }
+
+ /*
+ * Process responses (ACKs) before anything else. Note that the
+ * packet sequence number will be for something in the send work
+ * queue rather than the expected receive packet sequence number.
+ * In other words, this QP is the requester.
+ */
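+ /*
+ * The IBA numbers the RC response opcodes contiguously, from
+ * RDMA_READ_RESPONSE_FIRST through ATOMIC_ACKNOWLEDGE, so a
+ * simple range check identifies all response packets.
+ */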
+ opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
+ opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
+ ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
+ hdrsize, pmtu, header_in_data);
+ goto done;
+ }
+
+ /* Compute 24 bits worth of difference. */
+ diff = ipath_cmp24(psn, qp->r_psn);
+ if (unlikely(diff)) {
+ if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
+ psn, diff, header_in_data))
+ goto done;
+ goto send_ack;
+ }
+
+ /* Check for opcode sequence errors. */
+ switch (qp->r_state) {
+ case OP(SEND_FIRST):
+ case OP(SEND_MIDDLE):
+ if (opcode == OP(SEND_MIDDLE) ||
+ opcode == OP(SEND_LAST) ||
+ opcode == OP(SEND_LAST_WITH_IMMEDIATE))
+ break;
+ goto nack_inv;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_MIDDLE):
+ if (opcode == OP(RDMA_WRITE_MIDDLE) ||
+ opcode == OP(RDMA_WRITE_LAST) ||
+ opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
+ break;
+ goto nack_inv;
+
+ default:
+ if (opcode == OP(SEND_MIDDLE) ||
+ opcode == OP(SEND_LAST) ||
+ opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
+ opcode == OP(RDMA_WRITE_MIDDLE) ||
+ opcode == OP(RDMA_WRITE_LAST) ||
+ opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
+ goto nack_inv;
+ /*
+ * Note that it is up to the requester to not send a new
+ * RDMA read or atomic operation before receiving an ACK
+ * for the previous operation.
+ */
+ break;
+ }
+
+ memset(&wc, 0, sizeof wc);
+
+ /* OK, process the packet. */
+ switch (opcode) {
+ case OP(SEND_FIRST):
+ if (!ipath_get_rwqe(qp, 0))
+ goto rnr_nak;
+ qp->r_rcv_len = 0;
+ /* FALLTHROUGH */
+ case OP(SEND_MIDDLE):
+ case OP(RDMA_WRITE_MIDDLE):
+ send_middle:
+ /* Check for invalid length PMTU or posted rwqe len. */
+ if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ goto nack_inv;
+ qp->r_rcv_len += pmtu;
+ if (unlikely(qp->r_rcv_len > qp->r_len))
+ goto nack_inv;
+ ipath_copy_sge(&qp->r_sge, data, pmtu);
+ break;
+
+ case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
+ /* consume RWQE */
+ if (!ipath_get_rwqe(qp, 1))
+ goto rnr_nak;
+ goto send_last_imm;
+
+ case OP(SEND_ONLY):
+ case OP(SEND_ONLY_WITH_IMMEDIATE):
+ if (!ipath_get_rwqe(qp, 0))
+ goto rnr_nak;
+ qp->r_rcv_len = 0;
+ if (opcode == OP(SEND_ONLY))
+ goto send_last;
+ /* FALLTHROUGH */
+ case OP(SEND_LAST_WITH_IMMEDIATE):
+ send_last_imm:
+ if (header_in_data) {
+ wc.ex.imm_data = *(__be32 *) data;
+ data += sizeof(__be32);
+ } else {
+ /* Immediate data comes after BTH */
+ wc.ex.imm_data = ohdr->u.imm_data;
+ }
+ hdrsize += 4;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ /* FALLTHROUGH */
+ case OP(SEND_LAST):
+ case OP(RDMA_WRITE_LAST):
+ send_last:
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /* Check for invalid length. */
+ /* XXX LAST len should be >= 1 */
+ if (unlikely(tlen < (hdrsize + pad + 4)))
+ goto nack_inv;
+ /* Don't count the CRC. */
+ tlen -= (hdrsize + pad + 4);
+ wc.byte_len = tlen + qp->r_rcv_len;
+ if (unlikely(wc.byte_len > qp->r_len))
+ goto nack_inv;
+ ipath_copy_sge(&qp->r_sge, data, tlen);
+ qp->r_msn++;
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
+ break;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
+ opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ else
+ wc.opcode = IB_WC_RECV;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.slid = qp->remote_ah_attr.dlid;
+ wc.sl = qp->remote_ah_attr.sl;
+ /* Signal completion event if the solicited bit is set. */
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ (ohdr->bth[0] &
+ __constant_cpu_to_be32(1 << 23)) != 0);
+ break;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_ONLY):
+ case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_WRITE)))
+ goto nack_inv;
+ /* consume RWQE */
+ /* RETH comes after BTH */
+ if (!header_in_data)
+ reth = &ohdr->u.rc.reth;
+ else {
+ reth = (struct ib_reth *)data;
+ data += sizeof(*reth);
+ }
+ hdrsize += sizeof(*reth);
+ qp->r_len = be32_to_cpu(reth->length);
+ qp->r_rcv_len = 0;
+ if (qp->r_len != 0) {
+ u32 rkey = be32_to_cpu(reth->rkey);
+ u64 vaddr = be64_to_cpu(reth->vaddr);
+ int ok;
+
+ /* Check rkey & NAK */
+ ok = ipath_rkey_ok(qp, &qp->r_sge,
+ qp->r_len, vaddr, rkey,
+ IB_ACCESS_REMOTE_WRITE);
+ if (unlikely(!ok))
+ goto nack_acc;
+ } else {
+ qp->r_sge.sg_list = NULL;
+ qp->r_sge.sge.mr = NULL;
+ qp->r_sge.sge.vaddr = NULL;
+ qp->r_sge.sge.length = 0;
+ qp->r_sge.sge.sge_length = 0;
+ }
+ if (opcode == OP(RDMA_WRITE_FIRST))
+ goto send_middle;
+ else if (opcode == OP(RDMA_WRITE_ONLY))
+ goto send_last;
+ if (!ipath_get_rwqe(qp, 1))
+ goto rnr_nak;
+ goto send_last_imm;
+
+ case OP(RDMA_READ_REQUEST): {
+ struct ipath_ack_entry *e;
+ u32 len;
+ u8 next;
+
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_READ)))
+ goto nack_inv;
+ next = qp->r_head_ack_queue + 1;
+ if (next > IPATH_MAX_RDMA_ATOMIC)
+ next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this while holding the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock;
+ if (unlikely(next == qp->s_tail_ack_queue)) {
+ if (!qp->s_ack_queue[next].sent)
+ goto nack_inv_unlck;
+ ipath_update_ack_queue(qp, next);
+ }
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
+ /* RETH comes after BTH */
+ if (!header_in_data)
+ reth = &ohdr->u.rc.reth;
+ else {
+ reth = (struct ib_reth *)data;
+ data += sizeof(*reth);
+ }
+ len = be32_to_cpu(reth->length);
+ if (len) {
+ u32 rkey = be32_to_cpu(reth->rkey);
+ u64 vaddr = be64_to_cpu(reth->vaddr);
+ int ok;
+
+ /* Check rkey & NAK */
+ ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+ rkey, IB_ACCESS_REMOTE_READ);
+ if (unlikely(!ok))
+ goto nack_acc_unlck;
+ /*
+ * Update the next expected PSN. We add 1 later
+ * below, so only add the remainder here.
+ */
+ if (len > pmtu)
+ qp->r_psn += (len - 1) / pmtu;
+ } else {
+ e->rdma_sge.sg_list = NULL;
+ e->rdma_sge.num_sge = 0;
+ e->rdma_sge.sge.mr = NULL;
+ e->rdma_sge.sge.vaddr = NULL;
+ e->rdma_sge.sge.length = 0;
+ e->rdma_sge.sge.sge_length = 0;
+ }
+ e->opcode = opcode;
+ e->sent = 0;
+ e->psn = psn;
+ /*
+ * We need to increment the MSN here instead of when we
+ * finish sending the result since a duplicate request would
+ * increment it more than once.
+ */
+ qp->r_msn++;
+ qp->r_psn++;
+ qp->r_state = opcode;
+ qp->r_nak_state = 0;
+ qp->r_head_ack_queue = next;
+
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
+
+ goto unlock;
+ }
+
+ case OP(COMPARE_SWAP):
+ case OP(FETCH_ADD): {
+ struct ib_atomic_eth *ateth;
+ struct ipath_ack_entry *e;
+ u64 vaddr;
+ atomic64_t *maddr;
+ u64 sdata;
+ u32 rkey;
+ u8 next;
+
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto nack_inv;
+ next = qp->r_head_ack_queue + 1;
+ if (next > IPATH_MAX_RDMA_ATOMIC)
+ next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this while holding the s_lock. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
+ goto unlock;
+ if (unlikely(next == qp->s_tail_ack_queue)) {
+ if (!qp->s_ack_queue[next].sent)
+ goto nack_inv_unlck;
+ ipath_update_ack_queue(qp, next);
+ }
+ if (!header_in_data)
+ ateth = &ohdr->u.atomic_eth;
+ else
+ ateth = (struct ib_atomic_eth *)data;
+ vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
+ be32_to_cpu(ateth->vaddr[1]);
+ if (unlikely(vaddr & (sizeof(u64) - 1)))
+ goto nack_inv_unlck;
+ rkey = be32_to_cpu(ateth->rkey);
+ /* Check rkey & NAK */
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
+ sizeof(u64), vaddr, rkey,
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto nack_acc_unlck;
+ /* Perform atomic OP and save result. */
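+ /*
+ * Both operations return the previous contents of the target:
+ * FETCH_ADD subtracts the addend from the value returned by
+ * atomic64_add_return(), and COMPARE_SWAP uses the old value
+ * returned by cmpxchg().
+ */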
+ maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+ sdata = be64_to_cpu(ateth->swap_data);
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
+ e->atomic_data = (opcode == OP(FETCH_ADD)) ?
+ (u64) atomic64_add_return(sdata, maddr) - sdata :
+ (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ be64_to_cpu(ateth->compare_data),
+ sdata);
+ e->opcode = opcode;
+ e->sent = 0;
+ e->psn = psn & IPATH_PSN_MASK;
+ qp->r_msn++;
+ qp->r_psn++;
+ qp->r_state = opcode;
+ qp->r_nak_state = 0;
+ qp->r_head_ack_queue = next;
+
+ /* Schedule the send tasklet. */
+ ipath_schedule_send(qp);
+
+ goto unlock;
+ }
+
+ default:
+ /* NAK unknown opcodes. */
+ goto nack_inv;
+ }
+ qp->r_psn++;
+ qp->r_state = opcode;
+ qp->r_ack_psn = psn;
+ qp->r_nak_state = 0;
+ /* Send an ACK if requested or required. */
+ if (psn & (1 << 31))
+ goto send_ack;
+ goto done;
+
+rnr_nak:
+ qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+ qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
+
+nack_inv_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_inv:
+ ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ qp->r_nak_state = IB_NAK_INVALID_REQUEST;
+ qp->r_ack_psn = qp->r_psn;
+ goto send_ack;
+
+nack_acc_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_acc:
+ ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
+ qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
+ qp->r_ack_psn = qp->r_psn;
+send_ack:
+ send_rc_ack(qp);
+ goto done;
+
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+done:
+ return;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
new file mode 100644
index 0000000..8f44d0c
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -0,0 +1,512 @@
+/*
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _IPATH_REGISTERS_H
+#define _IPATH_REGISTERS_H
+
+/*
+ * This file should only be included by kernel source, and by the diags. It
+ * defines the registers, and their contents, for InfiniPath chips.
+ */
+
+/*
+ * These are the InfiniPath register and buffer bit definitions
+ * that are visible to software and needed only by the kernel
+ * and diag code. A few that are visible to protocol and user
+ * code are in ipath_common.h. Some bits are specific
+ * to a given chip implementation and have been moved to the
+ * chip-specific source file.
+ */
+
+/* kr_revision bits */
+#define INFINIPATH_R_CHIPREVMINOR_MASK 0xFF
+#define INFINIPATH_R_CHIPREVMINOR_SHIFT 0
+#define INFINIPATH_R_CHIPREVMAJOR_MASK 0xFF
+#define INFINIPATH_R_CHIPREVMAJOR_SHIFT 8
+#define INFINIPATH_R_ARCH_MASK 0xFF
+#define INFINIPATH_R_ARCH_SHIFT 16
+#define INFINIPATH_R_SOFTWARE_MASK 0xFF
+#define INFINIPATH_R_SOFTWARE_SHIFT 24
+#define INFINIPATH_R_BOARDID_MASK 0xFF
+#define INFINIPATH_R_BOARDID_SHIFT 32
+
+/* kr_control bits */
+#define INFINIPATH_C_FREEZEMODE 0x00000002
+#define INFINIPATH_C_LINKENABLE 0x00000004
+
+/* kr_sendctrl bits */
+#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16
+#define INFINIPATH_S_UPDTHRESH_SHIFT 24
+#define INFINIPATH_S_UPDTHRESH_MASK 0x1f
+
+#define IPATH_S_ABORT 0
+#define IPATH_S_PIOINTBUFAVAIL 1
+#define IPATH_S_PIOBUFAVAILUPD 2
+#define IPATH_S_PIOENABLE 3
+#define IPATH_S_SDMAINTENABLE 9
+#define IPATH_S_SDMASINGLEDESCRIPTOR 10
+#define IPATH_S_SDMAENABLE 11
+#define IPATH_S_SDMAHALT 12
+#define IPATH_S_DISARM 31
+
+#define INFINIPATH_S_ABORT (1U << IPATH_S_ABORT)
+#define INFINIPATH_S_PIOINTBUFAVAIL (1U << IPATH_S_PIOINTBUFAVAIL)
+#define INFINIPATH_S_PIOBUFAVAILUPD (1U << IPATH_S_PIOBUFAVAILUPD)
+#define INFINIPATH_S_PIOENABLE (1U << IPATH_S_PIOENABLE)
+#define INFINIPATH_S_SDMAINTENABLE (1U << IPATH_S_SDMAINTENABLE)
+#define INFINIPATH_S_SDMASINGLEDESCRIPTOR \
+ (1U << IPATH_S_SDMASINGLEDESCRIPTOR)
+#define INFINIPATH_S_SDMAENABLE (1U << IPATH_S_SDMAENABLE)
+#define INFINIPATH_S_SDMAHALT (1U << IPATH_S_SDMAHALT)
+#define INFINIPATH_S_DISARM (1U << IPATH_S_DISARM)
+
+/* kr_rcvctrl bits that are the same on multiple chips */
+#define INFINIPATH_R_PORTENABLE_SHIFT 0
+#define INFINIPATH_R_QPMAP_ENABLE (1ULL << 38)
+
+/* kr_intstatus, kr_intclear, kr_intmask bits */
+#define INFINIPATH_I_SDMAINT 0x8000000000000000ULL
+#define INFINIPATH_I_SDMADISABLED 0x4000000000000000ULL
+#define INFINIPATH_I_ERROR 0x0000000080000000ULL
+#define INFINIPATH_I_SPIOSENT 0x0000000040000000ULL
+#define INFINIPATH_I_SPIOBUFAVAIL 0x0000000020000000ULL
+#define INFINIPATH_I_GPIO 0x0000000010000000ULL
+#define INFINIPATH_I_JINT 0x0000000004000000ULL
+
+/* kr_errorstatus, kr_errorclear, kr_errormask bits */
+#define INFINIPATH_E_RFORMATERR 0x0000000000000001ULL
+#define INFINIPATH_E_RVCRC 0x0000000000000002ULL
+#define INFINIPATH_E_RICRC 0x0000000000000004ULL
+#define INFINIPATH_E_RMINPKTLEN 0x0000000000000008ULL
+#define INFINIPATH_E_RMAXPKTLEN 0x0000000000000010ULL
+#define INFINIPATH_E_RLONGPKTLEN 0x0000000000000020ULL
+#define INFINIPATH_E_RSHORTPKTLEN 0x0000000000000040ULL
+#define INFINIPATH_E_RUNEXPCHAR 0x0000000000000080ULL
+#define INFINIPATH_E_RUNSUPVL 0x0000000000000100ULL
+#define INFINIPATH_E_REBP 0x0000000000000200ULL
+#define INFINIPATH_E_RIBFLOW 0x0000000000000400ULL
+#define INFINIPATH_E_RBADVERSION 0x0000000000000800ULL
+#define INFINIPATH_E_RRCVEGRFULL 0x0000000000001000ULL
+#define INFINIPATH_E_RRCVHDRFULL 0x0000000000002000ULL
+#define INFINIPATH_E_RBADTID 0x0000000000004000ULL
+#define INFINIPATH_E_RHDRLEN 0x0000000000008000ULL
+#define INFINIPATH_E_RHDR 0x0000000000010000ULL
+#define INFINIPATH_E_RIBLOSTLINK 0x0000000000020000ULL
+#define INFINIPATH_E_SENDSPECIALTRIGGER 0x0000000008000000ULL
+#define INFINIPATH_E_SDMADISABLED 0x0000000010000000ULL
+#define INFINIPATH_E_SMINPKTLEN 0x0000000020000000ULL
+#define INFINIPATH_E_SMAXPKTLEN 0x0000000040000000ULL
+#define INFINIPATH_E_SUNDERRUN 0x0000000080000000ULL
+#define INFINIPATH_E_SPKTLEN 0x0000000100000000ULL
+#define INFINIPATH_E_SDROPPEDSMPPKT 0x0000000200000000ULL
+#define INFINIPATH_E_SDROPPEDDATAPKT 0x0000000400000000ULL
+#define INFINIPATH_E_SPIOARMLAUNCH 0x0000000800000000ULL
+#define INFINIPATH_E_SUNEXPERRPKTNUM 0x0000001000000000ULL
+#define INFINIPATH_E_SUNSUPVL 0x0000002000000000ULL
+#define INFINIPATH_E_SENDBUFMISUSE 0x0000004000000000ULL
+#define INFINIPATH_E_SDMAGENMISMATCH 0x0000008000000000ULL
+#define INFINIPATH_E_SDMAOUTOFBOUND 0x0000010000000000ULL
+#define INFINIPATH_E_SDMATAILOUTOFBOUND 0x0000020000000000ULL
+#define INFINIPATH_E_SDMABASE 0x0000040000000000ULL
+#define INFINIPATH_E_SDMA1STDESC 0x0000080000000000ULL
+#define INFINIPATH_E_SDMARPYTAG 0x0000100000000000ULL
+#define INFINIPATH_E_SDMADWEN 0x0000200000000000ULL
+#define INFINIPATH_E_SDMAMISSINGDW 0x0000400000000000ULL
+#define INFINIPATH_E_SDMAUNEXPDATA 0x0000800000000000ULL
+#define INFINIPATH_E_IBSTATUSCHANGED 0x0001000000000000ULL
+#define INFINIPATH_E_INVALIDADDR 0x0002000000000000ULL
+#define INFINIPATH_E_RESET 0x0004000000000000ULL
+#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL
+#define INFINIPATH_E_SDMADESCADDRMISALIGN 0x0010000000000000ULL
+#define INFINIPATH_E_INVALIDEEPCMD 0x0020000000000000ULL
+
+/*
+ * this is used to print "common" packet errors only when the
+ * __IPATH_ERRPKTDBG bit is set in ipath_debug.
+ */
+#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
+ | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
+ | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
+ | INFINIPATH_E_REBP )
+
+/* Convenience for decoding Send DMA errors */
+#define INFINIPATH_E_SDMAERRS ( \
+ INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | \
+ INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | \
+ INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | \
+ INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | \
+ INFINIPATH_E_SDMAUNEXPDATA | \
+ INFINIPATH_E_SDMADESCADDRMISALIGN | \
+ INFINIPATH_E_SDMADISABLED | \
+ INFINIPATH_E_SENDBUFMISUSE)
+
+/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
+/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
+ * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID
+ * bit 4: flag buffer, 5: datainfo, 6: header info */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
+#define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
+#define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
+#define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
+/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF 0x1ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC 0x2ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
+/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL
+/* waldo specific -- find the rest in ipath_6110.c */
+#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR 0x0000000400000000ULL
+/* 6120/7220 specific -- find the rest in ipath_6120.c and ipath_7220.c */
+#define INFINIPATH_HWE_MEMBISTFAILED 0x0040000000000000ULL
+
+/* kr_hwdiagctrl bits */
+#define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
+#define INFINIPATH_DC_FORCETXEMEMPARITYERR_SHIFT 40
+#define INFINIPATH_DC_FORCERXEMEMPARITYERR_MASK 0x7FULL
+#define INFINIPATH_DC_FORCERXEMEMPARITYERR_SHIFT 44
+#define INFINIPATH_DC_FORCERXDSYNCMEMPARITYERR 0x0000000400000000ULL
+#define INFINIPATH_DC_COUNTERDISABLE 0x1000000000000000ULL
+#define INFINIPATH_DC_COUNTERWREN 0x2000000000000000ULL
+#define INFINIPATH_DC_FORCEIBCBUSTOSPCPARITYERR 0x4000000000000000ULL
+#define INFINIPATH_DC_FORCEIBCBUSFRSPCPARITYERR 0x8000000000000000ULL
+
+/* kr_ibcctrl bits */
+#define INFINIPATH_IBCC_FLOWCTRLPERIOD_MASK 0xFFULL
+#define INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT 0
+#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_MASK 0xFFULL
+#define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8
+#define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL
+#define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1
+/* cycle through TS1/TS2 till OK */
+#define INFINIPATH_IBCC_LINKINITCMD_POLL 2
+/* wait for TS1, then go on */
+#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3
+#define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16
+#define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL
+#define INFINIPATH_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */
+#define INFINIPATH_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */
+#define INFINIPATH_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */
+#define INFINIPATH_IBCC_LINKCMD_SHIFT 18
+#define INFINIPATH_IBCC_MAXPKTLEN_MASK 0x7FFULL
+#define INFINIPATH_IBCC_MAXPKTLEN_SHIFT 20
+#define INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK 0xFULL
+#define INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT 32
+#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK 0xFULL
+#define INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT 36
+#define INFINIPATH_IBCC_CREDITSCALE_MASK 0x7ULL
+#define INFINIPATH_IBCC_CREDITSCALE_SHIFT 40
+#define INFINIPATH_IBCC_LOOPBACK 0x8000000000000000ULL
+#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
+
+/* kr_ibcstatus bits */
+#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
+#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
+
+#define INFINIPATH_IBCS_TXREADY 0x40000000
+#define INFINIPATH_IBCS_TXCREDITOK 0x80000000
+/* link training states (shift by
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */
+#define INFINIPATH_IBCS_LT_STATE_DISABLED 0x00
+#define INFINIPATH_IBCS_LT_STATE_LINKUP 0x01
+#define INFINIPATH_IBCS_LT_STATE_POLLACTIVE 0x02
+#define INFINIPATH_IBCS_LT_STATE_POLLQUIET 0x03
+#define INFINIPATH_IBCS_LT_STATE_SLEEPDELAY 0x04
+#define INFINIPATH_IBCS_LT_STATE_SLEEPQUIET 0x05
+#define INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE 0x08
+#define INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG 0x09
+#define INFINIPATH_IBCS_LT_STATE_CFGWAITRMT 0x0a
+#define INFINIPATH_IBCS_LT_STATE_CFGIDLE 0x0b
+#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN 0x0c
+#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT 0x0e
+#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE 0x0f
+/* link state machine states (shift by ibcs_ls_shift) */
+#define INFINIPATH_IBCS_L_STATE_DOWN 0x0
+#define INFINIPATH_IBCS_L_STATE_INIT 0x1
+#define INFINIPATH_IBCS_L_STATE_ARM 0x2
+#define INFINIPATH_IBCS_L_STATE_ACTIVE 0x3
+#define INFINIPATH_IBCS_L_STATE_ACT_DEFER 0x4
+
+
+/* kr_extstatus bits */
+#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1
+#define INFINIPATH_EXTS_GPIOIN_MASK 0xFFFFULL
+#define INFINIPATH_EXTS_GPIOIN_SHIFT 48
+
+/* kr_extctrl bits */
+#define INFINIPATH_EXTC_GPIOINVERT_MASK 0xFFFFULL
+#define INFINIPATH_EXTC_GPIOINVERT_SHIFT 32
+#define INFINIPATH_EXTC_GPIOOE_MASK 0xFFFFULL
+#define INFINIPATH_EXTC_GPIOOE_SHIFT 48
+#define INFINIPATH_EXTC_SERDESENABLE 0x80000000ULL
+#define INFINIPATH_EXTC_SERDESCONNECT 0x40000000ULL
+#define INFINIPATH_EXTC_SERDESENTRUNKING 0x20000000ULL
+#define INFINIPATH_EXTC_SERDESDISRXFIFO 0x10000000ULL
+#define INFINIPATH_EXTC_SERDESENPLPBK1 0x08000000ULL
+#define INFINIPATH_EXTC_SERDESENPLPBK2 0x04000000ULL
+#define INFINIPATH_EXTC_SERDESENENCDEC 0x02000000ULL
+#define INFINIPATH_EXTC_LED1SECPORT_ON 0x00000020ULL
+#define INFINIPATH_EXTC_LED2SECPORT_ON 0x00000010ULL
+#define INFINIPATH_EXTC_LED1PRIPORT_ON 0x00000008ULL
+#define INFINIPATH_EXTC_LED2PRIPORT_ON 0x00000004ULL
+#define INFINIPATH_EXTC_LEDGBLOK_ON 0x00000002ULL
+#define INFINIPATH_EXTC_LEDGBLERR_OFF 0x00000001ULL
+
+/* kr_partitionkey bits */
+#define INFINIPATH_PKEY_SIZE 16
+#define INFINIPATH_PKEY_MASK 0xFFFF
+#define INFINIPATH_PKEY_DEFAULT_PKEY 0xFFFF
+
+/* kr_serdesconfig0 bits */
+#define INFINIPATH_SERDC0_RESET_MASK 0xfULL /* overall reset bits */
+#define INFINIPATH_SERDC0_RESET_PLL 0x10000000ULL /* pll reset */
+/* tx idle enables (per lane) */
+#define INFINIPATH_SERDC0_TXIDLE 0xF000ULL
+/* rx detect enables (per lane) */
+#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL
+/* L1 power down; use with RXDETECT, otherwise not used on IB side */
+#define INFINIPATH_SERDC0_L1PWR_DN 0xF0ULL
+
+/* common kr_xgxsconfig bits (or safe in all, even if not implemented) */
+#define INFINIPATH_XGXS_RX_POL_SHIFT 19
+#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
+
+
+/*
+ * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our
+ * PIO send buffers. This is well beyond anything currently
+ * defined in the InfiniBand spec.
+ */
+#define IPATH_PIO_MAXIBHDR 128
+
+typedef u64 ipath_err_t;
+
+/* The following change with the type of device, so they
+ * need to be part of the ipath_devdata struct, or
+ * we could have problems plugging in devices of
+ * different types (e.g. one HT, one PCIE)
+ * in one system, to be managed by one driver.
+ * On the other hand, this file may also be included
+ * by other code, so leave the declarations here
+ * temporarily. Minor footprint issue if common-model
+ * linker used, none if C89+ linker used.
+ */
+
+/* mask of defined bits for various registers */
+extern u64 infinipath_i_bitsextant;
+extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
+
+/* masks that are different in various chips, or only exist in some chips */
+extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
+
+/*
+ * These are the infinipath general register numbers (not offsets).
+ * The kernel registers are used directly, those beyond the kernel
+ * registers are calculated from one of the base registers. The use of
+ * an integer type doesn't allow type-checking as thorough as, say,
+ * an enum but allows for better hiding of chip differences.
+ */
+typedef const u16 ipath_kreg, /* infinipath general registers */
+ ipath_creg, /* infinipath counter registers */
+ ipath_sreg; /* kernel-only, infinipath send registers */
+
+/*
+ * These are the chip registers common to all infinipath chips, and
+ * used both by the kernel and the diagnostics or other user code.
+ * They are all implemented such that 64 bit accesses work.
+ * Some implement no more than 32 bits. Because 64 bit reads
+ * require 2 HT cmds on opteron, we access those with 32 bit
+ * reads for efficiency (they are written as 64 bits, since
+ * the extra 32 bits are nearly free on writes, and it slightly reduces
+ * complexity). The rest are all accessed as 64 bits.
+ */
+struct ipath_kregs {
+ /* These are the 32 bit group */
+ ipath_kreg kr_control;
+ ipath_kreg kr_counterregbase;
+ ipath_kreg kr_intmask;
+ ipath_kreg kr_intstatus;
+ ipath_kreg kr_pagealign;
+ ipath_kreg kr_portcnt;
+ ipath_kreg kr_rcvtidbase;
+ ipath_kreg kr_rcvtidcnt;
+ ipath_kreg kr_rcvegrbase;
+ ipath_kreg kr_rcvegrcnt;
+ ipath_kreg kr_scratch;
+ ipath_kreg kr_sendctrl;
+ ipath_kreg kr_sendpiobufbase;
+ ipath_kreg kr_sendpiobufcnt;
+ ipath_kreg kr_sendpiosize;
+ ipath_kreg kr_sendregbase;
+ ipath_kreg kr_userregbase;
+ /* These are the 64 bit group */
+ ipath_kreg kr_debugport;
+ ipath_kreg kr_debugportselect;
+ ipath_kreg kr_errorclear;
+ ipath_kreg kr_errormask;
+ ipath_kreg kr_errorstatus;
+ ipath_kreg kr_extctrl;
+ ipath_kreg kr_extstatus;
+ ipath_kreg kr_gpio_clear;
+ ipath_kreg kr_gpio_mask;
+ ipath_kreg kr_gpio_out;
+ ipath_kreg kr_gpio_status;
+ ipath_kreg kr_hwdiagctrl;
+ ipath_kreg kr_hwerrclear;
+ ipath_kreg kr_hwerrmask;
+ ipath_kreg kr_hwerrstatus;
+ ipath_kreg kr_ibcctrl;
+ ipath_kreg kr_ibcstatus;
+ ipath_kreg kr_intblocked;
+ ipath_kreg kr_intclear;
+ ipath_kreg kr_interruptconfig;
+ ipath_kreg kr_mdio;
+ ipath_kreg kr_partitionkey;
+ ipath_kreg kr_rcvbthqp;
+ ipath_kreg kr_rcvbufbase;
+ ipath_kreg kr_rcvbufsize;
+ ipath_kreg kr_rcvctrl;
+ ipath_kreg kr_rcvhdrcnt;
+ ipath_kreg kr_rcvhdrentsize;
+ ipath_kreg kr_rcvhdrsize;
+ ipath_kreg kr_rcvintmembase;
+ ipath_kreg kr_rcvintmemsize;
+ ipath_kreg kr_revision;
+ ipath_kreg kr_sendbuffererror;
+ ipath_kreg kr_sendpioavailaddr;
+ ipath_kreg kr_serdesconfig0;
+ ipath_kreg kr_serdesconfig1;
+ ipath_kreg kr_serdesstatus;
+ ipath_kreg kr_txintmembase;
+ ipath_kreg kr_txintmemsize;
+ ipath_kreg kr_xgxsconfig;
+ ipath_kreg kr_ibpllcfg;
+ /* use these two (and the following N ports) only with
+ * ipath_k*_kreg64_port(); not *kreg64() */
+ ipath_kreg kr_rcvhdraddr;
+ ipath_kreg kr_rcvhdrtailaddr;
+
+ /* remaining registers are not present on all types of infinipath
+ chips */
+ ipath_kreg kr_rcvpktledcnt;
+ ipath_kreg kr_pcierbuftestreg0;
+ ipath_kreg kr_pcierbuftestreg1;
+ ipath_kreg kr_pcieq0serdesconfig0;
+ ipath_kreg kr_pcieq0serdesconfig1;
+ ipath_kreg kr_pcieq0serdesstatus;
+ ipath_kreg kr_pcieq1serdesconfig0;
+ ipath_kreg kr_pcieq1serdesconfig1;
+ ipath_kreg kr_pcieq1serdesstatus;
+ ipath_kreg kr_hrtbt_guid;
+ ipath_kreg kr_ibcddrctrl;
+ ipath_kreg kr_ibcddrstatus;
+ ipath_kreg kr_jintreload;
+
+ /* send dma related regs */
+ ipath_kreg kr_senddmabase;
+ ipath_kreg kr_senddmalengen;
+ ipath_kreg kr_senddmatail;
+ ipath_kreg kr_senddmahead;
+ ipath_kreg kr_senddmaheadaddr;
+ ipath_kreg kr_senddmabufmask0;
+ ipath_kreg kr_senddmabufmask1;
+ ipath_kreg kr_senddmabufmask2;
+ ipath_kreg kr_senddmastatus;
+
+ /* SerDes related regs (IBA7220-only) */
+ ipath_kreg kr_ibserdesctrl;
+ ipath_kreg kr_ib_epbacc;
+ ipath_kreg kr_ib_epbtrans;
+ ipath_kreg kr_pcie_epbacc;
+ ipath_kreg kr_pcie_epbtrans;
+ ipath_kreg kr_ib_ddsrxeq;
+};
+
+struct ipath_cregs {
+ ipath_creg cr_badformatcnt;
+ ipath_creg cr_erricrccnt;
+ ipath_creg cr_errlinkcnt;
+ ipath_creg cr_errlpcrccnt;
+ ipath_creg cr_errpkey;
+ ipath_creg cr_errrcvflowctrlcnt;
+ ipath_creg cr_err_rlencnt;
+ ipath_creg cr_errslencnt;
+ ipath_creg cr_errtidfull;
+ ipath_creg cr_errtidvalid;
+ ipath_creg cr_errvcrccnt;
+ ipath_creg cr_ibstatuschange;
+ ipath_creg cr_intcnt;
+ ipath_creg cr_invalidrlencnt;
+ ipath_creg cr_invalidslencnt;
+ ipath_creg cr_lbflowstallcnt;
+ ipath_creg cr_iblinkdowncnt;
+ ipath_creg cr_iblinkerrrecovcnt;
+ ipath_creg cr_ibsymbolerrcnt;
+ ipath_creg cr_pktrcvcnt;
+ ipath_creg cr_pktrcvflowctrlcnt;
+ ipath_creg cr_pktsendcnt;
+ ipath_creg cr_pktsendflowcnt;
+ ipath_creg cr_portovflcnt;
+ ipath_creg cr_rcvebpcnt;
+ ipath_creg cr_rcvovflcnt;
+ ipath_creg cr_rxdroppktcnt;
+ ipath_creg cr_senddropped;
+ ipath_creg cr_sendstallcnt;
+ ipath_creg cr_sendunderruncnt;
+ ipath_creg cr_unsupvlcnt;
+ ipath_creg cr_wordrcvcnt;
+ ipath_creg cr_wordsendcnt;
+ ipath_creg cr_vl15droppedpktcnt;
+ ipath_creg cr_rxotherlocalphyerrcnt;
+ ipath_creg cr_excessbufferovflcnt;
+ ipath_creg cr_locallinkintegrityerrcnt;
+ ipath_creg cr_rxvlerrcnt;
+ ipath_creg cr_rxdlidfltrcnt;
+ ipath_creg cr_psstat;
+ ipath_creg cr_psstart;
+ ipath_creg cr_psinterval;
+ ipath_creg cr_psrcvdatacount;
+ ipath_creg cr_psrcvpktscount;
+ ipath_creg cr_psxmitdatacount;
+ ipath_creg cr_psxmitpktscount;
+ ipath_creg cr_psxmitwaitcount;
+};
+
+#endif /* _IPATH_REGISTERS_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
new file mode 100644
index 0000000..2296832
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -0,0 +1,733 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+
+#include "ipath_verbs.h"
+#include "ipath_kernel.h"
+
+/*
+ * Convert the AETH RNR timeout code into the number of milliseconds.
+ */
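+/*
+ * These appear to be the IBA RNR NAK timer encodings rounded up
+ * to whole milliseconds, e.g. code 0 = 655.36 ms -> 656 and
+ * code 0x1F = 491.52 ms -> 492.
+ */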
+const u32 ib_ipath_rnr_table[32] = {
+ 656, /* 0 */
+ 1, /* 1 */
+ 1, /* 2 */
+ 1, /* 3 */
+ 1, /* 4 */
+ 1, /* 5 */
+ 1, /* 6 */
+ 1, /* 7 */
+ 1, /* 8 */
+ 1, /* 9 */
+ 1, /* A */
+ 1, /* B */
+ 1, /* C */
+ 1, /* D */
+ 2, /* E */
+ 2, /* F */
+ 3, /* 10 */
+ 4, /* 11 */
+ 6, /* 12 */
+ 8, /* 13 */
+ 11, /* 14 */
+ 16, /* 15 */
+ 21, /* 16 */
+ 31, /* 17 */
+ 41, /* 18 */
+ 62, /* 19 */
+ 82, /* 1A */
+ 123, /* 1B */
+ 164, /* 1C */
+ 246, /* 1D */
+ 328, /* 1E */
+ 492 /* 1F */
+};
+
+/**
+ * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
+ * @qp: the QP
+ *
+ * Called with the QP s_lock held and interrupts disabled.
+ * XXX Use a simple list for now. We might need a priority
+ * queue if we have lots of QPs waiting for RNR timeouts
+ * but that should be rare.
+ */
+void ipath_insert_rnr_queue(struct ipath_qp *qp)
+{
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+
+ /* We already did a spin_lock_irqsave(), so just use spin_lock */
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&dev->rnrwait))
+ list_add(&qp->timerwait, &dev->rnrwait);
+ else {
+ struct list_head *l = &dev->rnrwait;
+ struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
+ timerwait);
+
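+ /*
+ * Timeouts on the list are stored as deltas relative to the
+ * entries ahead of them, so walk the list subtracting each
+ * predecessor's remaining time until the insertion point is
+ * found.
+ */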
+ while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
+ qp->s_rnr_timeout -= nqp->s_rnr_timeout;
+ l = l->next;
+ if (l->next == &dev->rnrwait) {
+ nqp = NULL;
+ break;
+ }
+ nqp = list_entry(l->next, struct ipath_qp,
+ timerwait);
+ }
+ if (nqp)
+ nqp->s_rnr_timeout -= qp->s_rnr_timeout;
+ list_add(&qp->timerwait, l);
+ }
+ spin_unlock(&dev->pending_lock);
+}
+
+/**
+ * ipath_init_sge - Validate a RWQE and fill in the SGE state
+ * @qp: the QP
+ *
+ * Return 1 if OK.
+ */
+int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+ u32 *lengthp, struct ipath_sge_state *ss)
+{
+ int i, j, ret;
+ struct ib_wc wc;
+
+ *lengthp = 0;
+ for (i = j = 0; i < wqe->num_sge; i++) {
+ if (wqe->sg_list[i].length == 0)
+ continue;
+ /* Check LKEY */
+ if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
+ &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+ goto bad_lkey;
+ *lengthp += wqe->sg_list[i].length;
+ j++;
+ }
+ ss->num_sge = j;
+ ret = 1;
+ goto bail;
+
+bad_lkey:
+ memset(&wc, 0, sizeof(wc));
+ wc.wr_id = wqe->wr_id;
+ wc.status = IB_WC_LOC_PROT_ERR;
+ wc.opcode = IB_WC_RECV;
+ wc.qp = &qp->ibqp;
+ /* Signal solicited completion event. */
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ ret = 0;
+bail:
+ return ret;
+}
+
+/**
+ * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
+ * @qp: the QP
+ * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
+ *
+ * Return 0 if no RWQE is available, otherwise return 1.
+ *
+ * Can be called from interrupt level.
+ */
+int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
+{
+ unsigned long flags;
+ struct ipath_rq *rq;
+ struct ipath_rwq *wq;
+ struct ipath_srq *srq;
+ struct ipath_rwqe *wqe;
+ void (*handler)(struct ib_event *, void *);
+ u32 tail;
+ int ret;
+
+ if (qp->ibqp.srq) {
+ srq = to_isrq(qp->ibqp.srq);
+ handler = srq->ibsrq.event_handler;
+ rq = &srq->rq;
+ } else {
+ srq = NULL;
+ handler = NULL;
+ rq = &qp->r_rq;
+ }
+
+ spin_lock_irqsave(&rq->lock, flags);
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ ret = 0;
+ goto unlock;
+ }
+
+ wq = rq->wq;
+ tail = wq->tail;
+ /* Validate tail before using it since it is user writable. */
+ if (tail >= rq->size)
+ tail = 0;
+ do {
+ if (unlikely(tail == wq->head)) {
+ ret = 0;
+ goto unlock;
+ }
+ /* Make sure entry is read after head index is read. */
+ smp_rmb();
+ wqe = get_rwqe_ptr(rq, tail);
+ if (++tail >= rq->size)
+ tail = 0;
+ if (wr_id_only)
+ break;
+ qp->r_sge.sg_list = qp->r_sg_list;
+ } while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge));
+ qp->r_wr_id = wqe->wr_id;
+ wq->tail = tail;
+
+ ret = 1;
+ set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
+ if (handler) {
+ u32 n;
+
+ /*
+ * validate head pointer value and compute
+ * the number of remaining WQEs.
+ */
+ n = wq->head;
+ if (n >= rq->size)
+ n = 0;
+ if (n < tail)
+ n += rq->size - tail;
+ else
+ n -= tail;
+ if (n < srq->limit) {
+ struct ib_event ev;
+
+ srq->limit = 0;
+ spin_unlock_irqrestore(&rq->lock, flags);
+ ev.device = qp->ibqp.device;
+ ev.element.srq = qp->ibqp.srq;
+ ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ handler(&ev, srq->ibsrq.srq_context);
+ goto bail;
+ }
+ }
+unlock:
+ spin_unlock_irqrestore(&rq->lock, flags);
+bail:
+ return ret;
+}
+
+/**
+ * ipath_ruc_loopback - handle UC and RC loopback requests
+ * @sqp: the sending QP
+ *
+ * This is called from ipath_do_send() to
+ * forward a WQE addressed to the same HCA.
+ * Note that although we are single threaded due to the tasklet, we still
+ * have to protect against post_send(). We don't have to worry about
+ * receive interrupts since this is a connected protocol and all packets
+ * will pass through here.
+ */
+static void ipath_ruc_loopback(struct ipath_qp *sqp)
+{
+ struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
+ struct ipath_qp *qp;
+ struct ipath_swqe *wqe;
+ struct ipath_sge *sge;
+ unsigned long flags;
+ struct ib_wc wc;
+ u64 sdata;
+ atomic64_t *maddr;
+ enum ib_wc_status send_status;
+
+ /*
+ * Note that we check the responder QP state after
+ * checking the requester's state.
+ */
+ qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
+
+ spin_lock_irqsave(&sqp->s_lock, flags);
+
+ /* Return if we are already busy processing a work request. */
+ if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
+ !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
+ goto unlock;
+
+ sqp->s_flags |= IPATH_S_BUSY;
+
+again:
+ if (sqp->s_last == sqp->s_head)
+ goto clr_busy;
+ wqe = get_swqe_ptr(sqp, sqp->s_last);
+
+ /* Return if it is not OK to start a new work request. */
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
+ goto clr_busy;
+ /* We are in the error state, flush the work request. */
+ send_status = IB_WC_WR_FLUSH_ERR;
+ goto flush_send;
+ }
+
+ /*
+ * We can rely on the entry not changing without the s_lock
+ * being held until we update s_last.
+ * We increment s_cur to indicate s_last is in progress.
+ */
+ if (sqp->s_last == sqp->s_cur) {
+ if (++sqp->s_cur >= sqp->s_size)
+ sqp->s_cur = 0;
+ }
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+
+ if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ dev->n_pkt_drops++;
+ /*
+ * For RC, the requester would timeout and retry so
+ * shortcut the timeouts and just signal too many retries.
+ */
+ if (sqp->ibqp.qp_type == IB_QPT_RC)
+ send_status = IB_WC_RETRY_EXC_ERR;
+ else
+ send_status = IB_WC_SUCCESS;
+ goto serr;
+ }
+
+ memset(&wc, 0, sizeof wc);
+ send_status = IB_WC_SUCCESS;
+
+ sqp->s_sge.sge = wqe->sg_list[0];
+ sqp->s_sge.sg_list = wqe->sg_list + 1;
+ sqp->s_sge.num_sge = wqe->wr.num_sge;
+ sqp->s_len = wqe->length;
+ switch (wqe->wr.opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
+ /* FALLTHROUGH */
+ case IB_WR_SEND:
+ if (!ipath_get_rwqe(qp, 0))
+ goto rnr_nak;
+ break;
+
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
+ if (!ipath_get_rwqe(qp, 1))
+ goto rnr_nak;
+ /* FALLTHROUGH */
+ case IB_WR_RDMA_WRITE:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
+ if (wqe->length == 0)
+ break;
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
+ wqe->wr.wr.rdma.remote_addr,
+ wqe->wr.wr.rdma.rkey,
+ IB_ACCESS_REMOTE_WRITE)))
+ goto acc_err;
+ break;
+
+ case IB_WR_RDMA_READ:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+ goto inv_err;
+ if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
+ wqe->wr.wr.rdma.remote_addr,
+ wqe->wr.wr.rdma.rkey,
+ IB_ACCESS_REMOTE_READ)))
+ goto acc_err;
+ qp->r_sge.sge = wqe->sg_list[0];
+ qp->r_sge.sg_list = wqe->sg_list + 1;
+ qp->r_sge.num_sge = wqe->wr.num_sge;
+ break;
+
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ goto inv_err;
+ if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
+ wqe->wr.wr.atomic.remote_addr,
+ wqe->wr.wr.atomic.rkey,
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto acc_err;
+ /* Perform atomic OP and save result. */
+ maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+ sdata = wqe->wr.wr.atomic.compare_add;
+ *(u64 *) sqp->s_sge.sge.vaddr =
+ (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+ (u64) atomic64_add_return(sdata, maddr) - sdata :
+ (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ sdata, wqe->wr.wr.atomic.swap);
+ goto send_comp;
+
+ default:
+ send_status = IB_WC_LOC_QP_OP_ERR;
+ goto serr;
+ }
+
+ sge = &sqp->s_sge.sge;
+ while (sqp->s_len) {
+ u32 len = sqp->s_len;
+
+ if (len > sge->length)
+ len = sge->length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--sqp->s_sge.num_sge)
+ *sge = *sqp->s_sge.sg_list++;
+ } else if (sge->length == 0 && sge->mr != NULL) {
+ if (++sge->n >= IPATH_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ sqp->s_len -= len;
+ }
+
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
+ goto send_comp;
+
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ else
+ wc.opcode = IB_WC_RECV;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.byte_len = wqe->length;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.slid = qp->remote_ah_attr.dlid;
+ wc.sl = qp->remote_ah_attr.sl;
+ wc.port_num = 1;
+ /* Signal completion event if the solicited bit is set. */
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ wqe->wr.send_flags & IB_SEND_SOLICITED);
+
+send_comp:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+flush_send:
+ sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
+ ipath_send_complete(sqp, wqe, send_status);
+ goto again;
+
+rnr_nak:
+ /* Handle RNR NAK */
+ if (qp->ibqp.qp_type == IB_QPT_UC)
+ goto send_comp;
+ /*
+ * Note: we don't need the s_lock held since the BUSY flag
+ * makes this single threaded.
+ */
+ if (sqp->s_rnr_retry == 0) {
+ send_status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto serr;
+ }
+ if (sqp->s_rnr_retry_cnt < 7)
+ sqp->s_rnr_retry--;
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
+ goto clr_busy;
+ sqp->s_flags |= IPATH_S_WAITING;
+ dev->n_rnr_naks++;
+ sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
+ ipath_insert_rnr_queue(sqp);
+ goto clr_busy;
+
+inv_err:
+ send_status = IB_WC_REM_INV_REQ_ERR;
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+
+acc_err:
+ send_status = IB_WC_REM_ACCESS_ERR;
+ wc.status = IB_WC_LOC_PROT_ERR;
+err:
+ /* responder goes to error state */
+ ipath_rc_error(qp, wc.status);
+
+serr:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ ipath_send_complete(sqp, wqe, send_status);
+ if (sqp->ibqp.qp_type == IB_QPT_RC) {
+ int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+
+ sqp->s_flags &= ~IPATH_S_BUSY;
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = sqp->ibqp.device;
+ ev.element.qp = &sqp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
+ }
+ goto done;
+ }
+clr_busy:
+ sqp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+done:
+ if (qp && atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+}
+
+static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
+{
+ if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
+ qp->ibqp.qp_type == IB_QPT_SMI) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+ }
+}
+
+/**
+ * ipath_no_bufs_available - tell the layer driver we need buffers
+ * @qp: the QP that caused the problem
+ * @dev: the device we ran out of buffers on
+ *
+ * Called when we run out of PIO buffers.
+ * If we are now in the error state, return zero to flush the
+ * send work request.
+ */
+static int ipath_no_bufs_available(struct ipath_qp *qp,
+ struct ipath_ibdev *dev)
+{
+ unsigned long flags;
+ int ret = 1;
+
+ /*
+ * Note that as soon as want_buffer() is called and
+ * possibly before it returns, ipath_ib_piobufavail()
+ * could be called. Therefore, put QP on the piowait list before
+ * enabling the PIO avail interrupt.
+ */
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
+ dev->n_piowait++;
+ qp->s_flags |= IPATH_S_WAITING;
+ qp->s_flags &= ~IPATH_S_BUSY;
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->piowait))
+ list_add_tail(&qp->piowait, &dev->piowait);
+ spin_unlock(&dev->pending_lock);
+ } else
+ ret = 0;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ if (ret)
+ want_buffer(dev->dd, qp);
+ return ret;
+}
+
+/**
+ * ipath_make_grh - construct a GRH header
+ * @dev: a pointer to the ipath device
+ * @hdr: a pointer to the GRH header being constructed
+ * @grh: the global route address to send to
+ * @hwords: the number of 32 bit words of header being sent
+ * @nwords: the number of 32 bit words of data being sent
+ *
+ * Return the size of the header in 32 bit words.
+ */
+u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
+ struct ib_global_route *grh, u32 hwords, u32 nwords)
+{
+ hdr->version_tclass_flow =
+ cpu_to_be32((6 << 28) |
+ (grh->traffic_class << 20) |
+ grh->flow_label);
+ hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
+ /* next_hdr is defined by C8-7 in ch. 8.4.1 */
+ hdr->next_hdr = 0x1B;
+ hdr->hop_limit = grh->hop_limit;
+ /* The SGID is 32-bit aligned. */
+ hdr->sgid.global.subnet_prefix = dev->gid_prefix;
+ hdr->sgid.global.interface_id = dev->dd->ipath_guid;
+ hdr->dgid = grh->dgid;
+
+ /* GRH header size in 32-bit words. */
+ return sizeof(struct ib_grh) / sizeof(u32);
+}
+
+void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
+ struct ipath_other_headers *ohdr,
+ u32 bth0, u32 bth2)
+{
+ u16 lrh0;
+ u32 nwords;
+ u32 extra_bytes;
+
+ /* Construct the header. */
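+ /*
+ * -qp->s_cur_size & 3 yields the number of pad bytes (0..3) needed to
+ * round the payload up to a 4-byte boundary; nwords is then the padded
+ * payload length in 32-bit words.
+ */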
+ extra_bytes = -qp->s_cur_size & 3;
+ nwords = (qp->s_cur_size + extra_bytes) >> 2;
+ lrh0 = IPATH_LRH_BTH;
+ if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
+ &qp->remote_ah_attr.grh,
+ qp->s_hdrwords, nwords);
+ lrh0 = IPATH_LRH_GRH;
+ }
+ lrh0 |= qp->remote_ah_attr.sl << 4;
+ qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
+ qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+ qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid |
+ qp->remote_ah_attr.src_path_bits);
+ bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
+ bth0 |= extra_bytes << 20;
+ ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
+ ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
+ ohdr->bth[2] = cpu_to_be32(bth2);
+}
+
+/**
+ * ipath_do_send - perform a send on a QP
+ * @data: contains a pointer to the QP
+ *
+ * Process entries in the send work queue until credit or queue is
+ * exhausted. Only allow one CPU to send a packet per QP (tasklet).
+ * Otherwise, two threads could send packets out of order.
+ */
+void ipath_do_send(unsigned long data)
+{
+ struct ipath_qp *qp = (struct ipath_qp *)data;
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ int (*make_req)(struct ipath_qp *qp);
+ unsigned long flags;
+
+ if ((qp->ibqp.qp_type == IB_QPT_RC ||
+ qp->ibqp.qp_type == IB_QPT_UC) &&
+ qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
+ ipath_ruc_loopback(qp);
+ goto bail;
+ }
+
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ make_req = ipath_make_rc_req;
+ else if (qp->ibqp.qp_type == IB_QPT_UC)
+ make_req = ipath_make_uc_req;
+ else
+ make_req = ipath_make_ud_req;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Return if we are already busy processing a work request. */
+ if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
+ !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ goto bail;
+ }
+
+ qp->s_flags |= IPATH_S_BUSY;
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+again:
+ /* Check for a constructed packet to be sent. */
+ if (qp->s_hdrwords != 0) {
+ /*
+ * If no PIO bufs are available, return. An interrupt will
+ * call ipath_ib_piobufavail() when one is available.
+ */
+ if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
+ qp->s_cur_sge, qp->s_cur_size)) {
+ if (ipath_no_bufs_available(qp, dev))
+ goto bail;
+ }
+ dev->n_unicast_xmit++;
+ /* Record that we sent the packet and s_hdr is empty. */
+ qp->s_hdrwords = 0;
+ }
+
+ if (make_req(qp))
+ goto again;
+
+bail:;
+}
+
+/*
+ * This should be called with s_lock held.
+ */
+void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
+ enum ib_wc_status status)
+{
+ u32 old_last, last;
+
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
+ return;
+
+ /* See ch. 11.2.4.1 and 10.7.3.1 */
+ if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
+ (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+ status != IB_WC_SUCCESS) {
+ struct ib_wc wc;
+
+ memset(&wc, 0, sizeof wc);
+ wc.wr_id = wqe->wr.wr_id;
+ wc.status = status;
+ wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
+ wc.qp = &qp->ibqp;
+ if (status == IB_WC_SUCCESS)
+ wc.byte_len = wqe->length;
+ ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
+ status != IB_WC_SUCCESS);
+ }
+
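+ /*
+ * Retire the WQE: advance s_last past it, and pull s_cur and s_tail
+ * forward if they still point at the retired entry.
+ */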
+ old_last = last = qp->s_last;
+ if (++last >= qp->s_size)
+ last = 0;
+ qp->s_last = last;
+ if (qp->s_cur == old_last)
+ qp->s_cur = last;
+ if (qp->s_tail == old_last)
+ qp->s_tail = last;
+ if (qp->state == IB_QPS_SQD && last == qp->s_cur)
+ qp->s_draining = 0;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_sd7220.c b/drivers/infiniband/hw/ipath/ipath_sd7220.c
new file mode 100644
index 0000000..aa47eb5
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_sd7220.c
@@ -0,0 +1,1462 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*
+ * This file contains all of the code that is specific to the SerDes
+ * on the InfiniPath 7220 chip.
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "ipath_kernel.h"
+#include "ipath_registers.h"
+#include "ipath_7220.h"
+
+/*
+ * The IBSerDesMappTable is a memory that holds values to be stored in
+ * various SerDes registers by IBC. It is not part of the normal kregs
+ * map and is used in exactly one place, hence the #define below.
+ */
+#define KR_IBSerDesMappTable (0x94000 / (sizeof(uint64_t)))
+
+/*
+ * Below used for sdnum parameter, selecting one of the two sections
+ * used for PCIe, or the single SerDes used for IB.
+ */
+#define PCIE_SERDES0 0
+#define PCIE_SERDES1 1
+
+/*
+ * The EPB requires addressing in a particular form. EPB_LOC() is intended
+ * to make #definitions a little more readable.
+ */
+#define EPB_ADDR_SHF 8
+#define EPB_LOC(chn, elt, reg) \
+ (((elt & 0xf) | ((chn & 7) << 4) | ((reg & 0x3f) << 9)) << \
+ EPB_ADDR_SHF)
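+/*
+ * Pre-shift layout from EPB_LOC(): bits [3:0] element, [6:4] channel,
+ * [14:9] register; the result is shifted left by EPB_ADDR_SHF (8) so the
+ * low byte stays free for the EPB data value.
+ */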
+#define EPB_IB_QUAD0_CS_SHF (25)
+#define EPB_IB_QUAD0_CS (1U << EPB_IB_QUAD0_CS_SHF)
+#define EPB_IB_UC_CS_SHF (26)
+#define EPB_PCIE_UC_CS_SHF (27)
+#define EPB_GLOBAL_WR (1U << (EPB_ADDR_SHF + 8))
+
+/* Forward declarations. */
+static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc,
+ u32 data, u32 mask);
+static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val,
+ int mask);
+static int ipath_sd_trimdone_poll(struct ipath_devdata *dd);
+static void ipath_sd_trimdone_monitor(struct ipath_devdata *dd,
+ const char *where);
+static int ipath_sd_setvals(struct ipath_devdata *dd);
+static int ipath_sd_early(struct ipath_devdata *dd);
+static int ipath_sd_dactrim(struct ipath_devdata *dd);
+/* Set the registers that IBC may muck with to their default "preset" values */
+int ipath_sd7220_presets(struct ipath_devdata *dd);
+static int ipath_internal_presets(struct ipath_devdata *dd);
+/* Tweak the register (CMUCTRL5) that contains the TRIMSELF controls */
+static int ipath_sd_trimself(struct ipath_devdata *dd, int val);
+static int epb_access(struct ipath_devdata *dd, int sdnum, int claim);
+
+void ipath_set_relock_poll(struct ipath_devdata *dd, int ibup);
+
+/*
+ * Below keeps track of whether the "once per power-on" initialization has
+ * been done, because uC code Version 1.32.17 or higher allows the uC to
+ * be reset at will, and Automatic Equalization may require it. So the
+ * state of the reset "pin", as reflected in the was_reset parameter to
+ * ipath_sd7220_init(), is no longer valid. Instead, we check for the
+ * actual uC code having been loaded.
+ */
+static int ipath_ibsd_ucode_loaded(struct ipath_devdata *dd)
+{
+ if (!dd->serdes_first_init_done && (ipath_sd7220_ib_vfy(dd) > 0))
+ dd->serdes_first_init_done = 1;
+ return dd->serdes_first_init_done;
+}
+
+/* repeat #define for local use. "Real" #define is in ipath_iba7220.c */
+#define INFINIPATH_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL
+#define IB_MPREG5 (EPB_LOC(6, 0, 0xE) | (1L << EPB_IB_UC_CS_SHF))
+#define IB_MPREG6 (EPB_LOC(6, 0, 0xF) | (1U << EPB_IB_UC_CS_SHF))
+#define UC_PAR_CLR_D 8
+#define UC_PAR_CLR_M 0xC
+#define IB_CTRL2(chn) (EPB_LOC(chn, 7, 3) | EPB_IB_QUAD0_CS)
+#define START_EQ1(chan) EPB_LOC(chan, 7, 0x27)
+
+void ipath_sd7220_clr_ibpar(struct ipath_devdata *dd)
+{
+ int ret;
+
+ /* clear, then re-enable parity errs */
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6,
+ UC_PAR_CLR_D, UC_PAR_CLR_M);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Failed clearing IBSerDes Parity err\n");
+ goto bail;
+ }
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0,
+ UC_PAR_CLR_M);
+
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ udelay(4);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
+ INFINIPATH_HWE_IB_UC_MEMORYPARITYERR);
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+bail:
+ return;
+}
+
+/*
+ * After a reset or other unusual event, the epb interface may need
+ * to be re-synchronized, between the host and the uC.
+ * returns <0 for failure to resync within IBSD_RESYNC_TRIES (not expected)
+ */
+#define IBSD_RESYNC_TRIES 3
+#define IB_PGUDP(chn) (EPB_LOC((chn), 2, 1) | EPB_IB_QUAD0_CS)
+#define IB_CMUDONE(chn) (EPB_LOC((chn), 7, 0xF) | EPB_IB_QUAD0_CS)
+
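+/*
+ * For each of the four channels in turn: read IB_PGUDP, write back the
+ * alternate test pattern (0xF0 <-> 0x55), verify the read-back, then
+ * sanity-check CMUDONE. Any failure retries the same channel, up to
+ * 4 * IBSD_RESYNC_TRIES attempts in total.
+ */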
+static int ipath_resync_ibepb(struct ipath_devdata *dd)
+{
+ int ret, pat, tries, chn;
+ u32 loc;
+
+ ret = -1;
+ chn = 0;
+ for (tries = 0; tries < (4 * IBSD_RESYNC_TRIES); ++tries) {
+ loc = IB_PGUDP(chn);
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Failed read in resync\n");
+ continue;
+ }
+ if (ret != 0xF0 && ret != 0x55 && tries == 0)
+ ipath_dev_err(dd, "unexpected pattern in resync\n");
+ pat = ret ^ 0xA5; /* alternate F0 and 55 */
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, pat, 0xFF);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Failed write in resync\n");
+ continue;
+ }
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Failed re-read in resync\n");
+ continue;
+ }
+ if (ret != pat) {
+ ipath_dev_err(dd, "Failed compare1 in resync\n");
+ continue;
+ }
+ loc = IB_CMUDONE(chn);
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Failed CMUDONE rd in resync\n");
+ continue;
+ }
+ if ((ret & 0x70) != ((chn << 4) | 0x40)) {
+ ipath_dev_err(dd, "Bad CMUDONE value %02X, chn %d\n",
+ ret, chn);
+ continue;
+ }
+ if (++chn == 4)
+ break; /* Success */
+ }
+ ipath_cdbg(VERBOSE, "Resync in %d tries\n", tries);
+ return (ret > 0) ? 0 : ret;
+}
+
+/*
+ * Localize the stuff that should be done to change IB uC reset
+ * returns <0 for errors.
+ */
+static int ipath_ibsd_reset(struct ipath_devdata *dd, int assert_rst)
+{
+ u64 rst_val;
+ int ret = 0;
+ unsigned long flags;
+
+ rst_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibserdesctrl);
+ if (assert_rst) {
+ /*
+ * Vendor recommends "interrupting" uC before reset, to
+ * minimize possible glitches.
+ */
+ spin_lock_irqsave(&dd->ipath_sdepb_lock, flags);
+ epb_access(dd, IB_7220_SERDES, 1);
+ rst_val |= 1ULL;
+ /* Squelch possible parity error from _asserting_ reset */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask &
+ ~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibserdesctrl, rst_val);
+ /* flush write, delay to ensure it took effect */
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ udelay(2);
+ /* once it's reset, can remove interrupt */
+ epb_access(dd, IB_7220_SERDES, -1);
+ spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
+ } else {
+ /*
+ * Before we de-assert reset, we need to deal with
+ * possible glitch on the Parity-error line.
+ * Suppress it around the reset, both in chip-level
+ * hwerrmask and in IB uC control reg. uC will allow
+ * it again during startup.
+ */
+ u64 val;
+ rst_val &= ~(1ULL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask &
+ ~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR);
+
+ ret = ipath_resync_ibepb(dd);
+ if (ret < 0)
+ ipath_dev_err(dd, "unable to re-sync IB EPB\n");
+
+ /* set uC control regs to suppress parity errs */
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG5, 1, 1);
+ if (ret < 0)
+ goto bail;
+ /* IB uC code past Version 1.32.17 allows suppression of the watchdog */
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80,
+ 0x80);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Failed to set WDOG disable\n");
+ goto bail;
+ }
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibserdesctrl, rst_val);
+ /* flush write, delay for startup */
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ udelay(1);
+ /* clear, then re-enable parity errs */
+ ipath_sd7220_clr_ibpar(dd);
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
+ if (val & INFINIPATH_HWE_IB_UC_MEMORYPARITYERR) {
+ ipath_dev_err(dd, "IBUC Parity still set after RST\n");
+ dd->ipath_hwerrmask &=
+ ~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR;
+ }
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
+ dd->ipath_hwerrmask);
+ }
+
+bail:
+ return ret;
+}
+
+static void ipath_sd_trimdone_monitor(struct ipath_devdata *dd,
+ const char *where)
+{
+ int ret, chn, baduns;
+ u64 val;
+
+ if (!where)
+ where = "?";
+
+ /* give time for reset to settle out in EPB */
+ udelay(2);
+
+ ret = ipath_resync_ibepb(dd);
+ if (ret < 0)
+ ipath_dev_err(dd, "not able to re-sync IB EPB (%s)\n", where);
+
+ /* Do "sacrificial read" to get EPB in sane state after reset */
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(0), 0, 0);
+ if (ret < 0)
+ ipath_dev_err(dd, "Failed TRIMDONE 1st read, (%s)\n", where);
+
+ /* Check/show "summary" Trim-done bit in IBCStatus */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+ if (val & (1ULL << 11))
+ ipath_cdbg(VERBOSE, "IBCS TRIMDONE set (%s)\n", where);
+ else
+ ipath_dev_err(dd, "IBCS TRIMDONE clear (%s)\n", where);
+
+ udelay(2);
+
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, 0x80);
+ if (ret < 0)
+ ipath_dev_err(dd, "Failed Dummy RMW, (%s)\n", where);
+ udelay(10);
+
+ baduns = 0;
+
+ for (chn = 3; chn >= 0; --chn) {
+ /* Read CTRL reg for each channel to check TRIMDONE */
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+ IB_CTRL2(chn), 0, 0);
+ if (ret < 0)
+ ipath_dev_err(dd, "Failed checking TRIMDONE, chn %d"
+ " (%s)\n", chn, where);
+
+ if (!(ret & 0x10)) {
+ int probe;
+ baduns |= (1 << chn);
+ ipath_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)."
+ " (%s)\n", chn, ret, where);
+ probe = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+ IB_PGUDP(0), 0, 0);
+ ipath_dev_err(dd, "probe is %d (%02X)\n",
+ probe, probe);
+ probe = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+ IB_CTRL2(chn), 0, 0);
+ ipath_dev_err(dd, "re-read: %d (%02X)\n",
+ probe, probe);
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+ IB_CTRL2(chn), 0x10, 0x10);
+ if (ret < 0)
+ ipath_dev_err(dd,
+ "Err on TRIMDONE rewrite1\n");
+ }
+ }
+ for (chn = 3; chn >= 0; --chn) {
+ /* Read CTRL reg for each channel to check TRIMDONE */
+ if (baduns & (1 << chn)) {
+ ipath_dev_err(dd,
+ "Reseting TRIMDONE on chn %d (%s)\n",
+ chn, where);
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+ IB_CTRL2(chn), 0x10, 0x10);
+ if (ret < 0)
+ ipath_dev_err(dd, "Failed re-setting "
+ "TRIMDONE, chn %d (%s)\n",
+ chn, where);
+ }
+ }
+}
+
+/*
+ * Below is portion of IBA7220-specific bringup_serdes() that actually
+ * deals with registers and memory within the SerDes itself.
+ * Post IB uC code version 1.32.17, was_reset being 1 is not really
+ * informative, so we double-check.
+ */
+int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset)
+{
+ int ret = 1; /* default to failure */
+ int first_reset;
+ int val_stat;
+
+ if (!was_reset) {
+ /* entered with reset not asserted, we need to do it */
+ ipath_ibsd_reset(dd, 1);
+ ipath_sd_trimdone_monitor(dd, "Driver-reload");
+ }
+
+ /* Substitute our deduced value for was_reset */
+ ret = ipath_ibsd_ucode_loaded(dd);
+ if (ret < 0) {
+ ret = 1;
+ goto done;
+ }
+ first_reset = !ret; /* First reset if IBSD uCode not yet loaded */
+
+ /*
+ * Alter some regs per vendor latest doc, reset-defaults
+ * are not right for IB.
+ */
+ ret = ipath_sd_early(dd);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Failed to set IB SERDES early defaults\n");
+ ret = 1;
+ goto done;
+ }
+
+ /*
+ * Set DAC manual trim IB.
+ * We only do this once after chip has been reset (usually
+ * same as once per system boot).
+ */
+ if (first_reset) {
+ ret = ipath_sd_dactrim(dd);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Failed IB SERDES DAC trim\n");
+ ret = 1;
+ goto done;
+ }
+ }
+
+ /*
+ * Set various registers (DDS and RXEQ) that will be
+ * controlled by IBC (in 1.2 mode) to reasonable preset values
+ * Calling the "internal" version avoids the "check for needed"
+ * and "trimdone monitor" that might be counter-productive.
+ */
+ ret = ipath_internal_presets(dd);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Failed to set IB SERDES presets\n");
+ ret = 1;
+ goto done;
+ }
+ ret = ipath_sd_trimself(dd, 0x80);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Failed to set IB SERDES TRIMSELF\n");
+ ret = 1;
+ goto done;
+ }
+
+ /* Load image, then try to verify */
+ ret = 0; /* Assume success */
+ if (first_reset) {
+ int vfy;
+ int trim_done;
+ ipath_dbg("SerDes uC was reset, reloading PRAM\n");
+ ret = ipath_sd7220_ib_load(dd);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Failed to load IB SERDES image\n");
+ ret = 1;
+ goto done;
+ }
+
+ /* Loaded image, try to verify */
+ vfy = ipath_sd7220_ib_vfy(dd);
+ if (vfy != ret) {
+ ipath_dev_err(dd, "SERDES PRAM VFY failed\n");
+ ret = 1;
+ goto done;
+ }
+ /*
+ * Loaded and verified. Almost good...
+ * hold "success" in ret
+ */
+ ret = 0;
+
+ /*
+ * Prev steps all worked, continue bringup
+ * De-assert RESET to uC, only in first reset, to allow
+ * trimming.
+ *
+ * Since our default setup sets START_EQ1 to
+ * PRESET, we need to clear that for this very first run.
+ */
+ ret = ibsd_mod_allchnls(dd, START_EQ1(0), 0, 0x38);
+ if (ret < 0) {
+ ipath_dev_err(dd, "Failed clearing START_EQ1\n");
+ ret = 1;
+ goto done;
+ }
+
+ ipath_ibsd_reset(dd, 0);
+ /*
+ * If this is not the first reset, trimdone should be set
+ * already.
+ */
+ trim_done = ipath_sd_trimdone_poll(dd);
+ /*
+ * Whether or not trimdone succeeded, we need to put the
+ * uC back into reset to avoid a possible fight with the
+ * IBC state-machine.
+ */
+ ipath_ibsd_reset(dd, 1);
+
+ if (!trim_done) {
+ ipath_dev_err(dd, "No TRIMDONE seen\n");
+ ret = 1;
+ goto done;
+ }
+
+ ipath_sd_trimdone_monitor(dd, "First-reset");
+ /* Remember so we do not re-do the load, dactrim, etc. */
+ dd->serdes_first_init_done = 1;
+ }
+ /*
+ * Setup for channel training and load values for
+ * RxEq and DDS in tables used by IBC in IB1.2 mode
+ */
+
+ val_stat = ipath_sd_setvals(dd);
+ if (val_stat < 0)
+ ret = 1;
+done:
+ /* start relock timer regardless, but start at 1 second */
+ ipath_set_relock_poll(dd, -1);
+ return ret;
+}
+
+#define EPB_ACC_REQ 1
+#define EPB_ACC_GNT 0x100
+#define EPB_DATA_MASK 0xFF
+#define EPB_RD (1ULL << 24)
+#define EPB_TRANS_RDY (1ULL << 31)
+#define EPB_TRANS_ERR (1ULL << 30)
+#define EPB_TRANS_TRIES 5
+
+/*
+ * query, claim, release ownership of the EPB (External Parallel Bus)
+ * for a specified SERDES.
+ * the "claim" parameter is >0 to claim, <0 to release, 0 to query.
+ * Returns <0 for errors, >0 if we had ownership, else 0.
+ */
+static int epb_access(struct ipath_devdata *dd, int sdnum, int claim)
+{
+ u16 acc;
+ u64 accval;
+ int owned = 0;
+ u64 oct_sel = 0;
+
+ switch (sdnum) {
+ case IB_7220_SERDES :
+ /*
+ * The IB SERDES "ownership" is fairly simple. A single each
+ * request/grant.
+ */
+ acc = dd->ipath_kregs->kr_ib_epbacc;
+ break;
+ case PCIE_SERDES0 :
+ case PCIE_SERDES1 :
+ /* PCIe SERDES has two "octants", need to select which */
+ acc = dd->ipath_kregs->kr_pcie_epbacc;
+ oct_sel = (2 << (sdnum - PCIE_SERDES0));
+ break;
+ default :
+ return 0;
+ }
+
+ /* Make sure any outstanding transaction was seen */
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ udelay(15);
+
+ accval = ipath_read_kreg32(dd, acc);
+
+ owned = !!(accval & EPB_ACC_GNT);
+ if (claim < 0) {
+ /* Need to release */
+ u64 pollval;
+ /*
+ * The only writeable bits are the request and CS.
+ * Both should be clear
+ */
+ u64 newval = 0;
+ ipath_write_kreg(dd, acc, newval);
+ /* First read after write is not trustworthy */
+ pollval = ipath_read_kreg32(dd, acc);
+ udelay(5);
+ pollval = ipath_read_kreg32(dd, acc);
+ if (pollval & EPB_ACC_GNT)
+ owned = -1;
+ } else if (claim > 0) {
+ /* Need to claim */
+ u64 pollval;
+ u64 newval = EPB_ACC_REQ | oct_sel;
+ ipath_write_kreg(dd, acc, newval);
+ /* First read after write is not trustworthy */
+ pollval = ipath_read_kreg32(dd, acc);
+ udelay(5);
+ pollval = ipath_read_kreg32(dd, acc);
+ if (!(pollval & EPB_ACC_GNT))
+ owned = -1;
+ }
+ return owned;
+}
+
+/*
+ * Helper to deal with the race between a write and the following read of EPB regs.
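+ * Returns the number of tries remaining (>0) on success, 0 if the ready
+ * bit was never seen, or -1 if the transaction error bit was set.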
+ */
+static int epb_trans(struct ipath_devdata *dd, u16 reg, u64 i_val, u64 *o_vp)
+{
+ int tries;
+ u64 transval;
+
+
+ ipath_write_kreg(dd, reg, i_val);
+ /* Throw away first read, as RDY bit may be stale */
+ transval = ipath_read_kreg64(dd, reg);
+
+ for (tries = EPB_TRANS_TRIES; tries; --tries) {
+ transval = ipath_read_kreg32(dd, reg);
+ if (transval & EPB_TRANS_RDY)
+ break;
+ udelay(5);
+ }
+ if (transval & EPB_TRANS_ERR)
+ return -1;
+ if (tries > 0 && o_vp)
+ *o_vp = transval;
+ return tries;
+}
+
+/**
+ * ipath_sd7220_reg_mod - modify SERDES register
+ * @dd: the infinipath device
+ * @sdnum: which SERDES to access
+ * @loc: location - channel, element, register, as packed by EPB_LOC() macro.
+ * @wd: Write Data - value to set in register
+ * @mask: ones where data should be spliced into reg.
+ *
+ * Basic register read/modify/write, with unneeded accesses elided. That is,
+ * a mask of zero will prevent the write, while a mask of 0xFF will prevent
+ * the read.
+ * Returns the current (presumed, if a write was done) contents of the
+ * selected register, or <0 on error.
+ */
+static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc,
+ u32 wd, u32 mask)
+{
+ u16 trans;
+ u64 transval;
+ int owned;
+ int tries, ret;
+ unsigned long flags;
+
+ switch (sdnum) {
+ case IB_7220_SERDES :
+ trans = dd->ipath_kregs->kr_ib_epbtrans;
+ break;
+ case PCIE_SERDES0 :
+ case PCIE_SERDES1 :
+ trans = dd->ipath_kregs->kr_pcie_epbtrans;
+ break;
+ default :
+ return -1;
+ }
+
+ /*
+ * All access is locked in software (vs other host threads) and
+ * hardware (vs uC access).
+ */
+ spin_lock_irqsave(&dd->ipath_sdepb_lock, flags);
+
+ owned = epb_access(dd, sdnum, 1);
+ if (owned < 0) {
+ spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
+ return -1;
+ }
+ ret = 0;
+ for (tries = EPB_TRANS_TRIES; tries; --tries) {
+ transval = ipath_read_kreg32(dd, trans);
+ if (transval & EPB_TRANS_RDY)
+ break;
+ udelay(5);
+ }
+
+ if (tries > 0) {
+ tries = 1; /* to make read-skip work */
+ if (mask != 0xFF) {
+ /*
+ * Not a pure write, so need to read.
+ * loc encodes chip-select as well as address
+ */
+ transval = loc | EPB_RD;
+ tries = epb_trans(dd, trans, transval, &transval);
+ }
+ if (tries > 0 && mask != 0) {
+ /*
+ * Not a pure read, so need to write.
+ */
+ wd = (wd & mask) | (transval & ~mask);
+ transval = loc | (wd & EPB_DATA_MASK);
+ tries = epb_trans(dd, trans, transval, &transval);
+ }
+ }
+ /* else, failed to see ready, what error-handling? */
+
+ /*
+ * Release bus. Failure is an error.
+ */
+ if (epb_access(dd, sdnum, -1) < 0)
+ ret = -1;
+ else
+ ret = transval & EPB_DATA_MASK;
+
+ spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
+ if (tries <= 0)
+ ret = -1;
+ return ret;
+}
+
+#define EPB_ROM_R (2)
+#define EPB_ROM_W (1)
+/*
+ * Below, all uC-related, use appropriate UC_CS, depending
+ * on which SerDes is used.
+ */
+#define EPB_UC_CTL EPB_LOC(6, 0, 0)
+#define EPB_MADDRL EPB_LOC(6, 0, 2)
+#define EPB_MADDRH EPB_LOC(6, 0, 3)
+#define EPB_ROMDATA EPB_LOC(6, 0, 4)
+#define EPB_RAMDATA EPB_LOC(6, 0, 5)
+
+/* Transfer data to/from uC Program RAM of IB or PCIe SerDes */
+static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc,
+ u8 *buf, int cnt, int rd_notwr)
+{
+ u16 trans;
+ u64 transval;
+ u64 csbit;
+ int owned;
+ int tries;
+ int sofar;
+ int addr;
+ int ret;
+ unsigned long flags;
+ const char *op;
+
+ /* Pick appropriate transaction reg and "Chip select" for this serdes */
+ switch (sdnum) {
+ case IB_7220_SERDES :
+ csbit = 1ULL << EPB_IB_UC_CS_SHF;
+ trans = dd->ipath_kregs->kr_ib_epbtrans;
+ break;
+ case PCIE_SERDES0 :
+ case PCIE_SERDES1 :
+ /* PCIe SERDES has uC "chip select" in different bit, too */
+ csbit = 1ULL << EPB_PCIE_UC_CS_SHF;
+ trans = dd->ipath_kregs->kr_pcie_epbtrans;
+ break;
+ default :
+ return -1;
+ }
+
+ op = rd_notwr ? "Rd" : "Wr";
+ spin_lock_irqsave(&dd->ipath_sdepb_lock, flags);
+
+ owned = epb_access(dd, sdnum, 1);
+ if (owned < 0) {
+ spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
+ ipath_dbg("Could not get %s access to %s EPB: %X, loc %X\n",
+ op, (sdnum == IB_7220_SERDES) ? "IB" : "PCIe",
+ owned, loc);
+ return -1;
+ }
+
+ /*
+ * In future code, we may need to distinguish several address ranges,
+ * and select various memories based on this. For now, just trim
+ * "loc" (location including address and memory select) to
+ * "addr" (address within memory). we will only support PRAM
+ * The memory is 8KB.
+ */
+ addr = loc & 0x1FFF;
+ for (tries = EPB_TRANS_TRIES; tries; --tries) {
+ transval = ipath_read_kreg32(dd, trans);
+ if (transval & EPB_TRANS_RDY)
+ break;
+ udelay(5);
+ }
+
+ sofar = 0;
+ if (tries <= 0)
+ ipath_dbg("No initial RDY on EPB access request\n");
+ else {
+ /*
+ * Every "memory" access is doubly-indirect.
+ * We set two bytes of address, then read/write
+ * one or more bytes of data.
+ */
+
+ /* First, we set control to "Read" or "Write" */
+ transval = csbit | EPB_UC_CTL |
+ (rd_notwr ? EPB_ROM_R : EPB_ROM_W);
+ tries = epb_trans(dd, trans, transval, &transval);
+ if (tries <= 0)
+ ipath_dbg("No EPB response to uC %s cmd\n", op);
+ while (tries > 0 && sofar < cnt) {
+ if (!sofar) {
+ /* Only set address at start of chunk */
+ int addrbyte = (addr + sofar) >> 8;
+ transval = csbit | EPB_MADDRH | addrbyte;
+ tries = epb_trans(dd, trans, transval,
+ &transval);
+ if (tries <= 0) {
+ ipath_dbg("No EPB response ADDRH\n");
+ break;
+ }
+ addrbyte = (addr + sofar) & 0xFF;
+ transval = csbit | EPB_MADDRL | addrbyte;
+ tries = epb_trans(dd, trans, transval,
+ &transval);
+ if (tries <= 0) {
+ ipath_dbg("No EPB response ADDRL\n");
+ break;
+ }
+ }
+
+ if (rd_notwr)
+ transval = csbit | EPB_ROMDATA | EPB_RD;
+ else
+ transval = csbit | EPB_ROMDATA | buf[sofar];
+ tries = epb_trans(dd, trans, transval, &transval);
+ if (tries <= 0) {
+ ipath_dbg("No EPB response DATA\n");
+ break;
+ }
+ if (rd_notwr)
+ buf[sofar] = transval & EPB_DATA_MASK;
+ ++sofar;
+ }
+ /* Finally, clear control-bit for Read or Write */
+ transval = csbit | EPB_UC_CTL;
+ tries = epb_trans(dd, trans, transval, &transval);
+ if (tries <= 0)
+ ipath_dbg("No EPB response to drop of uC %s cmd\n", op);
+ }
+
+ ret = sofar;
+ /* Release bus. Failure is an error */
+ if (epb_access(dd, sdnum, -1) < 0)
+ ret = -1;
+
+ spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
+ if (tries <= 0) {
+ ipath_dbg("SERDES PRAM %s failed after %d bytes\n", op, sofar);
+ ret = -1;
+ }
+ return ret;
+}
+
+#define PROG_CHUNK 64
+
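+/*
+ * Load "len" bytes of uC program image into SerDes RAM, PROG_CHUNK bytes
+ * at a time. Returns the number of bytes written, or -1 if any chunk
+ * transfer comes up short.
+ */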
+int ipath_sd7220_prog_ld(struct ipath_devdata *dd, int sdnum,
+ u8 *img, int len, int offset)
+{
+ int cnt, sofar, req;
+
+ sofar = 0;
+ while (sofar < len) {
+ req = len - sofar;
+ if (req > PROG_CHUNK)
+ req = PROG_CHUNK;
+ cnt = ipath_sd7220_ram_xfer(dd, sdnum, offset + sofar,
+ img + sofar, req, 0);
+ if (cnt < req) {
+ sofar = -1;
+ break;
+ }
+ sofar += req;
+ }
+ return sofar;
+}
+
+#define VFY_CHUNK 64
+#define SD_PRAM_ERROR_LIMIT 42
+
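+/*
+ * Read back the loaded image VFY_CHUNK bytes at a time and compare it with
+ * "img". Returns the number of bytes verified on success, or a negative
+ * value on mismatch or read failure.
+ */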
+int ipath_sd7220_prog_vfy(struct ipath_devdata *dd, int sdnum,
+ const u8 *img, int len, int offset)
+{
+ int cnt, sofar, req, idx, errors;
+ unsigned char readback[VFY_CHUNK];
+
+ errors = 0;
+ sofar = 0;
+ while (sofar < len) {
+ req = len - sofar;
+ if (req > VFY_CHUNK)
+ req = VFY_CHUNK;
+ cnt = ipath_sd7220_ram_xfer(dd, sdnum, sofar + offset,
+ readback, req, 1);
+ if (cnt < req) {
+ /* failed in read itself */
+ sofar = -1;
+ break;
+ }
+ for (idx = 0; idx < cnt; ++idx) {
+ if (readback[idx] != img[idx+sofar])
+ ++errors;
+ }
+ sofar += cnt;
+ }
+ return errors ? -errors : sofar;
+}
+
+/* IRQ not set up at this point in init, so we poll. */
+#define IB_SERDES_TRIM_DONE (1ULL << 11)
+#define TRIM_TMO (30)
+
+static int ipath_sd_trimdone_poll(struct ipath_devdata *dd)
+{
+ int trim_tmo, ret;
+ uint64_t val;
+
+ /*
+ * Default to failure, so IBC will not start
+ * without IB_SERDES_TRIM_DONE.
+ */
+ ret = 0;
+ for (trim_tmo = 0; trim_tmo < TRIM_TMO; ++trim_tmo) {
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+ if (val & IB_SERDES_TRIM_DONE) {
+ ipath_cdbg(VERBOSE, "TRIMDONE after %d\n", trim_tmo);
+ ret = 1;
+ break;
+ }
+ msleep(10);
+ }
+ if (trim_tmo >= TRIM_TMO) {
+ ipath_dev_err(dd, "No TRIMDONE in %d tries\n", trim_tmo);
+ ret = 0;
+ }
+ return ret;
+}
+
+#define TX_FAST_ELT (9)
+
+/*
+ * Set the "negotiation" values for SERDES. These are used by the IB1.2
+ * link negotiation. The macros below are an attempt to keep the values a
+ * little more human-editable.
+ * First, values related to Drive De-emphasis Settings.
+ */
+
+#define NUM_DDS_REGS 6
+#define DDS_REG_MAP 0x76A910 /* LSB-first list of regs (in elt 9) to mod */
+
+#define DDS_VAL(amp_d, main_d, ipst_d, ipre_d, amp_s, main_s, ipst_s, ipre_s) \
+ { { ((amp_d & 0x1F) << 1) | 1, ((amp_s & 0x1F) << 1) | 1, \
+ (main_d << 3) | 4 | (ipre_d >> 2), \
+ (main_s << 3) | 4 | (ipre_s >> 2), \
+ ((ipst_d & 0xF) << 1) | ((ipre_d & 3) << 6) | 0x21, \
+ ((ipst_s & 0xF) << 1) | ((ipre_s & 3) << 6) | 0x21 } }
+
+static struct dds_init {
+ uint8_t reg_vals[NUM_DDS_REGS];
+} dds_init_vals[] = {
+ /* DDR(FDR) SDR(HDR) */
+ /* Vendor recommends below for 3m cable */
+#define DDS_3M 0
+ DDS_VAL(31, 19, 12, 0, 29, 22, 9, 0),
+ DDS_VAL(31, 12, 15, 4, 31, 15, 15, 1),
+ DDS_VAL(31, 13, 15, 3, 31, 16, 15, 0),
+ DDS_VAL(31, 14, 15, 2, 31, 17, 14, 0),
+ DDS_VAL(31, 15, 15, 1, 31, 18, 13, 0),
+ DDS_VAL(31, 16, 15, 0, 31, 19, 12, 0),
+ DDS_VAL(31, 17, 14, 0, 31, 20, 11, 0),
+ DDS_VAL(31, 18, 13, 0, 30, 21, 10, 0),
+ DDS_VAL(31, 20, 11, 0, 28, 23, 8, 0),
+ DDS_VAL(31, 21, 10, 0, 27, 24, 7, 0),
+ DDS_VAL(31, 22, 9, 0, 26, 25, 6, 0),
+ DDS_VAL(30, 23, 8, 0, 25, 26, 5, 0),
+ DDS_VAL(29, 24, 7, 0, 23, 27, 4, 0),
+ /* Vendor recommends below for 1m cable */
+#define DDS_1M 13
+ DDS_VAL(28, 25, 6, 0, 21, 28, 3, 0),
+ DDS_VAL(27, 26, 5, 0, 19, 29, 2, 0),
+ DDS_VAL(25, 27, 4, 0, 17, 30, 1, 0)
+};
+
+/*
+ * Next, values related to Receive Equalization.
+ * In comments, FDR (Full) is IB DDR, HDR (Half) is IB SDR
+ */
+/* Hardware packs an element number and register address thus: */
+#define RXEQ_INIT_RDESC(elt, addr) (((elt) & 0xF) | ((addr) << 4))
+#define RXEQ_VAL(elt, adr, val0, val1, val2, val3) \
+ {RXEQ_INIT_RDESC((elt), (adr)), {(val0), (val1), (val2), (val3)} }
+
+#define RXEQ_VAL_ALL(elt, adr, val) \
+ {RXEQ_INIT_RDESC((elt), (adr)), {(val), (val), (val), (val)} }
+
+#define RXEQ_SDR_DFELTH 0
+#define RXEQ_SDR_TLTH 0
+#define RXEQ_SDR_G1CNT_Z1CNT 0x11
+#define RXEQ_SDR_ZCNT 23
+
+static struct rxeq_init {
+ u16 rdesc; /* in form used in SerDesDDSRXEQ */
+ u8 rdata[4];
+} rxeq_init_vals[] = {
+ /* Set Rcv Eq. to Preset mode */
+ RXEQ_VAL_ALL(7, 0x27, 0x10),
+ /* Set DFELTHFDR/HDR thresholds */
+ RXEQ_VAL(7, 8, 0, 0, 0, 0), /* FDR */
+ RXEQ_VAL(7, 0x21, 0, 0, 0, 0), /* HDR */
+ /* Set TLTHFDR/HDR threshold */
+ RXEQ_VAL(7, 9, 2, 2, 2, 2), /* FDR */
+ RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR */
+ /* Set Preamp setting 2 (ZFR/ZCNT) */
+ RXEQ_VAL(7, 0x1B, 12, 12, 12, 12), /* FDR */
+ RXEQ_VAL(7, 0x1C, 12, 12, 12, 12), /* HDR */
+ /* Set Preamp DC gain and Setting 1 (GFR/GHR) */
+ RXEQ_VAL(7, 0x1E, 0x10, 0x10, 0x10, 0x10), /* FDR */
+ RXEQ_VAL(7, 0x1F, 0x10, 0x10, 0x10, 0x10), /* HDR */
+ /* Toggle RELOCK (in VCDL_CTRL0) to lock to data */
+ RXEQ_VAL_ALL(6, 6, 0x20), /* Set D5 High */
+ RXEQ_VAL_ALL(6, 6, 0), /* Set D5 Low */
+};
+
+/* There are 17 values from vendor, but IBC only accesses the first 16 */
+#define DDS_ROWS (16)
+#define RXEQ_ROWS ARRAY_SIZE(rxeq_init_vals)
+
+static int ipath_sd_setvals(struct ipath_devdata *dd)
+{
+ int idx, midx;
+ int min_idx; /* Minimum index for this portion of table */
+ uint32_t dds_reg_map;
+ u64 __iomem *taddr, *iaddr;
+ uint64_t data;
+ uint64_t sdctl;
+
+ taddr = dd->ipath_kregbase + KR_IBSerDesMappTable;
+ iaddr = dd->ipath_kregbase + dd->ipath_kregs->kr_ib_ddsrxeq;
+
+ /*
+ * Init the DDS section of the table.
+ * Each "row" of the table provokes NUM_DDS_REG writes, to the
+ * registers indicated in DDS_REG_MAP.
+ */
+ sdctl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibserdesctrl);
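+ /*
+ * Bits 12:8 and 17:13 of ibserdesctrl presumably tell IBC the table
+ * geometry: the DDS register count and the RXEQ row count.
+ */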
+ sdctl = (sdctl & ~(0x1f << 8)) | (NUM_DDS_REGS << 8);
+ sdctl = (sdctl & ~(0x1f << 13)) | (RXEQ_ROWS << 13);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibserdesctrl, sdctl);
+
+ /*
+ * Iterate down table within loop for each register to store.
+ */
+ dds_reg_map = DDS_REG_MAP;
+ for (idx = 0; idx < NUM_DDS_REGS; ++idx) {
+ data = ((dds_reg_map & 0xF) << 4) | TX_FAST_ELT;
+ writeq(data, iaddr + idx);
+ mmiowb();
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ dds_reg_map >>= 4;
+ for (midx = 0; midx < DDS_ROWS; ++midx) {
+ u64 __iomem *daddr = taddr + ((midx << 4) + idx);
+ data = dds_init_vals[midx].reg_vals[idx];
+ writeq(data, daddr);
+ mmiowb();
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ } /* End inner for (vals for this reg, each row) */
+ } /* end outer for (regs to be stored) */
+
+ /*
+ * Init the RXEQ section of the table. As explained above the table
+ * rxeq_init_vals[], this runs in a different order, as the pattern
+ * of register references is more complex, but there are only
+ * four "data" values per register.
+ */
+ min_idx = idx; /* RXEQ indices pick up where DDS left off */
+ taddr += 0x100; /* RXEQ data is in second half of table */
+ /* Iterate through RXEQ register addresses */
+ for (idx = 0; idx < RXEQ_ROWS; ++idx) {
+ int didx; /* "destination" */
+ int vidx;
+
+ /* didx is offset by min_idx to address RXEQ range of regs */
+ didx = idx + min_idx;
+ /* Store the next RXEQ register address */
+ writeq(rxeq_init_vals[idx].rdesc, iaddr + didx);
+ mmiowb();
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ /* Iterate through RXEQ values */
+ for (vidx = 0; vidx < 4; vidx++) {
+ data = rxeq_init_vals[idx].rdata[vidx];
+ writeq(data, taddr + (vidx << 6) + idx);
+ mmiowb();
+ ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ }
+ } /* end outer for (Reg-writes for RXEQ) */
+ return 0;
+}
+
+#define CMUCTRL5 EPB_LOC(7, 0, 0x15)
+#define RXHSCTRL0(chan) EPB_LOC(chan, 6, 0)
+#define VCDL_DAC2(chan) EPB_LOC(chan, 6, 5)
+#define VCDL_CTRL0(chan) EPB_LOC(chan, 6, 6)
+#define VCDL_CTRL2(chan) EPB_LOC(chan, 6, 8)
+#define START_EQ2(chan) EPB_LOC(chan, 7, 0x28)
+
+static int ibsd_sto_noisy(struct ipath_devdata *dd, int loc, int val, int mask)
+{
+ int ret = -1;
+ int sloc; /* shifted loc, for messages */
+
+ loc |= (1U << EPB_IB_QUAD0_CS_SHF);
+ sloc = loc >> EPB_ADDR_SHF;
+
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, mask);
+ if (ret < 0)
+ ipath_dev_err(dd, "Write failed: elt %d,"
+ " addr 0x%X, chnl %d, val 0x%02X, mask 0x%02X\n",
+ (sloc & 0xF), (sloc >> 9) & 0x3f, (sloc >> 4) & 7,
+ val & 0xFF, mask & 0xFF);
+ return ret;
+}
+
+/*
+ * Repeat a "store" across all channels of the IB SerDes.
+ * Although nominally it inherits the "read value" of the last
+ * channel it modified, the only really useful return is <0 for
+ * failure, >= 0 for success. The parameter 'loc' is assumed to
+ * be the location for the channel-0 copy of the register to
+ * be modified.
+ */
+static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val,
+ int mask)
+{
+ int ret = -1;
+ int chnl;
+
+ if (loc & EPB_GLOBAL_WR) {
+ /*
+ * Our caller has assured us that we can set all four
+ * channels at once. Trust that. If mask is not 0xFF,
+ * we will read the _specified_ channel for our starting
+ * value.
+ */
+ loc |= (1U << EPB_IB_QUAD0_CS_SHF);
+ chnl = (loc >> (4 + EPB_ADDR_SHF)) & 7;
+ if (mask != 0xFF) {
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+ loc & ~EPB_GLOBAL_WR, 0, 0);
+ if (ret < 0) {
+ int sloc = loc >> EPB_ADDR_SHF;
+ ipath_dev_err(dd, "pre-read failed: elt %d,"
+ " addr 0x%X, chnl %d\n", (sloc & 0xF),
+ (sloc >> 9) & 0x3f, chnl);
+ return ret;
+ }
+ val = (ret & ~mask) | (val & mask);
+ }
+ loc &= ~(7 << (4+EPB_ADDR_SHF));
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF);
+ if (ret < 0) {
+ int sloc = loc >> EPB_ADDR_SHF;
+ ipath_dev_err(dd, "Global WR failed: elt %d,"
+ " addr 0x%X, val %02X\n",
+ (sloc & 0xF), (sloc >> 9) & 0x3f, val);
+ }
+ return ret;
+ }
+ /* Clear "channel" and set CS so we can simply iterate */
+ loc &= ~(7 << (4+EPB_ADDR_SHF));
+ loc |= (1U << EPB_IB_QUAD0_CS_SHF);
+ for (chnl = 0; chnl < 4; ++chnl) {
+ int cloc;
+ cloc = loc | (chnl << (4+EPB_ADDR_SHF));
+ ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, cloc, val, mask);
+ if (ret < 0) {
+ int sloc = loc >> EPB_ADDR_SHF;
+ ipath_dev_err(dd, "Write failed: elt %d,"
+ " addr 0x%X, chnl %d, val 0x%02X,"
+ " mask 0x%02X\n",
+ (sloc & 0xF), (sloc >> 9) & 0x3f, chnl,
+ val & 0xFF, mask & 0xFF);
+ break;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Set the Tx values normally modified by IBC in IB1.2 mode to default
+ * values, as taken from the first row of the init table.
+ */
+static int set_dds_vals(struct ipath_devdata *dd, struct dds_init *ddi)
+{
+ int ret;
+ int idx, reg, data;
+ uint32_t regmap;
+
+ regmap = DDS_REG_MAP;
+ for (idx = 0; idx < NUM_DDS_REGS; ++idx) {
+ reg = (regmap & 0xF);
+ regmap >>= 4;
+ data = ddi->reg_vals[idx];
+ /* Vendor says RMW not needed for these regs, use 0xFF mask */
+ ret = ibsd_mod_allchnls(dd, EPB_LOC(0, 9, reg), data, 0xFF);
+ if (ret < 0)
+ break;
+ }
+ return ret;
+}
+
+/*
+ * Set the Rx values normally modified by IBC in IB1.2 mode to default
+ * values, as taken from the selected column of the init table.
+ */
+static int set_rxeq_vals(struct ipath_devdata *dd, int vsel)
+{
+ int ret;
+ int ridx;
+ int cnt = ARRAY_SIZE(rxeq_init_vals);
+
+ for (ridx = 0; ridx < cnt; ++ridx) {
+ int elt, reg, val, loc;
+ elt = rxeq_init_vals[ridx].rdesc & 0xF;
+ reg = rxeq_init_vals[ridx].rdesc >> 4;
+ loc = EPB_LOC(0, elt, reg);
+ val = rxeq_init_vals[ridx].rdata[vsel];
+ /* mask of 0xFF, because hardware does full-byte store. */
+ ret = ibsd_mod_allchnls(dd, loc, val, 0xFF);
+ if (ret < 0)
+ break;
+ }
+ return ret;
+}
+
+/*
+ * Set the default values (row 0) for DDR Driver De-emphasis.
+ * We do this initially and whenever we turn off IB-1.2.
+ * The "default" values for Rx equalization are also stored to
+ * SerDes registers. Formerly (and still default), we used set 2.
+ * For experimenting with cables and link-partners, we allow changing
+ * that via a module parameter.
+ */
+static unsigned ipath_rxeq_set = 2;
+module_param_named(rxeq_default_set, ipath_rxeq_set, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(rxeq_default_set,
+ "Which set [0..3] of Rx Equalization values is default");
+
+static int ipath_internal_presets(struct ipath_devdata *dd)
+{
+ int ret = 0;
+
+ ret = set_dds_vals(dd, dds_init_vals + DDS_3M);
+
+ if (ret < 0)
+ ipath_dev_err(dd, "Failed to set default DDS values\n");
+ ret = set_rxeq_vals(dd, ipath_rxeq_set & 3);
+ if (ret < 0)
+ ipath_dev_err(dd, "Failed to set default RXEQ values\n");
+ return ret;
+}
+
+int ipath_sd7220_presets(struct ipath_devdata *dd)
+{
+ int ret = 0;
+
+ if (!dd->ipath_presets_needed)
+ return ret;
+ dd->ipath_presets_needed = 0;
+ /* Assert uC reset, so we don't clash with it. */
+ ipath_ibsd_reset(dd, 1);
+ udelay(2);
+ ipath_sd_trimdone_monitor(dd, "link-down");
+
+ ret = ipath_internal_presets(dd);
+ return ret;
+}
+
+static int ipath_sd_trimself(struct ipath_devdata *dd, int val)
+{
+ return ibsd_sto_noisy(dd, CMUCTRL5, val, 0xFF);
+}
+
+static int ipath_sd_early(struct ipath_devdata *dd)
+{
+ int ret = -1; /* Default failed */
+ int chnl;
+
+ for (chnl = 0; chnl < 4; ++chnl) {
+ ret = ibsd_sto_noisy(dd, RXHSCTRL0(chnl), 0xD4, 0xFF);
+ if (ret < 0)
+ goto bail;
+ }
+ for (chnl = 0; chnl < 4; ++chnl) {
+ ret = ibsd_sto_noisy(dd, VCDL_DAC2(chnl), 0x2D, 0xFF);
+ if (ret < 0)
+ goto bail;
+ }
+ /* more fine-tuning of what will be default */
+ for (chnl = 0; chnl < 4; ++chnl) {
+ ret = ibsd_sto_noisy(dd, VCDL_CTRL2(chnl), 3, 0xF);
+ if (ret < 0)
+ goto bail;
+ }
+ for (chnl = 0; chnl < 4; ++chnl) {
+ ret = ibsd_sto_noisy(dd, START_EQ1(chnl), 0x10, 0xFF);
+ if (ret < 0)
+ goto bail;
+ }
+ for (chnl = 0; chnl < 4; ++chnl) {
+ ret = ibsd_sto_noisy(dd, START_EQ2(chnl), 0x30, 0xFF);
+ if (ret < 0)
+ goto bail;
+ }
+bail:
+ return ret;
+}
+
+#define BACTRL(chnl) EPB_LOC(chnl, 6, 0x0E)
+#define LDOUTCTRL1(chnl) EPB_LOC(chnl, 7, 6)
+#define RXHSSTATUS(chnl) EPB_LOC(chnl, 6, 0xF)
+
+static int ipath_sd_dactrim(struct ipath_devdata *dd)
+{
+ int ret = -1; /* Default failed */
+ int chnl;
+
+ for (chnl = 0; chnl < 4; ++chnl) {
+ ret = ibsd_sto_noisy(dd, BACTRL(chnl), 0x40, 0xFF);
+ if (ret < 0)
+ goto bail;
+ }
+ for (chnl = 0; chnl < 4; ++chnl) {
+ ret = ibsd_sto_noisy(dd, LDOUTCTRL1(chnl), 0x04, 0xFF);
+ if (ret < 0)
+ goto bail;
+ }
+ for (chnl = 0; chnl < 4; ++chnl) {
+ ret = ibsd_sto_noisy(dd, RXHSSTATUS(chnl), 0x04, 0xFF);
+ if (ret < 0)
+ goto bail;
+ }
+ /*
+ * delay for max possible number of steps, with slop.
+ * Each step is about 4usec.
+ */
+ udelay(415);
+ for (chnl = 0; chnl < 4; ++chnl) {
+ ret = ibsd_sto_noisy(dd, LDOUTCTRL1(chnl), 0x00, 0xFF);
+ if (ret < 0)
+ goto bail;
+ }
+bail:
+ return ret;
+}
+
+#define RELOCK_FIRST_MS 3
+#define RXLSPPM(chan) EPB_LOC(chan, 0, 2)
+void ipath_toggle_rclkrls(struct ipath_devdata *dd)
+{
+ int loc = RXLSPPM(0) | EPB_GLOBAL_WR;
+ int ret;
+
+ ret = ibsd_mod_allchnls(dd, loc, 0, 0x80);
+ if (ret < 0)
+ ipath_dev_err(dd, "RCLKRLS failed to clear D7\n");
+ else {
+ udelay(1);
+ ibsd_mod_allchnls(dd, loc, 0x80, 0x80);
+ }
+ /* And again for good measure */
+ udelay(1);
+ ret = ibsd_mod_allchnls(dd, loc, 0, 0x80);
+ if (ret < 0)
+ ipath_dev_err(dd, "RCLKRLS failed to clear D7\n");
+ else {
+ udelay(1);
+ ibsd_mod_allchnls(dd, loc, 0x80, 0x80);
+ }
+ /* Now reset xgxs and IBC to complete the recovery */
+ dd->ipath_f_xgxs_reset(dd);
+}
+
+/*
+ * Shut down the timer that polls for relock occasions, if needed
+ * this is "hooked" from ipath_7220_quiet_serdes(), which is called
+ * just before ipath_shutdown_device() in ipath_driver.c shuts down all
+ * the other timers
+ */
+void ipath_shutdown_relock_poll(struct ipath_devdata *dd)
+{
+ struct ipath_relock *irp = &dd->ipath_relock_singleton;
+ if (atomic_read(&irp->ipath_relock_timer_active)) {
+ del_timer_sync(&irp->ipath_relock_timer);
+ atomic_set(&irp->ipath_relock_timer_active, 0);
+ }
+}
+
+static unsigned ipath_relock_by_timer = 1;
+module_param_named(relock_by_timer, ipath_relock_by_timer, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(relock_by_timer, "Allow relock attempt if link not up");
+
+static void ipath_run_relock(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+ struct ipath_relock *irp = &dd->ipath_relock_singleton;
+ u64 val, ltstate;
+
+ if (!(dd->ipath_flags & IPATH_INITTED)) {
+ /* Not yet up, just reenable the timer for later */
+ irp->ipath_relock_interval = HZ;
+ mod_timer(&irp->ipath_relock_timer, jiffies + HZ);
+ return;
+ }
+
+ /*
+ * Check link-training state for "stuck" state.
+ * If found, try a relock and schedule another try at an
+ * exponentially growing delay, maxed at one second.
+ * If not stuck, our work is done.
+ */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+ ltstate = ipath_ib_linktrstate(dd, val);
+
+ if (ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT
+ && ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
+ int timeoff;
+ /* Not up yet. Try again, if allowed by module-param */
+ if (ipath_relock_by_timer) {
+ if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)
+ ipath_cdbg(VERBOSE, "Skip RELOCK in AUTONEG\n");
+ else if (!(dd->ipath_flags & IPATH_IB_LINK_DISABLED)) {
+ ipath_cdbg(VERBOSE, "RELOCK\n");
+ ipath_toggle_rclkrls(dd);
+ }
+ }
+ /* re-set timer for next check */
+ timeoff = irp->ipath_relock_interval << 1;
+ if (timeoff > HZ)
+ timeoff = HZ;
+ irp->ipath_relock_interval = timeoff;
+
+ mod_timer(&irp->ipath_relock_timer, jiffies + timeoff);
+ } else {
+ /* Up, so no more need to check so often */
+ mod_timer(&irp->ipath_relock_timer, jiffies + HZ);
+ }
+}
+
+void ipath_set_relock_poll(struct ipath_devdata *dd, int ibup)
+{
+ struct ipath_relock *irp = &dd->ipath_relock_singleton;
+
+ if (ibup > 0) {
+ /* we are now up, so relax timer to 1 second interval */
+ if (atomic_read(&irp->ipath_relock_timer_active))
+ mod_timer(&irp->ipath_relock_timer, jiffies + HZ);
+ } else {
+ /* Transition to down, (re-)set timer to short interval. */
+ int timeout;
+ timeout = (HZ * ((ibup == -1) ? 1000 : RELOCK_FIRST_MS))/1000;
+ if (timeout == 0)
+ timeout = 1;
+ /* If timer has not yet been started, do so. */
+ if (atomic_inc_return(&irp->ipath_relock_timer_active) == 1) {
+ init_timer(&irp->ipath_relock_timer);
+ irp->ipath_relock_timer.function = ipath_run_relock;
+ irp->ipath_relock_timer.data = (unsigned long) dd;
+ irp->ipath_relock_interval = timeout;
+ irp->ipath_relock_timer.expires = jiffies + timeout;
+ add_timer(&irp->ipath_relock_timer);
+ } else {
+ irp->ipath_relock_interval = timeout;
+ mod_timer(&irp->ipath_relock_timer, jiffies + timeout);
+ atomic_dec(&irp->ipath_relock_timer_active);
+ }
+ }
+}
+
diff --git a/drivers/infiniband/hw/ipath/ipath_sd7220_img.c b/drivers/infiniband/hw/ipath/ipath_sd7220_img.c
new file mode 100644
index 0000000..5ef59da
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_sd7220_img.c
@@ -0,0 +1,1082 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file contains the memory image from the vendor, to be copied into
+ * the IB SERDES of the IBA7220 during initialization.
+ * The file also includes the two functions which use this image.
+ */
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "ipath_kernel.h"
+#include "ipath_registers.h"
+#include "ipath_7220.h"
+
+static unsigned char ipath_sd7220_ib_img[] = {
+/*0000*/0x02, 0x0A, 0x29, 0x02, 0x0A, 0x87, 0xE5, 0xE6,
+ 0x30, 0xE6, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F,
+/*0010*/0x00, 0xE5, 0xE2, 0x30, 0xE4, 0x04, 0x7E, 0x01,
+ 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x5F, 0x60, 0x08,
+/*0020*/0x53, 0xF9, 0xF7, 0xE4, 0xF5, 0xFE, 0x80, 0x08,
+ 0x7F, 0x0A, 0x12, 0x17, 0x31, 0x12, 0x0E, 0xA2,
+/*0030*/0x75, 0xFC, 0x08, 0xE4, 0xF5, 0xFD, 0xE5, 0xE7,
+ 0x20, 0xE7, 0x03, 0x43, 0xF9, 0x08, 0x22, 0x00,
+/*0040*/0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x75,
+ 0x51, 0x01, 0xE4, 0xF5, 0x52, 0xF5, 0x53, 0xF5,
+/*0050*/0x52, 0xF5, 0x7E, 0x7F, 0x04, 0x02, 0x04, 0x38,
+ 0xC2, 0x36, 0x05, 0x52, 0xE5, 0x52, 0xD3, 0x94,
+/*0060*/0x0C, 0x40, 0x05, 0x75, 0x52, 0x01, 0xD2, 0x36,
+ 0x90, 0x07, 0x0C, 0x74, 0x07, 0xF0, 0xA3, 0x74,
+/*0070*/0xFF, 0xF0, 0xE4, 0xF5, 0x0C, 0xA3, 0xF0, 0x90,
+ 0x07, 0x14, 0xF0, 0xA3, 0xF0, 0x75, 0x0B, 0x20,
+/*0080*/0xF5, 0x09, 0xE4, 0xF5, 0x08, 0xE5, 0x08, 0xD3,
+ 0x94, 0x30, 0x40, 0x03, 0x02, 0x04, 0x04, 0x12,
+/*0090*/0x00, 0x06, 0x15, 0x0B, 0xE5, 0x08, 0x70, 0x04,
+ 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x09,
+/*00A0*/0x70, 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00,
+ 0xEE, 0x5F, 0x60, 0x05, 0x12, 0x18, 0x71, 0xD2,
+/*00B0*/0x35, 0x53, 0xE1, 0xF7, 0xE5, 0x08, 0x45, 0x09,
+ 0xFF, 0xE5, 0x0B, 0x25, 0xE0, 0x25, 0xE0, 0x24,
+/*00C0*/0x83, 0xF5, 0x82, 0xE4, 0x34, 0x07, 0xF5, 0x83,
+ 0xEF, 0xF0, 0x85, 0xE2, 0x20, 0xE5, 0x52, 0xD3,
+/*00D0*/0x94, 0x01, 0x40, 0x0D, 0x12, 0x19, 0xF3, 0xE0,
+ 0x54, 0xA0, 0x64, 0x40, 0x70, 0x03, 0x02, 0x03,
+/*00E0*/0xFB, 0x53, 0xF9, 0xF8, 0x90, 0x94, 0x70, 0xE4,
+ 0xF0, 0xE0, 0xF5, 0x10, 0xAF, 0x09, 0x12, 0x1E,
+/*00F0*/0xB3, 0xAF, 0x08, 0xEF, 0x44, 0x08, 0xF5, 0x82,
+ 0x75, 0x83, 0x80, 0xE0, 0xF5, 0x29, 0xEF, 0x44,
+/*0100*/0x07, 0x12, 0x1A, 0x3C, 0xF5, 0x22, 0x54, 0x40,
+ 0xD3, 0x94, 0x00, 0x40, 0x1E, 0xE5, 0x29, 0x54,
+/*0110*/0xF0, 0x70, 0x21, 0x12, 0x19, 0xF3, 0xE0, 0x44,
+ 0x80, 0xF0, 0xE5, 0x22, 0x54, 0x30, 0x65, 0x08,
+/*0120*/0x70, 0x09, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xBF,
+ 0xF0, 0x80, 0x09, 0x12, 0x19, 0xF3, 0x74, 0x40,
+/*0130*/0xF0, 0x02, 0x03, 0xFB, 0x12, 0x1A, 0x12, 0x75,
+ 0x83, 0xAE, 0x74, 0xFF, 0xF0, 0xAF, 0x08, 0x7E,
+/*0140*/0x00, 0xEF, 0x44, 0x07, 0xF5, 0x82, 0xE0, 0xFD,
+ 0xE5, 0x0B, 0x25, 0xE0, 0x25, 0xE0, 0x24, 0x81,
+/*0150*/0xF5, 0x82, 0xE4, 0x34, 0x07, 0xF5, 0x83, 0xED,
+ 0xF0, 0x90, 0x07, 0x0E, 0xE0, 0x04, 0xF0, 0xEF,
+/*0160*/0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0x98, 0xE0,
+ 0xF5, 0x28, 0x12, 0x1A, 0x23, 0x40, 0x0C, 0x12,
+/*0170*/0x19, 0xF3, 0xE0, 0x44, 0x01, 0x12, 0x1A, 0x32,
+ 0x02, 0x03, 0xF6, 0xAF, 0x08, 0x7E, 0x00, 0x74,
+/*0180*/0x80, 0xCD, 0xEF, 0xCD, 0x8D, 0x82, 0xF5, 0x83,
+ 0xE0, 0x30, 0xE0, 0x0A, 0x12, 0x19, 0xF3, 0xE0,
+/*0190*/0x44, 0x20, 0xF0, 0x02, 0x03, 0xFB, 0x12, 0x19,
+ 0xF3, 0xE0, 0x54, 0xDF, 0xF0, 0xEE, 0x44, 0xAE,
+/*01A0*/0x12, 0x1A, 0x43, 0x30, 0xE4, 0x03, 0x02, 0x03,
+ 0xFB, 0x74, 0x9E, 0x12, 0x1A, 0x05, 0x20, 0xE0,
+/*01B0*/0x03, 0x02, 0x03, 0xFB, 0x8F, 0x82, 0x8E, 0x83,
+ 0xE0, 0x20, 0xE0, 0x03, 0x02, 0x03, 0xFB, 0x12,
+/*01C0*/0x19, 0xF3, 0xE0, 0x44, 0x10, 0xF0, 0xE5, 0xE3,
+ 0x20, 0xE7, 0x08, 0xE5, 0x08, 0x12, 0x1A, 0x3A,
+/*01D0*/0x44, 0x04, 0xF0, 0xAF, 0x08, 0x7E, 0x00, 0xEF,
+ 0x12, 0x1A, 0x3A, 0x20, 0xE2, 0x34, 0x12, 0x19,
+/*01E0*/0xF3, 0xE0, 0x44, 0x08, 0xF0, 0xE5, 0xE4, 0x30,
+ 0xE6, 0x04, 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00,
+/*01F0*/0xE5, 0x7E, 0xC3, 0x94, 0x04, 0x50, 0x04, 0x7C,
+ 0x01, 0x80, 0x02, 0x7C, 0x00, 0xEC, 0x4D, 0x60,
+/*0200*/0x05, 0xC2, 0x35, 0x02, 0x03, 0xFB, 0xEE, 0x44,
+ 0xD2, 0x12, 0x1A, 0x43, 0x44, 0x40, 0xF0, 0x02,
+/*0210*/0x03, 0xFB, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xF7,
+ 0xF0, 0x12, 0x1A, 0x12, 0x75, 0x83, 0xD2, 0xE0,
+/*0220*/0x54, 0xBF, 0xF0, 0x90, 0x07, 0x14, 0xE0, 0x04,
+ 0xF0, 0xE5, 0x7E, 0x70, 0x03, 0x75, 0x7E, 0x01,
+/*0230*/0xAF, 0x08, 0x7E, 0x00, 0x12, 0x1A, 0x23, 0x40,
+ 0x12, 0x12, 0x19, 0xF3, 0xE0, 0x44, 0x01, 0x12,
+/*0240*/0x19, 0xF2, 0xE0, 0x54, 0x02, 0x12, 0x1A, 0x32,
+ 0x02, 0x03, 0xFB, 0x12, 0x19, 0xF3, 0xE0, 0x44,
+/*0250*/0x02, 0x12, 0x19, 0xF2, 0xE0, 0x54, 0xFE, 0xF0,
+ 0xC2, 0x35, 0xEE, 0x44, 0x8A, 0x8F, 0x82, 0xF5,
+/*0260*/0x83, 0xE0, 0xF5, 0x17, 0x54, 0x8F, 0x44, 0x40,
+ 0xF0, 0x74, 0x90, 0xFC, 0xE5, 0x08, 0x44, 0x07,
+/*0270*/0xFD, 0xF5, 0x82, 0x8C, 0x83, 0xE0, 0x54, 0x3F,
+ 0x90, 0x07, 0x02, 0xF0, 0xE0, 0x54, 0xC0, 0x8D,
+/*0280*/0x82, 0x8C, 0x83, 0xF0, 0x74, 0x92, 0x12, 0x1A,
+ 0x05, 0x90, 0x07, 0x03, 0x12, 0x1A, 0x19, 0x74,
+/*0290*/0x82, 0x12, 0x1A, 0x05, 0x90, 0x07, 0x04, 0x12,
+ 0x1A, 0x19, 0x74, 0xB4, 0x12, 0x1A, 0x05, 0x90,
+/*02A0*/0x07, 0x05, 0x12, 0x1A, 0x19, 0x74, 0x94, 0xFE,
+ 0xE5, 0x08, 0x44, 0x06, 0x12, 0x1A, 0x0A, 0xF5,
+/*02B0*/0x10, 0x30, 0xE0, 0x04, 0xD2, 0x37, 0x80, 0x02,
+ 0xC2, 0x37, 0xE5, 0x10, 0x54, 0x7F, 0x8F, 0x82,
+/*02C0*/0x8E, 0x83, 0xF0, 0x30, 0x44, 0x30, 0x12, 0x1A,
+ 0x03, 0x54, 0x80, 0xD3, 0x94, 0x00, 0x40, 0x04,
+/*02D0*/0xD2, 0x39, 0x80, 0x02, 0xC2, 0x39, 0x8F, 0x82,
+ 0x8E, 0x83, 0xE0, 0x44, 0x80, 0xF0, 0x12, 0x1A,
+/*02E0*/0x03, 0x54, 0x40, 0xD3, 0x94, 0x00, 0x40, 0x04,
+ 0xD2, 0x3A, 0x80, 0x02, 0xC2, 0x3A, 0x8F, 0x82,
+/*02F0*/0x8E, 0x83, 0xE0, 0x44, 0x40, 0xF0, 0x74, 0x92,
+ 0xFE, 0xE5, 0x08, 0x44, 0x06, 0x12, 0x1A, 0x0A,
+/*0300*/0x30, 0xE7, 0x04, 0xD2, 0x38, 0x80, 0x02, 0xC2,
+ 0x38, 0x8F, 0x82, 0x8E, 0x83, 0xE0, 0x54, 0x7F,
+/*0310*/0xF0, 0x12, 0x1E, 0x46, 0xE4, 0xF5, 0x0A, 0x20,
+ 0x03, 0x02, 0x80, 0x03, 0x30, 0x43, 0x03, 0x12,
+/*0320*/0x19, 0x95, 0x20, 0x02, 0x02, 0x80, 0x03, 0x30,
+ 0x42, 0x03, 0x12, 0x0C, 0x8F, 0x30, 0x30, 0x06,
+/*0330*/0x12, 0x19, 0x95, 0x12, 0x0C, 0x8F, 0x12, 0x0D,
+ 0x47, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xFB, 0xF0,
+/*0340*/0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40, 0x46, 0x43,
+ 0xE1, 0x08, 0x12, 0x19, 0xF3, 0xE0, 0x44, 0x04,
+/*0350*/0xF0, 0xE5, 0xE4, 0x20, 0xE7, 0x2A, 0x12, 0x1A,
+ 0x12, 0x75, 0x83, 0xD2, 0xE0, 0x54, 0x08, 0xD3,
+/*0360*/0x94, 0x00, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
+ 0x7F, 0x00, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40,
+/*0370*/0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF,
+ 0x5E, 0x60, 0x05, 0x12, 0x1D, 0xD7, 0x80, 0x17,
+/*0380*/0x12, 0x1A, 0x12, 0x75, 0x83, 0xD2, 0xE0, 0x44,
+ 0x08, 0xF0, 0x02, 0x03, 0xFB, 0x12, 0x1A, 0x12,
+/*0390*/0x75, 0x83, 0xD2, 0xE0, 0x54, 0xF7, 0xF0, 0x12,
+ 0x1E, 0x46, 0x7F, 0x08, 0x12, 0x17, 0x31, 0x74,
+/*03A0*/0x8E, 0xFE, 0x12, 0x1A, 0x12, 0x8E, 0x83, 0xE0,
+ 0xF5, 0x10, 0x54, 0xFE, 0xF0, 0xE5, 0x10, 0x44,
+/*03B0*/0x01, 0xFF, 0xE5, 0x08, 0xFD, 0xED, 0x44, 0x07,
+ 0xF5, 0x82, 0xEF, 0xF0, 0xE5, 0x10, 0x54, 0xFE,
+/*03C0*/0xFF, 0xED, 0x44, 0x07, 0xF5, 0x82, 0xEF, 0x12,
+ 0x1A, 0x11, 0x75, 0x83, 0x86, 0xE0, 0x44, 0x10,
+/*03D0*/0x12, 0x1A, 0x11, 0xE0, 0x44, 0x10, 0xF0, 0x12,
+ 0x19, 0xF3, 0xE0, 0x54, 0xFD, 0x44, 0x01, 0xFF,
+/*03E0*/0x12, 0x19, 0xF3, 0xEF, 0x12, 0x1A, 0x32, 0x30,
+ 0x32, 0x0C, 0xE5, 0x08, 0x44, 0x08, 0xF5, 0x82,
+/*03F0*/0x75, 0x83, 0x82, 0x74, 0x05, 0xF0, 0xAF, 0x0B,
+ 0x12, 0x18, 0xD7, 0x74, 0x10, 0x25, 0x08, 0xF5,
+/*0400*/0x08, 0x02, 0x00, 0x85, 0x05, 0x09, 0xE5, 0x09,
+ 0xD3, 0x94, 0x07, 0x50, 0x03, 0x02, 0x00, 0x82,
+/*0410*/0xE5, 0x7E, 0xD3, 0x94, 0x00, 0x40, 0x04, 0x7F,
+ 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x7E, 0xC3,
+/*0420*/0x94, 0xFA, 0x50, 0x04, 0x7E, 0x01, 0x80, 0x02,
+ 0x7E, 0x00, 0xEE, 0x5F, 0x60, 0x02, 0x05, 0x7E,
+/*0430*/0x30, 0x35, 0x0B, 0x43, 0xE1, 0x01, 0x7F, 0x09,
+ 0x12, 0x17, 0x31, 0x02, 0x00, 0x58, 0x53, 0xE1,
+/*0440*/0xFE, 0x02, 0x00, 0x58, 0x8E, 0x6A, 0x8F, 0x6B,
+ 0x8C, 0x6C, 0x8D, 0x6D, 0x75, 0x6E, 0x01, 0x75,
+/*0450*/0x6F, 0x01, 0x75, 0x70, 0x01, 0xE4, 0xF5, 0x73,
+ 0xF5, 0x74, 0xF5, 0x75, 0x90, 0x07, 0x2F, 0xF0,
+/*0460*/0xF5, 0x3C, 0xF5, 0x3E, 0xF5, 0x46, 0xF5, 0x47,
+ 0xF5, 0x3D, 0xF5, 0x3F, 0xF5, 0x6F, 0xE5, 0x6F,
+/*0470*/0x70, 0x0F, 0xE5, 0x6B, 0x45, 0x6A, 0x12, 0x07,
+ 0x2A, 0x75, 0x83, 0x80, 0x74, 0x3A, 0xF0, 0x80,
+/*0480*/0x09, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x80, 0x74,
+ 0x1A, 0xF0, 0xE4, 0xF5, 0x6E, 0xC3, 0x74, 0x3F,
+/*0490*/0x95, 0x6E, 0xFF, 0x12, 0x08, 0x65, 0x75, 0x83,
+ 0x82, 0xEF, 0xF0, 0x12, 0x1A, 0x4D, 0x12, 0x08,
+/*04A0*/0xC6, 0xE5, 0x33, 0xF0, 0x12, 0x08, 0xFA, 0x12,
+ 0x08, 0xB1, 0x40, 0xE1, 0xE5, 0x6F, 0x70, 0x0B,
+/*04B0*/0x12, 0x07, 0x2A, 0x75, 0x83, 0x80, 0x74, 0x36,
+ 0xF0, 0x80, 0x09, 0x12, 0x07, 0x2A, 0x75, 0x83,
+/*04C0*/0x80, 0x74, 0x16, 0xF0, 0x75, 0x6E, 0x01, 0x12,
+ 0x07, 0x2A, 0x75, 0x83, 0xB4, 0xE5, 0x6E, 0xF0,
+/*04D0*/0x12, 0x1A, 0x4D, 0x74, 0x3F, 0x25, 0x6E, 0xF5,
+ 0x82, 0xE4, 0x34, 0x00, 0xF5, 0x83, 0xE5, 0x33,
+/*04E0*/0xF0, 0x74, 0xBF, 0x25, 0x6E, 0xF5, 0x82, 0xE4,
+ 0x34, 0x00, 0x12, 0x08, 0xB1, 0x40, 0xD8, 0xE4,
+/*04F0*/0xF5, 0x70, 0xF5, 0x46, 0xF5, 0x47, 0xF5, 0x6E,
+ 0x12, 0x08, 0xFA, 0xF5, 0x83, 0xE0, 0xFE, 0x12,
+/*0500*/0x08, 0xC6, 0xE0, 0x7C, 0x00, 0x24, 0x00, 0xFF,
+ 0xEC, 0x3E, 0xFE, 0xAD, 0x3B, 0xD3, 0xEF, 0x9D,
+/*0510*/0xEE, 0x9C, 0x50, 0x04, 0x7B, 0x01, 0x80, 0x02,
+ 0x7B, 0x00, 0xE5, 0x70, 0x70, 0x04, 0x7A, 0x01,
+/*0520*/0x80, 0x02, 0x7A, 0x00, 0xEB, 0x5A, 0x60, 0x06,
+ 0x85, 0x6E, 0x46, 0x75, 0x70, 0x01, 0xD3, 0xEF,
+/*0530*/0x9D, 0xEE, 0x9C, 0x50, 0x04, 0x7F, 0x01, 0x80,
+ 0x02, 0x7F, 0x00, 0xE5, 0x70, 0xB4, 0x01, 0x04,
+/*0540*/0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF, 0x5E,
+ 0x60, 0x03, 0x85, 0x6E, 0x47, 0x05, 0x6E, 0xE5,
+/*0550*/0x6E, 0x64, 0x7F, 0x70, 0xA3, 0xE5, 0x46, 0x60,
+ 0x05, 0xE5, 0x47, 0xB4, 0x7E, 0x03, 0x85, 0x46,
+/*0560*/0x47, 0xE5, 0x6F, 0x70, 0x08, 0x85, 0x46, 0x76,
+ 0x85, 0x47, 0x77, 0x80, 0x0E, 0xC3, 0x74, 0x7F,
+/*0570*/0x95, 0x46, 0xF5, 0x78, 0xC3, 0x74, 0x7F, 0x95,
+ 0x47, 0xF5, 0x79, 0xE5, 0x6F, 0x70, 0x37, 0xE5,
+/*0580*/0x46, 0x65, 0x47, 0x70, 0x0C, 0x75, 0x73, 0x01,
+ 0x75, 0x74, 0x01, 0xF5, 0x3C, 0xF5, 0x3D, 0x80,
+/*0590*/0x35, 0xE4, 0xF5, 0x4E, 0xC3, 0xE5, 0x47, 0x95,
+ 0x46, 0xF5, 0x3C, 0xC3, 0x13, 0xF5, 0x71, 0x25,
+/*05A0*/0x46, 0xF5, 0x72, 0xC3, 0x94, 0x3F, 0x40, 0x05,
+ 0xE4, 0xF5, 0x3D, 0x80, 0x40, 0xC3, 0x74, 0x3F,
+/*05B0*/0x95, 0x72, 0xF5, 0x3D, 0x80, 0x37, 0xE5, 0x46,
+ 0x65, 0x47, 0x70, 0x0F, 0x75, 0x73, 0x01, 0x75,
+/*05C0*/0x75, 0x01, 0xF5, 0x3E, 0xF5, 0x3F, 0x75, 0x4E,
+ 0x01, 0x80, 0x22, 0xE4, 0xF5, 0x4E, 0xC3, 0xE5,
+/*05D0*/0x47, 0x95, 0x46, 0xF5, 0x3E, 0xC3, 0x13, 0xF5,
+ 0x71, 0x25, 0x46, 0xF5, 0x72, 0xD3, 0x94, 0x3F,
+/*05E0*/0x50, 0x05, 0xE4, 0xF5, 0x3F, 0x80, 0x06, 0xE5,
+ 0x72, 0x24, 0xC1, 0xF5, 0x3F, 0x05, 0x6F, 0xE5,
+/*05F0*/0x6F, 0xC3, 0x94, 0x02, 0x50, 0x03, 0x02, 0x04,
+ 0x6E, 0xE5, 0x6D, 0x45, 0x6C, 0x70, 0x02, 0x80,
+/*0600*/0x04, 0xE5, 0x74, 0x45, 0x75, 0x90, 0x07, 0x2F,
+ 0xF0, 0x7F, 0x01, 0xE5, 0x3E, 0x60, 0x04, 0xE5,
+/*0610*/0x3C, 0x70, 0x14, 0xE4, 0xF5, 0x3C, 0xF5, 0x3D,
+ 0xF5, 0x3E, 0xF5, 0x3F, 0x12, 0x08, 0xD2, 0x70,
+/*0620*/0x04, 0xF0, 0x02, 0x06, 0xA4, 0x80, 0x7A, 0xE5,
+ 0x3C, 0xC3, 0x95, 0x3E, 0x40, 0x07, 0xE5, 0x3C,
+/*0630*/0x95, 0x3E, 0xFF, 0x80, 0x06, 0xC3, 0xE5, 0x3E,
+ 0x95, 0x3C, 0xFF, 0xE5, 0x76, 0xD3, 0x95, 0x79,
+/*0640*/0x40, 0x05, 0x85, 0x76, 0x7A, 0x80, 0x03, 0x85,
+ 0x79, 0x7A, 0xE5, 0x77, 0xC3, 0x95, 0x78, 0x50,
+/*0650*/0x05, 0x85, 0x77, 0x7B, 0x80, 0x03, 0x85, 0x78,
+ 0x7B, 0xE5, 0x7B, 0xD3, 0x95, 0x7A, 0x40, 0x30,
+/*0660*/0xE5, 0x7B, 0x95, 0x7A, 0xF5, 0x3C, 0xF5, 0x3E,
+ 0xC3, 0xE5, 0x7B, 0x95, 0x7A, 0x90, 0x07, 0x19,
+/*0670*/0xF0, 0xE5, 0x3C, 0xC3, 0x13, 0xF5, 0x71, 0x25,
+ 0x7A, 0xF5, 0x72, 0xC3, 0x94, 0x3F, 0x40, 0x05,
+/*0680*/0xE4, 0xF5, 0x3D, 0x80, 0x1F, 0xC3, 0x74, 0x3F,
+ 0x95, 0x72, 0xF5, 0x3D, 0xF5, 0x3F, 0x80, 0x14,
+/*0690*/0xE4, 0xF5, 0x3C, 0xF5, 0x3E, 0x90, 0x07, 0x19,
+ 0xF0, 0x12, 0x08, 0xD2, 0x70, 0x03, 0xF0, 0x80,
+/*06A0*/0x03, 0x74, 0x01, 0xF0, 0x12, 0x08, 0x65, 0x75,
+ 0x83, 0xD0, 0xE0, 0x54, 0x0F, 0xFE, 0xAD, 0x3C,
+/*06B0*/0x70, 0x02, 0x7E, 0x07, 0xBE, 0x0F, 0x02, 0x7E,
+ 0x80, 0xEE, 0xFB, 0xEF, 0xD3, 0x9B, 0x74, 0x80,
+/*06C0*/0xF8, 0x98, 0x40, 0x1F, 0xE4, 0xF5, 0x3C, 0xF5,
+ 0x3E, 0x12, 0x08, 0xD2, 0x70, 0x03, 0xF0, 0x80,
+/*06D0*/0x12, 0x74, 0x01, 0xF0, 0xE5, 0x08, 0xFB, 0xEB,
+ 0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xD2, 0xE0,
+/*06E0*/0x44, 0x10, 0xF0, 0xE5, 0x08, 0xFB, 0xEB, 0x44,
+ 0x09, 0xF5, 0x82, 0x75, 0x83, 0x9E, 0xED, 0xF0,
+/*06F0*/0xEB, 0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xCA,
+ 0xED, 0xF0, 0x12, 0x08, 0x65, 0x75, 0x83, 0xCC,
+/*0700*/0xEF, 0xF0, 0x22, 0xE5, 0x08, 0x44, 0x07, 0xF5,
+ 0x82, 0x75, 0x83, 0xBC, 0xE0, 0x54, 0xF0, 0xF0,
+/*0710*/0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82, 0x75, 0x83,
+ 0xBE, 0xE0, 0x54, 0xF0, 0xF0, 0xE5, 0x08, 0x44,
+/*0720*/0x07, 0xF5, 0x82, 0x75, 0x83, 0xC0, 0xE0, 0x54,
+ 0xF0, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82,
+/*0730*/0x22, 0xF0, 0x90, 0x07, 0x28, 0xE0, 0xFE, 0xA3,
+ 0xE0, 0xF5, 0x82, 0x8E, 0x83, 0x22, 0x85, 0x42,
+/*0740*/0x42, 0x85, 0x41, 0x41, 0x85, 0x40, 0x40, 0x74,
+ 0xC0, 0x2F, 0xF5, 0x82, 0x74, 0x02, 0x3E, 0xF5,
+/*0750*/0x83, 0xE5, 0x42, 0xF0, 0x74, 0xE0, 0x2F, 0xF5,
+ 0x82, 0x74, 0x02, 0x3E, 0xF5, 0x83, 0x22, 0xE5,
+/*0760*/0x42, 0x29, 0xFD, 0xE4, 0x33, 0xFC, 0xE5, 0x3C,
+ 0xC3, 0x9D, 0xEC, 0x64, 0x80, 0xF8, 0x74, 0x80,
+/*0770*/0x98, 0x22, 0xF5, 0x83, 0xE0, 0x90, 0x07, 0x22,
+ 0x54, 0x1F, 0xFD, 0xE0, 0xFA, 0xA3, 0xE0, 0xF5,
+/*0780*/0x82, 0x8A, 0x83, 0xED, 0xF0, 0x22, 0x90, 0x07,
+ 0x22, 0xE0, 0xFC, 0xA3, 0xE0, 0xF5, 0x82, 0x8C,
+/*0790*/0x83, 0x22, 0x90, 0x07, 0x24, 0xFF, 0xED, 0x44,
+ 0x07, 0xCF, 0xF0, 0xA3, 0xEF, 0xF0, 0x22, 0x85,
+/*07A0*/0x38, 0x38, 0x85, 0x39, 0x39, 0x85, 0x3A, 0x3A,
+ 0x74, 0xC0, 0x2F, 0xF5, 0x82, 0x74, 0x02, 0x3E,
+/*07B0*/0xF5, 0x83, 0x22, 0x90, 0x07, 0x26, 0xFF, 0xED,
+ 0x44, 0x07, 0xCF, 0xF0, 0xA3, 0xEF, 0xF0, 0x22,
+/*07C0*/0xF0, 0x74, 0xA0, 0x2F, 0xF5, 0x82, 0x74, 0x02,
+ 0x3E, 0xF5, 0x83, 0x22, 0x74, 0xC0, 0x25, 0x11,
+/*07D0*/0xF5, 0x82, 0xE4, 0x34, 0x01, 0xF5, 0x83, 0x22,
+ 0x74, 0x00, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
+/*07E0*/0x02, 0xF5, 0x83, 0x22, 0x74, 0x60, 0x25, 0x11,
+ 0xF5, 0x82, 0xE4, 0x34, 0x03, 0xF5, 0x83, 0x22,
+/*07F0*/0x74, 0x80, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
+ 0x03, 0xF5, 0x83, 0x22, 0x74, 0xE0, 0x25, 0x11,
+/*0800*/0xF5, 0x82, 0xE4, 0x34, 0x03, 0xF5, 0x83, 0x22,
+ 0x74, 0x40, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
+/*0810*/0x06, 0xF5, 0x83, 0x22, 0x74, 0x80, 0x2F, 0xF5,
+ 0x82, 0x74, 0x02, 0x3E, 0xF5, 0x83, 0x22, 0xAF,
+/*0820*/0x08, 0x7E, 0x00, 0xEF, 0x44, 0x07, 0xF5, 0x82,
+ 0x22, 0xF5, 0x83, 0xE5, 0x82, 0x44, 0x07, 0xF5,
+/*0830*/0x82, 0xE5, 0x40, 0xF0, 0x22, 0x74, 0x40, 0x25,
+ 0x11, 0xF5, 0x82, 0xE4, 0x34, 0x02, 0xF5, 0x83,
+/*0840*/0x22, 0x74, 0xC0, 0x25, 0x11, 0xF5, 0x82, 0xE4,
+ 0x34, 0x03, 0xF5, 0x83, 0x22, 0x74, 0x00, 0x25,
+/*0850*/0x11, 0xF5, 0x82, 0xE4, 0x34, 0x06, 0xF5, 0x83,
+ 0x22, 0x74, 0x20, 0x25, 0x11, 0xF5, 0x82, 0xE4,
+/*0860*/0x34, 0x06, 0xF5, 0x83, 0x22, 0xE5, 0x08, 0xFD,
+ 0xED, 0x44, 0x07, 0xF5, 0x82, 0x22, 0xE5, 0x41,
+/*0870*/0xF0, 0xE5, 0x65, 0x64, 0x01, 0x45, 0x64, 0x22,
+ 0x7E, 0x00, 0xFB, 0x7A, 0x00, 0xFD, 0x7C, 0x00,
+/*0880*/0x22, 0x74, 0x20, 0x25, 0x11, 0xF5, 0x82, 0xE4,
+ 0x34, 0x02, 0x22, 0x74, 0xA0, 0x25, 0x11, 0xF5,
+/*0890*/0x82, 0xE4, 0x34, 0x03, 0x22, 0x85, 0x3E, 0x42,
+ 0x85, 0x3F, 0x41, 0x8F, 0x40, 0x22, 0x85, 0x3C,
+/*08A0*/0x42, 0x85, 0x3D, 0x41, 0x8F, 0x40, 0x22, 0x75,
+ 0x45, 0x3F, 0x90, 0x07, 0x20, 0xE4, 0xF0, 0xA3,
+/*08B0*/0x22, 0xF5, 0x83, 0xE5, 0x32, 0xF0, 0x05, 0x6E,
+ 0xE5, 0x6E, 0xC3, 0x94, 0x40, 0x22, 0xF0, 0xE5,
+/*08C0*/0x08, 0x44, 0x06, 0xF5, 0x82, 0x22, 0x74, 0x00,
+ 0x25, 0x6E, 0xF5, 0x82, 0xE4, 0x34, 0x00, 0xF5,
+/*08D0*/0x83, 0x22, 0xE5, 0x6D, 0x45, 0x6C, 0x90, 0x07,
+ 0x2F, 0x22, 0xE4, 0xF9, 0xE5, 0x3C, 0xD3, 0x95,
+/*08E0*/0x3E, 0x22, 0x74, 0x80, 0x2E, 0xF5, 0x82, 0xE4,
+ 0x34, 0x02, 0xF5, 0x83, 0xE0, 0x22, 0x74, 0xA0,
+/*08F0*/0x2E, 0xF5, 0x82, 0xE4, 0x34, 0x02, 0xF5, 0x83,
+ 0xE0, 0x22, 0x74, 0x80, 0x25, 0x6E, 0xF5, 0x82,
+/*0900*/0xE4, 0x34, 0x00, 0x22, 0x25, 0x42, 0xFD, 0xE4,
+ 0x33, 0xFC, 0x22, 0x85, 0x42, 0x42, 0x85, 0x41,
+/*0910*/0x41, 0x85, 0x40, 0x40, 0x22, 0xED, 0x4C, 0x60,
+ 0x03, 0x02, 0x09, 0xE5, 0xEF, 0x4E, 0x70, 0x37,
+/*0920*/0x90, 0x07, 0x26, 0x12, 0x07, 0x89, 0xE0, 0xFD,
+ 0x12, 0x07, 0xCC, 0xED, 0xF0, 0x90, 0x07, 0x28,
+/*0930*/0x12, 0x07, 0x89, 0xE0, 0xFD, 0x12, 0x07, 0xD8,
+ 0xED, 0xF0, 0x12, 0x07, 0x86, 0xE0, 0x54, 0x1F,
+/*0940*/0xFD, 0x12, 0x08, 0x81, 0xF5, 0x83, 0xED, 0xF0,
+ 0x90, 0x07, 0x24, 0x12, 0x07, 0x89, 0xE0, 0x54,
+/*0950*/0x1F, 0xFD, 0x12, 0x08, 0x35, 0xED, 0xF0, 0xEF,
+ 0x64, 0x04, 0x4E, 0x70, 0x37, 0x90, 0x07, 0x26,
+/*0960*/0x12, 0x07, 0x89, 0xE0, 0xFD, 0x12, 0x07, 0xE4,
+ 0xED, 0xF0, 0x90, 0x07, 0x28, 0x12, 0x07, 0x89,
+/*0970*/0xE0, 0xFD, 0x12, 0x07, 0xF0, 0xED, 0xF0, 0x12,
+ 0x07, 0x86, 0xE0, 0x54, 0x1F, 0xFD, 0x12, 0x08,
+/*0980*/0x8B, 0xF5, 0x83, 0xED, 0xF0, 0x90, 0x07, 0x24,
+ 0x12, 0x07, 0x89, 0xE0, 0x54, 0x1F, 0xFD, 0x12,
+/*0990*/0x08, 0x41, 0xED, 0xF0, 0xEF, 0x64, 0x01, 0x4E,
+ 0x70, 0x04, 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00,
+/*09A0*/0xEF, 0x64, 0x02, 0x4E, 0x70, 0x04, 0x7F, 0x01,
+ 0x80, 0x02, 0x7F, 0x00, 0xEF, 0x4D, 0x60, 0x78,
+/*09B0*/0x90, 0x07, 0x26, 0x12, 0x07, 0x35, 0xE0, 0xFF,
+ 0x12, 0x07, 0xFC, 0xEF, 0x12, 0x07, 0x31, 0xE0,
+/*09C0*/0xFF, 0x12, 0x08, 0x08, 0xEF, 0xF0, 0x90, 0x07,
+ 0x22, 0x12, 0x07, 0x35, 0xE0, 0x54, 0x1F, 0xFF,
+/*09D0*/0x12, 0x08, 0x4D, 0xEF, 0xF0, 0x90, 0x07, 0x24,
+ 0x12, 0x07, 0x35, 0xE0, 0x54, 0x1F, 0xFF, 0x12,
+/*09E0*/0x08, 0x59, 0xEF, 0xF0, 0x22, 0x12, 0x07, 0xCC,
+ 0xE4, 0xF0, 0x12, 0x07, 0xD8, 0xE4, 0xF0, 0x12,
+/*09F0*/0x08, 0x81, 0xF5, 0x83, 0xE4, 0xF0, 0x12, 0x08,
+ 0x35, 0x74, 0x14, 0xF0, 0x12, 0x07, 0xE4, 0xE4,
+/*0A00*/0xF0, 0x12, 0x07, 0xF0, 0xE4, 0xF0, 0x12, 0x08,
+ 0x8B, 0xF5, 0x83, 0xE4, 0xF0, 0x12, 0x08, 0x41,
+/*0A10*/0x74, 0x14, 0xF0, 0x12, 0x07, 0xFC, 0xE4, 0xF0,
+ 0x12, 0x08, 0x08, 0xE4, 0xF0, 0x12, 0x08, 0x4D,
+/*0A20*/0xE4, 0xF0, 0x12, 0x08, 0x59, 0x74, 0x14, 0xF0,
+ 0x22, 0x53, 0xF9, 0xF7, 0x75, 0xFC, 0x10, 0xE4,
+/*0A30*/0xF5, 0xFD, 0x75, 0xFE, 0x30, 0xF5, 0xFF, 0xE5,
+ 0xE7, 0x20, 0xE7, 0x03, 0x43, 0xF9, 0x08, 0xE5,
+/*0A40*/0xE6, 0x20, 0xE7, 0x0B, 0x78, 0xFF, 0xE4, 0xF6,
+ 0xD8, 0xFD, 0x53, 0xE6, 0xFE, 0x80, 0x09, 0x78,
+/*0A50*/0x08, 0xE4, 0xF6, 0xD8, 0xFD, 0x53, 0xE6, 0xFE,
+ 0x75, 0x81, 0x80, 0xE4, 0xF5, 0xA8, 0xD2, 0xA8,
+/*0A60*/0xC2, 0xA9, 0xD2, 0xAF, 0xE5, 0xE2, 0x20, 0xE5,
+ 0x05, 0x20, 0xE6, 0x02, 0x80, 0x03, 0x43, 0xE1,
+/*0A70*/0x02, 0xE5, 0xE2, 0x20, 0xE0, 0x0E, 0x90, 0x00,
+ 0x00, 0x7F, 0x00, 0x7E, 0x08, 0xE4, 0xF0, 0xA3,
+/*0A80*/0xDF, 0xFC, 0xDE, 0xFA, 0x02, 0x0A, 0xDB, 0x43,
+ 0xFA, 0x01, 0xC0, 0xE0, 0xC0, 0xF0, 0xC0, 0x83,
+/*0A90*/0xC0, 0x82, 0xC0, 0xD0, 0x12, 0x1C, 0xE7, 0xD0,
+ 0xD0, 0xD0, 0x82, 0xD0, 0x83, 0xD0, 0xF0, 0xD0,
+/*0AA0*/0xE0, 0x53, 0xFA, 0xFE, 0x32, 0x02, 0x1B, 0x55,
+ 0xE4, 0x93, 0xA3, 0xF8, 0xE4, 0x93, 0xA3, 0xF6,
+/*0AB0*/0x08, 0xDF, 0xF9, 0x80, 0x29, 0xE4, 0x93, 0xA3,
+ 0xF8, 0x54, 0x07, 0x24, 0x0C, 0xC8, 0xC3, 0x33,
+/*0AC0*/0xC4, 0x54, 0x0F, 0x44, 0x20, 0xC8, 0x83, 0x40,
+ 0x04, 0xF4, 0x56, 0x80, 0x01, 0x46, 0xF6, 0xDF,
+/*0AD0*/0xE4, 0x80, 0x0B, 0x01, 0x02, 0x04, 0x08, 0x10,
+ 0x20, 0x40, 0x80, 0x90, 0x00, 0x3F, 0xE4, 0x7E,
+/*0AE0*/0x01, 0x93, 0x60, 0xC1, 0xA3, 0xFF, 0x54, 0x3F,
+ 0x30, 0xE5, 0x09, 0x54, 0x1F, 0xFE, 0xE4, 0x93,
+/*0AF0*/0xA3, 0x60, 0x01, 0x0E, 0xCF, 0x54, 0xC0, 0x25,
+ 0xE0, 0x60, 0xAD, 0x40, 0xB8, 0x80, 0xFE, 0x8C,
+/*0B00*/0x64, 0x8D, 0x65, 0x8A, 0x66, 0x8B, 0x67, 0xE4,
+ 0xF5, 0x69, 0xEF, 0x4E, 0x70, 0x03, 0x02, 0x1D,
+/*0B10*/0x55, 0xE4, 0xF5, 0x68, 0xE5, 0x67, 0x45, 0x66,
+ 0x70, 0x32, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x90,
+/*0B20*/0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE4,
+ 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0xE4, 0x12,
+/*0B30*/0x08, 0x70, 0x70, 0x29, 0x12, 0x07, 0x2A, 0x75,
+ 0x83, 0x92, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*0B40*/0xC6, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC8,
+ 0xE4, 0xF0, 0x80, 0x11, 0x90, 0x07, 0x26, 0x12,
+/*0B50*/0x07, 0x35, 0xE4, 0x12, 0x08, 0x70, 0x70, 0x05,
+ 0x12, 0x07, 0x32, 0xE4, 0xF0, 0x12, 0x1D, 0x55,
+/*0B60*/0x12, 0x1E, 0xBF, 0xE5, 0x67, 0x45, 0x66, 0x70,
+ 0x33, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x90, 0xE5,
+/*0B70*/0x41, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE5,
+ 0x41, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0x12,
+/*0B80*/0x08, 0x6E, 0x70, 0x29, 0x12, 0x07, 0x2A, 0x75,
+ 0x83, 0x92, 0xE5, 0x40, 0x12, 0x07, 0x29, 0x75,
+/*0B90*/0x83, 0xC6, 0xE5, 0x40, 0x12, 0x07, 0x29, 0x75,
+ 0x83, 0xC8, 0x80, 0x0E, 0x90, 0x07, 0x26, 0x12,
+/*0BA0*/0x07, 0x35, 0x12, 0x08, 0x6E, 0x70, 0x06, 0x12,
+ 0x07, 0x32, 0xE5, 0x40, 0xF0, 0xAF, 0x69, 0x7E,
+/*0BB0*/0x00, 0xAD, 0x67, 0xAC, 0x66, 0x12, 0x04, 0x44,
+ 0x12, 0x07, 0x2A, 0x75, 0x83, 0xCA, 0xE0, 0xD3,
+/*0BC0*/0x94, 0x00, 0x50, 0x0C, 0x05, 0x68, 0xE5, 0x68,
+ 0xC3, 0x94, 0x05, 0x50, 0x03, 0x02, 0x0B, 0x14,
+/*0BD0*/0x22, 0x8C, 0x60, 0x8D, 0x61, 0x12, 0x08, 0xDA,
+ 0x74, 0x20, 0x40, 0x0D, 0x2F, 0xF5, 0x82, 0x74,
+/*0BE0*/0x03, 0x3E, 0xF5, 0x83, 0xE5, 0x3E, 0xF0, 0x80,
+ 0x0B, 0x2F, 0xF5, 0x82, 0x74, 0x03, 0x3E, 0xF5,
+/*0BF0*/0x83, 0xE5, 0x3C, 0xF0, 0xE5, 0x3C, 0xD3, 0x95,
+ 0x3E, 0x40, 0x3C, 0xE5, 0x61, 0x45, 0x60, 0x70,
+/*0C00*/0x10, 0xE9, 0x12, 0x09, 0x04, 0xE5, 0x3E, 0x12,
+ 0x07, 0x68, 0x40, 0x3B, 0x12, 0x08, 0x95, 0x80,
+/*0C10*/0x18, 0xE5, 0x3E, 0xC3, 0x95, 0x38, 0x40, 0x1D,
+ 0x85, 0x3E, 0x38, 0xE5, 0x3E, 0x60, 0x05, 0x85,
+/*0C20*/0x3F, 0x39, 0x80, 0x03, 0x85, 0x39, 0x39, 0x8F,
+ 0x3A, 0x12, 0x08, 0x14, 0xE5, 0x3E, 0x12, 0x07,
+/*0C30*/0xC0, 0xE5, 0x3F, 0xF0, 0x22, 0x80, 0x43, 0xE5,
+ 0x61, 0x45, 0x60, 0x70, 0x19, 0x12, 0x07, 0x5F,
+/*0C40*/0x40, 0x05, 0x12, 0x08, 0x9E, 0x80, 0x27, 0x12,
+ 0x09, 0x0B, 0x12, 0x08, 0x14, 0xE5, 0x42, 0x12,
+/*0C50*/0x07, 0xC0, 0xE5, 0x41, 0xF0, 0x22, 0xE5, 0x3C,
+ 0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3C, 0x38,
+/*0C60*/0xE5, 0x3C, 0x60, 0x05, 0x85, 0x3D, 0x39, 0x80,
+ 0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12, 0x08,
+/*0C70*/0x14, 0xE5, 0x3C, 0x12, 0x07, 0xC0, 0xE5, 0x3D,
+ 0xF0, 0x22, 0x85, 0x38, 0x38, 0x85, 0x39, 0x39,
+/*0C80*/0x85, 0x3A, 0x3A, 0x12, 0x08, 0x14, 0xE5, 0x38,
+ 0x12, 0x07, 0xC0, 0xE5, 0x39, 0xF0, 0x22, 0x7F,
+/*0C90*/0x06, 0x12, 0x17, 0x31, 0x12, 0x1D, 0x23, 0x12,
+ 0x0E, 0x04, 0x12, 0x0E, 0x33, 0xE0, 0x44, 0x0A,
+/*0CA0*/0xF0, 0x74, 0x8E, 0xFE, 0x12, 0x0E, 0x04, 0x12,
+ 0x0E, 0x0B, 0xEF, 0xF0, 0xE5, 0x28, 0x30, 0xE5,
+/*0CB0*/0x03, 0xD3, 0x80, 0x01, 0xC3, 0x40, 0x05, 0x75,
+ 0x14, 0x20, 0x80, 0x03, 0x75, 0x14, 0x08, 0x12,
+/*0CC0*/0x0E, 0x04, 0x75, 0x83, 0x8A, 0xE5, 0x14, 0xF0,
+ 0xB4, 0xFF, 0x05, 0x75, 0x12, 0x80, 0x80, 0x06,
+/*0CD0*/0xE5, 0x14, 0xC3, 0x13, 0xF5, 0x12, 0xE4, 0xF5,
+ 0x16, 0xF5, 0x7F, 0x12, 0x19, 0x36, 0x12, 0x13,
+/*0CE0*/0xA3, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x50, 0x09,
+ 0x05, 0x16, 0xE5, 0x16, 0xC3, 0x94, 0x14, 0x40,
+/*0CF0*/0xEA, 0xE5, 0xE4, 0x20, 0xE7, 0x28, 0x12, 0x0E,
+ 0x04, 0x75, 0x83, 0xD2, 0xE0, 0x54, 0x08, 0xD3,
+/*0D00*/0x94, 0x00, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
+ 0x7F, 0x00, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40,
+/*0D10*/0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF,
+ 0x5E, 0x60, 0x03, 0x12, 0x1D, 0xD7, 0xE5, 0x7F,
+/*0D20*/0xC3, 0x94, 0x11, 0x40, 0x14, 0x12, 0x0E, 0x04,
+ 0x75, 0x83, 0xD2, 0xE0, 0x44, 0x80, 0xF0, 0xE5,
+/*0D30*/0xE4, 0x20, 0xE7, 0x0F, 0x12, 0x1D, 0xD7, 0x80,
+ 0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0xD2, 0xE0,
+/*0D40*/0x54, 0x7F, 0xF0, 0x12, 0x1D, 0x23, 0x22, 0x74,
+ 0x8A, 0x85, 0x08, 0x82, 0xF5, 0x83, 0xE5, 0x17,
+/*0D50*/0xF0, 0x12, 0x0E, 0x3A, 0xE4, 0xF0, 0x90, 0x07,
+ 0x02, 0xE0, 0x12, 0x0E, 0x17, 0x75, 0x83, 0x90,
+/*0D60*/0xEF, 0xF0, 0x74, 0x92, 0xFE, 0xE5, 0x08, 0x44,
+ 0x07, 0xFF, 0xF5, 0x82, 0x8E, 0x83, 0xE0, 0x54,
+/*0D70*/0xC0, 0xFD, 0x90, 0x07, 0x03, 0xE0, 0x54, 0x3F,
+ 0x4D, 0x8F, 0x82, 0x8E, 0x83, 0xF0, 0x90, 0x07,
+/*0D80*/0x04, 0xE0, 0x12, 0x0E, 0x17, 0x75, 0x83, 0x82,
+ 0xEF, 0xF0, 0x90, 0x07, 0x05, 0xE0, 0xFF, 0xED,
+/*0D90*/0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xB4, 0xEF,
+ 0x12, 0x0E, 0x03, 0x75, 0x83, 0x80, 0xE0, 0x54,
+/*0DA0*/0xBF, 0xF0, 0x30, 0x37, 0x0A, 0x12, 0x0E, 0x91,
+ 0x75, 0x83, 0x94, 0xE0, 0x44, 0x80, 0xF0, 0x30,
+/*0DB0*/0x38, 0x0A, 0x12, 0x0E, 0x91, 0x75, 0x83, 0x92,
+ 0xE0, 0x44, 0x80, 0xF0, 0xE5, 0x28, 0x30, 0xE4,
+/*0DC0*/0x1A, 0x20, 0x39, 0x0A, 0x12, 0x0E, 0x04, 0x75,
+ 0x83, 0x88, 0xE0, 0x54, 0x7F, 0xF0, 0x20, 0x3A,
+/*0DD0*/0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0x88, 0xE0,
+ 0x54, 0xBF, 0xF0, 0x74, 0x8C, 0xFE, 0x12, 0x0E,
+/*0DE0*/0x04, 0x8E, 0x83, 0xE0, 0x54, 0x0F, 0x12, 0x0E,
+ 0x03, 0x75, 0x83, 0x86, 0xE0, 0x54, 0xBF, 0xF0,
+/*0DF0*/0xE5, 0x08, 0x44, 0x06, 0x12, 0x0D, 0xFD, 0x75,
+ 0x83, 0x8A, 0xE4, 0xF0, 0x22, 0xF5, 0x82, 0x75,
+/*0E00*/0x83, 0x82, 0xE4, 0xF0, 0xE5, 0x08, 0x44, 0x07,
+ 0xF5, 0x82, 0x22, 0x8E, 0x83, 0xE0, 0xF5, 0x10,
+/*0E10*/0x54, 0xFE, 0xF0, 0xE5, 0x10, 0x44, 0x01, 0xFF,
+ 0xE5, 0x08, 0xFD, 0xED, 0x44, 0x07, 0xF5, 0x82,
+/*0E20*/0x22, 0xE5, 0x15, 0xC4, 0x54, 0x07, 0xFF, 0xE5,
+ 0x08, 0xFD, 0xED, 0x44, 0x08, 0xF5, 0x82, 0x75,
+/*0E30*/0x83, 0x82, 0x22, 0x75, 0x83, 0x80, 0xE0, 0x44,
+ 0x40, 0xF0, 0xE5, 0x08, 0x44, 0x08, 0xF5, 0x82,
+/*0E40*/0x75, 0x83, 0x8A, 0x22, 0xE5, 0x16, 0x25, 0xE0,
+ 0x25, 0xE0, 0x24, 0xAF, 0xF5, 0x82, 0xE4, 0x34,
+/*0E50*/0x1A, 0xF5, 0x83, 0xE4, 0x93, 0xF5, 0x0D, 0x22,
+ 0x43, 0xE1, 0x10, 0x43, 0xE1, 0x80, 0x53, 0xE1,
+/*0E60*/0xFD, 0x85, 0xE1, 0x10, 0x22, 0xE5, 0x16, 0x25,
+ 0xE0, 0x25, 0xE0, 0x24, 0xB2, 0xF5, 0x82, 0xE4,
+/*0E70*/0x34, 0x1A, 0xF5, 0x83, 0xE4, 0x93, 0x22, 0x85,
+ 0x55, 0x82, 0x85, 0x54, 0x83, 0xE5, 0x15, 0xF0,
+/*0E80*/0x22, 0xE5, 0xE2, 0x54, 0x20, 0xD3, 0x94, 0x00,
+ 0x22, 0xE5, 0xE2, 0x54, 0x40, 0xD3, 0x94, 0x00,
+/*0E90*/0x22, 0xE5, 0x08, 0x44, 0x06, 0xF5, 0x82, 0x22,
+ 0xFD, 0xE5, 0x08, 0xFB, 0xEB, 0x44, 0x07, 0xF5,
+/*0EA0*/0x82, 0x22, 0x53, 0xF9, 0xF7, 0x75, 0xFE, 0x30,
+ 0x22, 0xEF, 0x4E, 0x70, 0x26, 0x12, 0x07, 0xCC,
+/*0EB0*/0xE0, 0xFD, 0x90, 0x07, 0x26, 0x12, 0x07, 0x7B,
+ 0x12, 0x07, 0xD8, 0xE0, 0xFD, 0x90, 0x07, 0x28,
+/*0EC0*/0x12, 0x07, 0x7B, 0x12, 0x08, 0x81, 0x12, 0x07,
+ 0x72, 0x12, 0x08, 0x35, 0xE0, 0x90, 0x07, 0x24,
+/*0ED0*/0x12, 0x07, 0x78, 0xEF, 0x64, 0x04, 0x4E, 0x70,
+ 0x29, 0x12, 0x07, 0xE4, 0xE0, 0xFD, 0x90, 0x07,
+/*0EE0*/0x26, 0x12, 0x07, 0x7B, 0x12, 0x07, 0xF0, 0xE0,
+ 0xFD, 0x90, 0x07, 0x28, 0x12, 0x07, 0x7B, 0x12,
+/*0EF0*/0x08, 0x8B, 0x12, 0x07, 0x72, 0x12, 0x08, 0x41,
+ 0xE0, 0x54, 0x1F, 0xFD, 0x90, 0x07, 0x24, 0x12,
+/*0F00*/0x07, 0x7B, 0xEF, 0x64, 0x01, 0x4E, 0x70, 0x04,
+ 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00, 0xEF, 0x64,
+/*0F10*/0x02, 0x4E, 0x70, 0x04, 0x7F, 0x01, 0x80, 0x02,
+ 0x7F, 0x00, 0xEF, 0x4D, 0x60, 0x35, 0x12, 0x07,
+/*0F20*/0xFC, 0xE0, 0xFF, 0x90, 0x07, 0x26, 0x12, 0x07,
+ 0x89, 0xEF, 0xF0, 0x12, 0x08, 0x08, 0xE0, 0xFF,
+/*0F30*/0x90, 0x07, 0x28, 0x12, 0x07, 0x89, 0xEF, 0xF0,
+ 0x12, 0x08, 0x4D, 0xE0, 0x54, 0x1F, 0xFF, 0x12,
+/*0F40*/0x07, 0x86, 0xEF, 0xF0, 0x12, 0x08, 0x59, 0xE0,
+ 0x54, 0x1F, 0xFF, 0x90, 0x07, 0x24, 0x12, 0x07,
+/*0F50*/0x89, 0xEF, 0xF0, 0x22, 0xE4, 0xF5, 0x53, 0x12,
+ 0x0E, 0x81, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
+/*0F60*/0x7F, 0x00, 0x12, 0x0E, 0x89, 0x40, 0x04, 0x7E,
+ 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x70,
+/*0F70*/0x03, 0x02, 0x0F, 0xF6, 0x85, 0xE1, 0x10, 0x43,
+ 0xE1, 0x02, 0x53, 0xE1, 0x0F, 0x85, 0xE1, 0x10,
+/*0F80*/0xE4, 0xF5, 0x51, 0xE5, 0xE3, 0x54, 0x3F, 0xF5,
+ 0x52, 0x12, 0x0E, 0x89, 0x40, 0x1D, 0xAD, 0x52,
+/*0F90*/0xAF, 0x51, 0x12, 0x11, 0x18, 0xEF, 0x60, 0x08,
+ 0x85, 0xE1, 0x10, 0x43, 0xE1, 0x40, 0x80, 0x0B,
+/*0FA0*/0x53, 0xE1, 0xBF, 0x12, 0x0E, 0x58, 0x12, 0x00,
+ 0x06, 0x80, 0xFB, 0xE5, 0xE3, 0x54, 0x3F, 0xF5,
+/*0FB0*/0x51, 0xE5, 0xE4, 0x54, 0x3F, 0xF5, 0x52, 0x12,
+ 0x0E, 0x81, 0x40, 0x1D, 0xAD, 0x52, 0xAF, 0x51,
+/*0FC0*/0x12, 0x11, 0x18, 0xEF, 0x60, 0x08, 0x85, 0xE1,
+ 0x10, 0x43, 0xE1, 0x20, 0x80, 0x0B, 0x53, 0xE1,
+/*0FD0*/0xDF, 0x12, 0x0E, 0x58, 0x12, 0x00, 0x06, 0x80,
+ 0xFB, 0x12, 0x0E, 0x81, 0x40, 0x04, 0x7F, 0x01,
+/*0FE0*/0x80, 0x02, 0x7F, 0x00, 0x12, 0x0E, 0x89, 0x40,
+ 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE,
+/*0FF0*/0x4F, 0x60, 0x03, 0x12, 0x0E, 0x5B, 0x22, 0x12,
+ 0x0E, 0x21, 0xEF, 0xF0, 0x12, 0x10, 0x91, 0x22,
+/*1000*/0x02, 0x11, 0x00, 0x02, 0x10, 0x40, 0x02, 0x10,
+ 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1010*/0x01, 0x20, 0x01, 0x20, 0xE4, 0xF5, 0x57, 0x12,
+ 0x16, 0xBD, 0x12, 0x16, 0x44, 0xE4, 0x12, 0x10,
+/*1020*/0x56, 0x12, 0x14, 0xB7, 0x90, 0x07, 0x26, 0x12,
+ 0x07, 0x35, 0xE4, 0x12, 0x07, 0x31, 0xE4, 0xF0,
+/*1030*/0x12, 0x10, 0x56, 0x12, 0x14, 0xB7, 0x90, 0x07,
+ 0x26, 0x12, 0x07, 0x35, 0xE5, 0x41, 0x12, 0x07,
+/*1040*/0x31, 0xE5, 0x40, 0xF0, 0xAF, 0x57, 0x7E, 0x00,
+ 0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44, 0xAF,
+/*1050*/0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0xFF, 0x90,
+ 0x07, 0x20, 0xA3, 0xE0, 0xFD, 0xE4, 0xF5, 0x56,
+/*1060*/0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA, 0x12,
+ 0x11, 0x51, 0x7F, 0x0F, 0x7D, 0x18, 0xE4, 0xF5,
+/*1070*/0x56, 0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA,
+ 0x12, 0x15, 0x41, 0xAF, 0x56, 0x7E, 0x00, 0x12,
+/*1080*/0x1A, 0xFF, 0xE4, 0xFF, 0xF5, 0x56, 0x7D, 0x1F,
+ 0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA, 0x22,
+/*1090*/0x22, 0xE4, 0xF5, 0x55, 0xE5, 0x08, 0xFD, 0x74,
+ 0xA0, 0xF5, 0x56, 0xED, 0x44, 0x07, 0xF5, 0x57,
+/*10A0*/0xE5, 0x28, 0x30, 0xE5, 0x03, 0xD3, 0x80, 0x01,
+ 0xC3, 0x40, 0x05, 0x7F, 0x28, 0xEF, 0x80, 0x04,
+/*10B0*/0x7F, 0x14, 0xEF, 0xC3, 0x13, 0xF5, 0x54, 0xE4,
+ 0xF9, 0x12, 0x0E, 0x18, 0x75, 0x83, 0x8E, 0xE0,
+/*10C0*/0xF5, 0x10, 0xCE, 0xEF, 0xCE, 0xEE, 0xD3, 0x94,
+ 0x00, 0x40, 0x26, 0xE5, 0x10, 0x54, 0xFE, 0x12,
+/*10D0*/0x0E, 0x98, 0x75, 0x83, 0x8E, 0xED, 0xF0, 0xE5,
+ 0x10, 0x44, 0x01, 0xFD, 0xEB, 0x44, 0x07, 0xF5,
+/*10E0*/0x82, 0xED, 0xF0, 0x85, 0x57, 0x82, 0x85, 0x56,
+ 0x83, 0xE0, 0x30, 0xE3, 0x01, 0x09, 0x1E, 0x80,
+/*10F0*/0xD4, 0xC2, 0x34, 0xE9, 0xC3, 0x95, 0x54, 0x40,
+ 0x02, 0xD2, 0x34, 0x22, 0x02, 0x00, 0x06, 0x22,
+/*1100*/0x30, 0x30, 0x11, 0x90, 0x10, 0x00, 0xE4, 0x93,
+ 0xF5, 0x10, 0x90, 0x10, 0x10, 0xE4, 0x93, 0xF5,
+/*1110*/0x10, 0x12, 0x10, 0x90, 0x12, 0x11, 0x50, 0x22,
+ 0xE4, 0xFC, 0xC3, 0xED, 0x9F, 0xFA, 0xEF, 0xF5,
+/*1120*/0x83, 0x75, 0x82, 0x00, 0x79, 0xFF, 0xE4, 0x93,
+ 0xCC, 0x6C, 0xCC, 0xA3, 0xD9, 0xF8, 0xDA, 0xF6,
+/*1130*/0xE5, 0xE2, 0x30, 0xE4, 0x02, 0x8C, 0xE5, 0xED,
+ 0x24, 0xFF, 0xFF, 0xEF, 0x75, 0x82, 0xFF, 0xF5,
+/*1140*/0x83, 0xE4, 0x93, 0x6C, 0x70, 0x03, 0x7F, 0x01,
+ 0x22, 0x7F, 0x00, 0x22, 0x22, 0x11, 0x00, 0x00,
+/*1150*/0x22, 0x8E, 0x58, 0x8F, 0x59, 0x8C, 0x5A, 0x8D,
+ 0x5B, 0x8A, 0x5C, 0x8B, 0x5D, 0x75, 0x5E, 0x01,
+/*1160*/0xE4, 0xF5, 0x5F, 0xF5, 0x60, 0xF5, 0x62, 0x12,
+ 0x07, 0x2A, 0x75, 0x83, 0xD0, 0xE0, 0xFF, 0xC4,
+/*1170*/0x54, 0x0F, 0xF5, 0x61, 0x12, 0x1E, 0xA5, 0x85,
+ 0x59, 0x5E, 0xD3, 0xE5, 0x5E, 0x95, 0x5B, 0xE5,
+/*1180*/0x5A, 0x12, 0x07, 0x6B, 0x50, 0x4B, 0x12, 0x07,
+ 0x03, 0x75, 0x83, 0xBC, 0xE0, 0x45, 0x5E, 0x12,
+/*1190*/0x07, 0x29, 0x75, 0x83, 0xBE, 0xE0, 0x45, 0x5E,
+ 0x12, 0x07, 0x29, 0x75, 0x83, 0xC0, 0xE0, 0x45,
+/*11A0*/0x5E, 0xF0, 0xAF, 0x5F, 0xE5, 0x60, 0x12, 0x08,
+ 0x78, 0x12, 0x0A, 0xFF, 0xAF, 0x62, 0x7E, 0x00,
+/*11B0*/0xAD, 0x5D, 0xAC, 0x5C, 0x12, 0x04, 0x44, 0xE5,
+ 0x61, 0xAF, 0x5E, 0x7E, 0x00, 0xB4, 0x03, 0x05,
+/*11C0*/0x12, 0x1E, 0x21, 0x80, 0x07, 0xAD, 0x5D, 0xAC,
+ 0x5C, 0x12, 0x13, 0x17, 0x05, 0x5E, 0x02, 0x11,
+/*11D0*/0x7A, 0x12, 0x07, 0x03, 0x75, 0x83, 0xBC, 0xE0,
+ 0x45, 0x40, 0x12, 0x07, 0x29, 0x75, 0x83, 0xBE,
+/*11E0*/0xE0, 0x45, 0x40, 0x12, 0x07, 0x29, 0x75, 0x83,
+ 0xC0, 0xE0, 0x45, 0x40, 0xF0, 0x22, 0x8E, 0x58,
+/*11F0*/0x8F, 0x59, 0x75, 0x5A, 0x01, 0x79, 0x01, 0x75,
+ 0x5B, 0x01, 0xE4, 0xFB, 0x12, 0x07, 0x2A, 0x75,
+/*1200*/0x83, 0xAE, 0xE0, 0x54, 0x1A, 0xFF, 0x12, 0x08,
+ 0x65, 0xE0, 0xC4, 0x13, 0x54, 0x07, 0xFE, 0xEF,
+/*1210*/0x70, 0x0C, 0xEE, 0x65, 0x35, 0x70, 0x07, 0x90,
+ 0x07, 0x2F, 0xE0, 0xB4, 0x01, 0x0D, 0xAF, 0x35,
+/*1220*/0x7E, 0x00, 0x12, 0x0E, 0xA9, 0xCF, 0xEB, 0xCF,
+ 0x02, 0x1E, 0x60, 0xE5, 0x59, 0x64, 0x02, 0x45,
+/*1230*/0x58, 0x70, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F,
+ 0x00, 0xE5, 0x59, 0x45, 0x58, 0x70, 0x04, 0x7E,
+/*1240*/0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x60,
+ 0x23, 0x85, 0x41, 0x49, 0x85, 0x40, 0x4B, 0xE5,
+/*1250*/0x59, 0x45, 0x58, 0x70, 0x2C, 0xAF, 0x5A, 0xFE,
+ 0xCD, 0xE9, 0xCD, 0xFC, 0xAB, 0x59, 0xAA, 0x58,
+/*1260*/0x12, 0x0A, 0xFF, 0xAF, 0x5B, 0x7E, 0x00, 0x12,
+ 0x1E, 0x60, 0x80, 0x15, 0xAF, 0x5B, 0x7E, 0x00,
+/*1270*/0x12, 0x1E, 0x60, 0x90, 0x07, 0x26, 0x12, 0x07,
+ 0x35, 0xE5, 0x49, 0x12, 0x07, 0x31, 0xE5, 0x4B,
+/*1280*/0xF0, 0xE4, 0xFD, 0xAF, 0x35, 0xFE, 0xFC, 0x12,
+ 0x09, 0x15, 0x22, 0x8C, 0x64, 0x8D, 0x65, 0x12,
+/*1290*/0x08, 0xDA, 0x40, 0x3C, 0xE5, 0x65, 0x45, 0x64,
+ 0x70, 0x10, 0x12, 0x09, 0x04, 0xC3, 0xE5, 0x3E,
+/*12A0*/0x12, 0x07, 0x69, 0x40, 0x3B, 0x12, 0x08, 0x95,
+ 0x80, 0x18, 0xE5, 0x3E, 0xC3, 0x95, 0x38, 0x40,
+/*12B0*/0x1D, 0x85, 0x3E, 0x38, 0xE5, 0x3E, 0x60, 0x05,
+ 0x85, 0x3F, 0x39, 0x80, 0x03, 0x85, 0x39, 0x39,
+/*12C0*/0x8F, 0x3A, 0x12, 0x07, 0xA8, 0xE5, 0x3E, 0x12,
+ 0x07, 0x53, 0xE5, 0x3F, 0xF0, 0x22, 0x80, 0x3B,
+/*12D0*/0xE5, 0x65, 0x45, 0x64, 0x70, 0x11, 0x12, 0x07,
+ 0x5F, 0x40, 0x05, 0x12, 0x08, 0x9E, 0x80, 0x1F,
+/*12E0*/0x12, 0x07, 0x3E, 0xE5, 0x41, 0xF0, 0x22, 0xE5,
+ 0x3C, 0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3C,
+/*12F0*/0x38, 0xE5, 0x3C, 0x60, 0x05, 0x85, 0x3D, 0x39,
+ 0x80, 0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12,
+/*1300*/0x07, 0xA8, 0xE5, 0x3C, 0x12, 0x07, 0x53, 0xE5,
+ 0x3D, 0xF0, 0x22, 0x12, 0x07, 0x9F, 0xE5, 0x38,
+/*1310*/0x12, 0x07, 0x53, 0xE5, 0x39, 0xF0, 0x22, 0x8C,
+ 0x63, 0x8D, 0x64, 0x12, 0x08, 0xDA, 0x40, 0x3C,
+/*1320*/0xE5, 0x64, 0x45, 0x63, 0x70, 0x10, 0x12, 0x09,
+ 0x04, 0xC3, 0xE5, 0x3E, 0x12, 0x07, 0x69, 0x40,
+/*1330*/0x3B, 0x12, 0x08, 0x95, 0x80, 0x18, 0xE5, 0x3E,
+ 0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3E, 0x38,
+/*1340*/0xE5, 0x3E, 0x60, 0x05, 0x85, 0x3F, 0x39, 0x80,
+ 0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12, 0x07,
+/*1350*/0xA8, 0xE5, 0x3E, 0x12, 0x07, 0x53, 0xE5, 0x3F,
+ 0xF0, 0x22, 0x80, 0x3B, 0xE5, 0x64, 0x45, 0x63,
+/*1360*/0x70, 0x11, 0x12, 0x07, 0x5F, 0x40, 0x05, 0x12,
+ 0x08, 0x9E, 0x80, 0x1F, 0x12, 0x07, 0x3E, 0xE5,
+/*1370*/0x41, 0xF0, 0x22, 0xE5, 0x3C, 0xC3, 0x95, 0x38,
+ 0x40, 0x1D, 0x85, 0x3C, 0x38, 0xE5, 0x3C, 0x60,
+/*1380*/0x05, 0x85, 0x3D, 0x39, 0x80, 0x03, 0x85, 0x39,
+ 0x39, 0x8F, 0x3A, 0x12, 0x07, 0xA8, 0xE5, 0x3C,
+/*1390*/0x12, 0x07, 0x53, 0xE5, 0x3D, 0xF0, 0x22, 0x12,
+ 0x07, 0x9F, 0xE5, 0x38, 0x12, 0x07, 0x53, 0xE5,
+/*13A0*/0x39, 0xF0, 0x22, 0xE5, 0x0D, 0xFE, 0xE5, 0x08,
+ 0x8E, 0x54, 0x44, 0x05, 0xF5, 0x55, 0x75, 0x15,
+/*13B0*/0x0F, 0xF5, 0x82, 0x12, 0x0E, 0x7A, 0x12, 0x17,
+ 0xA3, 0x20, 0x31, 0x05, 0x75, 0x15, 0x03, 0x80,
+/*13C0*/0x03, 0x75, 0x15, 0x0B, 0xE5, 0x0A, 0xC3, 0x94,
+ 0x01, 0x50, 0x38, 0x12, 0x14, 0x20, 0x20, 0x31,
+/*13D0*/0x06, 0x05, 0x15, 0x05, 0x15, 0x80, 0x04, 0x15,
+ 0x15, 0x15, 0x15, 0xE5, 0x0A, 0xC3, 0x94, 0x01,
+/*13E0*/0x50, 0x21, 0x12, 0x14, 0x20, 0x20, 0x31, 0x04,
+ 0x05, 0x15, 0x80, 0x02, 0x15, 0x15, 0xE5, 0x0A,
+/*13F0*/0xC3, 0x94, 0x01, 0x50, 0x0E, 0x12, 0x0E, 0x77,
+ 0x12, 0x17, 0xA3, 0x20, 0x31, 0x05, 0x05, 0x15,
+/*1400*/0x12, 0x0E, 0x77, 0xE5, 0x15, 0xB4, 0x08, 0x04,
+ 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x15,
+/*1410*/0xB4, 0x07, 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E,
+ 0x00, 0xEE, 0x4F, 0x60, 0x02, 0x05, 0x7F, 0x22,
+/*1420*/0x85, 0x55, 0x82, 0x85, 0x54, 0x83, 0xE5, 0x15,
+ 0xF0, 0x12, 0x17, 0xA3, 0x22, 0x12, 0x07, 0x2A,
+/*1430*/0x75, 0x83, 0xAE, 0x74, 0xFF, 0x12, 0x07, 0x29,
+ 0xE0, 0x54, 0x1A, 0xF5, 0x34, 0xE0, 0xC4, 0x13,
+/*1440*/0x54, 0x07, 0xF5, 0x35, 0x24, 0xFE, 0x60, 0x24,
+ 0x24, 0xFE, 0x60, 0x3C, 0x24, 0x04, 0x70, 0x63,
+/*1450*/0x75, 0x31, 0x2D, 0xE5, 0x08, 0xFD, 0x74, 0xB6,
+ 0x12, 0x07, 0x92, 0x74, 0xBC, 0x90, 0x07, 0x22,
+/*1460*/0x12, 0x07, 0x95, 0x74, 0x90, 0x12, 0x07, 0xB3,
+ 0x74, 0x92, 0x80, 0x3C, 0x75, 0x31, 0x3A, 0xE5,
+/*1470*/0x08, 0xFD, 0x74, 0xBA, 0x12, 0x07, 0x92, 0x74,
+ 0xC0, 0x90, 0x07, 0x22, 0x12, 0x07, 0xB6, 0x74,
+/*1480*/0xC4, 0x12, 0x07, 0xB3, 0x74, 0xC8, 0x80, 0x20,
+ 0x75, 0x31, 0x35, 0xE5, 0x08, 0xFD, 0x74, 0xB8,
+/*1490*/0x12, 0x07, 0x92, 0x74, 0xBE, 0xFF, 0xED, 0x44,
+ 0x07, 0x90, 0x07, 0x22, 0xCF, 0xF0, 0xA3, 0xEF,
+/*14A0*/0xF0, 0x74, 0xC2, 0x12, 0x07, 0xB3, 0x74, 0xC6,
+ 0xFF, 0xED, 0x44, 0x07, 0xA3, 0xCF, 0xF0, 0xA3,
+/*14B0*/0xEF, 0xF0, 0x22, 0x75, 0x34, 0x01, 0x22, 0x8E,
+ 0x58, 0x8F, 0x59, 0x8C, 0x5A, 0x8D, 0x5B, 0x8A,
+/*14C0*/0x5C, 0x8B, 0x5D, 0x75, 0x5E, 0x01, 0xE4, 0xF5,
+ 0x5F, 0x12, 0x1E, 0xA5, 0x85, 0x59, 0x5E, 0xD3,
+/*14D0*/0xE5, 0x5E, 0x95, 0x5B, 0xE5, 0x5A, 0x12, 0x07,
+ 0x6B, 0x50, 0x57, 0xE5, 0x5D, 0x45, 0x5C, 0x70,
+/*14E0*/0x30, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x92, 0xE5,
+ 0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC6, 0xE5,
+/*14F0*/0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC8, 0xE5,
+ 0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0x90, 0xE5,
+/*1500*/0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE5,
+ 0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0x80,
+/*1510*/0x03, 0x12, 0x07, 0x32, 0xE5, 0x5E, 0xF0, 0xAF,
+ 0x5F, 0x7E, 0x00, 0xAD, 0x5D, 0xAC, 0x5C, 0x12,
+/*1520*/0x04, 0x44, 0xAF, 0x5E, 0x7E, 0x00, 0xAD, 0x5D,
+ 0xAC, 0x5C, 0x12, 0x0B, 0xD1, 0x05, 0x5E, 0x02,
+/*1530*/0x14, 0xCF, 0xAB, 0x5D, 0xAA, 0x5C, 0xAD, 0x5B,
+ 0xAC, 0x5A, 0xAF, 0x59, 0xAE, 0x58, 0x02, 0x1B,
+/*1540*/0xFB, 0x8C, 0x5C, 0x8D, 0x5D, 0x8A, 0x5E, 0x8B,
+ 0x5F, 0x75, 0x60, 0x01, 0xE4, 0xF5, 0x61, 0xF5,
+/*1550*/0x62, 0xF5, 0x63, 0x12, 0x1E, 0xA5, 0x8F, 0x60,
+ 0xD3, 0xE5, 0x60, 0x95, 0x5D, 0xE5, 0x5C, 0x12,
+/*1560*/0x07, 0x6B, 0x50, 0x61, 0xE5, 0x5F, 0x45, 0x5E,
+ 0x70, 0x27, 0x12, 0x07, 0x2A, 0x75, 0x83, 0xB6,
+/*1570*/0xE5, 0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0xB8,
+ 0xE5, 0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0xBA,
+/*1580*/0xE5, 0x60, 0xF0, 0xAF, 0x61, 0x7E, 0x00, 0xE5,
+ 0x62, 0x12, 0x08, 0x7A, 0x12, 0x0A, 0xFF, 0x80,
+/*1590*/0x19, 0x90, 0x07, 0x24, 0x12, 0x07, 0x35, 0xE5,
+ 0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0x8E, 0xE4,
+/*15A0*/0x12, 0x07, 0x29, 0x74, 0x01, 0x12, 0x07, 0x29,
+ 0xE4, 0xF0, 0xAF, 0x63, 0x7E, 0x00, 0xAD, 0x5F,
+/*15B0*/0xAC, 0x5E, 0x12, 0x04, 0x44, 0xAF, 0x60, 0x7E,
+ 0x00, 0xAD, 0x5F, 0xAC, 0x5E, 0x12, 0x12, 0x8B,
+/*15C0*/0x05, 0x60, 0x02, 0x15, 0x58, 0x22, 0x90, 0x11,
+ 0x4D, 0xE4, 0x93, 0x90, 0x07, 0x2E, 0xF0, 0x12,
+/*15D0*/0x08, 0x1F, 0x75, 0x83, 0xAE, 0xE0, 0x54, 0x1A,
+ 0xF5, 0x34, 0x70, 0x67, 0xEF, 0x44, 0x07, 0xF5,
+/*15E0*/0x82, 0x75, 0x83, 0xCE, 0xE0, 0xFF, 0x13, 0x13,
+ 0x13, 0x54, 0x07, 0xF5, 0x36, 0x54, 0x0F, 0xD3,
+/*15F0*/0x94, 0x00, 0x40, 0x06, 0x12, 0x14, 0x2D, 0x12,
+ 0x1B, 0xA9, 0xE5, 0x36, 0x54, 0x0F, 0x24, 0xFE,
+/*1600*/0x60, 0x0C, 0x14, 0x60, 0x0C, 0x14, 0x60, 0x19,
+ 0x24, 0x03, 0x70, 0x37, 0x80, 0x10, 0x02, 0x1E,
+/*1610*/0x91, 0x12, 0x1E, 0x91, 0x12, 0x07, 0x2A, 0x75,
+ 0x83, 0xCE, 0xE0, 0x54, 0xEF, 0xF0, 0x02, 0x1D,
+/*1620*/0xAE, 0x12, 0x10, 0x14, 0xE4, 0xF5, 0x55, 0x12,
+ 0x1D, 0x85, 0x05, 0x55, 0xE5, 0x55, 0xC3, 0x94,
+/*1630*/0x05, 0x40, 0xF4, 0x12, 0x07, 0x2A, 0x75, 0x83,
+ 0xCE, 0xE0, 0x54, 0xC7, 0x12, 0x07, 0x29, 0xE0,
+/*1640*/0x44, 0x08, 0xF0, 0x22, 0xE4, 0xF5, 0x58, 0xF5,
+ 0x59, 0xAF, 0x08, 0xEF, 0x44, 0x07, 0xF5, 0x82,
+/*1650*/0x75, 0x83, 0xD0, 0xE0, 0xFD, 0xC4, 0x54, 0x0F,
+ 0xF5, 0x5A, 0xEF, 0x44, 0x07, 0xF5, 0x82, 0x75,
+/*1660*/0x83, 0x80, 0x74, 0x01, 0xF0, 0x12, 0x08, 0x21,
+ 0x75, 0x83, 0x82, 0xE5, 0x45, 0xF0, 0xEF, 0x44,
+/*1670*/0x07, 0xF5, 0x82, 0x75, 0x83, 0x8A, 0x74, 0xFF,
+ 0xF0, 0x12, 0x1A, 0x4D, 0x12, 0x07, 0x2A, 0x75,
+/*1680*/0x83, 0xBC, 0xE0, 0x54, 0xEF, 0x12, 0x07, 0x29,
+ 0x75, 0x83, 0xBE, 0xE0, 0x54, 0xEF, 0x12, 0x07,
+/*1690*/0x29, 0x75, 0x83, 0xC0, 0xE0, 0x54, 0xEF, 0x12,
+ 0x07, 0x29, 0x75, 0x83, 0xBC, 0xE0, 0x44, 0x10,
+/*16A0*/0x12, 0x07, 0x29, 0x75, 0x83, 0xBE, 0xE0, 0x44,
+ 0x10, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC0, 0xE0,
+/*16B0*/0x44, 0x10, 0xF0, 0xAF, 0x58, 0xE5, 0x59, 0x12,
+ 0x08, 0x78, 0x02, 0x0A, 0xFF, 0xE4, 0xF5, 0x58,
+/*16C0*/0x7D, 0x01, 0xF5, 0x59, 0xAF, 0x35, 0xFE, 0xFC,
+ 0x12, 0x09, 0x15, 0x12, 0x07, 0x2A, 0x75, 0x83,
+/*16D0*/0xB6, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+ 0xB8, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*16E0*/0xBA, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+ 0xBC, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*16F0*/0xBE, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+ 0xC0, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*1700*/0x90, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2,
+ 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0xE4,
+/*1710*/0x12, 0x07, 0x29, 0x75, 0x83, 0x92, 0xE4, 0x12,
+ 0x07, 0x29, 0x75, 0x83, 0xC6, 0xE4, 0x12, 0x07,
+/*1720*/0x29, 0x75, 0x83, 0xC8, 0xE4, 0xF0, 0xAF, 0x58,
+ 0xFE, 0xE5, 0x59, 0x12, 0x08, 0x7A, 0x02, 0x0A,
+/*1730*/0xFF, 0xE5, 0xE2, 0x30, 0xE4, 0x6C, 0xE5, 0xE7,
+ 0x54, 0xC0, 0x64, 0x40, 0x70, 0x64, 0xE5, 0x09,
+/*1740*/0xC4, 0x54, 0x30, 0xFE, 0xE5, 0x08, 0x25, 0xE0,
+ 0x25, 0xE0, 0x54, 0xC0, 0x4E, 0xFE, 0xEF, 0x54,
+/*1750*/0x3F, 0x4E, 0xFD, 0xE5, 0x2B, 0xAE, 0x2A, 0x78,
+ 0x02, 0xC3, 0x33, 0xCE, 0x33, 0xCE, 0xD8, 0xF9,
+/*1760*/0xF5, 0x82, 0x8E, 0x83, 0xED, 0xF0, 0xE5, 0x2B,
+ 0xAE, 0x2A, 0x78, 0x02, 0xC3, 0x33, 0xCE, 0x33,
+/*1770*/0xCE, 0xD8, 0xF9, 0xFF, 0xF5, 0x82, 0x8E, 0x83,
+ 0xA3, 0xE5, 0xFE, 0xF0, 0x8F, 0x82, 0x8E, 0x83,
+/*1780*/0xA3, 0xA3, 0xE5, 0xFD, 0xF0, 0x8F, 0x82, 0x8E,
+ 0x83, 0xA3, 0xA3, 0xA3, 0xE5, 0xFC, 0xF0, 0xC3,
+/*1790*/0xE5, 0x2B, 0x94, 0xFA, 0xE5, 0x2A, 0x94, 0x00,
+ 0x50, 0x08, 0x05, 0x2B, 0xE5, 0x2B, 0x70, 0x02,
+/*17A0*/0x05, 0x2A, 0x22, 0xE4, 0xFF, 0xE4, 0xF5, 0x58,
+ 0xF5, 0x56, 0xF5, 0x57, 0x74, 0x82, 0xFC, 0x12,
+/*17B0*/0x0E, 0x04, 0x8C, 0x83, 0xE0, 0xF5, 0x10, 0x54,
+ 0x7F, 0xF0, 0xE5, 0x10, 0x44, 0x80, 0x12, 0x0E,
+/*17C0*/0x98, 0xED, 0xF0, 0x7E, 0x0A, 0x12, 0x0E, 0x04,
+ 0x75, 0x83, 0xA0, 0xE0, 0x20, 0xE0, 0x26, 0xDE,
+/*17D0*/0xF4, 0x05, 0x57, 0xE5, 0x57, 0x70, 0x02, 0x05,
+ 0x56, 0xE5, 0x14, 0x24, 0x01, 0xFD, 0xE4, 0x33,
+/*17E0*/0xFC, 0xD3, 0xE5, 0x57, 0x9D, 0xE5, 0x56, 0x9C,
+ 0x40, 0xD9, 0xE5, 0x0A, 0x94, 0x20, 0x50, 0x02,
+/*17F0*/0x05, 0x0A, 0x43, 0xE1, 0x08, 0xC2, 0x31, 0x12,
+ 0x0E, 0x04, 0x75, 0x83, 0xA6, 0xE0, 0x55, 0x12,
+/*1800*/0x65, 0x12, 0x70, 0x03, 0xD2, 0x31, 0x22, 0xC2,
+ 0x31, 0x22, 0x90, 0x07, 0x26, 0xE0, 0xFA, 0xA3,
+/*1810*/0xE0, 0xF5, 0x82, 0x8A, 0x83, 0xE0, 0xF5, 0x41,
+ 0xE5, 0x39, 0xC3, 0x95, 0x41, 0x40, 0x26, 0xE5,
+/*1820*/0x39, 0x95, 0x41, 0xC3, 0x9F, 0xEE, 0x12, 0x07,
+ 0x6B, 0x40, 0x04, 0x7C, 0x01, 0x80, 0x02, 0x7C,
+/*1830*/0x00, 0xE5, 0x41, 0x64, 0x3F, 0x60, 0x04, 0x7B,
+ 0x01, 0x80, 0x02, 0x7B, 0x00, 0xEC, 0x5B, 0x60,
+/*1840*/0x29, 0x05, 0x41, 0x80, 0x28, 0xC3, 0xE5, 0x41,
+ 0x95, 0x39, 0xC3, 0x9F, 0xEE, 0x12, 0x07, 0x6B,
+/*1850*/0x40, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00,
+ 0xE5, 0x41, 0x60, 0x04, 0x7E, 0x01, 0x80, 0x02,
+/*1860*/0x7E, 0x00, 0xEF, 0x5E, 0x60, 0x04, 0x15, 0x41,
+ 0x80, 0x03, 0x85, 0x39, 0x41, 0x85, 0x3A, 0x40,
+/*1870*/0x22, 0xE5, 0xE2, 0x30, 0xE4, 0x60, 0xE5, 0xE1,
+ 0x30, 0xE2, 0x5B, 0xE5, 0x09, 0x70, 0x04, 0x7F,
+/*1880*/0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x08, 0x70,
+ 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE,
+/*1890*/0x5F, 0x60, 0x43, 0x53, 0xF9, 0xF8, 0xE5, 0xE2,
+ 0x30, 0xE4, 0x3B, 0xE5, 0xE1, 0x30, 0xE2, 0x2E,
+/*18A0*/0x43, 0xFA, 0x02, 0x53, 0xFA, 0xFB, 0xE4, 0xF5,
+ 0x10, 0x90, 0x94, 0x70, 0xE5, 0x10, 0xF0, 0xE5,
+/*18B0*/0xE1, 0x30, 0xE2, 0xE7, 0x90, 0x94, 0x70, 0xE0,
+ 0x65, 0x10, 0x60, 0x03, 0x43, 0xFA, 0x04, 0x05,
+/*18C0*/0x10, 0x90, 0x94, 0x70, 0xE5, 0x10, 0xF0, 0x70,
+ 0xE6, 0x12, 0x00, 0x06, 0x80, 0xE1, 0x53, 0xFA,
+/*18D0*/0xFD, 0x53, 0xFA, 0xFB, 0x80, 0xC0, 0x22, 0x8F,
+ 0x54, 0x12, 0x00, 0x06, 0xE5, 0xE1, 0x30, 0xE0,
+/*18E0*/0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5,
+ 0x7E, 0xD3, 0x94, 0x05, 0x40, 0x04, 0x7E, 0x01,
+/*18F0*/0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x60, 0x3D,
+ 0x85, 0x54, 0x11, 0xE5, 0xE2, 0x20, 0xE1, 0x32,
+/*1900*/0x74, 0xCE, 0x12, 0x1A, 0x05, 0x30, 0xE7, 0x04,
+ 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00, 0x8F, 0x82,
+/*1910*/0x8E, 0x83, 0xE0, 0x30, 0xE6, 0x04, 0x7F, 0x01,
+ 0x80, 0x02, 0x7F, 0x00, 0xEF, 0x5D, 0x70, 0x15,
+/*1920*/0x12, 0x15, 0xC6, 0x74, 0xCE, 0x12, 0x1A, 0x05,
+ 0x30, 0xE6, 0x07, 0xE0, 0x44, 0x80, 0xF0, 0x43,
+/*1930*/0xF9, 0x80, 0x12, 0x18, 0x71, 0x22, 0x12, 0x0E,
+ 0x44, 0xE5, 0x16, 0x25, 0xE0, 0x25, 0xE0, 0x24,
+/*1940*/0xB0, 0xF5, 0x82, 0xE4, 0x34, 0x1A, 0xF5, 0x83,
+ 0xE4, 0x93, 0xF5, 0x0F, 0xE5, 0x16, 0x25, 0xE0,
+/*1950*/0x25, 0xE0, 0x24, 0xB1, 0xF5, 0x82, 0xE4, 0x34,
+ 0x1A, 0xF5, 0x83, 0xE4, 0x93, 0xF5, 0x0E, 0x12,
+/*1960*/0x0E, 0x65, 0xF5, 0x10, 0xE5, 0x0F, 0x54, 0xF0,
+ 0x12, 0x0E, 0x17, 0x75, 0x83, 0x8C, 0xEF, 0xF0,
+/*1970*/0xE5, 0x0F, 0x30, 0xE0, 0x0C, 0x12, 0x0E, 0x04,
+ 0x75, 0x83, 0x86, 0xE0, 0x44, 0x40, 0xF0, 0x80,
+/*1980*/0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0x86, 0xE0,
+ 0x54, 0xBF, 0xF0, 0x12, 0x0E, 0x91, 0x75, 0x83,
+/*1990*/0x82, 0xE5, 0x0E, 0xF0, 0x22, 0x7F, 0x05, 0x12,
+ 0x17, 0x31, 0x12, 0x0E, 0x04, 0x12, 0x0E, 0x33,
+/*19A0*/0x74, 0x02, 0xF0, 0x74, 0x8E, 0xFE, 0x12, 0x0E,
+ 0x04, 0x12, 0x0E, 0x0B, 0xEF, 0xF0, 0x75, 0x15,
+/*19B0*/0x70, 0x12, 0x0F, 0xF7, 0x20, 0x34, 0x05, 0x75,
+ 0x15, 0x10, 0x80, 0x03, 0x75, 0x15, 0x50, 0x12,
+/*19C0*/0x0F, 0xF7, 0x20, 0x34, 0x04, 0x74, 0x10, 0x80,
+ 0x02, 0x74, 0xF0, 0x25, 0x15, 0xF5, 0x15, 0x12,
+/*19D0*/0x0E, 0x21, 0xEF, 0xF0, 0x12, 0x10, 0x91, 0x20,
+ 0x34, 0x17, 0xE5, 0x15, 0x64, 0x30, 0x60, 0x0C,
+/*19E0*/0x74, 0x10, 0x25, 0x15, 0xF5, 0x15, 0xB4, 0x80,
+ 0x03, 0xE4, 0xF5, 0x15, 0x12, 0x0E, 0x21, 0xEF,
+/*19F0*/0xF0, 0x22, 0xF0, 0xE5, 0x0B, 0x25, 0xE0, 0x25,
+ 0xE0, 0x24, 0x82, 0xF5, 0x82, 0xE4, 0x34, 0x07,
+/*1A00*/0xF5, 0x83, 0x22, 0x74, 0x88, 0xFE, 0xE5, 0x08,
+ 0x44, 0x07, 0xFF, 0xF5, 0x82, 0x8E, 0x83, 0xE0,
+/*1A10*/0x22, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82,
+ 0x22, 0xF0, 0xE0, 0x54, 0xC0, 0x8F, 0x82, 0x8E,
+/*1A20*/0x83, 0xF0, 0x22, 0xEF, 0x44, 0x07, 0xF5, 0x82,
+ 0x75, 0x83, 0x86, 0xE0, 0x54, 0x10, 0xD3, 0x94,
+/*1A30*/0x00, 0x22, 0xF0, 0x90, 0x07, 0x15, 0xE0, 0x04,
+ 0xF0, 0x22, 0x44, 0x06, 0xF5, 0x82, 0x75, 0x83,
+/*1A40*/0x9E, 0xE0, 0x22, 0xFE, 0xEF, 0x44, 0x07, 0xF5,
+ 0x82, 0x8E, 0x83, 0xE0, 0x22, 0xE4, 0x90, 0x07,
+/*1A50*/0x2A, 0xF0, 0xA3, 0xF0, 0x12, 0x07, 0x2A, 0x75,
+ 0x83, 0x82, 0xE0, 0x54, 0x7F, 0x12, 0x07, 0x29,
+/*1A60*/0xE0, 0x44, 0x80, 0xF0, 0x12, 0x10, 0xFC, 0x12,
+ 0x08, 0x1F, 0x75, 0x83, 0xA0, 0xE0, 0x20, 0xE0,
+/*1A70*/0x1A, 0x90, 0x07, 0x2B, 0xE0, 0x04, 0xF0, 0x70,
+ 0x06, 0x90, 0x07, 0x2A, 0xE0, 0x04, 0xF0, 0x90,
+/*1A80*/0x07, 0x2A, 0xE0, 0xB4, 0x10, 0xE1, 0xA3, 0xE0,
+ 0xB4, 0x00, 0xDC, 0xEE, 0x44, 0xA6, 0xFC, 0xEF,
+/*1A90*/0x44, 0x07, 0xF5, 0x82, 0x8C, 0x83, 0xE0, 0xF5,
+ 0x32, 0xEE, 0x44, 0xA8, 0xFE, 0xEF, 0x44, 0x07,
+/*1AA0*/0xF5, 0x82, 0x8E, 0x83, 0xE0, 0xF5, 0x33, 0x22,
+ 0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x90,
+/*1AB0*/0x00, 0x20, 0x0F, 0x92, 0x00, 0x21, 0x0F, 0x94,
+ 0x00, 0x22, 0x0F, 0x96, 0x00, 0x23, 0x0F, 0x98,
+/*1AC0*/0x00, 0x24, 0x0F, 0x9A, 0x00, 0x25, 0x0F, 0x9C,
+ 0x00, 0x26, 0x0F, 0x9E, 0x00, 0x27, 0x0F, 0xA0,
+/*1AD0*/0x01, 0x20, 0x01, 0xA2, 0x01, 0x21, 0x01, 0xA4,
+ 0x01, 0x22, 0x01, 0xA6, 0x01, 0x23, 0x01, 0xA8,
+/*1AE0*/0x01, 0x24, 0x01, 0xAA, 0x01, 0x25, 0x01, 0xAC,
+ 0x01, 0x26, 0x01, 0xAE, 0x01, 0x27, 0x01, 0xB0,
+/*1AF0*/0x01, 0x28, 0x01, 0xB4, 0x00, 0x28, 0x0F, 0xB6,
+ 0x40, 0x28, 0x0F, 0xB8, 0x61, 0x28, 0x01, 0xCB,
+/*1B00*/0xEF, 0xCB, 0xCA, 0xEE, 0xCA, 0x7F, 0x01, 0xE4,
+ 0xFD, 0xEB, 0x4A, 0x70, 0x24, 0xE5, 0x08, 0xF5,
+/*1B10*/0x82, 0x74, 0xB6, 0x12, 0x08, 0x29, 0xE5, 0x08,
+ 0xF5, 0x82, 0x74, 0xB8, 0x12, 0x08, 0x29, 0xE5,
+/*1B20*/0x08, 0xF5, 0x82, 0x74, 0xBA, 0x12, 0x08, 0x29,
+ 0x7E, 0x00, 0x7C, 0x00, 0x12, 0x0A, 0xFF, 0x80,
+/*1B30*/0x12, 0x90, 0x07, 0x26, 0x12, 0x07, 0x35, 0xE5,
+ 0x41, 0xF0, 0x90, 0x07, 0x24, 0x12, 0x07, 0x35,
+/*1B40*/0xE5, 0x40, 0xF0, 0x12, 0x07, 0x2A, 0x75, 0x83,
+ 0x8E, 0xE4, 0x12, 0x07, 0x29, 0x74, 0x01, 0x12,
+/*1B50*/0x07, 0x29, 0xE4, 0xF0, 0x22, 0xE4, 0xF5, 0x26,
+ 0xF5, 0x27, 0x53, 0xE1, 0xFE, 0xF5, 0x2A, 0x75,
+/*1B60*/0x2B, 0x01, 0xF5, 0x08, 0x7F, 0x01, 0x12, 0x17,
+ 0x31, 0x30, 0x30, 0x1C, 0x90, 0x1A, 0xA9, 0xE4,
+/*1B70*/0x93, 0xF5, 0x10, 0x90, 0x1F, 0xF9, 0xE4, 0x93,
+ 0xF5, 0x10, 0x90, 0x00, 0x41, 0xE4, 0x93, 0xF5,
+/*1B80*/0x10, 0x90, 0x1E, 0xCA, 0xE4, 0x93, 0xF5, 0x10,
+ 0x7F, 0x02, 0x12, 0x17, 0x31, 0x12, 0x0F, 0x54,
+/*1B90*/0x7F, 0x03, 0x12, 0x17, 0x31, 0x12, 0x00, 0x06,
+ 0xE5, 0xE2, 0x30, 0xE7, 0x09, 0x12, 0x10, 0x00,
+/*1BA0*/0x30, 0x30, 0x03, 0x12, 0x11, 0x00, 0x02, 0x00,
+ 0x47, 0x12, 0x08, 0x1F, 0x75, 0x83, 0xD0, 0xE0,
+/*1BB0*/0xC4, 0x54, 0x0F, 0xFD, 0x75, 0x43, 0x01, 0x75,
+ 0x44, 0xFF, 0x12, 0x08, 0xAA, 0x74, 0x04, 0xF0,
+/*1BC0*/0x75, 0x3B, 0x01, 0xED, 0x14, 0x60, 0x0C, 0x14,
+ 0x60, 0x0B, 0x14, 0x60, 0x0F, 0x24, 0x03, 0x70,
+/*1BD0*/0x0B, 0x80, 0x09, 0x80, 0x00, 0x12, 0x08, 0xA7,
+ 0x04, 0xF0, 0x80, 0x06, 0x12, 0x08, 0xA7, 0x74,
+/*1BE0*/0x04, 0xF0, 0xEE, 0x44, 0x82, 0xFE, 0xEF, 0x44,
+ 0x07, 0xF5, 0x82, 0x8E, 0x83, 0xE5, 0x45, 0x12,
+/*1BF0*/0x08, 0xBE, 0x75, 0x83, 0x82, 0xE5, 0x31, 0xF0,
+ 0x02, 0x11, 0x4C, 0x8E, 0x60, 0x8F, 0x61, 0x12,
+/*1C00*/0x1E, 0xA5, 0xE4, 0xFF, 0xCE, 0xED, 0xCE, 0xEE,
+ 0xD3, 0x95, 0x61, 0xE5, 0x60, 0x12, 0x07, 0x6B,
+/*1C10*/0x40, 0x39, 0x74, 0x20, 0x2E, 0xF5, 0x82, 0xE4,
+ 0x34, 0x03, 0xF5, 0x83, 0xE0, 0x70, 0x03, 0xFF,
+/*1C20*/0x80, 0x26, 0x12, 0x08, 0xE2, 0xFD, 0xC3, 0x9F,
+ 0x40, 0x1E, 0xCF, 0xED, 0xCF, 0xEB, 0x4A, 0x70,
+/*1C30*/0x0B, 0x8D, 0x42, 0x12, 0x08, 0xEE, 0xF5, 0x41,
+ 0x8E, 0x40, 0x80, 0x0C, 0x12, 0x08, 0xE2, 0xF5,
+/*1C40*/0x38, 0x12, 0x08, 0xEE, 0xF5, 0x39, 0x8E, 0x3A,
+ 0x1E, 0x80, 0xBC, 0x22, 0x75, 0x58, 0x01, 0xE5,
+/*1C50*/0x35, 0x70, 0x0C, 0x12, 0x07, 0xCC, 0xE0, 0xF5,
+ 0x4A, 0x12, 0x07, 0xD8, 0xE0, 0xF5, 0x4C, 0xE5,
+/*1C60*/0x35, 0xB4, 0x04, 0x0C, 0x12, 0x07, 0xE4, 0xE0,
+ 0xF5, 0x4A, 0x12, 0x07, 0xF0, 0xE0, 0xF5, 0x4C,
+/*1C70*/0xE5, 0x35, 0xB4, 0x01, 0x04, 0x7F, 0x01, 0x80,
+ 0x02, 0x7F, 0x00, 0xE5, 0x35, 0xB4, 0x02, 0x04,
+/*1C80*/0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F,
+ 0x60, 0x0C, 0x12, 0x07, 0xFC, 0xE0, 0xF5, 0x4A,
+/*1C90*/0x12, 0x08, 0x08, 0xE0, 0xF5, 0x4C, 0x85, 0x41,
+ 0x49, 0x85, 0x40, 0x4B, 0x22, 0x75, 0x5B, 0x01,
+/*1CA0*/0x90, 0x07, 0x24, 0x12, 0x07, 0x35, 0xE0, 0x54,
+ 0x1F, 0xFF, 0xD3, 0x94, 0x02, 0x50, 0x04, 0x8F,
+/*1CB0*/0x58, 0x80, 0x05, 0xEF, 0x24, 0xFE, 0xF5, 0x58,
+ 0xEF, 0xC3, 0x94, 0x18, 0x40, 0x05, 0x75, 0x59,
+/*1CC0*/0x18, 0x80, 0x04, 0xEF, 0x04, 0xF5, 0x59, 0x85,
+ 0x43, 0x5A, 0xAF, 0x58, 0x7E, 0x00, 0xAD, 0x59,
+/*1CD0*/0x7C, 0x00, 0xAB, 0x5B, 0x7A, 0x00, 0x12, 0x15,
+ 0x41, 0xAF, 0x5A, 0x7E, 0x00, 0x12, 0x18, 0x0A,
+/*1CE0*/0xAF, 0x5B, 0x7E, 0x00, 0x02, 0x1A, 0xFF, 0xE5,
+ 0xE2, 0x30, 0xE7, 0x0E, 0x12, 0x10, 0x03, 0xC2,
+/*1CF0*/0x30, 0x30, 0x30, 0x03, 0x12, 0x10, 0xFF, 0x20,
+ 0x33, 0x28, 0xE5, 0xE7, 0x30, 0xE7, 0x05, 0x12,
+/*1D00*/0x0E, 0xA2, 0x80, 0x0D, 0xE5, 0xFE, 0xC3, 0x94,
+ 0x20, 0x50, 0x06, 0x12, 0x0E, 0xA2, 0x43, 0xF9,
+/*1D10*/0x08, 0xE5, 0xF2, 0x30, 0xE7, 0x03, 0x53, 0xF9,
+ 0x7F, 0xE5, 0xF1, 0x54, 0x70, 0xD3, 0x94, 0x00,
+/*1D20*/0x50, 0xD8, 0x22, 0x12, 0x0E, 0x04, 0x75, 0x83,
+ 0x80, 0xE4, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0x12,
+/*1D30*/0x0D, 0xFD, 0x75, 0x83, 0x84, 0x12, 0x0E, 0x02,
+ 0x75, 0x83, 0x86, 0x12, 0x0E, 0x02, 0x75, 0x83,
+/*1D40*/0x8C, 0xE0, 0x54, 0xF3, 0x12, 0x0E, 0x03, 0x75,
+ 0x83, 0x8E, 0x12, 0x0E, 0x02, 0x75, 0x83, 0x94,
+/*1D50*/0xE0, 0x54, 0xFB, 0xF0, 0x22, 0x12, 0x07, 0x2A,
+ 0x75, 0x83, 0x8E, 0xE4, 0x12, 0x07, 0x29, 0x74,
+/*1D60*/0x01, 0x12, 0x07, 0x29, 0xE4, 0x12, 0x08, 0xBE,
+ 0x75, 0x83, 0x8C, 0xE0, 0x44, 0x20, 0x12, 0x08,
+/*1D70*/0xBE, 0xE0, 0x54, 0xDF, 0xF0, 0x74, 0x84, 0x85,
+ 0x08, 0x82, 0xF5, 0x83, 0xE0, 0x54, 0x7F, 0xF0,
+/*1D80*/0xE0, 0x44, 0x80, 0xF0, 0x22, 0x75, 0x56, 0x01,
+ 0xE4, 0xFD, 0xF5, 0x57, 0xAF, 0x35, 0xFE, 0xFC,
+/*1D90*/0x12, 0x09, 0x15, 0x12, 0x1C, 0x9D, 0x12, 0x1E,
+ 0x7A, 0x12, 0x1C, 0x4C, 0xAF, 0x57, 0x7E, 0x00,
+/*1DA0*/0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44, 0xAF,
+ 0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0x75, 0x56,
+/*1DB0*/0x01, 0xE4, 0xFD, 0xF5, 0x57, 0xAF, 0x35, 0xFE,
+ 0xFC, 0x12, 0x09, 0x15, 0x12, 0x1C, 0x9D, 0x12,
+/*1DC0*/0x1E, 0x7A, 0x12, 0x1C, 0x4C, 0xAF, 0x57, 0x7E,
+ 0x00, 0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44,
+/*1DD0*/0xAF, 0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0xE4,
+ 0xF5, 0x16, 0x12, 0x0E, 0x44, 0xFE, 0xE5, 0x08,
+/*1DE0*/0x44, 0x05, 0xFF, 0x12, 0x0E, 0x65, 0x8F, 0x82,
+ 0x8E, 0x83, 0xF0, 0x05, 0x16, 0xE5, 0x16, 0xC3,
+/*1DF0*/0x94, 0x14, 0x40, 0xE6, 0xE5, 0x08, 0x12, 0x0E,
+ 0x2B, 0xE4, 0xF0, 0x22, 0xE4, 0xF5, 0x58, 0xF5,
+/*1E00*/0x59, 0xF5, 0x5A, 0xFF, 0xFE, 0xAD, 0x58, 0xFC,
+ 0x12, 0x09, 0x15, 0x7F, 0x04, 0x7E, 0x00, 0xAD,
+/*1E10*/0x58, 0x7C, 0x00, 0x12, 0x09, 0x15, 0x7F, 0x02,
+ 0x7E, 0x00, 0xAD, 0x58, 0x7C, 0x00, 0x02, 0x09,
+/*1E20*/0x15, 0xE5, 0x3C, 0x25, 0x3E, 0xFC, 0xE5, 0x42,
+ 0x24, 0x00, 0xFB, 0xE4, 0x33, 0xFA, 0xEC, 0xC3,
+/*1E30*/0x9B, 0xEA, 0x12, 0x07, 0x6B, 0x40, 0x0B, 0x8C,
+ 0x42, 0xE5, 0x3D, 0x25, 0x3F, 0xF5, 0x41, 0x8F,
+/*1E40*/0x40, 0x22, 0x12, 0x09, 0x0B, 0x22, 0x74, 0x84,
+ 0xF5, 0x18, 0x85, 0x08, 0x19, 0x85, 0x19, 0x82,
+/*1E50*/0x85, 0x18, 0x83, 0xE0, 0x54, 0x7F, 0xF0, 0xE0,
+ 0x44, 0x80, 0xF0, 0xE0, 0x44, 0x80, 0xF0, 0x22,
+/*1E60*/0xEF, 0x4E, 0x70, 0x0B, 0x12, 0x07, 0x2A, 0x75,
+ 0x83, 0xD2, 0xE0, 0x54, 0xDF, 0xF0, 0x22, 0x12,
+/*1E70*/0x07, 0x2A, 0x75, 0x83, 0xD2, 0xE0, 0x44, 0x20,
+ 0xF0, 0x22, 0x75, 0x58, 0x01, 0x90, 0x07, 0x26,
+/*1E80*/0x12, 0x07, 0x35, 0xE0, 0x54, 0x3F, 0xF5, 0x41,
+ 0x12, 0x07, 0x32, 0xE0, 0x54, 0x3F, 0xF5, 0x40,
+/*1E90*/0x22, 0x75, 0x56, 0x02, 0xE4, 0xF5, 0x57, 0x12,
+ 0x1D, 0xFC, 0xAF, 0x57, 0x7E, 0x00, 0xAD, 0x56,
+/*1EA0*/0x7C, 0x00, 0x02, 0x04, 0x44, 0xE4, 0xF5, 0x42,
+ 0xF5, 0x41, 0xF5, 0x40, 0xF5, 0x38, 0xF5, 0x39,
+/*1EB0*/0xF5, 0x3A, 0x22, 0xEF, 0x54, 0x07, 0xFF, 0xE5,
+ 0xF9, 0x54, 0xF8, 0x4F, 0xF5, 0xF9, 0x22, 0x7F,
+/*1EC0*/0x01, 0xE4, 0xFE, 0x0F, 0x0E, 0xBE, 0xFF, 0xFB,
+ 0x22, 0x01, 0x20, 0x00, 0x01, 0x04, 0x20, 0x00,
+/*1ED0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1EE0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1EF0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F00*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F10*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F20*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F30*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F40*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F50*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F60*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F70*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F80*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F90*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FA0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FB0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FC0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FD0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FE0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FF0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x81
+};
+
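+/* Load the IB SerDes firmware image above into the chip */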
+int ipath_sd7220_ib_load(struct ipath_devdata *dd)
+{
+ return ipath_sd7220_prog_ld(dd, IB_7220_SERDES, ipath_sd7220_ib_img,
+ sizeof(ipath_sd7220_ib_img), 0);
+}
+
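+/* Verify that the chip's IB SerDes contents match the image above */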
+int ipath_sd7220_ib_vfy(struct ipath_devdata *dd)
+{
+ return ipath_sd7220_prog_vfy(dd, IB_7220_SERDES, ipath_sd7220_ib_img,
+ sizeof(ipath_sd7220_ib_img), 0);
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
new file mode 100644
index 0000000..284c9bc
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -0,0 +1,806 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+
+#include "ipath_kernel.h"
+#include "ipath_verbs.h"
+#include "ipath_common.h"
+
+#define SDMA_DESCQ_SZ PAGE_SIZE /* 256 entries per 4KB page */
+
+static void vl15_watchdog_enq(struct ipath_devdata *dd)
+{
+ /* ipath_sdma_lock must already be held */
+ if (atomic_inc_return(&dd->ipath_sdma_vl15_count) == 1) {
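+ /* arm the watchdog to fire in roughly 50 ms (HZ/20 jiffies, rounded up) */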
+ unsigned long interval = (HZ + 19) / 20;
+ dd->ipath_sdma_vl15_timer.expires = jiffies + interval;
+ add_timer(&dd->ipath_sdma_vl15_timer);
+ }
+}
+
+static void vl15_watchdog_deq(struct ipath_devdata *dd)
+{
+ /* ipath_sdma_lock must already be held */
+ if (atomic_dec_return(&dd->ipath_sdma_vl15_count) != 0) {
+ unsigned long interval = (HZ + 19) / 20;
+ mod_timer(&dd->ipath_sdma_vl15_timer, jiffies + interval);
+ } else {
+ del_timer(&dd->ipath_sdma_vl15_timer);
+ }
+}
+
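+/* Timer callback: a VL15 request has stayed queued past the watchdog interval */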
+static void vl15_watchdog_timeout(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+
+ if (atomic_read(&dd->ipath_sdma_vl15_count) != 0) {
+ ipath_dbg("vl15 watchdog timeout - clearing\n");
+ ipath_cancel_sends(dd, 1);
+ ipath_hol_down(dd);
+ } else {
+ ipath_dbg("vl15 watchdog timeout - "
+ "condition already cleared\n");
+ }
+}
+
+static void unmap_desc(struct ipath_devdata *dd, unsigned head)
+{
+ __le64 *descqp = &dd->ipath_sdma_descq[head].qw[0];
+ u64 desc[2];
+ dma_addr_t addr;
+ size_t len;
+
+ desc[0] = le64_to_cpu(descqp[0]);
+ desc[1] = le64_to_cpu(descqp[1]);
+
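+ /*
+ * Recover the DMA address and length from the two descriptor
+ * qwords: the high half of qword 0 holds the low 32 address
+ * bits and qword 1 the upper bits; the length is a dword
+ * count, extracted here already scaled to bytes.
+ */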
+ addr = (desc[1] << 32) | (desc[0] >> 32);
+ len = (desc[0] >> 14) & (0x7ffULL << 2);
+ dma_unmap_single(&dd->pcidev->dev, addr, len, DMA_TO_DEVICE);
+}
+
+/*
+ * ipath_sdma_lock should be locked before calling this.
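+ * Returns 1 if the head advanced (progress was made), otherwise 0.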
+ */
+int ipath_sdma_make_progress(struct ipath_devdata *dd)
+{
+ struct list_head *lp = NULL;
+ struct ipath_sdma_txreq *txp = NULL;
+ u16 dmahead;
+ u16 start_idx = 0;
+ int progress = 0;
+
+ if (!list_empty(&dd->ipath_sdma_activelist)) {
+ lp = dd->ipath_sdma_activelist.next;
+ txp = list_entry(lp, struct ipath_sdma_txreq, list);
+ start_idx = txp->start_idx;
+ }
+
+ /*
+ * Read the SDMA head register in order to know that the
+ * interrupt clear has been written to the chip.
+ * Otherwise, we may not get an interrupt for the last
+ * descriptor in the queue.
+ */
+ dmahead = (u16)ipath_read_kreg32(dd, dd->ipath_kregs->kr_senddmahead);
+ /* sanity check return value for error handling (chip reset, etc.) */
+ if (dmahead >= dd->ipath_sdma_descq_cnt)
+ goto done;
+
+ while (dd->ipath_sdma_descq_head != dmahead) {
+ if (txp && txp->flags & IPATH_SDMA_TXREQ_F_FREEDESC &&
+ dd->ipath_sdma_descq_head == start_idx) {
+ unmap_desc(dd, dd->ipath_sdma_descq_head);
+ start_idx++;
+ if (start_idx == dd->ipath_sdma_descq_cnt)
+ start_idx = 0;
+ }
+
+ /* increment free count and head */
+ dd->ipath_sdma_descq_removed++;
+ if (++dd->ipath_sdma_descq_head == dd->ipath_sdma_descq_cnt)
+ dd->ipath_sdma_descq_head = 0;
+
+ if (txp && txp->next_descq_idx == dd->ipath_sdma_descq_head) {
+ /* move to notify list */
+ if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
+ vl15_watchdog_deq(dd);
+ list_move_tail(lp, &dd->ipath_sdma_notifylist);
+ if (!list_empty(&dd->ipath_sdma_activelist)) {
+ lp = dd->ipath_sdma_activelist.next;
+ txp = list_entry(lp, struct ipath_sdma_txreq,
+ list);
+ start_idx = txp->start_idx;
+ } else {
+ lp = NULL;
+ txp = NULL;
+ }
+ }
+ progress = 1;
+ }
+
+ if (progress)
+ tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
+
+done:
+ return progress;
+}
+
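+/* Invoke the completion callback, if any, for each request on the list */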
+static void ipath_sdma_notify(struct ipath_devdata *dd, struct list_head *list)
+{
+ struct ipath_sdma_txreq *txp, *txp_next;
+
+ list_for_each_entry_safe(txp, txp_next, list, list) {
+ list_del_init(&txp->list);
+
+ if (txp->callback)
+ (*txp->callback)(txp->callback_cookie,
+ txp->callback_status);
+ }
+}
+
+static void sdma_notify_taskbody(struct ipath_devdata *dd)
+{
+ unsigned long flags;
+ struct list_head list;
+
+ INIT_LIST_HEAD(&list);
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+ list_splice_init(&dd->ipath_sdma_notifylist, &list);
+
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ ipath_sdma_notify(dd, &list);
+
+ /*
+ * The IB verbs layer needs to see the callback before getting
+ * the call to ipath_ib_piobufavail() because the callback
+ * handles releasing resources the next send will need.
+ * Otherwise, we could do these calls in
+ * ipath_sdma_make_progress().
+ */
+ ipath_ib_piobufavail(dd->verbs_dev);
+}
+
+static void sdma_notify_task(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+
+ if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ sdma_notify_taskbody(dd);
+}
+
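+/* Log the SendDMA-related chip registers for debugging */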
+static void dump_sdma_state(struct ipath_devdata *dd)
+{
+ unsigned long reg;
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmastatus);
+ ipath_cdbg(VERBOSE, "kr_senddmastatus: 0x%016lx\n", reg);
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendctrl);
+ ipath_cdbg(VERBOSE, "kr_sendctrl: 0x%016lx\n", reg);
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask0);
+ ipath_cdbg(VERBOSE, "kr_senddmabufmask0: 0x%016lx\n", reg);
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask1);
+ ipath_cdbg(VERBOSE, "kr_senddmabufmask1: 0x%016lx\n", reg);
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmabufmask2);
+ ipath_cdbg(VERBOSE, "kr_senddmabufmask2: 0x%016lx\n", reg);
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
+ ipath_cdbg(VERBOSE, "kr_senddmatail: 0x%016lx\n", reg);
+
+ reg = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
+ ipath_cdbg(VERBOSE, "kr_senddmahead: 0x%016lx\n", reg);
+}
+
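+/* Tasklet: step the SDMA abort/recovery state machine and clean up once aborted */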
+static void sdma_abort_task(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
+ u64 status;
+ unsigned long flags;
+
+ if (test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ return;
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+ status = dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK;
+
+ /* nothing to do */
+ if (status == IPATH_SDMA_ABORT_NONE)
+ goto unlock;
+
+ /* ipath_sdma_abort() is done, waiting for interrupt */
+ if (status == IPATH_SDMA_ABORT_DISARMED) {
+ if (jiffies < dd->ipath_sdma_abort_intr_timeout)
+ goto resched_noprint;
+ /* give up, intr got lost somewhere */
+ ipath_dbg("give up waiting for SDMADISABLED intr\n");
+ __set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+ status = IPATH_SDMA_ABORT_ABORTED;
+ }
+
+ /* everything is stopped, time to clean up and restart */
+ if (status == IPATH_SDMA_ABORT_ABORTED) {
+ struct ipath_sdma_txreq *txp, *txpnext;
+ u64 hwstatus;
+ int notify = 0;
+
+ hwstatus = ipath_read_kreg64(dd,
+ dd->ipath_kregs->kr_senddmastatus);
+
+ if ((hwstatus & (IPATH_SDMA_STATUS_SCORE_BOARD_DRAIN_IN_PROG |
+ IPATH_SDMA_STATUS_ABORT_IN_PROG |
+ IPATH_SDMA_STATUS_INTERNAL_SDMA_ENABLE)) ||
+ !(hwstatus & IPATH_SDMA_STATUS_SCB_EMPTY)) {
+ if (dd->ipath_sdma_reset_wait > 0) {
+ /* not done shutting down sdma */
+ --dd->ipath_sdma_reset_wait;
+ goto resched;
+ }
+ ipath_cdbg(VERBOSE, "gave up waiting for quiescent "
+ "status after SDMA reset, continuing\n");
+ dump_sdma_state(dd);
+ }
+
+ /* dequeue all "sent" requests */
+ list_for_each_entry_safe(txp, txpnext,
+ &dd->ipath_sdma_activelist, list) {
+ txp->callback_status = IPATH_SDMA_TXREQ_S_ABORTED;
+ if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
+ vl15_watchdog_deq(dd);
+ list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
+ notify = 1;
+ }
+ if (notify)
+ tasklet_hi_schedule(&dd->ipath_sdma_notify_task);
+
+ /* reset our notion of head and tail */
+ dd->ipath_sdma_descq_tail = 0;
+ dd->ipath_sdma_descq_head = 0;
+ dd->ipath_sdma_head_dma[0] = 0;
+ dd->ipath_sdma_generation = 0;
+ dd->ipath_sdma_descq_removed = dd->ipath_sdma_descq_added;
+
+ /* Reset SendDmaLenGen */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen,
+ (u64) dd->ipath_sdma_descq_cnt | (1ULL << 18));
+
+ /* done with sdma state for a bit */
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ /*
+ * Don't restart sdma here (with the exception
+ * below). Wait until link is up to ACTIVE. VL15 MADs
+ * used to bring the link up use PIO, and multiple link
+ * transitions otherwise cause the sdma engine to be
+ * stopped and started multiple times.
+ * The disable is done here, including the shadow,
+ * so the state is kept consistent.
+ * See ipath_restart_sdma() for the actual starting
+ * of sdma.
+ */
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ /* make sure I see next message */
+ dd->ipath_sdma_abort_jiffies = 0;
+
+ /*
+ * Not everything that takes SDMA offline is a link
+ * status change. If the link was up, restart SDMA.
+ */
+ if (dd->ipath_flags & IPATH_LINKACTIVE)
+ ipath_restart_sdma(dd);
+
+ goto done;
+ }
+
+resched:
+ /*
+ * For now, keep spinning.
+ * JAG: it is bad that the default case is just a loop with
+ * no state change.
+ */
+ if (jiffies > dd->ipath_sdma_abort_jiffies) {
+ ipath_dbg("looping with status 0x%08lx\n",
+ dd->ipath_sdma_status);
+ dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
+ }
+resched_noprint:
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
+ return;
+
+unlock:
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+done:
+ return;
+}
+
+/*
+ * This is called from interrupt context.
+ */
+void ipath_sdma_intr(struct ipath_devdata *dd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+ (void) ipath_sdma_make_progress(dd);
+
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+}
+
+static int alloc_sdma(struct ipath_devdata *dd)
+{
+ int ret = 0;
+
+ /* Allocate memory for SendDMA descriptor FIFO */
+ dd->ipath_sdma_descq = dma_alloc_coherent(&dd->pcidev->dev,
+ SDMA_DESCQ_SZ, &dd->ipath_sdma_descq_phys, GFP_KERNEL);
+
+ if (!dd->ipath_sdma_descq) {
+ ipath_dev_err(dd, "failed to allocate SendDMA descriptor "
+ "FIFO memory\n");
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ dd->ipath_sdma_descq_cnt =
+ SDMA_DESCQ_SZ / sizeof(struct ipath_sdma_desc);
+
+ /* Allocate memory for DMA of head register to memory */
+ dd->ipath_sdma_head_dma = dma_alloc_coherent(&dd->pcidev->dev,
+ PAGE_SIZE, &dd->ipath_sdma_head_phys, GFP_KERNEL);
+ if (!dd->ipath_sdma_head_dma) {
+ ipath_dev_err(dd, "failed to allocate SendDMA head memory\n");
+ ret = -ENOMEM;
+ goto cleanup_descq;
+ }
+ dd->ipath_sdma_head_dma[0] = 0;
+
+ init_timer(&dd->ipath_sdma_vl15_timer);
+ dd->ipath_sdma_vl15_timer.function = vl15_watchdog_timeout;
+ dd->ipath_sdma_vl15_timer.data = (unsigned long)dd;
+ atomic_set(&dd->ipath_sdma_vl15_count, 0);
+
+ goto done;
+
+cleanup_descq:
+ dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
+ (void *)dd->ipath_sdma_descq, dd->ipath_sdma_descq_phys);
+ dd->ipath_sdma_descq = NULL;
+ dd->ipath_sdma_descq_phys = 0;
+done:
+ return ret;
+}
+
+int setup_sdma(struct ipath_devdata *dd)
+{
+ int ret = 0;
+ unsigned i, n;
+ u64 tmp64;
+ u64 senddmabufmask[3] = { 0 };
+ unsigned long flags;
+
+ ret = alloc_sdma(dd);
+ if (ret)
+ goto done;
+
+ if (!dd->ipath_sdma_descq) {
+ ipath_dev_err(dd, "SendDMA memory not allocated\n");
+ goto done;
+ }
+
+ /*
+ * Set initial status as if we had been up, then gone down.
+ * This lets initial start on transition to ACTIVE be the
+ * same as restart after link flap.
+ */
+ dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
+ dd->ipath_sdma_abort_jiffies = 0;
+ dd->ipath_sdma_generation = 0;
+ dd->ipath_sdma_descq_tail = 0;
+ dd->ipath_sdma_descq_head = 0;
+ dd->ipath_sdma_descq_removed = 0;
+ dd->ipath_sdma_descq_added = 0;
+
+ /* Set SendDmaBase */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase,
+ dd->ipath_sdma_descq_phys);
+ /* Set SendDmaLenGen */
+ tmp64 = dd->ipath_sdma_descq_cnt;
+ tmp64 |= 1<<18; /* enable generation checking */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, tmp64);
+ /* Set SendDmaTail */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail,
+ dd->ipath_sdma_descq_tail);
+ /* Set SendDmaHeadAddr */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
+ dd->ipath_sdma_head_phys);
+
+ /*
+ * Reserve all the former "kernel" piobufs, using high number range
+ * so we get as many 4K buffers as possible
+ */
+ n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+ i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
+ ipath_chg_pioavailkernel(dd, i, n - i, 0);
+ for (; i < n; ++i) {
+ unsigned word = i / 64;
+ unsigned bit = i & 63;
+ BUG_ON(word >= 3);
+ senddmabufmask[word] |= 1ULL << bit;
+ }
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
+ senddmabufmask[0]);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
+ senddmabufmask[1]);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2,
+ senddmabufmask[2]);
+
+ INIT_LIST_HEAD(&dd->ipath_sdma_activelist);
+ INIT_LIST_HEAD(&dd->ipath_sdma_notifylist);
+
+ tasklet_init(&dd->ipath_sdma_notify_task, sdma_notify_task,
+ (unsigned long) dd);
+ tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task,
+ (unsigned long) dd);
+
+ /*
+ * No use turning on SDMA here, as the link is probably not ACTIVE.
+ * Just mark it RUNNING and enable the interrupt, and let the
+ * ipath_restart_sdma() on link transition to ACTIVE actually
+ * enable it.
+ */
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ __set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+done:
+ return ret;
+}
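
A worked example of the reservation loop above: each reserved PIO buffer index selects one bit across the three 64-bit SendDmaBufMask registers. A minimal sketch of the mapping (hypothetical helper, shown only to make the arithmetic concrete):

	static void piobuf_to_bufmask(unsigned i, unsigned *word, unsigned *bit)
	{
		*word = i / 64;	/* e.g. buffer 70 -> kr_senddmabufmask1 */
		*bit  = i & 63;	/* e.g. buffer 70 -> bit 6 */
	}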
+
+void teardown_sdma(struct ipath_devdata *dd)
+{
+ struct ipath_sdma_txreq *txp, *txpnext;
+ unsigned long flags;
+ dma_addr_t sdma_head_phys = 0;
+ dma_addr_t sdma_descq_phys = 0;
+ void *sdma_descq = NULL;
+ void *sdma_head_dma = NULL;
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ __clear_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
+ __set_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+ __set_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status);
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ tasklet_kill(&dd->ipath_sdma_abort_task);
+ tasklet_kill(&dd->ipath_sdma_notify_task);
+
+ /* turn off sdma */
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ /* dequeue all "sent" requests */
+ list_for_each_entry_safe(txp, txpnext, &dd->ipath_sdma_activelist,
+ list) {
+ txp->callback_status = IPATH_SDMA_TXREQ_S_SHUTDOWN;
+ if (txp->flags & IPATH_SDMA_TXREQ_F_VL15)
+ vl15_watchdog_deq(dd);
+ list_move_tail(&txp->list, &dd->ipath_sdma_notifylist);
+ }
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ sdma_notify_taskbody(dd);
+
+ del_timer_sync(&dd->ipath_sdma_vl15_timer);
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+ dd->ipath_sdma_abort_jiffies = 0;
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabase, 0);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmalengen, 0);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, 0);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 0);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 0);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 0);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask2, 0);
+
+ if (dd->ipath_sdma_head_dma) {
+ sdma_head_dma = (void *) dd->ipath_sdma_head_dma;
+ sdma_head_phys = dd->ipath_sdma_head_phys;
+ dd->ipath_sdma_head_dma = NULL;
+ dd->ipath_sdma_head_phys = 0;
+ }
+
+ if (dd->ipath_sdma_descq) {
+ sdma_descq = dd->ipath_sdma_descq;
+ sdma_descq_phys = dd->ipath_sdma_descq_phys;
+ dd->ipath_sdma_descq = NULL;
+ dd->ipath_sdma_descq_phys = 0;
+ }
+
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ if (sdma_head_dma)
+ dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+ sdma_head_dma, sdma_head_phys);
+
+ if (sdma_descq)
+ dma_free_coherent(&dd->pcidev->dev, SDMA_DESCQ_SZ,
+ sdma_descq, sdma_descq_phys);
+}
+
+/*
+ * [Re]start SDMA, if we use it, and it's not already OK.
+ * This is called on transition to link ACTIVE, either the first or
+ * subsequent times.
+ */
+void ipath_restart_sdma(struct ipath_devdata *dd)
+{
+ unsigned long flags;
+ int needed = 1;
+
+ if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
+ goto bail;
+
+ /*
+ * First, make sure we should, which is to say,
+ * check that we are "RUNNING" (not in teardown)
+ * and not "SHUTDOWN"
+ */
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)
+ || test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ needed = 0;
+ else {
+ __clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+ __clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
+ __clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+ }
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ if (!needed) {
+ ipath_dbg("invalid attempt to restart SDMA, status 0x%08lx\n",
+ dd->ipath_sdma_status);
+ goto bail;
+ }
+ spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ /*
+ * First clear, just to be safe. Enable is only done
+ * in chip on 0->1 transition
+ */
+ dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+ ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+ /* notify upper layers */
+ ipath_ib_piobufavail(dd->verbs_dev);
+
+bail:
+ return;
+}
+
+static inline void make_sdma_desc(struct ipath_devdata *dd,
+ u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset)
+{
+ WARN_ON(addr & 3);
+ /* SDmaPhyAddr[47:32] */
+ sdmadesc[1] = addr >> 32;
+ /* SDmaPhyAddr[31:0] */
+ sdmadesc[0] = (addr & 0xfffffffcULL) << 32;
+ /* SDmaGeneration[1:0] */
+ sdmadesc[0] |= (dd->ipath_sdma_generation & 3ULL) << 30;
+ /* SDmaDwordCount[10:0] */
+ sdmadesc[0] |= (dwlen & 0x7ffULL) << 16;
+ /* SDmaBufOffset[12:2] */
+ sdmadesc[0] |= dwoffset & 0x7ffULL;
+}
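
To make the packing above easier to follow, here is a minimal sketch of the inverse operation on the first quadword, assuming exactly the bit layout documented in the comments (hypothetical decoder, for illustration only):

	static void decode_sdma_desc0(u64 qw0, u32 *addr_lo, u32 *gen,
				      u32 *dwlen, u32 *dwoffset)
	{
		*addr_lo  = (u32) (qw0 >> 32);		/* SDmaPhyAddr[31:0] */
		*gen      = (qw0 >> 30) & 3;		/* SDmaGeneration[1:0] */
		*dwlen    = (qw0 >> 16) & 0x7ff;	/* SDmaDwordCount[10:0] */
		*dwoffset = qw0 & 0x7ff;		/* SDmaBufOffset[12:2] */
	}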
+
+/*
+ * This function queues one IB packet onto the send DMA queue per call.
+ * The caller is responsible for checking:
+ * 1) The number of send DMA descriptor entries is less than the size of
+ * the descriptor queue.
+ * 2) The IB SGE addresses and lengths are 32-bit aligned
+ * (except possibly the last SGE's length)
+ * 3) The SGE addresses are suitable for passing to dma_map_single().
+ */
+int ipath_sdma_verbs_send(struct ipath_devdata *dd,
+ struct ipath_sge_state *ss, u32 dwords,
+ struct ipath_verbs_txreq *tx)
+{
+
+ unsigned long flags;
+ struct ipath_sge *sge;
+ int ret = 0;
+ u16 tail;
+ __le64 *descqp;
+ u64 sdmadesc[2];
+ u32 dwoffset;
+ dma_addr_t addr;
+
+ if ((tx->map_len + (dwords<<2)) > dd->ipath_ibmaxlen) {
+ ipath_dbg("packet size %X > ibmax %X, fail\n",
+ tx->map_len + (dwords<<2), dd->ipath_ibmaxlen);
+ ret = -EMSGSIZE;
+ goto fail;
+ }
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+retry:
+ if (unlikely(test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status))) {
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ if (tx->txreq.sg_count > ipath_sdma_descq_freecnt(dd)) {
+ if (ipath_sdma_make_progress(dd))
+ goto retry;
+ ret = -ENOBUFS;
+ goto unlock;
+ }
+
+ addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
+ tx->map_len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev, addr)) {
+ ret = -EIO;
+ goto unlock;
+ }
+
+ dwoffset = tx->map_len >> 2;
+ make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
+
+ /* SDmaFirstDesc */
+ sdmadesc[0] |= 1ULL << 12;
+ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
+ sdmadesc[0] |= 1ULL << 14; /* SDmaUseLargeBuf */
+
+ /* write to the descq */
+ tail = dd->ipath_sdma_descq_tail;
+ descqp = &dd->ipath_sdma_descq[tail].qw[0];
+ *descqp++ = cpu_to_le64(sdmadesc[0]);
+ *descqp++ = cpu_to_le64(sdmadesc[1]);
+
+ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEDESC)
+ tx->txreq.start_idx = tail;
+
+ /* increment the tail */
+ if (++tail == dd->ipath_sdma_descq_cnt) {
+ tail = 0;
+ descqp = &dd->ipath_sdma_descq[0].qw[0];
+ ++dd->ipath_sdma_generation;
+ }
+
+ sge = &ss->sge;
+ while (dwords) {
+ u32 dw;
+ u32 len;
+
+ len = dwords << 2;
+ if (len > sge->length)
+ len = sge->length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ dw = (len + 3) >> 2;
+ addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
+ DMA_TO_DEVICE);
+ make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
+ /* SDmaUseLargeBuf has to be set in every descriptor */
+ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
+ sdmadesc[0] |= 1ULL << 14;
+ /* write to the descq */
+ *descqp++ = cpu_to_le64(sdmadesc[0]);
+ *descqp++ = cpu_to_le64(sdmadesc[1]);
+
+ /* increment the tail */
+ if (++tail == dd->ipath_sdma_descq_cnt) {
+ tail = 0;
+ descqp = &dd->ipath_sdma_descq[0].qw[0];
+ ++dd->ipath_sdma_generation;
+ }
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr != NULL) {
+ if (++sge->n >= IPATH_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+
+ dwoffset += dw;
+ dwords -= dw;
+ }
+
+ if (!tail)
+ descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0];
+ descqp -= 2;
+ /* SDmaLastDesc */
+ descqp[0] |= __constant_cpu_to_le64(1ULL << 11);
+ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) {
+ /* SDmaIntReq */
+ descqp[0] |= __constant_cpu_to_le64(1ULL << 15);
+ }
+
+ /* Commit writes to memory and advance the tail on the chip */
+ wmb();
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
+
+ tx->txreq.next_descq_idx = tail;
+ tx->txreq.callback_status = IPATH_SDMA_TXREQ_S_OK;
+ dd->ipath_sdma_descq_tail = tail;
+ dd->ipath_sdma_descq_added += tx->txreq.sg_count;
+ list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
+ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
+ vl15_watchdog_enq(dd);
+
+unlock:
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+fail:
+ return ret;
+}
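
The descriptor-space check above relies on ipath_sdma_descq_freecnt(), defined elsewhere in the driver; a plausible sketch of that accounting, consistent with the added/removed counters maintained in this file (an assumption, not the actual implementation):

	static inline unsigned sdma_descq_freecnt_sketch(const struct ipath_devdata *dd)
	{
		/* keep one slot unused so a full ring is distinguishable
		 * from an empty one */
		return dd->ipath_sdma_descq_cnt -
			(dd->ipath_sdma_descq_added - dd->ipath_sdma_descq_removed) - 1;
	}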
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
new file mode 100644
index 0000000..e3d80ca
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+
+#include "ipath_verbs.h"
+
+/**
+ * ipath_post_srq_receive - post a receive on a shared receive queue
+ * @ibsrq: the SRQ to post the receive on
+ * @wr: the list of work requests to post
+ * @bad_wr: the first WR to cause a problem is put here
+ *
+ * This may be called from interrupt context.
+ */
+int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct ipath_srq *srq = to_isrq(ibsrq);
+ struct ipath_rwq *wq;
+ unsigned long flags;
+ int ret;
+
+ for (; wr; wr = wr->next) {
+ struct ipath_rwqe *wqe;
+ u32 next;
+ int i;
+
+ if ((unsigned) wr->num_sge > srq->rq.max_sge) {
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ spin_lock_irqsave(&srq->rq.lock, flags);
+ wq = srq->rq.wq;
+ next = wq->head + 1;
+ if (next >= srq->rq.size)
+ next = 0;
+ if (next == wq->tail) {
+ spin_unlock_irqrestore(&srq->rq.lock, flags);
+ *bad_wr = wr;
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ wqe = get_rwqe_ptr(&srq->rq, wq->head);
+ wqe->wr_id = wr->wr_id;
+ wqe->num_sge = wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++)
+ wqe->sg_list[i] = wr->sg_list[i];
+ /* Make sure queue entry is written before the head index. */
+ smp_wmb();
+ wq->head = next;
+ spin_unlock_irqrestore(&srq->rq.lock, flags);
+ }
+ ret = 0;
+
+bail:
+ return ret;
+}
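
The overflow test above is the usual one-slot-empty ring convention; a minimal standalone sketch of the same invariant (hypothetical helper):

	static inline int rwq_full(u32 head, u32 tail, u32 size)
	{
		u32 next = head + 1;

		if (next >= size)
			next = 0;
		return next == tail;	/* advancing head would collide with the consumer */
	}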
+
+/**
+ * ipath_create_srq - create a shared receive queue
+ * @ibpd: the protection domain of the SRQ to create
+ * @srq_init_attr: the attributes of the SRQ
+ * @udata: data from libipathverbs when creating a user SRQ
+ */
+struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata)
+{
+ struct ipath_ibdev *dev = to_idev(ibpd->device);
+ struct ipath_srq *srq;
+ u32 sz;
+ struct ib_srq *ret;
+
+ if (srq_init_attr->attr.max_wr == 0) {
+ ret = ERR_PTR(-EINVAL);
+ goto done;
+ }
+
+ if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
+ (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
+ ret = ERR_PTR(-EINVAL);
+ goto done;
+ }
+
+ srq = kmalloc(sizeof(*srq), GFP_KERNEL);
+ if (!srq) {
+ ret = ERR_PTR(-ENOMEM);
+ goto done;
+ }
+
+ /*
+ * Need to use vmalloc() if we want to support large #s of entries.
+ */
+ srq->rq.size = srq_init_attr->attr.max_wr + 1;
+ srq->rq.max_sge = srq_init_attr->attr.max_sge;
+ sz = sizeof(struct ib_sge) * srq->rq.max_sge +
+ sizeof(struct ipath_rwqe);
+ srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
+ if (!srq->rq.wq) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_srq;
+ }
+
+ /*
+ * Return the address of the RWQ as the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ int err;
+ u32 s = sizeof(struct ipath_rwq) + srq->rq.size * sz;
+
+ srq->ip =
+ ipath_create_mmap_info(dev, s,
+ ibpd->uobject->context,
+ srq->rq.wq);
+ if (!srq->ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wq;
+ }
+
+ err = ib_copy_to_udata(udata, &srq->ip->offset,
+ sizeof(srq->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
+ }
+ } else
+ srq->ip = NULL;
+
+ /*
+ * ib_create_srq() will initialize srq->ibsrq.
+ */
+ spin_lock_init(&srq->rq.lock);
+ srq->rq.wq->head = 0;
+ srq->rq.wq->tail = 0;
+ srq->limit = srq_init_attr->attr.srq_limit;
+
+ spin_lock(&dev->n_srqs_lock);
+ if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
+ spin_unlock(&dev->n_srqs_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ dev->n_srqs_allocated++;
+ spin_unlock(&dev->n_srqs_lock);
+
+ if (srq->ip) {
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
+ ret = &srq->ibsrq;
+ goto done;
+
+bail_ip:
+ kfree(srq->ip);
+bail_wq:
+ vfree(srq->rq.wq);
+bail_srq:
+ kfree(srq);
+done:
+ return ret;
+}
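
get_rwqe_ptr() is defined elsewhere (ipath_verbs.h); a sketch consistent with the per-entry size computed above, where each entry is a struct ipath_rwqe followed by max_sge struct ib_sge slots (an assumption, shown only to clarify the sz arithmetic):

	static inline struct ipath_rwqe *rwqe_ptr_sketch(struct ipath_rq *rq, unsigned n)
	{
		return (struct ipath_rwqe *)
			((char *) rq->wq->wq +
			 (sizeof(struct ipath_rwqe) +
			  rq->max_sge * sizeof(struct ib_sge)) * n);
	}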
+
+/**
+ * ipath_modify_srq - modify a shared receive queue
+ * @ibsrq: the SRQ to modify
+ * @attr: the new attributes of the SRQ
+ * @attr_mask: indicates which attributes to modify
+ * @udata: user data for ipathverbs.so
+ */
+int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata)
+{
+ struct ipath_srq *srq = to_isrq(ibsrq);
+ struct ipath_rwq *wq;
+ int ret = 0;
+
+ if (attr_mask & IB_SRQ_MAX_WR) {
+ struct ipath_rwq *owq;
+ struct ipath_rwqe *p;
+ u32 sz, size, n, head, tail;
+
+ /* Check that the requested sizes are below the limits. */
+ if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
+ ((attr_mask & IB_SRQ_LIMIT) ?
+ attr->srq_limit : srq->limit) > attr->max_wr) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ sz = sizeof(struct ipath_rwqe) +
+ srq->rq.max_sge * sizeof(struct ib_sge);
+ size = attr->max_wr + 1;
+ wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
+ if (!wq) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ /* Check that we can write the offset to mmap. */
+ if (udata && udata->inlen >= sizeof(__u64)) {
+ __u64 offset_addr;
+ __u64 offset = 0;
+
+ ret = ib_copy_from_udata(&offset_addr, udata,
+ sizeof(offset_addr));
+ if (ret)
+ goto bail_free;
+ udata->outbuf =
+ (void __user *) (unsigned long) offset_addr;
+ ret = ib_copy_to_udata(udata, &offset,
+ sizeof(offset));
+ if (ret)
+ goto bail_free;
+ }
+
+ spin_lock_irq(&srq->rq.lock);
+ /*
+ * validate head pointer value and compute
+ * the number of remaining WQEs.
+ */
+ owq = srq->rq.wq;
+ head = owq->head;
+ if (head >= srq->rq.size)
+ head = 0;
+ tail = owq->tail;
+ if (tail >= srq->rq.size)
+ tail = 0;
+ n = head;
+ if (n < tail)
+ n += srq->rq.size - tail;
+ else
+ n -= tail;
+ if (size <= n) {
+ ret = -EINVAL;
+ goto bail_unlock;
+ }
+ n = 0;
+ p = wq->wq;
+ while (tail != head) {
+ struct ipath_rwqe *wqe;
+ int i;
+
+ wqe = get_rwqe_ptr(&srq->rq, tail);
+ p->wr_id = wqe->wr_id;
+ p->num_sge = wqe->num_sge;
+ for (i = 0; i < wqe->num_sge; i++)
+ p->sg_list[i] = wqe->sg_list[i];
+ n++;
+ p = (struct ipath_rwqe *)((char *) p + sz);
+ if (++tail >= srq->rq.size)
+ tail = 0;
+ }
+ srq->rq.wq = wq;
+ srq->rq.size = size;
+ wq->head = n;
+ wq->tail = 0;
+ if (attr_mask & IB_SRQ_LIMIT)
+ srq->limit = attr->srq_limit;
+ spin_unlock_irq(&srq->rq.lock);
+
+ vfree(owq);
+
+ if (srq->ip) {
+ struct ipath_mmap_info *ip = srq->ip;
+ struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
+ u32 s = sizeof(struct ipath_rwq) + size * sz;
+
+ ipath_update_mmap_info(dev, ip, s, wq);
+
+ /*
+ * Return the offset to mmap.
+ * See ipath_mmap() for details.
+ */
+ if (udata && udata->inlen >= sizeof(__u64)) {
+ ret = ib_copy_to_udata(udata, &ip->offset,
+ sizeof(ip->offset));
+ if (ret)
+ goto bail;
+ }
+
+ spin_lock_irq(&dev->pending_lock);
+ if (list_empty(&ip->pending_mmaps))
+ list_add(&ip->pending_mmaps,
+ &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+ } else if (attr_mask & IB_SRQ_LIMIT) {
+ spin_lock_irq(&srq->rq.lock);
+ if (attr->srq_limit >= srq->rq.size)
+ ret = -EINVAL;
+ else
+ srq->limit = attr->srq_limit;
+ spin_unlock_irq(&srq->rq.lock);
+ }
+ goto bail;
+
+bail_unlock:
+ spin_unlock_irq(&srq->rq.lock);
+bail_free:
+ vfree(wq);
+bail:
+ return ret;
+}
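
The resize path above counts the WQEs still queued in the old ring before copying them; a closed-form sketch equivalent to that head/tail arithmetic (hypothetical helper):

	static inline u32 rwq_count(u32 head, u32 tail, u32 size)
	{
		/* head/tail already clamped to [0, size) */
		return (head >= tail) ? head - tail : head + size - tail;
	}

The new ring must be strictly larger than this count, since one slot always stays unused.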
+
+int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+ struct ipath_srq *srq = to_isrq(ibsrq);
+
+ attr->max_wr = srq->rq.size - 1;
+ attr->max_sge = srq->rq.max_sge;
+ attr->srq_limit = srq->limit;
+ return 0;
+}
+
+/**
+ * ipath_destroy_srq - destroy a shared receive queue
+ * @ibsrq: the SRQ to destroy
+ */
+int ipath_destroy_srq(struct ib_srq *ibsrq)
+{
+ struct ipath_srq *srq = to_isrq(ibsrq);
+ struct ipath_ibdev *dev = to_idev(ibsrq->device);
+
+ spin_lock(&dev->n_srqs_lock);
+ dev->n_srqs_allocated--;
+ spin_unlock(&dev->n_srqs_lock);
+ if (srq->ip)
+ kref_put(&srq->ip->ref, ipath_release_mmap_info);
+ else
+ vfree(srq->rq.wq);
+ kfree(srq);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
new file mode 100644
index 0000000..c8e3d65
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ipath_kernel.h"
+
+struct infinipath_stats ipath_stats;
+
+/**
+ * ipath_snap_cntr - snapshot a chip counter
+ * @dd: the infinipath device
+ * @creg: the counter to snapshot
+ *
+ * called from add_timer and user counter read calls, to deal with
+ * counters that wrap in "human time". The words sent and received, and
+ * the packets sent and received are all that we worry about. For now,
+ * at least, we don't worry about error counters, because if they wrap
+ * that quickly, we probably don't care. We may eventually just make this
+ * handle all the counters. Word counters can wrap in about 20 seconds
+ * of full-bandwidth traffic, packet counters in a few hours.
+ */
+
+u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
+{
+ u32 val, reg64 = 0;
+ u64 val64;
+ unsigned long t0, t1;
+ u64 ret;
+
+ t0 = jiffies;
+ /* If fast increment counters are only 32 bits, snapshot them,
+ * and maintain them as 64-bit values in the driver */
+ if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) &&
+ (creg == dd->ipath_cregs->cr_wordsendcnt ||
+ creg == dd->ipath_cregs->cr_wordrcvcnt ||
+ creg == dd->ipath_cregs->cr_pktsendcnt ||
+ creg == dd->ipath_cregs->cr_pktrcvcnt)) {
+ val64 = ipath_read_creg(dd, creg);
+ val = val64 == ~0ULL ? ~0U : 0;
+ reg64 = 1;
+ } else /* val64 just to keep gcc quiet... */
+ val64 = val = ipath_read_creg32(dd, creg);
+ /*
+ * See if a second has passed. This is just a way to detect things
+ * that are quite broken. Normally this should take just a few
+ * cycles (the check is for long enough that we don't care if we get
+ * pre-empted.) An Opteron HT O read timeout is 4 seconds with
+ * normal NB values
+ */
+ t1 = jiffies;
+ if (time_before(t0 + HZ, t1) && val == -1) {
+ ipath_dev_err(dd, "Error! Read counter 0x%x timed out\n",
+ creg);
+ ret = 0ULL;
+ goto bail;
+ }
+ if (reg64) {
+ ret = val64;
+ goto bail;
+ }
+
+ if (creg == dd->ipath_cregs->cr_wordsendcnt) {
+ if (val != dd->ipath_lastsword) {
+ dd->ipath_sword += val - dd->ipath_lastsword;
+ dd->ipath_lastsword = val;
+ }
+ val64 = dd->ipath_sword;
+ } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
+ if (val != dd->ipath_lastrword) {
+ dd->ipath_rword += val - dd->ipath_lastrword;
+ dd->ipath_lastrword = val;
+ }
+ val64 = dd->ipath_rword;
+ } else if (creg == dd->ipath_cregs->cr_pktsendcnt) {
+ if (val != dd->ipath_lastspkts) {
+ dd->ipath_spkts += val - dd->ipath_lastspkts;
+ dd->ipath_lastspkts = val;
+ }
+ val64 = dd->ipath_spkts;
+ } else if (creg == dd->ipath_cregs->cr_pktrcvcnt) {
+ if (val != dd->ipath_lastrpkts) {
+ dd->ipath_rpkts += val - dd->ipath_lastrpkts;
+ dd->ipath_lastrpkts = val;
+ }
+ val64 = dd->ipath_rpkts;
+ } else
+ val64 = (u64) val;
+
+ ret = val64;
+
+bail:
+ return ret;
+}
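
Each of the four fast counters above is folded into its 64-bit shadow with the same wrap-safe pattern; a minimal sketch, relying on unsigned 32-bit subtraction wrapping modulo 2^32 (hypothetical helper):

	static inline u64 accumulate32(u64 total, u32 *last, u32 now)
	{
		total += (u32) (now - *last);	/* correct across a single 32-bit wrap */
		*last = now;
		return total;
	}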
+
+/**
+ * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports
+ * @dd: the infinipath device
+ *
+ * Print the delta of egrfull/hdrqfull errors for kernel ports no more than
+ * every 5 seconds. User processes are printed at close, but the kernel
+ * doesn't close, so... This is a separate routine so it may be called from
+ * other places someday, and so the function name is meaningful when printed
+ * by _IPATH_INFO.
+ */
+static void ipath_qcheck(struct ipath_devdata *dd)
+{
+ static u64 last_tot_hdrqfull;
+ struct ipath_portdata *pd = dd->ipath_pd[0];
+ size_t blen = 0;
+ char buf[128];
+ u32 hdrqtail;
+
+ *buf = 0;
+ if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) {
+ blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
+ pd->port_hdrqfull -
+ dd->ipath_p0_hdrqfull);
+ dd->ipath_p0_hdrqfull = pd->port_hdrqfull;
+ }
+ if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
+ blen += snprintf(buf + blen, sizeof buf - blen,
+ "%srcvegrfull %llu",
+ blen ? ", " : "",
+ (unsigned long long)
+ (ipath_stats.sps_etidfull -
+ dd->ipath_last_tidfull));
+ dd->ipath_last_tidfull = ipath_stats.sps_etidfull;
+ }
+
+ /*
+ * this is actually the number of hdrq full interrupts, not actual
+ * events, but at the moment that's mostly what I'm interested in.
+ * Actual count, etc. is in the counters, if needed. For production
+ * users this won't ordinarily be printed.
+ */
+
+ if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) &&
+ ipath_stats.sps_hdrqfull != last_tot_hdrqfull) {
+ blen += snprintf(buf + blen, sizeof buf - blen,
+ "%shdrqfull %llu (all ports)",
+ blen ? ", " : "",
+ (unsigned long long)
+ (ipath_stats.sps_hdrqfull -
+ last_tot_hdrqfull));
+ last_tot_hdrqfull = ipath_stats.sps_hdrqfull;
+ }
+ if (blen)
+ ipath_dbg("%s\n", buf);
+
+ hdrqtail = ipath_get_hdrqtail(pd);
+ if (pd->port_head != hdrqtail) {
+ if (dd->ipath_lastport0rcv_cnt ==
+ ipath_stats.sps_port0pkts) {
+ ipath_cdbg(PKT, "missing rcv interrupts? "
+ "port0 hd=%x tl=%x; port0pkts %llx; write"
+ " hd (w/intr)\n",
+ pd->port_head, hdrqtail,
+ (unsigned long long)
+ ipath_stats.sps_port0pkts);
+ ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail |
+ dd->ipath_rhdrhead_intr_off, pd->port_port);
+ }
+ dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
+ }
+}
+
+static void ipath_chk_errormask(struct ipath_devdata *dd)
+{
+ static u32 fixed;
+ u32 ctrl;
+ unsigned long errormask;
+ unsigned long hwerrs;
+
+ if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
+ return;
+
+ errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);
+
+ if (errormask == dd->ipath_errormask)
+ return;
+ fixed++;
+
+ hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
+ ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+ dd->ipath_errormask);
+
+ if ((hwerrs & dd->ipath_hwerrmask) ||
+ (ctrl & INFINIPATH_C_FREEZEMODE)) {
+ /* force re-interrupt of pending events, just in case */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
+ dev_info(&dd->pcidev->dev,
+ "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
+ fixed, errormask, (unsigned long)dd->ipath_errormask,
+ ctrl, hwerrs);
+ } else
+ ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
+ fixed, errormask,
+ (unsigned long)dd->ipath_errormask);
+}
+
+
+/**
+ * ipath_get_faststats - get word counters from chip before they overflow
+ * @opaque - contains a pointer to the infinipath device ipath_devdata
+ *
+ * called from add_timer
+ */
+void ipath_get_faststats(unsigned long opaque)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
+ int i;
+ static unsigned cnt;
+ unsigned long flags;
+ u64 traffic_wds;
+
+ /*
+ * don't access the chip while running diags, or memory diags can
+ * fail
+ */
+ if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
+ ipath_diag_inuse)
+ /* but re-arm the timer anyway, for the diags case; it won't hurt otherwise */
+ goto done;
+
+ /*
+ * We now try to maintain a "active timer", based on traffic
+ * exceeding a threshold, so we need to check the word-counts
+ * even if they are 64-bit.
+ */
+ traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) +
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+ spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+ traffic_wds -= dd->ipath_traffic_wds;
+ dd->ipath_traffic_wds += traffic_wds;
+ if (traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
+ atomic_add(5, &dd->ipath_active_time); /* should be a #define */
+ spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+
+ if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
+ ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+ }
+
+ ipath_qcheck(dd);
+
+ /*
+ * deal with repeat error suppression. Doesn't really matter if
+ * last error was almost a full interval ago, or just a few usecs
+ * ago; still won't get more than 2 per interval. We may want
+ * longer intervals for this eventually, could do with mod, counter
+ * or separate timer. Also see code in ipath_handle_errors() and
+ * ipath_handle_hwerrors().
+ */
+
+ if (dd->ipath_lasterror)
+ dd->ipath_lasterror = 0;
+ if (dd->ipath_lasthwerror)
+ dd->ipath_lasthwerror = 0;
+ if (dd->ipath_maskederrs
+ && time_after(jiffies, dd->ipath_unmasktime)) {
+ char ebuf[256];
+ int iserr;
+ iserr = ipath_decode_err(dd, ebuf, sizeof ebuf,
+ dd->ipath_maskederrs);
+ if (dd->ipath_maskederrs &
+ ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+ INFINIPATH_E_PKTERRS))
+ ipath_dev_err(dd, "Re-enabling masked errors "
+ "(%s)\n", ebuf);
+ else {
+ /*
+ * rcvegrfull and rcvhdrqfull are "normal", for some
+ * types of processes (mostly benchmarks) that send
+ * huge numbers of messages, while not processing
+ * them. So only complain about these at debug
+ * level.
+ */
+ if (iserr)
+ ipath_dbg(
+ "Re-enabling queue full errors (%s)\n",
+ ebuf);
+ else
+ ipath_cdbg(ERRPKT, "Re-enabling packet"
+ " problem interrupt (%s)\n", ebuf);
+ }
+
+ /* re-enable masked errors */
+ dd->ipath_errormask |= dd->ipath_maskederrs;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+ dd->ipath_errormask);
+ dd->ipath_maskederrs = 0;
+ }
+
+ /* limit qfull messages to ~one per minute per port */
+ if ((++cnt & 0x10)) {
+ for (i = (int) dd->ipath_cfgports; --i >= 0; ) {
+ struct ipath_portdata *pd = dd->ipath_pd[i];
+
+ if (pd && pd->port_lastrcvhdrqtail != -1)
+ pd->port_lastrcvhdrqtail = -1;
+ }
+ }
+
+ ipath_chk_errormask(dd);
+done:
+ mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
new file mode 100644
index 0000000..a6c8efb
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -0,0 +1,1236 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/ctype.h>
+
+#include "ipath_kernel.h"
+#include "ipath_verbs.h"
+#include "ipath_common.h"
+
+/**
+ * ipath_parse_ushort - parse an unsigned short value in an arbitrary base
+ * @str: the string containing the number
+ * @valp: where to put the result
+ *
+ * returns the number of bytes consumed, or negative value on error
+ */
+int ipath_parse_ushort(const char *str, unsigned short *valp)
+{
+ unsigned long val;
+ char *end;
+ int ret;
+
+ if (!isdigit(str[0])) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ val = simple_strtoul(str, &end, 0);
+
+ if (val > 0xffff) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ *valp = val;
+
+ ret = end + 1 - str;
+ if (ret == 0)
+ ret = -EINVAL;
+
+bail:
+ return ret;
+}
+
+static ssize_t show_version(struct device_driver *dev, char *buf)
+{
+ /* The string printed here is already newline-terminated. */
+ return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
+}
+
+static ssize_t show_num_units(struct device_driver *dev, char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ ipath_count_units(NULL, NULL, NULL));
+}
+
+static ssize_t show_status(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ ssize_t ret;
+
+ if (!dd->ipath_statusp) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long) *(dd->ipath_statusp));
+
+bail:
+ return ret;
+}
+
+static const char *ipath_status_str[] = {
+ "Initted",
+ "Disabled",
+ "Admin_Disabled",
+ "", /* This used to be the old "OIB_SMA" status. */
+ "", /* This used to be the old "SMA" status. */
+ "Present",
+ "IB_link_up",
+ "IB_configured",
+ "NoIBcable",
+ "Fatal_Hardware_Error",
+ NULL,
+};
+
+static ssize_t show_status_str(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int i, any;
+ u64 s;
+ ssize_t ret;
+
+ if (!dd->ipath_statusp) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ s = *(dd->ipath_statusp);
+ *buf = '\0';
+ for (any = i = 0; s && ipath_status_str[i]; i++) {
+ if (s & 1) {
+ if (any && strlcat(buf, " ", PAGE_SIZE) >=
+ PAGE_SIZE)
+ /* overflow */
+ break;
+ if (strlcat(buf, ipath_status_str[i],
+ PAGE_SIZE) >= PAGE_SIZE)
+ break;
+ any = 1;
+ }
+ s >>= 1;
+ }
+ if (any)
+ strlcat(buf, "\n", PAGE_SIZE);
+
+ ret = strlen(buf);
+
+bail:
+ return ret;
+}
+
+static ssize_t show_boardversion(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ /* The string printed here is already newline-terminated. */
+ return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
+}
+
+static ssize_t show_localbus_info(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ /* The string printed here is already newline-terminated. */
+ return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_lbus_info);
+}
+
+static ssize_t show_lmc(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_lmc);
+}
+
+static ssize_t store_lmc(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ u16 lmc = 0;
+ int ret;
+
+ ret = ipath_parse_ushort(buf, &lmc);
+ if (ret < 0)
+ goto invalid;
+
+ if (lmc > 7) {
+ ret = -EINVAL;
+ goto invalid;
+ }
+
+ ipath_set_lid(dd, dd->ipath_lid, lmc);
+
+ goto bail;
+invalid:
+ ipath_dev_err(dd, "attempt to set invalid LMC %u\n", lmc);
+bail:
+ return ret;
+}
+
+static ssize_t show_lid(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_lid);
+}
+
+static ssize_t store_lid(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ u16 lid = 0;
+ int ret;
+
+ ret = ipath_parse_ushort(buf, &lid);
+ if (ret < 0)
+ goto invalid;
+
+ if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) {
+ ret = -EINVAL;
+ goto invalid;
+ }
+
+ ipath_set_lid(dd, lid, dd->ipath_lmc);
+
+ goto bail;
+invalid:
+ ipath_dev_err(dd, "attempt to set invalid LID 0x%x\n", lid);
+bail:
+ return ret;
+}
+
+static ssize_t show_mlid(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", dd->ipath_mlid);
+}
+
+static ssize_t store_mlid(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ u16 mlid;
+ int ret;
+
+ ret = ipath_parse_ushort(buf, &mlid);
+ if (ret < 0 || mlid < IPATH_MULTICAST_LID_BASE)
+ goto invalid;
+
+ dd->ipath_mlid = mlid;
+
+ goto bail;
+invalid:
+ ipath_dev_err(dd, "attempt to set invalid MLID\n");
+bail:
+ return ret;
+}
+
+static ssize_t show_guid(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ u8 *guid;
+
+ guid = (u8 *) & (dd->ipath_guid);
+
+ return scnprintf(buf, PAGE_SIZE,
+ "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
+ guid[0], guid[1], guid[2], guid[3],
+ guid[4], guid[5], guid[6], guid[7]);
+}
+
+static ssize_t store_guid(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ ssize_t ret;
+ unsigned short guid[8];
+ __be64 new_guid;
+ u8 *ng;
+ int i;
+
+ if (sscanf(buf, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
+ &guid[0], &guid[1], &guid[2], &guid[3],
+ &guid[4], &guid[5], &guid[6], &guid[7]) != 8)
+ goto invalid;
+
+ ng = (u8 *) &new_guid;
+
+ for (i = 0; i < 8; i++) {
+ if (guid[i] > 0xff)
+ goto invalid;
+ ng[i] = guid[i];
+ }
+
+ if (new_guid == 0)
+ goto invalid;
+
+ dd->ipath_guid = new_guid;
+ dd->ipath_nguid = 1;
+ if (dd->verbs_dev)
+ dd->verbs_dev->ibdev.node_guid = new_guid;
+
+ ret = strlen(buf);
+ goto bail;
+
+invalid:
+ ipath_dev_err(dd, "attempt to set invalid GUID\n");
+ ret = -EINVAL;
+
+bail:
+ return ret;
+}
+
+static ssize_t show_nguid(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
+}
+
+static ssize_t show_nports(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ /* Return the number of user ports available. */
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1);
+}
+
+static ssize_t show_serial(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ buf[sizeof dd->ipath_serial] = '\0';
+ memcpy(buf, dd->ipath_serial, sizeof dd->ipath_serial);
+ strcat(buf, "\n");
+ return strlen(buf);
+}
+
+static ssize_t show_unit(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_unit);
+}
+
+static ssize_t show_jint_max_packets(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_max_packets);
+}
+
+static ssize_t store_jint_max_packets(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ u16 v = 0;
+ int ret;
+
+ ret = ipath_parse_ushort(buf, &v);
+ if (ret < 0)
+ ipath_dev_err(dd, "invalid jint_max_packets.\n");
+ else
+ dd->ipath_f_config_jint(dd, dd->ipath_jint_idle_ticks, v);
+
+ return ret;
+}
+
+static ssize_t show_jint_idle_ticks(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%hu\n", dd->ipath_jint_idle_ticks);
+}
+
+static ssize_t store_jint_idle_ticks(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ u16 v = 0;
+ int ret;
+
+ ret = ipath_parse_ushort(buf, &v);
+ if (ret < 0)
+ ipath_dev_err(dd, "invalid jint_idle_ticks.\n");
+ else
+ dd->ipath_f_config_jint(dd, v, dd->ipath_jint_max_packets);
+
+ return ret;
+}
+
+#define DEVICE_COUNTER(name, attr) \
+ static ssize_t show_counter_##name(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+ { \
+ struct ipath_devdata *dd = dev_get_drvdata(dev); \
+ return scnprintf(\
+ buf, PAGE_SIZE, "%llu\n", (unsigned long long) \
+ ipath_snap_cntr( \
+ dd, offsetof(struct infinipath_counters, \
+ attr) / sizeof(u64))); \
+ } \
+ static DEVICE_ATTR(name, S_IRUGO, show_counter_##name, NULL);
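
For one counter, e.g. DEVICE_COUNTER(lb_ints, LBIntCnt) below, the macro expands to roughly the following; note that the attr macro parameter also renames the (unused) device_attribute argument, and the counter register index is taken from the field's offset within struct infinipath_counters:

	static ssize_t show_counter_lb_ints(struct device *dev,
					    struct device_attribute *LBIntCnt,
					    char *buf)
	{
		struct ipath_devdata *dd = dev_get_drvdata(dev);
		return scnprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)
			ipath_snap_cntr(dd, offsetof(struct infinipath_counters,
						     LBIntCnt) / sizeof(u64)));
	}
	static DEVICE_ATTR(lb_ints, S_IRUGO, show_counter_lb_ints, NULL);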
+
+DEVICE_COUNTER(ib_link_downeds, IBLinkDownedCnt);
+DEVICE_COUNTER(ib_link_err_recoveries, IBLinkErrRecoveryCnt);
+DEVICE_COUNTER(ib_status_changes, IBStatusChangeCnt);
+DEVICE_COUNTER(ib_symbol_errs, IBSymbolErrCnt);
+DEVICE_COUNTER(lb_flow_stalls, LBFlowStallCnt);
+DEVICE_COUNTER(lb_ints, LBIntCnt);
+DEVICE_COUNTER(rx_bad_formats, RxBadFormatCnt);
+DEVICE_COUNTER(rx_buf_ovfls, RxBufOvflCnt);
+DEVICE_COUNTER(rx_data_pkts, RxDataPktCnt);
+DEVICE_COUNTER(rx_dropped_pkts, RxDroppedPktCnt);
+DEVICE_COUNTER(rx_dwords, RxDwordCnt);
+DEVICE_COUNTER(rx_ebps, RxEBPCnt);
+DEVICE_COUNTER(rx_flow_ctrl_errs, RxFlowCtrlErrCnt);
+DEVICE_COUNTER(rx_flow_pkts, RxFlowPktCnt);
+DEVICE_COUNTER(rx_icrc_errs, RxICRCErrCnt);
+DEVICE_COUNTER(rx_len_errs, RxLenErrCnt);
+DEVICE_COUNTER(rx_link_problems, RxLinkProblemCnt);
+DEVICE_COUNTER(rx_lpcrc_errs, RxLPCRCErrCnt);
+DEVICE_COUNTER(rx_max_min_len_errs, RxMaxMinLenErrCnt);
+DEVICE_COUNTER(rx_p0_hdr_egr_ovfls, RxP0HdrEgrOvflCnt);
+DEVICE_COUNTER(rx_p1_hdr_egr_ovfls, RxP1HdrEgrOvflCnt);
+DEVICE_COUNTER(rx_p2_hdr_egr_ovfls, RxP2HdrEgrOvflCnt);
+DEVICE_COUNTER(rx_p3_hdr_egr_ovfls, RxP3HdrEgrOvflCnt);
+DEVICE_COUNTER(rx_p4_hdr_egr_ovfls, RxP4HdrEgrOvflCnt);
+DEVICE_COUNTER(rx_p5_hdr_egr_ovfls, RxP5HdrEgrOvflCnt);
+DEVICE_COUNTER(rx_p6_hdr_egr_ovfls, RxP6HdrEgrOvflCnt);
+DEVICE_COUNTER(rx_p7_hdr_egr_ovfls, RxP7HdrEgrOvflCnt);
+DEVICE_COUNTER(rx_p8_hdr_egr_ovfls, RxP8HdrEgrOvflCnt);
+DEVICE_COUNTER(rx_pkey_mismatches, RxPKeyMismatchCnt);
+DEVICE_COUNTER(rx_tid_full_errs, RxTIDFullErrCnt);
+DEVICE_COUNTER(rx_tid_valid_errs, RxTIDValidErrCnt);
+DEVICE_COUNTER(rx_vcrc_errs, RxVCRCErrCnt);
+DEVICE_COUNTER(tx_data_pkts, TxDataPktCnt);
+DEVICE_COUNTER(tx_dropped_pkts, TxDroppedPktCnt);
+DEVICE_COUNTER(tx_dwords, TxDwordCnt);
+DEVICE_COUNTER(tx_flow_pkts, TxFlowPktCnt);
+DEVICE_COUNTER(tx_flow_stalls, TxFlowStallCnt);
+DEVICE_COUNTER(tx_len_errs, TxLenErrCnt);
+DEVICE_COUNTER(tx_max_min_len_errs, TxMaxMinLenErrCnt);
+DEVICE_COUNTER(tx_underruns, TxUnderrunCnt);
+DEVICE_COUNTER(tx_unsup_vl_errs, TxUnsupVLErrCnt);
+
+static struct attribute *dev_counter_attributes[] = {
+ &dev_attr_ib_link_downeds.attr,
+ &dev_attr_ib_link_err_recoveries.attr,
+ &dev_attr_ib_status_changes.attr,
+ &dev_attr_ib_symbol_errs.attr,
+ &dev_attr_lb_flow_stalls.attr,
+ &dev_attr_lb_ints.attr,
+ &dev_attr_rx_bad_formats.attr,
+ &dev_attr_rx_buf_ovfls.attr,
+ &dev_attr_rx_data_pkts.attr,
+ &dev_attr_rx_dropped_pkts.attr,
+ &dev_attr_rx_dwords.attr,
+ &dev_attr_rx_ebps.attr,
+ &dev_attr_rx_flow_ctrl_errs.attr,
+ &dev_attr_rx_flow_pkts.attr,
+ &dev_attr_rx_icrc_errs.attr,
+ &dev_attr_rx_len_errs.attr,
+ &dev_attr_rx_link_problems.attr,
+ &dev_attr_rx_lpcrc_errs.attr,
+ &dev_attr_rx_max_min_len_errs.attr,
+ &dev_attr_rx_p0_hdr_egr_ovfls.attr,
+ &dev_attr_rx_p1_hdr_egr_ovfls.attr,
+ &dev_attr_rx_p2_hdr_egr_ovfls.attr,
+ &dev_attr_rx_p3_hdr_egr_ovfls.attr,
+ &dev_attr_rx_p4_hdr_egr_ovfls.attr,
+ &dev_attr_rx_p5_hdr_egr_ovfls.attr,
+ &dev_attr_rx_p6_hdr_egr_ovfls.attr,
+ &dev_attr_rx_p7_hdr_egr_ovfls.attr,
+ &dev_attr_rx_p8_hdr_egr_ovfls.attr,
+ &dev_attr_rx_pkey_mismatches.attr,
+ &dev_attr_rx_tid_full_errs.attr,
+ &dev_attr_rx_tid_valid_errs.attr,
+ &dev_attr_rx_vcrc_errs.attr,
+ &dev_attr_tx_data_pkts.attr,
+ &dev_attr_tx_dropped_pkts.attr,
+ &dev_attr_tx_dwords.attr,
+ &dev_attr_tx_flow_pkts.attr,
+ &dev_attr_tx_flow_stalls.attr,
+ &dev_attr_tx_len_errs.attr,
+ &dev_attr_tx_max_min_len_errs.attr,
+ &dev_attr_tx_underruns.attr,
+ &dev_attr_tx_unsup_vl_errs.attr,
+ NULL
+};
+
+static struct attribute_group dev_counter_attr_group = {
+ .name = "counters",
+ .attrs = dev_counter_attributes
+};
+
+static ssize_t store_reset(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ if (count < 5 || memcmp(buf, "reset", 5)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ if (dd->ipath_flags & IPATH_DISABLED) {
+ /*
+ * Post-reset init would re-enable interrupts, etc.,
+ * so don't allow reset on disabled devices. Not the
+ * perfect error code, but about the best choice.
+ */
+ dev_info(dev, "Unit %d is disabled, can't reset\n",
+ dd->ipath_unit);
+ ret = -EINVAL;
+ goto bail;
+ }
+ ret = ipath_reset_device(dd->ipath_unit);
+bail:
+ return ret < 0 ? ret : count;
+}
+
+static ssize_t store_link_state(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 state;
+
+ ret = ipath_parse_ushort(buf, &state);
+ if (ret < 0)
+ goto invalid;
+
+ r = ipath_set_linkstate(dd, state);
+ if (r < 0) {
+ ret = r;
+ goto bail;
+ }
+
+ goto bail;
+invalid:
+ ipath_dev_err(dd, "attempt to set invalid link state\n");
+bail:
+ return ret;
+}
+
+static ssize_t show_mtu(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_ibmtu);
+}
+
+static ssize_t store_mtu(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ ssize_t ret;
+ u16 mtu = 0;
+ int r;
+
+ ret = ipath_parse_ushort(buf, &mtu);
+ if (ret < 0)
+ goto invalid;
+
+ r = ipath_set_mtu(dd, mtu);
+ if (r < 0)
+ ret = r;
+
+ goto bail;
+invalid:
+ ipath_dev_err(dd, "attempt to set invalid MTU\n");
+bail:
+ return ret;
+}
+
+static ssize_t show_enabled(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (dd->ipath_flags & IPATH_DISABLED) ? 0 : 1);
+}
+
+static ssize_t store_enabled(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ ssize_t ret;
+ u16 enable = 0;
+
+ ret = ipath_parse_ushort(buf, &enable);
+ if (ret < 0) {
+ ipath_dev_err(dd, "attempt to use non-numeric on enable\n");
+ goto bail;
+ }
+
+ if (enable) {
+ if (!(dd->ipath_flags & IPATH_DISABLED))
+ goto bail;
+
+ dev_info(dev, "Enabling unit %d\n", dd->ipath_unit);
+ /* same as post-reset */
+ ret = ipath_init_chip(dd, 1);
+ if (ret)
+ ipath_dev_err(dd, "Failed to enable unit %d\n",
+ dd->ipath_unit);
+ else {
+ dd->ipath_flags &= ~IPATH_DISABLED;
+ *dd->ipath_statusp &= ~IPATH_STATUS_ADMIN_DISABLED;
+ }
+ }
+ else if (!(dd->ipath_flags & IPATH_DISABLED)) {
+ dev_info(dev, "Disabling unit %d\n", dd->ipath_unit);
+ ipath_shutdown_device(dd);
+ dd->ipath_flags |= IPATH_DISABLED;
+ *dd->ipath_statusp |= IPATH_STATUS_ADMIN_DISABLED;
+ }
+
+bail:
+ return ret;
+}
+
+static ssize_t store_rx_pol_inv(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret < 0)
+ goto invalid;
+
+ r = ipath_set_rx_pol_inv(dd, val);
+ if (r < 0) {
+ ret = r;
+ goto bail;
+ }
+
+ goto bail;
+invalid:
+ ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
+bail:
+ return ret;
+}
+
+static ssize_t store_led_override(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret > 0)
+ ipath_set_led_override(dd, val);
+ else
+ ipath_dev_err(dd, "attempt to set invalid LED override\n");
+ return ret;
+}
+
+static ssize_t show_logged_errs(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int idx, count;
+
+ /* force consistency with actual EEPROM */
+ if (ipath_update_eeprom_log(dd) != 0)
+ return -ENXIO;
+
+ count = 0;
+ for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+ count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
+ dd->ipath_eep_st_errs[idx],
+ idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
+ }
+
+ return count;
+}
+
+/*
+ * New sysfs entries to control various IB config. These all turn into
+ * accesses via ipath_f_get/set_ib_cfg.
+ *
+ * Get/Set heartbeat enable. Or of 1=enabled, 2=auto
+ */
+static ssize_t show_hrtbt_enb(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_HRTBT);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+static ssize_t store_hrtbt_enb(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret >= 0 && val > 3)
+ ret = -EINVAL;
+ if (ret < 0) {
+ ipath_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
+ goto bail;
+ }
+
+ /*
+ * Set the "intentional" heartbeat enable per either of
+ * "Enable" and "Auto", as these are normally set together.
+ * This bit is consulted when leaving loopback mode,
+ * because entering loopback mode overrides it and automatically
+ * disables heartbeat.
+ */
+ r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, val);
+ if (r < 0)
+ ret = r;
+ else if (val == IPATH_IB_HRTBT_OFF)
+ dd->ipath_flags |= IPATH_NO_HRTBT;
+ else
+ dd->ipath_flags &= ~IPATH_NO_HRTBT;
+
+bail:
+ return ret;
+}
+
+/*
+ * Get/Set Link-widths enabled. Or of 1=1x, 2=4x (this is human/IB centric,
+ * _not_ the particular encoding of any given chip)
+ */
+static ssize_t show_lwid_enb(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+static ssize_t store_lwid_enb(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret >= 0 && (val == 0 || val > 3))
+ ret = -EINVAL;
+ if (ret < 0) {
+ ipath_dev_err(dd,
+ "attempt to set invalid Link Width (enable)\n");
+ goto bail;
+ }
+
+ r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LWID_ENB, val);
+ if (r < 0)
+ ret = r;
+
+bail:
+ return ret;
+}
+
+/* Get current link width */
+static ssize_t show_lwid(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LWID);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+/*
+ * Get/Set Link-speeds enabled. Or of 1=SDR 2=DDR.
+ */
+static ssize_t show_spd_enb(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+static ssize_t store_spd_enb(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret >= 0 && (val == 0 || val > (IPATH_IB_SDR | IPATH_IB_DDR)))
+ ret = -EINVAL;
+ if (ret < 0) {
+ ipath_dev_err(dd,
+ "attempt to set invalid Link Speed (enable)\n");
+ goto bail;
+ }
+
+ r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_SPD_ENB, val);
+ if (r < 0)
+ ret = r;
+
+bail:
+ return ret;
+}
+
+/* Get current link speed */
+static ssize_t show_spd(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_SPD);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+/*
+ * Get/Set RX polarity-invert enable. 0=no, 1=yes.
+ */
+static ssize_t show_rx_polinv_enb(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+static ssize_t store_rx_polinv_enb(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret >= 0 && val > 1) {
+ ipath_dev_err(dd,
+ "attempt to set invalid Rx Polarity (enable)\n");
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_RXPOL_ENB, val);
+ if (r < 0)
+ ret = r;
+
+bail:
+ return ret;
+}
+
+/*
+ * Get/Set RX lane-reversal enable. 0=no, 1=yes.
+ */
+static ssize_t show_lanerev_enb(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+
+ ret = dd->ipath_f_get_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB);
+ if (ret >= 0)
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+static ssize_t store_lanerev_enb(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, r;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret >= 0 && val > 1) {
+ ret = -EINVAL;
+ ipath_dev_err(dd,
+ "attempt to set invalid Lane reversal (enable)\n");
+ goto bail;
+ }
+
+ r = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LREV_ENB, val);
+ if (r < 0)
+ ret = r;
+
+bail:
+ return ret;
+}
+
+static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
+static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
+
+static struct attribute *driver_attributes[] = {
+ &driver_attr_num_units.attr,
+ &driver_attr_version.attr,
+ NULL
+};
+
+static struct attribute_group driver_attr_group = {
+ .attrs = driver_attributes
+};
+
+static ssize_t store_tempsense(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret, stat;
+ u16 val;
+
+ ret = ipath_parse_ushort(buf, &val);
+ if (ret <= 0) {
+ ipath_dev_err(dd, "attempt to set invalid tempsense config\n");
+ goto bail;
+ }
+ /* If anything but the highest limit, enable T_CRIT_A "interrupt" */
+ stat = ipath_tempsense_write(dd, 9, (val == 0x7f7f) ? 0x80 : 0);
+ if (stat) {
+ ipath_dev_err(dd, "Unable to set tempsense config\n");
+ ret = -1;
+ goto bail;
+ }
+ stat = ipath_tempsense_write(dd, 0xB, (u8) (val & 0xFF));
+ if (stat) {
+ ipath_dev_err(dd, "Unable to set local Tcrit\n");
+ ret = -1;
+ goto bail;
+ }
+ stat = ipath_tempsense_write(dd, 0xD, (u8) (val >> 8));
+ if (stat) {
+ ipath_dev_err(dd, "Unable to set remote Tcrit\n");
+ ret = -1;
+ goto bail;
+ }
+
+bail:
+ return ret;
+}
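For reference, a minimal sketch of how a caller might pack the 16-bit value consumed by store_tempsense() above; the helper name is hypothetical, and the split simply mirrors the register writes above (low byte to the local Tcrit register 0xB, high byte to the remote Tcrit register 0xD):

	/* Hypothetical helper, not part of the driver. */
	static inline u16 pack_tempsense_limits(u8 local_tcrit, u8 remote_tcrit)
	{
		/* 0x7f7f (both limits at maximum) leaves T_CRIT_A disabled. */
		return ((u16) remote_tcrit << 8) | local_tcrit;
	}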
+
+/*
+ * dump tempsense regs in decimal, to ease shell scripts.
+ */
+static ssize_t show_tempsense(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_devdata *dd = dev_get_drvdata(dev);
+ int ret;
+ int idx;
+ u8 regvals[8];
+
+ ret = -ENXIO;
+ for (idx = 0; idx < 8; ++idx) {
+ if (idx == 6)
+ continue;
+ ret = ipath_tempsense_read(dd, idx);
+ if (ret < 0)
+ break;
+ regvals[idx] = ret;
+ }
+ if (idx == 8)
+ ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n",
+ *(signed char *)(regvals),
+ *(signed char *)(regvals + 1),
+ regvals[2], regvals[3],
+ *(signed char *)(regvals + 5),
+ *(signed char *)(regvals + 7));
+ return ret;
+}
+
+struct attribute_group *ipath_driver_attr_groups[] = {
+ &driver_attr_group,
+ NULL,
+};
+
+static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid);
+static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc);
+static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid);
+static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state);
+static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
+static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
+static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
+static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
+static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
+static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
+static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
+static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
+static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
+static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
+static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
+static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
+static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
+static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
+static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
+static DEVICE_ATTR(jint_max_packets, S_IWUSR | S_IRUGO,
+ show_jint_max_packets, store_jint_max_packets);
+static DEVICE_ATTR(jint_idle_ticks, S_IWUSR | S_IRUGO,
+ show_jint_idle_ticks, store_jint_idle_ticks);
+static DEVICE_ATTR(tempsense, S_IWUSR | S_IRUGO,
+ show_tempsense, store_tempsense);
+
+static struct attribute *dev_attributes[] = {
+ &dev_attr_guid.attr,
+ &dev_attr_lmc.attr,
+ &dev_attr_lid.attr,
+ &dev_attr_link_state.attr,
+ &dev_attr_mlid.attr,
+ &dev_attr_mtu.attr,
+ &dev_attr_nguid.attr,
+ &dev_attr_nports.attr,
+ &dev_attr_serial.attr,
+ &dev_attr_status.attr,
+ &dev_attr_status_str.attr,
+ &dev_attr_boardversion.attr,
+ &dev_attr_unit.attr,
+ &dev_attr_enabled.attr,
+ &dev_attr_rx_pol_inv.attr,
+ &dev_attr_led_override.attr,
+ &dev_attr_logged_errors.attr,
+ &dev_attr_tempsense.attr,
+ &dev_attr_localbus_info.attr,
+ NULL
+};
+
+static struct attribute_group dev_attr_group = {
+ .attrs = dev_attributes
+};
+
+static DEVICE_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb,
+ store_hrtbt_enb);
+static DEVICE_ATTR(link_width_enable, S_IWUSR | S_IRUGO, show_lwid_enb,
+ store_lwid_enb);
+static DEVICE_ATTR(link_width, S_IRUGO, show_lwid, NULL);
+static DEVICE_ATTR(link_speed_enable, S_IWUSR | S_IRUGO, show_spd_enb,
+ store_spd_enb);
+static DEVICE_ATTR(link_speed, S_IRUGO, show_spd, NULL);
+static DEVICE_ATTR(rx_pol_inv_enable, S_IWUSR | S_IRUGO, show_rx_polinv_enb,
+ store_rx_polinv_enb);
+static DEVICE_ATTR(rx_lane_rev_enable, S_IWUSR | S_IRUGO, show_lanerev_enb,
+ store_lanerev_enb);
+
+static struct attribute *dev_ibcfg_attributes[] = {
+ &dev_attr_hrtbt_enable.attr,
+ &dev_attr_link_width_enable.attr,
+ &dev_attr_link_width.attr,
+ &dev_attr_link_speed_enable.attr,
+ &dev_attr_link_speed.attr,
+ &dev_attr_rx_pol_inv_enable.attr,
+ &dev_attr_rx_lane_rev_enable.attr,
+ NULL
+};
+
+static struct attribute_group dev_ibcfg_attr_group = {
+ .attrs = dev_ibcfg_attributes
+};
+
+/**
+ * ipath_expose_reset - create a device reset file
+ * @dev: the device structure
+ *
+ * Only expose a file that lets us reset the device after someone
+ * enters diag mode. A device reset is quite likely to crash the
+ * machine entirely, so we don't want to normally make it
+ * available.
+ *
+ * Called with ipath_mutex held.
+ */
+int ipath_expose_reset(struct device *dev)
+{
+ static int exposed;
+ int ret;
+
+ if (!exposed) {
+ ret = device_create_file(dev, &dev_attr_reset);
+ exposed = 1;
+	} else {
+		ret = 0;
+	}
+
+ return ret;
+}
+
+int ipath_device_create_group(struct device *dev, struct ipath_devdata *dd)
+{
+ int ret;
+
+ ret = sysfs_create_group(&dev->kobj, &dev_attr_group);
+ if (ret)
+ goto bail;
+
+ ret = sysfs_create_group(&dev->kobj, &dev_counter_attr_group);
+ if (ret)
+ goto bail_attrs;
+
+ if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
+ ret = device_create_file(dev, &dev_attr_jint_idle_ticks);
+ if (ret)
+ goto bail_counter;
+ ret = device_create_file(dev, &dev_attr_jint_max_packets);
+ if (ret)
+ goto bail_idle;
+
+ ret = sysfs_create_group(&dev->kobj, &dev_ibcfg_attr_group);
+ if (ret)
+ goto bail_max;
+ }
+
+ return 0;
+
+bail_max:
+ device_remove_file(dev, &dev_attr_jint_max_packets);
+bail_idle:
+ device_remove_file(dev, &dev_attr_jint_idle_ticks);
+bail_counter:
+ sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
+bail_attrs:
+ sysfs_remove_group(&dev->kobj, &dev_attr_group);
+bail:
+ return ret;
+}
+
+void ipath_device_remove_group(struct device *dev, struct ipath_devdata *dd)
+{
+ sysfs_remove_group(&dev->kobj, &dev_counter_attr_group);
+
+ if (dd->ipath_flags & IPATH_HAS_MULT_IB_SPEED) {
+ sysfs_remove_group(&dev->kobj, &dev_ibcfg_attr_group);
+ device_remove_file(dev, &dev_attr_jint_idle_ticks);
+ device_remove_file(dev, &dev_attr_jint_max_packets);
+ }
+
+ sysfs_remove_group(&dev->kobj, &dev_attr_group);
+
+ device_remove_file(dev, &dev_attr_reset);
+}
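As a worked note on the OR-encoded values used by the IB config attributes above: link_width_enable == 3 means 1x | 4x, link_speed_enable == 3 means SDR | DDR, and hrtbt_enable == 3 means enabled | auto; each store handler rejects values outside its range.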
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
new file mode 100644
index 0000000..82cc588
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -0,0 +1,547 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ipath_verbs.h"
+#include "ipath_kernel.h"
+
+/* cut down ridiculously long IB macro names */
+#define OP(x) IB_OPCODE_UC_##x
+
+/**
+ * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
+ * @qp: a pointer to the QP
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ */
+int ipath_make_uc_req(struct ipath_qp *qp)
+{
+ struct ipath_other_headers *ohdr;
+ struct ipath_swqe *wqe;
+ unsigned long flags;
+ u32 hwords;
+ u32 bth0;
+ u32 len;
+ u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ int ret = 0;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
+ }
+
+ ohdr = &qp->s_hdr.u.oth;
+ if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ ohdr = &qp->s_hdr.u.l.oth;
+
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+ bth0 = 1 << 22; /* Set M bit */
+
+ /* Get the next send request. */
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ qp->s_wqe = NULL;
+ switch (qp->s_state) {
+ default:
+ if (!(ib_ipath_state_ops[qp->state] &
+ IPATH_PROCESS_NEXT_SEND_OK))
+ goto bail;
+ /* Check if send work queue is empty. */
+ if (qp->s_cur == qp->s_head)
+ goto bail;
+ /*
+ * Start a new request.
+ */
+ qp->s_psn = wqe->psn = qp->s_next_psn;
+ qp->s_sge.sge = wqe->sg_list[0];
+ qp->s_sge.sg_list = wqe->sg_list + 1;
+ qp->s_sge.num_sge = wqe->wr.num_sge;
+ qp->s_len = len = wqe->length;
+ switch (wqe->wr.opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ if (len > pmtu) {
+ qp->s_state = OP(SEND_FIRST);
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND)
+ qp->s_state = OP(SEND_ONLY);
+ else {
+ qp->s_state =
+ OP(SEND_ONLY_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ }
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= 1 << 23;
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(len);
+ hwords += sizeof(struct ib_reth) / 4;
+ if (len > pmtu) {
+ qp->s_state = OP(RDMA_WRITE_FIRST);
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ qp->s_state = OP(RDMA_WRITE_ONLY);
+ else {
+ qp->s_state =
+ OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
+ /* Immediate data comes after the RETH */
+ ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= 1 << 23;
+ }
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ default:
+ goto bail;
+ }
+ break;
+
+ case OP(SEND_FIRST):
+ qp->s_state = OP(SEND_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(SEND_MIDDLE):
+ len = qp->s_len;
+ if (len > pmtu) {
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND)
+ qp->s_state = OP(SEND_LAST);
+ else {
+ qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ }
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= 1 << 23;
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case OP(RDMA_WRITE_FIRST):
+ qp->s_state = OP(RDMA_WRITE_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(RDMA_WRITE_MIDDLE):
+ len = qp->s_len;
+ if (len > pmtu) {
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ qp->s_state = OP(RDMA_WRITE_LAST);
+ else {
+ qp->s_state =
+ OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= 1 << 23;
+ }
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+ }
+ qp->s_len -= len;
+ qp->s_hdrwords = hwords;
+ qp->s_cur_sge = &qp->s_sge;
+ qp->s_cur_size = len;
+ ipath_make_ruc_header(to_idev(qp->ibqp.device),
+ qp, ohdr, bth0 | (qp->s_state << 24),
+ qp->s_next_psn++ & IPATH_PSN_MASK);
+done:
+ ret = 1;
+ goto unlock;
+
+bail:
+ qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
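A short note, worked from the shifts above and the standard BTH layout, on the first BTH word assembled in ipath_make_uc_req():

	/*
	 * bth0 bits 31..24 carry the opcode (qp->s_state << 24), bit 23 the
	 * solicited-event flag (1 << 23) and bit 22 the MigReq "M" bit
	 * (1 << 22); e.g. a solicited SEND_ONLY yields
	 * (IB_OPCODE_UC_SEND_ONLY << 24) | (1 << 23) | (1 << 22).
	 */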
+
+/**
+ * ipath_uc_rcv - handle an incoming UC packet
+ * @dev: the device the packet came in on
+ * @hdr: the header of the packet
+ * @has_grh: true if the packet has a GRH
+ * @data: the packet data
+ * @tlen: the length of the packet
+ * @qp: the QP for this packet.
+ *
+ * This is called from ipath_qp_rcv() to process an incoming UC packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
+{
+ struct ipath_other_headers *ohdr;
+ int opcode;
+ u32 hdrsize;
+ u32 psn;
+ u32 pad;
+ struct ib_wc wc;
+ u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ struct ib_reth *reth;
+ int header_in_data;
+
+ /* Validate the SLID. See Ch. 9.6.1.5 */
+ if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+ goto done;
+
+ /* Check for GRH */
+ if (!has_grh) {
+ ohdr = &hdr->u.oth;
+ hdrsize = 8 + 12; /* LRH + BTH */
+ psn = be32_to_cpu(ohdr->bth[2]);
+ header_in_data = 0;
+ } else {
+ ohdr = &hdr->u.l.oth;
+ hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
+ /*
+ * The header with GRH is 60 bytes and the
+ * core driver sets the eager header buffer
+ * size to 56 bytes so the last 4 bytes of
+ * the BTH header (PSN) is in the data buffer.
+ */
+ header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
+ if (header_in_data) {
+ psn = be32_to_cpu(((__be32 *) data)[0]);
+ data += sizeof(__be32);
+ } else
+ psn = be32_to_cpu(ohdr->bth[2]);
+ }
+ /*
+	 * The opcode is in the low byte when it's in network order
+ * (top byte when in host order).
+ */
+ opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+
+ memset(&wc, 0, sizeof wc);
+
+	/* Compare the PSN versus the expected PSN. */
+ if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
+ /*
+ * Handle a sequence error.
+ * Silently drop any current message.
+ */
+ qp->r_psn = psn;
+ inv:
+ qp->r_state = OP(SEND_LAST);
+ switch (opcode) {
+ case OP(SEND_FIRST):
+ case OP(SEND_ONLY):
+ case OP(SEND_ONLY_WITH_IMMEDIATE):
+ goto send_first;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_ONLY):
+ case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
+ goto rdma_first;
+
+ default:
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ }
+
+ /* Check for opcode sequence errors. */
+ switch (qp->r_state) {
+ case OP(SEND_FIRST):
+ case OP(SEND_MIDDLE):
+ if (opcode == OP(SEND_MIDDLE) ||
+ opcode == OP(SEND_LAST) ||
+ opcode == OP(SEND_LAST_WITH_IMMEDIATE))
+ break;
+ goto inv;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_MIDDLE):
+ if (opcode == OP(RDMA_WRITE_MIDDLE) ||
+ opcode == OP(RDMA_WRITE_LAST) ||
+ opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
+ break;
+ goto inv;
+
+ default:
+ if (opcode == OP(SEND_FIRST) ||
+ opcode == OP(SEND_ONLY) ||
+ opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
+ opcode == OP(RDMA_WRITE_FIRST) ||
+ opcode == OP(RDMA_WRITE_ONLY) ||
+ opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+ break;
+ goto inv;
+ }
+
+ /* OK, process the packet. */
+ switch (opcode) {
+ case OP(SEND_FIRST):
+ case OP(SEND_ONLY):
+ case OP(SEND_ONLY_WITH_IMMEDIATE):
+ send_first:
+ if (qp->r_flags & IPATH_R_REUSE_SGE) {
+ qp->r_flags &= ~IPATH_R_REUSE_SGE;
+ qp->r_sge = qp->s_rdma_read_sge;
+ } else if (!ipath_get_rwqe(qp, 0)) {
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ /* Save the WQE so we can reuse it in case of an error. */
+ qp->s_rdma_read_sge = qp->r_sge;
+ qp->r_rcv_len = 0;
+ if (opcode == OP(SEND_ONLY))
+ goto send_last;
+ else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
+ goto send_last_imm;
+ /* FALLTHROUGH */
+ case OP(SEND_MIDDLE):
+ /* Check for invalid length PMTU or posted rwqe len. */
+ if (unlikely(tlen != (hdrsize + pmtu + 4))) {
+ qp->r_flags |= IPATH_R_REUSE_SGE;
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ qp->r_rcv_len += pmtu;
+ if (unlikely(qp->r_rcv_len > qp->r_len)) {
+ qp->r_flags |= IPATH_R_REUSE_SGE;
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ ipath_copy_sge(&qp->r_sge, data, pmtu);
+ break;
+
+ case OP(SEND_LAST_WITH_IMMEDIATE):
+ send_last_imm:
+ if (header_in_data) {
+ wc.ex.imm_data = *(__be32 *) data;
+ data += sizeof(__be32);
+ } else {
+ /* Immediate data comes after BTH */
+ wc.ex.imm_data = ohdr->u.imm_data;
+ }
+ hdrsize += 4;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ /* FALLTHROUGH */
+ case OP(SEND_LAST):
+ send_last:
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /* Check for invalid length. */
+ /* XXX LAST len should be >= 1 */
+ if (unlikely(tlen < (hdrsize + pad + 4))) {
+ qp->r_flags |= IPATH_R_REUSE_SGE;
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ /* Don't count the CRC. */
+ tlen -= (hdrsize + pad + 4);
+ wc.byte_len = tlen + qp->r_rcv_len;
+ if (unlikely(wc.byte_len > qp->r_len)) {
+ qp->r_flags |= IPATH_R_REUSE_SGE;
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ wc.opcode = IB_WC_RECV;
+ last_imm:
+ ipath_copy_sge(&qp->r_sge, data, tlen);
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.slid = qp->remote_ah_attr.dlid;
+ wc.sl = qp->remote_ah_attr.sl;
+ /* Signal completion event if the solicited bit is set. */
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ (ohdr->bth[0] &
+ __constant_cpu_to_be32(1 << 23)) != 0);
+ break;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_ONLY):
+ case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
+ rdma_first:
+ /* RETH comes after BTH */
+ if (!header_in_data)
+ reth = &ohdr->u.rc.reth;
+ else {
+ reth = (struct ib_reth *)data;
+ data += sizeof(*reth);
+ }
+ hdrsize += sizeof(*reth);
+ qp->r_len = be32_to_cpu(reth->length);
+ qp->r_rcv_len = 0;
+ if (qp->r_len != 0) {
+ u32 rkey = be32_to_cpu(reth->rkey);
+ u64 vaddr = be64_to_cpu(reth->vaddr);
+ int ok;
+
+ /* Check rkey */
+ ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
+ vaddr, rkey,
+ IB_ACCESS_REMOTE_WRITE);
+ if (unlikely(!ok)) {
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ } else {
+ qp->r_sge.sg_list = NULL;
+ qp->r_sge.sge.mr = NULL;
+ qp->r_sge.sge.vaddr = NULL;
+ qp->r_sge.sge.length = 0;
+ qp->r_sge.sge.sge_length = 0;
+ }
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_WRITE))) {
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ if (opcode == OP(RDMA_WRITE_ONLY))
+ goto rdma_last;
+ else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+ goto rdma_last_imm;
+ /* FALLTHROUGH */
+ case OP(RDMA_WRITE_MIDDLE):
+ /* Check for invalid length PMTU or posted rwqe len. */
+ if (unlikely(tlen != (hdrsize + pmtu + 4))) {
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ qp->r_rcv_len += pmtu;
+ if (unlikely(qp->r_rcv_len > qp->r_len)) {
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ ipath_copy_sge(&qp->r_sge, data, pmtu);
+ break;
+
+ case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
+ rdma_last_imm:
+ if (header_in_data) {
+ wc.ex.imm_data = *(__be32 *) data;
+ data += sizeof(__be32);
+ } else {
+ /* Immediate data comes after BTH */
+ wc.ex.imm_data = ohdr->u.imm_data;
+ }
+ hdrsize += 4;
+ wc.wc_flags = IB_WC_WITH_IMM;
+
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /* Check for invalid length. */
+ /* XXX LAST len should be >= 1 */
+ if (unlikely(tlen < (hdrsize + pad + 4))) {
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ /* Don't count the CRC. */
+ tlen -= (hdrsize + pad + 4);
+ if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ if (qp->r_flags & IPATH_R_REUSE_SGE)
+ qp->r_flags &= ~IPATH_R_REUSE_SGE;
+ else if (!ipath_get_rwqe(qp, 1)) {
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ wc.byte_len = qp->r_len;
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ goto last_imm;
+
+ case OP(RDMA_WRITE_LAST):
+ rdma_last:
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /* Check for invalid length. */
+ /* XXX LAST len should be >= 1 */
+ if (unlikely(tlen < (hdrsize + pad + 4))) {
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ /* Don't count the CRC. */
+ tlen -= (hdrsize + pad + 4);
+ if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ ipath_copy_sge(&qp->r_sge, data, tlen);
+ break;
+
+ default:
+ /* Drop packet for unknown opcodes. */
+ dev->n_pkt_drops++;
+ goto done;
+ }
+ qp->r_psn++;
+ qp->r_state = opcode;
+done:
+ return;
+}
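The header_in_data arithmetic in ipath_uc_rcv() above, spelled out: LRH (8) + GRH (40) + BTH (12) = 60 bytes of header, but only 56 bytes fit in the eager header buffer, so the last 4 bytes of the BTH (bth[2], the PSN) land at the front of the data buffer and are read from *data instead of from ohdr.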
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
new file mode 100644
index 0000000..729446f
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -0,0 +1,596 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_smi.h>
+
+#include "ipath_verbs.h"
+#include "ipath_kernel.h"
+
+/**
+ * ipath_ud_loopback - handle send on loopback QPs
+ * @sqp: the sending QP
+ * @swqe: the send work request
+ *
+ * This is called from ipath_make_ud_req() to forward a WQE addressed
+ * to the same HCA.
+ * Note that the receive interrupt handler may be calling ipath_ud_rcv()
+ * while this is being called.
+ */
+static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
+{
+ struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
+ struct ipath_qp *qp;
+ struct ib_ah_attr *ah_attr;
+ unsigned long flags;
+ struct ipath_rq *rq;
+ struct ipath_srq *srq;
+ struct ipath_sge_state rsge;
+ struct ipath_sge *sge;
+ struct ipath_rwq *wq;
+ struct ipath_rwqe *wqe;
+ void (*handler)(struct ib_event *, void *);
+ struct ib_wc wc;
+ u32 tail;
+ u32 rlen;
+ u32 length;
+
+ qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
+ if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ dev->n_pkt_drops++;
+ goto done;
+ }
+
+ rsge.sg_list = NULL;
+
+ /*
+ * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
+ * Qkeys with the high order bit set mean use the
+ * qkey from the QP context instead of the WR (see 10.2.5).
+ */
+ if (unlikely(qp->ibqp.qp_num &&
+ ((int) swqe->wr.wr.ud.remote_qkey < 0 ?
+ sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) {
+ /* XXX OK to lose a count once in a while. */
+ dev->qkey_violations++;
+ dev->n_pkt_drops++;
+ goto drop;
+ }
+
+ /*
+	 * A GRH is expected to precede the data even if not
+ * present on the wire.
+ */
+ length = swqe->length;
+ memset(&wc, 0, sizeof wc);
+ wc.byte_len = length + sizeof(struct ib_grh);
+
+ if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = swqe->wr.ex.imm_data;
+ }
+
+ /*
+ * This would be a lot simpler if we could call ipath_get_rwqe()
+ * but that uses state that the receive interrupt handler uses
+ * so we would need to lock out receive interrupts while doing
+ * local loopback.
+ */
+ if (qp->ibqp.srq) {
+ srq = to_isrq(qp->ibqp.srq);
+ handler = srq->ibsrq.event_handler;
+ rq = &srq->rq;
+ } else {
+ srq = NULL;
+ handler = NULL;
+ rq = &qp->r_rq;
+ }
+
+ if (rq->max_sge > 1) {
+ /*
+ * XXX We could use GFP_KERNEL if ipath_do_send()
+ * was always called from the tasklet instead of
+ * from ipath_post_send().
+ */
+ rsge.sg_list = kmalloc((rq->max_sge - 1) *
+ sizeof(struct ipath_sge),
+ GFP_ATOMIC);
+ if (!rsge.sg_list) {
+ dev->n_pkt_drops++;
+ goto drop;
+ }
+ }
+
+ /*
+ * Get the next work request entry to find where to put the data.
+ * Note that it is safe to drop the lock after changing rq->tail
+ * since ipath_post_receive() won't fill the empty slot.
+ */
+ spin_lock_irqsave(&rq->lock, flags);
+ wq = rq->wq;
+ tail = wq->tail;
+ /* Validate tail before using it since it is user writable. */
+ if (tail >= rq->size)
+ tail = 0;
+ if (unlikely(tail == wq->head)) {
+ spin_unlock_irqrestore(&rq->lock, flags);
+ dev->n_pkt_drops++;
+ goto drop;
+ }
+ wqe = get_rwqe_ptr(rq, tail);
+ if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
+ spin_unlock_irqrestore(&rq->lock, flags);
+ dev->n_pkt_drops++;
+ goto drop;
+ }
+ /* Silently drop packets which are too big. */
+ if (wc.byte_len > rlen) {
+ spin_unlock_irqrestore(&rq->lock, flags);
+ dev->n_pkt_drops++;
+ goto drop;
+ }
+ if (++tail >= rq->size)
+ tail = 0;
+ wq->tail = tail;
+ wc.wr_id = wqe->wr_id;
+ if (handler) {
+ u32 n;
+
+ /*
+ * validate head pointer value and compute
+ * the number of remaining WQEs.
+ */
+ n = wq->head;
+ if (n >= rq->size)
+ n = 0;
+ if (n < tail)
+ n += rq->size - tail;
+ else
+ n -= tail;
+ if (n < srq->limit) {
+ struct ib_event ev;
+
+ srq->limit = 0;
+ spin_unlock_irqrestore(&rq->lock, flags);
+ ev.device = qp->ibqp.device;
+ ev.element.srq = qp->ibqp.srq;
+ ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ handler(&ev, srq->ibsrq.srq_context);
+ } else
+ spin_unlock_irqrestore(&rq->lock, flags);
+ } else
+ spin_unlock_irqrestore(&rq->lock, flags);
+
+ ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
+ wc.wc_flags |= IB_WC_GRH;
+ } else
+ ipath_skip_sge(&rsge, sizeof(struct ib_grh));
+ sge = swqe->sg_list;
+ while (length) {
+ u32 len = sge->length;
+
+ if (len > length)
+ len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ ipath_copy_sge(&rsge, sge->vaddr, len);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--swqe->wr.num_sge)
+ sge++;
+ } else if (sge->length == 0 && sge->mr != NULL) {
+ if (++sge->n >= IPATH_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ length -= len;
+ }
+ wc.status = IB_WC_SUCCESS;
+ wc.opcode = IB_WC_RECV;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = sqp->ibqp.qp_num;
+ /* XXX do we know which pkey matched? Only needed for GSI. */
+ wc.pkey_index = 0;
+ wc.slid = dev->dd->ipath_lid |
+ (ah_attr->src_path_bits &
+ ((1 << dev->dd->ipath_lmc) - 1));
+ wc.sl = ah_attr->sl;
+ wc.dlid_path_bits =
+ ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1);
+ wc.port_num = 1;
+ /* Signal completion event if the solicited bit is set. */
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ swqe->wr.send_flags & IB_SEND_SOLICITED);
+drop:
+ kfree(rsge.sg_list);
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+done:;
+}
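A worked example of the qkey rule applied in ipath_ud_loopback() above (and again in ipath_make_ud_req() below): a WR qkey of 0x80010203 has the high bit set, so (int) qkey < 0 and the sending QP's own qkey is used instead per IBA 10.2.5, whereas a WR qkey of 0x00010203 is used as given.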
+
+/**
+ * ipath_make_ud_req - construct a UD request packet
+ * @qp: the QP
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ */
+int ipath_make_ud_req(struct ipath_qp *qp)
+{
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ struct ipath_other_headers *ohdr;
+ struct ib_ah_attr *ah_attr;
+ struct ipath_swqe *wqe;
+ unsigned long flags;
+ u32 nwords;
+ u32 extra_bytes;
+ u32 bth0;
+ u16 lrh0;
+ u16 lid;
+ int ret = 0;
+ int next_cur;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
+ }
+
+ if (qp->s_cur == qp->s_head)
+ goto bail;
+
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ next_cur = qp->s_cur + 1;
+ if (next_cur >= qp->s_size)
+ next_cur = 0;
+
+ /* Construct the header. */
+ ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+ if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
+ if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
+ dev->n_multicast_xmit++;
+ else
+ dev->n_unicast_xmit++;
+ } else {
+ dev->n_unicast_xmit++;
+ lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
+ if (unlikely(lid == dev->dd->ipath_lid)) {
+ /*
+ * If DMAs are in progress, we can't generate
+ * a completion for the loopback packet since
+ * it would be out of order.
+ * XXX Instead of waiting, we could queue a
+ * zero length descriptor so we get a callback.
+ */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= IPATH_S_WAIT_DMA;
+ goto bail;
+ }
+ qp->s_cur = next_cur;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ ipath_ud_loopback(qp, wqe);
+ spin_lock_irqsave(&qp->s_lock, flags);
+ ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
+ goto done;
+ }
+ }
+
+ qp->s_cur = next_cur;
+ extra_bytes = -wqe->length & 3;
+ nwords = (wqe->length + extra_bytes) >> 2;
+
+ /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
+ qp->s_hdrwords = 7;
+ qp->s_cur_size = wqe->length;
+ qp->s_cur_sge = &qp->s_sge;
+ qp->s_dmult = ah_attr->static_rate;
+ qp->s_wqe = wqe;
+ qp->s_sge.sge = wqe->sg_list[0];
+ qp->s_sge.sg_list = wqe->sg_list + 1;
+ qp->s_sge.num_sge = wqe->wr.num_sge;
+
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ /* Header size in 32-bit words. */
+ qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
+ &ah_attr->grh,
+ qp->s_hdrwords, nwords);
+ lrh0 = IPATH_LRH_GRH;
+ ohdr = &qp->s_hdr.u.l.oth;
+ /*
+ * Don't worry about sending to locally attached multicast
+		 * QPs. It is unspecified by the spec what happens.
+ */
+ } else {
+ /* Header size in 32-bit words. */
+ lrh0 = IPATH_LRH_BTH;
+ ohdr = &qp->s_hdr.u.oth;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+ qp->s_hdrwords++;
+ ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
+ bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
+ } else
+ bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
+ lrh0 |= ah_attr->sl << 4;
+ if (qp->ibqp.qp_type == IB_QPT_SMI)
+ lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
+ qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
+ qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
+ qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
+ SIZE_OF_CRC);
+ lid = dev->dd->ipath_lid;
+ if (lid) {
+ lid |= ah_attr->src_path_bits &
+ ((1 << dev->dd->ipath_lmc) - 1);
+ qp->s_hdr.lrh[3] = cpu_to_be16(lid);
+ } else
+ qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= 1 << 23;
+ bth0 |= extra_bytes << 20;
+ bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
+ ipath_get_pkey(dev->dd, qp->s_pkey_index);
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ /*
+ * Use the multicast QP if the destination LID is a multicast LID.
+ */
+ ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
+ ah_attr->dlid != IPATH_PERMISSIVE_LID ?
+ __constant_cpu_to_be32(IPATH_MULTICAST_QPN) :
+ cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+ ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
+ /*
+ * Qkeys with the high order bit set mean use the
+ * qkey from the QP context instead of the WR (see 10.2.5).
+ */
+ ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
+ qp->qkey : wqe->wr.wr.ud.remote_qkey);
+ ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
+
+done:
+ ret = 1;
+ goto unlock;
+
+bail:
+ qp->s_flags &= ~IPATH_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
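The pad computation in ipath_make_ud_req() above, with concrete numbers: wqe->length = 25 gives extra_bytes = -25 & 3 = 3 and nwords = (25 + 3) >> 2 = 7, so three pad bytes are advertised in the BTH (extra_bytes << 20) and the LRH length counts 7 payload words plus header words and CRC.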
+
+/**
+ * ipath_ud_rcv - receive an incoming UD packet
+ * @dev: the device the packet came in on
+ * @hdr: the packet header
+ * @has_grh: true if the packet has a GRH
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP the packet came on
+ *
+ * This is called from ipath_qp_rcv() to process an incoming UD packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
+{
+ struct ipath_other_headers *ohdr;
+ int opcode;
+ u32 hdrsize;
+ u32 pad;
+ struct ib_wc wc;
+ u32 qkey;
+ u32 src_qp;
+ u16 dlid;
+ int header_in_data;
+
+ /* Check for GRH */
+ if (!has_grh) {
+ ohdr = &hdr->u.oth;
+ hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */
+ qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
+ src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
+ header_in_data = 0;
+ } else {
+ ohdr = &hdr->u.l.oth;
+ hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
+ /*
+ * The header with GRH is 68 bytes and the core driver sets
+ * the eager header buffer size to 56 bytes so the last 12
+ * bytes of the IB header is in the data buffer.
+ */
+ header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
+ if (header_in_data) {
+ qkey = be32_to_cpu(((__be32 *) data)[1]);
+ src_qp = be32_to_cpu(((__be32 *) data)[2]);
+ data += 12;
+ } else {
+ qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
+ src_qp = be32_to_cpu(ohdr->u.ud.deth[1]);
+ }
+ }
+ src_qp &= IPATH_QPN_MASK;
+
+ /*
+ * Check that the permissive LID is only used on QP0
+ * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
+ */
+ if (qp->ibqp.qp_num) {
+ if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
+ hdr->lrh[3] == IB_LID_PERMISSIVE)) {
+ dev->n_pkt_drops++;
+ goto bail;
+ }
+ if (unlikely(qkey != qp->qkey)) {
+ /* XXX OK to lose a count once in a while. */
+ dev->qkey_violations++;
+ dev->n_pkt_drops++;
+ goto bail;
+ }
+ } else if (hdr->lrh[1] == IB_LID_PERMISSIVE ||
+ hdr->lrh[3] == IB_LID_PERMISSIVE) {
+ struct ib_smp *smp = (struct ib_smp *) data;
+
+ if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ dev->n_pkt_drops++;
+ goto bail;
+ }
+ }
+
+ /*
+	 * The opcode is in the low byte when it's in network order
+ * (top byte when in host order).
+ */
+ opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ if (qp->ibqp.qp_num > 1 &&
+ opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
+ if (header_in_data) {
+ wc.ex.imm_data = *(__be32 *) data;
+ data += sizeof(__be32);
+ } else
+ wc.ex.imm_data = ohdr->u.ud.imm_data;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ hdrsize += sizeof(u32);
+ } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
+ wc.ex.imm_data = 0;
+ wc.wc_flags = 0;
+ } else {
+ dev->n_pkt_drops++;
+ goto bail;
+ }
+
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ if (unlikely(tlen < (hdrsize + pad + 4))) {
+ /* Drop incomplete packets. */
+ dev->n_pkt_drops++;
+ goto bail;
+ }
+ tlen -= hdrsize + pad + 4;
+
+ /* Drop invalid MAD packets (see 13.5.3.1). */
+ if (unlikely((qp->ibqp.qp_num == 0 &&
+ (tlen != 256 ||
+ (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)) ||
+ (qp->ibqp.qp_num == 1 &&
+ (tlen != 256 ||
+ (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))) {
+ dev->n_pkt_drops++;
+ goto bail;
+ }
+
+ /*
+	 * A GRH is expected to precede the data even if not
+ * present on the wire.
+ */
+ wc.byte_len = tlen + sizeof(struct ib_grh);
+
+ /*
+ * Get the next work request entry to find where to put the data.
+ */
+ if (qp->r_flags & IPATH_R_REUSE_SGE)
+ qp->r_flags &= ~IPATH_R_REUSE_SGE;
+ else if (!ipath_get_rwqe(qp, 0)) {
+ /*
+ * Count VL15 packets dropped due to no receive buffer.
+		 * Otherwise, count them as buffer overruns since the HW
+		 * will usually be able to receive packets even if there
+		 * are no QPs with posted receive buffers.
+ */
+ if (qp->ibqp.qp_num == 0)
+ dev->n_vl15_dropped++;
+ else
+ dev->rcv_errors++;
+ goto bail;
+ }
+ /* Silently drop packets which are too big. */
+ if (wc.byte_len > qp->r_len) {
+ qp->r_flags |= IPATH_R_REUSE_SGE;
+ dev->n_pkt_drops++;
+ goto bail;
+ }
+ if (has_grh) {
+ ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
+ sizeof(struct ib_grh));
+ wc.wc_flags |= IB_WC_GRH;
+ } else
+ ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
+ ipath_copy_sge(&qp->r_sge, data,
+ wc.byte_len - sizeof(struct ib_grh));
+ if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
+ goto bail;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.opcode = IB_WC_RECV;
+ wc.vendor_err = 0;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = src_qp;
+ /* XXX do we know which pkey matched? Only needed for GSI. */
+ wc.pkey_index = 0;
+ wc.slid = be16_to_cpu(hdr->lrh[3]);
+ wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
+ dlid = be16_to_cpu(hdr->lrh[1]);
+ /*
+ * Save the LMC lower bits if the destination LID is a unicast LID.
+ */
+ wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 :
+ dlid & ((1 << dev->dd->ipath_lmc) - 1);
+ wc.port_num = 1;
+ /* Signal completion event if the solicited bit is set. */
+ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ (ohdr->bth[0] &
+ __constant_cpu_to_be32(1 << 23)) != 0);
+
+bail:;
+}
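The corresponding arithmetic for UD, spelled out: LRH (8) + GRH (40) + BTH (12) + DETH (8) = 68 bytes of header against the 56-byte eager header buffer, so the last 12 bytes -- bth[2] (the PSN) plus both DETH words (qkey and source QP) -- arrive at the front of the data buffer, which is why qkey and src_qp are read from data[1] and data[2] above before data is advanced by 12.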
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
new file mode 100644
index 0000000..0190edc
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mm.h>
+#include <linux/device.h>
+
+#include "ipath_kernel.h"
+
+static void __ipath_release_user_pages(struct page **p, size_t num_pages,
+ int dirty)
+{
+ size_t i;
+
+ for (i = 0; i < num_pages; i++) {
+ ipath_cdbg(MM, "%lu/%lu put_page %p\n", (unsigned long) i,
+ (unsigned long) num_pages, p[i]);
+ if (dirty)
+ set_page_dirty_lock(p[i]);
+ put_page(p[i]);
+ }
+}
+
+/* call with current->mm->mmap_sem held */
+static int __get_user_pages(unsigned long start_page, size_t num_pages,
+ struct page **p, struct vm_area_struct **vma)
+{
+ unsigned long lock_limit;
+ size_t got;
+ int ret;
+
+ lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >>
+ PAGE_SHIFT;
+
+ if (num_pages > lock_limit) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ ipath_cdbg(VERBOSE, "pin %lx pages from vaddr %lx\n",
+ (unsigned long) num_pages, start_page);
+
+ for (got = 0; got < num_pages; got += ret) {
+ ret = get_user_pages(current, current->mm,
+ start_page + got * PAGE_SIZE,
+ num_pages - got, 1, 1,
+ p + got, vma);
+ if (ret < 0)
+ goto bail_release;
+ }
+
+ current->mm->locked_vm += num_pages;
+
+ ret = 0;
+ goto bail;
+
+bail_release:
+ __ipath_release_user_pages(p, got, 0);
+bail:
+ return ret;
+}
+
+/**
+ * ipath_map_page - a safety wrapper around pci_map_page()
+ *
+ * A dma_addr of all 0's is interpreted by the chip as "disabled".
+ * Unfortunately, it can also be a valid dma_addr returned on some
+ * architectures.
+ *
+ * The powerpc iommu assigns dma_addrs in ascending order, so we don't
+ * have to bother with retries or mapping a dummy page to ensure we
+ * don't just get the same mapping again.
+ *
+ * I'm sure we won't be so lucky with other iommu's, so FIXME.
+ */
+dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
+ unsigned long offset, size_t size, int direction)
+{
+ dma_addr_t phys;
+
+ phys = pci_map_page(hwdev, page, offset, size, direction);
+
+ if (phys == 0) {
+ pci_unmap_page(hwdev, phys, size, direction);
+ phys = pci_map_page(hwdev, page, offset, size, direction);
+ /*
+ * FIXME: If we get 0 again, we should keep this page,
+ * map another, then free the 0 page.
+ */
+ }
+
+ return phys;
+}
+
+/**
+ * ipath_map_single - a safety wrapper around pci_map_single()
+ *
+ * Same idea as ipath_map_page().
+ */
+dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
+ int direction)
+{
+ dma_addr_t phys;
+
+ phys = pci_map_single(hwdev, ptr, size, direction);
+
+ if (phys == 0) {
+ pci_unmap_single(hwdev, phys, size, direction);
+ phys = pci_map_single(hwdev, ptr, size, direction);
+ /*
+ * FIXME: If we get 0 again, we should keep this page,
+ * map another, then free the 0 page.
+ */
+ }
+
+ return phys;
+}
+
+/**
+ * ipath_get_user_pages - lock user pages into memory
+ * @start_page: the start page
+ * @num_pages: the number of pages
+ * @p: the output page structures
+ *
+ * This function takes a given start page (page aligned user virtual
+ * address) and pins it and the following specified number of pages. For
+ * now, num_pages is always 1, but that will probably change at some point
+ * (because caller is doing expected sends on a single virtually contiguous
+ * buffer, so we can do all pages at once).
+ */
+int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
+ struct page **p)
+{
+ int ret;
+
+ down_write(&current->mm->mmap_sem);
+
+ ret = __get_user_pages(start_page, num_pages, p, NULL);
+
+ up_write(&current->mm->mmap_sem);
+
+ return ret;
+}
+
+void ipath_release_user_pages(struct page **p, size_t num_pages)
+{
+ down_write(&current->mm->mmap_sem);
+
+ __ipath_release_user_pages(p, num_pages, 1);
+
+ current->mm->locked_vm -= num_pages;
+
+ up_write(&current->mm->mmap_sem);
+}
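A minimal usage sketch for the two helpers above (hypothetical caller; uaddr stands for a page-aligned user virtual address):

	struct page *pg;
	int ret = ipath_get_user_pages(uaddr, 1, &pg);	/* pins one page, bumps locked_vm */
	if (!ret) {
		/* ... let the hardware DMA into the pinned page ... */
		ipath_release_user_pages(&pg, 1);	/* unpins and drops locked_vm */
	}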
+
+struct ipath_user_pages_work {
+ struct work_struct work;
+ struct mm_struct *mm;
+ unsigned long num_pages;
+};
+
+static void user_pages_account(struct work_struct *_work)
+{
+ struct ipath_user_pages_work *work =
+ container_of(_work, struct ipath_user_pages_work, work);
+
+ down_write(&work->mm->mmap_sem);
+ work->mm->locked_vm -= work->num_pages;
+ up_write(&work->mm->mmap_sem);
+ mmput(work->mm);
+ kfree(work);
+}
+
+void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
+{
+ struct ipath_user_pages_work *work;
+ struct mm_struct *mm;
+
+ __ipath_release_user_pages(p, num_pages, 1);
+
+ mm = get_task_mm(current);
+ if (!mm)
+ goto bail;
+
+ work = kmalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ goto bail_mm;
+
+	INIT_WORK(&work->work, user_pages_account);
+	work->mm = mm;
+	work->num_pages = num_pages;
+	schedule_work(&work->work);
+	goto bail;
+
+bail_mm:
+ mmput(mm);
+bail:
+ return;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
new file mode 100644
index 0000000..82d9a0b
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
@@ -0,0 +1,879 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/uio.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+
+#include "ipath_kernel.h"
+#include "ipath_user_sdma.h"
+
+/* minimum size of header */
+#define IPATH_USER_SDMA_MIN_HEADER_LENGTH 64
+/* expected size of headers (for dma_pool) */
+#define IPATH_USER_SDMA_EXP_HEADER_LENGTH 64
+/* length mask in PBC (lower 11 bits) */
+#define IPATH_PBC_LENGTH_MASK ((1 << 11) - 1)
+
+struct ipath_user_sdma_pkt {
+ u8 naddr; /* dimension of addr (1..3) ... */
+ u32 counter; /* sdma pkts queued counter for this entry */
+ u64 added; /* global descq number of entries */
+
+ struct {
+ u32 offset; /* offset for kvaddr, addr */
+ u32 length; /* length in page */
+ u8 put_page; /* should we put_page? */
+ u8 dma_mapped; /* is page dma_mapped? */
+ struct page *page; /* may be NULL (coherent mem) */
+ void *kvaddr; /* FIXME: only for pio hack */
+ dma_addr_t addr;
+ } addr[4]; /* max pages, any more and we coalesce */
+ struct list_head list; /* list element */
+};
+
+struct ipath_user_sdma_queue {
+ /*
+ * pkts sent to dma engine are queued on this
+	 * list head. the elements of this list are of type
+	 * struct ipath_user_sdma_pkt...
+ */
+ struct list_head sent;
+
+ /* headers with expected length are allocated from here... */
+ char header_cache_name[64];
+ struct dma_pool *header_cache;
+
+ /* packets are allocated from the slab cache... */
+ char pkt_slab_name[64];
+ struct kmem_cache *pkt_slab;
+
+ /* as packets go on the queued queue, they are counted... */
+ u32 counter;
+ u32 sent_counter;
+
+ /* dma page table */
+ struct rb_root dma_pages_root;
+
+ /* protect everything above... */
+ struct mutex lock;
+};
+
+struct ipath_user_sdma_queue *
+ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport)
+{
+ struct ipath_user_sdma_queue *pq =
+ kmalloc(sizeof(struct ipath_user_sdma_queue), GFP_KERNEL);
+
+ if (!pq)
+ goto done;
+
+ pq->counter = 0;
+ pq->sent_counter = 0;
+ INIT_LIST_HEAD(&pq->sent);
+
+ mutex_init(&pq->lock);
+
+ snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
+ "ipath-user-sdma-pkts-%u-%02u.%02u", unit, port, sport);
+ pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
+ sizeof(struct ipath_user_sdma_pkt),
+ 0, 0, NULL);
+
+ if (!pq->pkt_slab)
+ goto err_kfree;
+
+ snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
+ "ipath-user-sdma-headers-%u-%02u.%02u", unit, port, sport);
+ pq->header_cache = dma_pool_create(pq->header_cache_name,
+ dev,
+ IPATH_USER_SDMA_EXP_HEADER_LENGTH,
+ 4, 0);
+ if (!pq->header_cache)
+ goto err_slab;
+
+ pq->dma_pages_root = RB_ROOT;
+
+ goto done;
+
+err_slab:
+ kmem_cache_destroy(pq->pkt_slab);
+err_kfree:
+ kfree(pq);
+ pq = NULL;
+
+done:
+ return pq;
+}
+
+static void ipath_user_sdma_init_frag(struct ipath_user_sdma_pkt *pkt,
+ int i, size_t offset, size_t len,
+ int put_page, int dma_mapped,
+ struct page *page,
+ void *kvaddr, dma_addr_t dma_addr)
+{
+ pkt->addr[i].offset = offset;
+ pkt->addr[i].length = len;
+ pkt->addr[i].put_page = put_page;
+ pkt->addr[i].dma_mapped = dma_mapped;
+ pkt->addr[i].page = page;
+ pkt->addr[i].kvaddr = kvaddr;
+ pkt->addr[i].addr = dma_addr;
+}
+
+static void ipath_user_sdma_init_header(struct ipath_user_sdma_pkt *pkt,
+ u32 counter, size_t offset,
+ size_t len, int dma_mapped,
+ struct page *page,
+ void *kvaddr, dma_addr_t dma_addr)
+{
+ pkt->naddr = 1;
+ pkt->counter = counter;
+ ipath_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
+ kvaddr, dma_addr);
+}
+
+/* we have too many pages in the iovec, so coalesce to a single page */
+static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
+ struct ipath_user_sdma_pkt *pkt,
+ const struct iovec *iov,
+				     unsigned long niov)
+{
+ int ret = 0;
+ struct page *page = alloc_page(GFP_KERNEL);
+ void *mpage_save;
+ char *mpage;
+ int i;
+ int len = 0;
+ dma_addr_t dma_addr;
+
+ if (!page) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ mpage = kmap(page);
+ mpage_save = mpage;
+ for (i = 0; i < niov; i++) {
+ int cfur;
+
+ cfur = copy_from_user(mpage,
+ iov[i].iov_base, iov[i].iov_len);
+ if (cfur) {
+ ret = -EFAULT;
+ goto free_unmap;
+ }
+
+ mpage += iov[i].iov_len;
+ len += iov[i].iov_len;
+ }
+
+ dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ ret = -ENOMEM;
+ goto free_unmap;
+ }
+
+ ipath_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
+ dma_addr);
+ pkt->naddr = 2;
+
+ goto done;
+
+free_unmap:
+ kunmap(page);
+ __free_page(page);
+done:
+ return ret;
+}
+
+/* how many pages in this iovec element? */
+static int ipath_user_sdma_num_pages(const struct iovec *iov)
+{
+ const unsigned long addr = (unsigned long) iov->iov_base;
+ const unsigned long len = iov->iov_len;
+ const unsigned long spage = addr & PAGE_MASK;
+ const unsigned long epage = (addr + len - 1) & PAGE_MASK;
+
+ return 1 + ((epage - spage) >> PAGE_SHIFT);
+}
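A quick check of ipath_user_sdma_num_pages() with concrete numbers (assuming 4 KB pages): iov_base = 0x1ff8 and iov_len = 16 give spage = 0x1000 and epage = (0x1ff8 + 15) & PAGE_MASK = 0x2000, so 1 + ((0x2000 - 0x1000) >> PAGE_SHIFT) = 2 pages are spanned.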
+
+/* truncate length to page boundary */
+static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len)
+{
+ const unsigned long offset = addr & ~PAGE_MASK;
+
+ return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
+}
+
+static void ipath_user_sdma_free_pkt_frag(struct device *dev,
+ struct ipath_user_sdma_queue *pq,
+ struct ipath_user_sdma_pkt *pkt,
+ int frag)
+{
+ const int i = frag;
+
+ if (pkt->addr[i].page) {
+ if (pkt->addr[i].dma_mapped)
+ dma_unmap_page(dev,
+ pkt->addr[i].addr,
+ pkt->addr[i].length,
+ DMA_TO_DEVICE);
+
+ if (pkt->addr[i].kvaddr)
+ kunmap(pkt->addr[i].page);
+
+ if (pkt->addr[i].put_page)
+ put_page(pkt->addr[i].page);
+ else
+ __free_page(pkt->addr[i].page);
+ } else if (pkt->addr[i].kvaddr)
+ /* free coherent mem from cache... */
+ dma_pool_free(pq->header_cache,
+ pkt->addr[i].kvaddr, pkt->addr[i].addr);
+}
+
+/* return number of pages pinned... */
+static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
+ struct ipath_user_sdma_pkt *pkt,
+ unsigned long addr, int tlen, int npages)
+{
+ struct page *pages[2];
+ int j;
+ int ret;
+
+ ret = get_user_pages(current, current->mm, addr,
+ npages, 0, 1, pages, NULL);
+
+ if (ret != npages) {
+ int i;
+
+ for (i = 0; i < ret; i++)
+ put_page(pages[i]);
+
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ for (j = 0; j < npages; j++) {
+ /* map the pages... */
+ const int flen =
+ ipath_user_sdma_page_length(addr, tlen);
+ dma_addr_t dma_addr =
+ dma_map_page(&dd->pcidev->dev,
+ pages[j], 0, flen, DMA_TO_DEVICE);
+ unsigned long fofs = addr & ~PAGE_MASK;
+
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ ipath_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
+ pages[j], kmap(pages[j]),
+ dma_addr);
+
+ pkt->naddr++;
+ addr += flen;
+ tlen -= flen;
+ }
+
+done:
+ return ret;
+}
+
+static int ipath_user_sdma_pin_pkt(const struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ struct ipath_user_sdma_pkt *pkt,
+ const struct iovec *iov,
+ unsigned long niov)
+{
+ int ret = 0;
+ unsigned long idx;
+
+ for (idx = 0; idx < niov; idx++) {
+ const int npages = ipath_user_sdma_num_pages(iov + idx);
+ const unsigned long addr = (unsigned long) iov[idx].iov_base;
+
+ ret = ipath_user_sdma_pin_pages(dd, pkt,
+ addr, iov[idx].iov_len,
+ npages);
+ if (ret < 0)
+ goto free_pkt;
+ }
+
+ goto done;
+
+free_pkt:
+ for (idx = 0; idx < pkt->naddr; idx++)
+ ipath_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
+
+done:
+ return ret;
+}
+
+static int ipath_user_sdma_init_payload(const struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ struct ipath_user_sdma_pkt *pkt,
+ const struct iovec *iov,
+ unsigned long niov, int npages)
+{
+ int ret = 0;
+
+ if (npages >= ARRAY_SIZE(pkt->addr))
+ ret = ipath_user_sdma_coalesce(dd, pkt, iov, niov);
+ else
+ ret = ipath_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
+
+ return ret;
+}
+
+/* free every packet on the list, releasing all of its fragments */
+static void ipath_user_sdma_free_pkt_list(struct device *dev,
+ struct ipath_user_sdma_queue *pq,
+ struct list_head *list)
+{
+ struct ipath_user_sdma_pkt *pkt, *pkt_next;
+
+ list_for_each_entry_safe(pkt, pkt_next, list, list) {
+ int i;
+
+ for (i = 0; i < pkt->naddr; i++)
+ ipath_user_sdma_free_pkt_frag(dev, pq, pkt, i);
+
+ kmem_cache_free(pq->pkt_slab, pkt);
+ }
+}
+
+/*
+ * Copy headers, coalesce etc. -- pq->lock must be held.
+ *
+ * We queue all the packets on list and return the number of iovec
+ * entries consumed.  The list must be empty initially, since we
+ * clean it up if there is an error.
+ */
+static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ struct list_head *list,
+ const struct iovec *iov,
+ unsigned long niov,
+ int maxpkts)
+{
+ unsigned long idx = 0;
+ int ret = 0;
+ int npkts = 0;
+ struct page *page = NULL;
+ __le32 *pbc;
+ dma_addr_t dma_addr;
+ struct ipath_user_sdma_pkt *pkt = NULL;
+ size_t len;
+ size_t nw;
+ u32 counter = pq->counter;
+ int dma_mapped = 0;
+
+ while (idx < niov && npkts < maxpkts) {
+ const unsigned long addr = (unsigned long) iov[idx].iov_base;
+ const unsigned long idx_save = idx;
+ unsigned pktnw;
+ unsigned pktnwc;
+ int nfrags = 0;
+ int npages = 0;
+ int cfur;
+
+ dma_mapped = 0;
+ len = iov[idx].iov_len;
+ nw = len >> 2;
+ page = NULL;
+
+ pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
+ if (!pkt) {
+ ret = -ENOMEM;
+ goto free_list;
+ }
+
+ if (len < IPATH_USER_SDMA_MIN_HEADER_LENGTH ||
+ len > PAGE_SIZE || len & 3 || addr & 3) {
+ ret = -EINVAL;
+ goto free_pkt;
+ }
+
+ if (len == IPATH_USER_SDMA_EXP_HEADER_LENGTH)
+ pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
+ &dma_addr);
+ else
+ pbc = NULL;
+
+ if (!pbc) {
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto free_pkt;
+ }
+ pbc = kmap(page);
+ }
+
+ cfur = copy_from_user(pbc, iov[idx].iov_base, len);
+ if (cfur) {
+ ret = -EFAULT;
+ goto free_pbc;
+ }
+
+ /*
+ * This assignment is a bit strange.  It's because the
+ * PBC counts the number of 32 bit words in the full
+ * packet _except_ the first word of the PBC itself.
+ */
+ pktnwc = nw - 1;
+
+ /*
+ * pktnw computation yields the number of 32 bit words
+ * that the caller has indicated in the PBC. note that
+ * this is one less than the total number of words that
+ * goes to the send DMA engine as the first 32 bit word
+ * of the PBC itself is not counted. Armed with this count,
+ * we can verify that the packet is consistent with the
+ * iovec lengths.
+ */
+ pktnw = le32_to_cpu(*pbc) & IPATH_PBC_LENGTH_MASK;
+ if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+
+ idx++;
+ while (pktnwc < pktnw && idx < niov) {
+ const size_t slen = iov[idx].iov_len;
+ const unsigned long faddr =
+ (unsigned long) iov[idx].iov_base;
+
+ if (slen & 3 || faddr & 3 || !slen ||
+ slen > PAGE_SIZE) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ npages++;
+ if ((faddr & PAGE_MASK) !=
+ ((faddr + slen - 1) & PAGE_MASK))
+ npages++;
+
+ pktnwc += slen >> 2;
+ idx++;
+ nfrags++;
+ }
+
+ if (pktnwc != pktnw) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ if (page) {
+ dma_addr = dma_map_page(&dd->pcidev->dev,
+ page, 0, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ ret = -ENOMEM;
+ goto free_pbc;
+ }
+
+ dma_mapped = 1;
+ }
+
+ ipath_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
+ page, pbc, dma_addr);
+
+ if (nfrags) {
+ ret = ipath_user_sdma_init_payload(dd, pq, pkt,
+ iov + idx_save + 1,
+ nfrags, npages);
+ if (ret < 0)
+ goto free_pbc_dma;
+ }
+
+ counter++;
+ npkts++;
+
+ list_add_tail(&pkt->list, list);
+ }
+
+ ret = idx;
+ goto done;
+
+free_pbc_dma:
+ if (dma_mapped)
+ dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
+free_pbc:
+ if (page) {
+ kunmap(page);
+ __free_page(page);
+ } else
+ dma_pool_free(pq->header_cache, pbc, dma_addr);
+free_pkt:
+ kmem_cache_free(pq->pkt_slab, pkt);
+free_list:
+ ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
+done:
+ return ret;
+}
+
+static void ipath_user_sdma_set_complete_counter(struct ipath_user_sdma_queue *pq,
+ u32 c)
+{
+ pq->sent_counter = c;
+}
+
+/* try to clean out queue -- needs pq->lock */
+static int ipath_user_sdma_queue_clean(const struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq)
+{
+ struct list_head free_list;
+ struct ipath_user_sdma_pkt *pkt;
+ struct ipath_user_sdma_pkt *pkt_prev;
+ int ret = 0;
+
+ INIT_LIST_HEAD(&free_list);
+
+ list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
+ s64 descd = dd->ipath_sdma_descq_removed - pkt->added;
+
+ if (descd < 0)
+ break;
+
+ list_move_tail(&pkt->list, &free_list);
+
+ /* one more packet cleaned */
+ ret++;
+ }
+
+ if (!list_empty(&free_list)) {
+ u32 counter;
+
+ pkt = list_entry(free_list.prev,
+ struct ipath_user_sdma_pkt, list);
+ counter = pkt->counter;
+
+ ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
+ ipath_user_sdma_set_complete_counter(pq, counter);
+ }
+
+ return ret;
+}
+
+void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq)
+{
+ if (!pq)
+ return;
+
+ kmem_cache_destroy(pq->pkt_slab);
+ dma_pool_destroy(pq->header_cache);
+ kfree(pq);
+}
+
+/* clean descriptor queue, returns > 0 if some elements cleaned */
+static int ipath_user_sdma_hwqueue_clean(struct ipath_devdata *dd)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ ret = ipath_sdma_make_progress(dd);
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ return ret;
+}
+
+/* called at close time; drain packets so that we can clean up successfully */
+void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq)
+{
+ int i;
+
+ if (!pq)
+ return;
+
+ for (i = 0; i < 100; i++) {
+ mutex_lock(&pq->lock);
+ if (list_empty(&pq->sent)) {
+ mutex_unlock(&pq->lock);
+ break;
+ }
+ ipath_user_sdma_hwqueue_clean(dd);
+ ipath_user_sdma_queue_clean(dd, pq);
+ mutex_unlock(&pq->lock);
+ msleep(10);
+ }
+
+ if (!list_empty(&pq->sent)) {
+ struct list_head free_list;
+
+ printk(KERN_INFO "drain: lists not empty: forcing!\n");
+ INIT_LIST_HEAD(&free_list);
+ mutex_lock(&pq->lock);
+ list_splice_init(&pq->sent, &free_list);
+ ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
+ mutex_unlock(&pq->lock);
+ }
+}
+
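+/*
+ * Build the low qword of an SDMA descriptor from the DMA address,
+ * current generation, dword count and buffer offset.
+ */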
+static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd,
+ u64 addr, u64 dwlen, u64 dwoffset)
+{
+ return cpu_to_le64(/* SDmaPhyAddr[31:0] */
+ ((addr & 0xfffffffcULL) << 32) |
+ /* SDmaGeneration[1:0] */
+ ((dd->ipath_sdma_generation & 3ULL) << 30) |
+ /* SDmaDwordCount[10:0] */
+ ((dwlen & 0x7ffULL) << 16) |
+ /* SDmaBufOffset[12:2] */
+ (dwoffset & 0x7ffULL));
+}
+
+static inline __le64 ipath_sdma_make_first_desc0(__le64 descq)
+{
+ return descq | __constant_cpu_to_le64(1ULL << 12);
+}
+
+static inline __le64 ipath_sdma_make_last_desc0(__le64 descq)
+{
+ /* last */ /* dma head */
+ return descq | __constant_cpu_to_le64(1ULL << 11 | 1ULL << 13);
+}
+
+static inline __le64 ipath_sdma_make_desc1(u64 addr)
+{
+ /* SDmaPhyAddr[47:32] */
+ return cpu_to_le64(addr >> 32);
+}
+
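+/*
+ * Write the descriptor (two qwords) for one packet fragment at the
+ * given tail of the SDMA descriptor queue, marking the first and last
+ * fragments of the packet appropriately.
+ */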
+static void ipath_user_sdma_send_frag(struct ipath_devdata *dd,
+ struct ipath_user_sdma_pkt *pkt, int idx,
+ unsigned ofs, u16 tail)
+{
+ const u64 addr = (u64) pkt->addr[idx].addr +
+ (u64) pkt->addr[idx].offset;
+ const u64 dwlen = (u64) pkt->addr[idx].length / 4;
+ __le64 *descqp;
+ __le64 descq0;
+
+ descqp = &dd->ipath_sdma_descq[tail].qw[0];
+
+ descq0 = ipath_sdma_make_desc0(dd, addr, dwlen, ofs);
+ if (idx == 0)
+ descq0 = ipath_sdma_make_first_desc0(descq0);
+ if (idx == pkt->naddr - 1)
+ descq0 = ipath_sdma_make_last_desc0(descq0);
+
+ descqp[0] = descq0;
+ descqp[1] = ipath_sdma_make_desc1(addr);
+}
+
+/* pq->lock must be held, get packets on the wire... */
+static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ struct list_head *pktlist)
+{
+ int ret = 0;
+ unsigned long flags;
+ u16 tail;
+
+ if (list_empty(pktlist))
+ return 0;
+
+ if (unlikely(!(dd->ipath_flags & IPATH_LINKACTIVE)))
+ return -ECOMM;
+
+ spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+
+ if (unlikely(dd->ipath_sdma_status & IPATH_SDMA_ABORT_MASK)) {
+ ret = -ECOMM;
+ goto unlock;
+ }
+
+ tail = dd->ipath_sdma_descq_tail;
+ while (!list_empty(pktlist)) {
+ struct ipath_user_sdma_pkt *pkt =
+ list_entry(pktlist->next, struct ipath_user_sdma_pkt,
+ list);
+ int i;
+ unsigned ofs = 0;
+ u16 dtail = tail;
+
+ if (pkt->naddr > ipath_sdma_descq_freecnt(dd))
+ goto unlock_check_tail;
+
+ for (i = 0; i < pkt->naddr; i++) {
+ ipath_user_sdma_send_frag(dd, pkt, i, ofs, tail);
+ ofs += pkt->addr[i].length >> 2;
+
+ if (++tail == dd->ipath_sdma_descq_cnt) {
+ tail = 0;
+ ++dd->ipath_sdma_generation;
+ }
+ }
+
+ if ((ofs << 2) > dd->ipath_ibmaxlen) {
+ ipath_dbg("packet size %X > ibmax %X, fail\n",
+ ofs << 2, dd->ipath_ibmaxlen);
+ ret = -EMSGSIZE;
+ goto unlock;
+ }
+
+ /*
+ * if the packet is >= 2KB mtu equivalent, we have to use
+ * the large buffers, and have to mark each descriptor as
+ * part of a large buffer packet.
+ */
+ if (ofs >= IPATH_SMALLBUF_DWORDS) {
+ for (i = 0; i < pkt->naddr; i++) {
+ dd->ipath_sdma_descq[dtail].qw[0] |=
+ __constant_cpu_to_le64(1ULL << 14);
+ if (++dtail == dd->ipath_sdma_descq_cnt)
+ dtail = 0;
+ }
+ }
+
+ dd->ipath_sdma_descq_added += pkt->naddr;
+ pkt->added = dd->ipath_sdma_descq_added;
+ list_move_tail(&pkt->list, &pq->sent);
+ ret++;
+ }
+
+unlock_check_tail:
+ /* advance the tail on the chip if necessary */
+ if (dd->ipath_sdma_descq_tail != tail) {
+ wmb();
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmatail, tail);
+ dd->ipath_sdma_descq_tail = tail;
+ }
+
+unlock:
+ spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+
+ return ret;
+}
+
+int ipath_user_sdma_writev(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ const struct iovec *iov,
+ unsigned long dim)
+{
+ int ret = 0;
+ struct list_head list;
+ int npkts = 0;
+
+ INIT_LIST_HEAD(&list);
+
+ mutex_lock(&pq->lock);
+
+ if (dd->ipath_sdma_descq_added != dd->ipath_sdma_descq_removed) {
+ ipath_user_sdma_hwqueue_clean(dd);
+ ipath_user_sdma_queue_clean(dd, pq);
+ }
+
+ while (dim) {
+ const int mxp = 8;
+
+ down_write(&current->mm->mmap_sem);
+ ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
+ up_write(&current->mm->mmap_sem);
+
+ if (ret <= 0)
+ goto done_unlock;
+ else {
+ dim -= ret;
+ iov += ret;
+ }
+
+ /* force packets onto the sdma hw queue... */
+ if (!list_empty(&list)) {
+ /*
+ * lazily clean hw queue. the 4 is a guess of about
+ * how many sdma descriptors a packet will take (it
+ * doesn't have to be perfect).
+ */
+ if (ipath_sdma_descq_freecnt(dd) < ret * 4) {
+ ipath_user_sdma_hwqueue_clean(dd);
+ ipath_user_sdma_queue_clean(dd, pq);
+ }
+
+ ret = ipath_user_sdma_push_pkts(dd, pq, &list);
+ if (ret < 0)
+ goto done_unlock;
+ else {
+ npkts += ret;
+ pq->counter += ret;
+
+ if (!list_empty(&list))
+ goto done_unlock;
+ }
+ }
+ }
+
+done_unlock:
+ if (!list_empty(&list))
+ ipath_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
+ mutex_unlock(&pq->lock);
+
+ return (ret < 0) ? ret : npkts;
+}
+
+int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq)
+{
+ int ret = 0;
+
+ mutex_lock(&pq->lock);
+ ipath_user_sdma_hwqueue_clean(dd);
+ ret = ipath_user_sdma_queue_clean(dd, pq);
+ mutex_unlock(&pq->lock);
+
+ return ret;
+}
+
+u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq)
+{
+ return pq->sent_counter;
+}
+
+u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq)
+{
+ return pq->counter;
+}
+
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
new file mode 100644
index 0000000..fc76316
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/device.h>
+
+struct ipath_user_sdma_queue;
+
+struct ipath_user_sdma_queue *
+ipath_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
+void ipath_user_sdma_queue_destroy(struct ipath_user_sdma_queue *pq);
+
+int ipath_user_sdma_writev(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq,
+ const struct iovec *iov,
+ unsigned long dim);
+
+int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq);
+
+void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
+ struct ipath_user_sdma_queue *pq);
+
+u32 ipath_user_sdma_complete_counter(const struct ipath_user_sdma_queue *pq);
+u32 ipath_user_sdma_inflight_counter(struct ipath_user_sdma_queue *pq);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
new file mode 100644
index 0000000..eabc424
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -0,0 +1,2338 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/io.h>
+#include <linux/utsname.h>
+#include <linux/rculist.h>
+
+#include "ipath_kernel.h"
+#include "ipath_verbs.h"
+#include "ipath_common.h"
+
+static unsigned int ib_ipath_qp_table_size = 251;
+module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
+MODULE_PARM_DESC(qp_table_size, "QP table size");
+
+unsigned int ib_ipath_lkey_table_size = 12;
+module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
+ S_IRUGO);
+MODULE_PARM_DESC(lkey_table_size,
+ "LKEY table size in bits (2^n, 1 <= n <= 23)");
+
+static unsigned int ib_ipath_max_pds = 0xFFFF;
+module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_pds,
+ "Maximum number of protection domains to support");
+
+static unsigned int ib_ipath_max_ahs = 0xFFFF;
+module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
+
+unsigned int ib_ipath_max_cqes = 0x2FFFF;
+module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_cqes,
+ "Maximum number of completion queue entries to support");
+
+unsigned int ib_ipath_max_cqs = 0x1FFFF;
+module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
+
+unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
+module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
+
+unsigned int ib_ipath_max_qps = 16384;
+module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
+
+unsigned int ib_ipath_max_sges = 0x60;
+module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
+
+unsigned int ib_ipath_max_mcast_grps = 16384;
+module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_mcast_grps,
+ "Maximum number of multicast groups to support");
+
+unsigned int ib_ipath_max_mcast_qp_attached = 16;
+module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
+ uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_mcast_qp_attached,
+ "Maximum number of attached QPs to support");
+
+unsigned int ib_ipath_max_srqs = 1024;
+module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
+
+unsigned int ib_ipath_max_srq_sges = 128;
+module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
+ uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
+
+unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
+module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
+ uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
+
+static unsigned int ib_ipath_disable_sma;
+module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(disable_sma, "Disable the SMA");
+
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; ipath_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
+const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = 0,
+ [IB_QPS_INIT] = IPATH_POST_RECV_OK,
+ [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
+ [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
+ IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
+ IPATH_PROCESS_NEXT_SEND_OK,
+ [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
+ IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
+ [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
+ IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
+ [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
+ IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
+};
+
+struct ipath_ucontext {
+ struct ib_ucontext ibucontext;
+};
+
+static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
+ *ibucontext)
+{
+ return container_of(ibucontext, struct ipath_ucontext, ibucontext);
+}
+
+/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
+};
+
+/*
+ * System image GUID.
+ */
+static __be64 sys_image_guid;
+
+/**
+ * ipath_copy_sge - copy data to SGE memory
+ * @ss: the SGE state
+ * @data: the data to copy
+ * @length: the length of the data
+ */
+void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
+{
+ struct ipath_sge *sge = &ss->sge;
+
+ while (length) {
+ u32 len = sge->length;
+
+ if (len > length)
+ len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ memcpy(sge->vaddr, data, len);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr != NULL) {
+ if (++sge->n >= IPATH_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ data += len;
+ length -= len;
+ }
+}
+
+/**
+ * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
+ * @ss: the SGE state
+ * @length: the number of bytes to skip
+ */
+void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
+{
+ struct ipath_sge *sge = &ss->sge;
+
+ while (length) {
+ u32 len = sge->length;
+
+ if (len > length)
+ len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr != NULL) {
+ if (++sge->n >= IPATH_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ length -= len;
+ }
+}
+
+/*
+ * Count the number of DMA descriptors needed to send length bytes of data.
+ * Don't modify the ipath_sge_state to get the count.
+ * Return zero if any of the segments is not aligned.
+ */
+static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
+{
+ struct ipath_sge *sg_list = ss->sg_list;
+ struct ipath_sge sge = ss->sge;
+ u8 num_sge = ss->num_sge;
+ u32 ndesc = 1; /* count the header */
+
+ while (length) {
+ u32 len = sge.length;
+
+ if (len > length)
+ len = length;
+ if (len > sge.sge_length)
+ len = sge.sge_length;
+ BUG_ON(len == 0);
+ if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
+ (len != length && (len & (sizeof(u32) - 1)))) {
+ ndesc = 0;
+ break;
+ }
+ ndesc++;
+ sge.vaddr += len;
+ sge.length -= len;
+ sge.sge_length -= len;
+ if (sge.sge_length == 0) {
+ if (--num_sge)
+ sge = *sg_list++;
+ } else if (sge.length == 0 && sge.mr != NULL) {
+ if (++sge.n >= IPATH_SEGSZ) {
+ if (++sge.m >= sge.mr->mapsz)
+ break;
+ sge.n = 0;
+ }
+ sge.vaddr =
+ sge.mr->map[sge.m]->segs[sge.n].vaddr;
+ sge.length =
+ sge.mr->map[sge.m]->segs[sge.n].length;
+ }
+ length -= len;
+ }
+ return ndesc;
+}
+
+/*
+ * Copy from the SGEs to the data buffer.
+ */
+static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
+ u32 length)
+{
+ struct ipath_sge *sge = &ss->sge;
+
+ while (length) {
+ u32 len = sge->length;
+
+ if (len > length)
+ len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ memcpy(data, sge->vaddr, len);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr != NULL) {
+ if (++sge->n >= IPATH_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ data += len;
+ length -= len;
+ }
+}
+
+/**
+ * ipath_post_one_send - post one RC, UC, or UD send work request
+ * @qp: the QP to post on
+ * @wr: the work request to send
+ */
+static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
+{
+ struct ipath_swqe *wqe;
+ u32 next;
+ int i;
+ int j;
+ int acc;
+ int ret;
+ unsigned long flags;
+ struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (qp->ibqp.qp_type != IB_QPT_SMI &&
+ !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+ ret = -ENETDOWN;
+ goto bail;
+ }
+
+ /* Check that state is OK to post send. */
+ if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
+ goto bail_inval;
+
+ /* IB spec says that num_sge == 0 is OK. */
+ if (wr->num_sge > qp->s_max_sge)
+ goto bail_inval;
+
+ /*
+ * Don't allow RDMA reads or atomic operations on UC QPs, or
+ * undefined opcodes.
+ * Make sure the buffer is large enough to hold the result for atomics.
+ */
+ if (qp->ibqp.qp_type == IB_QPT_UC) {
+ if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
+ goto bail_inval;
+ } else if (qp->ibqp.qp_type == IB_QPT_UD) {
+ /* Check UD opcode */
+ if (wr->opcode != IB_WR_SEND &&
+ wr->opcode != IB_WR_SEND_WITH_IMM)
+ goto bail_inval;
+ /* Check UD destination address PD */
+ if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+ goto bail_inval;
+ } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
+ goto bail_inval;
+ else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
+ (wr->num_sge == 0 ||
+ wr->sg_list[0].length < sizeof(u64) ||
+ wr->sg_list[0].addr & (sizeof(u64) - 1)))
+ goto bail_inval;
+ else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
+ goto bail_inval;
+
+ next = qp->s_head + 1;
+ if (next >= qp->s_size)
+ next = 0;
+ if (next == qp->s_last) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ wqe = get_swqe_ptr(qp, qp->s_head);
+ wqe->wr = *wr;
+ wqe->length = 0;
+ if (wr->num_sge) {
+ acc = wr->opcode >= IB_WR_RDMA_READ ?
+ IB_ACCESS_LOCAL_WRITE : 0;
+ for (i = 0, j = 0; i < wr->num_sge; i++) {
+ u32 length = wr->sg_list[i].length;
+ int ok;
+
+ if (length == 0)
+ continue;
+ ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
+ &wr->sg_list[i], acc);
+ if (!ok)
+ goto bail_inval;
+ wqe->length += length;
+ j++;
+ }
+ wqe->wr.num_sge = j;
+ }
+ if (qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_RC) {
+ if (wqe->length > 0x80000000U)
+ goto bail_inval;
+ } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
+ goto bail_inval;
+ wqe->ssn = qp->s_ssn++;
+ qp->s_head = next;
+
+ ret = 0;
+ goto bail;
+
+bail_inval:
+ ret = -EINVAL;
+bail:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
+
+/**
+ * ipath_post_send - post a send on a QP
+ * @ibqp: the QP to post the send on
+ * @wr: the list of work requests to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ */
+static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct ipath_qp *qp = to_iqp(ibqp);
+ int err = 0;
+
+ for (; wr; wr = wr->next) {
+ err = ipath_post_one_send(qp, wr);
+ if (err) {
+ *bad_wr = wr;
+ goto bail;
+ }
+ }
+
+ /* Try to do the send work in the caller's context. */
+ ipath_do_send((unsigned long) qp);
+
+bail:
+ return err;
+}
+
+/**
+ * ipath_post_receive - post a receive on a QP
+ * @ibqp: the QP to post the receive on
+ * @wr: the WR to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ */
+static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct ipath_qp *qp = to_iqp(ibqp);
+ struct ipath_rwq *wq = qp->r_rq.wq;
+ unsigned long flags;
+ int ret;
+
+ /* Check that state is OK to post receive. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ for (; wr; wr = wr->next) {
+ struct ipath_rwqe *wqe;
+ u32 next;
+ int i;
+
+ if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ spin_lock_irqsave(&qp->r_rq.lock, flags);
+ next = wq->head + 1;
+ if (next >= qp->r_rq.size)
+ next = 0;
+ if (next == wq->tail) {
+ spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ *bad_wr = wr;
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
+ wqe->wr_id = wr->wr_id;
+ wqe->num_sge = wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++)
+ wqe->sg_list[i] = wr->sg_list[i];
+ /* Make sure queue entry is written before the head index. */
+ smp_wmb();
+ wq->head = next;
+ spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ }
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_qp_rcv - process an incoming packet on a QP
+ * @dev: the device the packet came on
+ * @hdr: the packet header
+ * @has_grh: true if the packet has a GRH
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP the packet came on
+ *
+ * This is called from ipath_ib_rcv() to process an incoming packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+static void ipath_qp_rcv(struct ipath_ibdev *dev,
+ struct ipath_ib_header *hdr, int has_grh,
+ void *data, u32 tlen, struct ipath_qp *qp)
+{
+ /* Check for valid receive state. */
+ if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
+ dev->n_pkt_drops++;
+ return;
+ }
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ if (ib_ipath_disable_sma)
+ break;
+ /* FALLTHROUGH */
+ case IB_QPT_UD:
+ ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
+ break;
+
+ case IB_QPT_RC:
+ ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
+ break;
+
+ case IB_QPT_UC:
+ ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/**
+ * ipath_ib_rcv - process an incoming packet
+ * @dev: the device pointer
+ * @rhdr: the header of the packet
+ * @data: the packet data
+ * @tlen: the packet length
+ *
+ * This is called from ipath_kreceive() to process an incoming packet at
+ * interrupt level. Tlen is the length of the header + data + CRC in bytes.
+ */
+void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
+ u32 tlen)
+{
+ struct ipath_ib_header *hdr = rhdr;
+ struct ipath_other_headers *ohdr;
+ struct ipath_qp *qp;
+ u32 qp_num;
+ int lnh;
+ u8 opcode;
+ u16 lid;
+
+ if (unlikely(dev == NULL))
+ goto bail;
+
+ if (unlikely(tlen < 24)) { /* LRH+BTH+CRC */
+ dev->rcv_errors++;
+ goto bail;
+ }
+
+ /* Check for a valid destination LID (see ch. 7.11.1). */
+ lid = be16_to_cpu(hdr->lrh[1]);
+ if (lid < IPATH_MULTICAST_LID_BASE) {
+ lid &= ~((1 << dev->dd->ipath_lmc) - 1);
+ if (unlikely(lid != dev->dd->ipath_lid)) {
+ dev->rcv_errors++;
+ goto bail;
+ }
+ }
+
+ /* Check for GRH */
+ lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ if (lnh == IPATH_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else if (lnh == IPATH_LRH_GRH)
+ ohdr = &hdr->u.l.oth;
+ else {
+ dev->rcv_errors++;
+ goto bail;
+ }
+
+ opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ dev->opstats[opcode].n_bytes += tlen;
+ dev->opstats[opcode].n_packets++;
+
+ /* Get the destination QP number. */
+ qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
+ if (qp_num == IPATH_MULTICAST_QPN) {
+ struct ipath_mcast *mcast;
+ struct ipath_mcast_qp *p;
+
+ if (lnh != IPATH_LRH_GRH) {
+ dev->n_pkt_drops++;
+ goto bail;
+ }
+ mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
+ if (mcast == NULL) {
+ dev->n_pkt_drops++;
+ goto bail;
+ }
+ dev->n_multicast_rcv++;
+ list_for_each_entry_rcu(p, &mcast->qp_list, list)
+ ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
+ /*
+ * Notify ipath_multicast_detach() if it is waiting for us
+ * to finish.
+ */
+ if (atomic_dec_return(&mcast->refcount) <= 1)
+ wake_up(&mcast->wait);
+ } else {
+ qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
+ if (qp) {
+ dev->n_unicast_rcv++;
+ ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
+ tlen, qp);
+ /*
+ * Notify ipath_destroy_qp() if it is waiting
+ * for us to finish.
+ */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ } else
+ dev->n_pkt_drops++;
+ }
+
+bail:;
+}
+
+/**
+ * ipath_ib_timer - verbs timer
+ * @dev: the device pointer
+ *
+ * This is called from ipath_do_rcv_timer() at interrupt level to check for
+ * QPs which need retransmits and to collect performance numbers.
+ */
+static void ipath_ib_timer(struct ipath_ibdev *dev)
+{
+ struct ipath_qp *resend = NULL;
+ struct ipath_qp *rnr = NULL;
+ struct list_head *last;
+ struct ipath_qp *qp;
+ unsigned long flags;
+
+ if (dev == NULL)
+ return;
+
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ /* Start filling the next pending queue. */
+ if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
+ dev->pending_index = 0;
+ /* Save any requests still in the new queue; they have timed out. */
+ last = &dev->pending[dev->pending_index];
+ while (!list_empty(last)) {
+ qp = list_entry(last->next, struct ipath_qp, timerwait);
+ list_del_init(&qp->timerwait);
+ qp->timer_next = resend;
+ resend = qp;
+ atomic_inc(&qp->refcount);
+ }
+ last = &dev->rnrwait;
+ if (!list_empty(last)) {
+ qp = list_entry(last->next, struct ipath_qp, timerwait);
+ if (--qp->s_rnr_timeout == 0) {
+ do {
+ list_del_init(&qp->timerwait);
+ qp->timer_next = rnr;
+ rnr = qp;
+ atomic_inc(&qp->refcount);
+ if (list_empty(last))
+ break;
+ qp = list_entry(last->next, struct ipath_qp,
+ timerwait);
+ } while (qp->s_rnr_timeout == 0);
+ }
+ }
+ /*
+ * We should only be in the started state if pma_sample_start != 0
+ */
+ if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
+ --dev->pma_sample_start == 0) {
+ dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
+ ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
+ &dev->ipath_rword,
+ &dev->ipath_spkts,
+ &dev->ipath_rpkts,
+ &dev->ipath_xmit_wait);
+ }
+ if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
+ if (dev->pma_sample_interval == 0) {
+ u64 ta, tb, tc, td, te;
+
+ dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
+ ipath_snapshot_counters(dev->dd, &ta, &tb,
+ &tc, &td, &te);
+
+ dev->ipath_sword = ta - dev->ipath_sword;
+ dev->ipath_rword = tb - dev->ipath_rword;
+ dev->ipath_spkts = tc - dev->ipath_spkts;
+ dev->ipath_rpkts = td - dev->ipath_rpkts;
+ dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
+ }
+ else
+ dev->pma_sample_interval--;
+ }
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+
+ /* XXX What if timer fires again while this is running? */
+ while (resend != NULL) {
+ qp = resend;
+ resend = qp->timer_next;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (qp->s_last != qp->s_tail &&
+ ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
+ dev->n_timeouts++;
+ ipath_restart_rc(qp, qp->s_last_psn + 1);
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Notify ipath_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+ while (rnr != NULL) {
+ qp = rnr;
+ rnr = qp->timer_next;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Notify ipath_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+}
+
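+/*
+ * Advance the SGE state by 'length' bytes, moving on to the next SGE
+ * or the next memory region segment as needed.
+ */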
+static void update_sge(struct ipath_sge_state *ss, u32 length)
+{
+ struct ipath_sge *sge = &ss->sge;
+
+ sge->vaddr += length;
+ sge->length -= length;
+ sge->sge_length -= length;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr != NULL) {
+ if (++sge->n >= IPATH_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ return;
+ sge->n = 0;
+ }
+ sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+}
+
+#ifdef __LITTLE_ENDIAN
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+ return data >> shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+ return data << shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+ data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
+ data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+ return data;
+}
+#else
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+ return data << shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+ return data >> shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+ data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
+ data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+ return data;
+}
+#endif
+
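+/*
+ * Copy the payload from a (possibly unaligned) SGE list into the PIO
+ * buffer one 32-bit word at a time, holding back the final word so it
+ * can be written as the trigger word, with write-combining flushes
+ * where required.
+ */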
+static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
+ u32 length, unsigned flush_wc)
+{
+ u32 extra = 0;
+ u32 data = 0;
+ u32 last;
+
+ while (1) {
+ u32 len = ss->sge.length;
+ u32 off;
+
+ if (len > length)
+ len = length;
+ if (len > ss->sge.sge_length)
+ len = ss->sge.sge_length;
+ BUG_ON(len == 0);
+ /* If the source address is not aligned, try to align it. */
+ off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
+ if (off) {
+ u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
+ ~(sizeof(u32) - 1));
+ u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
+ u32 y;
+
+ y = sizeof(u32) - off;
+ if (len > y)
+ len = y;
+ if (len + extra >= sizeof(u32)) {
+ data |= set_upper_bits(v, extra *
+ BITS_PER_BYTE);
+ len = sizeof(u32) - extra;
+ if (len == length) {
+ last = data;
+ break;
+ }
+ __raw_writel(data, piobuf);
+ piobuf++;
+ extra = 0;
+ data = 0;
+ } else {
+ /* Clear unused upper bytes */
+ data |= clear_upper_bytes(v, len, extra);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ extra += len;
+ }
+ } else if (extra) {
+ /* Source address is aligned. */
+ u32 *addr = (u32 *) ss->sge.vaddr;
+ int shift = extra * BITS_PER_BYTE;
+ int ushift = 32 - shift;
+ u32 l = len;
+
+ while (l >= sizeof(u32)) {
+ u32 v = *addr;
+
+ data |= set_upper_bits(v, shift);
+ __raw_writel(data, piobuf);
+ data = get_upper_bits(v, ushift);
+ piobuf++;
+ addr++;
+ l -= sizeof(u32);
+ }
+ /*
+ * Handle the leftover bytes (fewer than a full dword)
+ * from this segment.
+ */
+ if (l) {
+ u32 v = *addr;
+
+ if (l + extra >= sizeof(u32)) {
+ data |= set_upper_bits(v, shift);
+ len -= l + extra - sizeof(u32);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ __raw_writel(data, piobuf);
+ piobuf++;
+ extra = 0;
+ data = 0;
+ } else {
+ /* Clear unused upper bytes */
+ data |= clear_upper_bytes(v, l,
+ extra);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ extra += l;
+ }
+ } else if (len == length) {
+ last = data;
+ break;
+ }
+ } else if (len == length) {
+ u32 w;
+
+ /*
+ * Need to round up for the last dword in the
+ * packet.
+ */
+ w = (len + 3) >> 2;
+ __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
+ piobuf += w - 1;
+ last = ((u32 *) ss->sge.vaddr)[w - 1];
+ break;
+ } else {
+ u32 w = len >> 2;
+
+ __iowrite32_copy(piobuf, ss->sge.vaddr, w);
+ piobuf += w;
+
+ extra = len & (sizeof(u32) - 1);
+ if (extra) {
+ u32 v = ((u32 *) ss->sge.vaddr)[w];
+
+ /* Clear unused upper bytes */
+ data = clear_upper_bytes(v, extra, 0);
+ }
+ }
+ update_sge(ss, len);
+ length -= len;
+ }
+ /* Update address before sending packet. */
+ update_sge(ss, length);
+ if (flush_wc) {
+ /* must flush early everything before trigger word */
+ ipath_flush_wc();
+ __raw_writel(last, piobuf);
+ /* be sure trigger word is written */
+ ipath_flush_wc();
+ } else
+ __raw_writel(last, piobuf);
+}
+
+/*
+ * Convert IB rate to delay multiplier.
+ */
+unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
+{
+ switch (rate) {
+ case IB_RATE_2_5_GBPS: return 8;
+ case IB_RATE_5_GBPS: return 4;
+ case IB_RATE_10_GBPS: return 2;
+ case IB_RATE_20_GBPS: return 1;
+ default: return 0;
+ }
+}
+
+/*
+ * Convert delay multiplier to IB rate
+ */
+static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
+{
+ switch (mult) {
+ case 8: return IB_RATE_2_5_GBPS;
+ case 4: return IB_RATE_5_GBPS;
+ case 2: return IB_RATE_10_GBPS;
+ case 1: return IB_RATE_20_GBPS;
+ default: return IB_RATE_PORT_CURRENT;
+ }
+}
+
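+/* take a verbs tx request off the device's free list, if one is available */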
+static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
+{
+ struct ipath_verbs_txreq *tx = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ if (!list_empty(&dev->txreq_free)) {
+ struct list_head *l = dev->txreq_free.next;
+
+ list_del(l);
+ tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
+ }
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+ return tx;
+}
+
+static inline void put_txreq(struct ipath_ibdev *dev,
+ struct ipath_verbs_txreq *tx)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ list_add(&tx->txreq.list, &dev->txreq_free);
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+}
+
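+/*
+ * SDMA completion callback: generate the send completion, reschedule
+ * the QP if it was waiting on the DMA, and return the tx request to
+ * the free list.
+ */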
+static void sdma_complete(void *cookie, int status)
+{
+ struct ipath_verbs_txreq *tx = cookie;
+ struct ipath_qp *qp = tx->qp;
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ unsigned long flags;
+ enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
+ IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
+
+ if (atomic_dec_and_test(&qp->s_dma_busy)) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (tx->wqe)
+ ipath_send_complete(qp, tx->wqe, ibs);
+ if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
+ qp->s_last != qp->s_head) ||
+ (qp->s_flags & IPATH_S_WAIT_DMA))
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ wake_up(&qp->wait_dma);
+ } else if (tx->wqe) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ ipath_send_complete(qp, tx->wqe, ibs);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
+
+ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
+ kfree(tx->txreq.map_addr);
+ put_txreq(dev, tx);
+
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+}
+
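+/*
+ * Drop the QP's s_dma_busy count; when it reaches zero, wake anyone
+ * waiting for the DMA to drain and reschedule sends that were held off.
+ */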
+static void decrement_dma_busy(struct ipath_qp *qp)
+{
+ unsigned long flags;
+
+ if (atomic_dec_and_test(&qp->s_dma_busy)) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
+ qp->s_last != qp->s_head) ||
+ (qp->s_flags & IPATH_S_WAIT_DMA))
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ wake_up(&qp->wait_dma);
+ }
+}
+
+/*
+ * Compute the number of clock cycles of delay before sending the next packet.
+ * The multipliers reflect the number of clocks for the fastest rate so
+ * one tick at 4xDDR is 8 ticks at 1xSDR.
+ * If the destination port will take longer to receive a packet than
+ * the outgoing link can send it, we need to delay sending the next packet
+ * by the difference in time it takes the receiver to receive and the sender
+ * to send this packet.
+ * Note that this delay is always correct for UC and RC but not always
+ * optimal for UD. For UD, the destination HCA can be different for each
+ * packet, in which case, we could send packets to a different destination
+ * while "waiting" for the delay. The overhead for doing this without
+ * HW support is more than just paying the cost of delaying some packets
+ * unnecessarily.
+ */
+static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
+{
+ return (rcv_mult > snd_mult) ?
+ (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
+}
+
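+/*
+ * Send a packet using the send DMA engine.  Builds a verbs tx request
+ * for the header and SGE list, falling back to copying the whole packet
+ * into a kmalloc'ed bounce buffer when the payload is unaligned or would
+ * need more descriptors than the queue holds.
+ */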
+static int ipath_verbs_send_dma(struct ipath_qp *qp,
+ struct ipath_ib_header *hdr, u32 hdrwords,
+ struct ipath_sge_state *ss, u32 len,
+ u32 plen, u32 dwords)
+{
+ struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+ struct ipath_devdata *dd = dev->dd;
+ struct ipath_verbs_txreq *tx;
+ u32 *piobuf;
+ u32 control;
+ u32 ndesc;
+ int ret;
+
+ tx = qp->s_tx;
+ if (tx) {
+ qp->s_tx = NULL;
+ /* resend previously constructed packet */
+ atomic_inc(&qp->s_dma_busy);
+ ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
+ if (ret) {
+ qp->s_tx = tx;
+ decrement_dma_busy(qp);
+ }
+ goto bail;
+ }
+
+ tx = get_txreq(dev);
+ if (!tx) {
+ ret = -EBUSY;
+ goto bail;
+ }
+
+ /*
+ * Get the saved delay count we computed for the previous packet
+ * and save the delay count for this packet to be used next time
+ * we get here.
+ */
+ control = qp->s_pkt_delay;
+ qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
+
+ tx->qp = qp;
+ atomic_inc(&qp->refcount);
+ tx->wqe = qp->s_wqe;
+ tx->txreq.callback = sdma_complete;
+ tx->txreq.callback_cookie = tx;
+ tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
+ IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
+ if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
+ tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
+
+ /* VL15 packets bypass credit check */
+ if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
+ control |= 1ULL << 31;
+ tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
+ }
+
+ if (len) {
+ /*
+ * Don't try to DMA if it takes more descriptors than
+ * the queue holds.
+ */
+ ndesc = ipath_count_sge(ss, len);
+ if (ndesc >= dd->ipath_sdma_descq_cnt)
+ ndesc = 0;
+ } else
+ ndesc = 1;
+ if (ndesc) {
+ tx->hdr.pbc[0] = cpu_to_le32(plen);
+ tx->hdr.pbc[1] = cpu_to_le32(control);
+ memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
+ tx->txreq.sg_count = ndesc;
+ tx->map_len = (hdrwords + 2) << 2;
+ tx->txreq.map_addr = &tx->hdr;
+ atomic_inc(&qp->s_dma_busy);
+ ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
+ if (ret) {
+ /* save ss and length in dwords */
+ tx->ss = ss;
+ tx->len = dwords;
+ qp->s_tx = tx;
+ decrement_dma_busy(qp);
+ }
+ goto bail;
+ }
+
+ /* Allocate a buffer and copy the header and payload to it. */
+ tx->map_len = (plen + 1) << 2;
+ piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
+ if (unlikely(piobuf == NULL)) {
+ ret = -EBUSY;
+ goto err_tx;
+ }
+ tx->txreq.map_addr = piobuf;
+ tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
+ tx->txreq.sg_count = 1;
+
+ *piobuf++ = (__force u32) cpu_to_le32(plen);
+ *piobuf++ = (__force u32) cpu_to_le32(control);
+ memcpy(piobuf, hdr, hdrwords << 2);
+ ipath_copy_from_sge(piobuf + hdrwords, ss, len);
+
+ atomic_inc(&qp->s_dma_busy);
+ ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
+ /*
+ * If we couldn't queue the DMA request, save the info
+ * and try again later rather than destroying the
+ * buffer and undoing the side effects of the copy.
+ */
+ if (ret) {
+ tx->ss = NULL;
+ tx->len = 0;
+ qp->s_tx = tx;
+ decrement_dma_busy(qp);
+ }
+ dev->n_unaligned++;
+ goto bail;
+
+err_tx:
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ put_txreq(dev, tx);
+bail:
+ return ret;
+}
+
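+/*
+ * Send a packet by programmed I/O: write the PBC, the header and then
+ * the payload directly into a PIO send buffer, with write-combining
+ * flushes around the trigger word where the hardware requires them.
+ */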
+static int ipath_verbs_send_pio(struct ipath_qp *qp,
+ struct ipath_ib_header *ibhdr, u32 hdrwords,
+ struct ipath_sge_state *ss, u32 len,
+ u32 plen, u32 dwords)
+{
+ struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
+ u32 *hdr = (u32 *) ibhdr;
+ u32 __iomem *piobuf;
+ unsigned flush_wc;
+ u32 control;
+ int ret;
+ unsigned long flags;
+
+ piobuf = ipath_getpiobuf(dd, plen, NULL);
+ if (unlikely(piobuf == NULL)) {
+ ret = -EBUSY;
+ goto bail;
+ }
+
+ /*
+ * Get the saved delay count we computed for the previous packet
+ * and save the delay count for this packet to be used next time
+ * we get here.
+ */
+ control = qp->s_pkt_delay;
+ qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
+
+ /* VL15 packets bypass credit check */
+ if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
+ control |= 1ULL << 31;
+
+ /*
+ * Write the length to the control qword plus any needed flags.
+ * We have to flush after the PBC for correctness on some cpus
+ * or WC buffer can be written out of order.
+ */
+ writeq(((u64) control << 32) | plen, piobuf);
+ piobuf += 2;
+
+ flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
+ if (len == 0) {
+ /*
+ * If there is just the header portion, must flush before
+ * writing last word of header for correctness, and after
+ * the last header word (trigger word).
+ */
+ if (flush_wc) {
+ ipath_flush_wc();
+ __iowrite32_copy(piobuf, hdr, hdrwords - 1);
+ ipath_flush_wc();
+ __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+ ipath_flush_wc();
+ } else
+ __iowrite32_copy(piobuf, hdr, hdrwords);
+ goto done;
+ }
+
+ if (flush_wc)
+ ipath_flush_wc();
+ __iowrite32_copy(piobuf, hdr, hdrwords);
+ piobuf += hdrwords;
+
+ /* The common case is aligned and contained in one segment. */
+ if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
+ !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
+ u32 *addr = (u32 *) ss->sge.vaddr;
+
+ /* Update address before sending packet. */
+ update_sge(ss, len);
+ if (flush_wc) {
+ __iowrite32_copy(piobuf, addr, dwords - 1);
+ /* must flush early everything before trigger word */
+ ipath_flush_wc();
+ __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
+ /* be sure trigger word is written */
+ ipath_flush_wc();
+ } else
+ __iowrite32_copy(piobuf, addr, dwords);
+ goto done;
+ }
+ copy_io(piobuf, ss, len, flush_wc);
+done:
+ if (qp->s_wqe) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
+ ret = 0;
+bail:
+ return ret;
+}
+
+/**
+ * ipath_verbs_send - send a packet
+ * @qp: the QP to send on
+ * @hdr: the packet header
+ * @hdrwords: the number of 32-bit words in the header
+ * @ss: the SGE to send
+ * @len: the length of the packet in bytes
+ */
+int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
+ u32 hdrwords, struct ipath_sge_state *ss, u32 len)
+{
+ struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
+ u32 plen;
+ int ret;
+ u32 dwords = (len + 3) >> 2;
+
+ /*
+ * Calculate the send buffer trigger address.
+ * The +1 counts for the pbc control dword following the pbc length.
+ */
+ plen = hdrwords + dwords + 1;
+
+ /*
+ * VL15 packets (IB_QPT_SMI) will always use PIO, so we
+ * can defer SDMA restart until link goes ACTIVE without
+ * worrying about just how we got there.
+ */
+ if (qp->ibqp.qp_type == IB_QPT_SMI ||
+ !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
+ ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
+ plen, dwords);
+ else
+ ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
+ plen, dwords);
+
+ return ret;
+}
+
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+ u64 *rwords, u64 *spkts, u64 *rpkts,
+ u64 *xmit_wait)
+{
+ int ret;
+
+ if (!(dd->ipath_flags & IPATH_INITTED)) {
+ /* no hardware, freeze, etc. */
+ ret = -EINVAL;
+ goto bail;
+ }
+ *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+ *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+ *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
+ *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+ *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_get_counters - get various chip counters
+ * @dd: the infinipath device
+ * @cntrs: counters are placed here
+ *
+ * Return the counters needed by recv_pma_get_portcounters().
+ */
+int ipath_get_counters(struct ipath_devdata *dd,
+ struct ipath_verbs_counters *cntrs)
+{
+ struct ipath_cregs const *crp = dd->ipath_cregs;
+ int ret;
+
+ if (!(dd->ipath_flags & IPATH_INITTED)) {
+ /* no hardware, freeze, etc. */
+ ret = -EINVAL;
+ goto bail;
+ }
+ cntrs->symbol_error_counter =
+ ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
+ cntrs->link_error_recovery_counter =
+ ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
+ /*
+ * The link downed counter counts when the other side downs the
+ * connection. We add in the number of times we downed the link
+ * due to local link integrity errors to compensate.
+ */
+ cntrs->link_downed_counter =
+ ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
+ cntrs->port_rcv_errors =
+ ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
+ ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
+ ipath_snap_cntr(dd, crp->cr_portovflcnt) +
+ ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
+ ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
+ ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
+ ipath_snap_cntr(dd, crp->cr_erricrccnt) +
+ ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
+ ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
+ ipath_snap_cntr(dd, crp->cr_badformatcnt) +
+ dd->ipath_rxfc_unsupvl_errs;
+ if (crp->cr_rxotherlocalphyerrcnt)
+ cntrs->port_rcv_errors +=
+ ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
+ if (crp->cr_rxvlerrcnt)
+ cntrs->port_rcv_errors +=
+ ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
+ cntrs->port_rcv_remphys_errors =
+ ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
+ cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
+ cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
+ cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
+ cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
+ cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
+ cntrs->local_link_integrity_errors =
+ crp->cr_locallinkintegrityerrcnt ?
+ ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
+ ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
+ dd->ipath_lli_errs : dd->ipath_lli_errors);
+ cntrs->excessive_buffer_overrun_errors =
+ crp->cr_excessbufferovflcnt ?
+ ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
+ dd->ipath_overrun_thresh_errs;
+ cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
+ ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_ib_piobufavail - callback when a PIO buffer is available
+ * @dev: the device pointer
+ *
+ * This is called from ipath_intr() at interrupt level when a PIO buffer is
+ * available after ipath_verbs_send() returned an error that no buffers were
+ * available. Return 1 if we consumed all the PIO buffers and we still have
+ * QPs waiting for buffers (for now, just restart the send tasklet and
+ * return zero).
+ */
+int ipath_ib_piobufavail(struct ipath_ibdev *dev)
+{
+ struct list_head *list;
+ struct ipath_qp *qplist;
+ struct ipath_qp *qp;
+ unsigned long flags;
+
+ if (dev == NULL)
+ goto bail;
+
+ list = &dev->piowait;
+ qplist = NULL;
+
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ while (!list_empty(list)) {
+ qp = list_entry(list->next, struct ipath_qp, piowait);
+ list_del_init(&qp->piowait);
+ qp->pio_next = qplist;
+ qplist = qp;
+ atomic_inc(&qp->refcount);
+ }
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+
+ while (qplist != NULL) {
+ qp = qplist;
+ qplist = qp->pio_next;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
+ ipath_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Notify ipath_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+
+bail:
+ return 0;
+}
+
+static int ipath_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+ struct ipath_ibdev *dev = to_idev(ibdev);
+
+ memset(props, 0, sizeof(*props));
+
+ props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
+ IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
+ IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+ IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+ props->page_size_cap = PAGE_SIZE;
+ props->vendor_id =
+ IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
+ props->vendor_part_id = dev->dd->ipath_deviceid;
+ props->hw_ver = dev->dd->ipath_pcirev;
+
+ props->sys_image_guid = dev->sys_image_guid;
+
+ props->max_mr_size = ~0ull;
+ props->max_qp = ib_ipath_max_qps;
+ props->max_qp_wr = ib_ipath_max_qp_wrs;
+ props->max_sge = ib_ipath_max_sges;
+ props->max_cq = ib_ipath_max_cqs;
+ props->max_ah = ib_ipath_max_ahs;
+ props->max_cqe = ib_ipath_max_cqes;
+ props->max_mr = dev->lk_table.max;
+ props->max_fmr = dev->lk_table.max;
+ props->max_map_per_fmr = 32767;
+ props->max_pd = ib_ipath_max_pds;
+ props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
+ props->max_qp_init_rd_atom = 255;
+ /* props->max_res_rd_atom */
+ props->max_srq = ib_ipath_max_srqs;
+ props->max_srq_wr = ib_ipath_max_srq_wrs;
+ props->max_srq_sge = ib_ipath_max_srq_sges;
+ /* props->local_ca_ack_delay */
+ props->atomic_cap = IB_ATOMIC_GLOB;
+ props->max_pkeys = ipath_get_npkeys(dev->dd);
+ props->max_mcast_grp = ib_ipath_max_mcast_grps;
+ props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
+ props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+ props->max_mcast_grp;
+
+ return 0;
+}
+
+const u8 ipath_cvt_physportstate[32] = {
+ [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
+ [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
+ [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
+ [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
+ [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
+ [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
+ [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
+};
+
+u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
+{
+ return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
+}
+
+static int ipath_query_port(struct ib_device *ibdev,
+ u8 port, struct ib_port_attr *props)
+{
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_devdata *dd = dev->dd;
+ enum ib_mtu mtu;
+ u16 lid = dd->ipath_lid;
+ u64 ibcstat;
+
+ memset(props, 0, sizeof(*props));
+ props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE);
+ props->lmc = dd->ipath_lmc;
+ props->sm_lid = dev->sm_lid;
+ props->sm_sl = dev->sm_sl;
+ ibcstat = dd->ipath_lastibcstat;
+ /* map LinkState to IB portinfo values. */
+ props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
+
+ /* See phys_state_show() */
+ props->phys_state = /* MEA: assumes shift == 0 */
+ ipath_cvt_physportstate[dd->ipath_lastibcstat &
+ dd->ibcs_lts_mask];
+ props->port_cap_flags = dev->port_cap_flags;
+ props->gid_tbl_len = 1;
+ props->max_msg_sz = 0x80000000;
+ props->pkey_tbl_len = ipath_get_npkeys(dd);
+ props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
+ dev->z_pkey_violations;
+ props->qkey_viol_cntr = dev->qkey_violations;
+ props->active_width = dd->ipath_link_width_active;
+ /* See rate_show() */
+ props->active_speed = dd->ipath_link_speed_active;
+ props->max_vl_num = 1; /* VLCap = VL0 */
+ props->init_type_reply = 0;
+
+ props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
+ switch (dd->ipath_ibmtu) {
+ case 4096:
+ mtu = IB_MTU_4096;
+ break;
+ case 2048:
+ mtu = IB_MTU_2048;
+ break;
+ case 1024:
+ mtu = IB_MTU_1024;
+ break;
+ case 512:
+ mtu = IB_MTU_512;
+ break;
+ case 256:
+ mtu = IB_MTU_256;
+ break;
+ default:
+ mtu = IB_MTU_2048;
+ }
+ props->active_mtu = mtu;
+ props->subnet_timeout = dev->subnet_timeout;
+
+ return 0;
+}
+
+static int ipath_modify_device(struct ib_device *device,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify)
+{
+ int ret;
+
+ if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
+ IB_DEVICE_MODIFY_NODE_DESC)) {
+ ret = -EOPNOTSUPP;
+ goto bail;
+ }
+
+ if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
+ memcpy(device->node_desc, device_modify->node_desc, 64);
+
+ if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
+ to_idev(device)->sys_image_guid =
+ cpu_to_be64(device_modify->sys_image_guid);
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+static int ipath_modify_port(struct ib_device *ibdev,
+ u8 port, int port_modify_mask,
+ struct ib_port_modify *props)
+{
+ struct ipath_ibdev *dev = to_idev(ibdev);
+
+ dev->port_cap_flags |= props->set_port_cap_mask;
+ dev->port_cap_flags &= ~props->clr_port_cap_mask;
+ if (port_modify_mask & IB_PORT_SHUTDOWN)
+ ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
+ if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
+ dev->qkey_violations = 0;
+ return 0;
+}
+
+static int ipath_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *gid)
+{
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ int ret;
+
+ if (index >= 1) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ gid->global.subnet_prefix = dev->gid_prefix;
+ gid->global.interface_id = dev->dd->ipath_guid;
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ struct ipath_pd *pd;
+ struct ib_pd *ret;
+
+ /*
+ * This is actually totally arbitrary. Some correctness tests
+ * assume there's a maximum number of PDs that can be allocated.
+ * We don't actually have this limit, but we fail the test if
+ * we allow allocations of more than we report for this value.
+ */
+
+ pd = kmalloc(sizeof *pd, GFP_KERNEL);
+ if (!pd) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ spin_lock(&dev->n_pds_lock);
+ if (dev->n_pds_allocated == ib_ipath_max_pds) {
+ spin_unlock(&dev->n_pds_lock);
+ kfree(pd);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ dev->n_pds_allocated++;
+ spin_unlock(&dev->n_pds_lock);
+
+ /* ib_alloc_pd() will initialize pd->ibpd. */
+ pd->user = udata != NULL;
+
+ ret = &pd->ibpd;
+
+bail:
+ return ret;
+}
+
+static int ipath_dealloc_pd(struct ib_pd *ibpd)
+{
+ struct ipath_pd *pd = to_ipd(ibpd);
+ struct ipath_ibdev *dev = to_idev(ibpd->device);
+
+ spin_lock(&dev->n_pds_lock);
+ dev->n_pds_allocated--;
+ spin_unlock(&dev->n_pds_lock);
+
+ kfree(pd);
+
+ return 0;
+}
+
+/**
+ * ipath_create_ah - create an address handle
+ * @pd: the protection domain
+ * @ah_attr: the attributes of the AH
+ *
+ * This may be called from interrupt context.
+ */
+static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr)
+{
+ struct ipath_ah *ah;
+ struct ib_ah *ret;
+ struct ipath_ibdev *dev = to_idev(pd->device);
+ unsigned long flags;
+
+ /* A multicast address requires a GRH (see ch. 8.4.1). */
+ if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
+ ah_attr->dlid != IPATH_PERMISSIVE_LID &&
+ !(ah_attr->ah_flags & IB_AH_GRH)) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
+ if (ah_attr->dlid == 0) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
+ if (ah_attr->port_num < 1 ||
+ ah_attr->port_num > pd->device->phys_port_cnt) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
+ ah = kmalloc(sizeof *ah, GFP_ATOMIC);
+ if (!ah) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
+ if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+ kfree(ah);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ dev->n_ahs_allocated++;
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+ /* ib_create_ah() will initialize ah->ibah. */
+ ah->attr = *ah_attr;
+ ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
+
+ ret = &ah->ibah;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_destroy_ah - destroy an address handle
+ * @ibah: the AH to destroy
+ *
+ * This may be called from interrupt context.
+ */
+static int ipath_destroy_ah(struct ib_ah *ibah)
+{
+ struct ipath_ibdev *dev = to_idev(ibah->device);
+ struct ipath_ah *ah = to_iah(ibah);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
+ dev->n_ahs_allocated--;
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+ kfree(ah);
+
+ return 0;
+}
+
+static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+ struct ipath_ah *ah = to_iah(ibah);
+
+ *ah_attr = ah->attr;
+ ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
+
+ return 0;
+}
+
+/**
+ * ipath_get_npkeys - return the size of the PKEY table for port 0
+ * @dd: the infinipath device
+ */
+unsigned ipath_get_npkeys(struct ipath_devdata *dd)
+{
+ return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
+}
+
+/**
+ * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
+ * @dd: the infinipath device
+ * @index: the PKEY index
+ */
+unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
+{
+ unsigned ret;
+
+ if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
+ ret = 0;
+ else
+ ret = dd->ipath_pd[0]->port_pkeys[index];
+
+ return ret;
+}
+
+static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ struct ipath_ibdev *dev = to_idev(ibdev);
+ int ret;
+
+ if (index >= ipath_get_npkeys(dev->dd)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ *pkey = ipath_get_pkey(dev->dd, index);
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * ipath_alloc_ucontext - allocate a ucontext
+ * @ibdev: the infiniband device
+ * @udata: not used by the InfiniPath driver
+ */
+
+static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct ipath_ucontext *context;
+ struct ib_ucontext *ret;
+
+ context = kmalloc(sizeof *context, GFP_KERNEL);
+ if (!context) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ ret = &context->ibucontext;
+
+bail:
+ return ret;
+}
+
+static int ipath_dealloc_ucontext(struct ib_ucontext *context)
+{
+ kfree(to_iucontext(context));
+ return 0;
+}
+
+static int ipath_verbs_register_sysfs(struct ib_device *dev);
+
+static void __verbs_timer(unsigned long arg)
+{
+ struct ipath_devdata *dd = (struct ipath_devdata *) arg;
+
+ /* Handle verbs layer timeouts. */
+ ipath_ib_timer(dd->verbs_dev);
+
+ mod_timer(&dd->verbs_timer, jiffies + 1);
+}
+
+static int enable_timer(struct ipath_devdata *dd)
+{
+ /*
+ * Early chips had a design flaw where the chip and kernel ideas
+ * of the tail register don't always agree, and therefore we won't
+ * get an interrupt on the next packet received.
+ * If the board supports per packet receive interrupts, use it.
+ * Otherwise, the timer function periodically checks for packets
+ * to cover this case.
+ * Either way, the timer is needed for verbs layer related
+ * processing.
+ */
+ if (dd->ipath_flags & IPATH_GPIO_INTR) {
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
+ 0x2074076542310ULL);
+ /* Enable GPIO bit 2 interrupt */
+ dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+ dd->ipath_gpio_mask);
+ }
+
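+ /* The timer fires once per jiffy (see __verbs_timer()). */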
+ init_timer(&dd->verbs_timer);
+ dd->verbs_timer.function = __verbs_timer;
+ dd->verbs_timer.data = (unsigned long)dd;
+ dd->verbs_timer.expires = jiffies + 1;
+ add_timer(&dd->verbs_timer);
+
+ return 0;
+}
+
+static int disable_timer(struct ipath_devdata *dd)
+{
+ if (dd->ipath_flags & IPATH_GPIO_INTR) {
+ /* Disable GPIO bit 2 interrupt */
+ dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+ dd->ipath_gpio_mask);
+ /*
+ * We might want to undo changes to debugportselect,
+ * but how?
+ */
+ }
+
+ del_timer_sync(&dd->verbs_timer);
+
+ return 0;
+}
+
+/**
+ * ipath_register_ib_device - register our device with the infiniband core
+ * @dd: the device data structure
+ *
+ * Returns zero on success or a negative errno. The allocated ipath_ibdev
+ * is stored in dd->verbs_dev (NULL on error).
+ */
+int ipath_register_ib_device(struct ipath_devdata *dd)
+{
+ struct ipath_verbs_counters cntrs;
+ struct ipath_ibdev *idev;
+ struct ib_device *dev;
+ struct ipath_verbs_txreq *tx;
+ unsigned i;
+ int ret;
+
+ idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
+ if (idev == NULL) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ dev = &idev->ibdev;
+
+ if (dd->ipath_sdma_descq_cnt) {
+ tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx,
+ GFP_KERNEL);
+ if (tx == NULL) {
+ ret = -ENOMEM;
+ goto err_tx;
+ }
+ } else
+ tx = NULL;
+ idev->txreq_bufs = tx;
+
+ /* Only need to initialize non-zero fields. */
+ spin_lock_init(&idev->n_pds_lock);
+ spin_lock_init(&idev->n_ahs_lock);
+ spin_lock_init(&idev->n_cqs_lock);
+ spin_lock_init(&idev->n_qps_lock);
+ spin_lock_init(&idev->n_srqs_lock);
+ spin_lock_init(&idev->n_mcast_grps_lock);
+
+ spin_lock_init(&idev->qp_table.lock);
+ spin_lock_init(&idev->lk_table.lock);
+ idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
+ /* Set the prefix to the default value (see ch. 4.1.1) */
+ idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);
+
+ ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
+ if (ret)
+ goto err_qp;
+
+ /*
+ * The top ib_ipath_lkey_table_size bits are used to index the
+ * table. The lower 8 bits can be owned by the user (copied from
+ * the LKEY). The remaining bits act as a generation number or tag.
+ */
+ idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
+ idev->lk_table.table = kzalloc(idev->lk_table.max *
+ sizeof(*idev->lk_table.table),
+ GFP_KERNEL);
+ if (idev->lk_table.table == NULL) {
+ ret = -ENOMEM;
+ goto err_lk;
+ }
+ INIT_LIST_HEAD(&idev->pending_mmaps);
+ spin_lock_init(&idev->pending_lock);
+ idev->mmap_offset = PAGE_SIZE;
+ spin_lock_init(&idev->mmap_offset_lock);
+ INIT_LIST_HEAD(&idev->pending[0]);
+ INIT_LIST_HEAD(&idev->pending[1]);
+ INIT_LIST_HEAD(&idev->pending[2]);
+ INIT_LIST_HEAD(&idev->piowait);
+ INIT_LIST_HEAD(&idev->rnrwait);
+ INIT_LIST_HEAD(&idev->txreq_free);
+ idev->pending_index = 0;
+ idev->port_cap_flags =
+ IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
+ if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
+ idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
+ idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
+ idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
+ idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
+ idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
+ idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
+
+ /* Snapshot current HW counters to "clear" them. */
+ ipath_get_counters(dd, &cntrs);
+ idev->z_symbol_error_counter = cntrs.symbol_error_counter;
+ idev->z_link_error_recovery_counter =
+ cntrs.link_error_recovery_counter;
+ idev->z_link_downed_counter = cntrs.link_downed_counter;
+ idev->z_port_rcv_errors = cntrs.port_rcv_errors;
+ idev->z_port_rcv_remphys_errors =
+ cntrs.port_rcv_remphys_errors;
+ idev->z_port_xmit_discards = cntrs.port_xmit_discards;
+ idev->z_port_xmit_data = cntrs.port_xmit_data;
+ idev->z_port_rcv_data = cntrs.port_rcv_data;
+ idev->z_port_xmit_packets = cntrs.port_xmit_packets;
+ idev->z_port_rcv_packets = cntrs.port_rcv_packets;
+ idev->z_local_link_integrity_errors =
+ cntrs.local_link_integrity_errors;
+ idev->z_excessive_buffer_overrun_errors =
+ cntrs.excessive_buffer_overrun_errors;
+ idev->z_vl15_dropped = cntrs.vl15_dropped;
+
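+ /* Put the pre-allocated send DMA tx descriptors on the free list. */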
+ for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
+ list_add(&tx->txreq.list, &idev->txreq_free);
+
+ /*
+ * The system image GUID is supposed to be the same for all
+ * IB HCAs in a single system but since there can be other
+ * device types in the system, we can't be sure this is unique.
+ */
+ if (!sys_image_guid)
+ sys_image_guid = dd->ipath_guid;
+ idev->sys_image_guid = sys_image_guid;
+ idev->ib_unit = dd->ipath_unit;
+ idev->dd = dd;
+
+ strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
+ dev->owner = THIS_MODULE;
+ dev->node_guid = dd->ipath_guid;
+ dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
+ dev->uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
+ dev->node_type = RDMA_NODE_IB_CA;
+ dev->phys_port_cnt = 1;
+ dev->num_comp_vectors = 1;
+ dev->dma_device = &dd->pcidev->dev;
+ dev->query_device = ipath_query_device;
+ dev->modify_device = ipath_modify_device;
+ dev->query_port = ipath_query_port;
+ dev->modify_port = ipath_modify_port;
+ dev->query_pkey = ipath_query_pkey;
+ dev->query_gid = ipath_query_gid;
+ dev->alloc_ucontext = ipath_alloc_ucontext;
+ dev->dealloc_ucontext = ipath_dealloc_ucontext;
+ dev->alloc_pd = ipath_alloc_pd;
+ dev->dealloc_pd = ipath_dealloc_pd;
+ dev->create_ah = ipath_create_ah;
+ dev->destroy_ah = ipath_destroy_ah;
+ dev->query_ah = ipath_query_ah;
+ dev->create_srq = ipath_create_srq;
+ dev->modify_srq = ipath_modify_srq;
+ dev->query_srq = ipath_query_srq;
+ dev->destroy_srq = ipath_destroy_srq;
+ dev->create_qp = ipath_create_qp;
+ dev->modify_qp = ipath_modify_qp;
+ dev->query_qp = ipath_query_qp;
+ dev->destroy_qp = ipath_destroy_qp;
+ dev->post_send = ipath_post_send;
+ dev->post_recv = ipath_post_receive;
+ dev->post_srq_recv = ipath_post_srq_receive;
+ dev->create_cq = ipath_create_cq;
+ dev->destroy_cq = ipath_destroy_cq;
+ dev->resize_cq = ipath_resize_cq;
+ dev->poll_cq = ipath_poll_cq;
+ dev->req_notify_cq = ipath_req_notify_cq;
+ dev->get_dma_mr = ipath_get_dma_mr;
+ dev->reg_phys_mr = ipath_reg_phys_mr;
+ dev->reg_user_mr = ipath_reg_user_mr;
+ dev->dereg_mr = ipath_dereg_mr;
+ dev->alloc_fmr = ipath_alloc_fmr;
+ dev->map_phys_fmr = ipath_map_phys_fmr;
+ dev->unmap_fmr = ipath_unmap_fmr;
+ dev->dealloc_fmr = ipath_dealloc_fmr;
+ dev->attach_mcast = ipath_multicast_attach;
+ dev->detach_mcast = ipath_multicast_detach;
+ dev->process_mad = ipath_process_mad;
+ dev->mmap = ipath_mmap;
+ dev->dma_ops = &ipath_dma_mapping_ops;
+
+ snprintf(dev->node_desc, sizeof(dev->node_desc),
+ IPATH_IDSTR " %s", init_utsname()->nodename);
+
+ ret = ib_register_device(dev);
+ if (ret)
+ goto err_reg;
+
+ if (ipath_verbs_register_sysfs(dev))
+ goto err_class;
+
+ enable_timer(dd);
+
+ goto bail;
+
+err_class:
+ ib_unregister_device(dev);
+err_reg:
+ kfree(idev->lk_table.table);
+err_lk:
+ kfree(idev->qp_table.table);
+err_qp:
+ kfree(idev->txreq_bufs);
+err_tx:
+ ib_dealloc_device(dev);
+ ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
+ idev = NULL;
+
+bail:
+ dd->verbs_dev = idev;
+ return ret;
+}
+
+void ipath_unregister_ib_device(struct ipath_ibdev *dev)
+{
+ struct ib_device *ibdev = &dev->ibdev;
+ u32 qps_inuse;
+
+ ib_unregister_device(ibdev);
+
+ disable_timer(dev->dd);
+
+ if (!list_empty(&dev->pending[0]) ||
+ !list_empty(&dev->pending[1]) ||
+ !list_empty(&dev->pending[2]))
+ ipath_dev_err(dev->dd, "pending list not empty!\n");
+ if (!list_empty(&dev->piowait))
+ ipath_dev_err(dev->dd, "piowait list not empty!\n");
+ if (!list_empty(&dev->rnrwait))
+ ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
+ if (!ipath_mcast_tree_empty())
+ ipath_dev_err(dev->dd, "multicast table memory leak!\n");
+ /*
+ * Note that ipath_unregister_ib_device() can be called before all
+ * the QPs are destroyed!
+ */
+ qps_inuse = ipath_free_all_qps(&dev->qp_table);
+ if (qps_inuse)
+ ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
+ qps_inuse);
+ kfree(dev->qp_table.table);
+ kfree(dev->lk_table.table);
+ kfree(dev->txreq_bufs);
+ ib_dealloc_device(ibdev);
+}
+
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_ibdev *dev =
+ container_of(device, struct ipath_ibdev, ibdev.dev);
+
+ return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
+}
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_ibdev *dev =
+ container_of(device, struct ipath_ibdev, ibdev.dev);
+ int ret;
+
+ ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
+ if (ret < 0)
+ goto bail;
+ strcat(buf, "\n");
+ ret = strlen(buf);
+
+bail:
+ return ret;
+}
+
+static ssize_t show_stats(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct ipath_ibdev *dev =
+ container_of(device, struct ipath_ibdev, ibdev.dev);
+ int i;
+ int len;
+
+ len = sprintf(buf,
+ "RC resends %d\n"
+ "RC no QACK %d\n"
+ "RC ACKs %d\n"
+ "RC SEQ NAKs %d\n"
+ "RC RDMA seq %d\n"
+ "RC RNR NAKs %d\n"
+ "RC OTH NAKs %d\n"
+ "RC timeouts %d\n"
+ "RC RDMA dup %d\n"
+ "piobuf wait %d\n"
+ "unaligned %d\n"
+ "PKT drops %d\n"
+ "WQE errs %d\n",
+ dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
+ dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
+ dev->n_other_naks, dev->n_timeouts,
+ dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
+ dev->n_pkt_drops, dev->n_wqe_errs);
+ for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
+ const struct ipath_opcode_stats *si = &dev->opstats[i];
+
+ if (!si->n_packets && !si->n_bytes)
+ continue;
+ len += sprintf(buf + len, "%02x %llu/%llu\n", i,
+ (unsigned long long) si->n_packets,
+ (unsigned long long) si->n_bytes);
+ }
+ return len;
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
+
+static struct device_attribute *ipath_class_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_hca_type,
+ &dev_attr_board_id,
+ &dev_attr_stats
+};
+
+static int ipath_verbs_register_sysfs(struct ib_device *dev)
+{
+ int i;
+ int ret;
+
+ for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
+ if (device_create_file(&dev->dev,
+ ipath_class_attributes[i])) {
+ ret = 1;
+ goto bail;
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
new file mode 100644
index 0000000..9d12ae8
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -0,0 +1,935 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IPATH_VERBS_H
+#define IPATH_VERBS_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "ipath_kernel.h"
+
+#define IPATH_MAX_RDMA_ATOMIC 4
+
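+/*
+ * QP numbers are 24 bits wide. QPNMAP_ENTRIES is the number of page-sized
+ * bitmaps (one bit per QPN) needed to track which QP numbers are in use.
+ */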
+#define QPN_MAX (1 << 24)
+#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define IPATH_UVERBS_ABI_VERSION 2
+
+/*
+ * Define an ib_cq_notify value that is not valid so we know when CQ
+ * notifications are armed.
+ */
+#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1)
+
+/* AETH NAK opcode values */
+#define IB_RNR_NAK 0x20
+#define IB_NAK_PSN_ERROR 0x60
+#define IB_NAK_INVALID_REQUEST 0x61
+#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
+#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
+#define IB_NAK_INVALID_RD_REQUEST 0x64
+
+/* Flags for checking QP state (see ib_ipath_state_ops[]) */
+#define IPATH_POST_SEND_OK 0x01
+#define IPATH_POST_RECV_OK 0x02
+#define IPATH_PROCESS_RECV_OK 0x04
+#define IPATH_PROCESS_SEND_OK 0x08
+#define IPATH_PROCESS_NEXT_SEND_OK 0x10
+#define IPATH_FLUSH_SEND 0x20
+#define IPATH_FLUSH_RECV 0x40
+#define IPATH_PROCESS_OR_FLUSH_SEND \
+ (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
+
+/* IB Performance Manager status values */
+#define IB_PMA_SAMPLE_STATUS_DONE 0x00
+#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
+#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02
+
+/* Mandatory IB performance counter select values. */
+#define IB_PMA_PORT_XMIT_DATA __constant_htons(0x0001)
+#define IB_PMA_PORT_RCV_DATA __constant_htons(0x0002)
+#define IB_PMA_PORT_XMIT_PKTS __constant_htons(0x0003)
+#define IB_PMA_PORT_RCV_PKTS __constant_htons(0x0004)
+#define IB_PMA_PORT_XMIT_WAIT __constant_htons(0x0005)
+
+struct ib_reth {
+ __be64 vaddr;
+ __be32 rkey;
+ __be32 length;
+} __attribute__ ((packed));
+
+struct ib_atomic_eth {
+ __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
+ __be32 rkey;
+ __be64 swap_data;
+ __be64 compare_data;
+} __attribute__ ((packed));
+
+struct ipath_other_headers {
+ __be32 bth[3];
+ union {
+ struct {
+ __be32 deth[2];
+ __be32 imm_data;
+ } ud;
+ struct {
+ struct ib_reth reth;
+ __be32 imm_data;
+ } rc;
+ struct {
+ __be32 aeth;
+ __be32 atomic_ack_eth[2];
+ } at;
+ __be32 imm_data;
+ __be32 aeth;
+ struct ib_atomic_eth atomic_eth;
+ } u;
+} __attribute__ ((packed));
+
+/*
+ * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
+ * long (72 w/ imm_data). Only the first 56 bytes of the IB header
+ * will be in the eager header buffer. The remaining 12 or 16 bytes
+ * are in the data buffer.
+ */
+struct ipath_ib_header {
+ __be16 lrh[4];
+ union {
+ struct {
+ struct ib_grh grh;
+ struct ipath_other_headers oth;
+ } l;
+ struct ipath_other_headers oth;
+ } u;
+} __attribute__ ((packed));
+
+struct ipath_pio_header {
+ __le32 pbc[2];
+ struct ipath_ib_header hdr;
+} __attribute__ ((packed));
+
+/*
+ * There is one struct ipath_mcast for each multicast GID.
+ * All attached QPs are then stored as a list of
+ * struct ipath_mcast_qp.
+ */
+struct ipath_mcast_qp {
+ struct list_head list;
+ struct ipath_qp *qp;
+};
+
+struct ipath_mcast {
+ struct rb_node rb_node;
+ union ib_gid mgid;
+ struct list_head qp_list;
+ wait_queue_head_t wait;
+ atomic_t refcount;
+ int n_attached;
+};
+
+/* Protection domain */
+struct ipath_pd {
+ struct ib_pd ibpd;
+ int user; /* non-zero if created from user space */
+};
+
+/* Address Handle */
+struct ipath_ah {
+ struct ib_ah ibah;
+ struct ib_ah_attr attr;
+};
+
+/*
+ * This structure is used by ipath_mmap() to validate an offset
+ * when an mmap() request is made. The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct ipath_mmap_info {
+ struct list_head pending_mmaps;
+ struct ib_ucontext *context;
+ void *obj;
+ __u64 offset;
+ struct kref ref;
+ unsigned size;
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct ipath_cq_wc {
+ u32 head; /* index of next entry to fill */
+ u32 tail; /* index of next ib_poll_cq() entry */
+ union {
+ /* these are actually size ibcq.cqe + 1 */
+ struct ib_uverbs_wc uqueue[0];
+ struct ib_wc kqueue[0];
+ };
+};
+
+/*
+ * The completion queue structure.
+ */
+struct ipath_cq {
+ struct ib_cq ibcq;
+ struct tasklet_struct comptask;
+ spinlock_t lock;
+ u8 notify;
+ u8 triggered;
+ struct ipath_cq_wc *queue;
+ struct ipath_mmap_info *ip;
+};
+
+/*
+ * A segment is a linear region of low physical memory.
+ * XXX Maybe we should use phys addr here and kmap()/kunmap().
+ * Used by the verbs layer.
+ */
+struct ipath_seg {
+ void *vaddr;
+ size_t length;
+};
+
+/* The number of ipath_segs that fit in a page. */
+#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg))
+
+struct ipath_segarray {
+ struct ipath_seg segs[IPATH_SEGSZ];
+};
+
+struct ipath_mregion {
+ struct ib_pd *pd; /* shares refcnt of ibmr.pd */
+ u64 user_base; /* User's address for this region */
+ u64 iova; /* IB start address of this region */
+ size_t length;
+ u32 lkey;
+ u32 offset; /* offset (bytes) to start of region */
+ int access_flags;
+ u32 max_segs; /* number of ipath_segs in all the arrays */
+ u32 mapsz; /* size of the map array */
+ struct ipath_segarray *map[0]; /* the segments */
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct ipath_sge {
+ struct ipath_mregion *mr;
+ void *vaddr; /* kernel virtual address of segment */
+ u32 sge_length; /* length of the SGE */
+ u32 length; /* remaining length of the segment */
+ u16 m; /* current index: mr->map[m] */
+ u16 n; /* current index: mr->map[m]->segs[n] */
+};
+
+/* Memory region */
+struct ipath_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct ipath_mregion mr; /* must be last */
+};
+
+/*
+ * Send work request queue entry.
+ * The size of the sg_list is determined when the QP is created and stored
+ * in qp->s_max_sge.
+ */
+struct ipath_swqe {
+ struct ib_send_wr wr; /* don't use wr.sg_list */
+ u32 psn; /* first packet sequence number */
+ u32 lpsn; /* last packet sequence number */
+ u32 ssn; /* send sequence number */
+ u32 length; /* total length of data in sg_list */
+ struct ipath_sge sg_list[0];
+};
+
+/*
+ * Receive work request queue entry.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
+ */
+struct ipath_rwqe {
+ u64 wr_id;
+ u8 num_sge;
+ struct ib_sge sg_list[0];
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct ipath_rwq {
+ u32 head; /* new work requests posted to the head */
+ u32 tail; /* receives pull requests from here. */
+ struct ipath_rwqe wq[0];
+};
+
+struct ipath_rq {
+ struct ipath_rwq *wq;
+ spinlock_t lock;
+ u32 size; /* size of RWQE array */
+ u8 max_sge;
+};
+
+struct ipath_srq {
+ struct ib_srq ibsrq;
+ struct ipath_rq rq;
+ struct ipath_mmap_info *ip;
+ /* send signal when number of RWQEs < limit */
+ u32 limit;
+};
+
+struct ipath_sge_state {
+ struct ipath_sge *sg_list; /* next SGE to be used if any */
+ struct ipath_sge sge; /* progress state for the current SGE */
+ u8 num_sge;
+ u8 static_rate;
+};
+
+/*
+ * This structure holds the information that the send tasklet needs
+ * to send a RDMA read response or atomic operation.
+ */
+struct ipath_ack_entry {
+ u8 opcode;
+ u8 sent;
+ u32 psn;
+ union {
+ struct ipath_sge_state rdma_sge;
+ u64 atomic_data;
+ };
+};
+
+/*
+ * Variables prefixed with s_ are for the requester (sender).
+ * Variables prefixed with r_ are for the responder (receiver).
+ * Variables prefixed with ack_ are for responder replies.
+ *
+ * Common variables are protected by both r_rq.lock and s_lock, taken in that
+ * order, which only happens in modify_qp() or when changing the QP 'state'.
+ */
+struct ipath_qp {
+ struct ib_qp ibqp;
+ struct ipath_qp *next; /* link list for QPN hash table */
+ struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */
+ struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */
+ struct list_head piowait; /* link for wait PIO buf */
+ struct list_head timerwait; /* link for waiting for timeouts */
+ struct ib_ah_attr remote_ah_attr;
+ struct ipath_ib_header s_hdr; /* next packet header to send */
+ atomic_t refcount;
+ wait_queue_head_t wait;
+ wait_queue_head_t wait_dma;
+ struct tasklet_struct s_task;
+ struct ipath_mmap_info *ip;
+ struct ipath_sge_state *s_cur_sge;
+ struct ipath_verbs_txreq *s_tx;
+ struct ipath_sge_state s_sge; /* current send request data */
+ struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1];
+ struct ipath_sge_state s_ack_rdma_sge;
+ struct ipath_sge_state s_rdma_read_sge;
+ struct ipath_sge_state r_sge; /* current receive data */
+ spinlock_t s_lock;
+ atomic_t s_dma_busy;
+ u16 s_pkt_delay;
+ u16 s_hdrwords; /* size of s_hdr in 32 bit words */
+ u32 s_cur_size; /* size of send packet in bytes */
+ u32 s_len; /* total length of s_sge */
+ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
+ u32 s_next_psn; /* PSN for next request */
+ u32 s_last_psn; /* last response PSN processed */
+ u32 s_psn; /* current packet sequence number */
+ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
+ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
+ u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
+ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
+ u64 r_wr_id; /* ID for current receive WQE */
+ unsigned long r_aflags;
+ u32 r_len; /* total length of r_sge */
+ u32 r_rcv_len; /* receive data len processed */
+ u32 r_psn; /* expected rcv packet sequence number */
+ u32 r_msn; /* message sequence number */
+ u8 state; /* QP state */
+ u8 s_state; /* opcode of last packet sent */
+ u8 s_ack_state; /* opcode of packet to ACK */
+ u8 s_nak_state; /* non-zero if NAK is pending */
+ u8 r_state; /* opcode of last packet received */
+ u8 r_nak_state; /* non-zero if NAK is pending */
+ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
+ u8 r_flags;
+ u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
+ u8 r_head_ack_queue; /* index into s_ack_queue[] */
+ u8 qp_access_flags;
+ u8 s_max_sge; /* size of s_wq->sg_list */
+ u8 s_retry_cnt; /* number of times to retry */
+ u8 s_rnr_retry_cnt;
+ u8 s_retry; /* requester retry counter */
+ u8 s_rnr_retry; /* requester RNR retry counter */
+ u8 s_pkey_index; /* PKEY index to use */
+ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
+ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
+ u8 s_tail_ack_queue; /* index into s_ack_queue[] */
+ u8 s_flags;
+ u8 s_dmult;
+ u8 s_draining;
+ u8 timeout; /* Timeout for this QP */
+ enum ib_mtu path_mtu;
+ u32 remote_qpn;
+ u32 qkey; /* QKEY for this QP (for UD or RD) */
+ u32 s_size; /* send work queue size */
+ u32 s_head; /* new entries added here */
+ u32 s_tail; /* next entry to process */
+ u32 s_cur; /* current work queue entry */
+ u32 s_last; /* last un-ACK'ed entry */
+ u32 s_ssn; /* SSN of tail entry */
+ u32 s_lsn; /* limit sequence number (credit) */
+ struct ipath_swqe *s_wq; /* send work queue */
+ struct ipath_swqe *s_wqe;
+ struct ipath_rq r_rq; /* receive work queue */
+ struct ipath_sge r_sg_list[0]; /* verified SGEs */
+};
+
+/*
+ * Atomic bit definitions for r_aflags.
+ */
+#define IPATH_R_WRID_VALID 0
+
+/*
+ * Bit definitions for r_flags.
+ */
+#define IPATH_R_REUSE_SGE 0x01
+#define IPATH_R_RDMAR_SEQ 0x02
+
+/*
+ * Bit definitions for s_flags.
+ *
+ * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
+ * before processing the next SWQE
+ * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
+ * before processing the next SWQE
+ * IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
+ * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
+ * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
+ * next send completion entry not via send DMA.
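+ * IPATH_S_SIGNAL_REQ_WR - only generate completions for send WRs that
+ * request them (IB_SEND_SIGNALED)
+ * IPATH_S_ACK_PENDING - an ACK or response is waiting to be sent
+ * IPATH_S_BUSY - the send tasklet is currently processing this QP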
+ */
+#define IPATH_S_SIGNAL_REQ_WR 0x01
+#define IPATH_S_FENCE_PENDING 0x02
+#define IPATH_S_RDMAR_PENDING 0x04
+#define IPATH_S_ACK_PENDING 0x08
+#define IPATH_S_BUSY 0x10
+#define IPATH_S_WAITING 0x20
+#define IPATH_S_WAIT_SSN_CREDIT 0x40
+#define IPATH_S_WAIT_DMA 0x80
+
+#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
+ IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
+
+#define IPATH_PSN_CREDIT 512
+
+/*
+ * Since struct ipath_swqe is not a fixed size, we can't simply index into
+ * struct ipath_qp.s_wq. This function does the array index computation.
+ */
+static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
+ unsigned n)
+{
+ return (struct ipath_swqe *)((char *)qp->s_wq +
+ (sizeof(struct ipath_swqe) +
+ qp->s_max_sge *
+ sizeof(struct ipath_sge)) * n);
+}
+
+/*
+ * Since struct ipath_rwqe is not a fixed size, we can't simply index into
+ * struct ipath_rwq.wq. This function does the array index computation.
+ */
+static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
+ unsigned n)
+{
+ return (struct ipath_rwqe *)
+ ((char *) rq->wq->wq +
+ (sizeof(struct ipath_rwqe) +
+ rq->max_sge * sizeof(struct ib_sge)) * n);
+}
+
+/*
+ * QPN-map pages start out as NULL; they get allocated upon
+ * first use and are never deallocated. This way,
+ * large bitmaps are not allocated unless large numbers of QPs are used.
+ */
+struct qpn_map {
+ atomic_t n_free;
+ void *page;
+};
+
+struct ipath_qp_table {
+ spinlock_t lock;
+ u32 last; /* last QP number allocated */
+ u32 max; /* size of the hash table */
+ u32 nmaps; /* size of the map table */
+ struct ipath_qp **table;
+ /* bit map of free numbers */
+ struct qpn_map map[QPNMAP_ENTRIES];
+};
+
+struct ipath_lkey_table {
+ spinlock_t lock;
+ u32 next; /* next unused index (speeds search) */
+ u32 gen; /* generation count */
+ u32 max; /* size of the table */
+ struct ipath_mregion **table;
+};
+
+struct ipath_opcode_stats {
+ u64 n_packets; /* number of packets */
+ u64 n_bytes; /* total number of bytes */
+};
+
+struct ipath_ibdev {
+ struct ib_device ibdev;
+ struct ipath_devdata *dd;
+ struct list_head pending_mmaps;
+ spinlock_t mmap_offset_lock;
+ u32 mmap_offset;
+ int ib_unit; /* This is the device number */
+ u16 sm_lid; /* in host order */
+ u8 sm_sl;
+ u8 mkeyprot;
+ /* non-zero when timer is set */
+ unsigned long mkey_lease_timeout;
+
+ /* The following fields are really per port. */
+ struct ipath_qp_table qp_table;
+ struct ipath_lkey_table lk_table;
+ struct list_head pending[3]; /* FIFO of QPs waiting for ACKs */
+ struct list_head piowait; /* list for wait PIO buf */
+ struct list_head txreq_free;
+ void *txreq_bufs;
+ /* list of QPs waiting for RNR timer */
+ struct list_head rnrwait;
+ spinlock_t pending_lock;
+ __be64 sys_image_guid; /* in network order */
+ __be64 gid_prefix; /* in network order */
+ __be64 mkey;
+
+ u32 n_pds_allocated; /* number of PDs allocated for device */
+ spinlock_t n_pds_lock;
+ u32 n_ahs_allocated; /* number of AHs allocated for device */
+ spinlock_t n_ahs_lock;
+ u32 n_cqs_allocated; /* number of CQs allocated for device */
+ spinlock_t n_cqs_lock;
+ u32 n_qps_allocated; /* number of QPs allocated for device */
+ spinlock_t n_qps_lock;
+ u32 n_srqs_allocated; /* number of SRQs allocated for device */
+ spinlock_t n_srqs_lock;
+ u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+ spinlock_t n_mcast_grps_lock;
+
+ u64 ipath_sword; /* total dwords sent (sample result) */
+ u64 ipath_rword; /* total dwords received (sample result) */
+ u64 ipath_spkts; /* total packets sent (sample result) */
+ u64 ipath_rpkts; /* total packets received (sample result) */
+ /* # of ticks no data sent (sample result) */
+ u64 ipath_xmit_wait;
+ u64 rcv_errors; /* # of packets with SW detected rcv errs */
+ u64 n_unicast_xmit; /* total unicast packets sent */
+ u64 n_unicast_rcv; /* total unicast packets received */
+ u64 n_multicast_xmit; /* total multicast packets sent */
+ u64 n_multicast_rcv; /* total multicast packets received */
+ u64 z_symbol_error_counter; /* starting count for PMA */
+ u64 z_link_error_recovery_counter; /* starting count for PMA */
+ u64 z_link_downed_counter; /* starting count for PMA */
+ u64 z_port_rcv_errors; /* starting count for PMA */
+ u64 z_port_rcv_remphys_errors; /* starting count for PMA */
+ u64 z_port_xmit_discards; /* starting count for PMA */
+ u64 z_port_xmit_data; /* starting count for PMA */
+ u64 z_port_rcv_data; /* starting count for PMA */
+ u64 z_port_xmit_packets; /* starting count for PMA */
+ u64 z_port_rcv_packets; /* starting count for PMA */
+ u32 z_pkey_violations; /* starting count for PMA */
+ u32 z_local_link_integrity_errors; /* starting count for PMA */
+ u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */
+ u32 z_vl15_dropped; /* starting count for PMA */
+ u32 n_rc_resends;
+ u32 n_rc_acks;
+ u32 n_rc_qacks;
+ u32 n_seq_naks;
+ u32 n_rdma_seq;
+ u32 n_rnr_naks;
+ u32 n_other_naks;
+ u32 n_timeouts;
+ u32 n_pkt_drops;
+ u32 n_vl15_dropped;
+ u32 n_wqe_errs;
+ u32 n_rdma_dup_busy;
+ u32 n_piowait;
+ u32 n_unaligned;
+ u32 port_cap_flags;
+ u32 pma_sample_start;
+ u32 pma_sample_interval;
+ __be16 pma_counter_select[5];
+ u16 pma_tag;
+ u16 qkey_violations;
+ u16 mkey_violations;
+ u16 mkey_lease_period;
+ u16 pending_index; /* which pending queue is active */
+ u8 pma_sample_status;
+ u8 subnet_timeout;
+ u8 vl_high_limit;
+ struct ipath_opcode_stats opstats[128];
+};
+
+struct ipath_verbs_counters {
+ u64 symbol_error_counter;
+ u64 link_error_recovery_counter;
+ u64 link_downed_counter;
+ u64 port_rcv_errors;
+ u64 port_rcv_remphys_errors;
+ u64 port_xmit_discards;
+ u64 port_xmit_data;
+ u64 port_rcv_data;
+ u64 port_xmit_packets;
+ u64 port_rcv_packets;
+ u32 local_link_integrity_errors;
+ u32 excessive_buffer_overrun_errors;
+ u32 vl15_dropped;
+};
+
+struct ipath_verbs_txreq {
+ struct ipath_qp *qp;
+ struct ipath_swqe *wqe;
+ u32 map_len;
+ u32 len;
+ struct ipath_sge_state *ss;
+ struct ipath_pio_header hdr;
+ struct ipath_sdma_txreq txreq;
+};
+
+static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct ipath_mr, ibmr);
+}
+
+static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct ipath_pd, ibpd);
+}
+
+static inline struct ipath_ah *to_iah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct ipath_ah, ibah);
+}
+
+static inline struct ipath_cq *to_icq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct ipath_cq, ibcq);
+}
+
+static inline struct ipath_srq *to_isrq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct ipath_srq, ibsrq);
+}
+
+static inline struct ipath_qp *to_iqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct ipath_qp, ibqp);
+}
+
+static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct ipath_ibdev, ibdev);
+}
+
+/*
+ * This must be called with s_lock held.
+ */
+static inline void ipath_schedule_send(struct ipath_qp *qp)
+{
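+ /*
+ * Clear any wait flags and schedule the send tasklet unless a send
+ * is already in progress (IPATH_S_BUSY).
+ */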
+ if (qp->s_flags & IPATH_S_ANY_WAIT)
+ qp->s_flags &= ~IPATH_S_ANY_WAIT;
+ if (!(qp->s_flags & IPATH_S_BUSY))
+ tasklet_hi_schedule(&qp->s_task);
+}
+
+int ipath_process_mad(struct ib_device *ibdev,
+ int mad_flags,
+ u8 port_num,
+ struct ib_wc *in_wc,
+ struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad);
+
+/*
+ * Compare the lower 24 bits of the two values.
+ * Returns an integer less than, equal to, or greater than zero.
+ */
+static inline int ipath_cmp24(u32 a, u32 b)
+{
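+ /*
+ * Shifting the difference left by 8 drops the top 8 bits, so the sign
+ * of the result reflects the signed 24-bit (PSN) difference.
+ */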
+ return (((int) a) - ((int) b)) << 8;
+}
+
+struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
+
+int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
+ u64 *rwords, u64 *spkts, u64 *rpkts,
+ u64 *xmit_wait);
+
+int ipath_get_counters(struct ipath_devdata *dd,
+ struct ipath_verbs_counters *cntrs);
+
+int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+
+int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+
+int ipath_mcast_tree_empty(void);
+
+__be32 ipath_compute_aeth(struct ipath_qp *qp);
+
+struct ipath_qp *ipath_lookup_qpn(struct ipath_qp_table *qpt, u32 qpn);
+
+struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int ipath_destroy_qp(struct ib_qp *ibqp);
+
+int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
+
+int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+
+int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr);
+
+unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
+
+int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
+
+void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
+
+unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
+
+int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
+ u32 hdrwords, struct ipath_sge_state *ss, u32 len);
+
+void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
+
+void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
+
+void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
+
+void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
+
+void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
+
+void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
+
+int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
+
+void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
+
+int ipath_alloc_lkey(struct ipath_lkey_table *rkt,
+ struct ipath_mregion *mr);
+
+void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
+
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
+ struct ib_sge *sge, int acc);
+
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
+ u32 len, u64 vaddr, u32 rkey, int acc);
+
+int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+
+struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+
+int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata);
+
+int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+
+int ipath_destroy_srq(struct ib_srq *ibsrq);
+
+void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
+
+int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+
+struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+
+int ipath_destroy_cq(struct ib_cq *ibcq);
+
+int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
+
+int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
+
+struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
+
+struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf, int acc, u64 *iova_start);
+
+struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata);
+
+int ipath_dereg_mr(struct ib_mr *ibmr);
+
+struct ib_fmr *ipath_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+
+int ipath_map_phys_fmr(struct ib_fmr *ibfmr, u64 * page_list,
+ int list_len, u64 iova);
+
+int ipath_unmap_fmr(struct list_head *fmr_list);
+
+int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
+
+void ipath_release_mmap_info(struct kref *ref);
+
+struct ipath_mmap_info *ipath_create_mmap_info(struct ipath_ibdev *dev,
+ u32 size,
+ struct ib_ucontext *context,
+ void *obj);
+
+void ipath_update_mmap_info(struct ipath_ibdev *dev,
+ struct ipath_mmap_info *ip,
+ u32 size, void *obj);
+
+int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+void ipath_insert_rnr_queue(struct ipath_qp *qp);
+
+int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+ u32 *lengthp, struct ipath_sge_state *ss);
+
+int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
+
+u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
+ struct ib_global_route *grh, u32 hwords, u32 nwords);
+
+void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
+ struct ipath_other_headers *ohdr,
+ u32 bth0, u32 bth2);
+
+void ipath_do_send(unsigned long data);
+
+void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
+ enum ib_wc_status status);
+
+int ipath_make_rc_req(struct ipath_qp *qp);
+
+int ipath_make_uc_req(struct ipath_qp *qp);
+
+int ipath_make_ud_req(struct ipath_qp *qp);
+
+int ipath_register_ib_device(struct ipath_devdata *);
+
+void ipath_unregister_ib_device(struct ipath_ibdev *);
+
+void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
+
+int ipath_ib_piobufavail(struct ipath_ibdev *);
+
+unsigned ipath_get_npkeys(struct ipath_devdata *);
+
+u32 ipath_get_cr_errpkey(struct ipath_devdata *);
+
+unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
+
+extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
+
+/*
+ * Below converts HCA-specific LinkTrainingState to IB PhysPortState
+ * values.
+ */
+extern const u8 ipath_cvt_physportstate[];
+#define IB_PHYSPORTSTATE_SLEEP 1
+#define IB_PHYSPORTSTATE_POLL 2
+#define IB_PHYSPORTSTATE_DISABLED 3
+#define IB_PHYSPORTSTATE_CFG_TRAIN 4
+#define IB_PHYSPORTSTATE_LINKUP 5
+#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
+
+extern const int ib_ipath_state_ops[];
+
+extern unsigned int ib_ipath_lkey_table_size;
+
+extern unsigned int ib_ipath_max_cqes;
+
+extern unsigned int ib_ipath_max_cqs;
+
+extern unsigned int ib_ipath_max_qp_wrs;
+
+extern unsigned int ib_ipath_max_qps;
+
+extern unsigned int ib_ipath_max_sges;
+
+extern unsigned int ib_ipath_max_mcast_grps;
+
+extern unsigned int ib_ipath_max_mcast_qp_attached;
+
+extern unsigned int ib_ipath_max_srqs;
+
+extern unsigned int ib_ipath_max_srq_sges;
+
+extern unsigned int ib_ipath_max_srq_wrs;
+
+extern const u32 ib_ipath_rnr_table[];
+
+extern struct ib_dma_mapping_ops ipath_dma_mapping_ops;
+
+#endif /* IPATH_VERBS_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
new file mode 100644
index 0000000..d73e322
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -0,0 +1,362 @@
+/*
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/rculist.h>
+
+#include "ipath_verbs.h"
+
+/*
+ * Global table of GID to attached QPs.
+ * The table is global to all ipath devices since a send from one QP/device
+ * needs to be locally routed to any locally attached QPs on the same
+ * or different device.
+ */
+static struct rb_root mcast_tree;
+static DEFINE_SPINLOCK(mcast_lock);
+
+/**
+ * ipath_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
+ * @qp: the QP to link
+ */
+static struct ipath_mcast_qp *ipath_mcast_qp_alloc(struct ipath_qp *qp)
+{
+ struct ipath_mcast_qp *mqp;
+
+ mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
+ if (!mqp)
+ goto bail;
+
+ mqp->qp = qp;
+ atomic_inc(&qp->refcount);
+
+bail:
+ return mqp;
+}
+
+static void ipath_mcast_qp_free(struct ipath_mcast_qp *mqp)
+{
+ struct ipath_qp *qp = mqp->qp;
+
+ /* Notify ipath_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+
+ kfree(mqp);
+}
+
+/**
+ * ipath_mcast_alloc - allocate the multicast GID structure
+ * @mgid: the multicast GID
+ *
+ * A list of QPs will be attached to this structure.
+ */
+static struct ipath_mcast *ipath_mcast_alloc(union ib_gid *mgid)
+{
+ struct ipath_mcast *mcast;
+
+ mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
+ if (!mcast)
+ goto bail;
+
+ mcast->mgid = *mgid;
+ INIT_LIST_HEAD(&mcast->qp_list);
+ init_waitqueue_head(&mcast->wait);
+ atomic_set(&mcast->refcount, 0);
+ mcast->n_attached = 0;
+
+bail:
+ return mcast;
+}
+
+static void ipath_mcast_free(struct ipath_mcast *mcast)
+{
+ struct ipath_mcast_qp *p, *tmp;
+
+ list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
+ ipath_mcast_qp_free(p);
+
+ kfree(mcast);
+}
+
+/**
+ * ipath_mcast_find - search the global table for the given multicast GID
+ * @mgid: the multicast GID to search for
+ *
+ * Returns NULL if not found.
+ *
+ * The caller is responsible for decrementing the reference count if found.
+ */
+struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid)
+{
+ struct rb_node *n;
+ unsigned long flags;
+ struct ipath_mcast *mcast;
+
+ spin_lock_irqsave(&mcast_lock, flags);
+ n = mcast_tree.rb_node;
+ while (n) {
+ int ret;
+
+ mcast = rb_entry(n, struct ipath_mcast, rb_node);
+
+ ret = memcmp(mgid->raw, mcast->mgid.raw,
+ sizeof(union ib_gid));
+ if (ret < 0)
+ n = n->rb_left;
+ else if (ret > 0)
+ n = n->rb_right;
+ else {
+ atomic_inc(&mcast->refcount);
+ spin_unlock_irqrestore(&mcast_lock, flags);
+ goto bail;
+ }
+ }
+ spin_unlock_irqrestore(&mcast_lock, flags);
+
+ mcast = NULL;
+
+bail:
+ return mcast;
+}
+
+/**
+ * ipath_mcast_add - insert mcast GID into table and attach QP struct
+ * @dev: the device whose multicast group limits are checked
+ * @mcast: the mcast group to insert into the table
+ * @mqp: the QP to attach
+ *
+ * Return zero if both were added. Return EEXIST if the GID was already in
+ * the table but the QP was added. Return ESRCH if the QP was already
+ * attached and neither structure was added. Return ENOMEM if an attach
+ * limit was exceeded and neither structure was added.
+ */
+static int ipath_mcast_add(struct ipath_ibdev *dev,
+ struct ipath_mcast *mcast,
+ struct ipath_mcast_qp *mqp)
+{
+ struct rb_node **n = &mcast_tree.rb_node;
+ struct rb_node *pn = NULL;
+ int ret;
+
+ spin_lock_irq(&mcast_lock);
+
+ while (*n) {
+ struct ipath_mcast *tmcast;
+ struct ipath_mcast_qp *p;
+
+ pn = *n;
+ tmcast = rb_entry(pn, struct ipath_mcast, rb_node);
+
+ ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
+ sizeof(union ib_gid));
+ if (ret < 0) {
+ n = &pn->rb_left;
+ continue;
+ }
+ if (ret > 0) {
+ n = &pn->rb_right;
+ continue;
+ }
+
+ /* Search the QP list to see if this is already there. */
+ list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
+ if (p->qp == mqp->qp) {
+ ret = ESRCH;
+ goto bail;
+ }
+ }
+ if (tmcast->n_attached == ib_ipath_max_mcast_qp_attached) {
+ ret = ENOMEM;
+ goto bail;
+ }
+
+ tmcast->n_attached++;
+
+ list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
+ ret = EEXIST;
+ goto bail;
+ }
+
+ spin_lock(&dev->n_mcast_grps_lock);
+ if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
+ spin_unlock(&dev->n_mcast_grps_lock);
+ ret = ENOMEM;
+ goto bail;
+ }
+
+ dev->n_mcast_grps_allocated++;
+ spin_unlock(&dev->n_mcast_grps_lock);
+
+ mcast->n_attached++;
+
+ list_add_tail_rcu(&mqp->list, &mcast->qp_list);
+
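+ /*
+ * The mcast group is new: take a reference for the tree and link the
+ * node at the leaf position found by the search above.
+ */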
+ atomic_inc(&mcast->refcount);
+ rb_link_node(&mcast->rb_node, pn, n);
+ rb_insert_color(&mcast->rb_node, &mcast_tree);
+
+ ret = 0;
+
+bail:
+ spin_unlock_irq(&mcast_lock);
+
+ return ret;
+}
+
+int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct ipath_qp *qp = to_iqp(ibqp);
+ struct ipath_ibdev *dev = to_idev(ibqp->device);
+ struct ipath_mcast *mcast;
+ struct ipath_mcast_qp *mqp;
+ int ret;
+
+ /*
+ * Allocate data structures since it's better to do this outside of
+ * spin locks and they will most likely be needed.
+ */
+ mcast = ipath_mcast_alloc(gid);
+ if (mcast == NULL) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ mqp = ipath_mcast_qp_alloc(qp);
+ if (mqp == NULL) {
+ ipath_mcast_free(mcast);
+ ret = -ENOMEM;
+ goto bail;
+ }
+ switch (ipath_mcast_add(dev, mcast, mqp)) {
+ case ESRCH:
+ /* Neither was used: can't attach the same QP twice. */
+ ipath_mcast_qp_free(mqp);
+ ipath_mcast_free(mcast);
+ ret = -EINVAL;
+ goto bail;
+ case EEXIST: /* The mcast wasn't used */
+ ipath_mcast_free(mcast);
+ break;
+ case ENOMEM:
+ /* Exceeded the maximum number of mcast groups. */
+ ipath_mcast_qp_free(mqp);
+ ipath_mcast_free(mcast);
+ ret = -ENOMEM;
+ goto bail;
+ default:
+ break;
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct ipath_qp *qp = to_iqp(ibqp);
+ struct ipath_ibdev *dev = to_idev(ibqp->device);
+ struct ipath_mcast *mcast = NULL;
+ struct ipath_mcast_qp *p, *tmp;
+ struct rb_node *n;
+ int last = 0;
+ int ret;
+
+ spin_lock_irq(&mcast_lock);
+
+ /* Find the GID in the mcast table. */
+ n = mcast_tree.rb_node;
+ while (1) {
+ if (n == NULL) {
+ spin_unlock_irq(&mcast_lock);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ mcast = rb_entry(n, struct ipath_mcast, rb_node);
+ ret = memcmp(gid->raw, mcast->mgid.raw,
+ sizeof(union ib_gid));
+ if (ret < 0)
+ n = n->rb_left;
+ else if (ret > 0)
+ n = n->rb_right;
+ else
+ break;
+ }
+
+ /* Search the QP list. */
+ list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
+ if (p->qp != qp)
+ continue;
+ /*
+ * We found it, so remove it, but don't poison the forward
+ * link until we are sure there are no list walkers.
+ */
+ list_del_rcu(&p->list);
+ mcast->n_attached--;
+
+ /* If this was the last attached QP, remove the GID too. */
+ if (list_empty(&mcast->qp_list)) {
+ rb_erase(&mcast->rb_node, &mcast_tree);
+ last = 1;
+ }
+ break;
+ }
+
+ spin_unlock_irq(&mcast_lock);
+
+ if (p) {
+ /*
+ * Wait for any list walkers to finish before freeing the
+ * list element.
+ */
+ wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+ ipath_mcast_qp_free(p);
+ }
+ if (last) {
+ atomic_dec(&mcast->refcount);
+ wait_event(mcast->wait, !atomic_read(&mcast->refcount));
+ ipath_mcast_free(mcast);
+ spin_lock_irq(&dev->n_mcast_grps_lock);
+ dev->n_mcast_grps_allocated--;
+ spin_unlock_irq(&dev->n_mcast_grps_lock);
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+int ipath_mcast_tree_empty(void)
+{
+ return mcast_tree.rb_node == NULL;
+}
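
ipath_multicast_attach() and ipath_multicast_detach() above are the device's multicast attach/detach verbs, so they are what a consumer ultimately reaches through the standard verbs multicast calls. A minimal user-space sketch of that path via libibverbs, assuming an existing UD QP; the join_group() helper and the MGID/MLID values are hypothetical, and in practice they come from an SA MCMemberRecord join:

#include <stdint.h>
#include <infiniband/verbs.h>

/* Hypothetical helper: attach a UD QP to a multicast group, do some work,
 * then detach.  'mgid' and 'mlid' are placeholders. */
static int join_group(struct ibv_qp *qp, const union ibv_gid *mgid, uint16_t mlid)
{
	int ret;

	ret = ibv_attach_mcast(qp, mgid, mlid);   /* lands in ipath_multicast_attach() on this HCA */
	if (ret)
		return ret;

	/* ... post receives and consume the multicast traffic ... */

	return ibv_detach_mcast(qp, mgid, mlid);  /* lands in ipath_multicast_detach() */
}
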
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
new file mode 100644
index 0000000..1d7bd82
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on PowerPC only. Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include "ipath_kernel.h"
+
+/**
+ * ipath_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: infinipath device
+ *
+ * Nothing to do on PowerPC, so just return without error.
+ */
+int ipath_enable_wc(struct ipath_devdata *dd)
+{
+ return 0;
+}
+
+/**
+ * ipath_unordered_wc - indicate whether write combining is unordered
+ *
+ * Because our performance depends on our ability to do write
+ * combining mmio writes in the most efficient way, we need to
+ * know if we are on a processor that may reorder stores when
+ * write combining.
+ */
+int ipath_unordered_wc(void)
+{
+ return 1;
+}
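
The "weak symbol versions" mentioned in the comment above refer to fallback definitions that an arch-specific object overrides at link time. A generic, standalone illustration of that pattern, not the driver's actual declarations (platform_enable_wc() is an invented name):

/* A generic object provides a weak fallback; any other object that defines
 * the same symbol without the weak attribute overrides it at link time. */
#include <stdio.h>

int __attribute__((weak)) platform_enable_wc(void)
{
	return 0;	/* fallback used when no arch-specific override is linked in */
}

int main(void)
{
	/* With only this file linked, the weak fallback is what runs. */
	printf("write combining enabled: %d\n", platform_enable_wc());
	return 0;
}
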
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
new file mode 100644
index 0000000..3428acb
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on x86_64 only. Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include <linux/pci.h>
+#include <asm/mtrr.h>
+#include <asm/processor.h>
+
+#include "ipath_kernel.h"
+
+/**
+ * ipath_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: infinipath device
+ *
+ * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
+ * write combining.
+ */
+int ipath_enable_wc(struct ipath_devdata *dd)
+{
+ int ret = 0;
+ u64 pioaddr, piolen;
+ unsigned bits;
+ const unsigned long addr = pci_resource_start(dd->pcidev, 0);
+ const size_t len = pci_resource_len(dd->pcidev, 0);
+
+ /*
+ * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
+ * chip. Linux (and possibly the hardware) requires the region to start
+ * on an address aligned to its length, and the length must be a power
+ * of 2. For rev1 that means the base address; for rev2 it will be just
+ * the PIO buffers themselves.
+ * For chips with two sets of buffers, the calculations are
+ * somewhat more complicated; we need to sum, and the piobufbase
+ * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
+ * The buffers are still packed, so a single range covers both.
+ */
+ if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
+ unsigned long pio2kbase, pio4kbase;
+ pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
+ pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
+ if (pio2kbase < pio4kbase) { /* all, for now */
+ pioaddr = addr + pio2kbase;
+ piolen = pio4kbase - pio2kbase +
+ dd->ipath_piobcnt4k * dd->ipath_4kalign;
+ } else {
+ pioaddr = addr + pio4kbase;
+ piolen = pio2kbase - pio4kbase +
+ dd->ipath_piobcnt2k * dd->ipath_palign;
+ }
+ } else { /* single buffer size (2K, currently) */
+ pioaddr = addr + dd->ipath_piobufbase;
+ piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
+ dd->ipath_piobcnt4k * dd->ipath_4kalign;
+ }
+
+ for (bits = 0; !(piolen & (1ULL << bits)); bits++)
+ /* do nothing */ ;
+
+ if (piolen != (1ULL << bits)) {
+ piolen >>= bits;
+ while (piolen >>= 1)
+ bits++;
+ piolen = 1ULL << (bits + 1);
+ }
+ if (pioaddr & (piolen - 1)) {
+ u64 atmp;
+ ipath_dbg("pioaddr %llx not on right boundary for size "
+ "%llx, fixing\n",
+ (unsigned long long) pioaddr,
+ (unsigned long long) piolen);
+ atmp = pioaddr & ~(piolen - 1);
+ if (atmp < addr || (atmp + piolen) > (addr + len)) {
+ ipath_dev_err(dd, "No way to align address/size "
+ "(%llx/%llx), no WC mtrr\n",
+ (unsigned long long) atmp,
+ (unsigned long long) piolen << 1);
+ ret = -ENODEV;
+ } else {
+ ipath_dbg("changing WC base from %llx to %llx, "
+ "len from %llx to %llx\n",
+ (unsigned long long) pioaddr,
+ (unsigned long long) atmp,
+ (unsigned long long) piolen,
+ (unsigned long long) piolen << 1);
+ pioaddr = atmp;
+ piolen <<= 1;
+ }
+ }
+
+ if (!ret) {
+ int cookie;
+ ipath_cdbg(VERBOSE, "Setting mtrr for chip to WC "
+ "(addr %llx, len=0x%llx)\n",
+ (unsigned long long) pioaddr,
+ (unsigned long long) piolen);
+ cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0);
+ if (cookie < 0) {
+ dev_info(&dd->pcidev->dev,
+ "mtrr_add() WC for PIO bufs failed (%d)\n",
+ cookie);
+ ret = -EINVAL;
+ } else {
+ ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
+ "cookie is %d\n", cookie);
+ dd->ipath_wc_cookie = cookie;
+ dd->ipath_wc_base = (unsigned long) pioaddr;
+ dd->ipath_wc_len = (unsigned long) piolen;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * ipath_disable_wc - disable write combining for MMIO writes to the device
+ * @dd: infinipath device
+ */
+void ipath_disable_wc(struct ipath_devdata *dd)
+{
+ if (dd->ipath_wc_cookie) {
+ int r;
+ ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
+ r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base,
+ dd->ipath_wc_len);
+ if (r < 0)
+ dev_info(&dd->pcidev->dev,
+ "mtrr_del(%lx, %lx, %lx) failed: %d\n",
+ dd->ipath_wc_cookie, dd->ipath_wc_base,
+ dd->ipath_wc_len, r);
+ dd->ipath_wc_cookie = 0; /* even on failure */
+ }
+}
+
+/**
+ * ipath_unordered_wc - indicate whether write combining is unordered
+ *
+ * Because our performance depends on our ability to do write combining mmio
+ * writes in the most efficient way, we need to know if we are on an Intel
+ * or AMD x86_64 processor. AMD x86_64 processors flush WC buffers out in
+ * the order completed, and so no special flushing is required to get
+ * correct ordering. Intel processors, however, will flush write buffers
+ * out in "random" orders, and so explicit ordering is needed at times.
+ */
+int ipath_unordered_wc(void)
+{
+ return boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
+}
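
The MTRR setup above only works when the write-combining range is a power of two in length and its base is aligned to that length. A standalone sketch of that rule with made-up base/length values; round_up_pow2() and the addresses are illustrative, not driver code:

#include <stdint.h>
#include <stdio.h>

/* Round a length up to the next power of two -- the shape a write-combining
 * MTRR range must have. */
static uint64_t round_up_pow2(uint64_t len)
{
	uint64_t p = 1;

	while (p < len)
		p <<= 1;
	return p;
}

int main(void)
{
	uint64_t base = 0xd0003000ULL;            /* made-up PIO buffer base */
	uint64_t len  = 0x6000ULL;                /* made-up combined buffer length */
	uint64_t wc_len  = round_up_pow2(len);    /* 0x8000 */
	uint64_t wc_base = base & ~(wc_len - 1);  /* 0xd0000000: aligned down to the length */

	/* If moving the base down left the tail of the buffers uncovered, the
	 * range has to grow; ipath_enable_wc() doubles the length whenever it
	 * has to move the base. */
	if (wc_base + wc_len < base + len)
		wc_len <<= 1;                     /* 0x10000 */

	/* Usable only if the resulting range still lies inside the BAR, which
	 * is the check ipath_enable_wc() makes before calling mtrr_add(). */
	printf("WC range %#llx..%#llx\n",
	       (unsigned long long) wc_base,
	       (unsigned long long) (wc_base + wc_len));
	return 0;
}
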
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
new file mode 100644
index 0000000..4175a4b
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -0,0 +1,8 @@
+config MLX4_INFINIBAND
+ tristate "Mellanox ConnectX HCA support"
+ select MLX4_CORE
+ ---help---
+ This driver provides low-level InfiniBand support for
+ Mellanox ConnectX PCI Express host channel adapters (HCAs).
+ This is required to use InfiniBand protocols such as
+ IP-over-IB or SRP with these devices.
diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile
new file mode 100644
index 0000000..70f09c7
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
+
+mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
new file mode 100644
index 0000000..c75ac94
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx4_ib.h"
+
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+ struct mlx4_dev *dev = to_mdev(pd->device)->dev;
+ struct mlx4_ib_ah *ah;
+
+ ah = kmalloc(sizeof *ah, GFP_ATOMIC);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ memset(&ah->av, 0, sizeof ah->av);
+
+ ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
+ ah->av.g_slid = ah_attr->src_path_bits;
+ ah->av.dlid = cpu_to_be16(ah_attr->dlid);
+ if (ah_attr->static_rate) {
+ ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+ while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
+ !(1 << ah->av.stat_rate & dev->caps.stat_rate_support))
+ --ah->av.stat_rate;
+ }
+ ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ ah->av.g_slid |= 0x80;
+ ah->av.gid_index = ah_attr->grh.sgid_index;
+ ah->av.hop_limit = ah_attr->grh.hop_limit;
+ ah->av.sl_tclass_flowlabel |=
+ cpu_to_be32((ah_attr->grh.traffic_class << 20) |
+ ah_attr->grh.flow_label);
+ memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
+ }
+
+ return &ah->ibah;
+}
+
+int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+ struct mlx4_ib_ah *ah = to_mah(ibah);
+
+ memset(ah_attr, 0, sizeof *ah_attr);
+ ah_attr->dlid = be16_to_cpu(ah->av.dlid);
+ ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
+ ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24;
+ if (ah->av.stat_rate)
+ ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET;
+ ah_attr->src_path_bits = ah->av.g_slid & 0x7F;
+
+ if (mlx4_ib_ah_grh_present(ah)) {
+ ah_attr->ah_flags = IB_AH_GRH;
+
+ ah_attr->grh.traffic_class =
+ be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20;
+ ah_attr->grh.flow_label =
+ be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
+ ah_attr->grh.hop_limit = ah->av.hop_limit;
+ ah_attr->grh.sgid_index = ah->av.gid_index;
+ memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
+ }
+
+ return 0;
+}
+
+int mlx4_ib_destroy_ah(struct ib_ah *ah)
+{
+ kfree(to_mah(ah));
+ return 0;
+}
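
The static-rate handling in mlx4_ib_create_ah() above walks the requested rate encoding downward until it finds one advertised in dev->caps.stat_rate_support, never dropping below the 2.5 Gb/s floor. A small standalone sketch of that search with invented encodings and support mask (pick_rate() is illustrative only):

#include <stdio.h>

/* Choose the highest rate encoding that is no greater than the requested one
 * and is present in a support bitmask, never going below a floor encoding. */
static unsigned int pick_rate(unsigned int requested, unsigned int floor,
			      unsigned int support_mask)
{
	unsigned int r = requested;

	while (r > floor && !((1u << r) & support_mask))
		--r;
	return r;
}

int main(void)
{
	/* The device claims encodings 3 and 5; a request for 7 falls back to 5. */
	printf("%u\n", pick_rate(7, 3, (1u << 3) | (1u << 5)));
	return 0;
}
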
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
new file mode 100644
index 0000000..1830849
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -0,0 +1,803 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx4/cq.h>
+#include <linux/mlx4/qp.h>
+
+#include "mlx4_ib.h"
+#include "user.h"
+
+static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
+{
+ struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
+ ibcq->comp_handler(ibcq, ibcq->cq_context);
+}
+
+static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
+{
+ struct ib_event event;
+ struct ib_cq *ibcq;
+
+ if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
+ printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ "on CQ %06x\n", type, cq->cqn);
+ return;
+ }
+
+ ibcq = &to_mibcq(cq)->ibcq;
+ if (ibcq->event_handler) {
+ event.device = ibcq->device;
+ event.event = IB_EVENT_CQ_ERR;
+ event.element.cq = ibcq;
+ ibcq->event_handler(&event, ibcq->cq_context);
+ }
+}
+
+static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
+{
+ return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
+}
+
+static void *get_cqe(struct mlx4_ib_cq *cq, int n)
+{
+ return get_cqe_from_buf(&cq->buf, n);
+}
+
+static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
+{
+ struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+
+ return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+ !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
+}
+
+static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)
+{
+ return get_sw_cqe(cq, cq->mcq.cons_index);
+}
+
+int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+{
+ struct mlx4_ib_cq *mcq = to_mcq(cq);
+ struct mlx4_ib_dev *dev = to_mdev(cq->device);
+
+ return mlx4_cq_modify(dev->dev, &mcq->mcq, cq_count, cq_period);
+}
+
+static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int nent)
+{
+ int err;
+
+ err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
+ PAGE_SIZE * 2, &buf->buf);
+
+ if (err)
+ goto out;
+
+ err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
+ &buf->mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
+ if (err)
+ goto err_mtt;
+
+ return 0;
+
+err_mtt:
+ mlx4_mtt_cleanup(dev->dev, &buf->mtt);
+
+err_buf:
+ mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
+ &buf->buf);
+
+out:
+ return err;
+}
+
+static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
+{
+ mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
+}
+
+static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
+ struct mlx4_ib_cq_buf *buf, struct ib_umem **umem,
+ u64 buf_addr, int cqe)
+{
+ int err;
+
+ *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(*umem))
+ return PTR_ERR(*umem);
+
+ err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem),
+ ilog2((*umem)->page_size), &buf->mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_ib_umem_write_mtt(dev, &buf->mtt, *umem);
+ if (err)
+ goto err_mtt;
+
+ return 0;
+
+err_mtt:
+ mlx4_mtt_cleanup(dev->dev, &buf->mtt);
+
+err_buf:
+ ib_umem_release(*umem);
+
+ return err;
+}
+
+struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct mlx4_ib_cq *cq;
+ struct mlx4_uar *uar;
+ int err;
+
+ if (entries < 1 || entries > dev->dev->caps.max_cqes)
+ return ERR_PTR(-EINVAL);
+
+ cq = kmalloc(sizeof *cq, GFP_KERNEL);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
+
+ entries = roundup_pow_of_two(entries + 1);
+ cq->ibcq.cqe = entries - 1;
+ mutex_init(&cq->resize_mutex);
+ spin_lock_init(&cq->lock);
+ cq->resize_buf = NULL;
+ cq->resize_umem = NULL;
+
+ if (context) {
+ struct mlx4_ib_create_cq ucmd;
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ err = -EFAULT;
+ goto err_cq;
+ }
+
+ err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem,
+ ucmd.buf_addr, entries);
+ if (err)
+ goto err_cq;
+
+ err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
+ &cq->db);
+ if (err)
+ goto err_mtt;
+
+ uar = &to_mucontext(context)->uar;
+ } else {
+ err = mlx4_db_alloc(dev->dev, &cq->db, 1);
+ if (err)
+ goto err_cq;
+
+ cq->mcq.set_ci_db = cq->db.db;
+ cq->mcq.arm_db = cq->db.db + 1;
+ *cq->mcq.set_ci_db = 0;
+ *cq->mcq.arm_db = 0;
+
+ err = mlx4_ib_alloc_cq_buf(dev, &cq->buf, entries);
+ if (err)
+ goto err_db;
+
+ uar = &dev->priv_uar;
+ }
+
+ err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
+ cq->db.dma, &cq->mcq, 0);
+ if (err)
+ goto err_dbmap;
+
+ cq->mcq.comp = mlx4_ib_cq_comp;
+ cq->mcq.event = mlx4_ib_cq_event;
+
+ if (context)
+ if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
+ err = -EFAULT;
+ goto err_dbmap;
+ }
+
+ return &cq->ibcq;
+
+err_dbmap:
+ if (context)
+ mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
+
+err_mtt:
+ mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
+
+ if (context)
+ ib_umem_release(cq->umem);
+ else
+ mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+
+err_db:
+ if (!context)
+ mlx4_db_free(dev->dev, &cq->db);
+
+err_cq:
+ kfree(cq);
+
+ return ERR_PTR(err);
+}
+
+static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
+ int entries)
+{
+ int err;
+
+ if (cq->resize_buf)
+ return -EBUSY;
+
+ cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+ if (!cq->resize_buf)
+ return -ENOMEM;
+
+ err = mlx4_ib_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
+ if (err) {
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ return err;
+ }
+
+ cq->resize_buf->cqe = entries - 1;
+
+ return 0;
+}
+
+static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
+ int entries, struct ib_udata *udata)
+{
+ struct mlx4_ib_resize_cq ucmd;
+ int err;
+
+ if (cq->resize_umem)
+ return -EBUSY;
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+ return -EFAULT;
+
+ cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+ if (!cq->resize_buf)
+ return -ENOMEM;
+
+ err = mlx4_ib_get_cq_umem(dev, cq->umem->context, &cq->resize_buf->buf,
+ &cq->resize_umem, ucmd.buf_addr, entries);
+ if (err) {
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ return err;
+ }
+
+ cq->resize_buf->cqe = entries - 1;
+
+ return 0;
+}
+
+static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
+{
+ u32 i;
+
+ i = cq->mcq.cons_index;
+ while (get_sw_cqe(cq, i & cq->ibcq.cqe))
+ ++i;
+
+ return i - cq->mcq.cons_index;
+}
+
+static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
+{
+ struct mlx4_cqe *cqe;
+ int i;
+
+ i = cq->mcq.cons_index;
+ cqe = get_cqe(cq, i & cq->ibcq.cqe);
+ while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
+ memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
+ (i + 1) & cq->resize_buf->cqe),
+ get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+ cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+ }
+ ++cq->mcq.cons_index;
+}
+
+int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibcq->device);
+ struct mlx4_ib_cq *cq = to_mcq(ibcq);
+ struct mlx4_mtt mtt;
+ int outst_cqe;
+ int err;
+
+ mutex_lock(&cq->resize_mutex);
+
+ if (entries < 1 || entries > dev->dev->caps.max_cqes) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ entries = roundup_pow_of_two(entries + 1);
+ if (entries == ibcq->cqe + 1) {
+ err = 0;
+ goto out;
+ }
+
+ if (ibcq->uobject) {
+ err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
+ if (err)
+ goto out;
+ } else {
+ /* Can't be smaller than the number of outstanding CQEs */
+ outst_cqe = mlx4_ib_get_outstanding_cqes(cq);
+ if (entries < outst_cqe + 1) {
+ err = 0;
+ goto out;
+ }
+
+ err = mlx4_alloc_resize_buf(dev, cq, entries);
+ if (err)
+ goto out;
+ }
+
+ mtt = cq->buf.mtt;
+
+ err = mlx4_cq_resize(dev->dev, &cq->mcq, entries, &cq->resize_buf->buf.mtt);
+ if (err)
+ goto err_buf;
+
+ mlx4_mtt_cleanup(dev->dev, &mtt);
+ if (ibcq->uobject) {
+ cq->buf = cq->resize_buf->buf;
+ cq->ibcq.cqe = cq->resize_buf->cqe;
+ ib_umem_release(cq->umem);
+ cq->umem = cq->resize_umem;
+
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ cq->resize_umem = NULL;
+ } else {
+ spin_lock_irq(&cq->lock);
+ if (cq->resize_buf) {
+ mlx4_ib_cq_resize_copy_cqes(cq);
+ mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+ cq->buf = cq->resize_buf->buf;
+ cq->ibcq.cqe = cq->resize_buf->cqe;
+
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ }
+ spin_unlock_irq(&cq->lock);
+ }
+
+ goto out;
+
+err_buf:
+ mlx4_mtt_cleanup(dev->dev, &cq->resize_buf->buf.mtt);
+ if (!ibcq->uobject)
+ mlx4_ib_free_cq_buf(dev, &cq->resize_buf->buf,
+ cq->resize_buf->cqe);
+
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+
+ if (cq->resize_umem) {
+ ib_umem_release(cq->resize_umem);
+ cq->resize_umem = NULL;
+ }
+
+out:
+ mutex_unlock(&cq->resize_mutex);
+ return err;
+}
+
+int mlx4_ib_destroy_cq(struct ib_cq *cq)
+{
+ struct mlx4_ib_dev *dev = to_mdev(cq->device);
+ struct mlx4_ib_cq *mcq = to_mcq(cq);
+
+ mlx4_cq_free(dev->dev, &mcq->mcq);
+ mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt);
+
+ if (cq->uobject) {
+ mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
+ ib_umem_release(mcq->umem);
+ } else {
+ mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
+ mlx4_db_free(dev->dev, &mcq->db);
+ }
+
+ kfree(mcq);
+
+ return 0;
+}
+
+static void dump_cqe(void *cqe)
+{
+ __be32 *buf = cqe;
+
+ printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
+ be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
+ be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
+}
+
+static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
+ struct ib_wc *wc)
+{
+ if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
+ printk(KERN_DEBUG "local QP operation err "
+ "(QPN %06x, WQE index %x, vendor syndrome %02x, "
+ "opcode = %02x)\n",
+ be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
+ cqe->vendor_err_syndrome,
+ cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
+ dump_cqe(cqe);
+ }
+
+ switch (cqe->syndrome) {
+ case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:
+ wc->status = IB_WC_LOC_LEN_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:
+ wc->status = IB_WC_LOC_QP_OP_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:
+ wc->status = IB_WC_LOC_PROT_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_MW_BIND_ERR:
+ wc->status = IB_WC_MW_BIND_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_BAD_RESP_ERR:
+ wc->status = IB_WC_BAD_RESP_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
+ wc->status = IB_WC_REM_INV_REQ_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:
+ wc->status = IB_WC_REM_ACCESS_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:
+ wc->status = IB_WC_REM_OP_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
+ wc->status = IB_WC_RETRY_EXC_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
+ wc->status = IB_WC_RNR_RETRY_EXC_ERR;
+ break;
+ case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:
+ wc->status = IB_WC_REM_ABORT_ERR;
+ break;
+ default:
+ wc->status = IB_WC_GENERAL_ERR;
+ break;
+ }
+
+ wc->vendor_err = cqe->vendor_err_syndrome;
+}
+
+static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
+{
+ return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
+ MLX4_CQE_STATUS_IPV4F |
+ MLX4_CQE_STATUS_IPV4OPT |
+ MLX4_CQE_STATUS_IPV6 |
+ MLX4_CQE_STATUS_IPOK)) ==
+ cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
+ MLX4_CQE_STATUS_IPOK)) &&
+ (status & cpu_to_be16(MLX4_CQE_STATUS_UDP |
+ MLX4_CQE_STATUS_TCP)) &&
+ checksum == cpu_to_be16(0xffff);
+}
+
+static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
+ struct mlx4_ib_qp **cur_qp,
+ struct ib_wc *wc)
+{
+ struct mlx4_cqe *cqe;
+ struct mlx4_qp *mqp;
+ struct mlx4_ib_wq *wq;
+ struct mlx4_ib_srq *srq;
+ int is_send;
+ int is_error;
+ u32 g_mlpath_rqpn;
+ u16 wqe_ctr;
+
+repoll:
+ cqe = next_cqe_sw(cq);
+ if (!cqe)
+ return -EAGAIN;
+
+ ++cq->mcq.cons_index;
+
+ /*
+ * Make sure we read CQ entry contents after we've checked the
+ * ownership bit.
+ */
+ rmb();
+
+ is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
+ is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
+ MLX4_CQE_OPCODE_ERROR;
+
+ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
+ is_send)) {
+ printk(KERN_WARNING "Completion for NOP opcode detected!\n");
+ return -EINVAL;
+ }
+
+ /* Resize CQ in progress */
+ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
+ if (cq->resize_buf) {
+ struct mlx4_ib_dev *dev = to_mdev(cq->ibcq.device);
+
+ mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+ cq->buf = cq->resize_buf->buf;
+ cq->ibcq.cqe = cq->resize_buf->cqe;
+
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ }
+
+ goto repoll;
+ }
+
+ if (!*cur_qp ||
+ (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) {
+ /*
+ * We do not have to take the QP table lock here,
+ * because CQs will be locked while QPs are removed
+ * from the table.
+ */
+ mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
+ be32_to_cpu(cqe->vlan_my_qpn));
+ if (unlikely(!mqp)) {
+ printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
+ cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
+ return -EINVAL;
+ }
+
+ *cur_qp = to_mibqp(mqp);
+ }
+
+ wc->qp = &(*cur_qp)->ibqp;
+
+ if (is_send) {
+ wq = &(*cur_qp)->sq;
+ if (!(*cur_qp)->sq_signal_bits) {
+ wqe_ctr = be16_to_cpu(cqe->wqe_index);
+ wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
+ }
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ } else if ((*cur_qp)->ibqp.srq) {
+ srq = to_msrq((*cur_qp)->ibqp.srq);
+ wqe_ctr = be16_to_cpu(cqe->wqe_index);
+ wc->wr_id = srq->wrid[wqe_ctr];
+ mlx4_ib_free_srq_wqe(srq, wqe_ctr);
+ } else {
+ wq = &(*cur_qp)->rq;
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ }
+
+ if (unlikely(is_error)) {
+ mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
+ return 0;
+ }
+
+ wc->status = IB_WC_SUCCESS;
+
+ if (is_send) {
+ wc->wc_flags = 0;
+ switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
+ case MLX4_OPCODE_RDMA_WRITE_IMM:
+ wc->wc_flags |= IB_WC_WITH_IMM;
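+ /* fall through */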
+ case MLX4_OPCODE_RDMA_WRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case MLX4_OPCODE_SEND_IMM:
+ wc->wc_flags |= IB_WC_WITH_IMM;
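+ /* fall through */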
+ case MLX4_OPCODE_SEND:
+ case MLX4_OPCODE_SEND_INVAL:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case MLX4_OPCODE_RDMA_READ:
+ wc->opcode = IB_WC_RDMA_READ;
+ wc->byte_len = be32_to_cpu(cqe->byte_cnt);
+ break;
+ case MLX4_OPCODE_ATOMIC_CS:
+ wc->opcode = IB_WC_COMP_SWAP;
+ wc->byte_len = 8;
+ break;
+ case MLX4_OPCODE_ATOMIC_FA:
+ wc->opcode = IB_WC_FETCH_ADD;
+ wc->byte_len = 8;
+ break;
+ case MLX4_OPCODE_BIND_MW:
+ wc->opcode = IB_WC_BIND_MW;
+ break;
+ case MLX4_OPCODE_LSO:
+ wc->opcode = IB_WC_LSO;
+ break;
+ case MLX4_OPCODE_FMR:
+ wc->opcode = IB_WC_FAST_REG_MR;
+ break;
+ case MLX4_OPCODE_LOCAL_INVAL:
+ wc->opcode = IB_WC_LOCAL_INV;
+ break;
+ }
+ } else {
+ wc->byte_len = be32_to_cpu(cqe->byte_cnt);
+
+ switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
+ case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->ex.imm_data = cqe->immed_rss_invalid;
+ break;
+ case MLX4_RECV_OPCODE_SEND_INVAL:
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = IB_WC_WITH_INVALIDATE;
+ wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid);
+ break;
+ case MLX4_RECV_OPCODE_SEND:
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = 0;
+ break;
+ case MLX4_RECV_OPCODE_SEND_IMM:
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->ex.imm_data = cqe->immed_rss_invalid;
+ break;
+ }
+
+ wc->slid = be16_to_cpu(cqe->rlid);
+ wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
+ g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
+ wc->src_qp = g_mlpath_rqpn & 0xffffff;
+ wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
+ wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
+ wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
+ wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum);
+ }
+
+ return 0;
+}
+
+int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct mlx4_ib_cq *cq = to_mcq(ibcq);
+ struct mlx4_ib_qp *cur_qp = NULL;
+ unsigned long flags;
+ int npolled;
+ int err = 0;
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ for (npolled = 0; npolled < num_entries; ++npolled) {
+ err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
+ if (err)
+ break;
+ }
+
+ if (npolled)
+ mlx4_cq_set_ci(&cq->mcq);
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ if (err == 0 || err == -EAGAIN)
+ return npolled;
+ else
+ return err;
+}
+
+int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ mlx4_cq_arm(&to_mcq(ibcq)->mcq,
+ (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT,
+ to_mdev(ibcq->device)->uar_map,
+ MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock));
+
+ return 0;
+}
+
+void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
+{
+ u32 prod_index;
+ int nfreed = 0;
+ struct mlx4_cqe *cqe, *dest;
+ u8 owner_bit;
+
+ /*
+ * First we need to find the current producer index, so we
+ * know where to start cleaning from. It doesn't matter if HW
+ * adds new entries after this loop -- the QP we're worried
+ * about is already in RESET, so the new entries won't come
+ * from our QP and therefore don't need to be checked.
+ */
+ for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
+ if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
+ break;
+
+ /*
+ * Now sweep backwards through the CQ, removing CQ entries
+ * that match our QP by copying older entries on top of them.
+ */
+ while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
+ cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+ if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
+ if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
+ mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
+ ++nfreed;
+ } else if (nfreed) {
+ dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+ owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
+ memcpy(dest, cqe, sizeof *cqe);
+ dest->owner_sr_opcode = owner_bit |
+ (dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
+ }
+ }
+
+ if (nfreed) {
+ cq->mcq.cons_index += nfreed;
+ /*
+ * Make sure update of buffer contents is done before
+ * updating consumer index.
+ */
+ wmb();
+ mlx4_cq_set_ci(&cq->mcq);
+ }
+}
+
+void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
+{
+ spin_lock_irq(&cq->lock);
+ __mlx4_ib_cq_clean(cq, qpn, srq);
+ spin_unlock_irq(&cq->lock);
+}
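
The get_sw_cqe() helper earlier in this file decides whether a CQE is ready for software by comparing the CQE ownership bit with the wrap ("lap") parity of the consumer index, which is how the driver avoids racing with hardware on a ring whose size is a power of two. A standalone sketch of that comparison with made-up sizes and indices (is_sw_owned() is illustrative only, not driver code):

#include <stdio.h>

/* Mirror of the get_sw_cqe() test: a CQE is considered software-owned when
 * its ownership bit matches the wrap parity of the consumer index.
 * cqe_mask is the CQ size minus one (the ring size is a power of two). */
static int is_sw_owned(unsigned owner_bit, unsigned cons_index, unsigned cqe_mask)
{
	unsigned lap_parity = !!(cons_index & (cqe_mask + 1));

	return !(owner_bit ^ lap_parity);
}

int main(void)
{
	/* 64-entry CQ: slot 5 on an even lap with owner bit 0 -> software's. */
	printf("%d\n", is_sw_owned(0, 5, 63));
	/* Same slot one lap later (index 69): the owner bit must have flipped,
	 * so owner bit 0 now means the entry is not yet valid. */
	printf("%d\n", is_sw_owned(0, 69, 63));
	return 0;
}
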
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
new file mode 100644
index 0000000..8aee423
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/slab.h>
+
+#include "mlx4_ib.h"
+
+struct mlx4_ib_user_db_page {
+ struct list_head list;
+ struct ib_umem *umem;
+ unsigned long user_virt;
+ int refcnt;
+};
+
+int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
+ struct mlx4_db *db)
+{
+ struct mlx4_ib_user_db_page *page;
+ struct ib_umem_chunk *chunk;
+ int err = 0;
+
+ mutex_lock(&context->db_page_mutex);
+
+ list_for_each_entry(page, &context->db_page_list, list)
+ if (page->user_virt == (virt & PAGE_MASK))
+ goto found;
+
+ page = kmalloc(sizeof *page, GFP_KERNEL);
+ if (!page) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ page->user_virt = (virt & PAGE_MASK);
+ page->refcnt = 0;
+ page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
+ PAGE_SIZE, 0, 0);
+ if (IS_ERR(page->umem)) {
+ err = PTR_ERR(page->umem);
+ kfree(page);
+ goto out;
+ }
+
+ list_add(&page->list, &context->db_page_list);
+
+found:
+ chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
+ db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
+ db->u.user_page = page;
+ ++page->refcnt;
+
+out:
+ mutex_unlock(&context->db_page_mutex);
+
+ return err;
+}
+
+void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db)
+{
+ mutex_lock(&context->db_page_mutex);
+
+ if (!--db->u.user_page->refcnt) {
+ list_del(&db->u.user_page->list);
+ ib_umem_release(db->u.user_page->umem);
+ kfree(db->u.user_page);
+ }
+
+ mutex_unlock(&context->db_page_mutex);
+}
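
mlx4_ib_db_map_user() above pins one page per distinct page-aligned user address and shares it, by refcount, among every doorbell that lands in that page. A simplified user-space analogue of the lookup/refcount pattern, with a plain linked list standing in for context->db_page_list (db_page_get() and the types are illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define DB_PAGE_SIZE 4096UL
#define DB_PAGE_MASK (~(DB_PAGE_SIZE - 1))

struct db_page {
	uintptr_t user_virt;		/* page-aligned user address */
	int refcnt;
	struct db_page *next;
};

/* Find or create the entry covering 'virt'; one entry serves every doorbell
 * in the same page, so repeated maps only bump the refcount. */
static struct db_page *db_page_get(struct db_page **head, uintptr_t virt)
{
	struct db_page *p;

	for (p = *head; p; p = p->next)
		if (p->user_virt == (virt & DB_PAGE_MASK))
			goto found;

	p = calloc(1, sizeof(*p));
	if (!p)
		return NULL;
	p->user_virt = virt & DB_PAGE_MASK;
	p->next = *head;
	*head = p;
found:
	p->refcnt++;
	return p;
}

int main(void)
{
	struct db_page *head = NULL;
	struct db_page *p;

	/* Two doorbells in the same 4 KB page share one pinned page (refcnt 2). */
	db_page_get(&head, 0x7f0000001008UL);
	p = db_page_get(&head, 0x7f0000001a20UL);
	printf("page %#lx refcnt %d\n", (unsigned long) p->user_virt, p->refcnt);
	return 0;
}
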
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
new file mode 100644
index 0000000..606f1e2
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4_ib.h"
+
+enum {
+ MLX4_IB_VENDOR_CLASS1 = 0x9,
+ MLX4_IB_VENDOR_CLASS2 = 0xa
+};
+
+int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+ int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+ void *in_mad, void *response_mad)
+{
+ struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
+ void *inbox;
+ int err;
+ u32 in_modifier = port;
+ u8 op_modifier = 0;
+
+ inmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
+ if (IS_ERR(inmailbox))
+ return PTR_ERR(inmailbox);
+ inbox = inmailbox->buf;
+
+ outmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
+ if (IS_ERR(outmailbox)) {
+ mlx4_free_cmd_mailbox(dev->dev, inmailbox);
+ return PTR_ERR(outmailbox);
+ }
+
+ memcpy(inbox, in_mad, 256);
+
+ /*
+ * Key check traps can't be generated unless we have in_wc to
+ * tell us where to send the trap.
+ */
+ if (ignore_mkey || !in_wc)
+ op_modifier |= 0x1;
+ if (ignore_bkey || !in_wc)
+ op_modifier |= 0x2;
+
+ if (in_wc) {
+ struct {
+ __be32 my_qpn;
+ u32 reserved1;
+ __be32 rqpn;
+ u8 sl;
+ u8 g_path;
+ u16 reserved2[2];
+ __be16 pkey;
+ u32 reserved3[11];
+ u8 grh[40];
+ } *ext_info;
+
+ memset(inbox + 256, 0, 256);
+ ext_info = inbox + 256;
+
+ ext_info->my_qpn = cpu_to_be32(in_wc->qp->qp_num);
+ ext_info->rqpn = cpu_to_be32(in_wc->src_qp);
+ ext_info->sl = in_wc->sl << 4;
+ ext_info->g_path = in_wc->dlid_path_bits |
+ (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
+ ext_info->pkey = cpu_to_be16(in_wc->pkey_index);
+
+ if (in_grh)
+ memcpy(ext_info->grh, in_grh, 40);
+
+ op_modifier |= 0x4;
+
+ in_modifier |= in_wc->slid << 16;
+ }
+
+ err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
+ in_modifier, op_modifier,
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+
+ if (!err)
+ memcpy(response_mad, outmailbox->buf, 256);
+
+ mlx4_free_cmd_mailbox(dev->dev, inmailbox);
+ mlx4_free_cmd_mailbox(dev->dev, outmailbox);
+
+ return err;
+}
+
+static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
+{
+ struct ib_ah *new_ah;
+ struct ib_ah_attr ah_attr;
+
+ if (!dev->send_agent[port_num - 1][0])
+ return;
+
+ memset(&ah_attr, 0, sizeof ah_attr);
+ ah_attr.dlid = lid;
+ ah_attr.sl = sl;
+ ah_attr.port_num = port_num;
+
+ new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
+ &ah_attr);
+ if (IS_ERR(new_ah))
+ return;
+
+ spin_lock(&dev->sm_lock);
+ if (dev->sm_ah[port_num - 1])
+ ib_destroy_ah(dev->sm_ah[port_num - 1]);
+ dev->sm_ah[port_num - 1] = new_ah;
+ spin_unlock(&dev->sm_lock);
+}
+
+/*
+ * Snoop SM MADs for port info and P_Key table sets, so we can
+ * synthesize LID change and P_Key change events.
+ */
+static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad)
+{
+ struct ib_event event;
+
+ if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
+ if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
+ struct ib_port_info *pinfo =
+ (struct ib_port_info *) ((struct ib_smp *) mad)->data;
+
+ update_sm_ah(to_mdev(ibdev), port_num,
+ be16_to_cpu(pinfo->sm_lid),
+ pinfo->neighbormtu_mastersmsl & 0xf);
+
+ event.device = ibdev;
+ event.element.port_num = port_num;
+
+ if (pinfo->clientrereg_resv_subnetto & 0x80)
+ event.event = IB_EVENT_CLIENT_REREGISTER;
+ else
+ event.event = IB_EVENT_LID_CHANGE;
+
+ ib_dispatch_event(&event);
+ }
+
+ if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
+ event.device = ibdev;
+ event.event = IB_EVENT_PKEY_CHANGE;
+ event.element.port_num = port_num;
+ ib_dispatch_event(&event);
+ }
+ }
+}
+
+static void node_desc_override(struct ib_device *dev,
+ struct ib_mad *mad)
+{
+ if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
+ mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
+ spin_lock(&to_mdev(dev)->sm_lock);
+ memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
+ spin_unlock(&to_mdev(dev)->sm_lock);
+ }
+}
+
+static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *mad)
+{
+ int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ struct ib_mad_send_buf *send_buf;
+ struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
+ int ret;
+
+ if (agent) {
+ send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
+ IB_MGMT_MAD_DATA, GFP_ATOMIC);
+ /*
+ * We rely here on the fact that MLX QPs don't use the
+ * address handle after the send is posted (strictly
+ * speaking this violates the IB spec, but we know
+ * it's OK for our devices).
+ */
+ spin_lock(&dev->sm_lock);
+ memcpy(send_buf->mad, mad, sizeof *mad);
+ if ((send_buf->ah = dev->sm_ah[port_num - 1]))
+ ret = ib_post_send_mad(send_buf, NULL);
+ else
+ ret = -EINVAL;
+ spin_unlock(&dev->sm_lock);
+
+ if (ret)
+ ib_free_send_mad(send_buf);
+ }
+}
+
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ u16 slid;
+ int err;
+
+ slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
+
+ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
+ forward_trap(to_mdev(ibdev), port_num, in_mad);
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+ }
+
+ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
+ return IB_MAD_RESULT_SUCCESS;
+
+ /*
+ * Don't process SMInfo queries or vendor-specific
+ * MADs -- the SMA can't handle them.
+ */
+ if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
+ ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
+ IB_SMP_ATTR_VENDOR_MASK))
+ return IB_MAD_RESULT_SUCCESS;
+ } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
+ in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
+ in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2 ||
+ in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
+ if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
+ return IB_MAD_RESULT_SUCCESS;
+ } else
+ return IB_MAD_RESULT_SUCCESS;
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev),
+ mad_flags & IB_MAD_IGNORE_MKEY,
+ mad_flags & IB_MAD_IGNORE_BKEY,
+ port_num, in_wc, in_grh, in_mad, out_mad);
+ if (err)
+ return IB_MAD_RESULT_FAILURE;
+
+ if (!out_mad->mad_hdr.status) {
+ smp_snoop(ibdev, port_num, in_mad);
+ node_desc_override(ibdev, out_mad);
+ }
+
+ /* set return bit in status of directed route responses */
+ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
+
+ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
+ /* no response for trap repress */
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+static void send_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ ib_free_send_mad(mad_send_wc->send_buf);
+}
+
+int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
+{
+ struct ib_mad_agent *agent;
+ int p, q;
+ int ret;
+
+ for (p = 0; p < dev->num_ports; ++p)
+ for (q = 0; q <= 1; ++q) {
+ agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
+ q ? IB_QPT_GSI : IB_QPT_SMI,
+ NULL, 0, send_handler,
+ NULL, NULL);
+ if (IS_ERR(agent)) {
+ ret = PTR_ERR(agent);
+ goto err;
+ }
+ dev->send_agent[p][q] = agent;
+ }
+
+ return 0;
+
+err:
+ for (p = 0; p < dev->num_ports; ++p)
+ for (q = 0; q <= 1; ++q)
+ if (dev->send_agent[p][q])
+ ib_unregister_mad_agent(dev->send_agent[p][q]);
+
+ return ret;
+}
+
+void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
+{
+ struct ib_mad_agent *agent;
+ int p, q;
+
+ for (p = 0; p < dev->num_ports; ++p) {
+ for (q = 0; q <= 1; ++q) {
+ agent = dev->send_agent[p][q];
+ dev->send_agent[p][q] = NULL;
+ ib_unregister_mad_agent(agent);
+ }
+
+ if (dev->sm_ah[p])
+ ib_destroy_ah(dev->sm_ah[p]);
+ }
+}
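
mlx4_MAD_IFC() above packs its behaviour flags into op_modifier: bit 0 skips the M_Key check, bit 1 skips the B_Key check (both are forced on when no work completion is available to route a trap), and bit 2 is set when the extended WC/GRH block at mailbox offset 256 has been filled in. A tiny sketch of that packing (build_op_modifier() is illustrative, not a firmware interface):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the flag packing in mlx4_MAD_IFC(). */
static uint8_t build_op_modifier(bool ignore_mkey, bool ignore_bkey, bool have_wc)
{
	uint8_t op = 0;

	if (ignore_mkey || !have_wc)
		op |= 0x1;	/* skip the M_Key check */
	if (ignore_bkey || !have_wc)
		op |= 0x2;	/* skip the B_Key check */
	if (have_wc)
		op |= 0x4;	/* extended in_wc/GRH info present */
	return op;
}

int main(void)
{
	/* No WC available: both key checks are skipped, no extended info. */
	printf("%#x\n", build_op_modifier(false, false, false));	/* 0x3 */
	/* WC available, keys enforced, extended info present. */
	printf("%#x\n", build_op_modifier(false, false, true));		/* 0x4 */
	return 0;
}
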
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
new file mode 100644
index 0000000..2e80f8f
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -0,0 +1,757 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include <linux/mlx4/driver.h>
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4_ib.h"
+#include "user.h"
+
+#define DRV_NAME "mlx4_ib"
+#define DRV_VERSION "1.0"
+#define DRV_RELDATE "April 4, 2008"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static const char mlx4_ib_version[] =
+ DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
+ DRV_VERSION " (" DRV_RELDATE ")\n";
+
+static void init_query_mad(struct ib_smp *mad)
+{
+ mad->base_version = 1;
+ mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ mad->class_version = 1;
+ mad->method = IB_MGMT_METHOD_GET;
+}
+
+static int mlx4_ib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memset(props, 0, sizeof *props);
+
+ props->fw_ver = dev->dev->caps.fw_ver;
+ props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
+ IB_DEVICE_PORT_ACTIVE_EVENT |
+ IB_DEVICE_SYS_IMAGE_GUID |
+ IB_DEVICE_RC_RNR_NAK_GEN |
+ IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
+ props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
+ props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM)
+ props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
+ props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
+ props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+ if (dev->dev->caps.max_gso_sz)
+ props->device_cap_flags |= IB_DEVICE_UD_TSO;
+ if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
+ props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+ if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
+ (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
+ (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+
+ props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
+ 0xffffff;
+ props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
+ props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
+ memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
+
+ props->max_mr_size = ~0ull;
+ props->page_size_cap = dev->dev->caps.page_size_cap;
+ props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
+ props->max_qp_wr = dev->dev->caps.max_wqes;
+ props->max_sge = min(dev->dev->caps.max_sq_sg,
+ dev->dev->caps.max_rq_sg);
+ props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
+ props->max_cqe = dev->dev->caps.max_cqes;
+ props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
+ props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
+ props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
+ props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
+ props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
+ props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
+ props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
+ props->max_srq_sge = dev->dev->caps.max_srq_sge;
+ props->max_fast_reg_page_list_len = PAGE_SIZE / sizeof (u64);
+ props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
+ props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
+ IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+ props->max_pkeys = dev->dev->caps.pkey_table_len[1];
+ props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
+ props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
+ props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+ props->max_mcast_grp;
+ props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1;
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+
+ return err;
+}
+
+static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ memset(props, 0, sizeof *props);
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
+ props->lmc = out_mad->data[34] & 0x7;
+ props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
+ props->sm_sl = out_mad->data[36] & 0xf;
+ props->state = out_mad->data[32] & 0xf;
+ props->phys_state = out_mad->data[33] >> 4;
+ props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
+ props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
+ props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
+ props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
+ props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
+ props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
+ props->active_width = out_mad->data[31] & 0xf;
+ props->active_speed = out_mad->data[35] >> 4;
+ props->max_mtu = out_mad->data[41] & 0xf;
+ props->active_mtu = out_mad->data[36] >> 4;
+ props->subnet_timeout = out_mad->data[51] & 0x1f;
+ props->max_vl_num = out_mad->data[37] >> 4;
+ props->init_type_reply = out_mad->data[41] >> 4;
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+
+ return err;
+}
+
+static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memcpy(gid->raw, out_mad->data + 8, 8);
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
+ in_mad->attr_mod = cpu_to_be32(index / 8);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
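+/*
+ * Each PKeyTable record holds 32 P_Key entries, hence attr_mod =
+ * index / 32 and the index % 32 lookup into the reply below.
+ */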
+static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
+ in_mad->attr_mod = cpu_to_be32(index / 32);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
+ struct ib_device_modify *props)
+{
+ if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
+ spin_lock(&to_mdev(ibdev)->sm_lock);
+ memcpy(ibdev->node_desc, props->node_desc, 64);
+ spin_unlock(&to_mdev(ibdev)->sm_lock);
+ }
+
+ return 0;
+}
+
+static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
+ u32 cap_mask)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ memset(mailbox->buf, 0, 256);
+
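+ /*
+ * Devices flagged with MLX4_FLAG_OLD_PORT_CMDS use an older SET_PORT
+ * mailbox layout, so the qkey-violation reset bit and the capability
+ * mask are placed differently in the two cases below.
+ */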
+ if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
+ *(u8 *) mailbox->buf = !!reset_qkey_viols << 6;
+ ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
+ } else {
+ ((u8 *) mailbox->buf)[3] = !!reset_qkey_viols;
+ ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
+ }
+
+ err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B);
+
+ mlx4_free_cmd_mailbox(dev->dev, mailbox);
+ return err;
+}
+
+static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
+ struct ib_port_modify *props)
+{
+ struct ib_port_attr attr;
+ u32 cap_mask;
+ int err;
+
+ mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
+
+ err = mlx4_ib_query_port(ibdev, port, &attr);
+ if (err)
+ goto out;
+
+ cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
+ ~props->clr_port_cap_mask;
+
+ err = mlx4_SET_PORT(to_mdev(ibdev), port,
+ !!(mask & IB_PORT_RESET_QKEY_CNTR),
+ cap_mask);
+
+out:
+ mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
+ return err;
+}
+
+static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct mlx4_ib_ucontext *context;
+ struct mlx4_ib_alloc_ucontext_resp resp;
+ int err;
+
+ resp.qp_tab_size = dev->dev->caps.num_qps;
+ resp.bf_reg_size = dev->dev->caps.bf_reg_size;
+ resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+
+ context = kmalloc(sizeof *context, GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
+ if (err) {
+ kfree(context);
+ return ERR_PTR(err);
+ }
+
+ INIT_LIST_HEAD(&context->db_page_list);
+ mutex_init(&context->db_page_mutex);
+
+ err = ib_copy_to_udata(udata, &resp, sizeof resp);
+ if (err) {
+ mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
+ kfree(context);
+ return ERR_PTR(-EFAULT);
+ }
+
+ return &context->ibucontext;
+}
+
+static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
+
+ mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
+ kfree(context);
+
+ return 0;
+}
+
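+/*
+ * Userspace mmap()s doorbell pages by page offset: offset 0 maps the
+ * context's UAR page, and offset 1 maps the matching BlueFlame page
+ * (the UAR pfn plus num_uars) when BlueFlame is supported.
+ */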
+static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct mlx4_ib_dev *dev = to_mdev(context->device);
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ if (vma->vm_pgoff == 0) {
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (io_remap_pfn_range(vma, vma->vm_start,
+ to_mucontext(context)->uar.pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+ } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
+ /* FIXME want pgprot_writecombine() for BlueFlame pages */
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (io_remap_pfn_range(vma, vma->vm_start,
+ to_mucontext(context)->uar.pfn +
+ dev->dev->caps.num_uars,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_pd *pd;
+ int err;
+
+ pd = kmalloc(sizeof *pd, GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
+ if (err) {
+ kfree(pd);
+ return ERR_PTR(err);
+ }
+
+ if (context)
+ if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
+ mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
+ kfree(pd);
+ return ERR_PTR(-EFAULT);
+ }
+
+ return &pd->ibpd;
+}
+
+static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
+{
+ mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
+ kfree(pd);
+
+ return 0;
+}
+
+static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ return mlx4_multicast_attach(to_mdev(ibqp->device)->dev,
+ &to_mqp(ibqp)->mqp, gid->raw,
+ !!(to_mqp(ibqp)->flags &
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK));
+}
+
+static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ return mlx4_multicast_detach(to_mdev(ibqp->device)->dev,
+ &to_mqp(ibqp)->mqp, gid->raw);
+}
+
+static int init_node_data(struct mlx4_ib_dev *dev)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
+
+ err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
+
+ in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+ err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
+ memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_ib_dev *dev =
+ container_of(device, struct mlx4_ib_dev, ib_dev.dev);
+ return sprintf(buf, "MT%d\n", dev->dev->pdev->device);
+}
+
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_ib_dev *dev =
+ container_of(device, struct mlx4_ib_dev, ib_dev.dev);
+ return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32),
+ (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
+ (int) dev->dev->caps.fw_ver & 0xffff);
+}
+
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_ib_dev *dev =
+ container_of(device, struct mlx4_ib_dev, ib_dev.dev);
+ return sprintf(buf, "%x\n", dev->dev->rev_id);
+}
+
+static ssize_t show_board(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_ib_dev *dev =
+ container_of(device, struct mlx4_ib_dev, ib_dev.dev);
+ return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
+ dev->dev->board_id);
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct device_attribute *mlx4_class_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_ver,
+ &dev_attr_hca_type,
+ &dev_attr_board_id
+};
+
+static void *mlx4_ib_add(struct mlx4_dev *dev)
+{
+ static int mlx4_ib_version_printed;
+ struct mlx4_ib_dev *ibdev;
+ int i;
+
+ if (!mlx4_ib_version_printed) {
+ printk(KERN_INFO "%s", mlx4_ib_version);
+ ++mlx4_ib_version_printed;
+ }
+
+ ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
+ if (!ibdev) {
+ dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
+ return NULL;
+ }
+
+ if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
+ goto err_dealloc;
+
+ if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
+ goto err_pd;
+
+ ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!ibdev->uar_map)
+ goto err_uar;
+ MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
+
+ ibdev->dev = dev;
+
+ strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
+ ibdev->ib_dev.owner = THIS_MODULE;
+ ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
+ ibdev->num_ports = 0;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ ibdev->num_ports++;
+ ibdev->ib_dev.phys_port_cnt = ibdev->num_ports;
+ ibdev->ib_dev.num_comp_vectors = 1;
+ ibdev->ib_dev.dma_device = &dev->pdev->dev;
+
+ ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+ ibdev->ib_dev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+
+ ibdev->ib_dev.query_device = mlx4_ib_query_device;
+ ibdev->ib_dev.query_port = mlx4_ib_query_port;
+ ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
+ ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
+ ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
+ ibdev->ib_dev.modify_port = mlx4_ib_modify_port;
+ ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
+ ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
+ ibdev->ib_dev.mmap = mlx4_ib_mmap;
+ ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
+ ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
+ ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
+ ibdev->ib_dev.query_ah = mlx4_ib_query_ah;
+ ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
+ ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
+ ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
+ ibdev->ib_dev.query_srq = mlx4_ib_query_srq;
+ ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
+ ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
+ ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
+ ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
+ ibdev->ib_dev.query_qp = mlx4_ib_query_qp;
+ ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
+ ibdev->ib_dev.post_send = mlx4_ib_post_send;
+ ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
+ ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
+ ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq;
+ ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq;
+ ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq;
+ ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq;
+ ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
+ ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
+ ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
+ ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
+ ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
+ ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
+ ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list;
+ ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
+ ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
+ ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
+
+ ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
+ ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
+ ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
+ ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
+
+ if (init_node_data(ibdev))
+ goto err_map;
+
+ spin_lock_init(&ibdev->sm_lock);
+ mutex_init(&ibdev->cap_mask_mutex);
+
+ if (ib_register_device(&ibdev->ib_dev))
+ goto err_map;
+
+ if (mlx4_ib_mad_init(ibdev))
+ goto err_reg;
+
+ for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) {
+ if (device_create_file(&ibdev->ib_dev.dev,
+ mlx4_class_attributes[i]))
+ goto err_reg;
+ }
+
+ return ibdev;
+
+err_reg:
+ ib_unregister_device(&ibdev->ib_dev);
+
+err_map:
+ iounmap(ibdev->uar_map);
+
+err_uar:
+ mlx4_uar_free(dev, &ibdev->priv_uar);
+
+err_pd:
+ mlx4_pd_free(dev, ibdev->priv_pdn);
+
+err_dealloc:
+ ib_dealloc_device(&ibdev->ib_dev);
+
+ return NULL;
+}
+
+static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
+{
+ struct mlx4_ib_dev *ibdev = ibdev_ptr;
+ int p;
+
+ for (p = 1; p <= ibdev->num_ports; ++p)
+ mlx4_CLOSE_PORT(dev, p);
+
+ mlx4_ib_mad_cleanup(ibdev);
+ ib_unregister_device(&ibdev->ib_dev);
+ iounmap(ibdev->uar_map);
+ mlx4_uar_free(dev, &ibdev->priv_uar);
+ mlx4_pd_free(dev, ibdev->priv_pdn);
+ ib_dealloc_device(&ibdev->ib_dev);
+}
+
+static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
+ enum mlx4_dev_event event, int port)
+{
+ struct ib_event ibev;
+ struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
+
+ if (port > ibdev->num_ports)
+ return;
+
+ switch (event) {
+ case MLX4_DEV_EVENT_PORT_UP:
+ ibev.event = IB_EVENT_PORT_ACTIVE;
+ break;
+
+ case MLX4_DEV_EVENT_PORT_DOWN:
+ ibev.event = IB_EVENT_PORT_ERR;
+ break;
+
+ case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
+ ibev.event = IB_EVENT_DEVICE_FATAL;
+ break;
+
+ default:
+ return;
+ }
+
+ ibev.device = ibdev_ptr;
+ ibev.element.port_num = port;
+
+ ib_dispatch_event(&ibev);
+}
+
+static struct mlx4_interface mlx4_ib_interface = {
+ .add = mlx4_ib_add,
+ .remove = mlx4_ib_remove,
+ .event = mlx4_ib_event
+};
+
+static int __init mlx4_ib_init(void)
+{
+ return mlx4_register_interface(&mlx4_ib_interface);
+}
+
+static void __exit mlx4_ib_cleanup(void)
+{
+ mlx4_unregister_interface(&mlx4_ib_interface);
+}
+
+module_init(mlx4_ib_init);
+module_exit(mlx4_ib_cleanup);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
new file mode 100644
index 0000000..9974e88
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_IB_H
+#define MLX4_IB_H
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+
+#include <linux/mlx4/device.h>
+#include <linux/mlx4/doorbell.h>
+
+struct mlx4_ib_ucontext {
+ struct ib_ucontext ibucontext;
+ struct mlx4_uar uar;
+ struct list_head db_page_list;
+ struct mutex db_page_mutex;
+};
+
+struct mlx4_ib_pd {
+ struct ib_pd ibpd;
+ u32 pdn;
+};
+
+struct mlx4_ib_cq_buf {
+ struct mlx4_buf buf;
+ struct mlx4_mtt mtt;
+};
+
+struct mlx4_ib_cq_resize {
+ struct mlx4_ib_cq_buf buf;
+ int cqe;
+};
+
+struct mlx4_ib_cq {
+ struct ib_cq ibcq;
+ struct mlx4_cq mcq;
+ struct mlx4_ib_cq_buf buf;
+ struct mlx4_ib_cq_resize *resize_buf;
+ struct mlx4_db db;
+ spinlock_t lock;
+ struct mutex resize_mutex;
+ struct ib_umem *umem;
+ struct ib_umem *resize_umem;
+};
+
+struct mlx4_ib_mr {
+ struct ib_mr ibmr;
+ struct mlx4_mr mmr;
+ struct ib_umem *umem;
+};
+
+struct mlx4_ib_fast_reg_page_list {
+ struct ib_fast_reg_page_list ibfrpl;
+ dma_addr_t map;
+};
+
+struct mlx4_ib_fmr {
+ struct ib_fmr ibfmr;
+ struct mlx4_fmr mfmr;
+};
+
+struct mlx4_ib_wq {
+ u64 *wrid;
+ spinlock_t lock;
+ int wqe_cnt;
+ int max_post;
+ int max_gs;
+ int offset;
+ int wqe_shift;
+ unsigned head;
+ unsigned tail;
+};
+
+enum mlx4_ib_qp_flags {
+ MLX4_IB_QP_LSO = 1 << 0,
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+};
+
+struct mlx4_ib_qp {
+ struct ib_qp ibqp;
+ struct mlx4_qp mqp;
+ struct mlx4_buf buf;
+
+ struct mlx4_db db;
+ struct mlx4_ib_wq rq;
+
+ u32 doorbell_qpn;
+ __be32 sq_signal_bits;
+ unsigned sq_next_wqe;
+ int sq_max_wqes_per_wr;
+ int sq_spare_wqes;
+ struct mlx4_ib_wq sq;
+
+ struct ib_umem *umem;
+ struct mlx4_mtt mtt;
+ int buf_size;
+ struct mutex mutex;
+ u32 flags;
+ u8 port;
+ u8 alt_port;
+ u8 atomic_rd_en;
+ u8 resp_depth;
+ u8 sq_no_prefetch;
+ u8 state;
+};
+
+struct mlx4_ib_srq {
+ struct ib_srq ibsrq;
+ struct mlx4_srq msrq;
+ struct mlx4_buf buf;
+ struct mlx4_db db;
+ u64 *wrid;
+ spinlock_t lock;
+ int head;
+ int tail;
+ u16 wqe_ctr;
+ struct ib_umem *umem;
+ struct mlx4_mtt mtt;
+ struct mutex mutex;
+};
+
+struct mlx4_ib_ah {
+ struct ib_ah ibah;
+ struct mlx4_av av;
+};
+
+struct mlx4_ib_dev {
+ struct ib_device ib_dev;
+ struct mlx4_dev *dev;
+ int num_ports;
+ void __iomem *uar_map;
+
+ struct mlx4_uar priv_uar;
+ u32 priv_pdn;
+ MLX4_DECLARE_DOORBELL_LOCK(uar_lock);
+
+ struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
+ struct ib_ah *sm_ah[MLX4_MAX_PORTS];
+ spinlock_t sm_lock;
+
+ struct mutex cap_mask_mutex;
+};
+
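+/*
+ * The helpers below convert between the generic ib_* objects and the
+ * driver-private structures that embed them, using container_of().
+ */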
+static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
+}
+
+static inline struct mlx4_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct mlx4_ib_ucontext, ibucontext);
+}
+
+static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct mlx4_ib_pd, ibpd);
+}
+
+static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct mlx4_ib_cq, ibcq);
+}
+
+static inline struct mlx4_ib_cq *to_mibcq(struct mlx4_cq *mcq)
+{
+ return container_of(mcq, struct mlx4_ib_cq, mcq);
+}
+
+static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct mlx4_ib_mr, ibmr);
+}
+
+static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
+{
+ return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
+}
+
+static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
+{
+ return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
+}
+
+static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct mlx4_ib_qp, ibqp);
+}
+
+static inline struct mlx4_ib_qp *to_mibqp(struct mlx4_qp *mqp)
+{
+ return container_of(mqp, struct mlx4_ib_qp, mqp);
+}
+
+static inline struct mlx4_ib_srq *to_msrq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct mlx4_ib_srq, ibsrq);
+}
+
+static inline struct mlx4_ib_srq *to_mibsrq(struct mlx4_srq *msrq)
+{
+ return container_of(msrq, struct mlx4_ib_srq, msrq);
+}
+
+static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct mlx4_ib_ah, ibah);
+}
+
+int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
+ struct mlx4_db *db);
+void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
+
+struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc);
+int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
+ struct ib_umem *umem);
+struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata);
+int mlx4_ib_dereg_mr(struct ib_mr *mr);
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+ int max_page_list_len);
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+ int page_list_len);
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
+
+int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
+struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int mlx4_ib_destroy_cq(struct ib_cq *cq);
+int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
+void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
+
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int mlx4_ib_destroy_ah(struct ib_ah *ah);
+
+struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+int mlx4_ib_destroy_srq(struct ib_srq *srq);
+void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
+int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx4_ib_destroy_qp(struct ib_qp *qp);
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
+int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+
+int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+ int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+ void *in_mad, void *response_mad);
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad);
+int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
+void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
+
+struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
+ u64 iova);
+int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
+int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
+
+static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
+{
+ return !!(ah->av.g_slid & 0x80);
+}
+
+#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
new file mode 100644
index 0000000..8e4d26d
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx4_ib.h"
+
+static u32 convert_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) |
+ (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
+ MLX4_PERM_LOCAL_READ;
+}
+
+struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct mlx4_ib_mr *mr;
+ int err;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
+ ~0ull, convert_access(acc), 0, 0, &mr->mmr);
+ if (err)
+ goto err_free;
+
+ err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
+ if (err)
+ goto err_mr;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+
+err_mr:
+ mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
+
+err_free:
+ kfree(mr);
+
+ return ERR_PTR(err);
+}
+
+int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
+ struct ib_umem *umem)
+{
+ u64 *pages;
+ struct ib_umem_chunk *chunk;
+ int i, j, k;
+ int n;
+ int len;
+ int err = 0;
+
+ pages = (u64 *) __get_free_page(GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ i = n = 0;
+
+ list_for_each_entry(chunk, &umem->chunk_list, list)
+ for (j = 0; j < chunk->nmap; ++j) {
+ len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(&chunk->page_list[j]) +
+ umem->page_size * k;
+ /*
+ * Be friendly to mlx4_write_mtt() and
+ * pass it chunks of appropriate size.
+ */
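+ /* With 4 KiB pages this flushes every PAGE_SIZE / 8 = 512 entries. */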
+ if (i == PAGE_SIZE / sizeof (u64)) {
+ err = mlx4_write_mtt(dev->dev, mtt, n,
+ i, pages);
+ if (err)
+ goto out;
+ n += i;
+ i = 0;
+ }
+ }
+ }
+
+ if (i)
+ err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
+
+out:
+ free_page((unsigned long) pages);
+ return err;
+}
+
+struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ struct mlx4_ib_mr *mr;
+ int shift;
+ int err;
+ int n;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->umem = ib_umem_get(pd->uobject->context, start, length,
+ access_flags, 0);
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ goto err_free;
+ }
+
+ n = ib_umem_page_count(mr->umem);
+ shift = ilog2(mr->umem->page_size);
+
+ err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
+ convert_access(access_flags), n, shift, &mr->mmr);
+ if (err)
+ goto err_umem;
+
+ err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
+ if (err)
+ goto err_mr;
+
+ err = mlx4_mr_enable(dev->dev, &mr->mmr);
+ if (err)
+ goto err_mr;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+
+ return &mr->ibmr;
+
+err_mr:
+ mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err_free:
+ kfree(mr);
+
+ return ERR_PTR(err);
+}
+
+int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
+{
+ struct mlx4_ib_mr *mr = to_mmr(ibmr);
+
+ mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+ kfree(mr);
+
+ return 0;
+}
+
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+ int max_page_list_len)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ struct mlx4_ib_mr *mr;
+ int err;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
+ max_page_list_len, 0, &mr->mmr);
+ if (err)
+ goto err_free;
+
+ err = mlx4_mr_enable(dev->dev, &mr->mmr);
+ if (err)
+ goto err_mr;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+
+err_mr:
+ mlx4_mr_free(dev->dev, &mr->mmr);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+ int page_list_len)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct mlx4_ib_fast_reg_page_list *mfrpl;
+ int size = page_list_len * sizeof (u64);
+
+ if (size > PAGE_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
+ if (!mfrpl)
+ return ERR_PTR(-ENOMEM);
+
+ mfrpl->ibfrpl.page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
+ size, &mfrpl->map,
+ GFP_KERNEL);
+ if (!mfrpl->ibfrpl.page_list)
+ goto err_free;
+
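+ /* The DMA mapping is assumed to be 64-byte aligned; warn if it is not. */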
+ WARN_ON(mfrpl->map & 0x3f);
+
+ return &mfrpl->ibfrpl;
+
+err_free:
+ kfree(mfrpl);
+ return ERR_PTR(-ENOMEM);
+}
+
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+ struct mlx4_ib_dev *dev = to_mdev(page_list->device);
+ struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
+ int size = page_list->max_page_list_len * sizeof (u64);
+
+ dma_free_coherent(&dev->dev->pdev->dev, size, page_list->page_list,
+ mfrpl->map);
+ kfree(mfrpl);
+}
+
+struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
+ struct ib_fmr_attr *fmr_attr)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ struct mlx4_ib_fmr *fmr;
+ int err = -ENOMEM;
+
+ fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
+ if (!fmr)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
+ fmr_attr->max_pages, fmr_attr->max_maps,
+ fmr_attr->page_shift, &fmr->mfmr);
+ if (err)
+ goto err_free;
+
+ err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr);
+ if (err)
+ goto err_mr;
+
+ fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;
+
+ return &fmr->ibfmr;
+
+err_mr:
+ mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
+
+err_free:
+ kfree(fmr);
+
+ return ERR_PTR(err);
+}
+
+int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int npages, u64 iova)
+{
+ struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
+ struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device);
+
+ return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova,
+ &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
+}
+
+int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
+{
+ struct ib_fmr *ibfmr;
+ int err;
+ struct mlx4_dev *mdev = NULL;
+
+ list_for_each_entry(ibfmr, fmr_list, list) {
+ if (mdev && to_mdev(ibfmr->device)->dev != mdev)
+ return -EINVAL;
+ mdev = to_mdev(ibfmr->device)->dev;
+ }
+
+ if (!mdev)
+ return 0;
+
+ list_for_each_entry(ibfmr, fmr_list, list) {
+ struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
+
+ mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
+ }
+
+ /*
+ * Make sure all MPT status updates are visible before issuing
+ * SYNC_TPT firmware command.
+ */
+ wmb();
+
+ err = mlx4_SYNC_TPT(mdev);
+ if (err)
+ printk(KERN_WARNING "mlx4_ib: SYNC_TPT error %d when "
+ "unmapping FMRs\n", err);
+
+ return 0;
+}
+
+int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
+{
+ struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
+ struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
+ int err;
+
+ err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);
+
+ if (!err)
+ kfree(ifmr);
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
new file mode 100644
index 0000000..39167a7
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -0,0 +1,1944 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/log2.h>
+
+#include <rdma/ib_cache.h>
+#include <rdma/ib_pack.h>
+
+#include <linux/mlx4/qp.h>
+
+#include "mlx4_ib.h"
+#include "user.h"
+
+enum {
+ MLX4_IB_ACK_REQ_FREQ = 8,
+};
+
+enum {
+ MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
+ MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f
+};
+
+enum {
+ /*
+ * Largest possible UD header: send with GRH and immediate data.
+ */
+ MLX4_IB_UD_HEADER_SIZE = 72
+};
+
+struct mlx4_ib_sqp {
+ struct mlx4_ib_qp qp;
+ int pkey_index;
+ u32 qkey;
+ u32 send_psn;
+ struct ib_ud_header ud_header;
+ u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
+};
+
+enum {
+ MLX4_IB_MIN_SQ_STRIDE = 6
+};
+
+static const __be32 mlx4_ib_opcode[] = {
+ [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND),
+ [IB_WR_LSO] = __constant_cpu_to_be32(MLX4_OPCODE_LSO),
+ [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
+ [IB_WR_RDMA_WRITE] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
+ [IB_WR_RDMA_WRITE_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
+ [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
+ [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
+ [IB_WR_SEND_WITH_INV] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
+ [IB_WR_LOCAL_INV] = __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
+ [IB_WR_FAST_REG_MR] = __constant_cpu_to_be32(MLX4_OPCODE_FMR),
+};
+
+static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
+{
+ return container_of(mqp, struct mlx4_ib_sqp, qp);
+}
+
+static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
+ qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
+}
+
+static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
+ qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
+}
+
+static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
+{
+ return mlx4_buf_offset(&qp->buf, offset);
+}
+
+static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
+{
+ return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
+}
+
+static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
+{
+ return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
+}
+
+/*
+ * Stamp a SQ WQE so that it is invalid if prefetched by marking the
+ * first four bytes of every 64 byte chunk with
+ * 0x7FFFFFFF | (invalid_ownership_value << 31).
+ *
+ * When the max work request size is less than or equal to the WQE
+ * basic block size, as an optimization, we can stamp all WQEs with
+ * 0xffffffff, and skip the very first chunk of each WQE.
+ */
+static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
+{
+ __be32 *wqe;
+ int i;
+ int s;
+ int ind;
+ void *buf;
+ __be32 stamp;
+ struct mlx4_wqe_ctrl_seg *ctrl;
+
+ if (qp->sq_max_wqes_per_wr > 1) {
+ s = roundup(size, 1U << qp->sq.wqe_shift);
+ for (i = 0; i < s; i += 64) {
+ ind = (i >> qp->sq.wqe_shift) + n;
+ stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
+ cpu_to_be32(0xffffffff);
+ buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
+ wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
+ *wqe = stamp;
+ }
+ } else {
+ ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
+ s = (ctrl->fence_size & 0x3f) << 4;
+ for (i = 64; i < s; i += 64) {
+ wqe = buf + i;
+ *wqe = cpu_to_be32(0xffffffff);
+ }
+ }
+}
+
+static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
+{
+ struct mlx4_wqe_ctrl_seg *ctrl;
+ struct mlx4_wqe_inline_seg *inl;
+ void *wqe;
+ int s;
+
+ ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
+ s = sizeof(struct mlx4_wqe_ctrl_seg);
+
+ if (qp->ibqp.qp_type == IB_QPT_UD) {
+ struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl;
+ struct mlx4_av *av = (struct mlx4_av *)dgram->av;
+ memset(dgram, 0, sizeof *dgram);
+ av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn);
+ s += sizeof(struct mlx4_wqe_datagram_seg);
+ }
+
+ /* Pad the remainder of the WQE with an inline data segment. */
+ if (size > s) {
+ inl = wqe + s;
+ inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
+ }
+ ctrl->srcrb_flags = 0;
+ ctrl->fence_size = size / 16;
+ /*
+ * Make sure descriptor is fully written before setting ownership bit
+ * (because HW can start executing as soon as we do).
+ */
+ wmb();
+
+ ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
+ (n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
+
+ stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
+}
+
+/* Post NOP WQE to prevent wrap-around in the middle of a WR */
+static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
+{
+ unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
+ if (unlikely(s < qp->sq_max_wqes_per_wr)) {
+ post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
+ ind += s;
+ }
+ return ind;
+}
+
+static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
+{
+ struct ib_event event;
+ struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
+
+ if (type == MLX4_EVENT_TYPE_PATH_MIG)
+ to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
+
+ if (ibqp->event_handler) {
+ event.device = ibqp->device;
+ event.element.qp = ibqp;
+ switch (type) {
+ case MLX4_EVENT_TYPE_PATH_MIG:
+ event.event = IB_EVENT_PATH_MIG;
+ break;
+ case MLX4_EVENT_TYPE_COMM_EST:
+ event.event = IB_EVENT_COMM_EST;
+ break;
+ case MLX4_EVENT_TYPE_SQ_DRAINED:
+ event.event = IB_EVENT_SQ_DRAINED;
+ break;
+ case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+ case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
+ event.event = IB_EVENT_PATH_MIG_ERR;
+ break;
+ case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ default:
+ printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ "on QP %06x\n", type, qp->qpn);
+ return;
+ }
+
+ ibqp->event_handler(&event, ibqp->qp_context);
+ }
+}
+
+static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
+{
+ /*
+ * UD WQEs must have a datagram segment.
+ * RC and UC WQEs might have a remote address segment.
+ * MLX WQEs need two extra inline data segments (for the UD
+ * header and space for the ICRC).
+ */
+ switch (type) {
+ case IB_QPT_UD:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_datagram_seg) +
+ ((flags & MLX4_IB_QP_LSO) ? 64 : 0);
+ case IB_QPT_UC:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_raddr_seg);
+ case IB_QPT_RC:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_atomic_seg) +
+ sizeof (struct mlx4_wqe_raddr_seg);
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
+ ALIGN(MLX4_IB_UD_HEADER_SIZE +
+ DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
+ MLX4_INLINE_ALIGN) *
+ sizeof (struct mlx4_wqe_inline_seg),
+ sizeof (struct mlx4_wqe_data_seg)) +
+ ALIGN(4 +
+ sizeof (struct mlx4_wqe_inline_seg),
+ sizeof (struct mlx4_wqe_data_seg));
+ default:
+ return sizeof (struct mlx4_wqe_ctrl_seg);
+ }
+}
+
+static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
+ int is_user, int has_srq, struct mlx4_ib_qp *qp)
+{
+ /* Sanity check RQ size before proceeding */
+ if (cap->max_recv_wr > dev->dev->caps.max_wqes ||
+ cap->max_recv_sge > dev->dev->caps.max_rq_sg)
+ return -EINVAL;
+
+ if (has_srq) {
+ /* QPs attached to an SRQ should have no RQ */
+ if (cap->max_recv_wr)
+ return -EINVAL;
+
+ qp->rq.wqe_cnt = qp->rq.max_gs = 0;
+ } else {
+ /* HW requires >= 1 RQ entry with >= 1 gather entry */
+ if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
+ return -EINVAL;
+
+ qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr));
+ qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
+ qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
+ }
+
+ cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt;
+ cap->max_recv_sge = qp->rq.max_gs;
+
+ return 0;
+}
+
+static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
+ enum ib_qp_type type, struct mlx4_ib_qp *qp)
+{
+ int s;
+
+ /* Sanity check SQ size before proceeding */
+ if (cap->max_send_wr > dev->dev->caps.max_wqes ||
+ cap->max_send_sge > dev->dev->caps.max_sq_sg ||
+ cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
+ sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
+ return -EINVAL;
+
+ /*
+ * For MLX transport we need 2 extra S/G entries:
+ * one for the header and one for the checksum at the end
+ */
+ if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
+ cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
+ return -EINVAL;
+
+ s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
+ cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
+ send_wqe_overhead(type, qp->flags);
+
+ if (s > dev->dev->caps.max_sq_desc_sz)
+ return -EINVAL;
+
+ /*
+ * Hermon supports shrinking WQEs, such that a single work
+ * request can include multiple units of 1 << wqe_shift. This
+ * way, work requests can differ in size, and do not have to
+ * be a power of 2 in size, saving memory and speeding up send
+ * WR posting. Unfortunately, if we do this then the
+ * wqe_index field in CQEs can't be used to look up the WR ID
+ * anymore, so we do this only if selective signaling is off.
+ *
+ * Further, on 32-bit platforms, we can't use vmap() to make
+ * the QP buffer virtually contiguous. Thus we have to use
+ * constant-sized WRs to make sure a WR is always fully within
+ * a single page-sized chunk.
+ *
+ * Finally, we use NOP work requests to pad the end of the
+ * work queue, to avoid wrap-around in the middle of a WR. We
+ * set NEC bit to avoid getting completions with error for
+ * these NOP WRs, but since NEC is only supported starting
+ * with firmware 2.2.232, we use constant-sized WRs for older
+ * firmware.
+ *
+ * And, since MLX QPs only support SEND, we use constant-sized
+ * WRs in this case.
+ *
+ * We look for the smallest value of wqe_shift such that the
+ * resulting number of wqes does not exceed device
+ * capabilities.
+ *
+ * We set WQE size to at least 64 bytes, this way stamping
+ * invalidates each WQE.
+ */
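+ /*
+ * Illustrative example (numbers not from the original source):
+ * with WQE shrinking in effect and s = 200 bytes, wqe_shift starts
+ * at ilog2(64) = 6, so sq_max_wqes_per_wr below becomes
+ * DIV_ROUND_UP(200, 64) = 4 basic blocks per work request.
+ */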
+ if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
+ qp->sq_signal_bits && BITS_PER_LONG == 64 &&
+ type != IB_QPT_SMI && type != IB_QPT_GSI)
+ qp->sq.wqe_shift = ilog2(64);
+ else
+ qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
+
+ for (;;) {
+ qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);
+
+ /*
+ * We need to leave 2 KB + 1 WR of headroom in the SQ to
+ * allow HW to prefetch.
+ */
+ qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr;
+ qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr *
+ qp->sq_max_wqes_per_wr +
+ qp->sq_spare_wqes);
+
+ if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
+ break;
+
+ if (qp->sq_max_wqes_per_wr <= 1)
+ return -EINVAL;
+
+ ++qp->sq.wqe_shift;
+ }
+
+ qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz,
+ (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) -
+ send_wqe_overhead(type, qp->flags)) /
+ sizeof (struct mlx4_wqe_data_seg);
+
+ qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+ (qp->sq.wqe_cnt << qp->sq.wqe_shift);
+ if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
+ qp->rq.offset = 0;
+ qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+ } else {
+ qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
+ qp->sq.offset = 0;
+ }
+
+ cap->max_send_wr = qp->sq.max_post =
+ (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
+ cap->max_send_sge = min(qp->sq.max_gs,
+ min(dev->dev->caps.max_sq_sg,
+ dev->dev->caps.max_rq_sg));
+ /* We don't support inline sends for kernel QPs (yet) */
+ cap->max_inline_data = 0;
+
+ return 0;
+}
+
+static int set_user_sq_size(struct mlx4_ib_dev *dev,
+ struct mlx4_ib_qp *qp,
+ struct mlx4_ib_create_qp *ucmd)
+{
+ /* Sanity check SQ size before proceeding */
+ if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
+ ucmd->log_sq_stride >
+ ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
+ ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
+ return -EINVAL;
+
+ qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
+ qp->sq.wqe_shift = ucmd->log_sq_stride;
+
+ qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+ (qp->sq.wqe_cnt << qp->sq.wqe_shift);
+
+ return 0;
+}
+
+static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
+{
+ int qpn;
+ int err;
+
+ mutex_init(&qp->mutex);
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+
+ qp->state = IB_QPS_RESET;
+ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
+
+ err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
+ if (err)
+ goto err;
+
+ if (pd->uobject) {
+ struct mlx4_ib_create_qp ucmd;
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ err = -EFAULT;
+ goto err;
+ }
+
+ qp->sq_no_prefetch = ucmd.sq_no_prefetch;
+
+ err = set_user_sq_size(dev, qp, &ucmd);
+ if (err)
+ goto err;
+
+ qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+ qp->buf_size, 0, 0);
+ if (IS_ERR(qp->umem)) {
+ err = PTR_ERR(qp->umem);
+ goto err;
+ }
+
+ err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
+ ilog2(qp->umem->page_size), &qp->mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
+ if (err)
+ goto err_mtt;
+
+ if (!init_attr->srq) {
+ err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
+ ucmd.db_addr, &qp->db);
+ if (err)
+ goto err_mtt;
+ }
+ } else {
+ qp->sq_no_prefetch = 0;
+
+ if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+ qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+
+ if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
+ qp->flags |= MLX4_IB_QP_LSO;
+
+ err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
+ if (err)
+ goto err;
+
+ if (!init_attr->srq) {
+ err = mlx4_db_alloc(dev->dev, &qp->db, 0);
+ if (err)
+ goto err;
+
+ *qp->db.db = 0;
+ }
+
+ if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
+ err = -ENOMEM;
+ goto err_db;
+ }
+
+ err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
+ &qp->mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
+ if (err)
+ goto err_mtt;
+
+ qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
+ qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
+
+ if (!qp->sq.wrid || !qp->rq.wrid) {
+ err = -ENOMEM;
+ goto err_wrid;
+ }
+ }
+
+ if (sqpn) {
+ qpn = sqpn;
+ } else {
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
+ if (err)
+ goto err_wrid;
+ }
+
+ err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+ if (err)
+ goto err_qpn;
+
+ /*
+ * Hardware wants QPN written in big-endian order (after
+ * shifting) for send doorbell. Precompute this value to save
+ * a little bit when posting sends.
+ */
+ qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
+
+ qp->mqp.event = mlx4_ib_qp_event;
+
+ return 0;
+
+err_qpn:
+ if (!sqpn)
+ mlx4_qp_release_range(dev->dev, qpn, 1);
+
+err_wrid:
+ if (pd->uobject) {
+ if (!init_attr->srq)
+ mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context),
+ &qp->db);
+ } else {
+ kfree(qp->sq.wrid);
+ kfree(qp->rq.wrid);
+ }
+
+err_mtt:
+ mlx4_mtt_cleanup(dev->dev, &qp->mtt);
+
+err_buf:
+ if (pd->uobject)
+ ib_umem_release(qp->umem);
+ else
+ mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
+
+err_db:
+ if (!pd->uobject && !init_attr->srq)
+ mlx4_db_free(dev->dev, &qp->db);
+
+err:
+ return err;
+}
+
+static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
+{
+ switch (state) {
+ case IB_QPS_RESET: return MLX4_QP_STATE_RST;
+ case IB_QPS_INIT: return MLX4_QP_STATE_INIT;
+ case IB_QPS_RTR: return MLX4_QP_STATE_RTR;
+ case IB_QPS_RTS: return MLX4_QP_STATE_RTS;
+ case IB_QPS_SQD: return MLX4_QP_STATE_SQD;
+ case IB_QPS_SQE: return MLX4_QP_STATE_SQER;
+ case IB_QPS_ERR: return MLX4_QP_STATE_ERR;
+ default: return -1;
+ }
+}
+
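+/*
+ * When the send and receive CQs differ, lock the one with the lower CQN
+ * first (and unlock in the reverse order) so that the ordering is
+ * consistent and lock inversion cannot occur.
+ */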
+static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+{
+ if (send_cq == recv_cq)
+ spin_lock_irq(&send_cq->lock);
+ else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ spin_lock_irq(&send_cq->lock);
+ spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock_irq(&recv_cq->lock);
+ spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
+ }
+}
+
+static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+{
+ if (send_cq == recv_cq)
+ spin_unlock_irq(&send_cq->lock);
+ else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ spin_unlock(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+ } else {
+ spin_unlock(&send_cq->lock);
+ spin_unlock_irq(&recv_cq->lock);
+ }
+}
+
+static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
+ int is_user)
+{
+ struct mlx4_ib_cq *send_cq, *recv_cq;
+
+ if (qp->state != IB_QPS_RESET)
+ if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
+ MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
+ printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
+ qp->mqp.qpn);
+
+ send_cq = to_mcq(qp->ibqp.send_cq);
+ recv_cq = to_mcq(qp->ibqp.recv_cq);
+
+ mlx4_ib_lock_cqs(send_cq, recv_cq);
+
+ if (!is_user) {
+ __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);
+ if (send_cq != recv_cq)
+ __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ }
+
+ mlx4_qp_remove(dev->dev, &qp->mqp);
+
+ mlx4_ib_unlock_cqs(send_cq, recv_cq);
+
+ mlx4_qp_free(dev->dev, &qp->mqp);
+
+ if (!is_sqp(dev, qp))
+ mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+
+ mlx4_mtt_cleanup(dev->dev, &qp->mtt);
+
+ if (is_user) {
+ if (!qp->ibqp.srq)
+ mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
+ &qp->db);
+ ib_umem_release(qp->umem);
+ } else {
+ kfree(qp->sq.wrid);
+ kfree(qp->rq.wrid);
+ mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
+ if (!qp->ibqp.srq)
+ mlx4_db_free(dev->dev, &qp->db);
+ }
+}
+
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ struct mlx4_ib_sqp *sqp;
+ struct mlx4_ib_qp *qp;
+ int err;
+
+ /*
+ * We only support LSO and multicast loopback blocking, and
+ * only for kernel UD QPs.
+ */
+ if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO |
+ IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
+ return ERR_PTR(-EINVAL);
+
+ if (init_attr->create_flags &&
+ (pd->uobject || init_attr->qp_type != IB_QPT_UD))
+ return ERR_PTR(-EINVAL);
+
+ switch (init_attr->qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ {
+ qp = kzalloc(sizeof *qp, GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->ibqp.qp_num = qp->mqp.qpn;
+
+ break;
+ }
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ {
+ /* Userspace is not allowed to create special QPs: */
+ if (pd->uobject)
+ return ERR_PTR(-EINVAL);
+
+ sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
+ if (!sqp)
+ return ERR_PTR(-ENOMEM);
+
+ qp = &sqp->qp;
+
+ err = create_qp_common(dev, pd, init_attr, udata,
+ dev->dev->caps.sqp_start +
+ (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
+ init_attr->port_num - 1,
+ qp);
+ if (err) {
+ kfree(sqp);
+ return ERR_PTR(err);
+ }
+
+ qp->port = init_attr->port_num;
+ qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
+
+ break;
+ }
+ default:
+ /* Don't support raw QPs */
+ return ERR_PTR(-EINVAL);
+ }
+
+ return &qp->ibqp;
+}
+
+int mlx4_ib_destroy_qp(struct ib_qp *qp)
+{
+ struct mlx4_ib_dev *dev = to_mdev(qp->device);
+ struct mlx4_ib_qp *mqp = to_mqp(qp);
+
+ if (is_qp0(dev, mqp))
+ mlx4_CLOSE_PORT(dev->dev, mqp->port);
+
+ destroy_qp_common(dev, mqp, !!qp->pd->uobject);
+
+ if (is_sqp(dev, mqp))
+ kfree(to_msqp(mqp));
+ else
+ kfree(mqp);
+
+ return 0;
+}
+
+static int to_mlx4_st(enum ib_qp_type type)
+{
+ switch (type) {
+ case IB_QPT_RC: return MLX4_QP_ST_RC;
+ case IB_QPT_UC: return MLX4_QP_ST_UC;
+ case IB_QPT_UD: return MLX4_QP_ST_UD;
+ case IB_QPT_SMI:
+ case IB_QPT_GSI: return MLX4_QP_ST_MLX;
+ default: return -1;
+ }
+}
+
+static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ u8 dest_rd_atomic;
+ u32 access_flags;
+ u32 hw_access_flags = 0;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ dest_rd_atomic = attr->max_dest_rd_atomic;
+ else
+ dest_rd_atomic = qp->resp_depth;
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ access_flags = attr->qp_access_flags;
+ else
+ access_flags = qp->atomic_rd_en;
+
+ if (!dest_rd_atomic)
+ access_flags &= IB_ACCESS_REMOTE_WRITE;
+
+ if (access_flags & IB_ACCESS_REMOTE_READ)
+ hw_access_flags |= MLX4_QP_BIT_RRE;
+ if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
+ hw_access_flags |= MLX4_QP_BIT_RAE;
+ if (access_flags & IB_ACCESS_REMOTE_WRITE)
+ hw_access_flags |= MLX4_QP_BIT_RWE;
+
+ return cpu_to_be32(hw_access_flags);
+}
+
+static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ if (attr_mask & IB_QP_PKEY_INDEX)
+ sqp->pkey_index = attr->pkey_index;
+ if (attr_mask & IB_QP_QKEY)
+ sqp->qkey = attr->qkey;
+ if (attr_mask & IB_QP_SQ_PSN)
+ sqp->send_psn = attr->sq_psn;
+}
+
+static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
+{
+ path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
+}
+
+static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
+ struct mlx4_qp_path *path, u8 port)
+{
+ path->grh_mylmc = ah->src_path_bits & 0x7f;
+ path->rlid = cpu_to_be16(ah->dlid);
+ if (ah->static_rate) {
+ path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
+ while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
+ !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
+ --path->static_rate;
+ } else
+ path->static_rate = 0;
+ path->counter_index = 0xff;
+
+ if (ah->ah_flags & IB_AH_GRH) {
+ if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
+ printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
+ ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
+ return -1;
+ }
+
+ path->grh_mylmc |= 1 << 7;
+ path->mgid_index = ah->grh.sgid_index;
+ path->hop_limit = ah->grh.hop_limit;
+ path->tclass_flowlabel =
+ cpu_to_be32((ah->grh.traffic_class << 20) |
+ (ah->grh.flow_label));
+ memcpy(path->rgid, ah->grh.dgid.raw, 16);
+ }
+
+ path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
+ ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
+
+ return 0;
+}
+
+static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr, int attr_mask,
+ enum ib_qp_state cur_state, enum ib_qp_state new_state)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct mlx4_qp_context *context;
+ enum mlx4_qp_optpar optpar = 0;
+ int sqd_event;
+ int err = -EINVAL;
+
+ context = kzalloc(sizeof *context, GFP_KERNEL);
+ if (!context)
+ return -ENOMEM;
+
+ context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
+ (to_mlx4_st(ibqp->qp_type) << 16));
+ context->flags |= cpu_to_be32(1 << 8); /* DE? */
+
+ if (!(attr_mask & IB_QP_PATH_MIG_STATE))
+ context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
+ else {
+ optpar |= MLX4_QP_OPTPAR_PM_STATE;
+ switch (attr->path_mig_state) {
+ case IB_MIG_MIGRATED:
+ context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
+ break;
+ case IB_MIG_REARM:
+ context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
+ break;
+ case IB_MIG_ARMED:
+ context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
+ break;
+ }
+ }
+
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
+ context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
+ else if (ibqp->qp_type == IB_QPT_UD) {
+ if (qp->flags & MLX4_IB_QP_LSO)
+ context->mtu_msgmax = (IB_MTU_4096 << 5) |
+ ilog2(dev->dev->caps.max_gso_sz);
+ else
+ context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
+ } else if (attr_mask & IB_QP_PATH_MTU) {
+ if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
+ printk(KERN_ERR "path MTU (%u) is invalid\n",
+ attr->path_mtu);
+ goto out;
+ }
+ context->mtu_msgmax = (attr->path_mtu << 5) |
+ ilog2(dev->dev->caps.max_msg_sz);
+ }
+
+ if (qp->rq.wqe_cnt)
+ context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
+ context->rq_size_stride |= qp->rq.wqe_shift - 4;
+
+ if (qp->sq.wqe_cnt)
+ context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
+ context->sq_size_stride |= qp->sq.wqe_shift - 4;
+
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
+
+ if (qp->ibqp.uobject)
+ context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
+ else
+ context->usr_page = cpu_to_be32(dev->priv_uar.index);
+
+ if (attr_mask & IB_QP_DEST_QPN)
+ context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
+
+ if (attr_mask & IB_QP_PORT) {
+ if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD &&
+ !(attr_mask & IB_QP_AV)) {
+ mlx4_set_sched(&context->pri_path, attr->port_num);
+ optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
+ }
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ context->pri_path.pkey_index = attr->pkey_index;
+ optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
+ }
+
+ if (attr_mask & IB_QP_AV) {
+ if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
+ attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
+ goto out;
+
+ optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
+ MLX4_QP_OPTPAR_SCHED_QUEUE);
+ }
+
+ if (attr_mask & IB_QP_TIMEOUT) {
+ context->pri_path.ackto = attr->timeout << 3;
+ optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
+ }
+
+ if (attr_mask & IB_QP_ALT_PATH) {
+ if (attr->alt_port_num == 0 ||
+ attr->alt_port_num > dev->dev->caps.num_ports)
+ goto out;
+
+ if (attr->alt_pkey_index >=
+ dev->dev->caps.pkey_table_len[attr->alt_port_num])
+ goto out;
+
+ if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+ attr->alt_port_num))
+ goto out;
+
+ context->alt_path.pkey_index = attr->alt_pkey_index;
+ context->alt_path.ackto = attr->alt_timeout << 3;
+ optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
+ }
+
+ context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
+ context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
+
+ /* Set "fast registration enabled" for all kernel QPs */
+ if (!qp->ibqp.uobject)
+ context->params1 |= cpu_to_be32(1 << 11);
+
+ if (attr_mask & IB_QP_RNR_RETRY) {
+ context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
+ optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
+ }
+
+ if (attr_mask & IB_QP_RETRY_CNT) {
+ context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
+ optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ if (attr->max_rd_atomic)
+ context->params1 |=
+ cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
+ optpar |= MLX4_QP_OPTPAR_SRA_MAX;
+ }
+
+ if (attr_mask & IB_QP_SQ_PSN)
+ context->next_send_psn = cpu_to_be32(attr->sq_psn);
+
+ context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ if (attr->max_dest_rd_atomic)
+ context->params2 |=
+ cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
+ optpar |= MLX4_QP_OPTPAR_RRA_MAX;
+ }
+
+ if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
+ context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
+ optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
+ }
+
+ if (ibqp->srq)
+ context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+ context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
+ optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
+ }
+ if (attr_mask & IB_QP_RQ_PSN)
+ context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
+
+ context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);
+
+ if (attr_mask & IB_QP_QKEY) {
+ context->qkey = cpu_to_be32(attr->qkey);
+ optpar |= MLX4_QP_OPTPAR_Q_KEY;
+ }
+
+ if (ibqp->srq)
+ context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
+
+ if (!ibqp->srq && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ context->db_rec_addr = cpu_to_be64(qp->db.dma);
+
+ if (cur_state == IB_QPS_INIT &&
+ new_state == IB_QPS_RTR &&
+ (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
+ ibqp->qp_type == IB_QPT_UD)) {
+ context->pri_path.sched_queue = (qp->port - 1) << 6;
+ if (is_qp0(dev, qp))
+ context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
+ else
+ context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
+ }
+
+ if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
+ attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
+ sqd_event = 1;
+ else
+ sqd_event = 0;
+
+ if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ context->rlkey |= (1 << 4);
+
+ /*
+ * Before passing a kernel QP to the HW, make sure that the
+ * ownership bits of the send queue are set and the SQ
+ * headroom is stamped so that the hardware doesn't start
+ * processing stale work requests.
+ */
+ if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ struct mlx4_wqe_ctrl_seg *ctrl;
+ int i;
+
+ for (i = 0; i < qp->sq.wqe_cnt; ++i) {
+ ctrl = get_send_wqe(qp, i);
+ ctrl->owner_opcode = cpu_to_be32(1 << 31);
+ if (qp->sq_max_wqes_per_wr == 1)
+ ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);
+
+ stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
+ }
+ }
+
+ err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
+ to_mlx4_state(new_state), context, optpar,
+ sqd_event, &qp->mqp);
+ if (err)
+ goto out;
+
+ qp->state = new_state;
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ qp->atomic_rd_en = attr->qp_access_flags;
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ qp->resp_depth = attr->max_dest_rd_atomic;
+ if (attr_mask & IB_QP_PORT)
+ qp->port = attr->port_num;
+ if (attr_mask & IB_QP_ALT_PATH)
+ qp->alt_port = attr->alt_port_num;
+
+ if (is_sqp(dev, qp))
+ store_sqp_attrs(to_msqp(qp), attr, attr_mask);
+
+ /*
+ * If we moved QP0 to RTR, bring the IB link up; if we moved
+ * QP0 to RESET or ERROR, bring the link back down.
+ */
+ if (is_qp0(dev, qp)) {
+ if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
+ if (mlx4_INIT_PORT(dev->dev, qp->port))
+ printk(KERN_WARNING "INIT_PORT failed for port %d\n",
+ qp->port);
+
+ if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
+ (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+ mlx4_CLOSE_PORT(dev->dev, qp->port);
+ }
+
+ /*
+ * If we moved a kernel QP to RESET, clean up all old CQ
+ * entries and reinitialize the QP.
+ */
+ if (new_state == IB_QPS_RESET && !ibqp->uobject) {
+ mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
+ ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+ if (ibqp->send_cq != ibqp->recv_cq)
+ mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);
+
+ qp->rq.head = 0;
+ qp->rq.tail = 0;
+ qp->sq.head = 0;
+ qp->sq.tail = 0;
+ qp->sq_next_wqe = 0;
+ if (!ibqp->srq)
+ *qp->db.db = 0;
+ }
+
+out:
+ kfree(context);
+ return err;
+}
+
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ enum ib_qp_state cur_state, new_state;
+ int err = -EINVAL;
+
+ mutex_lock(&qp->mutex);
+
+ cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
+ goto out;
+
+ if ((attr_mask & IB_QP_PORT) &&
+ (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p])
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+ attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+ attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
+ goto out;
+ }
+
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ err = 0;
+ goto out;
+ }
+
+ err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+
+out:
+ mutex_unlock(&qp->mutex);
+ return err;
+}
+
+static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
+ void *wqe, unsigned *mlx_seg_len)
+{
+ struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
+ struct mlx4_wqe_mlx_seg *mlx = wqe;
+ struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
+ struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ u16 pkey;
+ int send_size;
+ int header_size;
+ int spc;
+ int i;
+
+ send_size = 0;
+ for (i = 0; i < wr->num_sge; ++i)
+ send_size += wr->sg_list[i].length;
+
+ ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);
+
+ sqp->ud_header.lrh.service_level =
+ be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
+ sqp->ud_header.lrh.destination_lid = ah->av.dlid;
+ sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f);
+ if (mlx4_ib_ah_grh_present(ah)) {
+ sqp->ud_header.grh.traffic_class =
+ (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff;
+ sqp->ud_header.grh.flow_label =
+ ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
+ sqp->ud_header.grh.hop_limit = ah->av.hop_limit;
+ ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24,
+ ah->av.gid_index, &sqp->ud_header.grh.source_gid);
+ memcpy(sqp->ud_header.grh.destination_gid.raw,
+ ah->av.dgid, 16);
+ }
+
+ mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
+ mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
+ (sqp->ud_header.lrh.destination_lid ==
+ IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
+ (sqp->ud_header.lrh.service_level << 8));
+ mlx->rlid = sqp->ud_header.lrh.destination_lid;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ sqp->ud_header.immediate_present = 0;
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ sqp->ud_header.immediate_present = 1;
+ sqp->ud_header.immediate_data = wr->ex.imm_data;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
+ if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
+ sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
+ sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+ if (!sqp->qp.ibqp.qp_num)
+ ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
+ else
+ ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
+ sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
+ sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
+ sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
+ sqp->qkey : wr->wr.ud.remote_qkey);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+
+ header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
+
+ if (0) {
+ printk(KERN_ERR "built UD header of size %d:\n", header_size);
+ for (i = 0; i < header_size / 4; ++i) {
+ if (i % 8 == 0)
+ printk(" [%02x] ", i * 4);
+ printk(" %08x",
+ be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
+ if ((i + 1) % 8 == 0)
+ printk("\n");
+ }
+ printk("\n");
+ }
+
+ /*
+ * Inline data segments may not cross a 64 byte boundary. If
+ * our UD header is bigger than the space available up to the
+ * next 64 byte boundary in the WQE, use two inline data
+ * segments to hold the UD header.
+ */
+ spc = MLX4_INLINE_ALIGN -
+ ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
+ if (header_size <= spc) {
+ inl->byte_count = cpu_to_be32(1 << 31 | header_size);
+ memcpy(inl + 1, sqp->header_buf, header_size);
+ i = 1;
+ } else {
+ inl->byte_count = cpu_to_be32(1 << 31 | spc);
+ memcpy(inl + 1, sqp->header_buf, spc);
+
+ inl = (void *) (inl + 1) + spc;
+ memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
+ /*
+ * Need a barrier here to make sure all the data is
+ * visible before the byte_count field is set.
+ * Otherwise the HCA prefetcher could grab the 64-byte
+ * chunk with this inline segment and get a valid (!=
+ * 0xffffffff) byte count but stale data, and end up
+ * generating a packet with bad headers.
+ *
+ * The first inline segment's byte_count field doesn't
+ * need a barrier, because it comes after a
+ * control/MLX segment and therefore is at an offset
+ * of 16 mod 64.
+ */
+ wmb();
+ inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
+ i = 2;
+ }
+
+ *mlx_seg_len =
+ ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
+ return 0;
+}
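
The split arithmetic above is easier to follow with concrete numbers. Below is a minimal, self-contained sketch (hypothetical helpers, not driver code; it assumes MLX4_INLINE_ALIGN is 64 and that the inline segment header, struct mlx4_wqe_inline_seg, is 4 bytes) reproducing the spc calculation and the resulting mlx_seg_len:

#include <stdio.h>

#define INLINE_ALIGN 64		/* assumed value of MLX4_INLINE_ALIGN */
#define SEG_HDR_SIZE 4		/* assumed sizeof(struct mlx4_wqe_inline_seg) */

/* Round up to a multiple of 16, like the driver's ALIGN(x, 16). */
static unsigned align16(unsigned x)
{
	return (x + 15) & ~15u;
}

/*
 * payload_off is the offset of the first inline payload byte within the
 * WQE (i.e. what (inl + 1) works out to modulo 64 in build_mlx_header).
 */
static void split_header(unsigned payload_off, unsigned header_size)
{
	unsigned spc = INLINE_ALIGN - (payload_off & (INLINE_ALIGN - 1));
	unsigned nseg = header_size <= spc ? 1 : 2;

	printf("spc=%u -> %u inline segment(s), mlx_seg_len=%u\n",
	       spc, nseg, align16(nseg * SEG_HDR_SIZE + header_size));
}

int main(void)
{
	split_header(20, 40);	/* fits before the 64-byte boundary: 1 segment */
	split_header(20, 72);	/* crosses the boundary: split into 2 segments */
	return 0;
}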
+
+static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
+{
+ unsigned cur;
+ struct mlx4_ib_cq *cq;
+
+ cur = wq->head - wq->tail;
+ if (likely(cur + nreq < wq->max_post))
+ return 0;
+
+ cq = to_mcq(ib_cq);
+ spin_lock(&cq->lock);
+ cur = wq->head - wq->tail;
+ spin_unlock(&cq->lock);
+
+ return cur + nreq >= wq->max_post;
+}
+
+static __be32 convert_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) |
+ (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) |
+ cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
+}
+
+static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
+{
+ struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
+ int i;
+
+ for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i)
+ wr->wr.fast_reg.page_list->page_list[i] =
+ cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] |
+ MLX4_MTT_FLAG_PRESENT);
+
+ fseg->flags = convert_access(wr->wr.fast_reg.access_flags);
+ fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey);
+ fseg->buf_list = cpu_to_be64(mfrpl->map);
+ fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
+ fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length);
+ fseg->offset = 0; /* XXX -- is this just for ZBVA? */
+ fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift);
+ fseg->reserved[0] = 0;
+ fseg->reserved[1] = 0;
+}
+
+static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
+{
+ iseg->flags = 0;
+ iseg->mem_key = cpu_to_be32(rkey);
+ iseg->guest_id = 0;
+ iseg->pa = 0;
+}
+
+static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
+ u64 remote_addr, u32 rkey)
+{
+ rseg->raddr = cpu_to_be64(remote_addr);
+ rseg->rkey = cpu_to_be32(rkey);
+ rseg->reserved = 0;
+}
+
+static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
+{
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
+ aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
+ } else {
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+ aseg->compare = 0;
+ }
+
+}
+
+static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
+ struct ib_send_wr *wr)
+{
+ memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
+ dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+}
+
+static void set_mlx_icrc_seg(void *dseg)
+{
+ u32 *t = dseg;
+ struct mlx4_wqe_inline_seg *iseg = dseg;
+
+ t[1] = 0;
+
+ /*
+ * Need a barrier here before writing the byte_count field to
+ * make sure that all the data is visible before the
+ * byte_count field is set. Otherwise, if the segment begins
+ * a new cacheline, the HCA prefetcher could grab the 64-byte
+ * chunk and get a valid (!= 0xffffffff) byte count but
+ * stale data, and end up sending the wrong data.
+ */
+ wmb();
+
+ iseg->byte_count = cpu_to_be32((1 << 31) | 4);
+}
+
+static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+ dseg->lkey = cpu_to_be32(sg->lkey);
+ dseg->addr = cpu_to_be64(sg->addr);
+
+ /*
+ * Need a barrier here before writing the byte_count field to
+ * make sure that all the data is visible before the
+ * byte_count field is set. Otherwise, if the segment begins
+ * a new cacheline, the HCA prefetcher could grab the 64-byte
+ * chunk and get a valid (!= 0xffffffff) byte count but
+ * stale data, and end up sending the wrong data.
+ */
+ wmb();
+
+ dseg->byte_count = cpu_to_be32(sg->length);
+}
+
+static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+ dseg->byte_count = cpu_to_be32(sg->length);
+ dseg->lkey = cpu_to_be32(sg->lkey);
+ dseg->addr = cpu_to_be64(sg->addr);
+}
+
+static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
+ struct mlx4_ib_qp *qp, unsigned *lso_seg_len)
+{
+ unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
+
+ /*
+ * This is a temporary limitation and will be removed in
+ * a forthcoming FW release:
+ */
+ if (unlikely(halign > 64))
+ return -EINVAL;
+
+ if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
+ wr->num_sge > qp->sq.max_gs - (halign >> 4)))
+ return -EINVAL;
+
+ memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
+
+ /* make sure LSO header is written before overwriting stamping */
+ wmb();
+
+ wqe->mss_hdr_size = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
+ wr->wr.ud.hlen);
+
+ *lso_seg_len = halign;
+ return 0;
+}
+
+static __be32 send_ieth(struct ib_send_wr *wr)
+{
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ return wr->ex.imm_data;
+
+ case IB_WR_SEND_WITH_INV:
+ return cpu_to_be32(wr->ex.invalidate_rkey);
+
+ default:
+ return 0;
+ }
+}
+
+int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ void *wqe;
+ struct mlx4_wqe_ctrl_seg *ctrl;
+ struct mlx4_wqe_data_seg *dseg;
+ unsigned long flags;
+ int nreq;
+ int err = 0;
+ unsigned ind;
+ int uninitialized_var(stamp);
+ int uninitialized_var(size);
+ unsigned uninitialized_var(seglen);
+ int i;
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ ind = qp->sq_next_wqe;
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > qp->sq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
+ qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
+
+ ctrl->srcrb_flags =
+ (wr->send_flags & IB_SEND_SIGNALED ?
+ cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
+ (wr->send_flags & IB_SEND_SOLICITED ?
+ cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
+ ((wr->send_flags & IB_SEND_IP_CSUM) ?
+ cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
+ MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
+ qp->sq_signal_bits;
+
+ ctrl->imm = send_ieth(wr);
+
+ wqe += sizeof *ctrl;
+ size = sizeof *ctrl / 16;
+
+ switch (ibqp->qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ switch (wr->opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+ wr->wr.atomic.rkey);
+ wqe += sizeof (struct mlx4_wqe_raddr_seg);
+
+ set_atomic_seg(wqe, wr);
+ wqe += sizeof (struct mlx4_wqe_atomic_seg);
+
+ size += (sizeof (struct mlx4_wqe_raddr_seg) +
+ sizeof (struct mlx4_wqe_atomic_seg)) / 16;
+
+ break;
+
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ wqe += sizeof (struct mlx4_wqe_raddr_seg);
+ size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
+ break;
+
+ case IB_WR_LOCAL_INV:
+ set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
+ wqe += sizeof (struct mlx4_wqe_local_inval_seg);
+ size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
+ break;
+
+ case IB_WR_FAST_REG_MR:
+ set_fmr_seg(wqe, wr);
+ wqe += sizeof (struct mlx4_wqe_fmr_seg);
+ size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
+ break;
+
+ default:
+ /* No extra segments required for sends */
+ break;
+ }
+ break;
+
+ case IB_QPT_UD:
+ set_datagram_seg(wqe, wr);
+ wqe += sizeof (struct mlx4_wqe_datagram_seg);
+ size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+
+ if (wr->opcode == IB_WR_LSO) {
+ err = build_lso_seg(wqe, wr, qp, &seglen);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ goto out;
+ }
+ wqe += seglen;
+ size += seglen / 16;
+ }
+ break;
+
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ goto out;
+ }
+ wqe += seglen;
+ size += seglen / 16;
+ break;
+
+ default:
+ break;
+ }
+
+ /*
+ * Write data segments in reverse order, so as to
+ * overwrite cacheline stamp last within each
+ * cacheline. This avoids issues with WQE
+ * prefetching.
+ */
+
+ dseg = wqe;
+ dseg += wr->num_sge - 1;
+ size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
+
+ /* Add one more inline data segment for ICRC for MLX sends */
+ if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||
+ qp->ibqp.qp_type == IB_QPT_GSI)) {
+ set_mlx_icrc_seg(dseg + 1);
+ size += sizeof (struct mlx4_wqe_data_seg) / 16;
+ }
+
+ for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
+ set_data_seg(dseg, wr->sg_list + i);
+
+ ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
+ MLX4_WQE_CTRL_FENCE : 0) | size;
+
+ /*
+ * Make sure descriptor is fully written before
+ * setting ownership bit (because HW can start
+ * executing as soon as we do).
+ */
+ wmb();
+
+ if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
+ (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
+
+ stamp = ind + qp->sq_spare_wqes;
+ ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
+
+ /*
+ * We can improve latency by not stamping the last
+ * send queue WQE until after ringing the doorbell, so
+ * only stamp here if there are still more WQEs to post.
+ *
+ * The same optimization applies to padding with a NOP WQE
+ * in the case of WQE shrinking (used to prevent wrap-around
+ * in the middle of a WR).
+ */
+ if (wr->next) {
+ stamp_send_wqe(qp, stamp, size * 16);
+ ind = pad_wraparound(qp, ind);
+ }
+
+ }
+
+out:
+ if (likely(nreq)) {
+ qp->sq.head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ writel(qp->doorbell_qpn,
+ to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
+
+ /*
+ * Make sure doorbells don't leak out of SQ spinlock
+ * and reach the HCA out of order.
+ */
+ mmiowb();
+
+ stamp_send_wqe(qp, stamp, size * 16);
+
+ ind = pad_wraparound(qp, ind);
+ qp->sq_next_wqe = ind;
+ }
+
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+ return err;
+}
+
+int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct mlx4_wqe_data_seg *scat;
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int ind;
+ int i;
+
+ spin_lock_irqsave(&qp->rq.lock, flags);
+
+ ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) {
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > qp->rq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ scat = get_recv_wqe(qp, ind);
+
+ for (i = 0; i < wr->num_sge; ++i)
+ __set_data_seg(scat + i, wr->sg_list + i);
+
+ if (i < qp->rq.max_gs) {
+ scat[i].byte_count = 0;
+ scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
+ scat[i].addr = 0;
+ }
+
+ qp->rq.wrid[ind] = wr->wr_id;
+
+ ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
+ }
+
+out:
+ if (likely(nreq)) {
+ qp->rq.head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
+ }
+
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+
+ return err;
+}
+
+static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
+{
+ switch (mlx4_state) {
+ case MLX4_QP_STATE_RST: return IB_QPS_RESET;
+ case MLX4_QP_STATE_INIT: return IB_QPS_INIT;
+ case MLX4_QP_STATE_RTR: return IB_QPS_RTR;
+ case MLX4_QP_STATE_RTS: return IB_QPS_RTS;
+ case MLX4_QP_STATE_SQ_DRAINING:
+ case MLX4_QP_STATE_SQD: return IB_QPS_SQD;
+ case MLX4_QP_STATE_SQER: return IB_QPS_SQE;
+ case MLX4_QP_STATE_ERR: return IB_QPS_ERR;
+ default: return -1;
+ }
+}
+
+static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)
+{
+ switch (mlx4_mig_state) {
+ case MLX4_QP_PM_ARMED: return IB_MIG_ARMED;
+ case MLX4_QP_PM_REARM: return IB_MIG_REARM;
+ case MLX4_QP_PM_MIGRATED: return IB_MIG_MIGRATED;
+ default: return -1;
+ }
+}
+
+static int to_ib_qp_access_flags(int mlx4_flags)
+{
+ int ib_flags = 0;
+
+ if (mlx4_flags & MLX4_QP_BIT_RRE)
+ ib_flags |= IB_ACCESS_REMOTE_READ;
+ if (mlx4_flags & MLX4_QP_BIT_RWE)
+ ib_flags |= IB_ACCESS_REMOTE_WRITE;
+ if (mlx4_flags & MLX4_QP_BIT_RAE)
+ ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
+
+ return ib_flags;
+}
+
+static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
+ struct mlx4_qp_path *path)
+{
+ memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
+ ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1;
+
+ if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
+ return;
+
+ ib_ah_attr->dlid = be16_to_cpu(path->rlid);
+ ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
+ ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
+ ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
+ ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
+ if (ib_ah_attr->ah_flags) {
+ ib_ah_attr->grh.sgid_index = path->mgid_index;
+ ib_ah_attr->grh.hop_limit = path->hop_limit;
+ ib_ah_attr->grh.traffic_class =
+ (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
+ ib_ah_attr->grh.flow_label =
+ be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
+ memcpy(ib_ah_attr->grh.dgid.raw,
+ path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+ }
+}
+
+int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct mlx4_qp_context context;
+ int mlx4_state;
+ int err = 0;
+
+ mutex_lock(&qp->mutex);
+
+ if (qp->state == IB_QPS_RESET) {
+ qp_attr->qp_state = IB_QPS_RESET;
+ goto done;
+ }
+
+ err = mlx4_qp_query(dev->dev, &qp->mqp, &context);
+ if (err) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ mlx4_state = be32_to_cpu(context.flags) >> 28;
+
+ qp->state = to_ib_qp_state(mlx4_state);
+ qp_attr->qp_state = qp->state;
+ qp_attr->path_mtu = context.mtu_msgmax >> 5;
+ qp_attr->path_mig_state =
+ to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
+ qp_attr->qkey = be32_to_cpu(context.qkey);
+ qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
+ qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff;
+ qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff;
+ qp_attr->qp_access_flags =
+ to_ib_qp_access_flags(be32_to_cpu(context.params2));
+
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
+ to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);
+ to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);
+ qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
+ qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ }
+
+ qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
+ if (qp_attr->qp_state == IB_QPS_INIT)
+ qp_attr->port_num = qp->port;
+ else
+ qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1;
+
+ /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
+ qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
+
+ qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);
+
+ qp_attr->max_dest_rd_atomic =
+ 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
+ qp_attr->min_rnr_timer =
+ (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
+ qp_attr->timeout = context.pri_path.ackto >> 3;
+ qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7;
+ qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7;
+ qp_attr->alt_timeout = context.alt_path.ackto >> 3;
+
+done:
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
+ qp_attr->cap.max_recv_sge = qp->rq.max_gs;
+
+ if (!ibqp->uobject) {
+ qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
+ qp_attr->cap.max_send_sge = qp->sq.max_gs;
+ } else {
+ qp_attr->cap.max_send_wr = 0;
+ qp_attr->cap.max_send_sge = 0;
+ }
+
+ /*
+ * We don't support inline sends for kernel QPs (yet), and we
+ * don't know what userspace's value should be.
+ */
+ qp_attr->cap.max_inline_data = 0;
+
+ qp_init_attr->cap = qp_attr->cap;
+
+ qp_init_attr->create_flags = 0;
+ if (qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)
+ qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+
+ if (qp->flags & MLX4_IB_QP_LSO)
+ qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
+
+out:
+ mutex_unlock(&qp->mutex);
+ return err;
+}
+
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
new file mode 100644
index 0000000..d425652
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx4/qp.h>
+#include <linux/mlx4/srq.h>
+
+#include "mlx4_ib.h"
+#include "user.h"
+
+static void *get_wqe(struct mlx4_ib_srq *srq, int n)
+{
+ return mlx4_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
+}
+
+static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
+{
+ struct ib_event event;
+ struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
+
+ if (ibsrq->event_handler) {
+ event.device = ibsrq->device;
+ event.element.srq = ibsrq;
+ switch (type) {
+ case MLX4_EVENT_TYPE_SRQ_LIMIT:
+ event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ break;
+ case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
+ event.event = IB_EVENT_SRQ_ERR;
+ break;
+ default:
+ printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ "on SRQ %06x\n", type, srq->srqn);
+ return;
+ }
+
+ ibsrq->event_handler(&event, ibsrq->srq_context);
+ }
+}
+
+struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ struct mlx4_ib_srq *srq;
+ struct mlx4_wqe_srq_next_seg *next;
+ int desc_size;
+ int buf_size;
+ int err;
+ int i;
+
+ /* Sanity check SRQ size before proceeding */
+ if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes ||
+ init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
+ return ERR_PTR(-EINVAL);
+
+ srq = kmalloc(sizeof *srq, GFP_KERNEL);
+ if (!srq)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&srq->mutex);
+ spin_lock_init(&srq->lock);
+ srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
+ srq->msrq.max_gs = init_attr->attr.max_sge;
+
+ desc_size = max(32UL,
+ roundup_pow_of_two(sizeof (struct mlx4_wqe_srq_next_seg) +
+ srq->msrq.max_gs *
+ sizeof (struct mlx4_wqe_data_seg)));
+ srq->msrq.wqe_shift = ilog2(desc_size);
+
+ buf_size = srq->msrq.max * desc_size;
+
+ if (pd->uobject) {
+ struct mlx4_ib_create_srq ucmd;
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ err = -EFAULT;
+ goto err_srq;
+ }
+
+ srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+ buf_size, 0, 0);
+ if (IS_ERR(srq->umem)) {
+ err = PTR_ERR(srq->umem);
+ goto err_srq;
+ }
+
+ err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
+ ilog2(srq->umem->page_size), &srq->mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_ib_umem_write_mtt(dev, &srq->mtt, srq->umem);
+ if (err)
+ goto err_mtt;
+
+ err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
+ ucmd.db_addr, &srq->db);
+ if (err)
+ goto err_mtt;
+ } else {
+ err = mlx4_db_alloc(dev->dev, &srq->db, 0);
+ if (err)
+ goto err_srq;
+
+ *srq->db.db = 0;
+
+ if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+ err = -ENOMEM;
+ goto err_db;
+ }
+
+ srq->head = 0;
+ srq->tail = srq->msrq.max - 1;
+ srq->wqe_ctr = 0;
+
+ for (i = 0; i < srq->msrq.max; ++i) {
+ next = get_wqe(srq, i);
+ next->next_wqe_index =
+ cpu_to_be16((i + 1) & (srq->msrq.max - 1));
+ }
+
+ err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift,
+ &srq->mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
+ if (err)
+ goto err_mtt;
+
+ srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
+ if (!srq->wrid) {
+ err = -ENOMEM;
+ goto err_mtt;
+ }
+ }
+
+ err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt,
+ srq->db.dma, &srq->msrq);
+ if (err)
+ goto err_wrid;
+
+ srq->msrq.event = mlx4_ib_srq_event;
+
+ if (pd->uobject)
+ if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
+ err = -EFAULT;
+ goto err_wrid;
+ }
+
+ init_attr->attr.max_wr = srq->msrq.max - 1;
+
+ return &srq->ibsrq;
+
+err_wrid:
+ if (pd->uobject)
+ mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
+ else
+ kfree(srq->wrid);
+
+err_mtt:
+ mlx4_mtt_cleanup(dev->dev, &srq->mtt);
+
+err_buf:
+ if (pd->uobject)
+ ib_umem_release(srq->umem);
+ else
+ mlx4_buf_free(dev->dev, buf_size, &srq->buf);
+
+err_db:
+ if (!pd->uobject)
+ mlx4_db_free(dev->dev, &srq->db);
+
+err_srq:
+ kfree(srq);
+
+ return ERR_PTR(err);
+}
+
+int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
+ struct mlx4_ib_srq *srq = to_msrq(ibsrq);
+ int ret;
+
+ /* We don't support resizing SRQs (yet?) */
+ if (attr_mask & IB_SRQ_MAX_WR)
+ return -EINVAL;
+
+ if (attr_mask & IB_SRQ_LIMIT) {
+ if (attr->srq_limit >= srq->msrq.max)
+ return -EINVAL;
+
+ mutex_lock(&srq->mutex);
+ ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit);
+ mutex_unlock(&srq->mutex);
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
+ struct mlx4_ib_srq *srq = to_msrq(ibsrq);
+ int ret;
+ int limit_watermark;
+
+ ret = mlx4_srq_query(dev->dev, &srq->msrq, &limit_watermark);
+ if (ret)
+ return ret;
+
+ srq_attr->srq_limit = limit_watermark;
+ srq_attr->max_wr = srq->msrq.max - 1;
+ srq_attr->max_sge = srq->msrq.max_gs;
+
+ return 0;
+}
+
+int mlx4_ib_destroy_srq(struct ib_srq *srq)
+{
+ struct mlx4_ib_dev *dev = to_mdev(srq->device);
+ struct mlx4_ib_srq *msrq = to_msrq(srq);
+
+ mlx4_srq_free(dev->dev, &msrq->msrq);
+ mlx4_mtt_cleanup(dev->dev, &msrq->mtt);
+
+ if (srq->uobject) {
+ mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
+ ib_umem_release(msrq->umem);
+ } else {
+ kfree(msrq->wrid);
+ mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
+ &msrq->buf);
+ mlx4_db_free(dev->dev, &msrq->db);
+ }
+
+ kfree(msrq);
+
+ return 0;
+}
+
+void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
+{
+ struct mlx4_wqe_srq_next_seg *next;
+
+ /* always called with interrupts disabled. */
+ spin_lock(&srq->lock);
+
+ next = get_wqe(srq, srq->tail);
+ next->next_wqe_index = cpu_to_be16(wqe_index);
+ srq->tail = wqe_index;
+
+ spin_unlock(&srq->lock);
+}
+
+int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mlx4_ib_srq *srq = to_msrq(ibsrq);
+ struct mlx4_wqe_srq_next_seg *next;
+ struct mlx4_wqe_data_seg *scat;
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int i;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ if (unlikely(srq->head == srq->tail)) {
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
+ srq->wrid[srq->head] = wr->wr_id;
+
+ next = get_wqe(srq, srq->head);
+ srq->head = be16_to_cpu(next->next_wqe_index);
+ scat = (struct mlx4_wqe_data_seg *) (next + 1);
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
+ scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
+ scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
+ }
+
+ if (i < srq->msrq.max_gs) {
+ scat[i].byte_count = 0;
+ scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
+ scat[i].addr = 0;
+ }
+ }
+
+ if (likely(nreq)) {
+ srq->wqe_ctr += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ *srq->db.db = cpu_to_be32(srq->wqe_ctr);
+ }
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
new file mode 100644
index 0000000..13beede
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX4_IB_USER_H
+#define MLX4_IB_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define MLX4_IB_UVERBS_ABI_VERSION 3
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct mlx4_ib_alloc_ucontext_resp {
+ __u32 qp_tab_size;
+ __u16 bf_reg_size;
+ __u16 bf_regs_per_page;
+};
+
+struct mlx4_ib_alloc_pd_resp {
+ __u32 pdn;
+ __u32 reserved;
+};
+
+struct mlx4_ib_create_cq {
+ __u64 buf_addr;
+ __u64 db_addr;
+};
+
+struct mlx4_ib_create_cq_resp {
+ __u32 cqn;
+ __u32 reserved;
+};
+
+struct mlx4_ib_resize_cq {
+ __u64 buf_addr;
+};
+
+struct mlx4_ib_create_srq {
+ __u64 buf_addr;
+ __u64 db_addr;
+};
+
+struct mlx4_ib_create_srq_resp {
+ __u32 srqn;
+ __u32 reserved;
+};
+
+struct mlx4_ib_create_qp {
+ __u64 buf_addr;
+ __u64 db_addr;
+ __u8 log_sq_bb_count;
+ __u8 log_sq_stride;
+ __u8 sq_no_prefetch;
+ __u8 reserved[5];
+};
+
+#endif /* MLX4_IB_USER_H */
diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig
new file mode 100644
index 0000000..03efc07
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/Kconfig
@@ -0,0 +1,17 @@
+config INFINIBAND_MTHCA
+ tristate "Mellanox HCA support"
+ depends on PCI
+ ---help---
+ This is a low-level driver for Mellanox InfiniHost host
+ channel adapters (HCAs), including the MT23108 PCI-X HCA
+ ("Tavor") and the MT25208 PCI Express HCA ("Arbel").
+
+config INFINIBAND_MTHCA_DEBUG
+ bool "Verbose debugging output" if EMBEDDED
+ depends on INFINIBAND_MTHCA
+ default y
+ ---help---
+ This option causes debugging code to be compiled into the
+ mthca driver. The output can be turned on via the
+ debug_level module parameter (which can also be set after
+ the driver is loaded through sysfs).
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile
new file mode 100644
index 0000000..e388d95
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
+
+ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
+ mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \
+ mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \
+ mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o \
+ mthca_catas.o
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c
new file mode 100644
index 0000000..c5ccc2d
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+
+#include "mthca_dev.h"
+
+/* Trivial bitmap-based allocator */
+u32 mthca_alloc(struct mthca_alloc *alloc)
+{
+ unsigned long flags;
+ u32 obj;
+
+ spin_lock_irqsave(&alloc->lock, flags);
+
+ obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last);
+ if (obj >= alloc->max) {
+ alloc->top = (alloc->top + alloc->max) & alloc->mask;
+ obj = find_first_zero_bit(alloc->table, alloc->max);
+ }
+
+ if (obj < alloc->max) {
+ set_bit(obj, alloc->table);
+ obj |= alloc->top;
+ } else
+ obj = -1;
+
+ spin_unlock_irqrestore(&alloc->lock, flags);
+
+ return obj;
+}
+
+void mthca_free(struct mthca_alloc *alloc, u32 obj)
+{
+ unsigned long flags;
+
+ obj &= alloc->max - 1;
+
+ spin_lock_irqsave(&alloc->lock, flags);
+
+ clear_bit(obj, alloc->table);
+ alloc->last = min(alloc->last, obj);
+ alloc->top = (alloc->top + alloc->max) & alloc->mask;
+
+ spin_unlock_irqrestore(&alloc->lock, flags);
+}
+
+int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
+ u32 reserved)
+{
+ int i;
+
+ /* num must be a power of 2 */
+ if (num != 1 << (ffs(num) - 1))
+ return -EINVAL;
+
+ alloc->last = 0;
+ alloc->top = 0;
+ alloc->max = num;
+ alloc->mask = mask;
+ spin_lock_init(&alloc->lock);
+ alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof (long),
+ GFP_KERNEL);
+ if (!alloc->table)
+ return -ENOMEM;
+
+ bitmap_zero(alloc->table, num);
+ for (i = 0; i < reserved; ++i)
+ set_bit(i, alloc->table);
+
+ return 0;
+}
+
+void mthca_alloc_cleanup(struct mthca_alloc *alloc)
+{
+ kfree(alloc->table);
+}
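
A minimal usage sketch of the bitmap allocator API above; the wrapper function and the parameter choices are hypothetical, only the mthca_alloc_* calls come from this file:

static int example_alloc_usage(void)
{
	struct mthca_alloc alloc;
	u32 obj;
	int err;

	/* 256 objects; mask 255 keeps the rotating "top" bits at zero so
	 * returned values are plain bitmap indices; objects 0 and 1 are
	 * reserved and never handed out. */
	err = mthca_alloc_init(&alloc, 256, 255, 2);
	if (err)
		return err;

	obj = mthca_alloc(&alloc);
	if (obj == -1) {			/* bitmap exhausted */
		mthca_alloc_cleanup(&alloc);
		return -ENOMEM;
	}

	/* ... use obj as an index into a per-object table ... */

	mthca_free(&alloc, obj);
	mthca_alloc_cleanup(&alloc);
	return 0;
}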
+
+/*
+ * Array of pointers with lazy allocation of leaf pages. Callers of
+ * _get, _set and _clear methods must use a lock or otherwise
+ * serialize access to the array.
+ */
+
+#define MTHCA_ARRAY_MASK (PAGE_SIZE / sizeof (void *) - 1)
+
+void *mthca_array_get(struct mthca_array *array, int index)
+{
+ int p = (index * sizeof (void *)) >> PAGE_SHIFT;
+
+ if (array->page_list[p].page)
+ return array->page_list[p].page[index & MTHCA_ARRAY_MASK];
+ else
+ return NULL;
+}
+
+int mthca_array_set(struct mthca_array *array, int index, void *value)
+{
+ int p = (index * sizeof (void *)) >> PAGE_SHIFT;
+
+ /* Allocate with GFP_ATOMIC because we'll be called with locks held. */
+ if (!array->page_list[p].page)
+ array->page_list[p].page = (void **) get_zeroed_page(GFP_ATOMIC);
+
+ if (!array->page_list[p].page)
+ return -ENOMEM;
+
+ array->page_list[p].page[index & MTHCA_ARRAY_MASK] = value;
+ ++array->page_list[p].used;
+
+ return 0;
+}
+
+void mthca_array_clear(struct mthca_array *array, int index)
+{
+ int p = (index * sizeof (void *)) >> PAGE_SHIFT;
+
+ if (--array->page_list[p].used == 0) {
+ free_page((unsigned long) array->page_list[p].page);
+ array->page_list[p].page = NULL;
+ } else
+ array->page_list[p].page[index & MTHCA_ARRAY_MASK] = NULL;
+
+ if (array->page_list[p].used < 0)
+ pr_debug("Array %p index %d page %d with ref count %d < 0\n",
+ array, index, p, array->page_list[p].used);
+}
+
+int mthca_array_init(struct mthca_array *array, int nent)
+{
+ int npage = (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE;
+ int i;
+
+ array->page_list = kmalloc(npage * sizeof *array->page_list, GFP_KERNEL);
+ if (!array->page_list)
+ return -ENOMEM;
+
+ for (i = 0; i < npage; ++i) {
+ array->page_list[i].page = NULL;
+ array->page_list[i].used = 0;
+ }
+
+ return 0;
+}
+
+void mthca_array_cleanup(struct mthca_array *array, int nent)
+{
+ int i;
+
+ for (i = 0; i < (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
+ free_page((unsigned long) array->page_list[i].page);
+
+ kfree(array->page_list);
+}
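
And a similar hypothetical sketch for the lazy pointer array; as the comment above says, serializing _set/_get/_clear is the caller's responsibility:

static int example_array_usage(void *obj, int index)
{
	struct mthca_array arr;
	int err;

	err = mthca_array_init(&arr, 1024);
	if (err)
		return err;

	/* Leaf pages are allocated on first use (GFP_ATOMIC), so _set can
	 * fail even after a successful _init. */
	err = mthca_array_set(&arr, index, obj);
	if (!err) {
		if (mthca_array_get(&arr, index) != obj)
			err = -EINVAL;
		mthca_array_clear(&arr, index);
	}

	mthca_array_cleanup(&arr, 1024);
	return err;
}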
+
+/*
+ * Handling for queue buffers -- we allocate a bunch of memory and
+ * register it in a memory region at HCA virtual address 0. If the
+ * requested size is > max_direct, we split the allocation into
+ * multiple pages, so we don't require too much contiguous memory.
+ */
+
+int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
+ union mthca_buf *buf, int *is_direct, struct mthca_pd *pd,
+ int hca_write, struct mthca_mr *mr)
+{
+ int err = -ENOMEM;
+ int npages, shift;
+ u64 *dma_list = NULL;
+ dma_addr_t t;
+ int i;
+
+ if (size <= max_direct) {
+ *is_direct = 1;
+ npages = 1;
+ shift = get_order(size) + PAGE_SHIFT;
+
+ buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev,
+ size, &t, GFP_KERNEL);
+ if (!buf->direct.buf)
+ return -ENOMEM;
+
+ pci_unmap_addr_set(&buf->direct, mapping, t);
+
+ memset(buf->direct.buf, 0, size);
+
+ while (t & ((1 << shift) - 1)) {
+ --shift;
+ npages *= 2;
+ }
+
+ dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ if (!dma_list)
+ goto err_free;
+
+ for (i = 0; i < npages; ++i)
+ dma_list[i] = t + i * (1 << shift);
+ } else {
+ *is_direct = 0;
+ npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+ shift = PAGE_SHIFT;
+
+ dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ if (!dma_list)
+ return -ENOMEM;
+
+ buf->page_list = kmalloc(npages * sizeof *buf->page_list,
+ GFP_KERNEL);
+ if (!buf->page_list)
+ goto err_out;
+
+ for (i = 0; i < npages; ++i)
+ buf->page_list[i].buf = NULL;
+
+ for (i = 0; i < npages; ++i) {
+ buf->page_list[i].buf =
+ dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+ &t, GFP_KERNEL);
+ if (!buf->page_list[i].buf)
+ goto err_free;
+
+ dma_list[i] = t;
+ pci_unmap_addr_set(&buf->page_list[i], mapping, t);
+
+ clear_page(buf->page_list[i].buf);
+ }
+ }
+
+ err = mthca_mr_alloc_phys(dev, pd->pd_num,
+ dma_list, shift, npages,
+ 0, size,
+ MTHCA_MPT_FLAG_LOCAL_READ |
+ (hca_write ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0),
+ mr);
+ if (err)
+ goto err_free;
+
+ kfree(dma_list);
+
+ return 0;
+
+err_free:
+ mthca_buf_free(dev, size, buf, *is_direct, NULL);
+
+err_out:
+ kfree(dma_list);
+
+ return err;
+}
+
+void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf,
+ int is_direct, struct mthca_mr *mr)
+{
+ int i;
+
+ if (mr)
+ mthca_free_mr(dev, mr);
+
+ if (is_direct)
+ dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
+ pci_unmap_addr(&buf->direct, mapping));
+ else {
+ for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ buf->page_list[i].buf,
+ pci_unmap_addr(&buf->page_list[i],
+ mapping));
+ kfree(buf->page_list);
+ }
+}
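
A hypothetical caller of the two buffer helpers above, showing the direct versus page-list split driven by max_direct (sizes and names are illustrative only):

static int example_buf_usage(struct mthca_dev *dev, struct mthca_pd *pd)
{
	union mthca_buf buf;
	struct mthca_mr mr;
	int is_direct;
	int size = 16384;
	int err;

	/* With max_direct == PAGE_SIZE, anything larger than one page is
	 * split into a page list registered at HCA virtual address 0. */
	err = mthca_buf_alloc(dev, size, PAGE_SIZE, &buf, &is_direct,
			      pd, 1, &mr);
	if (err)
		return err;

	/* ... lay out WQEs in buf.direct.buf or buf.page_list[i].buf ... */

	mthca_buf_free(dev, size, &buf, is_direct, &mr);
	return 0;
}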
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
new file mode 100644
index 0000000..32f6c63
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/string.h>
+#include <linux/slab.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+
+#include "mthca_dev.h"
+
+enum {
+ MTHCA_RATE_TAVOR_FULL = 0,
+ MTHCA_RATE_TAVOR_1X = 1,
+ MTHCA_RATE_TAVOR_4X = 2,
+ MTHCA_RATE_TAVOR_1X_DDR = 3
+};
+
+enum {
+ MTHCA_RATE_MEMFREE_FULL = 0,
+ MTHCA_RATE_MEMFREE_QUARTER = 1,
+ MTHCA_RATE_MEMFREE_EIGHTH = 2,
+ MTHCA_RATE_MEMFREE_HALF = 3
+};
+
+struct mthca_av {
+ __be32 port_pd;
+ u8 reserved1;
+ u8 g_slid;
+ __be16 dlid;
+ u8 reserved2;
+ u8 gid_index;
+ u8 msg_sr;
+ u8 hop_limit;
+ __be32 sl_tclass_flowlabel;
+ __be32 dgid[4];
+};
+
+static enum ib_rate memfree_rate_to_ib(u8 mthca_rate, u8 port_rate)
+{
+ switch (mthca_rate) {
+ case MTHCA_RATE_MEMFREE_EIGHTH:
+ return mult_to_ib_rate(port_rate >> 3);
+ case MTHCA_RATE_MEMFREE_QUARTER:
+ return mult_to_ib_rate(port_rate >> 2);
+ case MTHCA_RATE_MEMFREE_HALF:
+ return mult_to_ib_rate(port_rate >> 1);
+ case MTHCA_RATE_MEMFREE_FULL:
+ default:
+ return mult_to_ib_rate(port_rate);
+ }
+}
+
+static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate)
+{
+ switch (mthca_rate) {
+ case MTHCA_RATE_TAVOR_1X: return IB_RATE_2_5_GBPS;
+ case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS;
+ case MTHCA_RATE_TAVOR_4X: return IB_RATE_10_GBPS;
+ default: return mult_to_ib_rate(port_rate);
+ }
+}
+
+enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port)
+{
+ if (mthca_is_memfree(dev)) {
+ /* Handle old Arbel FW */
+ if (dev->limits.stat_rate_support == 0x3 && mthca_rate)
+ return IB_RATE_2_5_GBPS;
+
+ return memfree_rate_to_ib(mthca_rate, dev->rate[port - 1]);
+ } else
+ return tavor_rate_to_ib(mthca_rate, dev->rate[port - 1]);
+}
+
+static u8 ib_rate_to_memfree(u8 req_rate, u8 cur_rate)
+{
+ if (cur_rate <= req_rate)
+ return 0;
+
+ /*
+ * Inter-packet delay (IPD) to get from rate X down to a rate
+ * no more than Y is (X - 1) / Y.
+ */
+ switch ((cur_rate - 1) / req_rate) {
+ case 0: return MTHCA_RATE_MEMFREE_FULL;
+ case 1: return MTHCA_RATE_MEMFREE_HALF;
+ case 2: /* fall through */
+ case 3: return MTHCA_RATE_MEMFREE_QUARTER;
+ default: return MTHCA_RATE_MEMFREE_EIGHTH;
+ }
+}
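
As a worked example of the inter-packet delay formula above: dropping from a 4X port (multiplier 4) to a requested 1X rate (multiplier 1) gives (4 - 1) / 1 = 3, so the function returns MTHCA_RATE_MEMFREE_QUARTER and the QP is throttled to one quarter of the port rate, which is exactly 1X.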
+
+static u8 ib_rate_to_tavor(u8 static_rate)
+{
+ switch (static_rate) {
+ case IB_RATE_2_5_GBPS: return MTHCA_RATE_TAVOR_1X;
+ case IB_RATE_5_GBPS: return MTHCA_RATE_TAVOR_1X_DDR;
+ case IB_RATE_10_GBPS: return MTHCA_RATE_TAVOR_4X;
+ default: return MTHCA_RATE_TAVOR_FULL;
+ }
+}
+
+u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port)
+{
+ u8 rate;
+
+ if (!static_rate || ib_rate_to_mult(static_rate) >= dev->rate[port - 1])
+ return 0;
+
+ if (mthca_is_memfree(dev))
+ rate = ib_rate_to_memfree(ib_rate_to_mult(static_rate),
+ dev->rate[port - 1]);
+ else
+ rate = ib_rate_to_tavor(static_rate);
+
+ if (!(dev->limits.stat_rate_support & (1 << rate)))
+ rate = 1;
+
+ return rate;
+}
+
+int mthca_create_ah(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct ib_ah_attr *ah_attr,
+ struct mthca_ah *ah)
+{
+ u32 index = -1;
+ struct mthca_av *av = NULL;
+
+ ah->type = MTHCA_AH_PCI_POOL;
+
+ if (mthca_is_memfree(dev)) {
+ ah->av = kmalloc(sizeof *ah->av, GFP_ATOMIC);
+ if (!ah->av)
+ return -ENOMEM;
+
+ ah->type = MTHCA_AH_KMALLOC;
+ av = ah->av;
+ } else if (!atomic_read(&pd->sqp_count) &&
+ !(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
+ index = mthca_alloc(&dev->av_table.alloc);
+
+ /* fall back to allocating the AV in host memory */
+ if (index == -1)
+ goto on_hca_fail;
+
+ av = kmalloc(sizeof *av, GFP_ATOMIC);
+ if (!av)
+ goto on_hca_fail;
+
+ ah->type = MTHCA_AH_ON_HCA;
+ ah->avdma = dev->av_table.ddr_av_base +
+ index * MTHCA_AV_SIZE;
+ }
+
+on_hca_fail:
+ if (ah->type == MTHCA_AH_PCI_POOL) {
+ ah->av = pci_pool_alloc(dev->av_table.pool,
+ GFP_ATOMIC, &ah->avdma);
+ if (!ah->av)
+ return -ENOMEM;
+
+ av = ah->av;
+ }
+
+ ah->key = pd->ntmr.ibmr.lkey;
+
+ memset(av, 0, MTHCA_AV_SIZE);
+
+ av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24));
+ av->g_slid = ah_attr->src_path_bits;
+ av->dlid = cpu_to_be16(ah_attr->dlid);
+ av->msg_sr = (3 << 4) | /* 2K message */
+ mthca_get_rate(dev, ah_attr->static_rate, ah_attr->port_num);
+ av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ av->g_slid |= 0x80;
+ av->gid_index = (ah_attr->port_num - 1) * dev->limits.gid_table_len +
+ ah_attr->grh.sgid_index;
+ av->hop_limit = ah_attr->grh.hop_limit;
+ av->sl_tclass_flowlabel |=
+ cpu_to_be32((ah_attr->grh.traffic_class << 20) |
+ ah_attr->grh.flow_label);
+ memcpy(av->dgid, ah_attr->grh.dgid.raw, 16);
+ } else {
+ /* Arbel workaround -- low byte of GID must be 2 */
+ av->dgid[3] = cpu_to_be32(2);
+ }
+
+ if (0) {
+ int j;
+
+ mthca_dbg(dev, "Created UDAV at %p/%08lx:\n",
+ av, (unsigned long) ah->avdma);
+ for (j = 0; j < 8; ++j)
+ printk(KERN_DEBUG " [%2x] %08x\n",
+ j * 4, be32_to_cpu(((__be32 *) av)[j]));
+ }
+
+ if (ah->type == MTHCA_AH_ON_HCA) {
+ memcpy_toio(dev->av_table.av_map + index * MTHCA_AV_SIZE,
+ av, MTHCA_AV_SIZE);
+ kfree(av);
+ }
+
+ return 0;
+}
+
+int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah)
+{
+ switch (ah->type) {
+ case MTHCA_AH_ON_HCA:
+ mthca_free(&dev->av_table.alloc,
+ (ah->avdma - dev->av_table.ddr_av_base) /
+ MTHCA_AV_SIZE);
+ break;
+
+ case MTHCA_AH_PCI_POOL:
+ pci_pool_free(dev->av_table.pool, ah->av, ah->avdma);
+ break;
+
+ case MTHCA_AH_KMALLOC:
+ kfree(ah->av);
+ break;
+ }
+
+ return 0;
+}
+
+int mthca_ah_grh_present(struct mthca_ah *ah)
+{
+ return !!(ah->av->g_slid & 0x80);
+}
+
+int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
+ struct ib_ud_header *header)
+{
+ if (ah->type == MTHCA_AH_ON_HCA)
+ return -EINVAL;
+
+ header->lrh.service_level = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
+ header->lrh.destination_lid = ah->av->dlid;
+ header->lrh.source_lid = cpu_to_be16(ah->av->g_slid & 0x7f);
+ if (mthca_ah_grh_present(ah)) {
+ header->grh.traffic_class =
+ (be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff;
+ header->grh.flow_label =
+ ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
+ header->grh.hop_limit = ah->av->hop_limit;
+ ib_get_cached_gid(&dev->ib_dev,
+ be32_to_cpu(ah->av->port_pd) >> 24,
+ ah->av->gid_index % dev->limits.gid_table_len,
+ &header->grh.source_gid);
+ memcpy(header->grh.destination_gid.raw,
+ ah->av->dgid, 16);
+ }
+
+ return 0;
+}
+
+int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr)
+{
+ struct mthca_ah *ah = to_mah(ibah);
+ struct mthca_dev *dev = to_mdev(ibah->device);
+
+ /* Only implemented for MAD AHs and mem-free HCAs for now. */
+ if (ah->type == MTHCA_AH_ON_HCA)
+ return -ENOSYS;
+
+ memset(attr, 0, sizeof *attr);
+ attr->dlid = be16_to_cpu(ah->av->dlid);
+ attr->sl = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
+ attr->port_num = be32_to_cpu(ah->av->port_pd) >> 24;
+ attr->static_rate = mthca_rate_to_ib(dev, ah->av->msg_sr & 0x7,
+ attr->port_num);
+ attr->src_path_bits = ah->av->g_slid & 0x7F;
+ attr->ah_flags = mthca_ah_grh_present(ah) ? IB_AH_GRH : 0;
+
+ if (attr->ah_flags) {
+ attr->grh.traffic_class =
+ be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20;
+ attr->grh.flow_label =
+ be32_to_cpu(ah->av->sl_tclass_flowlabel) & 0xfffff;
+ attr->grh.hop_limit = ah->av->hop_limit;
+ attr->grh.sgid_index = ah->av->gid_index &
+ (dev->limits.gid_table_len - 1);
+ memcpy(attr->grh.dgid.raw, ah->av->dgid, 16);
+ }
+
+ return 0;
+}
+
+int mthca_init_av_table(struct mthca_dev *dev)
+{
+ int err;
+
+ if (mthca_is_memfree(dev))
+ return 0;
+
+ err = mthca_alloc_init(&dev->av_table.alloc,
+ dev->av_table.num_ddr_avs,
+ dev->av_table.num_ddr_avs - 1,
+ 0);
+ if (err)
+ return err;
+
+ dev->av_table.pool = pci_pool_create("mthca_av", dev->pdev,
+ MTHCA_AV_SIZE,
+ MTHCA_AV_SIZE, 0);
+ if (!dev->av_table.pool)
+ goto out_free_alloc;
+
+ if (!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
+ dev->av_table.av_map = ioremap(pci_resource_start(dev->pdev, 4) +
+ dev->av_table.ddr_av_base -
+ dev->ddr_start,
+ dev->av_table.num_ddr_avs *
+ MTHCA_AV_SIZE);
+ if (!dev->av_table.av_map)
+ goto out_free_pool;
+ } else
+ dev->av_table.av_map = NULL;
+
+ return 0;
+
+ out_free_pool:
+ pci_pool_destroy(dev->av_table.pool);
+
+ out_free_alloc:
+ mthca_alloc_cleanup(&dev->av_table.alloc);
+ return -ENOMEM;
+}
+
+void mthca_cleanup_av_table(struct mthca_dev *dev)
+{
+ if (mthca_is_memfree(dev))
+ return;
+
+ if (dev->av_table.av_map)
+ iounmap(dev->av_table.av_map);
+ pci_pool_destroy(dev->av_table.pool);
+ mthca_alloc_cleanup(&dev->av_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
new file mode 100644
index 0000000..65ad359
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+
+#include "mthca_dev.h"
+
+enum {
+ MTHCA_CATAS_POLL_INTERVAL = 5 * HZ,
+
+ MTHCA_CATAS_TYPE_INTERNAL = 0,
+ MTHCA_CATAS_TYPE_UPLINK = 3,
+ MTHCA_CATAS_TYPE_DDR = 4,
+ MTHCA_CATAS_TYPE_PARITY = 5,
+};
+
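+/*
+ * A single global work item drains the list of devices that have hit
+ * a catastrophic error and resets them one at a time; catas_lock
+ * protects the list itself.
+ */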
+static DEFINE_SPINLOCK(catas_lock);
+
+static LIST_HEAD(catas_list);
+static struct workqueue_struct *catas_wq;
+static struct work_struct catas_work;
+
+static int catas_reset_disable;
+module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
+MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
+
+static void catas_reset(struct work_struct *work)
+{
+ struct mthca_dev *dev, *tmpdev;
+ LIST_HEAD(tlist);
+ int ret;
+
+ mutex_lock(&mthca_device_mutex);
+
+ spin_lock_irq(&catas_lock);
+ list_splice_init(&catas_list, &tlist);
+ spin_unlock_irq(&catas_lock);
+
+ list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
+ ret = __mthca_restart_one(dev->pdev);
+ if (ret)
+ mthca_err(dev, "Reset failed (%d)\n", ret);
+ else
+ mthca_dbg(dev, "Reset succeeded\n");
+ }
+
+ mutex_unlock(&mthca_device_mutex);
+}
+
+static void handle_catas(struct mthca_dev *dev)
+{
+ struct ib_event event;
+ unsigned long flags;
+ const char *type;
+ int i;
+
+ event.device = &dev->ib_dev;
+ event.event = IB_EVENT_DEVICE_FATAL;
+ event.element.port_num = 0;
+
+ ib_dispatch_event(&event);
+
+ switch (swab32(readl(dev->catas_err.map)) >> 24) {
+ case MTHCA_CATAS_TYPE_INTERNAL:
+ type = "internal error";
+ break;
+ case MTHCA_CATAS_TYPE_UPLINK:
+ type = "uplink bus error";
+ break;
+ case MTHCA_CATAS_TYPE_DDR:
+ type = "DDR data error";
+ break;
+ case MTHCA_CATAS_TYPE_PARITY:
+ type = "internal parity error";
+ break;
+ default:
+ type = "unknown error";
+ break;
+ }
+
+ mthca_err(dev, "Catastrophic error detected: %s\n", type);
+ for (i = 0; i < dev->catas_err.size; ++i)
+ mthca_err(dev, " buf[%02x]: %08x\n",
+ i, swab32(readl(dev->catas_err.map + i)));
+
+ if (catas_reset_disable)
+ return;
+
+ spin_lock_irqsave(&catas_lock, flags);
+ list_add(&dev->catas_err.list, &catas_list);
+ queue_work(catas_wq, &catas_work);
+ spin_unlock_irqrestore(&catas_lock, flags);
+}
+
+static void poll_catas(unsigned long dev_ptr)
+{
+ struct mthca_dev *dev = (struct mthca_dev *) dev_ptr;
+ int i;
+
+ for (i = 0; i < dev->catas_err.size; ++i)
+ if (readl(dev->catas_err.map + i)) {
+ handle_catas(dev);
+ return;
+ }
+
+ mod_timer(&dev->catas_err.timer,
+ round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL));
+}
+
+void mthca_start_catas_poll(struct mthca_dev *dev)
+{
+ unsigned long addr;
+
+ init_timer(&dev->catas_err.timer);
+ dev->catas_err.map = NULL;
+
+ addr = pci_resource_start(dev->pdev, 0) +
+ ((pci_resource_len(dev->pdev, 0) - 1) &
+ dev->catas_err.addr);
+
+ dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
+ if (!dev->catas_err.map) {
+ mthca_warn(dev, "couldn't map catastrophic error region "
+ "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
+ return;
+ }
+
+ dev->catas_err.timer.data = (unsigned long) dev;
+ dev->catas_err.timer.function = poll_catas;
+ dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL;
+ INIT_LIST_HEAD(&dev->catas_err.list);
+ add_timer(&dev->catas_err.timer);
+}
+
+void mthca_stop_catas_poll(struct mthca_dev *dev)
+{
+ del_timer_sync(&dev->catas_err.timer);
+
+ if (dev->catas_err.map)
+ iounmap(dev->catas_err.map);
+
+ spin_lock_irq(&catas_lock);
+ list_del(&dev->catas_err.list);
+ spin_unlock_irq(&catas_lock);
+}
+
+int __init mthca_catas_init(void)
+{
+ INIT_WORK(&catas_work, catas_reset);
+
+ catas_wq = create_singlethread_workqueue("mthca_catas");
+ if (!catas_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mthca_catas_cleanup(void)
+{
+ destroy_workqueue(catas_wq);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
new file mode 100644
index 0000000..c33e1c5
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -0,0 +1,1937 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <asm/io.h>
+#include <rdma/ib_mad.h>
+
+#include "mthca_dev.h"
+#include "mthca_config_reg.h"
+#include "mthca_cmd.h"
+#include "mthca_memfree.h"
+
+#define CMD_POLL_TOKEN 0xffff
+
+enum {
+ HCR_IN_PARAM_OFFSET = 0x00,
+ HCR_IN_MODIFIER_OFFSET = 0x08,
+ HCR_OUT_PARAM_OFFSET = 0x0c,
+ HCR_TOKEN_OFFSET = 0x14,
+ HCR_STATUS_OFFSET = 0x18,
+
+ HCR_OPMOD_SHIFT = 12,
+ HCA_E_BIT = 22,
+ HCR_GO_BIT = 23
+};
+
+enum {
+ /* initialization and general commands */
+ CMD_SYS_EN = 0x1,
+ CMD_SYS_DIS = 0x2,
+ CMD_MAP_FA = 0xfff,
+ CMD_UNMAP_FA = 0xffe,
+ CMD_RUN_FW = 0xff6,
+ CMD_MOD_STAT_CFG = 0x34,
+ CMD_QUERY_DEV_LIM = 0x3,
+ CMD_QUERY_FW = 0x4,
+ CMD_ENABLE_LAM = 0xff8,
+ CMD_DISABLE_LAM = 0xff7,
+ CMD_QUERY_DDR = 0x5,
+ CMD_QUERY_ADAPTER = 0x6,
+ CMD_INIT_HCA = 0x7,
+ CMD_CLOSE_HCA = 0x8,
+ CMD_INIT_IB = 0x9,
+ CMD_CLOSE_IB = 0xa,
+ CMD_QUERY_HCA = 0xb,
+ CMD_SET_IB = 0xc,
+ CMD_ACCESS_DDR = 0x2e,
+ CMD_MAP_ICM = 0xffa,
+ CMD_UNMAP_ICM = 0xff9,
+ CMD_MAP_ICM_AUX = 0xffc,
+ CMD_UNMAP_ICM_AUX = 0xffb,
+ CMD_SET_ICM_SIZE = 0xffd,
+
+ /* TPT commands */
+ CMD_SW2HW_MPT = 0xd,
+ CMD_QUERY_MPT = 0xe,
+ CMD_HW2SW_MPT = 0xf,
+ CMD_READ_MTT = 0x10,
+ CMD_WRITE_MTT = 0x11,
+ CMD_SYNC_TPT = 0x2f,
+
+ /* EQ commands */
+ CMD_MAP_EQ = 0x12,
+ CMD_SW2HW_EQ = 0x13,
+ CMD_HW2SW_EQ = 0x14,
+ CMD_QUERY_EQ = 0x15,
+
+ /* CQ commands */
+ CMD_SW2HW_CQ = 0x16,
+ CMD_HW2SW_CQ = 0x17,
+ CMD_QUERY_CQ = 0x18,
+ CMD_RESIZE_CQ = 0x2c,
+
+ /* SRQ commands */
+ CMD_SW2HW_SRQ = 0x35,
+ CMD_HW2SW_SRQ = 0x36,
+ CMD_QUERY_SRQ = 0x37,
+ CMD_ARM_SRQ = 0x40,
+
+ /* QP/EE commands */
+ CMD_RST2INIT_QPEE = 0x19,
+ CMD_INIT2RTR_QPEE = 0x1a,
+ CMD_RTR2RTS_QPEE = 0x1b,
+ CMD_RTS2RTS_QPEE = 0x1c,
+ CMD_SQERR2RTS_QPEE = 0x1d,
+ CMD_2ERR_QPEE = 0x1e,
+ CMD_RTS2SQD_QPEE = 0x1f,
+ CMD_SQD2SQD_QPEE = 0x38,
+ CMD_SQD2RTS_QPEE = 0x20,
+ CMD_ERR2RST_QPEE = 0x21,
+ CMD_QUERY_QPEE = 0x22,
+ CMD_INIT2INIT_QPEE = 0x2d,
+ CMD_SUSPEND_QPEE = 0x32,
+ CMD_UNSUSPEND_QPEE = 0x33,
+ /* special QPs and management commands */
+ CMD_CONF_SPECIAL_QP = 0x23,
+ CMD_MAD_IFC = 0x24,
+
+ /* multicast commands */
+ CMD_READ_MGM = 0x25,
+ CMD_WRITE_MGM = 0x26,
+ CMD_MGID_HASH = 0x27,
+
+ /* miscellaneous commands */
+ CMD_DIAG_RPRT = 0x30,
+ CMD_NOP = 0x31,
+
+ /* debug commands */
+ CMD_QUERY_DEBUG_MSG = 0x2a,
+ CMD_SET_DEBUG_MSG = 0x2b,
+};
+
+/*
+ * According to Mellanox code, FW may be starved and never complete
+ * commands. So we can't use strict timeouts described in PRM -- we
+ * just arbitrarily select 60 seconds for now.
+ */
+#if 0
+/*
+ * Round up and add 1 to make sure we get the full wait time (since we
+ * will be starting in the middle of a jiffy)
+ */
+enum {
+ CMD_TIME_CLASS_A = (HZ + 999) / 1000 + 1,
+ CMD_TIME_CLASS_B = (HZ + 99) / 100 + 1,
+ CMD_TIME_CLASS_C = (HZ + 9) / 10 + 1
+};
+#else
+enum {
+ CMD_TIME_CLASS_A = 60 * HZ,
+ CMD_TIME_CLASS_B = 60 * HZ,
+ CMD_TIME_CLASS_C = 60 * HZ
+};
+#endif
+
+enum {
+ GO_BIT_TIMEOUT = HZ * 10
+};
+
+struct mthca_cmd_context {
+ struct completion done;
+ int result;
+ int next;
+ u64 out_param;
+ u16 token;
+ u8 status;
+};
+
+static int fw_cmd_doorbell = 0;
+module_param(fw_cmd_doorbell, int, 0644);
+MODULE_PARM_DESC(fw_cmd_doorbell, "post FW commands through doorbell page if nonzero "
+ "(and supported by FW)");
+
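+/*
+ * The GO bit in the HCR status word is owned by firmware: it is set
+ * when a command is posted and cleared by the FW once the HCR may be
+ * reused, so software must wait for it to clear before posting.
+ */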
+static inline int go_bit(struct mthca_dev *dev)
+{
+ return readl(dev->hcr + HCR_STATUS_OFFSET) &
+ swab32(1 << HCR_GO_BIT);
+}
+
+static void mthca_cmd_post_dbell(struct mthca_dev *dev,
+ u64 in_param,
+ u64 out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ u16 token)
+{
+ void __iomem *ptr = dev->cmd.dbell_map;
+ u16 *offs = dev->cmd.dbell_offsets;
+
+ __raw_writel((__force u32) cpu_to_be32(in_param >> 32), ptr + offs[0]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), ptr + offs[1]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(in_modifier), ptr + offs[2]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(out_param >> 32), ptr + offs[3]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), ptr + offs[4]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(token << 16), ptr + offs[5]);
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) |
+ (1 << HCA_E_BIT) |
+ (op_modifier << HCR_OPMOD_SHIFT) |
+ op), ptr + offs[6]);
+ wmb();
+ __raw_writel((__force u32) 0, ptr + offs[7]);
+ wmb();
+}
+
+static int mthca_cmd_post_hcr(struct mthca_dev *dev,
+ u64 in_param,
+ u64 out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ u16 token,
+ int event)
+{
+ if (event) {
+ unsigned long end = jiffies + GO_BIT_TIMEOUT;
+
+ while (go_bit(dev) && time_before(jiffies, end)) {
+ set_current_state(TASK_RUNNING);
+ schedule();
+ }
+ }
+
+ if (go_bit(dev))
+ return -EAGAIN;
+
+ /*
+ * We use writel (instead of something like memcpy_toio)
+ * because writes of less than 32 bits to the HCR don't work
+ * (and some architectures such as ia64 implement memcpy_toio
+ * in terms of writeb).
+ */
+ __raw_writel((__force u32) cpu_to_be32(in_param >> 32), dev->hcr + 0 * 4);
+ __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), dev->hcr + 1 * 4);
+ __raw_writel((__force u32) cpu_to_be32(in_modifier), dev->hcr + 2 * 4);
+ __raw_writel((__force u32) cpu_to_be32(out_param >> 32), dev->hcr + 3 * 4);
+ __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4);
+ __raw_writel((__force u32) cpu_to_be32(token << 16), dev->hcr + 5 * 4);
+
+ /* __raw_writel may not order writes. */
+ wmb();
+
+ __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) |
+ (event ? (1 << HCA_E_BIT) : 0) |
+ (op_modifier << HCR_OPMOD_SHIFT) |
+ op), dev->hcr + 6 * 4);
+
+ return 0;
+}
+
+static int mthca_cmd_post(struct mthca_dev *dev,
+ u64 in_param,
+ u64 out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ u16 token,
+ int event)
+{
+ int err = 0;
+
+ mutex_lock(&dev->cmd.hcr_mutex);
+
+ if (event && dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS && fw_cmd_doorbell)
+ mthca_cmd_post_dbell(dev, in_param, out_param, in_modifier,
+ op_modifier, op, token);
+ else
+ err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier,
+ op_modifier, op, token, event);
+
+ /*
+ * Make sure that our HCR writes don't get mixed in with
+ * writes from another CPU starting a FW command.
+ */
+ mmiowb();
+
+ mutex_unlock(&dev->cmd.hcr_mutex);
+ return err;
+}
+
+static int mthca_cmd_poll(struct mthca_dev *dev,
+ u64 in_param,
+ u64 *out_param,
+ int out_is_imm,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ unsigned long timeout,
+ u8 *status)
+{
+ int err = 0;
+ unsigned long end;
+
+ down(&dev->cmd.poll_sem);
+
+ err = mthca_cmd_post(dev, in_param,
+ out_param ? *out_param : 0,
+ in_modifier, op_modifier,
+ op, CMD_POLL_TOKEN, 0);
+ if (err)
+ goto out;
+
+ end = timeout + jiffies;
+ while (go_bit(dev) && time_before(jiffies, end)) {
+ set_current_state(TASK_RUNNING);
+ schedule();
+ }
+
+ if (go_bit(dev)) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ if (out_is_imm)
+ *out_param =
+ (u64) be32_to_cpu((__force __be32)
+ __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
+ (u64) be32_to_cpu((__force __be32)
+ __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4));
+
+ *status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24;
+
+out:
+ up(&dev->cmd.poll_sem);
+ return err;
+}
+
+void mthca_cmd_event(struct mthca_dev *dev,
+ u16 token,
+ u8 status,
+ u64 out_param)
+{
+ struct mthca_cmd_context *context =
+ &dev->cmd.context[token & dev->cmd.token_mask];
+
+ /* previously timed out command completing at long last */
+ if (token != context->token)
+ return;
+
+ context->result = 0;
+ context->status = status;
+ context->out_param = out_param;
+
+ complete(&context->done);
+}
+
+static int mthca_cmd_wait(struct mthca_dev *dev,
+ u64 in_param,
+ u64 *out_param,
+ int out_is_imm,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ unsigned long timeout,
+ u8 *status)
+{
+ int err = 0;
+ struct mthca_cmd_context *context;
+
+ down(&dev->cmd.event_sem);
+
+ spin_lock(&dev->cmd.context_lock);
+ BUG_ON(dev->cmd.free_head < 0);
+ context = &dev->cmd.context[dev->cmd.free_head];
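+ /*
+ * Bumping the token by token_mask + 1 leaves the low bits (the
+ * context index) unchanged while altering the high bits, so a
+ * completion arriving for a command that already timed out is
+ * recognized and dropped in mthca_cmd_event().
+ */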
+ context->token += dev->cmd.token_mask + 1;
+ dev->cmd.free_head = context->next;
+ spin_unlock(&dev->cmd.context_lock);
+
+ init_completion(&context->done);
+
+ err = mthca_cmd_post(dev, in_param,
+ out_param ? *out_param : 0,
+ in_modifier, op_modifier,
+ op, context->token, 1);
+ if (err)
+ goto out;
+
+ if (!wait_for_completion_timeout(&context->done, timeout)) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = context->result;
+ if (err)
+ goto out;
+
+ *status = context->status;
+ if (*status)
+ mthca_dbg(dev, "Command %02x completed with status %02x\n",
+ op, *status);
+
+ if (out_is_imm)
+ *out_param = context->out_param;
+
+out:
+ spin_lock(&dev->cmd.context_lock);
+ context->next = dev->cmd.free_head;
+ dev->cmd.free_head = context - dev->cmd.context;
+ spin_unlock(&dev->cmd.context_lock);
+
+ up(&dev->cmd.event_sem);
+ return err;
+}
+
+/* Invoke a command with an output mailbox */
+static int mthca_cmd_box(struct mthca_dev *dev,
+ u64 in_param,
+ u64 out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ unsigned long timeout,
+ u8 *status)
+{
+ if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
+ return mthca_cmd_wait(dev, in_param, &out_param, 0,
+ in_modifier, op_modifier, op,
+ timeout, status);
+ else
+ return mthca_cmd_poll(dev, in_param, &out_param, 0,
+ in_modifier, op_modifier, op,
+ timeout, status);
+}
+
+/* Invoke a command with no output parameter */
+static int mthca_cmd(struct mthca_dev *dev,
+ u64 in_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ unsigned long timeout,
+ u8 *status)
+{
+ return mthca_cmd_box(dev, in_param, 0, in_modifier,
+ op_modifier, op, timeout, status);
+}
+
+/*
+ * Invoke a command with an immediate output parameter (and copy the
+ * output into the caller's out_param pointer after the command
+ * executes).
+ */
+static int mthca_cmd_imm(struct mthca_dev *dev,
+ u64 in_param,
+ u64 *out_param,
+ u32 in_modifier,
+ u8 op_modifier,
+ u16 op,
+ unsigned long timeout,
+ u8 *status)
+{
+ if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
+ return mthca_cmd_wait(dev, in_param, out_param, 1,
+ in_modifier, op_modifier, op,
+ timeout, status);
+ else
+ return mthca_cmd_poll(dev, in_param, out_param, 1,
+ in_modifier, op_modifier, op,
+ timeout, status);
+}
+
+int mthca_cmd_init(struct mthca_dev *dev)
+{
+ mutex_init(&dev->cmd.hcr_mutex);
+ sema_init(&dev->cmd.poll_sem, 1);
+ dev->cmd.flags = 0;
+
+ dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE,
+ MTHCA_HCR_SIZE);
+ if (!dev->hcr) {
+ mthca_err(dev, "Couldn't map command register.\n");
+ return -ENOMEM;
+ }
+
+ dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev,
+ MTHCA_MAILBOX_SIZE,
+ MTHCA_MAILBOX_SIZE, 0);
+ if (!dev->cmd.pool) {
+ iounmap(dev->hcr);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void mthca_cmd_cleanup(struct mthca_dev *dev)
+{
+ pci_pool_destroy(dev->cmd.pool);
+ iounmap(dev->hcr);
+ if (dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS)
+ iounmap(dev->cmd.dbell_map);
+}
+
+/*
+ * Switch to using events to issue FW commands (should be called after
+ * event queue to command events has been initialized).
+ */
+int mthca_cmd_use_events(struct mthca_dev *dev)
+{
+ int i;
+
+ dev->cmd.context = kmalloc(dev->cmd.max_cmds *
+ sizeof (struct mthca_cmd_context),
+ GFP_KERNEL);
+ if (!dev->cmd.context)
+ return -ENOMEM;
+
+ for (i = 0; i < dev->cmd.max_cmds; ++i) {
+ dev->cmd.context[i].token = i;
+ dev->cmd.context[i].next = i + 1;
+ }
+
+ dev->cmd.context[dev->cmd.max_cmds - 1].next = -1;
+ dev->cmd.free_head = 0;
+
+ sema_init(&dev->cmd.event_sem, dev->cmd.max_cmds);
+ spin_lock_init(&dev->cmd.context_lock);
+
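+ /*
+ * Round max_cmds up to the next power of two and use one less than
+ * that as the mask that maps a command token back to its context slot.
+ */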
+ for (dev->cmd.token_mask = 1;
+ dev->cmd.token_mask < dev->cmd.max_cmds;
+ dev->cmd.token_mask <<= 1)
+ ; /* nothing */
+ --dev->cmd.token_mask;
+
+ dev->cmd.flags |= MTHCA_CMD_USE_EVENTS;
+
+ down(&dev->cmd.poll_sem);
+
+ return 0;
+}
+
+/*
+ * Switch back to polling (used when shutting down the device)
+ */
+void mthca_cmd_use_polling(struct mthca_dev *dev)
+{
+ int i;
+
+ dev->cmd.flags &= ~MTHCA_CMD_USE_EVENTS;
+
+ for (i = 0; i < dev->cmd.max_cmds; ++i)
+ down(&dev->cmd.event_sem);
+
+ kfree(dev->cmd.context);
+
+ up(&dev->cmd.poll_sem);
+}
+
+struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
+ gfp_t gfp_mask)
+{
+ struct mthca_mailbox *mailbox;
+
+ mailbox = kmalloc(sizeof *mailbox, gfp_mask);
+ if (!mailbox)
+ return ERR_PTR(-ENOMEM);
+
+ mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma);
+ if (!mailbox->buf) {
+ kfree(mailbox);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return mailbox;
+}
+
+void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox)
+{
+ if (!mailbox)
+ return;
+
+ pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
+ kfree(mailbox);
+}
+
+int mthca_SYS_EN(struct mthca_dev *dev, u8 *status)
+{
+ u64 out;
+ int ret;
+
+ ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, HZ, status);
+
+ if (*status == MTHCA_CMD_STAT_DDR_MEM_ERR)
+ mthca_warn(dev, "SYS_EN DDR error: syn=%x, sock=%d, "
+ "sladdr=%d, SPD source=%s\n",
+ (int) (out >> 6) & 0xf, (int) (out >> 4) & 3,
+ (int) (out >> 1) & 7, (int) out & 1 ? "NVMEM" : "DIMM");
+
+ return ret;
+}
+
+int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, HZ, status);
+}
+
+static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
+ u64 virt, u8 *status)
+{
+ struct mthca_mailbox *mailbox;
+ struct mthca_icm_iter iter;
+ __be64 *pages;
+ int lg;
+ int nent = 0;
+ int i;
+ int err = 0;
+ int ts = 0, tc = 0;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ memset(mailbox->buf, 0, MTHCA_MAILBOX_SIZE);
+ pages = mailbox->buf;
+
+ for (mthca_icm_first(icm, &iter);
+ !mthca_icm_last(&iter);
+ mthca_icm_next(&iter)) {
+ /*
+ * We have to pass pages that are aligned to their
+ * size, so find the least significant 1 in the
+ * address or size and use that as our log2 size.
+ */
+ lg = ffs(mthca_icm_addr(&iter) | mthca_icm_size(&iter)) - 1;
+ if (lg < MTHCA_ICM_PAGE_SHIFT) {
+ mthca_warn(dev, "Got FW area not aligned to %d (%llx/%lx).\n",
+ MTHCA_ICM_PAGE_SIZE,
+ (unsigned long long) mthca_icm_addr(&iter),
+ mthca_icm_size(&iter));
+ err = -EINVAL;
+ goto out;
+ }
+ for (i = 0; i < mthca_icm_size(&iter) >> lg; ++i) {
+ if (virt != -1) {
+ pages[nent * 2] = cpu_to_be64(virt);
+ virt += 1 << lg;
+ }
+
+ pages[nent * 2 + 1] =
+ cpu_to_be64((mthca_icm_addr(&iter) + (i << lg)) |
+ (lg - MTHCA_ICM_PAGE_SHIFT));
+ ts += 1 << (lg - 10);
+ ++tc;
+
+ if (++nent == MTHCA_MAILBOX_SIZE / 16) {
+ err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
+ CMD_TIME_CLASS_B, status);
+ if (err || *status)
+ goto out;
+ nent = 0;
+ }
+ }
+ }
+
+ if (nent)
+ err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
+ CMD_TIME_CLASS_B, status);
+
+ switch (op) {
+ case CMD_MAP_FA:
+ mthca_dbg(dev, "Mapped %d chunks/%d KB for FW.\n", tc, ts);
+ break;
+ case CMD_MAP_ICM_AUX:
+ mthca_dbg(dev, "Mapped %d chunks/%d KB for ICM aux.\n", tc, ts);
+ break;
+ case CMD_MAP_ICM:
+ mthca_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM.\n",
+ tc, ts, (unsigned long long) virt - (ts << 10));
+ break;
+ }
+
+out:
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
+int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status)
+{
+ return mthca_map_cmd(dev, CMD_MAP_FA, icm, -1, status);
+}
+
+int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_FA, CMD_TIME_CLASS_B, status);
+}
+
+int mthca_RUN_FW(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status);
+}
+
+static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base)
+{
+ unsigned long addr;
+ u16 max_off = 0;
+ int i;
+
+ for (i = 0; i < 8; ++i)
+ max_off = max(max_off, dev->cmd.dbell_offsets[i]);
+
+ if ((base & PAGE_MASK) != ((base + max_off) & PAGE_MASK)) {
+ mthca_warn(dev, "Firmware doorbell region at 0x%016llx, "
+ "length 0x%x crosses a page boundary\n",
+ (unsigned long long) base, max_off);
+ return;
+ }
+
+ addr = pci_resource_start(dev->pdev, 2) +
+ ((pci_resource_len(dev->pdev, 2) - 1) & base);
+ dev->cmd.dbell_map = ioremap(addr, max_off + sizeof(u32));
+ if (!dev->cmd.dbell_map)
+ return;
+
+ dev->cmd.flags |= MTHCA_CMD_POST_DOORBELLS;
+ mthca_dbg(dev, "Mapped doorbell page for posting FW commands\n");
+}
+
+int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
+{
+ struct mthca_mailbox *mailbox;
+ u32 *outbox;
+ u64 base;
+ u32 tmp;
+ int err = 0;
+ u8 lg;
+ int i;
+
+#define QUERY_FW_OUT_SIZE 0x100
+#define QUERY_FW_VER_OFFSET 0x00
+#define QUERY_FW_MAX_CMD_OFFSET 0x0f
+#define QUERY_FW_ERR_START_OFFSET 0x30
+#define QUERY_FW_ERR_SIZE_OFFSET 0x38
+
+#define QUERY_FW_CMD_DB_EN_OFFSET 0x10
+#define QUERY_FW_CMD_DB_OFFSET 0x50
+#define QUERY_FW_CMD_DB_BASE 0x60
+
+#define QUERY_FW_START_OFFSET 0x20
+#define QUERY_FW_END_OFFSET 0x28
+
+#define QUERY_FW_SIZE_OFFSET 0x00
+#define QUERY_FW_CLR_INT_BASE_OFFSET 0x20
+#define QUERY_FW_EQ_ARM_BASE_OFFSET 0x40
+#define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_FW,
+ CMD_TIME_CLASS_A, status);
+
+ if (err)
+ goto out;
+
+ MTHCA_GET(dev->fw_ver, outbox, QUERY_FW_VER_OFFSET);
+ /*
+ * FW subminor version is at more significant bits than minor
+ * version, so swap here.
+ */
+ dev->fw_ver = (dev->fw_ver & 0xffff00000000ull) |
+ ((dev->fw_ver & 0xffff0000ull) >> 16) |
+ ((dev->fw_ver & 0x0000ffffull) << 16);
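+ /*
+ * e.g. a raw value of 0x000500030001 (major 5, subminor 3, minor 1)
+ * becomes 0x000500010003, which prints as FW version 5.1.3.
+ */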
+
+ MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
+ dev->cmd.max_cmds = 1 << lg;
+
+ mthca_dbg(dev, "FW version %012llx, max commands %d\n",
+ (unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
+
+ MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET);
+ MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET);
+
+ mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n",
+ (unsigned long long) dev->catas_err.addr, dev->catas_err.size);
+
+ MTHCA_GET(tmp, outbox, QUERY_FW_CMD_DB_EN_OFFSET);
+ if (tmp & 0x1) {
+ mthca_dbg(dev, "FW supports commands through doorbells\n");
+
+ MTHCA_GET(base, outbox, QUERY_FW_CMD_DB_BASE);
+ for (i = 0; i < MTHCA_CMD_NUM_DBELL_DWORDS; ++i)
+ MTHCA_GET(dev->cmd.dbell_offsets[i], outbox,
+ QUERY_FW_CMD_DB_OFFSET + (i << 1));
+
+ mthca_setup_cmd_doorbells(dev, base);
+ }
+
+ if (mthca_is_memfree(dev)) {
+ MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET);
+ MTHCA_GET(dev->fw.arbel.clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET);
+ MTHCA_GET(dev->fw.arbel.eq_arm_base, outbox, QUERY_FW_EQ_ARM_BASE_OFFSET);
+ MTHCA_GET(dev->fw.arbel.eq_set_ci_base, outbox, QUERY_FW_EQ_SET_CI_BASE_OFFSET);
+ mthca_dbg(dev, "FW size %d KB\n", dev->fw.arbel.fw_pages << 2);
+
+ /*
+ * Round up number of system pages needed in case
+ * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE.
+ */
+ dev->fw.arbel.fw_pages =
+ ALIGN(dev->fw.arbel.fw_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >>
+ (PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT);
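+ /*
+ * e.g. with 64KB kernel pages and 4KB ICM pages, fw_pages is rounded
+ * up to a multiple of 16 ICM pages and then converted to a count of
+ * 64KB system pages.
+ */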
+
+ mthca_dbg(dev, "Clear int @ %llx, EQ arm @ %llx, EQ set CI @ %llx\n",
+ (unsigned long long) dev->fw.arbel.clr_int_base,
+ (unsigned long long) dev->fw.arbel.eq_arm_base,
+ (unsigned long long) dev->fw.arbel.eq_set_ci_base);
+ } else {
+ MTHCA_GET(dev->fw.tavor.fw_start, outbox, QUERY_FW_START_OFFSET);
+ MTHCA_GET(dev->fw.tavor.fw_end, outbox, QUERY_FW_END_OFFSET);
+
+ mthca_dbg(dev, "FW size %d KB (start %llx, end %llx)\n",
+ (int) ((dev->fw.tavor.fw_end - dev->fw.tavor.fw_start) >> 10),
+ (unsigned long long) dev->fw.tavor.fw_start,
+ (unsigned long long) dev->fw.tavor.fw_end);
+ }
+
+out:
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
+int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
+{
+ struct mthca_mailbox *mailbox;
+ u8 info;
+ u32 *outbox;
+ int err = 0;
+
+#define ENABLE_LAM_OUT_SIZE 0x100
+#define ENABLE_LAM_START_OFFSET 0x00
+#define ENABLE_LAM_END_OFFSET 0x08
+#define ENABLE_LAM_INFO_OFFSET 0x13
+
+#define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4)
+#define ENABLE_LAM_INFO_ECC_MASK 0x3
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_ENABLE_LAM,
+ CMD_TIME_CLASS_C, status);
+
+ if (err)
+ goto out;
+
+ if (*status == MTHCA_CMD_STAT_LAM_NOT_PRE)
+ goto out;
+
+ MTHCA_GET(dev->ddr_start, outbox, ENABLE_LAM_START_OFFSET);
+ MTHCA_GET(dev->ddr_end, outbox, ENABLE_LAM_END_OFFSET);
+ MTHCA_GET(info, outbox, ENABLE_LAM_INFO_OFFSET);
+
+ if (!!(info & ENABLE_LAM_INFO_HIDDEN_FLAG) !=
+ !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
+ mthca_info(dev, "FW reports that HCA-attached memory "
+ "is %s hidden; does not match PCI config\n",
+ (info & ENABLE_LAM_INFO_HIDDEN_FLAG) ?
+ "" : "not");
+ }
+ if (info & ENABLE_LAM_INFO_HIDDEN_FLAG)
+ mthca_dbg(dev, "HCA-attached memory is hidden.\n");
+
+ mthca_dbg(dev, "HCA memory size %d KB (start %llx, end %llx)\n",
+ (int) ((dev->ddr_end - dev->ddr_start) >> 10),
+ (unsigned long long) dev->ddr_start,
+ (unsigned long long) dev->ddr_end);
+
+out:
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
+int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, 0, CMD_DISABLE_LAM, CMD_TIME_CLASS_C, status);
+}
+
+int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
+{
+ struct mthca_mailbox *mailbox;
+ u8 info;
+ u32 *outbox;
+ int err = 0;
+
+#define QUERY_DDR_OUT_SIZE 0x100
+#define QUERY_DDR_START_OFFSET 0x00
+#define QUERY_DDR_END_OFFSET 0x08
+#define QUERY_DDR_INFO_OFFSET 0x13
+
+#define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4)
+#define QUERY_DDR_INFO_ECC_MASK 0x3
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DDR,
+ CMD_TIME_CLASS_A, status);
+
+ if (err)
+ goto out;
+
+ MTHCA_GET(dev->ddr_start, outbox, QUERY_DDR_START_OFFSET);
+ MTHCA_GET(dev->ddr_end, outbox, QUERY_DDR_END_OFFSET);
+ MTHCA_GET(info, outbox, QUERY_DDR_INFO_OFFSET);
+
+ if (!!(info & QUERY_DDR_INFO_HIDDEN_FLAG) !=
+ !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) {
+ mthca_info(dev, "FW reports that HCA-attached memory "
+ "is %s hidden; does not match PCI config\n",
+ (info & QUERY_DDR_INFO_HIDDEN_FLAG) ?
+ "" : "not");
+ }
+ if (info & QUERY_DDR_INFO_HIDDEN_FLAG)
+ mthca_dbg(dev, "HCA-attached memory is hidden.\n");
+
+ mthca_dbg(dev, "HCA memory size %d KB (start %llx, end %llx)\n",
+ (int) ((dev->ddr_end - dev->ddr_start) >> 10),
+ (unsigned long long) dev->ddr_start,
+ (unsigned long long) dev->ddr_end);
+
+out:
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
+int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
+ struct mthca_dev_lim *dev_lim, u8 *status)
+{
+ struct mthca_mailbox *mailbox;
+ u32 *outbox;
+ u8 field;
+ u16 size;
+ u16 stat_rate;
+ int err;
+
+#define QUERY_DEV_LIM_OUT_SIZE 0x100
+#define QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET 0x10
+#define QUERY_DEV_LIM_MAX_QP_SZ_OFFSET 0x11
+#define QUERY_DEV_LIM_RSVD_QP_OFFSET 0x12
+#define QUERY_DEV_LIM_MAX_QP_OFFSET 0x13
+#define QUERY_DEV_LIM_RSVD_SRQ_OFFSET 0x14
+#define QUERY_DEV_LIM_MAX_SRQ_OFFSET 0x15
+#define QUERY_DEV_LIM_RSVD_EEC_OFFSET 0x16
+#define QUERY_DEV_LIM_MAX_EEC_OFFSET 0x17
+#define QUERY_DEV_LIM_MAX_CQ_SZ_OFFSET 0x19
+#define QUERY_DEV_LIM_RSVD_CQ_OFFSET 0x1a
+#define QUERY_DEV_LIM_MAX_CQ_OFFSET 0x1b
+#define QUERY_DEV_LIM_MAX_MPT_OFFSET 0x1d
+#define QUERY_DEV_LIM_RSVD_EQ_OFFSET 0x1e
+#define QUERY_DEV_LIM_MAX_EQ_OFFSET 0x1f
+#define QUERY_DEV_LIM_RSVD_MTT_OFFSET 0x20
+#define QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET 0x21
+#define QUERY_DEV_LIM_RSVD_MRW_OFFSET 0x22
+#define QUERY_DEV_LIM_MAX_MTT_SEG_OFFSET 0x23
+#define QUERY_DEV_LIM_MAX_AV_OFFSET 0x27
+#define QUERY_DEV_LIM_MAX_REQ_QP_OFFSET 0x29
+#define QUERY_DEV_LIM_MAX_RES_QP_OFFSET 0x2b
+#define QUERY_DEV_LIM_MAX_RDMA_OFFSET 0x2f
+#define QUERY_DEV_LIM_RSZ_SRQ_OFFSET 0x33
+#define QUERY_DEV_LIM_ACK_DELAY_OFFSET 0x35
+#define QUERY_DEV_LIM_MTU_WIDTH_OFFSET 0x36
+#define QUERY_DEV_LIM_VL_PORT_OFFSET 0x37
+#define QUERY_DEV_LIM_MAX_GID_OFFSET 0x3b
+#define QUERY_DEV_LIM_RATE_SUPPORT_OFFSET 0x3c
+#define QUERY_DEV_LIM_MAX_PKEY_OFFSET 0x3f
+#define QUERY_DEV_LIM_FLAGS_OFFSET 0x44
+#define QUERY_DEV_LIM_RSVD_UAR_OFFSET 0x48
+#define QUERY_DEV_LIM_UAR_SZ_OFFSET 0x49
+#define QUERY_DEV_LIM_PAGE_SZ_OFFSET 0x4b
+#define QUERY_DEV_LIM_MAX_SG_OFFSET 0x51
+#define QUERY_DEV_LIM_MAX_DESC_SZ_OFFSET 0x52
+#define QUERY_DEV_LIM_MAX_SG_RQ_OFFSET 0x55
+#define QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET 0x56
+#define QUERY_DEV_LIM_MAX_QP_MCG_OFFSET 0x61
+#define QUERY_DEV_LIM_RSVD_MCG_OFFSET 0x62
+#define QUERY_DEV_LIM_MAX_MCG_OFFSET 0x63
+#define QUERY_DEV_LIM_RSVD_PD_OFFSET 0x64
+#define QUERY_DEV_LIM_MAX_PD_OFFSET 0x65
+#define QUERY_DEV_LIM_RSVD_RDD_OFFSET 0x66
+#define QUERY_DEV_LIM_MAX_RDD_OFFSET 0x67
+#define QUERY_DEV_LIM_EEC_ENTRY_SZ_OFFSET 0x80
+#define QUERY_DEV_LIM_QPC_ENTRY_SZ_OFFSET 0x82
+#define QUERY_DEV_LIM_EEEC_ENTRY_SZ_OFFSET 0x84
+#define QUERY_DEV_LIM_EQPC_ENTRY_SZ_OFFSET 0x86
+#define QUERY_DEV_LIM_EQC_ENTRY_SZ_OFFSET 0x88
+#define QUERY_DEV_LIM_CQC_ENTRY_SZ_OFFSET 0x8a
+#define QUERY_DEV_LIM_SRQ_ENTRY_SZ_OFFSET 0x8c
+#define QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET 0x8e
+#define QUERY_DEV_LIM_MTT_ENTRY_SZ_OFFSET 0x90
+#define QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET 0x92
+#define QUERY_DEV_LIM_PBL_SZ_OFFSET 0x96
+#define QUERY_DEV_LIM_BMME_FLAGS_OFFSET 0x97
+#define QUERY_DEV_LIM_RSVD_LKEY_OFFSET 0x98
+#define QUERY_DEV_LIM_LAMR_OFFSET 0x9f
+#define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET 0xa0
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DEV_LIM,
+ CMD_TIME_CLASS_A, status);
+
+ if (err)
+ goto out;
+
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET);
+ dev_lim->reserved_qps = 1 << (field & 0xf);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET);
+ dev_lim->max_qps = 1 << (field & 0x1f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_SRQ_OFFSET);
+ dev_lim->reserved_srqs = 1 << (field >> 4);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_OFFSET);
+ dev_lim->max_srqs = 1 << (field & 0x1f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_EEC_OFFSET);
+ dev_lim->reserved_eecs = 1 << (field & 0xf);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EEC_OFFSET);
+ dev_lim->max_eecs = 1 << (field & 0x1f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_CQ_SZ_OFFSET);
+ dev_lim->max_cq_sz = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_CQ_OFFSET);
+ dev_lim->reserved_cqs = 1 << (field & 0xf);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_CQ_OFFSET);
+ dev_lim->max_cqs = 1 << (field & 0x1f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MPT_OFFSET);
+ dev_lim->max_mpts = 1 << (field & 0x3f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_EQ_OFFSET);
+ dev_lim->reserved_eqs = 1 << (field & 0xf);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET);
+ dev_lim->max_eqs = 1 << (field & 0x7);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET);
+ if (mthca_is_memfree(dev))
+ dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64),
+ MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE;
+ else
+ dev_lim->reserved_mtts = 1 << (field >> 4);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET);
+ dev_lim->max_mrw_sz = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET);
+ dev_lim->reserved_mrws = 1 << (field & 0xf);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MTT_SEG_OFFSET);
+ dev_lim->max_mtt_seg = 1 << (field & 0x3f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_REQ_QP_OFFSET);
+ dev_lim->max_requester_per_qp = 1 << (field & 0x3f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RES_QP_OFFSET);
+ dev_lim->max_responder_per_qp = 1 << (field & 0x3f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RDMA_OFFSET);
+ dev_lim->max_rdma_global = 1 << (field & 0x3f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_ACK_DELAY_OFFSET);
+ dev_lim->local_ca_ack_delay = field & 0x1f;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MTU_WIDTH_OFFSET);
+ dev_lim->max_mtu = field >> 4;
+ dev_lim->max_port_width = field & 0xf;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_VL_PORT_OFFSET);
+ dev_lim->max_vl = field >> 4;
+ dev_lim->num_ports = field & 0xf;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET);
+ dev_lim->max_gids = 1 << (field & 0xf);
+ MTHCA_GET(stat_rate, outbox, QUERY_DEV_LIM_RATE_SUPPORT_OFFSET);
+ dev_lim->stat_rate_support = stat_rate;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET);
+ dev_lim->max_pkeys = 1 << (field & 0xf);
+ MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_UAR_OFFSET);
+ dev_lim->reserved_uars = field >> 4;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_UAR_SZ_OFFSET);
+ dev_lim->uar_size = 1 << ((field & 0x3f) + 20);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_PAGE_SZ_OFFSET);
+ dev_lim->min_page_sz = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_OFFSET);
+ dev_lim->max_sg = field;
+
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_OFFSET);
+ dev_lim->max_desc_sz = size;
+
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_MCG_OFFSET);
+ dev_lim->max_qp_per_mcg = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MCG_OFFSET);
+ dev_lim->reserved_mgms = field & 0xf;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MCG_OFFSET);
+ dev_lim->max_mcgs = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_PD_OFFSET);
+ dev_lim->reserved_pds = field >> 4;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PD_OFFSET);
+ dev_lim->max_pds = 1 << (field & 0x3f);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_RDD_OFFSET);
+ dev_lim->reserved_rdds = field >> 4;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RDD_OFFSET);
+ dev_lim->max_rdds = 1 << (field & 0x3f);
+
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_EEC_ENTRY_SZ_OFFSET);
+ dev_lim->eec_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_QPC_ENTRY_SZ_OFFSET);
+ dev_lim->qpc_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_EEEC_ENTRY_SZ_OFFSET);
+ dev_lim->eeec_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_EQPC_ENTRY_SZ_OFFSET);
+ dev_lim->eqpc_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_EQC_ENTRY_SZ_OFFSET);
+ dev_lim->eqc_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_CQC_ENTRY_SZ_OFFSET);
+ dev_lim->cqc_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_SRQ_ENTRY_SZ_OFFSET);
+ dev_lim->srq_entry_sz = size;
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET);
+ dev_lim->uar_scratch_entry_sz = size;
+
+ if (mthca_is_memfree(dev)) {
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
+ dev_lim->max_srq_sz = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
+ dev_lim->max_qp_sz = 1 << field;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET);
+ dev_lim->hca.arbel.resize_srq = field & 1;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET);
+ dev_lim->max_sg = min_t(int, field, dev_lim->max_sg);
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET);
+ dev_lim->max_desc_sz = min_t(int, size, dev_lim->max_desc_sz);
+ MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET);
+ dev_lim->mpt_entry_sz = size;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET);
+ dev_lim->hca.arbel.max_pbl_sz = 1 << (field & 0x3f);
+ MTHCA_GET(dev_lim->hca.arbel.bmme_flags, outbox,
+ QUERY_DEV_LIM_BMME_FLAGS_OFFSET);
+ MTHCA_GET(dev_lim->hca.arbel.reserved_lkey, outbox,
+ QUERY_DEV_LIM_RSVD_LKEY_OFFSET);
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_LAMR_OFFSET);
+ dev_lim->hca.arbel.lam_required = field & 1;
+ MTHCA_GET(dev_lim->hca.arbel.max_icm_sz, outbox,
+ QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET);
+
+ if (dev_lim->hca.arbel.bmme_flags & 1)
+ mthca_dbg(dev, "Base MM extensions: yes "
+ "(flags %d, max PBL %d, rsvd L_Key %08x)\n",
+ dev_lim->hca.arbel.bmme_flags,
+ dev_lim->hca.arbel.max_pbl_sz,
+ dev_lim->hca.arbel.reserved_lkey);
+ else
+ mthca_dbg(dev, "Base MM extensions: no\n");
+
+ mthca_dbg(dev, "Max ICM size %lld MB\n",
+ (unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20);
+ } else {
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
+ dev_lim->max_srq_sz = (1 << field) - 1;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
+ dev_lim->max_qp_sz = (1 << field) - 1;
+ MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET);
+ dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f);
+ dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE;
+ }
+
+ mthca_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n",
+ dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz);
+ mthca_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n",
+ dev_lim->max_srqs, dev_lim->reserved_srqs, dev_lim->srq_entry_sz);
+ mthca_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n",
+ dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz);
+ mthca_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n",
+ dev_lim->max_eqs, dev_lim->reserved_eqs, dev_lim->eqc_entry_sz);
+ mthca_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n",
+ dev_lim->reserved_mrws, dev_lim->reserved_mtts);
+ mthca_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n",
+ dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars);
+ mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
+ dev_lim->max_qp_per_mcg, dev_lim->reserved_mgms);
+ mthca_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n",
+ dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz);
+
+ mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
+
+out:
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
+static void get_board_id(void *vsd, char *board_id)
+{
+ int i;
+
+#define VSD_OFFSET_SIG1 0x00
+#define VSD_OFFSET_SIG2 0xde
+#define VSD_OFFSET_MLX_BOARD_ID 0xd0
+#define VSD_OFFSET_TS_BOARD_ID 0x20
+
+#define VSD_SIGNATURE_TOPSPIN 0x5ad
+
+ memset(board_id, 0, MTHCA_BOARD_ID_LEN);
+
+ if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN &&
+ be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) {
+ strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN);
+ } else {
+ /*
+ * The board ID is a string but the firmware byte
+ * swaps each 4-byte word before passing it back to
+ * us. Therefore we need to swab it before printing.
+ */
+ for (i = 0; i < 4; ++i)
+ ((u32 *) board_id)[i] =
+ swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4));
+ }
+}
+
+int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
+ struct mthca_adapter *adapter, u8 *status)
+{
+ struct mthca_mailbox *mailbox;
+ u32 *outbox;
+ int err;
+
+#define QUERY_ADAPTER_OUT_SIZE 0x100
+#define QUERY_ADAPTER_VENDOR_ID_OFFSET 0x00
+#define QUERY_ADAPTER_DEVICE_ID_OFFSET 0x04
+#define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08
+#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10
+#define QUERY_ADAPTER_VSD_OFFSET 0x20
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_ADAPTER,
+ CMD_TIME_CLASS_A, status);
+
+ if (err)
+ goto out;
+
+ if (!mthca_is_memfree(dev)) {
+ MTHCA_GET(adapter->vendor_id, outbox,
+ QUERY_ADAPTER_VENDOR_ID_OFFSET);
+ MTHCA_GET(adapter->device_id, outbox,
+ QUERY_ADAPTER_DEVICE_ID_OFFSET);
+ MTHCA_GET(adapter->revision_id, outbox,
+ QUERY_ADAPTER_REVISION_ID_OFFSET);
+ }
+ MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET);
+
+ get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4,
+ adapter->board_id);
+
+out:
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
+int mthca_INIT_HCA(struct mthca_dev *dev,
+ struct mthca_init_hca_param *param,
+ u8 *status)
+{
+ struct mthca_mailbox *mailbox;
+ __be32 *inbox;
+ int err;
+
+#define INIT_HCA_IN_SIZE 0x200
+#define INIT_HCA_FLAGS1_OFFSET 0x00c
+#define INIT_HCA_FLAGS2_OFFSET 0x014
+#define INIT_HCA_QPC_OFFSET 0x020
+#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
+#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
+#define INIT_HCA_EEC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x20)
+#define INIT_HCA_LOG_EEC_OFFSET (INIT_HCA_QPC_OFFSET + 0x27)
+#define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28)
+#define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f)
+#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30)
+#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37)
+#define INIT_HCA_EQPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40)
+#define INIT_HCA_EEEC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50)
+#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60)
+#define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67)
+#define INIT_HCA_RDB_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70)
+#define INIT_HCA_UDAV_OFFSET 0x0b0
+#define INIT_HCA_UDAV_LKEY_OFFSET (INIT_HCA_UDAV_OFFSET + 0x0)
+#define INIT_HCA_UDAV_PD_OFFSET (INIT_HCA_UDAV_OFFSET + 0x4)
+#define INIT_HCA_MCAST_OFFSET 0x0c0
+#define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00)
+#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12)
+#define INIT_HCA_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16)
+#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
+#define INIT_HCA_TPT_OFFSET 0x0f0
+#define INIT_HCA_MPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00)
+#define INIT_HCA_MTT_SEG_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x09)
+#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b)
+#define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10)
+#define INIT_HCA_UAR_OFFSET 0x120
+#define INIT_HCA_UAR_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x00)
+#define INIT_HCA_UARC_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x09)
+#define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a)
+#define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b)
+#define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10)
+#define INIT_HCA_UAR_CTX_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x18)
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
+
+ memset(inbox, 0, INIT_HCA_IN_SIZE);
+
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ MTHCA_PUT(inbox, 0x1, INIT_HCA_FLAGS1_OFFSET);
+
+#if defined(__LITTLE_ENDIAN)
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
+#elif defined(__BIG_ENDIAN)
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1 << 1);
+#else
+#error Host endianness not defined
+#endif
+ /* Check port for UD address vector: */
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1);
+
+ /* Enable IPoIB checksumming if we can: */
+ if (dev->device_cap_flags & IB_DEVICE_UD_IP_CSUM)
+ *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(7 << 3);
+
+ /* We leave wqe_quota, responder_exu, etc as 0 (default) */
+
+ /* QPC/EEC/CQC/EQC/RDB attributes */
+
+ MTHCA_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->log_num_qps, INIT_HCA_LOG_QP_OFFSET);
+ MTHCA_PUT(inbox, param->eec_base, INIT_HCA_EEC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->log_num_eecs, INIT_HCA_LOG_EEC_OFFSET);
+ MTHCA_PUT(inbox, param->srqc_base, INIT_HCA_SRQC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->log_num_srqs, INIT_HCA_LOG_SRQ_OFFSET);
+ MTHCA_PUT(inbox, param->cqc_base, INIT_HCA_CQC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->log_num_cqs, INIT_HCA_LOG_CQ_OFFSET);
+ MTHCA_PUT(inbox, param->eqpc_base, INIT_HCA_EQPC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->eeec_base, INIT_HCA_EEEC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET);
+ MTHCA_PUT(inbox, param->rdb_base, INIT_HCA_RDB_BASE_OFFSET);
+
+ /* UD AV attributes */
+
+ /* multicast attributes */
+
+ MTHCA_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET);
+ MTHCA_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->mc_hash_sz, INIT_HCA_MC_HASH_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+
+ /* TPT attributes */
+
+ MTHCA_PUT(inbox, param->mpt_base, INIT_HCA_MPT_BASE_OFFSET);
+ if (!mthca_is_memfree(dev))
+ MTHCA_PUT(inbox, param->mtt_seg_sz, INIT_HCA_MTT_SEG_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET);
+
+ /* UAR attributes */
+ {
+ u8 uar_page_sz = PAGE_SHIFT - 12;
+ MTHCA_PUT(inbox, uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET);
+ }
+
+ MTHCA_PUT(inbox, param->uar_scratch_base, INIT_HCA_UAR_SCATCH_BASE_OFFSET);
+
+ if (mthca_is_memfree(dev)) {
+ MTHCA_PUT(inbox, param->log_uarc_sz, INIT_HCA_UARC_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
+ MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET);
+ }
+
+ err = mthca_cmd(dev, mailbox->dma, 0, 0, CMD_INIT_HCA, HZ, status);
+
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
+int mthca_INIT_IB(struct mthca_dev *dev,
+ struct mthca_init_ib_param *param,
+ int port, u8 *status)
+{
+ struct mthca_mailbox *mailbox;
+ u32 *inbox;
+ int err;
+ u32 flags;
+
+#define INIT_IB_IN_SIZE 56
+#define INIT_IB_FLAGS_OFFSET 0x00
+#define INIT_IB_FLAG_SIG (1 << 18)
+#define INIT_IB_FLAG_NG (1 << 17)
+#define INIT_IB_FLAG_G0 (1 << 16)
+#define INIT_IB_VL_SHIFT 4
+#define INIT_IB_PORT_WIDTH_SHIFT 8
+#define INIT_IB_MTU_SHIFT 12
+#define INIT_IB_MAX_GID_OFFSET 0x06
+#define INIT_IB_MAX_PKEY_OFFSET 0x0a
+#define INIT_IB_GUID0_OFFSET 0x10
+#define INIT_IB_NODE_GUID_OFFSET 0x18
+#define INIT_IB_SI_GUID_OFFSET 0x20
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
+
+ memset(inbox, 0, INIT_IB_IN_SIZE);
+
+ flags = 0;
+ flags |= param->set_guid0 ? INIT_IB_FLAG_G0 : 0;
+ flags |= param->set_node_guid ? INIT_IB_FLAG_NG : 0;
+ flags |= param->set_si_guid ? INIT_IB_FLAG_SIG : 0;
+ flags |= param->vl_cap << INIT_IB_VL_SHIFT;
+ flags |= param->port_width << INIT_IB_PORT_WIDTH_SHIFT;
+ flags |= param->mtu_cap << INIT_IB_MTU_SHIFT;
+ MTHCA_PUT(inbox, flags, INIT_IB_FLAGS_OFFSET);
+
+ MTHCA_PUT(inbox, param->gid_cap, INIT_IB_MAX_GID_OFFSET);
+ MTHCA_PUT(inbox, param->pkey_cap, INIT_IB_MAX_PKEY_OFFSET);
+ MTHCA_PUT(inbox, param->guid0, INIT_IB_GUID0_OFFSET);
+ MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET);
+ MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET);
+
+ err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_INIT_IB,
+ CMD_TIME_CLASS_A, status);
+
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
+int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status)
+{
+ return mthca_cmd(dev, 0, port, 0, CMD_CLOSE_IB, HZ, status);
+}
+
+int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, panic, CMD_CLOSE_HCA, HZ, status);
+}
+
+int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
+ int port, u8 *status)
+{
+ struct mthca_mailbox *mailbox;
+ u32 *inbox;
+ int err;
+ u32 flags = 0;
+
+#define SET_IB_IN_SIZE 0x40
+#define SET_IB_FLAGS_OFFSET 0x00
+#define SET_IB_FLAG_SIG (1 << 18)
+#define SET_IB_FLAG_RQK (1 << 0)
+#define SET_IB_CAP_MASK_OFFSET 0x04
+#define SET_IB_SI_GUID_OFFSET 0x08
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
+
+ memset(inbox, 0, SET_IB_IN_SIZE);
+
+ flags |= param->set_si_guid ? SET_IB_FLAG_SIG : 0;
+ flags |= param->reset_qkey_viol ? SET_IB_FLAG_RQK : 0;
+ MTHCA_PUT(inbox, flags, SET_IB_FLAGS_OFFSET);
+
+ MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET);
+ MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET);
+
+ err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_SET_IB,
+ CMD_TIME_CLASS_B, status);
+
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
+int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status)
+{
+ return mthca_map_cmd(dev, CMD_MAP_ICM, icm, virt, status);
+}
+
+int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status)
+{
+ struct mthca_mailbox *mailbox;
+ __be64 *inbox;
+ int err;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
+
+ inbox[0] = cpu_to_be64(virt);
+ inbox[1] = cpu_to_be64(dma_addr);
+
+ err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM,
+ CMD_TIME_CLASS_B, status);
+
+ mthca_free_mailbox(dev, mailbox);
+
+ if (!err)
+ mthca_dbg(dev, "Mapped page at %llx to %llx for ICM.\n",
+ (unsigned long long) dma_addr, (unsigned long long) virt);
+
+ return err;
+}
+
+int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status)
+{
+ mthca_dbg(dev, "Unmapping %d pages at %llx from ICM.\n",
+ page_count, (unsigned long long) virt);
+
+ return mthca_cmd(dev, virt, page_count, 0, CMD_UNMAP_ICM, CMD_TIME_CLASS_B, status);
+}
+
+int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status)
+{
+ return mthca_map_cmd(dev, CMD_MAP_ICM_AUX, icm, -1, status);
+}
+
+int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_ICM_AUX, CMD_TIME_CLASS_B, status);
+}
+
+int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
+ u8 *status)
+{
+ int ret = mthca_cmd_imm(dev, icm_size, aux_pages, 0, 0, CMD_SET_ICM_SIZE,
+ CMD_TIME_CLASS_A, status);
+
+	if (ret || *status)
+ return ret;
+
+ /*
+ * Round up number of system pages needed in case
+ * MTHCA_ICM_PAGE_SIZE < PAGE_SIZE.
+ */
+ *aux_pages = ALIGN(*aux_pages, PAGE_SIZE / MTHCA_ICM_PAGE_SIZE) >>
+ (PAGE_SHIFT - MTHCA_ICM_PAGE_SHIFT);
+
+ return 0;
+}
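
Editor's note: the ALIGN/shift above converts the auxiliary-space requirement that firmware reports in MTHCA_ICM_PAGE_SIZE units into whole host pages. A minimal userspace sketch of the same arithmetic follows; the 4 KiB ICM page and 64 KiB host page values are illustrative assumptions, not taken from the driver.

#include <stdio.h>
#include <stdint.h>

/* Illustrative values only -- the driver takes these from the kernel. */
#define ICM_PAGE_SHIFT  12                     /* 4 KiB firmware (ICM) pages  */
#define HOST_PAGE_SHIFT 16                     /* 64 KiB host pages (example) */
#define ICM_PAGE_SIZE   (1UL << ICM_PAGE_SHIFT)
#define HOST_PAGE_SIZE  (1UL << HOST_PAGE_SHIFT)
#define ALIGN_UP(x, a)  (((x) + (a) - 1) / (a) * (a))

int main(void)
{
    /* Firmware reports how many ICM pages of auxiliary space it needs. */
    uint64_t aux_icm_pages = 33;

    /* Round up to a whole number of host pages, as the driver does when
     * ICM_PAGE_SIZE < HOST_PAGE_SIZE, then convert to host-page units. */
    uint64_t host_pages = ALIGN_UP(aux_icm_pages, HOST_PAGE_SIZE / ICM_PAGE_SIZE)
                          >> (HOST_PAGE_SHIFT - ICM_PAGE_SHIFT);

    printf("%llu ICM pages -> %llu host pages\n",
           (unsigned long long) aux_icm_pages,
           (unsigned long long) host_pages);
    return 0;
}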
+
+int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int mpt_index, u8 *status)
+{
+ return mthca_cmd(dev, mailbox->dma, mpt_index, 0, CMD_SW2HW_MPT,
+ CMD_TIME_CLASS_B, status);
+}
+
+int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int mpt_index, u8 *status)
+{
+ return mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
+ !mailbox, CMD_HW2SW_MPT,
+ CMD_TIME_CLASS_B, status);
+}
+
+int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int num_mtt, u8 *status)
+{
+ return mthca_cmd(dev, mailbox->dma, num_mtt, 0, CMD_WRITE_MTT,
+ CMD_TIME_CLASS_B, status);
+}
+
+int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0, 0, CMD_SYNC_TPT, CMD_TIME_CLASS_B, status);
+}
+
+int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
+ int eq_num, u8 *status)
+{
+ mthca_dbg(dev, "%s mask %016llx for eqn %d\n",
+ unmap ? "Clearing" : "Setting",
+ (unsigned long long) event_mask, eq_num);
+ return mthca_cmd(dev, event_mask, (unmap << 31) | eq_num,
+ 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status);
+}
+
+int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int eq_num, u8 *status)
+{
+ return mthca_cmd(dev, mailbox->dma, eq_num, 0, CMD_SW2HW_EQ,
+ CMD_TIME_CLASS_A, status);
+}
+
+int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int eq_num, u8 *status)
+{
+ return mthca_cmd_box(dev, 0, mailbox->dma, eq_num, 0,
+ CMD_HW2SW_EQ,
+ CMD_TIME_CLASS_A, status);
+}
+
+int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int cq_num, u8 *status)
+{
+ return mthca_cmd(dev, mailbox->dma, cq_num, 0, CMD_SW2HW_CQ,
+ CMD_TIME_CLASS_A, status);
+}
+
+int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int cq_num, u8 *status)
+{
+ return mthca_cmd_box(dev, 0, mailbox->dma, cq_num, 0,
+ CMD_HW2SW_CQ,
+ CMD_TIME_CLASS_A, status);
+}
+
+int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
+ u8 *status)
+{
+ struct mthca_mailbox *mailbox;
+ __be32 *inbox;
+ int err;
+
+#define RESIZE_CQ_IN_SIZE 0x40
+#define RESIZE_CQ_LOG_SIZE_OFFSET 0x0c
+#define RESIZE_CQ_LKEY_OFFSET 0x1c
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ inbox = mailbox->buf;
+
+ memset(inbox, 0, RESIZE_CQ_IN_SIZE);
+ /*
+ * Leave start address fields zeroed out -- mthca assumes that
+ * MRs for CQs always start at virtual address 0.
+ */
+ MTHCA_PUT(inbox, log_size, RESIZE_CQ_LOG_SIZE_OFFSET);
+ MTHCA_PUT(inbox, lkey, RESIZE_CQ_LKEY_OFFSET);
+
+ err = mthca_cmd(dev, mailbox->dma, cq_num, 1, CMD_RESIZE_CQ,
+ CMD_TIME_CLASS_B, status);
+
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
+int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int srq_num, u8 *status)
+{
+ return mthca_cmd(dev, mailbox->dma, srq_num, 0, CMD_SW2HW_SRQ,
+ CMD_TIME_CLASS_A, status);
+}
+
+int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int srq_num, u8 *status)
+{
+ return mthca_cmd_box(dev, 0, mailbox->dma, srq_num, 0,
+ CMD_HW2SW_SRQ,
+ CMD_TIME_CLASS_A, status);
+}
+
+int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
+ struct mthca_mailbox *mailbox, u8 *status)
+{
+ return mthca_cmd_box(dev, 0, mailbox->dma, num, 0,
+ CMD_QUERY_SRQ, CMD_TIME_CLASS_A, status);
+}
+
+int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status)
+{
+ return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ,
+ CMD_TIME_CLASS_B, status);
+}
+
+int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
+ enum ib_qp_state next, u32 num, int is_ee,
+ struct mthca_mailbox *mailbox, u32 optmask,
+ u8 *status)
+{
+ static const u16 op[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_INIT] = CMD_RST2INIT_QPEE,
+ },
+ [IB_QPS_INIT] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_INIT] = CMD_INIT2INIT_QPEE,
+ [IB_QPS_RTR] = CMD_INIT2RTR_QPEE,
+ },
+ [IB_QPS_RTR] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_RTS] = CMD_RTR2RTS_QPEE,
+ },
+ [IB_QPS_RTS] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_RTS] = CMD_RTS2RTS_QPEE,
+ [IB_QPS_SQD] = CMD_RTS2SQD_QPEE,
+ },
+ [IB_QPS_SQD] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_RTS] = CMD_SQD2RTS_QPEE,
+ [IB_QPS_SQD] = CMD_SQD2SQD_QPEE,
+ },
+ [IB_QPS_SQE] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ [IB_QPS_RTS] = CMD_SQERR2RTS_QPEE,
+ },
+ [IB_QPS_ERR] = {
+ [IB_QPS_RESET] = CMD_ERR2RST_QPEE,
+ [IB_QPS_ERR] = CMD_2ERR_QPEE,
+ }
+ };
+
+ u8 op_mod = 0;
+ int my_mailbox = 0;
+ int err;
+
+ if (op[cur][next] == CMD_ERR2RST_QPEE) {
+ op_mod = 3; /* don't write outbox, any->reset */
+
+ /* For debugging */
+ if (!mailbox) {
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (!IS_ERR(mailbox)) {
+ my_mailbox = 1;
+ op_mod = 2; /* write outbox, any->reset */
+ } else
+ mailbox = NULL;
+ }
+
+ err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
+ (!!is_ee << 24) | num, op_mod,
+ op[cur][next], CMD_TIME_CLASS_C, status);
+
+ if (0 && mailbox) {
+ int i;
+ mthca_dbg(dev, "Dumping QP context:\n");
+ printk(" %08x\n", be32_to_cpup(mailbox->buf));
+ for (i = 0; i < 0x100 / 4; ++i) {
+ if (i % 8 == 0)
+ printk("[%02x] ", i * 4);
+ printk(" %08x",
+ be32_to_cpu(((__be32 *) mailbox->buf)[i + 2]));
+ if ((i + 1) % 8 == 0)
+ printk("\n");
+ }
+ }
+
+ if (my_mailbox)
+ mthca_free_mailbox(dev, mailbox);
+ } else {
+ if (0) {
+ int i;
+ mthca_dbg(dev, "Dumping QP context:\n");
+ printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf));
+ for (i = 0; i < 0x100 / 4; ++i) {
+ if (i % 8 == 0)
+ printk(" [%02x] ", i * 4);
+ printk(" %08x",
+ be32_to_cpu(((__be32 *) mailbox->buf)[i + 2]));
+ if ((i + 1) % 8 == 0)
+ printk("\n");
+ }
+ }
+
+ err = mthca_cmd(dev, mailbox->dma, optmask | (!!is_ee << 24) | num,
+ op_mod, op[cur][next], CMD_TIME_CLASS_C, status);
+ }
+
+ return err;
+}
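
Editor's note: the two-dimensional op[][] table above maps a (current, next) state pair onto the firmware opcode for that transition; entries left zero are transitions the hardware does not support. Below is a hedged userspace sketch of the same lookup idea, using placeholder strings instead of the real CMD_* opcodes.

#include <stdio.h>

/* Mirror of the driver's state numbering (IB_QPS_*), for illustration only. */
enum qp_state { QPS_RESET, QPS_INIT, QPS_RTR, QPS_RTS,
                QPS_SQD, QPS_SQE, QPS_ERR, QPS_NSTATES };

/* NULL entries are transitions the firmware has no opcode for. */
static const char *op[QPS_NSTATES][QPS_NSTATES] = {
    [QPS_RESET] = { [QPS_INIT] = "RST2INIT_QPEE",  [QPS_ERR] = "2ERR_QPEE" },
    [QPS_INIT]  = { [QPS_INIT] = "INIT2INIT_QPEE", [QPS_RTR] = "INIT2RTR_QPEE" },
    [QPS_RTR]   = { [QPS_RTS]  = "RTR2RTS_QPEE" },
    [QPS_RTS]   = { [QPS_RTS]  = "RTS2RTS_QPEE",   [QPS_SQD] = "RTS2SQD_QPEE" },
};

int main(void)
{
    /* Typical connected-QP bring-up: RESET -> INIT -> RTR -> RTS. */
    enum qp_state path[] = { QPS_RESET, QPS_INIT, QPS_RTR, QPS_RTS };
    int i, n = sizeof(path) / sizeof(path[0]);

    for (i = 0; i + 1 < n; ++i)
        printf("state %d -> %d uses CMD_%s\n", path[i], path[i + 1],
               op[path[i]][path[i + 1]] ? op[path[i]][path[i + 1]] : "(none)");
    return 0;
}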
+
+int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
+ struct mthca_mailbox *mailbox, u8 *status)
+{
+ return mthca_cmd_box(dev, 0, mailbox->dma, (!!is_ee << 24) | num, 0,
+ CMD_QUERY_QPEE, CMD_TIME_CLASS_A, status);
+}
+
+int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
+ u8 *status)
+{
+ u8 op_mod;
+
+ switch (type) {
+ case IB_QPT_SMI:
+ op_mod = 0;
+ break;
+ case IB_QPT_GSI:
+ op_mod = 1;
+ break;
+ case IB_QPT_RAW_IPV6:
+ op_mod = 2;
+ break;
+ case IB_QPT_RAW_ETY:
+ op_mod = 3;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return mthca_cmd(dev, 0, qpn, op_mod, CMD_CONF_SPECIAL_QP,
+ CMD_TIME_CLASS_B, status);
+}
+
+int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
+ int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+ void *in_mad, void *response_mad, u8 *status)
+{
+ struct mthca_mailbox *inmailbox, *outmailbox;
+ void *inbox;
+ int err;
+ u32 in_modifier = port;
+ u8 op_modifier = 0;
+
+#define MAD_IFC_BOX_SIZE 0x400
+#define MAD_IFC_MY_QPN_OFFSET 0x100
+#define MAD_IFC_RQPN_OFFSET 0x108
+#define MAD_IFC_SL_OFFSET 0x10c
+#define MAD_IFC_G_PATH_OFFSET 0x10d
+#define MAD_IFC_RLID_OFFSET 0x10e
+#define MAD_IFC_PKEY_OFFSET 0x112
+#define MAD_IFC_GRH_OFFSET 0x140
+
+ inmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(inmailbox))
+ return PTR_ERR(inmailbox);
+ inbox = inmailbox->buf;
+
+ outmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(outmailbox)) {
+ mthca_free_mailbox(dev, inmailbox);
+ return PTR_ERR(outmailbox);
+ }
+
+ memcpy(inbox, in_mad, 256);
+
+ /*
+ * Key check traps can't be generated unless we have in_wc to
+ * tell us where to send the trap.
+ */
+ if (ignore_mkey || !in_wc)
+ op_modifier |= 0x1;
+ if (ignore_bkey || !in_wc)
+ op_modifier |= 0x2;
+
+ if (in_wc) {
+ u8 val;
+
+ memset(inbox + 256, 0, 256);
+
+ MTHCA_PUT(inbox, in_wc->qp->qp_num, MAD_IFC_MY_QPN_OFFSET);
+ MTHCA_PUT(inbox, in_wc->src_qp, MAD_IFC_RQPN_OFFSET);
+
+ val = in_wc->sl << 4;
+ MTHCA_PUT(inbox, val, MAD_IFC_SL_OFFSET);
+
+ val = in_wc->dlid_path_bits |
+ (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
+ MTHCA_PUT(inbox, val, MAD_IFC_G_PATH_OFFSET);
+
+ MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET);
+ MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
+
+ if (in_grh)
+ memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40);
+
+ op_modifier |= 0x4;
+
+ in_modifier |= in_wc->slid << 16;
+ }
+
+ err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma,
+ in_modifier, op_modifier,
+ CMD_MAD_IFC, CMD_TIME_CLASS_C, status);
+
+ if (!err && !*status)
+ memcpy(response_mad, outmailbox->buf, 256);
+
+ mthca_free_mailbox(dev, inmailbox);
+ mthca_free_mailbox(dev, outmailbox);
+ return err;
+}
+
+int mthca_READ_MGM(struct mthca_dev *dev, int index,
+ struct mthca_mailbox *mailbox, u8 *status)
+{
+ return mthca_cmd_box(dev, 0, mailbox->dma, index, 0,
+ CMD_READ_MGM, CMD_TIME_CLASS_A, status);
+}
+
+int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
+ struct mthca_mailbox *mailbox, u8 *status)
+{
+ return mthca_cmd(dev, mailbox->dma, index, 0, CMD_WRITE_MGM,
+ CMD_TIME_CLASS_A, status);
+}
+
+int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ u16 *hash, u8 *status)
+{
+ u64 imm;
+ int err;
+
+ err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH,
+ CMD_TIME_CLASS_A, status);
+
+ *hash = imm;
+ return err;
+}
+
+int mthca_NOP(struct mthca_dev *dev, u8 *status)
+{
+ return mthca_cmd(dev, 0, 0x1f, 0, CMD_NOP, msecs_to_jiffies(100), status);
+}
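
Editor's note: the command wrappers above all follow one pattern: allocate a 4 KiB DMA mailbox, zero it, store big-endian fields at fixed byte offsets (MTHCA_PUT), post the mailbox to firmware, then free it. The standalone sketch below shows only the packing step for a couple of the INIT_IB fields; the offsets mirror the #defines above, the field values are examples, and the buffer is ordinary heap memory rather than a DMA mailbox.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>   /* htonl/htons for big-endian stores */

#define INIT_IB_IN_SIZE         56
#define INIT_IB_FLAGS_OFFSET    0x00
#define INIT_IB_MAX_GID_OFFSET  0x06
#define INIT_IB_MAX_PKEY_OFFSET 0x0a

/* Big-endian stores at a byte offset, like MTHCA_PUT. */
static void put32(void *buf, size_t off, uint32_t v)
{
    uint32_t be = htonl(v);
    memcpy((char *) buf + off, &be, sizeof(be));
}

static void put16(void *buf, size_t off, uint16_t v)
{
    uint16_t be = htons(v);
    memcpy((char *) buf + off, &be, sizeof(be));
}

int main(void)
{
    unsigned char inbox[INIT_IB_IN_SIZE];
    /* vl_cap << 4, port_width << 8, mtu_cap << 12 -- example values. */
    uint32_t flags = (4 << 4) | (1 << 8) | (4 << 12);
    size_t i;

    memset(inbox, 0, sizeof(inbox));
    put32(inbox, INIT_IB_FLAGS_OFFSET, flags);
    put16(inbox, INIT_IB_MAX_GID_OFFSET, 32);   /* gid_cap, example  */
    put16(inbox, INIT_IB_MAX_PKEY_OFFSET, 64);  /* pkey_cap, example */

    for (i = 0; i < sizeof(inbox); ++i)
        printf("%02x%s", inbox[i], (i + 1) % 16 ? " " : "\n");
    printf("\n");
    return 0;
}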
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
new file mode 100644
index 0000000..6efd326
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2006 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MTHCA_CMD_H
+#define MTHCA_CMD_H
+
+#include <rdma/ib_verbs.h>
+
+#define MTHCA_MAILBOX_SIZE 4096
+
+enum {
+ /* command completed successfully: */
+ MTHCA_CMD_STAT_OK = 0x00,
+ /* Internal error (such as a bus error) occurred while processing command: */
+ MTHCA_CMD_STAT_INTERNAL_ERR = 0x01,
+ /* Operation/command not supported or opcode modifier not supported: */
+ MTHCA_CMD_STAT_BAD_OP = 0x02,
+ /* Parameter not supported or parameter out of range: */
+ MTHCA_CMD_STAT_BAD_PARAM = 0x03,
+ /* System not enabled or bad system state: */
+ MTHCA_CMD_STAT_BAD_SYS_STATE = 0x04,
+	/* Attempt to access reserved or unallocated resource: */
+ MTHCA_CMD_STAT_BAD_RESOURCE = 0x05,
+ /* Requested resource is currently executing a command, or is otherwise busy: */
+ MTHCA_CMD_STAT_RESOURCE_BUSY = 0x06,
+	/* DDR memory error: */
+ MTHCA_CMD_STAT_DDR_MEM_ERR = 0x07,
+ /* Required capability exceeds device limits: */
+ MTHCA_CMD_STAT_EXCEED_LIM = 0x08,
+ /* Resource is not in the appropriate state or ownership: */
+ MTHCA_CMD_STAT_BAD_RES_STATE = 0x09,
+ /* Index out of range: */
+ MTHCA_CMD_STAT_BAD_INDEX = 0x0a,
+ /* FW image corrupted: */
+ MTHCA_CMD_STAT_BAD_NVMEM = 0x0b,
+ /* Attempt to modify a QP/EE which is not in the presumed state: */
+ MTHCA_CMD_STAT_BAD_QPEE_STATE = 0x10,
+ /* Bad segment parameters (Address/Size): */
+ MTHCA_CMD_STAT_BAD_SEG_PARAM = 0x20,
+ /* Memory Region has Memory Windows bound to: */
+ MTHCA_CMD_STAT_REG_BOUND = 0x21,
+ /* HCA local attached memory not present: */
+ MTHCA_CMD_STAT_LAM_NOT_PRE = 0x22,
+ /* Bad management packet (silently discarded): */
+ MTHCA_CMD_STAT_BAD_PKT = 0x30,
+ /* More outstanding CQEs in CQ than new CQ size: */
+ MTHCA_CMD_STAT_BAD_SIZE = 0x40
+};
+
+enum {
+ MTHCA_TRANS_INVALID = 0,
+ MTHCA_TRANS_RST2INIT,
+ MTHCA_TRANS_INIT2INIT,
+ MTHCA_TRANS_INIT2RTR,
+ MTHCA_TRANS_RTR2RTS,
+ MTHCA_TRANS_RTS2RTS,
+ MTHCA_TRANS_SQERR2RTS,
+ MTHCA_TRANS_ANY2ERR,
+ MTHCA_TRANS_RTS2SQD,
+ MTHCA_TRANS_SQD2SQD,
+ MTHCA_TRANS_SQD2RTS,
+ MTHCA_TRANS_ANY2RST,
+};
+
+enum {
+ DEV_LIM_FLAG_RC = 1 << 0,
+ DEV_LIM_FLAG_UC = 1 << 1,
+ DEV_LIM_FLAG_UD = 1 << 2,
+ DEV_LIM_FLAG_RD = 1 << 3,
+ DEV_LIM_FLAG_RAW_IPV6 = 1 << 4,
+ DEV_LIM_FLAG_RAW_ETHER = 1 << 5,
+ DEV_LIM_FLAG_SRQ = 1 << 6,
+ DEV_LIM_FLAG_IPOIB_CSUM = 1 << 7,
+ DEV_LIM_FLAG_BAD_PKEY_CNTR = 1 << 8,
+ DEV_LIM_FLAG_BAD_QKEY_CNTR = 1 << 9,
+ DEV_LIM_FLAG_MW = 1 << 16,
+ DEV_LIM_FLAG_AUTO_PATH_MIG = 1 << 17,
+ DEV_LIM_FLAG_ATOMIC = 1 << 18,
+ DEV_LIM_FLAG_RAW_MULTI = 1 << 19,
+ DEV_LIM_FLAG_UD_AV_PORT_ENFORCE = 1 << 20,
+ DEV_LIM_FLAG_UD_MULTI = 1 << 21,
+};
+
+struct mthca_mailbox {
+ dma_addr_t dma;
+ void *buf;
+};
+
+struct mthca_dev_lim {
+ int max_srq_sz;
+ int max_qp_sz;
+ int reserved_qps;
+ int max_qps;
+ int reserved_srqs;
+ int max_srqs;
+ int reserved_eecs;
+ int max_eecs;
+ int max_cq_sz;
+ int reserved_cqs;
+ int max_cqs;
+ int max_mpts;
+ int reserved_eqs;
+ int max_eqs;
+ int reserved_mtts;
+ int max_mrw_sz;
+ int reserved_mrws;
+ int max_mtt_seg;
+ int max_requester_per_qp;
+ int max_responder_per_qp;
+ int max_rdma_global;
+ int local_ca_ack_delay;
+ int max_mtu;
+ int max_port_width;
+ int max_vl;
+ int num_ports;
+ int max_gids;
+ u16 stat_rate_support;
+ int max_pkeys;
+ u32 flags;
+ int reserved_uars;
+ int uar_size;
+ int min_page_sz;
+ int max_sg;
+ int max_desc_sz;
+ int max_qp_per_mcg;
+ int reserved_mgms;
+ int max_mcgs;
+ int reserved_pds;
+ int max_pds;
+ int reserved_rdds;
+ int max_rdds;
+ int eec_entry_sz;
+ int qpc_entry_sz;
+ int eeec_entry_sz;
+ int eqpc_entry_sz;
+ int eqc_entry_sz;
+ int cqc_entry_sz;
+ int srq_entry_sz;
+ int uar_scratch_entry_sz;
+ int mpt_entry_sz;
+ union {
+ struct {
+ int max_avs;
+ } tavor;
+ struct {
+ int resize_srq;
+ int max_pbl_sz;
+ u8 bmme_flags;
+ u32 reserved_lkey;
+ int lam_required;
+ u64 max_icm_sz;
+ } arbel;
+ } hca;
+};
+
+struct mthca_adapter {
+ u32 vendor_id;
+ u32 device_id;
+ u32 revision_id;
+ char board_id[MTHCA_BOARD_ID_LEN];
+ u8 inta_pin;
+};
+
+struct mthca_init_hca_param {
+ u64 qpc_base;
+ u64 eec_base;
+ u64 srqc_base;
+ u64 cqc_base;
+ u64 eqpc_base;
+ u64 eeec_base;
+ u64 eqc_base;
+ u64 rdb_base;
+ u64 mc_base;
+ u64 mpt_base;
+ u64 mtt_base;
+ u64 uar_scratch_base;
+ u64 uarc_base;
+ u16 log_mc_entry_sz;
+ u16 mc_hash_sz;
+ u8 log_num_qps;
+ u8 log_num_eecs;
+ u8 log_num_srqs;
+ u8 log_num_cqs;
+ u8 log_num_eqs;
+ u8 log_mc_table_sz;
+ u8 mtt_seg_sz;
+ u8 log_mpt_sz;
+ u8 log_uar_sz;
+ u8 log_uarc_sz;
+};
+
+struct mthca_init_ib_param {
+ int port_width;
+ int vl_cap;
+ int mtu_cap;
+ u16 gid_cap;
+ u16 pkey_cap;
+ int set_guid0;
+ u64 guid0;
+ int set_node_guid;
+ u64 node_guid;
+ int set_si_guid;
+ u64 si_guid;
+};
+
+struct mthca_set_ib_param {
+ int set_si_guid;
+ int reset_qkey_viol;
+ u64 si_guid;
+ u32 cap_mask;
+};
+
+int mthca_cmd_init(struct mthca_dev *dev);
+void mthca_cmd_cleanup(struct mthca_dev *dev);
+int mthca_cmd_use_events(struct mthca_dev *dev);
+void mthca_cmd_use_polling(struct mthca_dev *dev);
+void mthca_cmd_event(struct mthca_dev *dev, u16 token,
+ u8 status, u64 out_param);
+
+struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
+ gfp_t gfp_mask);
+void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox);
+
+int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);
+int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status);
+int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
+int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status);
+int mthca_RUN_FW(struct mthca_dev *dev, u8 *status);
+int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status);
+int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status);
+int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status);
+int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status);
+int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
+ struct mthca_dev_lim *dev_lim, u8 *status);
+int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
+ struct mthca_adapter *adapter, u8 *status);
+int mthca_INIT_HCA(struct mthca_dev *dev,
+ struct mthca_init_hca_param *param,
+ u8 *status);
+int mthca_INIT_IB(struct mthca_dev *dev,
+ struct mthca_init_ib_param *param,
+ int port, u8 *status);
+int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status);
+int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status);
+int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
+ int port, u8 *status);
+int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status);
+int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status);
+int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status);
+int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
+int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status);
+int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
+ u8 *status);
+int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int mpt_index, u8 *status);
+int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int mpt_index, u8 *status);
+int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int num_mtt, u8 *status);
+int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status);
+int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
+ int eq_num, u8 *status);
+int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int eq_num, u8 *status);
+int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int eq_num, u8 *status);
+int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int cq_num, u8 *status);
+int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int cq_num, u8 *status);
+int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
+ u8 *status);
+int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int srq_num, u8 *status);
+int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ int srq_num, u8 *status);
+int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
+ struct mthca_mailbox *mailbox, u8 *status);
+int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status);
+int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
+ enum ib_qp_state next, u32 num, int is_ee,
+ struct mthca_mailbox *mailbox, u32 optmask,
+ u8 *status);
+int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
+ struct mthca_mailbox *mailbox, u8 *status);
+int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
+ u8 *status);
+int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
+ int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+ void *in_mad, void *response_mad, u8 *status);
+int mthca_READ_MGM(struct mthca_dev *dev, int index,
+ struct mthca_mailbox *mailbox, u8 *status);
+int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
+ struct mthca_mailbox *mailbox, u8 *status);
+int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
+ u16 *hash, u8 *status);
+int mthca_NOP(struct mthca_dev *dev, u8 *status);
+
+#endif /* MTHCA_CMD_H */
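
Editor's note: callers generally treat a non-zero firmware status as a failure; a common pattern is to fold the MTHCA_CMD_STAT_* codes above into ordinary errno values. The sketch below is a hedged illustration of such a mapping -- the particular errno chosen per status is an assumption for demonstration, not something specified by this patch.

#include <errno.h>
#include <stdio.h>

/* A few of the status codes from mthca_cmd.h above. */
enum {
    STAT_OK            = 0x00,
    STAT_INTERNAL_ERR  = 0x01,
    STAT_BAD_OP        = 0x02,
    STAT_BAD_PARAM     = 0x03,
    STAT_RESOURCE_BUSY = 0x06,
    STAT_EXCEED_LIM    = 0x08,
    STAT_BAD_INDEX     = 0x0a,
};

/* Illustrative mapping only -- the driver checks status at each call site. */
static int status_to_errno(unsigned char status)
{
    switch (status) {
    case STAT_OK:            return 0;
    case STAT_BAD_OP:        return -EPERM;
    case STAT_BAD_PARAM:     return -EINVAL;
    case STAT_BAD_INDEX:     return -EINVAL;
    case STAT_RESOURCE_BUSY: return -EBUSY;
    case STAT_EXCEED_LIM:    return -ENOMEM;
    default:                 return -EIO;
    }
}

int main(void)
{
    unsigned char status = STAT_RESOURCE_BUSY;   /* pretend a command returned busy */

    printf("firmware status 0x%02x -> errno %d\n", status, status_to_errno(status));
    return 0;
}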
diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h
new file mode 100644
index 0000000..75671f7
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MTHCA_CONFIG_REG_H
+#define MTHCA_CONFIG_REG_H
+
+#include <asm/page.h>
+
+#define MTHCA_HCR_BASE 0x80680
+#define MTHCA_HCR_SIZE 0x0001c
+#define MTHCA_ECR_BASE 0x80700
+#define MTHCA_ECR_SIZE 0x00008
+#define MTHCA_ECR_CLR_BASE 0x80708
+#define MTHCA_ECR_CLR_SIZE 0x00008
+#define MTHCA_MAP_ECR_SIZE (MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE)
+#define MTHCA_CLR_INT_BASE 0xf00d8
+#define MTHCA_CLR_INT_SIZE 0x00008
+#define MTHCA_EQ_SET_CI_SIZE (8 * 32)
+
+#endif /* MTHCA_CONFIG_REG_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
new file mode 100644
index 0000000..d9f4735
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -0,0 +1,993 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hardirq.h>
+#include <linux/sched.h>
+
+#include <asm/io.h>
+
+#include <rdma/ib_pack.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+#include "mthca_memfree.h"
+
+enum {
+ MTHCA_MAX_DIRECT_CQ_SIZE = 4 * PAGE_SIZE
+};
+
+enum {
+ MTHCA_CQ_ENTRY_SIZE = 0x20
+};
+
+enum {
+ MTHCA_ATOMIC_BYTE_LEN = 8
+};
+
+/*
+ * Must be packed because start is 64 bits but only aligned to 32 bits.
+ */
+struct mthca_cq_context {
+ __be32 flags;
+ __be64 start;
+ __be32 logsize_usrpage;
+ __be32 error_eqn; /* Tavor only */
+ __be32 comp_eqn;
+ __be32 pd;
+ __be32 lkey;
+ __be32 last_notified_index;
+ __be32 solicit_producer_index;
+ __be32 consumer_index;
+ __be32 producer_index;
+ __be32 cqn;
+ __be32 ci_db; /* Arbel only */
+ __be32 state_db; /* Arbel only */
+ u32 reserved;
+} __attribute__((packed));
+
+#define MTHCA_CQ_STATUS_OK ( 0 << 28)
+#define MTHCA_CQ_STATUS_OVERFLOW ( 9 << 28)
+#define MTHCA_CQ_STATUS_WRITE_FAIL (10 << 28)
+#define MTHCA_CQ_FLAG_TR ( 1 << 18)
+#define MTHCA_CQ_FLAG_OI ( 1 << 17)
+#define MTHCA_CQ_STATE_DISARMED ( 0 << 8)
+#define MTHCA_CQ_STATE_ARMED ( 1 << 8)
+#define MTHCA_CQ_STATE_ARMED_SOL ( 4 << 8)
+#define MTHCA_EQ_STATE_FIRED (10 << 8)
+
+enum {
+ MTHCA_ERROR_CQE_OPCODE_MASK = 0xfe
+};
+
+enum {
+ SYNDROME_LOCAL_LENGTH_ERR = 0x01,
+ SYNDROME_LOCAL_QP_OP_ERR = 0x02,
+ SYNDROME_LOCAL_EEC_OP_ERR = 0x03,
+ SYNDROME_LOCAL_PROT_ERR = 0x04,
+ SYNDROME_WR_FLUSH_ERR = 0x05,
+ SYNDROME_MW_BIND_ERR = 0x06,
+ SYNDROME_BAD_RESP_ERR = 0x10,
+ SYNDROME_LOCAL_ACCESS_ERR = 0x11,
+ SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12,
+ SYNDROME_REMOTE_ACCESS_ERR = 0x13,
+ SYNDROME_REMOTE_OP_ERR = 0x14,
+ SYNDROME_RETRY_EXC_ERR = 0x15,
+ SYNDROME_RNR_RETRY_EXC_ERR = 0x16,
+ SYNDROME_LOCAL_RDD_VIOL_ERR = 0x20,
+ SYNDROME_REMOTE_INVAL_RD_REQ_ERR = 0x21,
+ SYNDROME_REMOTE_ABORTED_ERR = 0x22,
+ SYNDROME_INVAL_EECN_ERR = 0x23,
+ SYNDROME_INVAL_EEC_STATE_ERR = 0x24
+};
+
+struct mthca_cqe {
+ __be32 my_qpn;
+ __be32 my_ee;
+ __be32 rqpn;
+ u8 sl_ipok;
+ u8 g_mlpath;
+ __be16 rlid;
+ __be32 imm_etype_pkey_eec;
+ __be32 byte_cnt;
+ __be32 wqe;
+ u8 opcode;
+ u8 is_send;
+ u8 reserved;
+ u8 owner;
+};
+
+struct mthca_err_cqe {
+ __be32 my_qpn;
+ u32 reserved1[3];
+ u8 syndrome;
+ u8 vendor_err;
+ __be16 db_cnt;
+ u32 reserved2;
+ __be32 wqe;
+ u8 opcode;
+ u8 reserved3[2];
+ u8 owner;
+};
+
+#define MTHCA_CQ_ENTRY_OWNER_SW (0 << 7)
+#define MTHCA_CQ_ENTRY_OWNER_HW (1 << 7)
+
+#define MTHCA_TAVOR_CQ_DB_INC_CI (1 << 24)
+#define MTHCA_TAVOR_CQ_DB_REQ_NOT (2 << 24)
+#define MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL (3 << 24)
+#define MTHCA_TAVOR_CQ_DB_SET_CI (4 << 24)
+#define MTHCA_TAVOR_CQ_DB_REQ_NOT_MULT (5 << 24)
+
+#define MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL (1 << 24)
+#define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24)
+#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24)
+
+static inline struct mthca_cqe *get_cqe_from_buf(struct mthca_cq_buf *buf,
+ int entry)
+{
+ if (buf->is_direct)
+ return buf->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE);
+ else
+ return buf->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf
+ + (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE;
+}
+
+static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
+{
+ return get_cqe_from_buf(&cq->buf, entry);
+}
+
+static inline struct mthca_cqe *cqe_sw(struct mthca_cqe *cqe)
+{
+ return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe;
+}
+
+static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq)
+{
+ return cqe_sw(get_cqe(cq, cq->cons_index & cq->ibcq.cqe));
+}
+
+static inline void set_cqe_hw(struct mthca_cqe *cqe)
+{
+ cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW;
+}
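
Editor's note: cqe_sw() and set_cqe_hw() implement the usual ownership-bit handshake: an entry whose high owner bit is clear belongs to software, and once software has consumed it the bit is set again so the hardware may reuse the slot. Below is a small userspace sketch of that poll loop over a fake ring (no memory barriers or real hardware involved).

#include <stdio.h>
#include <stdint.h>

#define OWNER_HW  (1 << 7)
#define RING_SIZE 8                    /* power of two, like a real CQ */

struct entry {
    uint8_t  owner;
    uint32_t payload;
};

int main(void)
{
    struct entry ring[RING_SIZE];
    unsigned cons_index = 0;
    int i;

    /* Hardware owns every slot initially. */
    for (i = 0; i < RING_SIZE; ++i)
        ring[i].owner = OWNER_HW;

    /* Pretend the hardware completed three entries. */
    for (i = 0; i < 3; ++i) {
        ring[i].payload = 100 + i;
        ring[i].owner &= ~OWNER_HW;    /* hand ownership to software */
    }

    /* Software poll loop: consume while the next entry is ours. */
    while (!(ring[cons_index & (RING_SIZE - 1)].owner & OWNER_HW)) {
        struct entry *e = &ring[cons_index & (RING_SIZE - 1)];

        printf("polled entry %u, payload %u\n", cons_index, e->payload);
        e->owner = OWNER_HW;           /* give the slot back to hardware */
        ++cons_index;
    }

    printf("consumer index now %u\n", cons_index);
    return 0;
}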
+
+static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr)
+{
+ __be32 *cqe = cqe_ptr;
+
+ (void) cqe; /* avoid warning if mthca_dbg compiled away... */
+ mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ be32_to_cpu(cqe[0]), be32_to_cpu(cqe[1]), be32_to_cpu(cqe[2]),
+ be32_to_cpu(cqe[3]), be32_to_cpu(cqe[4]), be32_to_cpu(cqe[5]),
+ be32_to_cpu(cqe[6]), be32_to_cpu(cqe[7]));
+}
+
+/*
+ * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index
+ * should be correct before calling update_cons_index().
+ */
+static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq,
+ int incr)
+{
+ if (mthca_is_memfree(dev)) {
+ *cq->set_ci_db = cpu_to_be32(cq->cons_index);
+ wmb();
+ } else {
+ mthca_write64(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn, incr - 1,
+ dev->kar + MTHCA_CQ_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ /*
+ * Make sure doorbells don't leak out of CQ spinlock
+ * and reach the HCA out of order:
+ */
+ mmiowb();
+ }
+}
+
+void mthca_cq_completion(struct mthca_dev *dev, u32 cqn)
+{
+ struct mthca_cq *cq;
+
+ cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
+
+ if (!cq) {
+ mthca_warn(dev, "Completion event for bogus CQ %08x\n", cqn);
+ return;
+ }
+
+ ++cq->arm_sn;
+
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
+ enum ib_event_type event_type)
+{
+ struct mthca_cq *cq;
+ struct ib_event event;
+
+ spin_lock(&dev->cq_table.lock);
+
+ cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1));
+ if (cq)
+ ++cq->refcount;
+
+ spin_unlock(&dev->cq_table.lock);
+
+ if (!cq) {
+ mthca_warn(dev, "Async event for bogus CQ %08x\n", cqn);
+ return;
+ }
+
+ event.device = &dev->ib_dev;
+ event.event = event_type;
+ event.element.cq = &cq->ibcq;
+ if (cq->ibcq.event_handler)
+ cq->ibcq.event_handler(&event, cq->ibcq.cq_context);
+
+ spin_lock(&dev->cq_table.lock);
+ if (!--cq->refcount)
+ wake_up(&cq->wait);
+ spin_unlock(&dev->cq_table.lock);
+}
+
+static inline int is_recv_cqe(struct mthca_cqe *cqe)
+{
+ if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
+ MTHCA_ERROR_CQE_OPCODE_MASK)
+ return !(cqe->opcode & 0x01);
+ else
+ return !(cqe->is_send & 0x80);
+}
+
+void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
+ struct mthca_srq *srq)
+{
+ struct mthca_cqe *cqe;
+ u32 prod_index;
+ int i, nfreed = 0;
+
+ spin_lock_irq(&cq->lock);
+
+ /*
+ * First we need to find the current producer index, so we
+ * know where to start cleaning from. It doesn't matter if HW
+ * adds new entries after this loop -- the QP we're worried
+ * about is already in RESET, so the new entries won't come
+ * from our QP and therefore don't need to be checked.
+ */
+ for (prod_index = cq->cons_index;
+ cqe_sw(get_cqe(cq, prod_index & cq->ibcq.cqe));
+ ++prod_index)
+ if (prod_index == cq->cons_index + cq->ibcq.cqe)
+ break;
+
+ if (0)
+ mthca_dbg(dev, "Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n",
+ qpn, cq->cqn, cq->cons_index, prod_index);
+
+ /*
+ * Now sweep backwards through the CQ, removing CQ entries
+ * that match our QP by copying older entries on top of them.
+ */
+ while ((int) --prod_index - (int) cq->cons_index >= 0) {
+ cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+ if (cqe->my_qpn == cpu_to_be32(qpn)) {
+ if (srq && is_recv_cqe(cqe))
+ mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe));
+ ++nfreed;
+ } else if (nfreed)
+ memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe),
+ cqe, MTHCA_CQ_ENTRY_SIZE);
+ }
+
+ if (nfreed) {
+ for (i = 0; i < nfreed; ++i)
+ set_cqe_hw(get_cqe(cq, (cq->cons_index + i) & cq->ibcq.cqe));
+ wmb();
+ cq->cons_index += nfreed;
+ update_cons_index(dev, cq, nfreed);
+ }
+
+ spin_unlock_irq(&cq->lock);
+}
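
Editor's note: mthca_cq_clean() walks backwards from the producer index and compacts the ring in place: completions belonging to the QP being reset are dropped, surviving (older) entries are copied forward over them, and the consumer index is then advanced past the vacated slots. The toy program below performs the same compaction on a ring of plain QPN values, purely as a standalone illustration of the algorithm.

#include <stdio.h>
#include <stdint.h>

#define CQ_SIZE 8                      /* power of two */

int main(void)
{
    /* Fake CQ: each slot just records the QPN its completion came from. */
    uint32_t cq[CQ_SIZE] = { 0 };
    unsigned cons = 2, prod = 7;       /* entries [cons, prod) are valid */
    uint32_t doomed_qpn = 5;           /* QP being reset                 */
    unsigned i, nfreed = 0;

    /* Some pending completions, a few from the doomed QP. */
    uint32_t pending[] = { 3, 5, 7, 5, 9 };
    for (i = 0; i < 5; ++i)
        cq[(cons + i) & (CQ_SIZE - 1)] = pending[i];

    /* Sweep backwards, copying surviving entries over removed ones. */
    for (i = prod; (int) --i - (int) cons >= 0; ) {
        uint32_t *e = &cq[i & (CQ_SIZE - 1)];

        if (*e == doomed_qpn)
            ++nfreed;
        else if (nfreed)
            cq[(i + nfreed) & (CQ_SIZE - 1)] = *e;
    }

    cons += nfreed;                    /* skip the slots we vacated */

    printf("freed %u entries, consumer index now %u, remaining:", nfreed, cons);
    for (i = cons; i < prod; ++i)
        printf(" %u", cq[i & (CQ_SIZE - 1)]);
    printf("\n");
    return 0;
}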
+
+void mthca_cq_resize_copy_cqes(struct mthca_cq *cq)
+{
+ int i;
+
+ /*
+ * In Tavor mode, the hardware keeps the consumer and producer
+ * indices mod the CQ size. Since we might be making the CQ
+ * bigger, we need to deal with the case where the producer
+ * index wrapped around before the CQ was resized.
+ */
+ if (!mthca_is_memfree(to_mdev(cq->ibcq.device)) &&
+ cq->ibcq.cqe < cq->resize_buf->cqe) {
+ cq->cons_index &= cq->ibcq.cqe;
+ if (cqe_sw(get_cqe(cq, cq->ibcq.cqe)))
+ cq->cons_index -= cq->ibcq.cqe + 1;
+ }
+
+ for (i = cq->cons_index; cqe_sw(get_cqe(cq, i & cq->ibcq.cqe)); ++i)
+ memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
+ i & cq->resize_buf->cqe),
+ get_cqe(cq, i & cq->ibcq.cqe), MTHCA_CQ_ENTRY_SIZE);
+}
+
+int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent)
+{
+ int ret;
+ int i;
+
+ ret = mthca_buf_alloc(dev, nent * MTHCA_CQ_ENTRY_SIZE,
+ MTHCA_MAX_DIRECT_CQ_SIZE,
+ &buf->queue, &buf->is_direct,
+ &dev->driver_pd, 1, &buf->mr);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < nent; ++i)
+ set_cqe_hw(get_cqe_from_buf(buf, i));
+
+ return 0;
+}
+
+void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe)
+{
+ mthca_buf_free(dev, (cqe + 1) * MTHCA_CQ_ENTRY_SIZE, &buf->queue,
+ buf->is_direct, &buf->mr);
+}
+
+static void handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
+ struct mthca_qp *qp, int wqe_index, int is_send,
+ struct mthca_err_cqe *cqe,
+ struct ib_wc *entry, int *free_cqe)
+{
+ int dbd;
+ __be32 new_wqe;
+
+ if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) {
+ mthca_dbg(dev, "local QP operation err "
+ "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n",
+ be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe),
+ cq->cqn, cq->cons_index);
+ dump_cqe(dev, cqe);
+ }
+
+ /*
+ * For completions in error, only work request ID, status, vendor error
+ * (and freed resource count for RD) have to be set.
+ */
+ switch (cqe->syndrome) {
+ case SYNDROME_LOCAL_LENGTH_ERR:
+ entry->status = IB_WC_LOC_LEN_ERR;
+ break;
+ case SYNDROME_LOCAL_QP_OP_ERR:
+ entry->status = IB_WC_LOC_QP_OP_ERR;
+ break;
+ case SYNDROME_LOCAL_EEC_OP_ERR:
+ entry->status = IB_WC_LOC_EEC_OP_ERR;
+ break;
+ case SYNDROME_LOCAL_PROT_ERR:
+ entry->status = IB_WC_LOC_PROT_ERR;
+ break;
+ case SYNDROME_WR_FLUSH_ERR:
+ entry->status = IB_WC_WR_FLUSH_ERR;
+ break;
+ case SYNDROME_MW_BIND_ERR:
+ entry->status = IB_WC_MW_BIND_ERR;
+ break;
+ case SYNDROME_BAD_RESP_ERR:
+ entry->status = IB_WC_BAD_RESP_ERR;
+ break;
+ case SYNDROME_LOCAL_ACCESS_ERR:
+ entry->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case SYNDROME_REMOTE_INVAL_REQ_ERR:
+ entry->status = IB_WC_REM_INV_REQ_ERR;
+ break;
+ case SYNDROME_REMOTE_ACCESS_ERR:
+ entry->status = IB_WC_REM_ACCESS_ERR;
+ break;
+ case SYNDROME_REMOTE_OP_ERR:
+ entry->status = IB_WC_REM_OP_ERR;
+ break;
+ case SYNDROME_RETRY_EXC_ERR:
+ entry->status = IB_WC_RETRY_EXC_ERR;
+ break;
+ case SYNDROME_RNR_RETRY_EXC_ERR:
+ entry->status = IB_WC_RNR_RETRY_EXC_ERR;
+ break;
+ case SYNDROME_LOCAL_RDD_VIOL_ERR:
+ entry->status = IB_WC_LOC_RDD_VIOL_ERR;
+ break;
+ case SYNDROME_REMOTE_INVAL_RD_REQ_ERR:
+ entry->status = IB_WC_REM_INV_RD_REQ_ERR;
+ break;
+ case SYNDROME_REMOTE_ABORTED_ERR:
+ entry->status = IB_WC_REM_ABORT_ERR;
+ break;
+ case SYNDROME_INVAL_EECN_ERR:
+ entry->status = IB_WC_INV_EECN_ERR;
+ break;
+ case SYNDROME_INVAL_EEC_STATE_ERR:
+ entry->status = IB_WC_INV_EEC_STATE_ERR;
+ break;
+ default:
+ entry->status = IB_WC_GENERAL_ERR;
+ break;
+ }
+
+ entry->vendor_err = cqe->vendor_err;
+
+ /*
+ * Mem-free HCAs always generate one CQE per WQE, even in the
+ * error case, so we don't have to check the doorbell count, etc.
+ */
+ if (mthca_is_memfree(dev))
+ return;
+
+ mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe);
+
+ /*
+ * If we're at the end of the WQE chain, or we've used up our
+ * doorbell count, free the CQE. Otherwise just update it for
+ * the next poll operation.
+ */
+ if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd))
+ return;
+
+ be16_add_cpu(&cqe->db_cnt, -dbd);
+ cqe->wqe = new_wqe;
+ cqe->syndrome = SYNDROME_WR_FLUSH_ERR;
+
+ *free_cqe = 0;
+}
+
+static inline int mthca_poll_one(struct mthca_dev *dev,
+ struct mthca_cq *cq,
+ struct mthca_qp **cur_qp,
+ int *freed,
+ struct ib_wc *entry)
+{
+ struct mthca_wq *wq;
+ struct mthca_cqe *cqe;
+ int wqe_index;
+ int is_error;
+ int is_send;
+ int free_cqe = 1;
+ int err = 0;
+ u16 checksum;
+
+ cqe = next_cqe_sw(cq);
+ if (!cqe)
+ return -EAGAIN;
+
+ /*
+ * Make sure we read CQ entry contents after we've checked the
+ * ownership bit.
+ */
+ rmb();
+
+ if (0) {
+ mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n",
+ cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn),
+ be32_to_cpu(cqe->wqe));
+ dump_cqe(dev, cqe);
+ }
+
+ is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
+ MTHCA_ERROR_CQE_OPCODE_MASK;
+ is_send = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80;
+
+ if (!*cur_qp || be32_to_cpu(cqe->my_qpn) != (*cur_qp)->qpn) {
+ /*
+ * We do not have to take the QP table lock here,
+ * because CQs will be locked while QPs are removed
+ * from the table.
+ */
+ *cur_qp = mthca_array_get(&dev->qp_table.qp,
+ be32_to_cpu(cqe->my_qpn) &
+ (dev->limits.num_qps - 1));
+ if (!*cur_qp) {
+ mthca_warn(dev, "CQ entry for unknown QP %06x\n",
+ be32_to_cpu(cqe->my_qpn) & 0xffffff);
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ entry->qp = &(*cur_qp)->ibqp;
+
+ if (is_send) {
+ wq = &(*cur_qp)->sq;
+ wqe_index = ((be32_to_cpu(cqe->wqe) - (*cur_qp)->send_wqe_offset)
+ >> wq->wqe_shift);
+ entry->wr_id = (*cur_qp)->wrid[wqe_index +
+ (*cur_qp)->rq.max];
+ } else if ((*cur_qp)->ibqp.srq) {
+ struct mthca_srq *srq = to_msrq((*cur_qp)->ibqp.srq);
+ u32 wqe = be32_to_cpu(cqe->wqe);
+ wq = NULL;
+ wqe_index = wqe >> srq->wqe_shift;
+ entry->wr_id = srq->wrid[wqe_index];
+ mthca_free_srq_wqe(srq, wqe);
+ } else {
+ s32 wqe;
+ wq = &(*cur_qp)->rq;
+ wqe = be32_to_cpu(cqe->wqe);
+ wqe_index = wqe >> wq->wqe_shift;
+ /*
+ * WQE addr == base - 1 might be reported in receive completion
+ * with error instead of (rq size - 1) by Sinai FW 1.0.800 and
+ * Arbel FW 5.1.400. This bug should be fixed in later FW revs.
+ */
+ if (unlikely(wqe_index < 0))
+ wqe_index = wq->max - 1;
+ entry->wr_id = (*cur_qp)->wrid[wqe_index];
+ }
+
+ if (wq) {
+ if (wq->last_comp < wqe_index)
+ wq->tail += wqe_index - wq->last_comp;
+ else
+ wq->tail += wqe_index + wq->max - wq->last_comp;
+
+ wq->last_comp = wqe_index;
+ }
+
+ if (is_error) {
+ handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send,
+ (struct mthca_err_cqe *) cqe,
+ entry, &free_cqe);
+ goto out;
+ }
+
+ if (is_send) {
+ entry->wc_flags = 0;
+ switch (cqe->opcode) {
+ case MTHCA_OPCODE_RDMA_WRITE:
+ entry->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case MTHCA_OPCODE_RDMA_WRITE_IMM:
+ entry->opcode = IB_WC_RDMA_WRITE;
+ entry->wc_flags |= IB_WC_WITH_IMM;
+ break;
+ case MTHCA_OPCODE_SEND:
+ entry->opcode = IB_WC_SEND;
+ break;
+ case MTHCA_OPCODE_SEND_IMM:
+ entry->opcode = IB_WC_SEND;
+ entry->wc_flags |= IB_WC_WITH_IMM;
+ break;
+ case MTHCA_OPCODE_RDMA_READ:
+ entry->opcode = IB_WC_RDMA_READ;
+ entry->byte_len = be32_to_cpu(cqe->byte_cnt);
+ break;
+ case MTHCA_OPCODE_ATOMIC_CS:
+ entry->opcode = IB_WC_COMP_SWAP;
+ entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
+ break;
+ case MTHCA_OPCODE_ATOMIC_FA:
+ entry->opcode = IB_WC_FETCH_ADD;
+ entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
+ break;
+ case MTHCA_OPCODE_BIND_MW:
+ entry->opcode = IB_WC_BIND_MW;
+ break;
+ default:
+ entry->opcode = MTHCA_OPCODE_INVALID;
+ break;
+ }
+ } else {
+ entry->byte_len = be32_to_cpu(cqe->byte_cnt);
+ switch (cqe->opcode & 0x1f) {
+ case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE:
+ case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE:
+ entry->wc_flags = IB_WC_WITH_IMM;
+ entry->ex.imm_data = cqe->imm_etype_pkey_eec;
+ entry->opcode = IB_WC_RECV;
+ break;
+ case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
+ case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
+ entry->wc_flags = IB_WC_WITH_IMM;
+ entry->ex.imm_data = cqe->imm_etype_pkey_eec;
+ entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ break;
+ default:
+ entry->wc_flags = 0;
+ entry->opcode = IB_WC_RECV;
+ break;
+ }
+ entry->slid = be16_to_cpu(cqe->rlid);
+ entry->sl = cqe->sl_ipok >> 4;
+ entry->src_qp = be32_to_cpu(cqe->rqpn) & 0xffffff;
+ entry->dlid_path_bits = cqe->g_mlpath & 0x7f;
+ entry->pkey_index = be32_to_cpu(cqe->imm_etype_pkey_eec) >> 16;
+ entry->wc_flags |= cqe->g_mlpath & 0x80 ? IB_WC_GRH : 0;
+ checksum = (be32_to_cpu(cqe->rqpn) >> 24) |
+ ((be32_to_cpu(cqe->my_ee) >> 16) & 0xff00);
+ entry->csum_ok = (cqe->sl_ipok & 1 && checksum == 0xffff);
+ }
+
+ entry->status = IB_WC_SUCCESS;
+
+ out:
+ if (likely(free_cqe)) {
+ set_cqe_hw(cqe);
+ ++(*freed);
+ ++cq->cons_index;
+ }
+
+ return err;
+}
+
+int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *entry)
+{
+ struct mthca_dev *dev = to_mdev(ibcq->device);
+ struct mthca_cq *cq = to_mcq(ibcq);
+ struct mthca_qp *qp = NULL;
+ unsigned long flags;
+ int err = 0;
+ int freed = 0;
+ int npolled;
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ npolled = 0;
+repoll:
+ while (npolled < num_entries) {
+ err = mthca_poll_one(dev, cq, &qp,
+ &freed, entry + npolled);
+ if (err)
+ break;
+ ++npolled;
+ }
+
+ if (freed) {
+ wmb();
+ update_cons_index(dev, cq, freed);
+ }
+
+ /*
+ * If a CQ resize is in progress and we discovered that the
+ * old buffer is empty, then peek in the new buffer, and if
+ * it's not empty, switch to the new buffer and continue
+ * polling there.
+ */
+ if (unlikely(err == -EAGAIN && cq->resize_buf &&
+ cq->resize_buf->state == CQ_RESIZE_READY)) {
+ /*
+ * In Tavor mode, the hardware keeps the producer
+ * index modulo the CQ size. Since we might be making
+ * the CQ bigger, we need to mask our consumer index
+ * using the size of the old CQ buffer before looking
+ * in the new CQ buffer.
+ */
+ if (!mthca_is_memfree(dev))
+ cq->cons_index &= cq->ibcq.cqe;
+
+ if (cqe_sw(get_cqe_from_buf(&cq->resize_buf->buf,
+ cq->cons_index & cq->resize_buf->cqe))) {
+ struct mthca_cq_buf tbuf;
+ int tcqe;
+
+ tbuf = cq->buf;
+ tcqe = cq->ibcq.cqe;
+ cq->buf = cq->resize_buf->buf;
+ cq->ibcq.cqe = cq->resize_buf->cqe;
+
+ cq->resize_buf->buf = tbuf;
+ cq->resize_buf->cqe = tcqe;
+ cq->resize_buf->state = CQ_RESIZE_SWAPPED;
+
+ goto repoll;
+ }
+ }
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ return err == 0 || err == -EAGAIN ? npolled : err;
+}
+
+int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
+{
+ u32 dbhi = ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
+ MTHCA_TAVOR_CQ_DB_REQ_NOT) |
+ to_mcq(cq)->cqn;
+
+ mthca_write64(dbhi, 0xffffffff, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock));
+
+ return 0;
+}
+
+int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct mthca_cq *cq = to_mcq(ibcq);
+ __be32 db_rec[2];
+ u32 dbhi;
+ u32 sn = cq->arm_sn & 3;
+
+ db_rec[0] = cpu_to_be32(cq->cons_index);
+ db_rec[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) |
+ ((flags & IB_CQ_SOLICITED_MASK) ==
+ IB_CQ_SOLICITED ? 1 : 2));
+
+ mthca_write_db_rec(db_rec, cq->arm_db);
+
+ /*
+ * Make sure that the doorbell record in host memory is
+ * written before ringing the doorbell via PCI MMIO.
+ */
+ wmb();
+
+ dbhi = (sn << 28) |
+ ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
+ MTHCA_ARBEL_CQ_DB_REQ_NOT) | cq->cqn;
+
+ mthca_write64(dbhi, cq->cons_index,
+ to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock));
+
+ return 0;
+}
+
+int mthca_init_cq(struct mthca_dev *dev, int nent,
+ struct mthca_ucontext *ctx, u32 pdn,
+ struct mthca_cq *cq)
+{
+ struct mthca_mailbox *mailbox;
+ struct mthca_cq_context *cq_context;
+ int err = -ENOMEM;
+ u8 status;
+
+ cq->ibcq.cqe = nent - 1;
+ cq->is_kernel = !ctx;
+
+ cq->cqn = mthca_alloc(&dev->cq_table.alloc);
+ if (cq->cqn == -1)
+ return -ENOMEM;
+
+ if (mthca_is_memfree(dev)) {
+ err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
+ if (err)
+ goto err_out;
+
+ if (cq->is_kernel) {
+ cq->arm_sn = 1;
+
+ err = -ENOMEM;
+
+ cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
+ cq->cqn, &cq->set_ci_db);
+ if (cq->set_ci_db_index < 0)
+ goto err_out_icm;
+
+ cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
+ cq->cqn, &cq->arm_db);
+ if (cq->arm_db_index < 0)
+ goto err_out_ci;
+ }
+ }
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ goto err_out_arm;
+
+ cq_context = mailbox->buf;
+
+ if (cq->is_kernel) {
+ err = mthca_alloc_cq_buf(dev, &cq->buf, nent);
+ if (err)
+ goto err_out_mailbox;
+ }
+
+ spin_lock_init(&cq->lock);
+ cq->refcount = 1;
+ init_waitqueue_head(&cq->wait);
+ mutex_init(&cq->mutex);
+
+ memset(cq_context, 0, sizeof *cq_context);
+ cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK |
+ MTHCA_CQ_STATE_DISARMED |
+ MTHCA_CQ_FLAG_TR);
+ cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
+ if (ctx)
+ cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index);
+ else
+ cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
+ cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
+ cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
+ cq_context->pd = cpu_to_be32(pdn);
+ cq_context->lkey = cpu_to_be32(cq->buf.mr.ibmr.lkey);
+ cq_context->cqn = cpu_to_be32(cq->cqn);
+
+ if (mthca_is_memfree(dev)) {
+ cq_context->ci_db = cpu_to_be32(cq->set_ci_db_index);
+ cq_context->state_db = cpu_to_be32(cq->arm_db_index);
+ }
+
+ err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status);
+ if (err) {
+ mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err);
+ goto err_out_free_mr;
+ }
+
+ if (status) {
+ mthca_warn(dev, "SW2HW_CQ returned status 0x%02x\n",
+ status);
+ err = -EINVAL;
+ goto err_out_free_mr;
+ }
+
+ spin_lock_irq(&dev->cq_table.lock);
+ if (mthca_array_set(&dev->cq_table.cq,
+ cq->cqn & (dev->limits.num_cqs - 1),
+ cq)) {
+ spin_unlock_irq(&dev->cq_table.lock);
+ goto err_out_free_mr;
+ }
+ spin_unlock_irq(&dev->cq_table.lock);
+
+ cq->cons_index = 0;
+
+ mthca_free_mailbox(dev, mailbox);
+
+ return 0;
+
+err_out_free_mr:
+ if (cq->is_kernel)
+ mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+
+err_out_mailbox:
+ mthca_free_mailbox(dev, mailbox);
+
+err_out_arm:
+ if (cq->is_kernel && mthca_is_memfree(dev))
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
+
+err_out_ci:
+ if (cq->is_kernel && mthca_is_memfree(dev))
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
+
+err_out_icm:
+ mthca_table_put(dev, dev->cq_table.table, cq->cqn);
+
+err_out:
+ mthca_free(&dev->cq_table.alloc, cq->cqn);
+
+ return err;
+}
+
+static inline int get_cq_refcount(struct mthca_dev *dev, struct mthca_cq *cq)
+{
+ int c;
+
+ spin_lock_irq(&dev->cq_table.lock);
+ c = cq->refcount;
+ spin_unlock_irq(&dev->cq_table.lock);
+
+ return c;
+}
+
+void mthca_free_cq(struct mthca_dev *dev,
+ struct mthca_cq *cq)
+{
+ struct mthca_mailbox *mailbox;
+ int err;
+ u8 status;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ mthca_warn(dev, "No memory for mailbox to free CQ.\n");
+ return;
+ }
+
+ err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status);
+ if (err)
+ mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err);
+ else if (status)
+ mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status);
+
+ if (0) {
+ __be32 *ctx = mailbox->buf;
+ int j;
+
+ printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
+ cq->cqn, cq->cons_index,
+ cq->is_kernel ? !!next_cqe_sw(cq) : 0);
+ for (j = 0; j < 16; ++j)
+ printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j]));
+ }
+
+ spin_lock_irq(&dev->cq_table.lock);
+ mthca_array_clear(&dev->cq_table.cq,
+ cq->cqn & (dev->limits.num_cqs - 1));
+ --cq->refcount;
+ spin_unlock_irq(&dev->cq_table.lock);
+
+ if (dev->mthca_flags & MTHCA_FLAG_MSI_X)
+ synchronize_irq(dev->eq_table.eq[MTHCA_EQ_COMP].msi_x_vector);
+ else
+ synchronize_irq(dev->pdev->irq);
+
+ wait_event(cq->wait, !get_cq_refcount(dev, cq));
+
+ if (cq->is_kernel) {
+ mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+ if (mthca_is_memfree(dev)) {
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
+ }
+ }
+
+ mthca_table_put(dev, dev->cq_table.table, cq->cqn);
+ mthca_free(&dev->cq_table.alloc, cq->cqn);
+ mthca_free_mailbox(dev, mailbox);
+}
+
+int mthca_init_cq_table(struct mthca_dev *dev)
+{
+ int err;
+
+ spin_lock_init(&dev->cq_table.lock);
+
+ err = mthca_alloc_init(&dev->cq_table.alloc,
+ dev->limits.num_cqs,
+ (1 << 24) - 1,
+ dev->limits.reserved_cqs);
+ if (err)
+ return err;
+
+ err = mthca_array_init(&dev->cq_table.cq,
+ dev->limits.num_cqs);
+ if (err)
+ mthca_alloc_cleanup(&dev->cq_table.alloc);
+
+ return err;
+}
+
+void mthca_cleanup_cq_table(struct mthca_dev *dev)
+{
+ mthca_array_cleanup(&dev->cq_table.cq, dev->limits.num_cqs);
+ mthca_alloc_cleanup(&dev->cq_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
new file mode 100644
index 0000000..2525901
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MTHCA_DEV_H
+#define MTHCA_DEV_H
+
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/timer.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+
+#include "mthca_provider.h"
+#include "mthca_doorbell.h"
+
+#define DRV_NAME "ib_mthca"
+#define PFX DRV_NAME ": "
+#define DRV_VERSION "1.0"
+#define DRV_RELDATE "April 4, 2008"
+
+enum {
+ MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
+ MTHCA_FLAG_SRQ = 1 << 2,
+ MTHCA_FLAG_MSI_X = 1 << 3,
+ MTHCA_FLAG_NO_LAM = 1 << 4,
+ MTHCA_FLAG_FMR = 1 << 5,
+ MTHCA_FLAG_MEMFREE = 1 << 6,
+ MTHCA_FLAG_PCIE = 1 << 7,
+ MTHCA_FLAG_SINAI_OPT = 1 << 8
+};
+
+enum {
+ MTHCA_MAX_PORTS = 2
+};
+
+enum {
+ MTHCA_BOARD_ID_LEN = 64
+};
+
+enum {
+ MTHCA_EQ_CONTEXT_SIZE = 0x40,
+ MTHCA_CQ_CONTEXT_SIZE = 0x40,
+ MTHCA_QP_CONTEXT_SIZE = 0x200,
+ MTHCA_RDB_ENTRY_SIZE = 0x20,
+ MTHCA_AV_SIZE = 0x20,
+ MTHCA_MGM_ENTRY_SIZE = 0x100,
+
+ /* Arbel FW gives us these, but we need them for Tavor */
+ MTHCA_MPT_ENTRY_SIZE = 0x40,
+ MTHCA_MTT_SEG_SIZE = 0x40,
+
+ MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
+};
+
+enum {
+ MTHCA_EQ_CMD,
+ MTHCA_EQ_ASYNC,
+ MTHCA_EQ_COMP,
+ MTHCA_NUM_EQ
+};
+
+enum {
+ MTHCA_OPCODE_NOP = 0x00,
+ MTHCA_OPCODE_RDMA_WRITE = 0x08,
+ MTHCA_OPCODE_RDMA_WRITE_IMM = 0x09,
+ MTHCA_OPCODE_SEND = 0x0a,
+ MTHCA_OPCODE_SEND_IMM = 0x0b,
+ MTHCA_OPCODE_RDMA_READ = 0x10,
+ MTHCA_OPCODE_ATOMIC_CS = 0x11,
+ MTHCA_OPCODE_ATOMIC_FA = 0x12,
+ MTHCA_OPCODE_BIND_MW = 0x18,
+ MTHCA_OPCODE_INVALID = 0xff
+};
+
+enum {
+ MTHCA_CMD_USE_EVENTS = 1 << 0,
+ MTHCA_CMD_POST_DOORBELLS = 1 << 1
+};
+
+enum {
+ MTHCA_CMD_NUM_DBELL_DWORDS = 8
+};
+
+struct mthca_cmd {
+ struct pci_pool *pool;
+ struct mutex hcr_mutex;
+ struct semaphore poll_sem;
+ struct semaphore event_sem;
+ int max_cmds;
+ spinlock_t context_lock;
+ int free_head;
+ struct mthca_cmd_context *context;
+ u16 token_mask;
+ u32 flags;
+ void __iomem *dbell_map;
+ u16 dbell_offsets[MTHCA_CMD_NUM_DBELL_DWORDS];
+};
+
+struct mthca_limits {
+ int num_ports;
+ int vl_cap;
+ int mtu_cap;
+ int gid_table_len;
+ int pkey_table_len;
+ int local_ca_ack_delay;
+ int num_uars;
+ int max_sg;
+ int num_qps;
+ int max_wqes;
+ int max_desc_sz;
+ int max_qp_init_rdma;
+ int reserved_qps;
+ int num_srqs;
+ int max_srq_wqes;
+ int max_srq_sge;
+ int reserved_srqs;
+ int num_eecs;
+ int reserved_eecs;
+ int num_cqs;
+ int max_cqes;
+ int reserved_cqs;
+ int num_eqs;
+ int reserved_eqs;
+ int num_mpts;
+ int num_mtt_segs;
+ int fmr_reserved_mtts;
+ int reserved_mtts;
+ int reserved_mrws;
+ int reserved_uars;
+ int num_mgms;
+ int num_amgms;
+ int reserved_mcgs;
+ int num_pds;
+ int reserved_pds;
+ u32 page_size_cap;
+ u32 flags;
+ u16 stat_rate_support;
+ u8 port_width_cap;
+};
+
+struct mthca_alloc {
+ u32 last;
+ u32 top;
+ u32 max;
+ u32 mask;
+ spinlock_t lock;
+ unsigned long *table;
+};
+
+struct mthca_array {
+ struct {
+ void **page;
+ int used;
+ } *page_list;
+};
+
+struct mthca_uar_table {
+ struct mthca_alloc alloc;
+ u64 uarc_base;
+ int uarc_size;
+};
+
+struct mthca_pd_table {
+ struct mthca_alloc alloc;
+};
+
+struct mthca_buddy {
+ unsigned long **bits;
+ int *num_free;
+ int max_order;
+ spinlock_t lock;
+};
+
+struct mthca_mr_table {
+ struct mthca_alloc mpt_alloc;
+ struct mthca_buddy mtt_buddy;
+ struct mthca_buddy *fmr_mtt_buddy;
+ u64 mtt_base;
+ u64 mpt_base;
+ struct mthca_icm_table *mtt_table;
+ struct mthca_icm_table *mpt_table;
+ struct {
+ void __iomem *mpt_base;
+ void __iomem *mtt_base;
+ struct mthca_buddy mtt_buddy;
+ } tavor_fmr;
+};
+
+struct mthca_eq_table {
+ struct mthca_alloc alloc;
+ void __iomem *clr_int;
+ u32 clr_mask;
+ u32 arm_mask;
+ struct mthca_eq eq[MTHCA_NUM_EQ];
+ u64 icm_virt;
+ struct page *icm_page;
+ dma_addr_t icm_dma;
+ int have_irq;
+ u8 inta_pin;
+};
+
+struct mthca_cq_table {
+ struct mthca_alloc alloc;
+ spinlock_t lock;
+ struct mthca_array cq;
+ struct mthca_icm_table *table;
+};
+
+struct mthca_srq_table {
+ struct mthca_alloc alloc;
+ spinlock_t lock;
+ struct mthca_array srq;
+ struct mthca_icm_table *table;
+};
+
+struct mthca_qp_table {
+ struct mthca_alloc alloc;
+ u32 rdb_base;
+ int rdb_shift;
+ int sqp_start;
+ spinlock_t lock;
+ struct mthca_array qp;
+ struct mthca_icm_table *qp_table;
+ struct mthca_icm_table *eqp_table;
+ struct mthca_icm_table *rdb_table;
+};
+
+struct mthca_av_table {
+ struct pci_pool *pool;
+ int num_ddr_avs;
+ u64 ddr_av_base;
+ void __iomem *av_map;
+ struct mthca_alloc alloc;
+};
+
+struct mthca_mcg_table {
+ struct mutex mutex;
+ struct mthca_alloc alloc;
+ struct mthca_icm_table *table;
+};
+
+struct mthca_catas_err {
+ u64 addr;
+ u32 __iomem *map;
+ u32 size;
+ struct timer_list timer;
+ struct list_head list;
+};
+
+extern struct mutex mthca_device_mutex;
+
+struct mthca_dev {
+ struct ib_device ib_dev;
+ struct pci_dev *pdev;
+
+ int hca_type;
+ unsigned long mthca_flags;
+ unsigned long device_cap_flags;
+
+ u32 rev_id;
+ char board_id[MTHCA_BOARD_ID_LEN];
+
+ /* firmware info */
+ u64 fw_ver;
+ union {
+ struct {
+ u64 fw_start;
+ u64 fw_end;
+ } tavor;
+ struct {
+ u64 clr_int_base;
+ u64 eq_arm_base;
+ u64 eq_set_ci_base;
+ struct mthca_icm *fw_icm;
+ struct mthca_icm *aux_icm;
+ u16 fw_pages;
+ } arbel;
+ } fw;
+
+ u64 ddr_start;
+ u64 ddr_end;
+
+ MTHCA_DECLARE_DOORBELL_LOCK(doorbell_lock)
+ struct mutex cap_mask_mutex;
+
+ void __iomem *hcr;
+ void __iomem *kar;
+ void __iomem *clr_base;
+ union {
+ struct {
+ void __iomem *ecr_base;
+ } tavor;
+ struct {
+ void __iomem *eq_arm;
+ void __iomem *eq_set_ci_base;
+ } arbel;
+ } eq_regs;
+
+ struct mthca_cmd cmd;
+ struct mthca_limits limits;
+
+ struct mthca_uar_table uar_table;
+ struct mthca_pd_table pd_table;
+ struct mthca_mr_table mr_table;
+ struct mthca_eq_table eq_table;
+ struct mthca_cq_table cq_table;
+ struct mthca_srq_table srq_table;
+ struct mthca_qp_table qp_table;
+ struct mthca_av_table av_table;
+ struct mthca_mcg_table mcg_table;
+
+ struct mthca_catas_err catas_err;
+
+ struct mthca_uar driver_uar;
+ struct mthca_db_table *db_tab;
+ struct mthca_pd driver_pd;
+ struct mthca_mr driver_mr;
+
+ struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2];
+ struct ib_ah *sm_ah[MTHCA_MAX_PORTS];
+ spinlock_t sm_lock;
+ u8 rate[MTHCA_MAX_PORTS];
+};
+
+#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
+extern int mthca_debug_level;
+
+#define mthca_dbg(mdev, format, arg...) \
+ do { \
+ if (mthca_debug_level) \
+ dev_printk(KERN_DEBUG, &mdev->pdev->dev, format, ## arg); \
+ } while (0)
+
+#else /* CONFIG_INFINIBAND_MTHCA_DEBUG */
+
+#define mthca_dbg(mdev, format, arg...) do { (void) mdev; } while (0)
+
+#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */
+
+#define mthca_err(mdev, format, arg...) \
+ dev_err(&mdev->pdev->dev, format, ## arg)
+#define mthca_info(mdev, format, arg...) \
+ dev_info(&mdev->pdev->dev, format, ## arg)
+#define mthca_warn(mdev, format, arg...) \
+ dev_warn(&mdev->pdev->dev, format, ## arg)
+
+extern void __buggy_use_of_MTHCA_GET(void);
+extern void __buggy_use_of_MTHCA_PUT(void);
+
+#define MTHCA_GET(dest, source, offset) \
+ do { \
+ void *__p = (char *) (source) + (offset); \
+ switch (sizeof (dest)) { \
+ case 1: (dest) = *(u8 *) __p; break; \
+ case 2: (dest) = be16_to_cpup(__p); break; \
+ case 4: (dest) = be32_to_cpup(__p); break; \
+ case 8: (dest) = be64_to_cpup(__p); break; \
+ default: __buggy_use_of_MTHCA_GET(); \
+ } \
+ } while (0)
+
+#define MTHCA_PUT(dest, source, offset) \
+ do { \
+ void *__d = ((char *) (dest) + (offset)); \
+ switch (sizeof(source)) { \
+ case 1: *(u8 *) __d = (source); break; \
+ case 2: *(__be16 *) __d = cpu_to_be16(source); break; \
+ case 4: *(__be32 *) __d = cpu_to_be32(source); break; \
+ case 8: *(__be64 *) __d = cpu_to_be64(source); break; \
+ default: __buggy_use_of_MTHCA_PUT(); \
+ } \
+ } while (0)
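+
+/*
+ * Illustrative use only (the offset name below is hypothetical):
+ * pulling a 16-bit limit out of a big-endian firmware mailbox looks
+ * like
+ *
+ *	MTHCA_GET(field, mailbox->buf, SOME_FIELD_OFFSET);
+ *
+ * sizeof(dest) selects the byteswap width, so callers only name the
+ * destination, the mailbox buffer and the byte offset.
+ */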
+
+int mthca_reset(struct mthca_dev *mdev);
+
+u32 mthca_alloc(struct mthca_alloc *alloc);
+void mthca_free(struct mthca_alloc *alloc, u32 obj);
+int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
+ u32 reserved);
+void mthca_alloc_cleanup(struct mthca_alloc *alloc);
+void *mthca_array_get(struct mthca_array *array, int index);
+int mthca_array_set(struct mthca_array *array, int index, void *value);
+void mthca_array_clear(struct mthca_array *array, int index);
+int mthca_array_init(struct mthca_array *array, int nent);
+void mthca_array_cleanup(struct mthca_array *array, int nent);
+int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
+ union mthca_buf *buf, int *is_direct, struct mthca_pd *pd,
+ int hca_write, struct mthca_mr *mr);
+void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf,
+ int is_direct, struct mthca_mr *mr);
+
+int mthca_init_uar_table(struct mthca_dev *dev);
+int mthca_init_pd_table(struct mthca_dev *dev);
+int mthca_init_mr_table(struct mthca_dev *dev);
+int mthca_init_eq_table(struct mthca_dev *dev);
+int mthca_init_cq_table(struct mthca_dev *dev);
+int mthca_init_srq_table(struct mthca_dev *dev);
+int mthca_init_qp_table(struct mthca_dev *dev);
+int mthca_init_av_table(struct mthca_dev *dev);
+int mthca_init_mcg_table(struct mthca_dev *dev);
+
+void mthca_cleanup_uar_table(struct mthca_dev *dev);
+void mthca_cleanup_pd_table(struct mthca_dev *dev);
+void mthca_cleanup_mr_table(struct mthca_dev *dev);
+void mthca_cleanup_eq_table(struct mthca_dev *dev);
+void mthca_cleanup_cq_table(struct mthca_dev *dev);
+void mthca_cleanup_srq_table(struct mthca_dev *dev);
+void mthca_cleanup_qp_table(struct mthca_dev *dev);
+void mthca_cleanup_av_table(struct mthca_dev *dev);
+void mthca_cleanup_mcg_table(struct mthca_dev *dev);
+
+int mthca_register_device(struct mthca_dev *dev);
+void mthca_unregister_device(struct mthca_dev *dev);
+
+void mthca_start_catas_poll(struct mthca_dev *dev);
+void mthca_stop_catas_poll(struct mthca_dev *dev);
+int __mthca_restart_one(struct pci_dev *pdev);
+int mthca_catas_init(void);
+void mthca_catas_cleanup(void);
+
+int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
+void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
+
+int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
+void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
+
+int mthca_write_mtt_size(struct mthca_dev *dev);
+
+struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
+void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+ int start_index, u64 *buffer_list, int list_len);
+int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
+ u64 iova, u64 total_size, u32 access, struct mthca_mr *mr);
+int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
+ u32 access, struct mthca_mr *mr);
+int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
+ u64 *buffer_list, int buffer_size_shift,
+ int list_len, u64 iova, u64 total_size,
+ u32 access, struct mthca_mr *mr);
+void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
+
+int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
+ u32 access, struct mthca_fmr *fmr);
+int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova);
+void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
+int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova);
+void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
+int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr);
+
+int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt);
+void mthca_unmap_eq_icm(struct mthca_dev *dev);
+
+int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *entry);
+int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+int mthca_init_cq(struct mthca_dev *dev, int nent,
+ struct mthca_ucontext *ctx, u32 pdn,
+ struct mthca_cq *cq);
+void mthca_free_cq(struct mthca_dev *dev,
+ struct mthca_cq *cq);
+void mthca_cq_completion(struct mthca_dev *dev, u32 cqn);
+void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
+ enum ib_event_type event_type);
+void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn,
+ struct mthca_srq *srq);
+void mthca_cq_resize_copy_cqes(struct mthca_cq *cq);
+int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent);
+void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe);
+
+int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
+ struct ib_srq_attr *attr, struct mthca_srq *srq);
+void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
+int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+int mthca_max_srq_sge(struct mthca_dev *dev);
+void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
+ enum ib_event_type event_type);
+void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
+int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+
+void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
+ enum ib_event_type event_type);
+int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata);
+int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
+ int index, int *dbd, __be32 *new_wqe);
+int mthca_alloc_qp(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_cq *send_cq,
+ struct mthca_cq *recv_cq,
+ enum ib_qp_type type,
+ enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
+ struct mthca_qp *qp);
+int mthca_alloc_sqp(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_cq *send_cq,
+ struct mthca_cq *recv_cq,
+ enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
+ int qpn,
+ int port,
+ struct mthca_sqp *sqp);
+void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
+int mthca_create_ah(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct ib_ah_attr *ah_attr,
+ struct mthca_ah *ah);
+int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
+int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
+ struct ib_ud_header *header);
+int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
+int mthca_ah_grh_present(struct mthca_ah *ah);
+u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port);
+enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port);
+
+int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+
+int mthca_process_mad(struct ib_device *ibdev,
+ int mad_flags,
+ u8 port_num,
+ struct ib_wc *in_wc,
+ struct ib_grh *in_grh,
+ struct ib_mad *in_mad,
+ struct ib_mad *out_mad);
+int mthca_create_agents(struct mthca_dev *dev);
+void mthca_free_agents(struct mthca_dev *dev);
+
+static inline struct mthca_dev *to_mdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct mthca_dev, ib_dev);
+}
+
+static inline int mthca_is_memfree(struct mthca_dev *dev)
+{
+ return dev->mthca_flags & MTHCA_FLAG_MEMFREE;
+}
+
+#endif /* MTHCA_DEV_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h
new file mode 100644
index 0000000..14f51ef
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/types.h>
+
+#define MTHCA_RD_DOORBELL 0x00
+#define MTHCA_SEND_DOORBELL 0x10
+#define MTHCA_RECEIVE_DOORBELL 0x18
+#define MTHCA_CQ_DOORBELL 0x20
+#define MTHCA_EQ_DOORBELL 0x28
+
+#if BITS_PER_LONG == 64
+/*
+ * Assume that we can just write a 64-bit doorbell atomically. s390
+ * actually doesn't have writeq() but S/390 systems don't even have
+ * PCI so we won't worry about it.
+ */
+
+#define MTHCA_DECLARE_DOORBELL_LOCK(name)
+#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0)
+#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL)
+
+static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
+{
+ __raw_writeq((__force u64) val, dest);
+}
+
+static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
+ spinlock_t *doorbell_lock)
+{
+ __raw_writeq((__force u64) cpu_to_be64((u64) hi << 32 | lo), dest);
+}
+
+static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
+{
+ *(u64 *) db = *(u64 *) val;
+}
+
+#else
+
+/*
+ * Just fall back to a spinlock to protect the doorbell if
+ * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
+ * MMIO writes.
+ */
+
+#define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
+#define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
+#define MTHCA_GET_DOORBELL_LOCK(ptr) (ptr)
+
+static inline void mthca_write64_raw(__be64 val, void __iomem *dest)
+{
+ __raw_writel(((__force u32 *) &val)[0], dest);
+ __raw_writel(((__force u32 *) &val)[1], dest + 4);
+}
+
+static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
+ spinlock_t *doorbell_lock)
+{
+ unsigned long flags;
+
+ hi = (__force u32) cpu_to_be32(hi);
+ lo = (__force u32) cpu_to_be32(lo);
+
+ spin_lock_irqsave(doorbell_lock, flags);
+ __raw_writel(hi, dest);
+ __raw_writel(lo, dest + 4);
+ spin_unlock_irqrestore(doorbell_lock, flags);
+}
+
+static inline void mthca_write_db_rec(__be32 val[2], __be32 *db)
+{
+ db[0] = val[0];
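+ /* Write the two halves in order so the HCA never sees a torn doorbell record */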
+ wmb();
+ db[1] = val[1];
+}
+
+#endif
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
new file mode 100644
index 0000000..28f0e0c
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -0,0 +1,918 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+#include "mthca_config_reg.h"
+
+enum {
+ MTHCA_NUM_ASYNC_EQE = 0x80,
+ MTHCA_NUM_CMD_EQE = 0x80,
+ MTHCA_NUM_SPARE_EQE = 0x80,
+ MTHCA_EQ_ENTRY_SIZE = 0x20
+};
+
+/*
+ * Must be packed because start is 64 bits but only aligned to 32 bits.
+ */
+struct mthca_eq_context {
+ __be32 flags;
+ __be64 start;
+ __be32 logsize_usrpage;
+ __be32 tavor_pd; /* reserved for Arbel */
+ u8 reserved1[3];
+ u8 intr;
+ __be32 arbel_pd; /* lost_count for Tavor */
+ __be32 lkey;
+ u32 reserved2[2];
+ __be32 consumer_index;
+ __be32 producer_index;
+ u32 reserved3[4];
+} __attribute__((packed));
+
+#define MTHCA_EQ_STATUS_OK ( 0 << 28)
+#define MTHCA_EQ_STATUS_OVERFLOW ( 9 << 28)
+#define MTHCA_EQ_STATUS_WRITE_FAIL (10 << 28)
+#define MTHCA_EQ_OWNER_SW ( 0 << 24)
+#define MTHCA_EQ_OWNER_HW ( 1 << 24)
+#define MTHCA_EQ_FLAG_TR ( 1 << 18)
+#define MTHCA_EQ_FLAG_OI ( 1 << 17)
+#define MTHCA_EQ_STATE_ARMED ( 1 << 8)
+#define MTHCA_EQ_STATE_FIRED ( 2 << 8)
+#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3 << 8)
+#define MTHCA_EQ_STATE_ARBEL ( 8 << 8)
+
+enum {
+ MTHCA_EVENT_TYPE_COMP = 0x00,
+ MTHCA_EVENT_TYPE_PATH_MIG = 0x01,
+ MTHCA_EVENT_TYPE_COMM_EST = 0x02,
+ MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03,
+ MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13,
+ MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14,
+ MTHCA_EVENT_TYPE_CQ_ERROR = 0x04,
+ MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05,
+ MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06,
+ MTHCA_EVENT_TYPE_PATH_MIG_FAILED = 0x07,
+ MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
+ MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11,
+ MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12,
+ MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08,
+ MTHCA_EVENT_TYPE_PORT_CHANGE = 0x09,
+ MTHCA_EVENT_TYPE_EQ_OVERFLOW = 0x0f,
+ MTHCA_EVENT_TYPE_ECC_DETECT = 0x0e,
+ MTHCA_EVENT_TYPE_CMD = 0x0a
+};
+
+#define MTHCA_ASYNC_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_PATH_MIG) | \
+ (1ULL << MTHCA_EVENT_TYPE_COMM_EST) | \
+ (1ULL << MTHCA_EVENT_TYPE_SQ_DRAINED) | \
+ (1ULL << MTHCA_EVENT_TYPE_CQ_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_PATH_MIG_FAILED) | \
+ (1ULL << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \
+ (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
+#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \
+ (1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
+ (1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT))
+#define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD)
+
+#define MTHCA_EQ_DB_INC_CI (1 << 24)
+#define MTHCA_EQ_DB_REQ_NOT (2 << 24)
+#define MTHCA_EQ_DB_DISARM_CQ (3 << 24)
+#define MTHCA_EQ_DB_SET_CI (4 << 24)
+#define MTHCA_EQ_DB_ALWAYS_ARM (5 << 24)
+
+struct mthca_eqe {
+ u8 reserved1;
+ u8 type;
+ u8 reserved2;
+ u8 subtype;
+ union {
+ u32 raw[6];
+ struct {
+ __be32 cqn;
+ } __attribute__((packed)) comp;
+ struct {
+ u16 reserved1;
+ __be16 token;
+ u32 reserved2;
+ u8 reserved3[3];
+ u8 status;
+ __be64 out_param;
+ } __attribute__((packed)) cmd;
+ struct {
+ __be32 qpn;
+ } __attribute__((packed)) qp;
+ struct {
+ __be32 srqn;
+ } __attribute__((packed)) srq;
+ struct {
+ __be32 cqn;
+ u32 reserved1;
+ u8 reserved2[3];
+ u8 syndrome;
+ } __attribute__((packed)) cq_err;
+ struct {
+ u32 reserved1[2];
+ __be32 port;
+ } __attribute__((packed)) port_change;
+ } event;
+ u8 reserved3[3];
+ u8 owner;
+} __attribute__((packed));
+
+#define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7)
+#define MTHCA_EQ_ENTRY_OWNER_HW (1 << 7)
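+
+/*
+ * EQ entries bounce between hardware and software using this owner
+ * bit: next_eqe_sw() only returns an entry the HCA has handed back,
+ * and set_eqe_hw() returns it to hardware once it has been handled.
+ */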
+
+static inline u64 async_mask(struct mthca_dev *dev)
+{
+ return dev->mthca_flags & MTHCA_FLAG_SRQ ?
+ MTHCA_ASYNC_EVENT_MASK | MTHCA_SRQ_EVENT_MASK :
+ MTHCA_ASYNC_EVENT_MASK;
+}
+
+static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
+{
+ /*
+ * This barrier makes sure that all updates to ownership bits
+ * done by set_eqe_hw() hit memory before the consumer index
+ * is updated. set_eq_ci() allows the HCA to possibly write
+ * more EQ entries, and we want to avoid the exceedingly
+ * unlikely possibility of the HCA writing an entry and then
+ * having set_eqe_hw() overwrite the owner field.
+ */
+ wmb();
+ mthca_write64(MTHCA_EQ_DB_SET_CI | eq->eqn, ci & (eq->nent - 1),
+ dev->kar + MTHCA_EQ_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+}
+
+static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
+{
+ /* See comment in tavor_set_eq_ci() above. */
+ wmb();
+ __raw_writel((__force u32) cpu_to_be32(ci),
+ dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8);
+ /* We still want ordering, just not swabbing, so add a barrier */
+ mb();
+}
+
+static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci)
+{
+ if (mthca_is_memfree(dev))
+ arbel_set_eq_ci(dev, eq, ci);
+ else
+ tavor_set_eq_ci(dev, eq, ci);
+}
+
+static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn)
+{
+ mthca_write64(MTHCA_EQ_DB_REQ_NOT | eqn, 0,
+ dev->kar + MTHCA_EQ_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+}
+
+static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask)
+{
+ writel(eqn_mask, dev->eq_regs.arbel.eq_arm);
+}
+
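+/*
+ * Only Tavor needs an explicit doorbell to disarm a CQ after a
+ * completion event; on mem-free HCAs this is a no-op.
+ */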
+static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn)
+{
+ if (!mthca_is_memfree(dev)) {
+ mthca_write64(MTHCA_EQ_DB_DISARM_CQ | eqn, cqn,
+ dev->kar + MTHCA_EQ_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ }
+}
+
+static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry)
+{
+ unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE;
+ return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
+}
+
+static inline struct mthca_eqe *next_eqe_sw(struct mthca_eq *eq)
+{
+ struct mthca_eqe *eqe;
+ eqe = get_eqe(eq, eq->cons_index);
+ return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe;
+}
+
+static inline void set_eqe_hw(struct mthca_eqe *eqe)
+{
+ eqe->owner = MTHCA_EQ_ENTRY_OWNER_HW;
+}
+
+static void port_change(struct mthca_dev *dev, int port, int active)
+{
+ struct ib_event record;
+
+ mthca_dbg(dev, "Port change to %s for port %d\n",
+ active ? "active" : "down", port);
+
+ record.device = &dev->ib_dev;
+ record.event = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ record.element.port_num = port;
+
+ ib_dispatch_event(&record);
+}
+
+static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
+{
+ struct mthca_eqe *eqe;
+ int disarm_cqn;
+ int eqes_found = 0;
+ int set_ci = 0;
+
+ while ((eqe = next_eqe_sw(eq))) {
+ /*
+ * Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
+ rmb();
+
+ switch (eqe->type) {
+ case MTHCA_EVENT_TYPE_COMP:
+ disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
+ disarm_cq(dev, eq->eqn, disarm_cqn);
+ mthca_cq_completion(dev, disarm_cqn);
+ break;
+
+ case MTHCA_EVENT_TYPE_PATH_MIG:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_PATH_MIG);
+ break;
+
+ case MTHCA_EVENT_TYPE_COMM_EST:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_COMM_EST);
+ break;
+
+ case MTHCA_EVENT_TYPE_SQ_DRAINED:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_SQ_DRAINED);
+ break;
+
+ case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_QP_LAST_WQE_REACHED);
+ break;
+
+ case MTHCA_EVENT_TYPE_SRQ_LIMIT:
+ mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
+ IB_EVENT_SRQ_LIMIT_REACHED);
+ break;
+
+ case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_QP_FATAL);
+ break;
+
+ case MTHCA_EVENT_TYPE_PATH_MIG_FAILED:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_PATH_MIG_ERR);
+ break;
+
+ case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_QP_REQ_ERR);
+ break;
+
+ case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR:
+ mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+ IB_EVENT_QP_ACCESS_ERR);
+ break;
+
+ case MTHCA_EVENT_TYPE_CMD:
+ mthca_cmd_event(dev,
+ be16_to_cpu(eqe->event.cmd.token),
+ eqe->event.cmd.status,
+ be64_to_cpu(eqe->event.cmd.out_param));
+ break;
+
+ case MTHCA_EVENT_TYPE_PORT_CHANGE:
+ port_change(dev,
+ (be32_to_cpu(eqe->event.port_change.port) >> 28) & 3,
+ eqe->subtype == 0x4);
+ break;
+
+ case MTHCA_EVENT_TYPE_CQ_ERROR:
+ mthca_warn(dev, "CQ %s on CQN %06x\n",
+ eqe->event.cq_err.syndrome == 1 ?
+ "overrun" : "access violation",
+ be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
+ mthca_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn),
+ IB_EVENT_CQ_ERR);
+ break;
+
+ case MTHCA_EVENT_TYPE_EQ_OVERFLOW:
+ mthca_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
+ break;
+
+ case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR:
+ case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR:
+ case MTHCA_EVENT_TYPE_ECC_DETECT:
+ default:
+ mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
+ eqe->type, eqe->subtype, eq->eqn);
+ break;
+ }
+
+ set_eqe_hw(eqe);
+ ++eq->cons_index;
+ eqes_found = 1;
+ ++set_ci;
+
+ /*
+ * The HCA will think the queue has overflowed if we
+ * don't tell it we've been processing events. We
+ * create our EQs with MTHCA_NUM_SPARE_EQE extra
+ * entries, so we must update our consumer index at
+ * least that often.
+ */
+ if (unlikely(set_ci >= MTHCA_NUM_SPARE_EQE)) {
+ /*
+ * Conditional on hca_type is OK here because
+ * this is a rare case, not the fast path.
+ */
+ set_eq_ci(dev, eq, eq->cons_index);
+ set_ci = 0;
+ }
+ }
+
+ /*
+ * Rely on caller to set consumer index so that we don't have
+ * to test hca_type in our interrupt handling fast path.
+ */
+ return eqes_found;
+}
+
+static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr)
+{
+ struct mthca_dev *dev = dev_ptr;
+ u32 ecr;
+ int i;
+
+ if (dev->eq_table.clr_mask)
+ writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
+
+ ecr = readl(dev->eq_regs.tavor.ecr_base + 4);
+ if (!ecr)
+ return IRQ_NONE;
+
+ writel(ecr, dev->eq_regs.tavor.ecr_base +
+ MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
+
+ for (i = 0; i < MTHCA_NUM_EQ; ++i)
+ if (ecr & dev->eq_table.eq[i].eqn_mask) {
+ if (mthca_eq_int(dev, &dev->eq_table.eq[i]))
+ tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
+ dev->eq_table.eq[i].cons_index);
+ tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr)
+{
+ struct mthca_eq *eq = eq_ptr;
+ struct mthca_dev *dev = eq->dev;
+
+ mthca_eq_int(dev, eq);
+ tavor_set_eq_ci(dev, eq, eq->cons_index);
+ tavor_eq_req_not(dev, eq->eqn);
+
+ /* MSI-X vectors always belong to us */
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t mthca_arbel_interrupt(int irq, void *dev_ptr)
+{
+ struct mthca_dev *dev = dev_ptr;
+ int work = 0;
+ int i;
+
+ if (dev->eq_table.clr_mask)
+ writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
+
+ for (i = 0; i < MTHCA_NUM_EQ; ++i)
+ if (mthca_eq_int(dev, &dev->eq_table.eq[i])) {
+ work = 1;
+ arbel_set_eq_ci(dev, &dev->eq_table.eq[i],
+ dev->eq_table.eq[i].cons_index);
+ }
+
+ arbel_eq_req_not(dev, dev->eq_table.arm_mask);
+
+ return IRQ_RETVAL(work);
+}
+
+static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr)
+{
+ struct mthca_eq *eq = eq_ptr;
+ struct mthca_dev *dev = eq->dev;
+
+ mthca_eq_int(dev, eq);
+ arbel_set_eq_ci(dev, eq, eq->cons_index);
+ arbel_eq_req_not(dev, eq->eqn_mask);
+
+ /* MSI-X vectors always belong to us */
+ return IRQ_HANDLED;
+}
+
+static int mthca_create_eq(struct mthca_dev *dev,
+ int nent,
+ u8 intr,
+ struct mthca_eq *eq)
+{
+ int npages;
+ u64 *dma_list = NULL;
+ dma_addr_t t;
+ struct mthca_mailbox *mailbox;
+ struct mthca_eq_context *eq_context;
+ int err = -ENOMEM;
+ int i;
+ u8 status;
+
+ eq->dev = dev;
+ eq->nent = roundup_pow_of_two(max(nent, 2));
+ npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;
+
+ eq->page_list = kmalloc(npages * sizeof *eq->page_list,
+ GFP_KERNEL);
+ if (!eq->page_list)
+ goto err_out;
+
+ for (i = 0; i < npages; ++i)
+ eq->page_list[i].buf = NULL;
+
+ dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ if (!dma_list)
+ goto err_out_free;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ goto err_out_free;
+ eq_context = mailbox->buf;
+
+ for (i = 0; i < npages; ++i) {
+ eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
+ PAGE_SIZE, &t, GFP_KERNEL);
+ if (!eq->page_list[i].buf)
+ goto err_out_free_pages;
+
+ dma_list[i] = t;
+ pci_unmap_addr_set(&eq->page_list[i], mapping, t);
+
+ clear_page(eq->page_list[i].buf);
+ }
+
+ for (i = 0; i < eq->nent; ++i)
+ set_eqe_hw(get_eqe(eq, i));
+
+ eq->eqn = mthca_alloc(&dev->eq_table.alloc);
+ if (eq->eqn == -1)
+ goto err_out_free_pages;
+
+ err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
+ dma_list, PAGE_SHIFT, npages,
+ 0, npages * PAGE_SIZE,
+ MTHCA_MPT_FLAG_LOCAL_WRITE |
+ MTHCA_MPT_FLAG_LOCAL_READ,
+ &eq->mr);
+ if (err)
+ goto err_out_free_eq;
+
+ memset(eq_context, 0, sizeof *eq_context);
+ eq_context->flags = cpu_to_be32(MTHCA_EQ_STATUS_OK |
+ MTHCA_EQ_OWNER_HW |
+ MTHCA_EQ_STATE_ARMED |
+ MTHCA_EQ_FLAG_TR);
+ if (mthca_is_memfree(dev))
+ eq_context->flags |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL);
+
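+ /*
+ * eq->nent is a power of two, so ffs(eq->nent) - 1 is its log2;
+ * the log size goes in the top byte of logsize_usrpage.
+ */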
+ eq_context->logsize_usrpage = cpu_to_be32((ffs(eq->nent) - 1) << 24);
+ if (mthca_is_memfree(dev)) {
+ eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
+ } else {
+ eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
+ eq_context->tavor_pd = cpu_to_be32(dev->driver_pd.pd_num);
+ }
+ eq_context->intr = intr;
+ eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey);
+
+ err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status);
+ if (err) {
+ mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err);
+ goto err_out_free_mr;
+ }
+ if (status) {
+ mthca_warn(dev, "SW2HW_EQ returned status 0x%02x\n",
+ status);
+ err = -EINVAL;
+ goto err_out_free_mr;
+ }
+
+ kfree(dma_list);
+ mthca_free_mailbox(dev, mailbox);
+
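+ /* The ECR and EQ arm registers are big-endian, so keep the mask pre-swapped for readl()/writel() */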
+ eq->eqn_mask = swab32(1 << eq->eqn);
+ eq->cons_index = 0;
+
+ dev->eq_table.arm_mask |= eq->eqn_mask;
+
+ mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
+ eq->eqn, eq->nent);
+
+ return err;
+
+ err_out_free_mr:
+ mthca_free_mr(dev, &eq->mr);
+
+ err_out_free_eq:
+ mthca_free(&dev->eq_table.alloc, eq->eqn);
+
+ err_out_free_pages:
+ for (i = 0; i < npages; ++i)
+ if (eq->page_list[i].buf)
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ eq->page_list[i].buf,
+ pci_unmap_addr(&eq->page_list[i],
+ mapping));
+
+ mthca_free_mailbox(dev, mailbox);
+
+ err_out_free:
+ kfree(eq->page_list);
+ kfree(dma_list);
+
+ err_out:
+ return err;
+}
+
+static void mthca_free_eq(struct mthca_dev *dev,
+ struct mthca_eq *eq)
+{
+ struct mthca_mailbox *mailbox;
+ int err;
+ u8 status;
+ int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
+ PAGE_SIZE;
+ int i;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return;
+
+ err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status);
+ if (err)
+ mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err);
+ if (status)
+ mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status);
+
+ dev->eq_table.arm_mask &= ~eq->eqn_mask;
+
+ if (0) {
+ mthca_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
+ for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) {
+ if (i % 4 == 0)
+ printk("[%02x] ", i * 4);
+ printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
+ if ((i + 1) % 4 == 0)
+ printk("\n");
+ }
+ }
+
+ mthca_free_mr(dev, &eq->mr);
+ for (i = 0; i < npages; ++i)
+ pci_free_consistent(dev->pdev, PAGE_SIZE,
+ eq->page_list[i].buf,
+ pci_unmap_addr(&eq->page_list[i], mapping));
+
+ kfree(eq->page_list);
+ mthca_free_mailbox(dev, mailbox);
+}
+
+static void mthca_free_irqs(struct mthca_dev *dev)
+{
+ int i;
+
+ if (dev->eq_table.have_irq)
+ free_irq(dev->pdev->irq, dev);
+ for (i = 0; i < MTHCA_NUM_EQ; ++i)
+ if (dev->eq_table.eq[i].have_irq)
+ free_irq(dev->eq_table.eq[i].msi_x_vector,
+ dev->eq_table.eq + i);
+}
+
+static int mthca_map_reg(struct mthca_dev *dev,
+ unsigned long offset, unsigned long size,
+ void __iomem **map)
+{
+ unsigned long base = pci_resource_start(dev->pdev, 0);
+
+ *map = ioremap(base + offset, size);
+ if (!*map)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int mthca_map_eq_regs(struct mthca_dev *dev)
+{
+ if (mthca_is_memfree(dev)) {
+ /*
+ * We assume that the EQ arm and EQ set CI registers
+ * fall within the first BAR. We can't trust the
+ * values firmware gives us, since those addresses are
+ * valid on the HCA's side of the PCI bus but not
+ * necessarily the host side.
+ */
+ if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+ dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
+ &dev->clr_base)) {
+ mthca_err(dev, "Couldn't map interrupt clear register, "
+ "aborting.\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * Add 4 because we limit ourselves to EQs 0 ... 31,
+ * so we only need the low word of the register.
+ */
+ if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
+ dev->fw.arbel.eq_arm_base) + 4, 4,
+ &dev->eq_regs.arbel.eq_arm)) {
+ mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
+ iounmap(dev->clr_base);
+ return -ENOMEM;
+ }
+
+ if (mthca_map_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
+ dev->fw.arbel.eq_set_ci_base,
+ MTHCA_EQ_SET_CI_SIZE,
+ &dev->eq_regs.arbel.eq_set_ci_base)) {
+ mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
+ iounmap(dev->eq_regs.arbel.eq_arm);
+ iounmap(dev->clr_base);
+ return -ENOMEM;
+ }
+ } else {
+ if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
+ &dev->clr_base)) {
+ mthca_err(dev, "Couldn't map interrupt clear register, "
+ "aborting.\n");
+ return -ENOMEM;
+ }
+
+ if (mthca_map_reg(dev, MTHCA_ECR_BASE,
+ MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
+ &dev->eq_regs.tavor.ecr_base)) {
+ mthca_err(dev, "Couldn't map ecr register, "
+ "aborting.\n");
+ iounmap(dev->clr_base);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+static void mthca_unmap_eq_regs(struct mthca_dev *dev)
+{
+ if (mthca_is_memfree(dev)) {
+ iounmap(dev->eq_regs.arbel.eq_set_ci_base);
+ iounmap(dev->eq_regs.arbel.eq_arm);
+ iounmap(dev->clr_base);
+ } else {
+ iounmap(dev->eq_regs.tavor.ecr_base);
+ iounmap(dev->clr_base);
+ }
+}
+
+int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
+{
+ int ret;
+ u8 status;
+
+ /*
+ * We assume that mapping one page is enough for the whole EQ
+ * context table. This is fine with all current HCAs, because
+ * we only use 32 EQs and each EQ uses 32 bytes of context
+ * memory, or 1 KB total.
+ */
+ dev->eq_table.icm_virt = icm_virt;
+ dev->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
+ if (!dev->eq_table.icm_page)
+ return -ENOMEM;
+ dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) {
+ __free_page(dev->eq_table.icm_page);
+ return -ENOMEM;
+ }
+
+ ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt, &status);
+ if (!ret && status)
+ ret = -EINVAL;
+ if (ret) {
+ pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+ __free_page(dev->eq_table.icm_page);
+ }
+
+ return ret;
+}
+
+void mthca_unmap_eq_icm(struct mthca_dev *dev)
+{
+ u8 status;
+
+ mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1, &status);
+ pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+ __free_page(dev->eq_table.icm_page);
+}
+
+int mthca_init_eq_table(struct mthca_dev *dev)
+{
+ int err;
+ u8 status;
+ u8 intr;
+ int i;
+
+ err = mthca_alloc_init(&dev->eq_table.alloc,
+ dev->limits.num_eqs,
+ dev->limits.num_eqs - 1,
+ dev->limits.reserved_eqs);
+ if (err)
+ return err;
+
+ err = mthca_map_eq_regs(dev);
+ if (err)
+ goto err_out_free;
+
+ if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
+ dev->eq_table.clr_mask = 0;
+ } else {
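+ /*
+ * The interrupt clear register is a big-endian 64-bit word, so
+ * INTA pins 0..31 are cleared through the dword at offset 4.
+ */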
+ dev->eq_table.clr_mask =
+ swab32(1 << (dev->eq_table.inta_pin & 31));
+ dev->eq_table.clr_int = dev->clr_base +
+ (dev->eq_table.inta_pin < 32 ? 4 : 0);
+ }
+
+ dev->eq_table.arm_mask = 0;
+
+ intr = dev->eq_table.inta_pin;
+
+ err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE,
+ (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr,
+ &dev->eq_table.eq[MTHCA_EQ_COMP]);
+ if (err)
+ goto err_out_unmap;
+
+ err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE + MTHCA_NUM_SPARE_EQE,
+ (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr,
+ &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
+ if (err)
+ goto err_out_comp;
+
+ err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_SPARE_EQE,
+ (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr,
+ &dev->eq_table.eq[MTHCA_EQ_CMD]);
+ if (err)
+ goto err_out_async;
+
+ if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
+ static const char *eq_name[] = {
+ [MTHCA_EQ_COMP] = DRV_NAME " (comp)",
+ [MTHCA_EQ_ASYNC] = DRV_NAME " (async)",
+ [MTHCA_EQ_CMD] = DRV_NAME " (cmd)"
+ };
+
+ for (i = 0; i < MTHCA_NUM_EQ; ++i) {
+ err = request_irq(dev->eq_table.eq[i].msi_x_vector,
+ mthca_is_memfree(dev) ?
+ mthca_arbel_msi_x_interrupt :
+ mthca_tavor_msi_x_interrupt,
+ 0, eq_name[i], dev->eq_table.eq + i);
+ if (err)
+ goto err_out_cmd;
+ dev->eq_table.eq[i].have_irq = 1;
+ }
+ } else {
+ err = request_irq(dev->pdev->irq,
+ mthca_is_memfree(dev) ?
+ mthca_arbel_interrupt :
+ mthca_tavor_interrupt,
+ IRQF_SHARED, DRV_NAME, dev);
+ if (err)
+ goto err_out_cmd;
+ dev->eq_table.have_irq = 1;
+ }
+
+ err = mthca_MAP_EQ(dev, async_mask(dev),
+ 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
+ if (err)
+ mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
+ dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err);
+ if (status)
+ mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n",
+ dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status);
+
+ err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
+ 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
+ if (err)
+ mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n",
+ dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err);
+ if (status)
+ mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
+ dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
+
+ for (i = 0; i < MTHCA_NUM_EQ; ++i)
+ if (mthca_is_memfree(dev))
+ arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask);
+ else
+ tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
+
+ return 0;
+
+err_out_cmd:
+ mthca_free_irqs(dev);
+ mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]);
+
+err_out_async:
+ mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]);
+
+err_out_comp:
+ mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]);
+
+err_out_unmap:
+ mthca_unmap_eq_regs(dev);
+
+err_out_free:
+ mthca_alloc_cleanup(&dev->eq_table.alloc);
+ return err;
+}
+
+void mthca_cleanup_eq_table(struct mthca_dev *dev)
+{
+ u8 status;
+ int i;
+
+ mthca_free_irqs(dev);
+
+ mthca_MAP_EQ(dev, async_mask(dev),
+ 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
+ mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
+ 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
+
+ for (i = 0; i < MTHCA_NUM_EQ; ++i)
+ mthca_free_eq(dev, &dev->eq_table.eq[i]);
+
+ mthca_unmap_eq_regs(dev);
+
+ mthca_alloc_cleanup(&dev->eq_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
new file mode 100644
index 0000000..6404495
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/string.h>
+#include <linux/slab.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+
+enum {
+ MTHCA_VENDOR_CLASS1 = 0x9,
+ MTHCA_VENDOR_CLASS2 = 0xa
+};
+
+static int mthca_update_rate(struct mthca_dev *dev, u8 port_num)
+{
+ struct ib_port_attr *tprops = NULL;
+ int ret;
+
+ tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
+ if (!tprops)
+ return -ENOMEM;
+
+ ret = ib_query_port(&dev->ib_dev, port_num, tprops);
+ if (ret) {
+ printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n",
+ ret, dev->ib_dev.name, port_num);
+ goto out;
+ }
+
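+ /* Cache speed * width; mthca_get_rate() uses it when computing static rates */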
+ dev->rate[port_num - 1] = tprops->active_speed *
+ ib_width_enum_to_int(tprops->active_width);
+
+out:
+ kfree(tprops);
+ return ret;
+}
+
+static void update_sm_ah(struct mthca_dev *dev,
+ u8 port_num, u16 lid, u8 sl)
+{
+ struct ib_ah *new_ah;
+ struct ib_ah_attr ah_attr;
+ unsigned long flags;
+
+ if (!dev->send_agent[port_num - 1][0])
+ return;
+
+ memset(&ah_attr, 0, sizeof ah_attr);
+ ah_attr.dlid = lid;
+ ah_attr.sl = sl;
+ ah_attr.port_num = port_num;
+
+ new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
+ &ah_attr);
+ if (IS_ERR(new_ah))
+ return;
+
+ spin_lock_irqsave(&dev->sm_lock, flags);
+ if (dev->sm_ah[port_num - 1])
+ ib_destroy_ah(dev->sm_ah[port_num - 1]);
+ dev->sm_ah[port_num - 1] = new_ah;
+ spin_unlock_irqrestore(&dev->sm_lock, flags);
+}
+
+/*
+ * Snoop SM MADs for port info and P_Key table sets, so we can
+ * synthesize LID change and P_Key change events.
+ */
+static void smp_snoop(struct ib_device *ibdev,
+ u8 port_num,
+ struct ib_mad *mad)
+{
+ struct ib_event event;
+
+ if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
+ if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
+ struct ib_port_info *pinfo =
+ (struct ib_port_info *) ((struct ib_smp *) mad)->data;
+
+ mthca_update_rate(to_mdev(ibdev), port_num);
+ update_sm_ah(to_mdev(ibdev), port_num,
+ be16_to_cpu(pinfo->sm_lid),
+ pinfo->neighbormtu_mastersmsl & 0xf);
+
+ event.device = ibdev;
+ event.element.port_num = port_num;
+
+ if (pinfo->clientrereg_resv_subnetto & 0x80)
+ event.event = IB_EVENT_CLIENT_REREGISTER;
+ else
+ event.event = IB_EVENT_LID_CHANGE;
+
+ ib_dispatch_event(&event);
+ }
+
+ if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
+ event.device = ibdev;
+ event.event = IB_EVENT_PKEY_CHANGE;
+ event.element.port_num = port_num;
+ ib_dispatch_event(&event);
+ }
+ }
+}
+
+static void node_desc_override(struct ib_device *dev,
+ struct ib_mad *mad)
+{
+ if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
+ mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
+ mutex_lock(&to_mdev(dev)->cap_mask_mutex);
+ memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
+ mutex_unlock(&to_mdev(dev)->cap_mask_mutex);
+ }
+}
+
+static void forward_trap(struct mthca_dev *dev,
+ u8 port_num,
+ struct ib_mad *mad)
+{
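+ /* LID-routed SMPs go out on QP0; every other class is forwarded via QP1 */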
+ int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ struct ib_mad_send_buf *send_buf;
+ struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
+ int ret;
+ unsigned long flags;
+
+ if (agent) {
+ send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
+ IB_MGMT_MAD_DATA, GFP_ATOMIC);
+ /*
+ * We rely here on the fact that MLX QPs don't use the
+ * address handle after the send is posted (this is
+ * wrong following the IB spec strictly, but we know
+ * it's OK for our devices).
+ */
+ spin_lock_irqsave(&dev->sm_lock, flags);
+ memcpy(send_buf->mad, mad, sizeof *mad);
+ if ((send_buf->ah = dev->sm_ah[port_num - 1]))
+ ret = ib_post_send_mad(send_buf, NULL);
+ else
+ ret = -EINVAL;
+ spin_unlock_irqrestore(&dev->sm_lock, flags);
+
+ if (ret)
+ ib_free_send_mad(send_buf);
+ }
+}
+
+int mthca_process_mad(struct ib_device *ibdev,
+ int mad_flags,
+ u8 port_num,
+ struct ib_wc *in_wc,
+ struct ib_grh *in_grh,
+ struct ib_mad *in_mad,
+ struct ib_mad *out_mad)
+{
+ int err;
+ u8 status;
+ u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
+
+ /* Forward locally generated traps to the SM */
+ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP &&
+ slid == 0) {
+ forward_trap(to_mdev(ibdev), port_num, in_mad);
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+ }
+
+ /*
+ * Only handle SM gets, sets and trap represses for SM class
+ *
+ * Only handle PMA and Mellanox vendor-specific class gets and
+ * sets for other classes.
+ */
+ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
+ return IB_MAD_RESULT_SUCCESS;
+
+ /*
+ * Don't process SMInfo queries or vendor-specific
+ * MADs -- the SMA can't handle them.
+ */
+ if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
+ ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
+ IB_SMP_ATTR_VENDOR_MASK))
+ return IB_MAD_RESULT_SUCCESS;
+ } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
+ in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 ||
+ in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) {
+ if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
+ return IB_MAD_RESULT_SUCCESS;
+ } else
+ return IB_MAD_RESULT_SUCCESS;
+
+ err = mthca_MAD_IFC(to_mdev(ibdev),
+ mad_flags & IB_MAD_IGNORE_MKEY,
+ mad_flags & IB_MAD_IGNORE_BKEY,
+ port_num, in_wc, in_grh, in_mad, out_mad,
+ &status);
+ if (err) {
+ mthca_err(to_mdev(ibdev), "MAD_IFC failed\n");
+ return IB_MAD_RESULT_FAILURE;
+ }
+ if (status == MTHCA_CMD_STAT_BAD_PKT)
+ return IB_MAD_RESULT_SUCCESS;
+ if (status) {
+ mthca_err(to_mdev(ibdev), "MAD_IFC returned status %02x\n",
+ status);
+ return IB_MAD_RESULT_FAILURE;
+ }
+
+ if (!out_mad->mad_hdr.status) {
+ smp_snoop(ibdev, port_num, in_mad);
+ node_desc_override(ibdev, out_mad);
+ }
+
+ /* set return bit in status of directed route responses */
+ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
+
+ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
+ /* no response for trap repress */
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+static void send_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ ib_free_send_mad(mad_send_wc->send_buf);
+}
+
+int mthca_create_agents(struct mthca_dev *dev)
+{
+ struct ib_mad_agent *agent;
+ int p, q;
+ int ret;
+
+ spin_lock_init(&dev->sm_lock);
+
+ for (p = 0; p < dev->limits.num_ports; ++p)
+ for (q = 0; q <= 1; ++q) {
+ agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
+ q ? IB_QPT_GSI : IB_QPT_SMI,
+ NULL, 0, send_handler,
+ NULL, NULL);
+ if (IS_ERR(agent)) {
+ ret = PTR_ERR(agent);
+ goto err;
+ }
+ dev->send_agent[p][q] = agent;
+ }
+
+ for (p = 1; p <= dev->limits.num_ports; ++p) {
+ ret = mthca_update_rate(dev, p);
+ if (ret) {
+ mthca_err(dev, "Failed to obtain port %d rate."
+ " aborting.\n", p);
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ for (p = 0; p < dev->limits.num_ports; ++p)
+ for (q = 0; q <= 1; ++q)
+ if (dev->send_agent[p][q])
+ ib_unregister_mad_agent(dev->send_agent[p][q]);
+
+ return ret;
+}
+
+void mthca_free_agents(struct mthca_dev *dev)
+{
+ struct ib_mad_agent *agent;
+ int p, q;
+
+ for (p = 0; p < dev->limits.num_ports; ++p) {
+ for (q = 0; q <= 1; ++q) {
+ agent = dev->send_agent[p][q];
+ dev->send_agent[p][q] = NULL;
+ ib_unregister_mad_agent(agent);
+ }
+
+ if (dev->sm_ah[p])
+ ib_destroy_ah(dev->sm_ah[p]);
+ }
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
new file mode 100644
index 0000000..52f60f4
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -0,0 +1,1346 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+
+#include "mthca_dev.h"
+#include "mthca_config_reg.h"
+#include "mthca_cmd.h"
+#include "mthca_profile.h"
+#include "mthca_memfree.h"
+#include "mthca_wqe.h"
+
+MODULE_AUTHOR("Roland Dreier");
+MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
+
+int mthca_debug_level = 0;
+module_param_named(debug_level, mthca_debug_level, int, 0644);
+MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
+
+#endif /* CONFIG_INFINIBAND_MTHCA_DEBUG */
+
+#ifdef CONFIG_PCI_MSI
+
+static int msi_x = 1;
+module_param(msi_x, int, 0444);
+MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
+
+#else /* CONFIG_PCI_MSI */
+
+#define msi_x (0)
+
+#endif /* CONFIG_PCI_MSI */
+
+static int tune_pci = 0;
+module_param(tune_pci, int, 0444);
+MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero");
+
+DEFINE_MUTEX(mthca_device_mutex);
+
+#define MTHCA_DEFAULT_NUM_QP (1 << 16)
+#define MTHCA_DEFAULT_RDB_PER_QP (1 << 2)
+#define MTHCA_DEFAULT_NUM_CQ (1 << 16)
+#define MTHCA_DEFAULT_NUM_MCG (1 << 13)
+#define MTHCA_DEFAULT_NUM_MPT (1 << 17)
+#define MTHCA_DEFAULT_NUM_MTT (1 << 20)
+#define MTHCA_DEFAULT_NUM_UDAV (1 << 15)
+#define MTHCA_DEFAULT_NUM_RESERVED_MTTS (1 << 18)
+#define MTHCA_DEFAULT_NUM_UARC_SIZE (1 << 18)
+
+static struct mthca_profile hca_profile = {
+ .num_qp = MTHCA_DEFAULT_NUM_QP,
+ .rdb_per_qp = MTHCA_DEFAULT_RDB_PER_QP,
+ .num_cq = MTHCA_DEFAULT_NUM_CQ,
+ .num_mcg = MTHCA_DEFAULT_NUM_MCG,
+ .num_mpt = MTHCA_DEFAULT_NUM_MPT,
+ .num_mtt = MTHCA_DEFAULT_NUM_MTT,
+ .num_udav = MTHCA_DEFAULT_NUM_UDAV, /* Tavor only */
+ .fmr_reserved_mtts = MTHCA_DEFAULT_NUM_RESERVED_MTTS, /* Tavor only */
+ .uarc_size = MTHCA_DEFAULT_NUM_UARC_SIZE, /* Arbel only */
+};
+
+module_param_named(num_qp, hca_profile.num_qp, int, 0444);
+MODULE_PARM_DESC(num_qp, "maximum number of QPs per HCA");
+
+module_param_named(rdb_per_qp, hca_profile.rdb_per_qp, int, 0444);
+MODULE_PARM_DESC(rdb_per_qp, "number of RDB buffers per QP");
+
+module_param_named(num_cq, hca_profile.num_cq, int, 0444);
+MODULE_PARM_DESC(num_cq, "maximum number of CQs per HCA");
+
+module_param_named(num_mcg, hca_profile.num_mcg, int, 0444);
+MODULE_PARM_DESC(num_mcg, "maximum number of multicast groups per HCA");
+
+module_param_named(num_mpt, hca_profile.num_mpt, int, 0444);
+MODULE_PARM_DESC(num_mpt,
+ "maximum number of memory protection table entries per HCA");
+
+module_param_named(num_mtt, hca_profile.num_mtt, int, 0444);
+MODULE_PARM_DESC(num_mtt,
+ "maximum number of memory translation table segments per HCA");
+
+module_param_named(num_udav, hca_profile.num_udav, int, 0444);
+MODULE_PARM_DESC(num_udav, "maximum number of UD address vectors per HCA");
+
+module_param_named(fmr_reserved_mtts, hca_profile.fmr_reserved_mtts, int, 0444);
+MODULE_PARM_DESC(fmr_reserved_mtts,
+ "number of memory translation table segments reserved for FMR");
+
+static char mthca_version[] __devinitdata =
+ DRV_NAME ": Mellanox InfiniBand HCA driver v"
+ DRV_VERSION " (" DRV_RELDATE ")\n";
+
+static int mthca_tune_pci(struct mthca_dev *mdev)
+{
+ if (!tune_pci)
+ return 0;
+
+ /* First try to max out Read Byte Count */
+ if (pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX)) {
+ if (pcix_set_mmrbc(mdev->pdev, pcix_get_max_mmrbc(mdev->pdev))) {
+ mthca_err(mdev, "Couldn't set PCI-X max read count, "
+ "aborting.\n");
+ return -ENODEV;
+ }
+ } else if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE))
+ mthca_info(mdev, "No PCI-X capability, not setting RBC.\n");
+
+ if (pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP)) {
+ if (pcie_set_readrq(mdev->pdev, 4096)) {
+ mthca_err(mdev, "Couldn't write PCI Express read request, "
+ "aborting.\n");
+ return -ENODEV;
+ }
+ } else if (mdev->mthca_flags & MTHCA_FLAG_PCIE)
+ mthca_info(mdev, "No PCI Express capability, "
+ "not setting Max Read Request Size.\n");
+
+ return 0;
+}
+
+static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
+{
+ int err;
+ u8 status;
+
+ err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+ return err;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_DEV_LIM returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+ if (dev_lim->min_page_sz > PAGE_SIZE) {
+ mthca_err(mdev, "HCA minimum page size of %d bigger than "
+ "kernel PAGE_SIZE of %ld, aborting.\n",
+ dev_lim->min_page_sz, PAGE_SIZE);
+ return -ENODEV;
+ }
+ if (dev_lim->num_ports > MTHCA_MAX_PORTS) {
+ mthca_err(mdev, "HCA has %d ports, but we only support %d, "
+ "aborting.\n",
+ dev_lim->num_ports, MTHCA_MAX_PORTS);
+ return -ENODEV;
+ }
+
+ if (dev_lim->uar_size > pci_resource_len(mdev->pdev, 2)) {
+ mthca_err(mdev, "HCA reported UAR size of 0x%x bigger than "
+ "PCI resource 2 size of 0x%llx, aborting.\n",
+ dev_lim->uar_size,
+ (unsigned long long)pci_resource_len(mdev->pdev, 2));
+ return -ENODEV;
+ }
+
+ mdev->limits.num_ports = dev_lim->num_ports;
+ mdev->limits.vl_cap = dev_lim->max_vl;
+ mdev->limits.mtu_cap = dev_lim->max_mtu;
+ mdev->limits.gid_table_len = dev_lim->max_gids;
+ mdev->limits.pkey_table_len = dev_lim->max_pkeys;
+ mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
+ /*
+ * Need to allow for worst case send WQE overhead and check
+ * whether max_desc_sz imposes a lower limit than max_sg; UD
+ * send has the biggest overhead.
+ */
+ mdev->limits.max_sg = min_t(int, dev_lim->max_sg,
+ (dev_lim->max_desc_sz -
+ sizeof (struct mthca_next_seg) -
+ (mthca_is_memfree(mdev) ?
+ sizeof (struct mthca_arbel_ud_seg) :
+ sizeof (struct mthca_tavor_ud_seg))) /
+ sizeof (struct mthca_data_seg));
+ mdev->limits.max_wqes = dev_lim->max_qp_sz;
+ mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp;
+ mdev->limits.reserved_qps = dev_lim->reserved_qps;
+ mdev->limits.max_srq_wqes = dev_lim->max_srq_sz;
+ mdev->limits.reserved_srqs = dev_lim->reserved_srqs;
+ mdev->limits.reserved_eecs = dev_lim->reserved_eecs;
+ mdev->limits.max_desc_sz = dev_lim->max_desc_sz;
+ mdev->limits.max_srq_sge = mthca_max_srq_sge(mdev);
+ /*
+ * Subtract 1 from the limit because we need to allocate a
+ * spare CQE so the HCA HW can tell the difference between an
+ * empty CQ and a full CQ.
+ */
+ mdev->limits.max_cqes = dev_lim->max_cq_sz - 1;
+ mdev->limits.reserved_cqs = dev_lim->reserved_cqs;
+ mdev->limits.reserved_eqs = dev_lim->reserved_eqs;
+ mdev->limits.reserved_mtts = dev_lim->reserved_mtts;
+ mdev->limits.reserved_mrws = dev_lim->reserved_mrws;
+ mdev->limits.reserved_uars = dev_lim->reserved_uars;
+ mdev->limits.reserved_pds = dev_lim->reserved_pds;
+ mdev->limits.port_width_cap = dev_lim->max_port_width;
+ mdev->limits.page_size_cap = ~(u32) (dev_lim->min_page_sz - 1);
+ mdev->limits.flags = dev_lim->flags;
+ /*
+ * For old FW that doesn't return static rate support, use a
+ * value of 0x3 (only static rate values of 0 or 1 are handled),
+ * except on Sinai, where even old FW can handle static rate
+ * values of 2 and 3.
+ */
+ if (dev_lim->stat_rate_support)
+ mdev->limits.stat_rate_support = dev_lim->stat_rate_support;
+ else if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ mdev->limits.stat_rate_support = 0xf;
+ else
+ mdev->limits.stat_rate_support = 0x3;
+
+	/*
+	 * IB_DEVICE_RESIZE_MAX_WR not supported by driver.
+	 * May be doable since hardware supports it for SRQ.
+	 *
+	 * IB_DEVICE_N_NOTIFY_CQ is supported by hardware but not by driver.
+	 *
+	 * IB_DEVICE_SRQ_RESIZE is supported by hardware but SRQ is not
+	 * supported by driver.
+	 */
+ mdev->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
+ IB_DEVICE_PORT_ACTIVE_EVENT |
+ IB_DEVICE_SYS_IMAGE_GUID |
+ IB_DEVICE_RC_RNR_NAK_GEN;
+
+ if (dev_lim->flags & DEV_LIM_FLAG_BAD_PKEY_CNTR)
+ mdev->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
+
+ if (dev_lim->flags & DEV_LIM_FLAG_BAD_QKEY_CNTR)
+ mdev->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
+
+ if (dev_lim->flags & DEV_LIM_FLAG_RAW_MULTI)
+ mdev->device_cap_flags |= IB_DEVICE_RAW_MULTI;
+
+ if (dev_lim->flags & DEV_LIM_FLAG_AUTO_PATH_MIG)
+ mdev->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
+
+ if (dev_lim->flags & DEV_LIM_FLAG_UD_AV_PORT_ENFORCE)
+ mdev->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
+
+ if (dev_lim->flags & DEV_LIM_FLAG_SRQ)
+ mdev->mthca_flags |= MTHCA_FLAG_SRQ;
+
+ if (mthca_is_memfree(mdev))
+ if (dev_lim->flags & DEV_LIM_FLAG_IPOIB_CSUM)
+ mdev->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+
+ return 0;
+}
+
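+/*
+ * Bring up a Tavor-mode HCA: enable the system (SYS_EN), query firmware
+ * and DDR memory, build a profile from the module parameters and run
+ * INIT_HCA.
+ */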
+static int mthca_init_tavor(struct mthca_dev *mdev)
+{
+ s64 size;
+ u8 status;
+ int err;
+ struct mthca_dev_lim dev_lim;
+ struct mthca_profile profile;
+ struct mthca_init_hca_param init_hca;
+
+ err = mthca_SYS_EN(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "SYS_EN command failed, aborting.\n");
+ return err;
+ }
+ if (status) {
+ mthca_err(mdev, "SYS_EN returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+
+ err = mthca_QUERY_FW(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
+ goto err_disable;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_disable;
+ }
+ err = mthca_QUERY_DDR(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_DDR command failed, aborting.\n");
+ goto err_disable;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_DDR returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_disable;
+ }
+
+ err = mthca_dev_lim(mdev, &dev_lim);
+ if (err) {
+ mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+ goto err_disable;
+ }
+
+ profile = hca_profile;
+ profile.num_uar = dev_lim.uar_size / PAGE_SIZE;
+ profile.uarc_size = 0;
+ if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+ profile.num_srq = dev_lim.max_srqs;
+
+ size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
+ if (size < 0) {
+ err = size;
+ goto err_disable;
+ }
+
+ err = mthca_INIT_HCA(mdev, &init_hca, &status);
+ if (err) {
+ mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
+ goto err_disable;
+ }
+ if (status) {
+ mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_disable;
+ }
+
+ return 0;
+
+err_disable:
+ mthca_SYS_DIS(mdev, &status);
+
+ return err;
+}
+
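+/*
+ * For MemFree HCAs: allocate ICM pages for the firmware area, map them
+ * with MAP_FA and start the firmware with RUN_FW.
+ */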
+static int mthca_load_fw(struct mthca_dev *mdev)
+{
+ u8 status;
+ int err;
+
+ /* FIXME: use HCA-attached memory for FW if present */
+
+ mdev->fw.arbel.fw_icm =
+ mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages,
+ GFP_HIGHUSER | __GFP_NOWARN, 0);
+ if (!mdev->fw.arbel.fw_icm) {
+ mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
+ return -ENOMEM;
+ }
+
+ err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm, &status);
+ if (err) {
+ mthca_err(mdev, "MAP_FA command failed, aborting.\n");
+ goto err_free;
+ }
+ if (status) {
+ mthca_err(mdev, "MAP_FA returned status 0x%02x, aborting.\n", status);
+ err = -EINVAL;
+ goto err_free;
+ }
+ err = mthca_RUN_FW(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "RUN_FW command failed, aborting.\n");
+ goto err_unmap_fa;
+ }
+ if (status) {
+ mthca_err(mdev, "RUN_FW returned status 0x%02x, aborting.\n", status);
+ err = -EINVAL;
+ goto err_unmap_fa;
+ }
+
+ return 0;
+
+err_unmap_fa:
+ mthca_UNMAP_FA(mdev, &status);
+
+err_free:
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
+ return err;
+}
+
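+/*
+ * Size the ICM aux area with SET_ICM_SIZE, map it, and then create the
+ * ICM tables for MTTs, MPTs, QPs, EQPs, RDBs, CQs, SRQs and multicast
+ * groups.
+ */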
+static int mthca_init_icm(struct mthca_dev *mdev,
+ struct mthca_dev_lim *dev_lim,
+ struct mthca_init_hca_param *init_hca,
+ u64 icm_size)
+{
+ u64 aux_pages;
+ u8 status;
+ int err;
+
+ err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages, &status);
+ if (err) {
+ mthca_err(mdev, "SET_ICM_SIZE command failed, aborting.\n");
+ return err;
+ }
+ if (status) {
+ mthca_err(mdev, "SET_ICM_SIZE returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+
+ mthca_dbg(mdev, "%lld KB of HCA context requires %lld KB aux memory.\n",
+ (unsigned long long) icm_size >> 10,
+ (unsigned long long) aux_pages << 2);
+
+ mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, aux_pages,
+ GFP_HIGHUSER | __GFP_NOWARN, 0);
+ if (!mdev->fw.arbel.aux_icm) {
+ mthca_err(mdev, "Couldn't allocate aux memory, aborting.\n");
+ return -ENOMEM;
+ }
+
+ err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm, &status);
+ if (err) {
+ mthca_err(mdev, "MAP_ICM_AUX command failed, aborting.\n");
+ goto err_free_aux;
+ }
+ if (status) {
+ mthca_err(mdev, "MAP_ICM_AUX returned status 0x%02x, aborting.\n", status);
+ err = -EINVAL;
+ goto err_free_aux;
+ }
+
+ err = mthca_map_eq_icm(mdev, init_hca->eqc_base);
+ if (err) {
+ mthca_err(mdev, "Failed to map EQ context memory, aborting.\n");
+ goto err_unmap_aux;
+ }
+
+	/*
+	 * The CPU writes to non-reserved MTTs while the HCA may DMA to
+	 * reserved MTTs, so align the reserved region to the DMA cache
+	 * line size.
+	 */
+ mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE,
+ dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE;
+
+ mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
+ MTHCA_MTT_SEG_SIZE,
+ mdev->limits.num_mtt_segs,
+ mdev->limits.reserved_mtts,
+ 1, 0);
+ if (!mdev->mr_table.mtt_table) {
+ mthca_err(mdev, "Failed to map MTT context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_eq;
+ }
+
+ mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base,
+ dev_lim->mpt_entry_sz,
+ mdev->limits.num_mpts,
+ mdev->limits.reserved_mrws,
+ 1, 1);
+ if (!mdev->mr_table.mpt_table) {
+ mthca_err(mdev, "Failed to map MPT context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_mtt;
+ }
+
+ mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base,
+ dev_lim->qpc_entry_sz,
+ mdev->limits.num_qps,
+ mdev->limits.reserved_qps,
+ 0, 0);
+ if (!mdev->qp_table.qp_table) {
+ mthca_err(mdev, "Failed to map QP context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_mpt;
+ }
+
+ mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base,
+ dev_lim->eqpc_entry_sz,
+ mdev->limits.num_qps,
+ mdev->limits.reserved_qps,
+ 0, 0);
+ if (!mdev->qp_table.eqp_table) {
+ mthca_err(mdev, "Failed to map EQP context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_qp;
+ }
+
+ mdev->qp_table.rdb_table = mthca_alloc_icm_table(mdev, init_hca->rdb_base,
+ MTHCA_RDB_ENTRY_SIZE,
+ mdev->limits.num_qps <<
+ mdev->qp_table.rdb_shift, 0,
+ 0, 0);
+ if (!mdev->qp_table.rdb_table) {
+		mthca_err(mdev, "Failed to map RDB context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_eqp;
+ }
+
+ mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
+ dev_lim->cqc_entry_sz,
+ mdev->limits.num_cqs,
+ mdev->limits.reserved_cqs,
+ 0, 0);
+ if (!mdev->cq_table.table) {
+ mthca_err(mdev, "Failed to map CQ context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_rdb;
+ }
+
+ if (mdev->mthca_flags & MTHCA_FLAG_SRQ) {
+ mdev->srq_table.table =
+ mthca_alloc_icm_table(mdev, init_hca->srqc_base,
+ dev_lim->srq_entry_sz,
+ mdev->limits.num_srqs,
+ mdev->limits.reserved_srqs,
+ 0, 0);
+ if (!mdev->srq_table.table) {
+ mthca_err(mdev, "Failed to map SRQ context memory, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_cq;
+ }
+ }
+
+ /*
+ * It's not strictly required, but for simplicity just map the
+ * whole multicast group table now. The table isn't very big
+ * and it's a lot easier than trying to track ref counts.
+ */
+ mdev->mcg_table.table = mthca_alloc_icm_table(mdev, init_hca->mc_base,
+ MTHCA_MGM_ENTRY_SIZE,
+ mdev->limits.num_mgms +
+ mdev->limits.num_amgms,
+ mdev->limits.num_mgms +
+ mdev->limits.num_amgms,
+ 0, 0);
+ if (!mdev->mcg_table.table) {
+ mthca_err(mdev, "Failed to map MCG context memory, aborting.\n");
+ err = -ENOMEM;
+ goto err_unmap_srq;
+ }
+
+ return 0;
+
+err_unmap_srq:
+ if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+ mthca_free_icm_table(mdev, mdev->srq_table.table);
+
+err_unmap_cq:
+ mthca_free_icm_table(mdev, mdev->cq_table.table);
+
+err_unmap_rdb:
+ mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
+
+err_unmap_eqp:
+ mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
+
+err_unmap_qp:
+ mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
+
+err_unmap_mpt:
+ mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
+
+err_unmap_mtt:
+ mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
+
+err_unmap_eq:
+ mthca_unmap_eq_icm(mdev);
+
+err_unmap_aux:
+ mthca_UNMAP_ICM_AUX(mdev, &status);
+
+err_free_aux:
+ mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0);
+
+ return err;
+}
+
+static void mthca_free_icms(struct mthca_dev *mdev)
+{
+ u8 status;
+
+ mthca_free_icm_table(mdev, mdev->mcg_table.table);
+ if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+ mthca_free_icm_table(mdev, mdev->srq_table.table);
+ mthca_free_icm_table(mdev, mdev->cq_table.table);
+ mthca_free_icm_table(mdev, mdev->qp_table.rdb_table);
+ mthca_free_icm_table(mdev, mdev->qp_table.eqp_table);
+ mthca_free_icm_table(mdev, mdev->qp_table.qp_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mpt_table);
+ mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
+ mthca_unmap_eq_icm(mdev);
+
+ mthca_UNMAP_ICM_AUX(mdev, &status);
+ mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0);
+}
+
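+/*
+ * Bring up a MemFree (Arbel/Sinai) HCA: query firmware, enable any
+ * HCA-attached memory, load the firmware, lay out ICM and run INIT_HCA.
+ */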
+static int mthca_init_arbel(struct mthca_dev *mdev)
+{
+ struct mthca_dev_lim dev_lim;
+ struct mthca_profile profile;
+ struct mthca_init_hca_param init_hca;
+ s64 icm_size;
+ u8 status;
+ int err;
+
+ err = mthca_QUERY_FW(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
+ return err;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+
+ err = mthca_ENABLE_LAM(mdev, &status);
+ if (err) {
+ mthca_err(mdev, "ENABLE_LAM command failed, aborting.\n");
+ return err;
+ }
+ if (status == MTHCA_CMD_STAT_LAM_NOT_PRE) {
+ mthca_dbg(mdev, "No HCA-attached memory (running in MemFree mode)\n");
+ mdev->mthca_flags |= MTHCA_FLAG_NO_LAM;
+ } else if (status) {
+ mthca_err(mdev, "ENABLE_LAM returned status 0x%02x, "
+ "aborting.\n", status);
+ return -EINVAL;
+ }
+
+ err = mthca_load_fw(mdev);
+ if (err) {
+ mthca_err(mdev, "Failed to start FW, aborting.\n");
+ goto err_disable;
+ }
+
+ err = mthca_dev_lim(mdev, &dev_lim);
+ if (err) {
+ mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+ goto err_stop_fw;
+ }
+
+ profile = hca_profile;
+ profile.num_uar = dev_lim.uar_size / PAGE_SIZE;
+ profile.num_udav = 0;
+ if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
+ profile.num_srq = dev_lim.max_srqs;
+
+ icm_size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca);
+ if (icm_size < 0) {
+ err = icm_size;
+ goto err_stop_fw;
+ }
+
+ err = mthca_init_icm(mdev, &dev_lim, &init_hca, icm_size);
+ if (err)
+ goto err_stop_fw;
+
+ err = mthca_INIT_HCA(mdev, &init_hca, &status);
+ if (err) {
+ mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
+ goto err_free_icm;
+ }
+ if (status) {
+ mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_free_icm;
+ }
+
+ return 0;
+
+err_free_icm:
+ mthca_free_icms(mdev);
+
+err_stop_fw:
+ mthca_UNMAP_FA(mdev, &status);
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
+
+err_disable:
+ if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
+ mthca_DISABLE_LAM(mdev, &status);
+
+ return err;
+}
+
+static void mthca_close_hca(struct mthca_dev *mdev)
+{
+ u8 status;
+
+ mthca_CLOSE_HCA(mdev, 0, &status);
+
+ if (mthca_is_memfree(mdev)) {
+ mthca_free_icms(mdev);
+
+ mthca_UNMAP_FA(mdev, &status);
+ mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
+
+ if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
+ mthca_DISABLE_LAM(mdev, &status);
+ } else
+ mthca_SYS_DIS(mdev, &status);
+}
+
+static int mthca_init_hca(struct mthca_dev *mdev)
+{
+ u8 status;
+ int err;
+ struct mthca_adapter adapter;
+
+ if (mthca_is_memfree(mdev))
+ err = mthca_init_arbel(mdev);
+ else
+ err = mthca_init_tavor(mdev);
+
+ if (err)
+ return err;
+
+ err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
+ if (err) {
+ mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
+ goto err_close;
+ }
+ if (status) {
+ mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
+ "aborting.\n", status);
+ err = -EINVAL;
+ goto err_close;
+ }
+
+ mdev->eq_table.inta_pin = adapter.inta_pin;
+ if (!mthca_is_memfree(mdev))
+ mdev->rev_id = adapter.revision_id;
+ memcpy(mdev->board_id, adapter.board_id, sizeof mdev->board_id);
+
+ return 0;
+
+err_close:
+ mthca_close_hca(mdev);
+ return err;
+}
+
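+/*
+ * Set up all the driver's software tables (UAR, PD, MR, EQ, CQ, SRQ,
+ * QP, AV and MCG), switch firmware commands to event mode and verify
+ * interrupt delivery with a NOP command.
+ */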
+static int mthca_setup_hca(struct mthca_dev *dev)
+{
+ int err;
+ u8 status;
+
+ MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock);
+
+ err = mthca_init_uar_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "user access region table, aborting.\n");
+ return err;
+ }
+
+ err = mthca_uar_alloc(dev, &dev->driver_uar);
+ if (err) {
+ mthca_err(dev, "Failed to allocate driver access region, "
+ "aborting.\n");
+ goto err_uar_table_free;
+ }
+
+ dev->kar = ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!dev->kar) {
+ mthca_err(dev, "Couldn't map kernel access region, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_uar_free;
+ }
+
+ err = mthca_init_pd_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "protection domain table, aborting.\n");
+ goto err_kar_unmap;
+ }
+
+ err = mthca_init_mr_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "memory region table, aborting.\n");
+ goto err_pd_table_free;
+ }
+
+ err = mthca_pd_alloc(dev, 1, &dev->driver_pd);
+ if (err) {
+ mthca_err(dev, "Failed to create driver PD, "
+ "aborting.\n");
+ goto err_mr_table_free;
+ }
+
+ err = mthca_init_eq_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "event queue table, aborting.\n");
+ goto err_pd_free;
+ }
+
+ err = mthca_cmd_use_events(dev);
+ if (err) {
+ mthca_err(dev, "Failed to switch to event-driven "
+ "firmware commands, aborting.\n");
+ goto err_eq_table_free;
+ }
+
+ err = mthca_NOP(dev, &status);
+ if (err || status) {
+ if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
+ mthca_warn(dev, "NOP command failed to generate interrupt "
+ "(IRQ %d).\n",
+ dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector);
+ mthca_warn(dev, "Trying again with MSI-X disabled.\n");
+ } else {
+ mthca_err(dev, "NOP command failed to generate interrupt "
+ "(IRQ %d), aborting.\n",
+ dev->pdev->irq);
+ mthca_err(dev, "BIOS or ACPI interrupt routing problem?\n");
+ }
+
+ goto err_cmd_poll;
+ }
+
+ mthca_dbg(dev, "NOP command IRQ test passed\n");
+
+ err = mthca_init_cq_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "completion queue table, aborting.\n");
+ goto err_cmd_poll;
+ }
+
+ err = mthca_init_srq_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "shared receive queue table, aborting.\n");
+ goto err_cq_table_free;
+ }
+
+ err = mthca_init_qp_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "queue pair table, aborting.\n");
+ goto err_srq_table_free;
+ }
+
+ err = mthca_init_av_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "address vector table, aborting.\n");
+ goto err_qp_table_free;
+ }
+
+ err = mthca_init_mcg_table(dev);
+ if (err) {
+ mthca_err(dev, "Failed to initialize "
+ "multicast group table, aborting.\n");
+ goto err_av_table_free;
+ }
+
+ return 0;
+
+err_av_table_free:
+ mthca_cleanup_av_table(dev);
+
+err_qp_table_free:
+ mthca_cleanup_qp_table(dev);
+
+err_srq_table_free:
+ mthca_cleanup_srq_table(dev);
+
+err_cq_table_free:
+ mthca_cleanup_cq_table(dev);
+
+err_cmd_poll:
+ mthca_cmd_use_polling(dev);
+
+err_eq_table_free:
+ mthca_cleanup_eq_table(dev);
+
+err_pd_free:
+ mthca_pd_free(dev, &dev->driver_pd);
+
+err_mr_table_free:
+ mthca_cleanup_mr_table(dev);
+
+err_pd_table_free:
+ mthca_cleanup_pd_table(dev);
+
+err_kar_unmap:
+ iounmap(dev->kar);
+
+err_uar_free:
+ mthca_uar_free(dev, &dev->driver_uar);
+
+err_uar_table_free:
+ mthca_cleanup_uar_table(dev);
+ return err;
+}
+
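+/*
+ * Try to enable three MSI-X vectors, one each for the completion, async
+ * and command event queues.
+ */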
+static int mthca_enable_msi_x(struct mthca_dev *mdev)
+{
+ struct msix_entry entries[3];
+ int err;
+
+ entries[0].entry = 0;
+ entries[1].entry = 1;
+ entries[2].entry = 2;
+
+ err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries));
+ if (err) {
+ if (err > 0)
+ mthca_info(mdev, "Only %d MSI-X vectors available, "
+ "not using MSI-X\n", err);
+ return err;
+ }
+
+ mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
+ mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
+ mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector = entries[2].vector;
+
+ return 0;
+}
+
+/* Types of supported HCAs */
+enum {
+ TAVOR, /* MT23108 */
+ ARBEL_COMPAT, /* MT25208 in Tavor compat mode */
+ ARBEL_NATIVE, /* MT25208 with extended features */
+ SINAI /* MT25204 */
+};
+
+#define MTHCA_FW_VER(major, minor, subminor) \
+ (((u64) (major) << 32) | ((u64) (minor) << 16) | (u64) (subminor))
+
+static struct {
+ u64 latest_fw;
+ u32 flags;
+} mthca_hca_table[] = {
+ [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 5, 0),
+ .flags = 0 },
+ [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200),
+ .flags = MTHCA_FLAG_PCIE },
+ [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 3, 0),
+ .flags = MTHCA_FLAG_MEMFREE |
+ MTHCA_FLAG_PCIE },
+ [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0),
+ .flags = MTHCA_FLAG_MEMFREE |
+ MTHCA_FLAG_PCIE |
+ MTHCA_FLAG_SINAI_OPT }
+};
+
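+/*
+ * Probe one HCA: enable its PCI resources, reset it, bring up the
+ * command interface and firmware, set up all tables and register the
+ * device with the IB core.
+ */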
+static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
+{
+ int ddr_hidden = 0;
+ int err;
+ struct mthca_dev *mdev;
+
+ printk(KERN_INFO PFX "Initializing %s\n",
+ pci_name(pdev));
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Cannot enable PCI device, "
+ "aborting.\n");
+ return err;
+ }
+
+	/*
+	 * Check for BARs.  We expect BAR 0: 1MB, BAR 2: 8MB and BAR 4:
+	 * DDR (which may not be present).
+	 */
+ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
+ pci_resource_len(pdev, 0) != 1 << 20) {
+ dev_err(&pdev->dev, "Missing DCS, aborting.\n");
+ err = -ENODEV;
+ goto err_disable_pdev;
+ }
+ if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "Missing UAR, aborting.\n");
+ err = -ENODEV;
+ goto err_disable_pdev;
+ }
+ if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM))
+ ddr_hidden = 1;
+
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ dev_err(&pdev->dev, "Cannot obtain PCI resources, "
+ "aborting.\n");
+ goto err_disable_pdev;
+ }
+
+ pci_set_master(pdev);
+
+ err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+ if (err) {
+ dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
+ err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+ if (err) {
+ dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
+ goto err_free_res;
+ }
+ }
+ err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+ if (err) {
+ dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
+ "consistent PCI DMA mask.\n");
+ err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ if (err) {
+ dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
+ "aborting.\n");
+ goto err_free_res;
+ }
+ }
+
+ mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev);
+ if (!mdev) {
+ dev_err(&pdev->dev, "Device struct alloc failed, "
+ "aborting.\n");
+ err = -ENOMEM;
+ goto err_free_res;
+ }
+
+ mdev->pdev = pdev;
+
+ mdev->mthca_flags = mthca_hca_table[hca_type].flags;
+ if (ddr_hidden)
+ mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
+
+ /*
+ * Now reset the HCA before we touch the PCI capabilities or
+ * attempt a firmware command, since a boot ROM may have left
+ * the HCA in an undefined state.
+ */
+ err = mthca_reset(mdev);
+ if (err) {
+ mthca_err(mdev, "Failed to reset HCA, aborting.\n");
+ goto err_free_dev;
+ }
+
+ if (mthca_cmd_init(mdev)) {
+ mthca_err(mdev, "Failed to init command interface, aborting.\n");
+ goto err_free_dev;
+ }
+
+ err = mthca_tune_pci(mdev);
+ if (err)
+ goto err_cmd;
+
+ err = mthca_init_hca(mdev);
+ if (err)
+ goto err_cmd;
+
+ if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
+ mthca_warn(mdev, "HCA FW version %d.%d.%03d is old (%d.%d.%03d is current).\n",
+ (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
+ (int) (mdev->fw_ver & 0xffff),
+ (int) (mthca_hca_table[hca_type].latest_fw >> 32),
+ (int) (mthca_hca_table[hca_type].latest_fw >> 16) & 0xffff,
+ (int) (mthca_hca_table[hca_type].latest_fw & 0xffff));
+ mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n");
+ }
+
+ if (msi_x && !mthca_enable_msi_x(mdev))
+ mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
+
+ err = mthca_setup_hca(mdev);
+ if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) {
+		pci_disable_msix(pdev);
+ mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X;
+
+ err = mthca_setup_hca(mdev);
+ }
+
+ if (err)
+ goto err_close;
+
+ err = mthca_register_device(mdev);
+ if (err)
+ goto err_cleanup;
+
+ err = mthca_create_agents(mdev);
+ if (err)
+ goto err_unregister;
+
+ pci_set_drvdata(pdev, mdev);
+ mdev->hca_type = hca_type;
+
+ return 0;
+
+err_unregister:
+ mthca_unregister_device(mdev);
+
+err_cleanup:
+ mthca_cleanup_mcg_table(mdev);
+ mthca_cleanup_av_table(mdev);
+ mthca_cleanup_qp_table(mdev);
+ mthca_cleanup_srq_table(mdev);
+ mthca_cleanup_cq_table(mdev);
+ mthca_cmd_use_polling(mdev);
+ mthca_cleanup_eq_table(mdev);
+
+ mthca_pd_free(mdev, &mdev->driver_pd);
+
+ mthca_cleanup_mr_table(mdev);
+ mthca_cleanup_pd_table(mdev);
+ mthca_cleanup_uar_table(mdev);
+
+err_close:
+ if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
+ pci_disable_msix(pdev);
+
+ mthca_close_hca(mdev);
+
+err_cmd:
+ mthca_cmd_cleanup(mdev);
+
+err_free_dev:
+ ib_dealloc_device(&mdev->ib_dev);
+
+err_free_res:
+ pci_release_regions(pdev);
+
+err_disable_pdev:
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ return err;
+}
+
+static void __mthca_remove_one(struct pci_dev *pdev)
+{
+ struct mthca_dev *mdev = pci_get_drvdata(pdev);
+ u8 status;
+ int p;
+
+ if (mdev) {
+ mthca_free_agents(mdev);
+ mthca_unregister_device(mdev);
+
+ for (p = 1; p <= mdev->limits.num_ports; ++p)
+ mthca_CLOSE_IB(mdev, p, &status);
+
+ mthca_cleanup_mcg_table(mdev);
+ mthca_cleanup_av_table(mdev);
+ mthca_cleanup_qp_table(mdev);
+ mthca_cleanup_srq_table(mdev);
+ mthca_cleanup_cq_table(mdev);
+ mthca_cmd_use_polling(mdev);
+ mthca_cleanup_eq_table(mdev);
+
+ mthca_pd_free(mdev, &mdev->driver_pd);
+
+ mthca_cleanup_mr_table(mdev);
+ mthca_cleanup_pd_table(mdev);
+
+ iounmap(mdev->kar);
+ mthca_uar_free(mdev, &mdev->driver_uar);
+ mthca_cleanup_uar_table(mdev);
+ mthca_close_hca(mdev);
+ mthca_cmd_cleanup(mdev);
+
+ if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
+ pci_disable_msix(pdev);
+
+ ib_dealloc_device(&mdev->ib_dev);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ }
+}
+
+int __mthca_restart_one(struct pci_dev *pdev)
+{
+ struct mthca_dev *mdev;
+ int hca_type;
+
+ mdev = pci_get_drvdata(pdev);
+ if (!mdev)
+ return -ENODEV;
+ hca_type = mdev->hca_type;
+ __mthca_remove_one(pdev);
+ return __mthca_init_one(pdev, hca_type);
+}
+
+static int __devinit mthca_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ static int mthca_version_printed = 0;
+ int ret;
+
+ mutex_lock(&mthca_device_mutex);
+
+ if (!mthca_version_printed) {
+ printk(KERN_INFO "%s", mthca_version);
+ ++mthca_version_printed;
+ }
+
+ if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
+ printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
+ pci_name(pdev), id->driver_data);
+ mutex_unlock(&mthca_device_mutex);
+ return -ENODEV;
+ }
+
+ ret = __mthca_init_one(pdev, id->driver_data);
+
+ mutex_unlock(&mthca_device_mutex);
+
+ return ret;
+}
+
+static void __devexit mthca_remove_one(struct pci_dev *pdev)
+{
+ mutex_lock(&mthca_device_mutex);
+ __mthca_remove_one(pdev);
+ mutex_unlock(&mthca_device_mutex);
+}
+
+static struct pci_device_id mthca_pci_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR),
+ .driver_data = TAVOR },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_TAVOR),
+ .driver_data = TAVOR },
+ { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT),
+ .driver_data = ARBEL_COMPAT },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT),
+ .driver_data = ARBEL_COMPAT },
+ { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL),
+ .driver_data = ARBEL_NATIVE },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL),
+ .driver_data = ARBEL_NATIVE },
+ { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI),
+ .driver_data = SINAI },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_SINAI),
+ .driver_data = SINAI },
+ { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI_OLD),
+ .driver_data = SINAI },
+ { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_SINAI_OLD),
+ .driver_data = SINAI },
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mthca_pci_table);
+
+static struct pci_driver mthca_driver = {
+ .name = DRV_NAME,
+ .id_table = mthca_pci_table,
+ .probe = mthca_init_one,
+ .remove = __devexit_p(mthca_remove_one)
+};
+
+static void __init __mthca_check_profile_val(const char *name, int *pval,
+ int pval_default)
+{
+	/* value must be positive and a power of 2 */
+ int old_pval = *pval;
+
+ if (old_pval <= 0)
+ *pval = pval_default;
+ else
+ *pval = roundup_pow_of_two(old_pval);
+
+ if (old_pval != *pval) {
+ printk(KERN_WARNING PFX "Invalid value %d for %s in module parameter.\n",
+ old_pval, name);
+ printk(KERN_WARNING PFX "Corrected %s to %d.\n", name, *pval);
+ }
+}
+
+#define mthca_check_profile_val(name, default) \
+ __mthca_check_profile_val(#name, &hca_profile.name, default)
+
+static void __init mthca_validate_profile(void)
+{
+ mthca_check_profile_val(num_qp, MTHCA_DEFAULT_NUM_QP);
+ mthca_check_profile_val(rdb_per_qp, MTHCA_DEFAULT_RDB_PER_QP);
+ mthca_check_profile_val(num_cq, MTHCA_DEFAULT_NUM_CQ);
+ mthca_check_profile_val(num_mcg, MTHCA_DEFAULT_NUM_MCG);
+ mthca_check_profile_val(num_mpt, MTHCA_DEFAULT_NUM_MPT);
+ mthca_check_profile_val(num_mtt, MTHCA_DEFAULT_NUM_MTT);
+ mthca_check_profile_val(num_udav, MTHCA_DEFAULT_NUM_UDAV);
+ mthca_check_profile_val(fmr_reserved_mtts, MTHCA_DEFAULT_NUM_RESERVED_MTTS);
+
+ if (hca_profile.fmr_reserved_mtts >= hca_profile.num_mtt) {
+ printk(KERN_WARNING PFX "Invalid fmr_reserved_mtts module parameter %d.\n",
+ hca_profile.fmr_reserved_mtts);
+ printk(KERN_WARNING PFX "(Must be smaller than num_mtt %d)\n",
+ hca_profile.num_mtt);
+ hca_profile.fmr_reserved_mtts = hca_profile.num_mtt / 2;
+ printk(KERN_WARNING PFX "Corrected fmr_reserved_mtts to %d.\n",
+ hca_profile.fmr_reserved_mtts);
+ }
+}
+
+static int __init mthca_init(void)
+{
+ int ret;
+
+ mthca_validate_profile();
+
+ ret = mthca_catas_init();
+ if (ret)
+ return ret;
+
+ ret = pci_register_driver(&mthca_driver);
+ if (ret < 0) {
+ mthca_catas_cleanup();
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __exit mthca_cleanup(void)
+{
+ pci_unregister_driver(&mthca_driver);
+ mthca_catas_cleanup();
+}
+
+module_init(mthca_init);
+module_exit(mthca_cleanup);
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
new file mode 100644
index 0000000..3f5f948
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/string.h>
+#include <linux/slab.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+
+struct mthca_mgm {
+ __be32 next_gid_index;
+ u32 reserved[3];
+ u8 gid[16];
+ __be32 qp[MTHCA_QP_PER_MGM];
+};
+
+static const u8 zero_gid[16]; /* automatically initialized to 0 */
+
+/*
+ * Caller must hold MCG table semaphore. gid and mgm parameters must
+ * be properly aligned for command interface.
+ *
+ * Returns 0 unless a firmware command error occurs.
+ *
+ * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1
+ * and *mgm holds MGM entry.
+ *
+ * If GID is found in AMGM, *index = index in AMGM, *prev = index of
+ * previous entry in hash chain and *mgm holds AMGM entry.
+ *
+ * If no AMGM exists for given gid, *index = -1, *prev = index of last
+ * entry in hash chain and *mgm holds end of hash chain.
+ */
+static int find_mgm(struct mthca_dev *dev,
+ u8 *gid, struct mthca_mailbox *mgm_mailbox,
+ u16 *hash, int *prev, int *index)
+{
+ struct mthca_mailbox *mailbox;
+ struct mthca_mgm *mgm = mgm_mailbox->buf;
+ u8 *mgid;
+ int err;
+ u8 status;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return -ENOMEM;
+ mgid = mailbox->buf;
+
+ memcpy(mgid, gid, 16);
+
+ err = mthca_MGID_HASH(dev, mailbox, hash, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "MGID_HASH returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (0)
+ mthca_dbg(dev, "Hash for %04x:%04x:%04x:%04x:"
+ "%04x:%04x:%04x:%04x is %04x\n",
+ be16_to_cpu(((__be16 *) gid)[0]),
+ be16_to_cpu(((__be16 *) gid)[1]),
+ be16_to_cpu(((__be16 *) gid)[2]),
+ be16_to_cpu(((__be16 *) gid)[3]),
+ be16_to_cpu(((__be16 *) gid)[4]),
+ be16_to_cpu(((__be16 *) gid)[5]),
+ be16_to_cpu(((__be16 *) gid)[6]),
+ be16_to_cpu(((__be16 *) gid)[7]),
+ *hash);
+
+ *index = *hash;
+ *prev = -1;
+
+ do {
+ err = mthca_READ_MGM(dev, *index, mgm_mailbox, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "READ_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (!memcmp(mgm->gid, zero_gid, 16)) {
+ if (*index != *hash) {
+ mthca_err(dev, "Found zero MGID in AMGM.\n");
+ err = -EINVAL;
+ }
+ goto out;
+ }
+
+ if (!memcmp(mgm->gid, gid, 16))
+ goto out;
+
+ *prev = *index;
+ *index = be32_to_cpu(mgm->next_gid_index) >> 6;
+ } while (*index);
+
+ *index = -1;
+
+ out:
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
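+/*
+ * Attach a QP to a multicast GID: find (or allocate) the MGM/AMGM entry
+ * for the GID, add the QP number to a free slot and, for a newly
+ * allocated AMGM entry, link it into the hash chain.
+ */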
+int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_mailbox *mailbox;
+ struct mthca_mgm *mgm;
+ u16 hash;
+ int index, prev;
+ int link = 0;
+ int i;
+ int err;
+ u8 status;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ mgm = mailbox->buf;
+
+ mutex_lock(&dev->mcg_table.mutex);
+
+ err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
+ if (err)
+ goto out;
+
+ if (index != -1) {
+ if (!memcmp(mgm->gid, zero_gid, 16))
+ memcpy(mgm->gid, gid->raw, 16);
+ } else {
+ link = 1;
+
+ index = mthca_alloc(&dev->mcg_table.alloc);
+ if (index == -1) {
+ mthca_err(dev, "No AMGM entries left\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = mthca_READ_MGM(dev, index, mailbox, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "READ_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+ memset(mgm, 0, sizeof *mgm);
+ memcpy(mgm->gid, gid->raw, 16);
+ }
+
+ for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
+ if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) {
+ mthca_dbg(dev, "QP %06x already a member of MGM\n",
+ ibqp->qp_num);
+ err = 0;
+ goto out;
+ } else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
+ mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31));
+ break;
+ }
+
+ if (i == MTHCA_QP_PER_MGM) {
+ mthca_err(dev, "MGM at index %x is full.\n", index);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = mthca_WRITE_MGM(dev, index, mailbox, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (!link)
+ goto out;
+
+ err = mthca_READ_MGM(dev, prev, mailbox, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "READ_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ mgm->next_gid_index = cpu_to_be32(index << 6);
+
+ err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ }
+
+ out:
+ if (err && link && index != -1) {
+ BUG_ON(index < dev->limits.num_mgms);
+ mthca_free(&dev->mcg_table.alloc, index);
+ }
+ mutex_unlock(&dev->mcg_table.mutex);
+
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
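+/*
+ * Detach a QP from a multicast GID: remove the QP from the group's
+ * member list and, if the entry becomes empty, remove it from the hash
+ * chain.
+ */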
+int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_mailbox *mailbox;
+ struct mthca_mgm *mgm;
+ u16 hash;
+ int prev, index;
+ int i, loc;
+ int err;
+ u8 status;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ mgm = mailbox->buf;
+
+ mutex_lock(&dev->mcg_table.mutex);
+
+ err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
+ if (err)
+ goto out;
+
+ if (index == -1) {
+ mthca_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
+ "not found\n",
+ be16_to_cpu(((__be16 *) gid->raw)[0]),
+ be16_to_cpu(((__be16 *) gid->raw)[1]),
+ be16_to_cpu(((__be16 *) gid->raw)[2]),
+ be16_to_cpu(((__be16 *) gid->raw)[3]),
+ be16_to_cpu(((__be16 *) gid->raw)[4]),
+ be16_to_cpu(((__be16 *) gid->raw)[5]),
+ be16_to_cpu(((__be16 *) gid->raw)[6]),
+ be16_to_cpu(((__be16 *) gid->raw)[7]));
+ err = -EINVAL;
+ goto out;
+ }
+
+ for (loc = -1, i = 0; i < MTHCA_QP_PER_MGM; ++i) {
+ if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31)))
+ loc = i;
+ if (!(mgm->qp[i] & cpu_to_be32(1 << 31)))
+ break;
+ }
+
+ if (loc == -1) {
+ mthca_err(dev, "QP %06x not found in MGM\n", ibqp->qp_num);
+ err = -EINVAL;
+ goto out;
+ }
+
+ mgm->qp[loc] = mgm->qp[i - 1];
+ mgm->qp[i - 1] = 0;
+
+ err = mthca_WRITE_MGM(dev, index, mailbox, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (i != 1)
+ goto out;
+
+ if (prev == -1) {
+ /* Remove entry from MGM */
+ int amgm_index_to_free = be32_to_cpu(mgm->next_gid_index) >> 6;
+ if (amgm_index_to_free) {
+ err = mthca_READ_MGM(dev, amgm_index_to_free,
+ mailbox, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "READ_MGM returned status %02x\n",
+ status);
+ err = -EINVAL;
+ goto out;
+ }
+ } else
+ memset(mgm->gid, 0, 16);
+
+ err = mthca_WRITE_MGM(dev, index, mailbox, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+ if (amgm_index_to_free) {
+ BUG_ON(amgm_index_to_free < dev->limits.num_mgms);
+ mthca_free(&dev->mcg_table.alloc, amgm_index_to_free);
+ }
+ } else {
+ /* Remove entry from AMGM */
+ int curr_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
+ err = mthca_READ_MGM(dev, prev, mailbox, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "READ_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ mgm->next_gid_index = cpu_to_be32(curr_next_index << 6);
+
+ err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
+ if (err)
+ goto out;
+ if (status) {
+ mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+ err = -EINVAL;
+ goto out;
+ }
+ BUG_ON(index < dev->limits.num_mgms);
+ mthca_free(&dev->mcg_table.alloc, index);
+ }
+
+ out:
+ mutex_unlock(&dev->mcg_table.mutex);
+
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
+
+int mthca_init_mcg_table(struct mthca_dev *dev)
+{
+ int err;
+ int table_size = dev->limits.num_mgms + dev->limits.num_amgms;
+
+ err = mthca_alloc_init(&dev->mcg_table.alloc,
+ table_size,
+ table_size - 1,
+ dev->limits.num_mgms);
+ if (err)
+ return err;
+
+ mutex_init(&dev->mcg_table.mutex);
+
+ return 0;
+}
+
+void mthca_cleanup_mcg_table(struct mthca_dev *dev)
+{
+ mthca_alloc_cleanup(&dev->mcg_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
new file mode 100644
index 0000000..1f7d1a2
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -0,0 +1,774 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/sched.h>
+
+#include <asm/page.h>
+
+#include "mthca_memfree.h"
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+
+/*
+ * We allocate in as big chunks as we can, up to a maximum of 256 KB
+ * per chunk.
+ */
+enum {
+ MTHCA_ICM_ALLOC_SIZE = 1 << 18,
+ MTHCA_TABLE_CHUNK_SIZE = 1 << 18
+};
+
+struct mthca_user_db_table {
+ struct mutex mutex;
+ struct {
+ u64 uvirt;
+ struct scatterlist mem;
+ int refcount;
+ } page[0];
+};
+
+static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
+{
+ int i;
+
+ if (chunk->nsg > 0)
+ pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
+ PCI_DMA_BIDIRECTIONAL);
+
+ for (i = 0; i < chunk->npages; ++i)
+ __free_pages(sg_page(&chunk->mem[i]),
+ get_order(chunk->mem[i].length));
+}
+
+static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
+{
+ int i;
+
+ for (i = 0; i < chunk->npages; ++i) {
+ dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
+ lowmem_page_address(sg_page(&chunk->mem[i])),
+ sg_dma_address(&chunk->mem[i]));
+ }
+}
+
+void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent)
+{
+ struct mthca_icm_chunk *chunk, *tmp;
+
+ if (!icm)
+ return;
+
+ list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
+ if (coherent)
+ mthca_free_icm_coherent(dev, chunk);
+ else
+ mthca_free_icm_pages(dev, chunk);
+
+ kfree(chunk);
+ }
+
+ kfree(icm);
+}
+
+static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
+{
+ struct page *page;
+
+ /*
+ * Use __GFP_ZERO because buggy firmware assumes ICM pages are
+ * cleared, and subtle failures are seen if they aren't.
+ */
+ page = alloc_pages(gfp_mask | __GFP_ZERO, order);
+ if (!page)
+ return -ENOMEM;
+
+ sg_set_page(mem, page, PAGE_SIZE << order, 0);
+ return 0;
+}
+
+static int mthca_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
+ int order, gfp_t gfp_mask)
+{
+ void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, &sg_dma_address(mem),
+ gfp_mask);
+ if (!buf)
+ return -ENOMEM;
+
+ sg_set_buf(mem, buf, PAGE_SIZE << order);
+ BUG_ON(mem->offset);
+ sg_dma_len(mem) = PAGE_SIZE << order;
+ return 0;
+}
+
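+/*
+ * Allocate npages of ICM in chunks of up to MTHCA_ICM_ALLOC_SIZE,
+ * falling back to smaller allocation orders when higher orders fail.
+ */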
+struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
+ gfp_t gfp_mask, int coherent)
+{
+ struct mthca_icm *icm;
+ struct mthca_icm_chunk *chunk = NULL;
+ int cur_order;
+ int ret;
+
+ /* We use sg_set_buf for coherent allocs, which assumes low memory */
+ BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
+
+ icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+ if (!icm)
+ return icm;
+
+ icm->refcount = 0;
+ INIT_LIST_HEAD(&icm->chunk_list);
+
+ cur_order = get_order(MTHCA_ICM_ALLOC_SIZE);
+
+ while (npages > 0) {
+ if (!chunk) {
+ chunk = kmalloc(sizeof *chunk,
+ gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+ if (!chunk)
+ goto fail;
+
+ sg_init_table(chunk->mem, MTHCA_ICM_CHUNK_LEN);
+ chunk->npages = 0;
+ chunk->nsg = 0;
+ list_add_tail(&chunk->list, &icm->chunk_list);
+ }
+
+ while (1 << cur_order > npages)
+ --cur_order;
+
+ if (coherent)
+ ret = mthca_alloc_icm_coherent(&dev->pdev->dev,
+ &chunk->mem[chunk->npages],
+ cur_order, gfp_mask);
+ else
+ ret = mthca_alloc_icm_pages(&chunk->mem[chunk->npages],
+ cur_order, gfp_mask);
+
+ if (!ret) {
+ ++chunk->npages;
+
+ if (coherent)
+ ++chunk->nsg;
+ else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {
+ chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
+ chunk->npages,
+ PCI_DMA_BIDIRECTIONAL);
+
+ if (chunk->nsg <= 0)
+ goto fail;
+ }
+
+ if (chunk->npages == MTHCA_ICM_CHUNK_LEN)
+ chunk = NULL;
+
+ npages -= 1 << cur_order;
+ } else {
+ --cur_order;
+ if (cur_order < 0)
+ goto fail;
+ }
+ }
+
+ if (!coherent && chunk) {
+ chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
+ chunk->npages,
+ PCI_DMA_BIDIRECTIONAL);
+
+ if (chunk->nsg <= 0)
+ goto fail;
+ }
+
+ return icm;
+
+fail:
+ mthca_free_icm(dev, icm, coherent);
+ return NULL;
+}
+
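+/*
+ * Make sure the ICM chunk backing table object obj is allocated and
+ * mapped to the HCA, and take a reference on it.
+ */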
+int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
+{
+ int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
+ int ret = 0;
+ u8 status;
+
+ mutex_lock(&table->mutex);
+
+ if (table->icm[i]) {
+ ++table->icm[i]->refcount;
+ goto out;
+ }
+
+ table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
+ (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
+ __GFP_NOWARN, table->coherent);
+ if (!table->icm[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ &status) || status) {
+ mthca_free_icm(dev, table->icm[i], table->coherent);
+ table->icm[i] = NULL;
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ++table->icm[i]->refcount;
+
+out:
+ mutex_unlock(&table->mutex);
+ return ret;
+}
+
+void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
+{
+ int i;
+ u8 status;
+
+ if (!mthca_is_memfree(dev))
+ return;
+
+ i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
+
+ mutex_lock(&table->mutex);
+
+ if (--table->icm[i]->refcount == 0) {
+ mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
+ &status);
+ mthca_free_icm(dev, table->icm[i], table->coherent);
+ table->icm[i] = NULL;
+ }
+
+ mutex_unlock(&table->mutex);
+}
+
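+/*
+ * Return the lowmem virtual address of a table object (and optionally
+ * its DMA address), or NULL if the table isn't kept in lowmem or the
+ * backing ICM chunk isn't allocated.
+ */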
+void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle)
+{
+ int idx, offset, dma_offset, i;
+ struct mthca_icm_chunk *chunk;
+ struct mthca_icm *icm;
+ struct page *page = NULL;
+
+ if (!table->lowmem)
+ return NULL;
+
+ mutex_lock(&table->mutex);
+
+ idx = (obj & (table->num_obj - 1)) * table->obj_size;
+ icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE];
+ dma_offset = offset = idx % MTHCA_TABLE_CHUNK_SIZE;
+
+ if (!icm)
+ goto out;
+
+ list_for_each_entry(chunk, &icm->chunk_list, list) {
+ for (i = 0; i < chunk->npages; ++i) {
+ if (dma_handle && dma_offset >= 0) {
+ if (sg_dma_len(&chunk->mem[i]) > dma_offset)
+ *dma_handle = sg_dma_address(&chunk->mem[i]) +
+ dma_offset;
+ dma_offset -= sg_dma_len(&chunk->mem[i]);
+ }
+			/*
+			 * DMA mapping can merge pages but not split them,
+			 * so if we have found the page, dma_handle has
+			 * already been assigned above.
+			 */
+ if (chunk->mem[i].length > offset) {
+ page = sg_page(&chunk->mem[i]);
+ goto out;
+ }
+ offset -= chunk->mem[i].length;
+ }
+ }
+
+out:
+ mutex_unlock(&table->mutex);
+ return page ? lowmem_page_address(page) + offset : NULL;
+}
+
+int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
+ int start, int end)
+{
+ int inc = MTHCA_TABLE_CHUNK_SIZE / table->obj_size;
+ int i, err;
+
+ for (i = start; i <= end; i += inc) {
+ err = mthca_table_get(dev, table, i);
+ if (err)
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ while (i > start) {
+ i -= inc;
+ mthca_table_put(dev, table, i);
+ }
+
+ return err;
+}
+
+void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
+ int start, int end)
+{
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return;
+
+ for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size)
+ mthca_table_put(dev, table, i);
+}
+
+struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
+ u64 virt, int obj_size,
+ int nobj, int reserved,
+ int use_lowmem, int use_coherent)
+{
+ struct mthca_icm_table *table;
+ int obj_per_chunk;
+ int num_icm;
+ unsigned chunk_size;
+ int i;
+ u8 status;
+
+ obj_per_chunk = MTHCA_TABLE_CHUNK_SIZE / obj_size;
+ num_icm = DIV_ROUND_UP(nobj, obj_per_chunk);
+
+ table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL);
+ if (!table)
+ return NULL;
+
+ table->virt = virt;
+ table->num_icm = num_icm;
+ table->num_obj = nobj;
+ table->obj_size = obj_size;
+ table->lowmem = use_lowmem;
+ table->coherent = use_coherent;
+ mutex_init(&table->mutex);
+
+ for (i = 0; i < num_icm; ++i)
+ table->icm[i] = NULL;
+
+ for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
+ chunk_size = MTHCA_TABLE_CHUNK_SIZE;
+ if ((i + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size)
+ chunk_size = nobj * obj_size - i * MTHCA_TABLE_CHUNK_SIZE;
+
+ table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
+ (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
+ __GFP_NOWARN, use_coherent);
+ if (!table->icm[i])
+ goto err;
+ if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ &status) || status) {
+ mthca_free_icm(dev, table->icm[i], table->coherent);
+ table->icm[i] = NULL;
+ goto err;
+ }
+
+ /*
+ * Add a reference to this ICM chunk so that it never
+ * gets freed (since it contains reserved firmware objects).
+ */
+ ++table->icm[i]->refcount;
+ }
+
+ return table;
+
+err:
+ for (i = 0; i < num_icm; ++i)
+ if (table->icm[i]) {
+ mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
+ &status);
+ mthca_free_icm(dev, table->icm[i], table->coherent);
+ }
+
+ kfree(table);
+
+ return NULL;
+}
+
+void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
+{
+ int i;
+ u8 status;
+
+ for (i = 0; i < table->num_icm; ++i)
+ if (table->icm[i]) {
+ mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
+ &status);
+ mthca_free_icm(dev, table->icm[i], table->coherent);
+ }
+
+ kfree(table);
+}
+
+static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page)
+{
+ return dev->uar_table.uarc_base +
+ uar->index * dev->uar_table.uarc_size +
+ page * MTHCA_ICM_PAGE_SIZE;
+}
+
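+/*
+ * Pin the userspace page holding a doorbell record and map it into the
+ * UAR context (UARC) with MAP_ICM_page, refcounting repeat mappings of
+ * the same page.
+ */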
+int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index, u64 uaddr)
+{
+ struct page *pages[1];
+ int ret = 0;
+ u8 status;
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return 0;
+
+ if (index < 0 || index > dev->uar_table.uarc_size / 8)
+ return -EINVAL;
+
+ mutex_lock(&db_tab->mutex);
+
+ i = index / MTHCA_DB_REC_PER_PAGE;
+
+ if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) ||
+ (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
+ (uaddr & 4095)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (db_tab->page[i].refcount) {
+ ++db_tab->page[i].refcount;
+ goto out;
+ }
+
+ ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
+ pages, NULL);
+ if (ret < 0)
+ goto out;
+
+ sg_set_page(&db_tab->page[i].mem, pages[0], MTHCA_ICM_PAGE_SIZE,
+ uaddr & ~PAGE_MASK);
+
+ ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ if (ret < 0) {
+ put_page(pages[0]);
+ goto out;
+ }
+
+ ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
+ mthca_uarc_virt(dev, uar, i), &status);
+ if (!ret && status)
+ ret = -EINVAL;
+ if (ret) {
+ pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ put_page(sg_page(&db_tab->page[i].mem));
+ goto out;
+ }
+
+ db_tab->page[i].uvirt = uaddr;
+ db_tab->page[i].refcount = 1;
+
+out:
+ mutex_unlock(&db_tab->mutex);
+ return ret;
+}
+
+void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index)
+{
+ if (!mthca_is_memfree(dev))
+ return;
+
+ /*
+ * To make our bookkeeping simpler, we don't unmap DB
+ * pages until we clean up the whole db table.
+ */
+
+ mutex_lock(&db_tab->mutex);
+
+ --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount;
+
+ mutex_unlock(&db_tab->mutex);
+}
+
+struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
+{
+ struct mthca_user_db_table *db_tab;
+ int npages;
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return NULL;
+
+ npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
+ db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
+ if (!db_tab)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&db_tab->mutex);
+ for (i = 0; i < npages; ++i) {
+ db_tab->page[i].refcount = 0;
+ db_tab->page[i].uvirt = 0;
+ sg_init_table(&db_tab->page[i].mem, 1);
+ }
+
+ return db_tab;
+}
+
+void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab)
+{
+ int i;
+ u8 status;
+
+ if (!mthca_is_memfree(dev))
+ return;
+
+ for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) {
+ if (db_tab->page[i].uvirt) {
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
+ pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ put_page(sg_page(&db_tab->page[i].mem));
+ }
+ }
+
+ kfree(db_tab);
+}
+
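+/*
+ * Allocate a kernel doorbell record.  CQ arm and SQ doorbells are
+ * allocated from pages growing up from page 0, while CQ set_ci, RQ and
+ * SRQ doorbells come from pages growing down from the top of the UARC.
+ */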
+int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
+ u32 qn, __be32 **db)
+{
+ int group;
+ int start, end, dir;
+ int i, j;
+ struct mthca_db_page *page;
+ int ret = 0;
+ u8 status;
+
+ mutex_lock(&dev->db_tab->mutex);
+
+ switch (type) {
+ case MTHCA_DB_TYPE_CQ_ARM:
+ case MTHCA_DB_TYPE_SQ:
+ group = 0;
+ start = 0;
+ end = dev->db_tab->max_group1;
+ dir = 1;
+ break;
+
+ case MTHCA_DB_TYPE_CQ_SET_CI:
+ case MTHCA_DB_TYPE_RQ:
+ case MTHCA_DB_TYPE_SRQ:
+ group = 1;
+ start = dev->db_tab->npages - 1;
+ end = dev->db_tab->min_group2;
+ dir = -1;
+ break;
+
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+
+ for (i = start; i != end; i += dir)
+ if (dev->db_tab->page[i].db_rec &&
+ !bitmap_full(dev->db_tab->page[i].used,
+ MTHCA_DB_REC_PER_PAGE)) {
+ page = dev->db_tab->page + i;
+ goto found;
+ }
+
+ for (i = start; i != end; i += dir)
+ if (!dev->db_tab->page[i].db_rec) {
+ page = dev->db_tab->page + i;
+ goto alloc;
+ }
+
+ if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (group == 0)
+ ++dev->db_tab->max_group1;
+ else
+ --dev->db_tab->min_group2;
+
+ page = dev->db_tab->page + end;
+
+alloc:
+ page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
+ &page->mapping, GFP_KERNEL);
+ if (!page->db_rec) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE);
+
+ ret = mthca_MAP_ICM_page(dev, page->mapping,
+ mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
+ if (!ret && status)
+ ret = -EINVAL;
+ if (ret) {
+ dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
+ page->db_rec, page->mapping);
+ goto out;
+ }
+
+ bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE);
+
+found:
+ j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE);
+ set_bit(j, page->used);
+
+ if (group == 1)
+ j = MTHCA_DB_REC_PER_PAGE - 1 - j;
+
+ ret = i * MTHCA_DB_REC_PER_PAGE + j;
+
+ page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5));
+
+ *db = (__be32 *) &page->db_rec[j];
+
+out:
+ mutex_unlock(&dev->db_tab->mutex);
+
+ return ret;
+}
+
+void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
+{
+ int i, j;
+ struct mthca_db_page *page;
+ u8 status;
+
+ i = db_index / MTHCA_DB_REC_PER_PAGE;
+ j = db_index % MTHCA_DB_REC_PER_PAGE;
+
+ page = dev->db_tab->page + i;
+
+ mutex_lock(&dev->db_tab->mutex);
+
+ page->db_rec[j] = 0;
+ if (i >= dev->db_tab->min_group2)
+ j = MTHCA_DB_REC_PER_PAGE - 1 - j;
+ clear_bit(j, page->used);
+
+ if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
+ i >= dev->db_tab->max_group1 - 1) {
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
+
+ dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
+ page->db_rec, page->mapping);
+ page->db_rec = NULL;
+
+ if (i == dev->db_tab->max_group1) {
+ --dev->db_tab->max_group1;
+ /* XXX may be able to unmap more pages now */
+ }
+ if (i == dev->db_tab->min_group2)
+ ++dev->db_tab->min_group2;
+ }
+
+ mutex_unlock(&dev->db_tab->mutex);
+}
+
+int mthca_init_db_tab(struct mthca_dev *dev)
+{
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return 0;
+
+ dev->db_tab = kmalloc(sizeof *dev->db_tab, GFP_KERNEL);
+ if (!dev->db_tab)
+ return -ENOMEM;
+
+ mutex_init(&dev->db_tab->mutex);
+
+ dev->db_tab->npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
+ dev->db_tab->max_group1 = 0;
+ dev->db_tab->min_group2 = dev->db_tab->npages - 1;
+
+ dev->db_tab->page = kmalloc(dev->db_tab->npages *
+ sizeof *dev->db_tab->page,
+ GFP_KERNEL);
+ if (!dev->db_tab->page) {
+ kfree(dev->db_tab);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < dev->db_tab->npages; ++i)
+ dev->db_tab->page[i].db_rec = NULL;
+
+ return 0;
+}
+
+void mthca_cleanup_db_tab(struct mthca_dev *dev)
+{
+ int i;
+ u8 status;
+
+ if (!mthca_is_memfree(dev))
+ return;
+
+ /*
+ * Because we don't always free our UARC pages when they
+ * become empty (this keeps mthca_free_db() simpler), we
+ * need to sweep through the doorbell pages and free any
+ * leftover pages now.
+ */
+ for (i = 0; i < dev->db_tab->npages; ++i) {
+ if (!dev->db_tab->page[i].db_rec)
+ continue;
+
+ if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
+ mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
+
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
+
+ dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
+ dev->db_tab->page[i].db_rec,
+ dev->db_tab->page[i].mapping);
+ }
+
+ kfree(dev->db_tab->page);
+ kfree(dev->db_tab);
+}
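/*
 * A minimal, self-contained sketch of the doorbell index arithmetic that
 * mthca_alloc_db() returns and mthca_free_db() inverts: CQ-arm and SQ
 * records grow up from page 0 (bounded by max_group1), while CQ set_ci, RQ
 * and SRQ records grow down from the last page (bounded by min_group2) with
 * their slot numbers mirrored within the page.  decode_db_index(),
 * DB_REC_PER_PAGE and the sample min_group2 value are illustrative only,
 * not driver code.
 */
#include <assert.h>

#define DB_REC_PER_PAGE 512	/* MTHCA_ICM_PAGE_SIZE / 8 */

/* Recover (page, bitmap slot) from a doorbell index, as mthca_free_db() does. */
static void decode_db_index(int db_index, int min_group2, int *page, int *slot)
{
	*page = db_index / DB_REC_PER_PAGE;
	*slot = db_index % DB_REC_PER_PAGE;
	if (*page >= min_group2)	/* high pages count slots from the end */
		*slot = DB_REC_PER_PAGE - 1 - *slot;
}

int main(void)
{
	int page, slot;

	decode_db_index(5, 100, &page, &slot);			/* low-end page */
	assert(page == 0 && slot == 5);

	decode_db_index(127 * DB_REC_PER_PAGE + 3, 100, &page, &slot);
	assert(page == 127 && slot == DB_REC_PER_PAGE - 4);	/* mirrored */
	return 0;
}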
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
new file mode 100644
index 0000000..da9b8f9
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MTHCA_MEMFREE_H
+#define MTHCA_MEMFREE_H
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+#define MTHCA_ICM_CHUNK_LEN \
+ ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \
+ (sizeof (struct scatterlist)))
+
+enum {
+ MTHCA_ICM_PAGE_SHIFT = 12,
+ MTHCA_ICM_PAGE_SIZE = 1 << MTHCA_ICM_PAGE_SHIFT,
+ MTHCA_DB_REC_PER_PAGE = MTHCA_ICM_PAGE_SIZE / 8
+};
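/*
 * Worked sizes for the constants above (illustrative, not normative):
 *   MTHCA_ICM_PAGE_SIZE   = 1 << 12  = 4096 bytes
 *   MTHCA_DB_REC_PER_PAGE = 4096 / 8 = 512 doorbell records per page
 * MTHCA_ICM_CHUNK_LEN keeps struct mthca_icm_chunk close to 256 bytes.
 * Assuming a 64-bit build with sizeof(struct list_head) == 16 and
 * sizeof(struct scatterlist) == 32 (both architecture and config
 * dependent), it works out to (256 - 16 - 2*4) / 32 = 7 scatterlist
 * entries per chunk.
 */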
+
+struct mthca_icm_chunk {
+ struct list_head list;
+ int npages;
+ int nsg;
+ struct scatterlist mem[MTHCA_ICM_CHUNK_LEN];
+};
+
+struct mthca_icm {
+ struct list_head chunk_list;
+ int refcount;
+};
+
+struct mthca_icm_table {
+ u64 virt;
+ int num_icm;
+ int num_obj;
+ int obj_size;
+ int lowmem;
+ int coherent;
+ struct mutex mutex;
+ struct mthca_icm *icm[0];
+};
+
+struct mthca_icm_iter {
+ struct mthca_icm *icm;
+ struct mthca_icm_chunk *chunk;
+ int page_idx;
+};
+
+struct mthca_dev;
+
+struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
+ gfp_t gfp_mask, int coherent);
+void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent);
+
+struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
+ u64 virt, int obj_size,
+ int nobj, int reserved,
+ int use_lowmem, int use_coherent);
+void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table);
+int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
+void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
+void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle);
+int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
+ int start, int end);
+void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
+ int start, int end);
+
+static inline void mthca_icm_first(struct mthca_icm *icm,
+ struct mthca_icm_iter *iter)
+{
+ iter->icm = icm;
+ iter->chunk = list_empty(&icm->chunk_list) ?
+ NULL : list_entry(icm->chunk_list.next,
+ struct mthca_icm_chunk, list);
+ iter->page_idx = 0;
+}
+
+static inline int mthca_icm_last(struct mthca_icm_iter *iter)
+{
+ return !iter->chunk;
+}
+
+static inline void mthca_icm_next(struct mthca_icm_iter *iter)
+{
+ if (++iter->page_idx >= iter->chunk->nsg) {
+ if (iter->chunk->list.next == &iter->icm->chunk_list) {
+ iter->chunk = NULL;
+ return;
+ }
+
+ iter->chunk = list_entry(iter->chunk->list.next,
+ struct mthca_icm_chunk, list);
+ iter->page_idx = 0;
+ }
+}
+
+static inline dma_addr_t mthca_icm_addr(struct mthca_icm_iter *iter)
+{
+ return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
+}
+
+static inline unsigned long mthca_icm_size(struct mthca_icm_iter *iter)
+{
+ return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
+}
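/*
 * A minimal sketch of how the iterator above is intended to be driven; the
 * real users are the firmware mapping commands in mthca_cmd.c, and the loop
 * body here is only a placeholder:
 *
 *	struct mthca_icm_iter iter;
 *
 *	for (mthca_icm_first(icm, &iter);
 *	     !mthca_icm_last(&iter);
 *	     mthca_icm_next(&iter)) {
 *		dma_addr_t addr    = mthca_icm_addr(&iter);
 *		unsigned long size = mthca_icm_size(&iter);
 *		... hand [addr, addr + size) to a MAP_ICM firmware command ...
 *	}
 */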
+
+struct mthca_db_page {
+ DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE);
+ __be64 *db_rec;
+ dma_addr_t mapping;
+};
+
+struct mthca_db_table {
+ int npages;
+ int max_group1;
+ int min_group2;
+ struct mthca_db_page *page;
+ struct mutex mutex;
+};
+
+enum mthca_db_type {
+ MTHCA_DB_TYPE_INVALID = 0x0,
+ MTHCA_DB_TYPE_CQ_SET_CI = 0x1,
+ MTHCA_DB_TYPE_CQ_ARM = 0x2,
+ MTHCA_DB_TYPE_SQ = 0x3,
+ MTHCA_DB_TYPE_RQ = 0x4,
+ MTHCA_DB_TYPE_SRQ = 0x5,
+ MTHCA_DB_TYPE_GROUP_SEP = 0x7
+};
+
+struct mthca_user_db_table;
+struct mthca_uar;
+
+int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index, u64 uaddr);
+void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index);
+struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev);
+void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab);
+
+int mthca_init_db_tab(struct mthca_dev *dev);
+void mthca_cleanup_db_tab(struct mthca_dev *dev);
+int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
+ u32 qn, __be32 **db);
+void mthca_free_db(struct mthca_dev *dev, int type, int db_index);
+
+#endif /* MTHCA_MEMFREE_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
new file mode 100644
index 0000000..882e6b7
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -0,0 +1,985 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/slab.h>
+#include <linux/errno.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+#include "mthca_memfree.h"
+
+struct mthca_mtt {
+ struct mthca_buddy *buddy;
+ int order;
+ u32 first_seg;
+};
+
+/*
+ * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
+ */
+struct mthca_mpt_entry {
+ __be32 flags;
+ __be32 page_size;
+ __be32 key;
+ __be32 pd;
+ __be64 start;
+ __be64 length;
+ __be32 lkey;
+ __be32 window_count;
+ __be32 window_count_limit;
+ __be64 mtt_seg;
+ __be32 mtt_sz; /* Arbel only */
+ u32 reserved[2];
+} __attribute__((packed));
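/*
 * Worked offsets showing why the packed attribute is needed: flags,
 * page_size, key and pd sit at offsets 0, 4, 8 and 12; start and length at
 * 16 and 24; lkey, window_count and window_count_limit at 32, 36 and 40;
 * mtt_seg therefore lands at offset 44, which is not 8-byte aligned.
 * Without __attribute__((packed)) the compiler would pad mtt_seg out to
 * offset 48 and the structure would no longer be the 64 bytes the mailbox
 * code expects.
 */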
+
+#define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28)
+#define MTHCA_MPT_FLAG_MIO (1 << 17)
+#define MTHCA_MPT_FLAG_BIND_ENABLE (1 << 15)
+#define MTHCA_MPT_FLAG_PHYSICAL (1 << 9)
+#define MTHCA_MPT_FLAG_REGION (1 << 8)
+
+#define MTHCA_MTT_FLAG_PRESENT 1
+
+#define MTHCA_MPT_STATUS_SW 0xF0
+#define MTHCA_MPT_STATUS_HW 0x00
+
+#define SINAI_FMR_KEY_INC 0x1000000
+
+/*
+ * Buddy allocator for MTT segments (currently not very efficient
+ * since it doesn't keep a free list and just searches linearly
+ * through the bitmaps)
+ */
+
+static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
+{
+ int o;
+ int m;
+ u32 seg;
+
+ spin_lock(&buddy->lock);
+
+ for (o = order; o <= buddy->max_order; ++o)
+ if (buddy->num_free[o]) {
+ m = 1 << (buddy->max_order - o);
+ seg = find_first_bit(buddy->bits[o], m);
+ if (seg < m)
+ goto found;
+ }
+
+ spin_unlock(&buddy->lock);
+ return -1;
+
+ found:
+ clear_bit(seg, buddy->bits[o]);
+ --buddy->num_free[o];
+
+ while (o > order) {
+ --o;
+ seg <<= 1;
+ set_bit(seg ^ 1, buddy->bits[o]);
+ ++buddy->num_free[o];
+ }
+
+ spin_unlock(&buddy->lock);
+
+ seg <<= order;
+
+ return seg;
+}
+
+static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
+{
+ seg >>= order;
+
+ spin_lock(&buddy->lock);
+
+ while (test_bit(seg ^ 1, buddy->bits[order])) {
+ clear_bit(seg ^ 1, buddy->bits[order]);
+ --buddy->num_free[order];
+ seg >>= 1;
+ ++order;
+ }
+
+ set_bit(seg, buddy->bits[order]);
+ ++buddy->num_free[order];
+
+ spin_unlock(&buddy->lock);
+}
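/*
 * Worked example of the split/merge bookkeeping above (illustrative), for a
 * buddy with max_order = 2, i.e. four order-0 segments and initially one
 * free order-2 block:
 *
 *   mthca_buddy_alloc(order = 0) takes block 0 at order 2 and splits twice
 *   on the way down, marking each buddy free: afterwards bit 1 is set at
 *   order 1 (segments 2-3) and bit 1 at order 0 (segment 1), and segment 0
 *   is returned.
 *
 *   mthca_buddy_free(seg = 0, order = 0) finds buddy bit 1 set at order 0,
 *   merges, finds bit 1 set at order 1, merges again, and ends up setting
 *   bit 0 at order 2 -- the allocator is back to one free order-2 block.
 */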
+
+static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
+{
+ int i, s;
+
+ buddy->max_order = max_order;
+ spin_lock_init(&buddy->lock);
+
+ buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
+ GFP_KERNEL);
+ buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *),
+ GFP_KERNEL);
+ if (!buddy->bits || !buddy->num_free)
+ goto err_out;
+
+ for (i = 0; i <= buddy->max_order; ++i) {
+ s = BITS_TO_LONGS(1 << (buddy->max_order - i));
+ buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
+ if (!buddy->bits[i])
+ goto err_out_free;
+ bitmap_zero(buddy->bits[i],
+ 1 << (buddy->max_order - i));
+ }
+
+ set_bit(0, buddy->bits[buddy->max_order]);
+ buddy->num_free[buddy->max_order] = 1;
+
+ return 0;
+
+err_out_free:
+ for (i = 0; i <= buddy->max_order; ++i)
+ kfree(buddy->bits[i]);
+
+err_out:
+ kfree(buddy->bits);
+ kfree(buddy->num_free);
+
+ return -ENOMEM;
+}
+
+static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
+{
+ int i;
+
+ for (i = 0; i <= buddy->max_order; ++i)
+ kfree(buddy->bits[i]);
+
+ kfree(buddy->bits);
+ kfree(buddy->num_free);
+}
+
+static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
+ struct mthca_buddy *buddy)
+{
+ u32 seg = mthca_buddy_alloc(buddy, order);
+
+ if (seg == -1)
+ return -1;
+
+ if (mthca_is_memfree(dev))
+ if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg,
+ seg + (1 << order) - 1)) {
+ mthca_buddy_free(buddy, seg, order);
+ seg = -1;
+ }
+
+ return seg;
+}
+
+static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
+ struct mthca_buddy *buddy)
+{
+ struct mthca_mtt *mtt;
+ int i;
+
+ if (size <= 0)
+ return ERR_PTR(-EINVAL);
+
+ mtt = kmalloc(sizeof *mtt, GFP_KERNEL);
+ if (!mtt)
+ return ERR_PTR(-ENOMEM);
+
+ mtt->buddy = buddy;
+ mtt->order = 0;
+ for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1)
+ ++mtt->order;
+
+ mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
+ if (mtt->first_seg == -1) {
+ kfree(mtt);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return mtt;
+}
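/*
 * Sizing sketch for the loop above, assuming MTHCA_MTT_SEG_SIZE is 64 bytes
 * (eight 8-byte MTT entries per segment, as defined elsewhere in the
 * driver): for size = 100 translation entries the loop doubles from 8 until
 * 8 << order >= 100, giving order = 4, so 1 << 4 = 16 contiguous segments
 * (128 MTT entries) are requested from the buddy allocator.
 */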
+
+struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size)
+{
+ return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy);
+}
+
+void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
+{
+ if (!mtt)
+ return;
+
+ mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order);
+
+ mthca_table_put_range(dev, dev->mr_table.mtt_table,
+ mtt->first_seg,
+ mtt->first_seg + (1 << mtt->order) - 1);
+
+ kfree(mtt);
+}
+
+static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+ int start_index, u64 *buffer_list, int list_len)
+{
+ struct mthca_mailbox *mailbox;
+ __be64 *mtt_entry;
+ int err = 0;
+ u8 status;
+ int i;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ mtt_entry = mailbox->buf;
+
+ while (list_len > 0) {
+ mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
+ mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+ start_index * 8);
+ mtt_entry[1] = 0;
+ for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
+ mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
+ MTHCA_MTT_FLAG_PRESENT);
+
+ /*
+ * If we have an odd number of entries to write, add
+ * one more dummy entry for firmware efficiency.
+ */
+ if (i & 1)
+ mtt_entry[i + 2] = 0;
+
+ err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status);
+ if (err) {
+ mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
+ goto out;
+ }
+ if (status) {
+ mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
+ status);
+ err = -EINVAL;
+ goto out;
+ }
+
+ list_len -= i;
+ start_index += i;
+ buffer_list += i;
+ }
+
+out:
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+}
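/*
 * Mailbox layout built by one iteration of the loop above (assuming the
 * usual 4 KB mailbox):
 *   entry[0]   64-bit target: mtt_base + first_seg * MTHCA_MTT_SEG_SIZE
 *              + start_index * 8
 *   entry[1]   reserved, left as 0
 *   entry[2..] page addresses, each tagged with MTHCA_MTT_FLAG_PRESENT
 * That leaves room for 4096/8 - 2 = 510 MTT entries per WRITE_MTT command,
 * and an odd count is padded with one zero entry so the firmware always
 * sees an even number.
 */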
+
+int mthca_write_mtt_size(struct mthca_dev *dev)
+{
+ if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
+ !(dev->mthca_flags & MTHCA_FLAG_FMR))
+ /*
+ * Be friendly to WRITE_MTT command
+ * and leave two empty slots for the
+ * index and reserved fields of the
+ * mailbox.
+ */
+ return PAGE_SIZE / sizeof (u64) - 2;
+
+ /* For Arbel, all MTTs must fit in the same page. */
+ return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff;
+}
+
+static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev,
+ struct mthca_mtt *mtt, int start_index,
+ u64 *buffer_list, int list_len)
+{
+ u64 __iomem *mtts;
+ int i;
+
+ mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+ start_index * sizeof (u64);
+ for (i = 0; i < list_len; ++i)
+ mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT),
+ mtts + i);
+}
+
+static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev,
+ struct mthca_mtt *mtt, int start_index,
+ u64 *buffer_list, int list_len)
+{
+ __be64 *mtts;
+ dma_addr_t dma_handle;
+ int i;
+ int s = start_index * sizeof (u64);
+
+ /* For Arbel, all MTTs must fit in the same page. */
+ BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
+ /* Require full segments */
+ BUG_ON(s % MTHCA_MTT_SEG_SIZE);
+
+ mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
+ s / MTHCA_MTT_SEG_SIZE, &dma_handle);
+
+ BUG_ON(!mtts);
+
+ for (i = 0; i < list_len; ++i)
+ mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT);
+
+ dma_sync_single(&dev->pdev->dev, dma_handle, list_len * sizeof (u64), DMA_TO_DEVICE);
+}
+
+int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
+ int start_index, u64 *buffer_list, int list_len)
+{
+ int size = mthca_write_mtt_size(dev);
+ int chunk;
+
+ if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
+ !(dev->mthca_flags & MTHCA_FLAG_FMR))
+ return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
+
+ while (list_len > 0) {
+ chunk = min(size, list_len);
+ if (mthca_is_memfree(dev))
+ mthca_arbel_write_mtt_seg(dev, mtt, start_index,
+ buffer_list, chunk);
+ else
+ mthca_tavor_write_mtt_seg(dev, mtt, start_index,
+ buffer_list, chunk);
+
+ list_len -= chunk;
+ start_index += chunk;
+ buffer_list += chunk;
+ }
+
+ return 0;
+}
+
+static inline u32 tavor_hw_index_to_key(u32 ind)
+{
+ return ind;
+}
+
+static inline u32 tavor_key_to_hw_index(u32 key)
+{
+ return key;
+}
+
+static inline u32 arbel_hw_index_to_key(u32 ind)
+{
+ return (ind >> 24) | (ind << 8);
+}
+
+static inline u32 arbel_key_to_hw_index(u32 key)
+{
+ return (key << 24) | (key >> 8);
+}
+
+static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
+{
+ if (mthca_is_memfree(dev))
+ return arbel_hw_index_to_key(ind);
+ else
+ return tavor_hw_index_to_key(ind);
+}
+
+static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
+{
+ if (mthca_is_memfree(dev))
+ return arbel_key_to_hw_index(key);
+ else
+ return tavor_key_to_hw_index(key);
+}
+
+static inline u32 adjust_key(struct mthca_dev *dev, u32 key)
+{
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ return ((key << 20) & 0x800000) | (key & 0x7fffff);
+ else
+ return key;
+}
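/*
 * The Arbel conversions above are opposite 8-bit rotations of a 32-bit
 * value and therefore exact inverses.  A self-contained userspace check
 * (illustrative only; the helpers simply mirror the static inlines above):
 */
#include <assert.h>
#include <stdint.h>

static uint32_t hw_index_to_key(uint32_t ind) { return (ind >> 24) | (ind << 8); }
static uint32_t key_to_hw_index(uint32_t key) { return (key << 24) | (key >> 8); }

int main(void)
{
	uint32_t ind = 0x00012345;

	assert(hw_index_to_key(ind) == 0x01234500);		/* rotate left 8 */
	assert(key_to_hw_index(hw_index_to_key(ind)) == ind);	/* round-trip */
	return 0;
}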
+
+int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
+ u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
+{
+ struct mthca_mailbox *mailbox;
+ struct mthca_mpt_entry *mpt_entry;
+ u32 key;
+ int i;
+ int err;
+ u8 status;
+
+ WARN_ON(buffer_size_shift >= 32);
+
+ key = mthca_alloc(&dev->mr_table.mpt_alloc);
+ if (key == -1)
+ return -ENOMEM;
+ key = adjust_key(dev, key);
+ mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
+
+ if (mthca_is_memfree(dev)) {
+ err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
+ if (err)
+ goto err_out_mpt_free;
+ }
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto err_out_table;
+ }
+ mpt_entry = mailbox->buf;
+
+ mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
+ MTHCA_MPT_FLAG_MIO |
+ MTHCA_MPT_FLAG_REGION |
+ access);
+ if (!mr->mtt)
+ mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL);
+
+ mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
+ mpt_entry->key = cpu_to_be32(key);
+ mpt_entry->pd = cpu_to_be32(pd);
+ mpt_entry->start = cpu_to_be64(iova);
+ mpt_entry->length = cpu_to_be64(total_size);
+
+ memset(&mpt_entry->lkey, 0,
+ sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
+
+ if (mr->mtt)
+ mpt_entry->mtt_seg =
+ cpu_to_be64(dev->mr_table.mtt_base +
+ mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE);
+
+ if (0) {
+ mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
+ for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
+ if (i % 4 == 0)
+ printk("[%02x] ", i * 4);
+ printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
+ if ((i + 1) % 4 == 0)
+ printk("\n");
+ }
+ }
+
+ err = mthca_SW2HW_MPT(dev, mailbox,
+ key & (dev->limits.num_mpts - 1),
+ &status);
+ if (err) {
+ mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
+ goto err_out_mailbox;
+ } else if (status) {
+ mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
+ status);
+ err = -EINVAL;
+ goto err_out_mailbox;
+ }
+
+ mthca_free_mailbox(dev, mailbox);
+ return err;
+
+err_out_mailbox:
+ mthca_free_mailbox(dev, mailbox);
+
+err_out_table:
+ mthca_table_put(dev, dev->mr_table.mpt_table, key);
+
+err_out_mpt_free:
+ mthca_free(&dev->mr_table.mpt_alloc, key);
+ return err;
+}
+
+int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
+ u32 access, struct mthca_mr *mr)
+{
+ mr->mtt = NULL;
+ return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr);
+}
+
+int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
+ u64 *buffer_list, int buffer_size_shift,
+ int list_len, u64 iova, u64 total_size,
+ u32 access, struct mthca_mr *mr)
+{
+ int err;
+
+ mr->mtt = mthca_alloc_mtt(dev, list_len);
+ if (IS_ERR(mr->mtt))
+ return PTR_ERR(mr->mtt);
+
+ err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len);
+ if (err) {
+ mthca_free_mtt(dev, mr->mtt);
+ return err;
+ }
+
+ err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova,
+ total_size, access, mr);
+ if (err)
+ mthca_free_mtt(dev, mr->mtt);
+
+ return err;
+}
+
+/* Free mr or fmr */
+static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
+{
+ mthca_table_put(dev, dev->mr_table.mpt_table,
+ key_to_hw_index(dev, lkey));
+
+ mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
+}
+
+void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
+{
+ int err;
+ u8 status;
+
+ err = mthca_HW2SW_MPT(dev, NULL,
+ key_to_hw_index(dev, mr->ibmr.lkey) &
+ (dev->limits.num_mpts - 1),
+ &status);
+ if (err)
+ mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
+ else if (status)
+ mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
+ status);
+
+ mthca_free_region(dev, mr->ibmr.lkey);
+ mthca_free_mtt(dev, mr->mtt);
+}
+
+int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
+ u32 access, struct mthca_fmr *mr)
+{
+ struct mthca_mpt_entry *mpt_entry;
+ struct mthca_mailbox *mailbox;
+ u64 mtt_seg;
+ u32 key, idx;
+ u8 status;
+ int list_len = mr->attr.max_pages;
+ int err = -ENOMEM;
+ int i;
+
+ if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32)
+ return -EINVAL;
+
+ /* For Arbel, all MTTs must fit in the same page. */
+ if (mthca_is_memfree(dev) &&
+ mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
+ return -EINVAL;
+
+ mr->maps = 0;
+
+ key = mthca_alloc(&dev->mr_table.mpt_alloc);
+ if (key == -1)
+ return -ENOMEM;
+ key = adjust_key(dev, key);
+
+ idx = key & (dev->limits.num_mpts - 1);
+ mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
+
+ if (mthca_is_memfree(dev)) {
+ err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
+ if (err)
+ goto err_out_mpt_free;
+
+ mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL);
+ BUG_ON(!mr->mem.arbel.mpt);
+ } else
+ mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
+ sizeof *(mr->mem.tavor.mpt) * idx;
+
+ mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
+ if (IS_ERR(mr->mtt)) {
+ err = PTR_ERR(mr->mtt);
+ goto err_out_table;
+ }
+
+ mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE;
+
+ if (mthca_is_memfree(dev)) {
+ mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
+ mr->mtt->first_seg,
+ &mr->mem.arbel.dma_handle);
+ BUG_ON(!mr->mem.arbel.mtts);
+ } else
+ mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto err_out_free_mtt;
+ }
+
+ mpt_entry = mailbox->buf;
+
+ mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
+ MTHCA_MPT_FLAG_MIO |
+ MTHCA_MPT_FLAG_REGION |
+ access);
+
+ mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12);
+ mpt_entry->key = cpu_to_be32(key);
+ mpt_entry->pd = cpu_to_be32(pd);
+ memset(&mpt_entry->start, 0,
+ sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
+ mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);
+
+ if (0) {
+ mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
+ for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
+ if (i % 4 == 0)
+ printk("[%02x] ", i * 4);
+ printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
+ if ((i + 1) % 4 == 0)
+ printk("\n");
+ }
+ }
+
+ err = mthca_SW2HW_MPT(dev, mailbox,
+ key & (dev->limits.num_mpts - 1),
+ &status);
+ if (err) {
+ mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
+ goto err_out_mailbox_free;
+ }
+ if (status) {
+ mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
+ status);
+ err = -EINVAL;
+ goto err_out_mailbox_free;
+ }
+
+ mthca_free_mailbox(dev, mailbox);
+ return 0;
+
+err_out_mailbox_free:
+ mthca_free_mailbox(dev, mailbox);
+
+err_out_free_mtt:
+ mthca_free_mtt(dev, mr->mtt);
+
+err_out_table:
+ mthca_table_put(dev, dev->mr_table.mpt_table, key);
+
+err_out_mpt_free:
+ mthca_free(&dev->mr_table.mpt_alloc, key);
+ return err;
+}
+
+int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
+{
+ if (fmr->maps)
+ return -EBUSY;
+
+ mthca_free_region(dev, fmr->ibmr.lkey);
+ mthca_free_mtt(dev, fmr->mtt);
+
+ return 0;
+}
+
+static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
+ int list_len, u64 iova)
+{
+ int i, page_mask;
+
+ if (list_len > fmr->attr.max_pages)
+ return -EINVAL;
+
+ page_mask = (1 << fmr->attr.page_shift) - 1;
+
+ /* We are getting page lists, so va must be page aligned. */
+ if (iova & page_mask)
+ return -EINVAL;
+
+ /* Trust the user not to pass misaligned data in page_list */
+ if (0)
+ for (i = 0; i < list_len; ++i) {
+ if (page_list[i] & ~page_mask)
+ return -EINVAL;
+ }
+
+ if (fmr->maps >= fmr->attr.max_maps)
+ return -EINVAL;
+
+ return 0;
+}
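/*
 * Example of the alignment check above: with page_shift = 12, page_mask is
 * 0xfff, so iova = 0x10003000 passes (low 12 bits clear) while
 * iova = 0x10003080 is rejected with -EINVAL.
 */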
+
+
+int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova)
+{
+ struct mthca_fmr *fmr = to_mfmr(ibfmr);
+ struct mthca_dev *dev = to_mdev(ibfmr->device);
+ struct mthca_mpt_entry mpt_entry;
+ u32 key;
+ int i, err;
+
+ err = mthca_check_fmr(fmr, page_list, list_len, iova);
+ if (err)
+ return err;
+
+ ++fmr->maps;
+
+ key = tavor_key_to_hw_index(fmr->ibmr.lkey);
+ key += dev->limits.num_mpts;
+ fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
+
+ writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
+
+ for (i = 0; i < list_len; ++i) {
+ __be64 mtt_entry = cpu_to_be64(page_list[i] |
+ MTHCA_MTT_FLAG_PRESENT);
+ mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
+ }
+
+ mpt_entry.lkey = cpu_to_be32(key);
+ mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
+ mpt_entry.start = cpu_to_be64(iova);
+
+ __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
+ memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
+ offsetof(struct mthca_mpt_entry, window_count) -
+ offsetof(struct mthca_mpt_entry, start));
+
+ writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);
+
+ return 0;
+}
+
+int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova)
+{
+ struct mthca_fmr *fmr = to_mfmr(ibfmr);
+ struct mthca_dev *dev = to_mdev(ibfmr->device);
+ u32 key;
+ int i, err;
+
+ err = mthca_check_fmr(fmr, page_list, list_len, iova);
+ if (err)
+ return err;
+
+ ++fmr->maps;
+
+ key = arbel_key_to_hw_index(fmr->ibmr.lkey);
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ key += SINAI_FMR_KEY_INC;
+ else
+ key += dev->limits.num_mpts;
+ fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
+
+ *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
+
+ wmb();
+
+ for (i = 0; i < list_len; ++i)
+ fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
+ MTHCA_MTT_FLAG_PRESENT);
+
+ dma_sync_single(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
+ list_len * sizeof(u64), DMA_TO_DEVICE);
+
+ fmr->mem.arbel.mpt->key = cpu_to_be32(key);
+ fmr->mem.arbel.mpt->lkey = cpu_to_be32(key);
+ fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
+ fmr->mem.arbel.mpt->start = cpu_to_be64(iova);
+
+ wmb();
+
+ *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;
+
+ wmb();
+
+ return 0;
+}
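/*
 * Illustrative numbers for the key bumping above: suppose num_mpts is
 * 1 << 17 and the FMR starts at hw index 5.  After one remap the index
 * becomes 5 + (1 << 17); key & (num_mpts - 1) still selects MPT slot 5, but
 * the lkey/rkey handed back to consumers has changed, so a stale rkey from
 * before the remap no longer matches.  On Sinai the increment is
 * SINAI_FMR_KEY_INC (1 << 24), which after the byte rotation only advances
 * the low 8-bit key byte of the lkey.
 */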
+
+void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
+{
+ if (!fmr->maps)
+ return;
+
+ fmr->maps = 0;
+
+ writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
+}
+
+void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
+{
+ if (!fmr->maps)
+ return;
+
+ fmr->maps = 0;
+
+ *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
+}
+
+int mthca_init_mr_table(struct mthca_dev *dev)
+{
+ unsigned long addr;
+ int mpts, mtts, err, i;
+
+ err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
+ dev->limits.num_mpts,
+ ~0, dev->limits.reserved_mrws);
+ if (err)
+ return err;
+
+ if (!mthca_is_memfree(dev) &&
+ (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
+ dev->limits.fmr_reserved_mtts = 0;
+ else
+ dev->mthca_flags |= MTHCA_FLAG_FMR;
+
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ mthca_dbg(dev, "Memory key throughput optimization activated.\n");
+
+ err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
+ fls(dev->limits.num_mtt_segs - 1));
+
+ if (err)
+ goto err_mtt_buddy;
+
+ dev->mr_table.tavor_fmr.mpt_base = NULL;
+ dev->mr_table.tavor_fmr.mtt_base = NULL;
+
+ if (dev->limits.fmr_reserved_mtts) {
+ i = fls(dev->limits.fmr_reserved_mtts - 1);
+
+ if (i >= 31) {
+ mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n");
+ err = -EINVAL;
+ goto err_fmr_mpt;
+ }
+ mpts = mtts = 1 << i;
+ } else {
+ mtts = dev->limits.num_mtt_segs;
+ mpts = dev->limits.num_mpts;
+ }
+
+ if (!mthca_is_memfree(dev) &&
+ (dev->mthca_flags & MTHCA_FLAG_FMR)) {
+
+ addr = pci_resource_start(dev->pdev, 4) +
+ ((pci_resource_len(dev->pdev, 4) - 1) &
+ dev->mr_table.mpt_base);
+
+ dev->mr_table.tavor_fmr.mpt_base =
+ ioremap(addr, mpts * sizeof(struct mthca_mpt_entry));
+
+ if (!dev->mr_table.tavor_fmr.mpt_base) {
+ mthca_warn(dev, "MPT ioremap for FMR failed.\n");
+ err = -ENOMEM;
+ goto err_fmr_mpt;
+ }
+
+ addr = pci_resource_start(dev->pdev, 4) +
+ ((pci_resource_len(dev->pdev, 4) - 1) &
+ dev->mr_table.mtt_base);
+
+ dev->mr_table.tavor_fmr.mtt_base =
+ ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE);
+ if (!dev->mr_table.tavor_fmr.mtt_base) {
+ mthca_warn(dev, "MTT ioremap for FMR failed.\n");
+ err = -ENOMEM;
+ goto err_fmr_mtt;
+ }
+ }
+
+ if (dev->limits.fmr_reserved_mtts) {
+ err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, fls(mtts - 1));
+ if (err)
+ goto err_fmr_mtt_buddy;
+
+ /* Prevent regular MRs from using FMR keys */
+ err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, fls(mtts - 1));
+ if (err)
+ goto err_reserve_fmr;
+
+ dev->mr_table.fmr_mtt_buddy =
+ &dev->mr_table.tavor_fmr.mtt_buddy;
+ } else
+ dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;
+
+ /* FMR table is always the first, take reserved MTTs out of there */
+ if (dev->limits.reserved_mtts) {
+ i = fls(dev->limits.reserved_mtts - 1);
+
+ if (mthca_alloc_mtt_range(dev, i,
+ dev->mr_table.fmr_mtt_buddy) == -1) {
+ mthca_warn(dev, "MTT table of order %d is too small.\n",
+ dev->mr_table.fmr_mtt_buddy->max_order);
+ err = -ENOMEM;
+ goto err_reserve_mtts;
+ }
+ }
+
+ return 0;
+
+err_reserve_mtts:
+err_reserve_fmr:
+ if (dev->limits.fmr_reserved_mtts)
+ mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
+
+err_fmr_mtt_buddy:
+ if (dev->mr_table.tavor_fmr.mtt_base)
+ iounmap(dev->mr_table.tavor_fmr.mtt_base);
+
+err_fmr_mtt:
+ if (dev->mr_table.tavor_fmr.mpt_base)
+ iounmap(dev->mr_table.tavor_fmr.mpt_base);
+
+err_fmr_mpt:
+ mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
+
+err_mtt_buddy:
+ mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
+
+ return err;
+}
+
+void mthca_cleanup_mr_table(struct mthca_dev *dev)
+{
+ /* XXX check if any MRs are still allocated? */
+ if (dev->limits.fmr_reserved_mtts)
+ mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
+
+ mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
+
+ if (dev->mr_table.tavor_fmr.mtt_base)
+ iounmap(dev->mr_table.tavor_fmr.mtt_base);
+ if (dev->mr_table.tavor_fmr.mpt_base)
+ iounmap(dev->mr_table.tavor_fmr.mpt_base);
+
+ mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c
new file mode 100644
index 0000000..266f14e
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_pd.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/errno.h>
+
+#include "mthca_dev.h"
+
+int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd)
+{
+ int err = 0;
+
+ pd->privileged = privileged;
+
+ atomic_set(&pd->sqp_count, 0);
+ pd->pd_num = mthca_alloc(&dev->pd_table.alloc);
+ if (pd->pd_num == -1)
+ return -ENOMEM;
+
+ if (privileged) {
+ err = mthca_mr_alloc_notrans(dev, pd->pd_num,
+ MTHCA_MPT_FLAG_LOCAL_READ |
+ MTHCA_MPT_FLAG_LOCAL_WRITE,
+ &pd->ntmr);
+ if (err)
+ mthca_free(&dev->pd_table.alloc, pd->pd_num);
+ }
+
+ return err;
+}
+
+void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd)
+{
+ if (pd->privileged)
+ mthca_free_mr(dev, &pd->ntmr);
+ mthca_free(&dev->pd_table.alloc, pd->pd_num);
+}
+
+int mthca_init_pd_table(struct mthca_dev *dev)
+{
+ return mthca_alloc_init(&dev->pd_table.alloc,
+ dev->limits.num_pds,
+ (1 << 24) - 1,
+ dev->limits.reserved_pds);
+}
+
+void mthca_cleanup_pd_table(struct mthca_dev *dev)
+{
+ /* XXX check if any PDs are still allocated? */
+ mthca_alloc_cleanup(&dev->pd_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
new file mode 100644
index 0000000..d168c25
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+
+#include "mthca_profile.h"
+
+enum {
+ MTHCA_RES_QP,
+ MTHCA_RES_EEC,
+ MTHCA_RES_SRQ,
+ MTHCA_RES_CQ,
+ MTHCA_RES_EQP,
+ MTHCA_RES_EEEC,
+ MTHCA_RES_EQ,
+ MTHCA_RES_RDB,
+ MTHCA_RES_MCG,
+ MTHCA_RES_MPT,
+ MTHCA_RES_MTT,
+ MTHCA_RES_UAR,
+ MTHCA_RES_UDAV,
+ MTHCA_RES_UARC,
+ MTHCA_RES_NUM
+};
+
+enum {
+ MTHCA_NUM_EQS = 32,
+ MTHCA_NUM_PDS = 1 << 15
+};
+
+s64 mthca_make_profile(struct mthca_dev *dev,
+ struct mthca_profile *request,
+ struct mthca_dev_lim *dev_lim,
+ struct mthca_init_hca_param *init_hca)
+{
+ struct mthca_resource {
+ u64 size;
+ u64 start;
+ int type;
+ int num;
+ int log_num;
+ };
+
+ u64 mem_base, mem_avail;
+ s64 total_size = 0;
+ struct mthca_resource *profile;
+ struct mthca_resource tmp;
+ int i, j;
+
+ profile = kzalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL);
+ if (!profile)
+ return -ENOMEM;
+
+ profile[MTHCA_RES_QP].size = dev_lim->qpc_entry_sz;
+ profile[MTHCA_RES_EEC].size = dev_lim->eec_entry_sz;
+ profile[MTHCA_RES_SRQ].size = dev_lim->srq_entry_sz;
+ profile[MTHCA_RES_CQ].size = dev_lim->cqc_entry_sz;
+ profile[MTHCA_RES_EQP].size = dev_lim->eqpc_entry_sz;
+ profile[MTHCA_RES_EEEC].size = dev_lim->eeec_entry_sz;
+ profile[MTHCA_RES_EQ].size = dev_lim->eqc_entry_sz;
+ profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE;
+ profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE;
+ profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz;
+ profile[MTHCA_RES_MTT].size = MTHCA_MTT_SEG_SIZE;
+ profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz;
+ profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE;
+ profile[MTHCA_RES_UARC].size = request->uarc_size;
+
+ profile[MTHCA_RES_QP].num = request->num_qp;
+ profile[MTHCA_RES_SRQ].num = request->num_srq;
+ profile[MTHCA_RES_EQP].num = request->num_qp;
+ profile[MTHCA_RES_RDB].num = request->num_qp * request->rdb_per_qp;
+ profile[MTHCA_RES_CQ].num = request->num_cq;
+ profile[MTHCA_RES_EQ].num = MTHCA_NUM_EQS;
+ profile[MTHCA_RES_MCG].num = request->num_mcg;
+ profile[MTHCA_RES_MPT].num = request->num_mpt;
+ profile[MTHCA_RES_MTT].num = request->num_mtt;
+ profile[MTHCA_RES_UAR].num = request->num_uar;
+ profile[MTHCA_RES_UARC].num = request->num_uar;
+ profile[MTHCA_RES_UDAV].num = request->num_udav;
+
+ for (i = 0; i < MTHCA_RES_NUM; ++i) {
+ profile[i].type = i;
+ profile[i].log_num = max(ffs(profile[i].num) - 1, 0);
+ profile[i].size *= profile[i].num;
+ if (mthca_is_memfree(dev))
+ profile[i].size = max(profile[i].size, (u64) PAGE_SIZE);
+ }
+
+ if (mthca_is_memfree(dev)) {
+ mem_base = 0;
+ mem_avail = dev_lim->hca.arbel.max_icm_sz;
+ } else {
+ mem_base = dev->ddr_start;
+ mem_avail = dev->fw.tavor.fw_start - dev->ddr_start;
+ }
+
+ /*
+ * Sort the resources in decreasing order of size. Since they
+ * all have sizes that are powers of 2, we'll be able to keep
+ * resources aligned to their size and pack them without gaps
+ * using the sorted order.
+ */
+ for (i = MTHCA_RES_NUM; i > 0; --i)
+ for (j = 1; j < i; ++j) {
+ if (profile[j].size > profile[j - 1].size) {
+ tmp = profile[j];
+ profile[j] = profile[j - 1];
+ profile[j - 1] = tmp;
+ }
+ }
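/*
 * Why the descending sort keeps everything aligned (illustrative): packing
 * 1 MB, 256 KB and 4 KB regions in that order from an aligned base puts
 * them at offsets 0x000000, 0x100000 and 0x140000 -- each offset is a sum
 * of equal-or-larger powers of two and hence a multiple of that region's
 * own size.  Packing in increasing order of size would not have this
 * property.
 */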
+
+ for (i = 0; i < MTHCA_RES_NUM; ++i) {
+ if (profile[i].size) {
+ profile[i].start = mem_base + total_size;
+ total_size += profile[i].size;
+ }
+ if (total_size > mem_avail) {
+ mthca_err(dev, "Profile requires 0x%llx bytes; "
+ "won't fit in 0x%llx bytes of context memory.\n",
+ (unsigned long long) total_size,
+ (unsigned long long) mem_avail);
+ kfree(profile);
+ return -ENOMEM;
+ }
+
+ if (profile[i].size)
+ mthca_dbg(dev, "profile[%2d]--%2d/%2d @ 0x%16llx "
+ "(size 0x%8llx)\n",
+ i, profile[i].type, profile[i].log_num,
+ (unsigned long long) profile[i].start,
+ (unsigned long long) profile[i].size);
+ }
+
+ if (mthca_is_memfree(dev))
+ mthca_dbg(dev, "HCA context memory: reserving %d KB\n",
+ (int) (total_size >> 10));
+ else
+ mthca_dbg(dev, "HCA memory: allocated %d KB/%d KB (%d KB free)\n",
+ (int) (total_size >> 10), (int) (mem_avail >> 10),
+ (int) ((mem_avail - total_size) >> 10));
+
+ for (i = 0; i < MTHCA_RES_NUM; ++i) {
+ switch (profile[i].type) {
+ case MTHCA_RES_QP:
+ dev->limits.num_qps = profile[i].num;
+ init_hca->qpc_base = profile[i].start;
+ init_hca->log_num_qps = profile[i].log_num;
+ break;
+ case MTHCA_RES_EEC:
+ dev->limits.num_eecs = profile[i].num;
+ init_hca->eec_base = profile[i].start;
+ init_hca->log_num_eecs = profile[i].log_num;
+ break;
+ case MTHCA_RES_SRQ:
+ dev->limits.num_srqs = profile[i].num;
+ init_hca->srqc_base = profile[i].start;
+ init_hca->log_num_srqs = profile[i].log_num;
+ break;
+ case MTHCA_RES_CQ:
+ dev->limits.num_cqs = profile[i].num;
+ init_hca->cqc_base = profile[i].start;
+ init_hca->log_num_cqs = profile[i].log_num;
+ break;
+ case MTHCA_RES_EQP:
+ init_hca->eqpc_base = profile[i].start;
+ break;
+ case MTHCA_RES_EEEC:
+ init_hca->eeec_base = profile[i].start;
+ break;
+ case MTHCA_RES_EQ:
+ dev->limits.num_eqs = profile[i].num;
+ init_hca->eqc_base = profile[i].start;
+ init_hca->log_num_eqs = profile[i].log_num;
+ break;
+ case MTHCA_RES_RDB:
+ for (dev->qp_table.rdb_shift = 0;
+ request->num_qp << dev->qp_table.rdb_shift < profile[i].num;
+ ++dev->qp_table.rdb_shift)
+ ; /* nothing */
+ dev->qp_table.rdb_base = (u32) profile[i].start;
+ init_hca->rdb_base = profile[i].start;
+ break;
+ case MTHCA_RES_MCG:
+ dev->limits.num_mgms = profile[i].num >> 1;
+ dev->limits.num_amgms = profile[i].num >> 1;
+ init_hca->mc_base = profile[i].start;
+ init_hca->log_mc_entry_sz = ffs(MTHCA_MGM_ENTRY_SIZE) - 1;
+ init_hca->log_mc_table_sz = profile[i].log_num;
+ init_hca->mc_hash_sz = 1 << (profile[i].log_num - 1);
+ break;
+ case MTHCA_RES_MPT:
+ dev->limits.num_mpts = profile[i].num;
+ dev->mr_table.mpt_base = profile[i].start;
+ init_hca->mpt_base = profile[i].start;
+ init_hca->log_mpt_sz = profile[i].log_num;
+ break;
+ case MTHCA_RES_MTT:
+ dev->limits.num_mtt_segs = profile[i].num;
+ dev->mr_table.mtt_base = profile[i].start;
+ init_hca->mtt_base = profile[i].start;
+ init_hca->mtt_seg_sz = ffs(MTHCA_MTT_SEG_SIZE) - 7;
+ break;
+ case MTHCA_RES_UAR:
+ dev->limits.num_uars = profile[i].num;
+ init_hca->uar_scratch_base = profile[i].start;
+ break;
+ case MTHCA_RES_UDAV:
+ dev->av_table.ddr_av_base = profile[i].start;
+ dev->av_table.num_ddr_avs = profile[i].num;
+ break;
+ case MTHCA_RES_UARC:
+ dev->uar_table.uarc_size = request->uarc_size;
+ dev->uar_table.uarc_base = profile[i].start;
+ init_hca->uarc_base = profile[i].start;
+ init_hca->log_uarc_sz = ffs(request->uarc_size) - 13;
+ init_hca->log_uar_sz = ffs(request->num_uar) - 1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /*
+ * PDs don't take any HCA memory, but we assign them as part
+ * of the HCA profile anyway.
+ */
+ dev->limits.num_pds = MTHCA_NUM_PDS;
+
+ if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT &&
+ init_hca->log_mpt_sz > 23) {
+ mthca_warn(dev, "MPT table too large (requested size 2^%d >= 2^24)\n",
+ init_hca->log_mpt_sz);
+ mthca_warn(dev, "Disabling memory key throughput optimization.\n");
+ dev->mthca_flags &= ~MTHCA_FLAG_SINAI_OPT;
+ }
+
+ /*
+ * For Tavor, FMRs use ioremapped PCI memory. On 32-bit
+ * systems this may use too much vmalloc space to map all MTT
+ * memory, so we reserve some MTTs for FMR access, taking them
+ * out of the MR pool. They don't use additional memory, but
+ * we assign them as part of the HCA profile anyway.
+ */
+ if (mthca_is_memfree(dev) || BITS_PER_LONG == 64)
+ dev->limits.fmr_reserved_mtts = 0;
+ else
+ dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts;
+
+ kfree(profile);
+ return total_size;
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h
new file mode 100644
index 0000000..62b009c
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_profile.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MTHCA_PROFILE_H
+#define MTHCA_PROFILE_H
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+
+struct mthca_profile {
+ int num_qp;
+ int rdb_per_qp;
+ int num_srq;
+ int num_cq;
+ int num_mcg;
+ int num_mpt;
+ int num_mtt;
+ int num_udav;
+ int num_uar;
+ int uarc_size;
+ int fmr_reserved_mtts;
+};
+
+s64 mthca_make_profile(struct mthca_dev *mdev,
+ struct mthca_profile *request,
+ struct mthca_dev_lim *dev_lim,
+ struct mthca_init_hca_param *init_hca);
+
+#endif /* MTHCA_PROFILE_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
new file mode 100644
index 0000000..87ad889
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -0,0 +1,1424 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+#include "mthca_user.h"
+#include "mthca_memfree.h"
+
+static void init_query_mad(struct ib_smp *mad)
+{
+ mad->base_version = 1;
+ mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ mad->class_version = 1;
+ mad->method = IB_MGMT_METHOD_GET;
+}
+
+static int mthca_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+ struct mthca_dev *mdev = to_mdev(ibdev);
+
+ u8 status;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ memset(props, 0, sizeof *props);
+
+ props->fw_ver = mdev->fw_ver;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+ err = mthca_MAD_IFC(mdev, 1, 1,
+ 1, NULL, NULL, in_mad, out_mad,
+ &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ props->device_cap_flags = mdev->device_cap_flags;
+ props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
+ 0xffffff;
+ props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
+ props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
+ memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
+
+ props->max_mr_size = ~0ull;
+ props->page_size_cap = mdev->limits.page_size_cap;
+ props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps;
+ props->max_qp_wr = mdev->limits.max_wqes;
+ props->max_sge = mdev->limits.max_sg;
+ props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
+ props->max_cqe = mdev->limits.max_cqes;
+ props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
+ props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds;
+ props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift;
+ props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
+ props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
+ props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
+ props->max_srq_wr = mdev->limits.max_srq_wqes;
+ props->max_srq_sge = mdev->limits.max_srq_sge;
+ props->local_ca_ack_delay = mdev->limits.local_ca_ack_delay;
+ props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
+ IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+ props->max_pkeys = mdev->limits.pkey_table_len;
+ props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms;
+ props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
+ props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+ props->max_mcast_grp;
+ /*
+ * If Sinai memory key optimization is being used, then only
+ * the 8-bit key portion will change. For other HCAs, the
+ * unused index bits will also be used for FMR remapping.
+ */
+ if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+ props->max_map_per_fmr = 255;
+ else
+ props->max_map_per_fmr =
+ (1 << (32 - ilog2(mdev->limits.num_mpts))) - 1;
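/*
 * Worked numbers for the limit above (illustrative): with num_mpts = 1 << 17
 * a non-Sinai HCA can reuse the 32 - 17 = 15 unused index bits for remaps,
 * giving max_map_per_fmr = (1 << 15) - 1 = 32767; with the Sinai
 * optimization only the 8-bit key byte changes, hence the fixed 255.
 */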
+
+ err = 0;
+ out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static int mthca_query_port(struct ib_device *ibdev,
+ u8 port, struct ib_port_attr *props)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+ u8 status;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ memset(props, 0, sizeof *props);
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
+ port, NULL, NULL, in_mad, out_mad,
+ &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
+ props->lmc = out_mad->data[34] & 0x7;
+ props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
+ props->sm_sl = out_mad->data[36] & 0xf;
+ props->state = out_mad->data[32] & 0xf;
+ props->phys_state = out_mad->data[33] >> 4;
+ props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
+ props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len;
+ props->max_msg_sz = 0x80000000;
+ props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len;
+ props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
+ props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
+ props->active_width = out_mad->data[31] & 0xf;
+ props->active_speed = out_mad->data[35] >> 4;
+ props->max_mtu = out_mad->data[41] & 0xf;
+ props->active_mtu = out_mad->data[36] >> 4;
+ props->subnet_timeout = out_mad->data[51] & 0x1f;
+ props->max_vl_num = out_mad->data[37] >> 4;
+ props->init_type_reply = out_mad->data[41] >> 4;
+
+ out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static int mthca_modify_device(struct ib_device *ibdev,
+ int mask,
+ struct ib_device_modify *props)
+{
+ if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
+ if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
+ return -ERESTARTSYS;
+ memcpy(ibdev->node_desc, props->node_desc, 64);
+ mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
+ }
+
+ return 0;
+}
+
+static int mthca_modify_port(struct ib_device *ibdev,
+ u8 port, int port_modify_mask,
+ struct ib_port_modify *props)
+{
+ struct mthca_set_ib_param set_ib;
+ struct ib_port_attr attr;
+ int err;
+ u8 status;
+
+ if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
+ return -ERESTARTSYS;
+
+ err = mthca_query_port(ibdev, port, &attr);
+ if (err)
+ goto out;
+
+ set_ib.set_si_guid = 0;
+ set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR);
+
+ set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
+ ~props->clr_port_cap_mask;
+
+ err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
+ return err;
+}
+
+static int mthca_query_pkey(struct ib_device *ibdev,
+ u8 port, u16 index, u16 *pkey)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+ u8 status;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
+ in_mad->attr_mod = cpu_to_be32(index / 32);
+
+ err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
+ port, NULL, NULL, in_mad, out_mad,
+ &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
+
+ out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static int mthca_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *gid)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+ u8 status;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
+ port, NULL, NULL, in_mad, out_mad,
+ &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(gid->raw, out_mad->data + 8, 8);
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
+ in_mad->attr_mod = cpu_to_be32(index / 8);
+
+ err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
+ port, NULL, NULL, in_mad, out_mad,
+ &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
+
+ out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct mthca_alloc_ucontext_resp uresp;
+ struct mthca_ucontext *context;
+ int err;
+
+ memset(&uresp, 0, sizeof uresp);
+
+ uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
+ if (mthca_is_memfree(to_mdev(ibdev)))
+ uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
+ else
+ uresp.uarc_size = 0;
+
+ context = kmalloc(sizeof *context, GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
+ if (err) {
+ kfree(context);
+ return ERR_PTR(err);
+ }
+
+ context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
+ if (IS_ERR(context->db_tab)) {
+ err = PTR_ERR(context->db_tab);
+ mthca_uar_free(to_mdev(ibdev), &context->uar);
+ kfree(context);
+ return ERR_PTR(err);
+ }
+
+ if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
+ mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
+ mthca_uar_free(to_mdev(ibdev), &context->uar);
+ kfree(context);
+ return ERR_PTR(-EFAULT);
+ }
+
+ context->reg_mr_warned = 0;
+
+ return &context->ibucontext;
+}
+
+static int mthca_dealloc_ucontext(struct ib_ucontext *context)
+{
+ mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab);
+ mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
+ kfree(to_mucontext(context));
+
+ return 0;
+}
+
+static int mthca_mmap_uar(struct ib_ucontext *context,
+ struct vm_area_struct *vma)
+{
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (io_remap_pfn_range(vma, vma->vm_start,
+ to_mucontext(context)->uar.pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct mthca_pd *pd;
+ int err;
+
+ pd = kmalloc(sizeof *pd, GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
+ if (err) {
+ kfree(pd);
+ return ERR_PTR(err);
+ }
+
+ if (context) {
+ if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
+ mthca_pd_free(to_mdev(ibdev), pd);
+ kfree(pd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+
+ return &pd->ibpd;
+}
+
+static int mthca_dealloc_pd(struct ib_pd *pd)
+{
+ mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
+ kfree(pd);
+
+ return 0;
+}
+
+static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr)
+{
+ int err;
+ struct mthca_ah *ah;
+
+ ah = kmalloc(sizeof *ah, GFP_ATOMIC);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
+ if (err) {
+ kfree(ah);
+ return ERR_PTR(err);
+ }
+
+ return &ah->ibah;
+}
+
+static int mthca_ah_destroy(struct ib_ah *ah)
+{
+ mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
+ kfree(ah);
+
+ return 0;
+}
+
+static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mthca_create_srq ucmd;
+ struct mthca_ucontext *context = NULL;
+ struct mthca_srq *srq;
+ int err;
+
+ srq = kmalloc(sizeof *srq, GFP_KERNEL);
+ if (!srq)
+ return ERR_PTR(-ENOMEM);
+
+ if (pd->uobject) {
+ context = to_mucontext(pd->uobject->context);
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ err = -EFAULT;
+ goto err_free;
+ }
+
+ err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab, ucmd.db_index,
+ ucmd.db_page);
+
+ if (err)
+ goto err_free;
+
+ srq->mr.ibmr.lkey = ucmd.lkey;
+ srq->db_index = ucmd.db_index;
+ }
+
+ err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
+ &init_attr->attr, srq);
+
+ if (err && pd->uobject)
+ mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab, ucmd.db_index);
+
+ if (err)
+ goto err_free;
+
+ if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) {
+ mthca_free_srq(to_mdev(pd->device), srq);
+ err = -EFAULT;
+ goto err_free;
+ }
+
+ return &srq->ibsrq;
+
+err_free:
+ kfree(srq);
+
+ return ERR_PTR(err);
+}
+
+static int mthca_destroy_srq(struct ib_srq *srq)
+{
+ struct mthca_ucontext *context;
+
+ if (srq->uobject) {
+ context = to_mucontext(srq->uobject->context);
+
+ mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
+ context->db_tab, to_msrq(srq)->db_index);
+ }
+
+ mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
+ kfree(srq);
+
+ return 0;
+}
+
+static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mthca_create_qp ucmd;
+ struct mthca_qp *qp;
+ int err;
+
+ if (init_attr->create_flags)
+ return ERR_PTR(-EINVAL);
+
+ switch (init_attr->qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ {
+ struct mthca_ucontext *context;
+
+ qp = kmalloc(sizeof *qp, GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ if (pd->uobject) {
+ context = to_mucontext(pd->uobject->context);
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ kfree(qp);
+ return ERR_PTR(-EFAULT);
+ }
+
+ err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index, ucmd.sq_db_page);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab,
+ ucmd.rq_db_index, ucmd.rq_db_page);
+ if (err) {
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index);
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->mr.ibmr.lkey = ucmd.lkey;
+ qp->sq.db_index = ucmd.sq_db_index;
+ qp->rq.db_index = ucmd.rq_db_index;
+ }
+
+ err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
+ to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq),
+ init_attr->qp_type, init_attr->sq_sig_type,
+ &init_attr->cap, qp);
+
+ if (err && pd->uobject) {
+ context = to_mucontext(pd->uobject->context);
+
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index);
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.rq_db_index);
+ }
+
+ qp->ibqp.qp_num = qp->qpn;
+ break;
+ }
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ {
+ /* Don't allow userspace to create special QPs */
+ if (pd->uobject)
+ return ERR_PTR(-EINVAL);
+
+ qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
+
+ err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
+ to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq),
+ init_attr->sq_sig_type, &init_attr->cap,
+ qp->ibqp.qp_num, init_attr->port_num,
+ to_msqp(qp));
+ break;
+ }
+ default:
+ /* Don't support raw QPs */
+ return ERR_PTR(-ENOSYS);
+ }
+
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ init_attr->cap.max_send_wr = qp->sq.max;
+ init_attr->cap.max_recv_wr = qp->rq.max;
+ init_attr->cap.max_send_sge = qp->sq.max_gs;
+ init_attr->cap.max_recv_sge = qp->rq.max_gs;
+ init_attr->cap.max_inline_data = qp->max_inline_data;
+
+ return &qp->ibqp;
+}
+
+static int mthca_destroy_qp(struct ib_qp *qp)
+{
+ if (qp->uobject) {
+ mthca_unmap_user_db(to_mdev(qp->device),
+ &to_mucontext(qp->uobject->context)->uar,
+ to_mucontext(qp->uobject->context)->db_tab,
+ to_mqp(qp)->sq.db_index);
+ mthca_unmap_user_db(to_mdev(qp->device),
+ &to_mucontext(qp->uobject->context)->uar,
+ to_mucontext(qp->uobject->context)->db_tab,
+ to_mqp(qp)->rq.db_index);
+ }
+ mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
+ kfree(qp);
+ return 0;
+}
+
+static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
+ int comp_vector,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct mthca_create_cq ucmd;
+ struct mthca_cq *cq;
+ int nent;
+ int err;
+
+ if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
+ return ERR_PTR(-EINVAL);
+
+ if (context) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+
+ err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab,
+ ucmd.set_db_index, ucmd.set_db_page);
+ if (err)
+ return ERR_PTR(err);
+
+ err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab,
+ ucmd.arm_db_index, ucmd.arm_db_page);
+ if (err)
+ goto err_unmap_set;
+ }
+
+ cq = kmalloc(sizeof *cq, GFP_KERNEL);
+ if (!cq) {
+ err = -ENOMEM;
+ goto err_unmap_arm;
+ }
+
+ if (context) {
+ cq->buf.mr.ibmr.lkey = ucmd.lkey;
+ cq->set_ci_db_index = ucmd.set_db_index;
+ cq->arm_db_index = ucmd.arm_db_index;
+ }
+
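+ /* find the smallest power of two strictly greater than entries */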
+ for (nent = 1; nent <= entries; nent <<= 1)
+ ; /* nothing */
+
+ err = mthca_init_cq(to_mdev(ibdev), nent,
+ context ? to_mucontext(context) : NULL,
+ context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
+ cq);
+ if (err)
+ goto err_free;
+
+ if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
+ mthca_free_cq(to_mdev(ibdev), cq);
+ err = -EFAULT;
+ goto err_free;
+ }
+
+ cq->resize_buf = NULL;
+
+ return &cq->ibcq;
+
+err_free:
+ kfree(cq);
+
+err_unmap_arm:
+ if (context)
+ mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab, ucmd.arm_db_index);
+
+err_unmap_set:
+ if (context)
+ mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab, ucmd.set_db_index);
+
+ return ERR_PTR(err);
+}
+
+static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
+ int entries)
+{
+ int ret;
+
+ spin_lock_irq(&cq->lock);
+ if (cq->resize_buf) {
+ ret = -EBUSY;
+ goto unlock;
+ }
+
+ cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
+ if (!cq->resize_buf) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ cq->resize_buf->state = CQ_RESIZE_ALLOC;
+
+ ret = 0;
+
+unlock:
+ spin_unlock_irq(&cq->lock);
+
+ if (ret)
+ return ret;
+
+ ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
+ if (ret) {
+ spin_lock_irq(&cq->lock);
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ spin_unlock_irq(&cq->lock);
+ return ret;
+ }
+
+ cq->resize_buf->cqe = entries - 1;
+
+ spin_lock_irq(&cq->lock);
+ cq->resize_buf->state = CQ_RESIZE_READY;
+ spin_unlock_irq(&cq->lock);
+
+ return 0;
+}
+
+static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+{
+ struct mthca_dev *dev = to_mdev(ibcq->device);
+ struct mthca_cq *cq = to_mcq(ibcq);
+ struct mthca_resize_cq ucmd;
+ u32 lkey;
+ u8 status;
+ int ret;
+
+ if (entries < 1 || entries > dev->limits.max_cqes)
+ return -EINVAL;
+
+ mutex_lock(&cq->mutex);
+
+ entries = roundup_pow_of_two(entries + 1);
+ if (entries == ibcq->cqe + 1) {
+ ret = 0;
+ goto out;
+ }
+
+ if (cq->is_kernel) {
+ ret = mthca_alloc_resize_buf(dev, cq, entries);
+ if (ret)
+ goto out;
+ lkey = cq->resize_buf->buf.mr.ibmr.lkey;
+ } else {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ lkey = ucmd.lkey;
+ }
+
+ ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries), &status);
+ if (status)
+ ret = -EINVAL;
+
+ if (ret) {
+ if (cq->resize_buf) {
+ mthca_free_cq_buf(dev, &cq->resize_buf->buf,
+ cq->resize_buf->cqe);
+ kfree(cq->resize_buf);
+ spin_lock_irq(&cq->lock);
+ cq->resize_buf = NULL;
+ spin_unlock_irq(&cq->lock);
+ }
+ goto out;
+ }
+
+ if (cq->is_kernel) {
+ struct mthca_cq_buf tbuf;
+ int tcqe;
+
+ spin_lock_irq(&cq->lock);
+ if (cq->resize_buf->state == CQ_RESIZE_READY) {
+ mthca_cq_resize_copy_cqes(cq);
+ tbuf = cq->buf;
+ tcqe = cq->ibcq.cqe;
+ cq->buf = cq->resize_buf->buf;
+ cq->ibcq.cqe = cq->resize_buf->cqe;
+ } else {
+ tbuf = cq->resize_buf->buf;
+ tcqe = cq->resize_buf->cqe;
+ }
+
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ spin_unlock_irq(&cq->lock);
+
+ mthca_free_cq_buf(dev, &tbuf, tcqe);
+ } else
+ ibcq->cqe = entries - 1;
+
+out:
+ mutex_unlock(&cq->mutex);
+
+ return ret;
+}
+
+static int mthca_destroy_cq(struct ib_cq *cq)
+{
+ if (cq->uobject) {
+ mthca_unmap_user_db(to_mdev(cq->device),
+ &to_mucontext(cq->uobject->context)->uar,
+ to_mucontext(cq->uobject->context)->db_tab,
+ to_mcq(cq)->arm_db_index);
+ mthca_unmap_user_db(to_mdev(cq->device),
+ &to_mucontext(cq->uobject->context)->uar,
+ to_mucontext(cq->uobject->context)->db_tab,
+ to_mcq(cq)->set_ci_db_index);
+ }
+ mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
+ kfree(cq);
+
+ return 0;
+}
+
+static inline u32 convert_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC : 0) |
+ (acc & IB_ACCESS_REMOTE_WRITE ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? MTHCA_MPT_FLAG_REMOTE_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0) |
+ MTHCA_MPT_FLAG_LOCAL_READ;
+}
+
+static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct mthca_mr *mr;
+ int err;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ err = mthca_mr_alloc_notrans(to_mdev(pd->device),
+ to_mpd(pd)->pd_num,
+ convert_access(acc), mr);
+
+ if (err) {
+ kfree(mr);
+ return ERR_PTR(err);
+ }
+
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+}
+
+static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ int acc,
+ u64 *iova_start)
+{
+ struct mthca_mr *mr;
+ u64 *page_list;
+ u64 total_size;
+ unsigned long mask;
+ int shift;
+ int npages;
+ int err;
+ int i, j, n;
+
+ mask = buffer_list[0].addr ^ *iova_start;
+ total_size = 0;
+ for (i = 0; i < num_phys_buf; ++i) {
+ if (i != 0)
+ mask |= buffer_list[i].addr;
+ if (i != num_phys_buf - 1)
+ mask |= buffer_list[i].addr + buffer_list[i].size;
+
+ total_size += buffer_list[i].size;
+ }
+
+ if (mask & ~PAGE_MASK)
+ return ERR_PTR(-EINVAL);
+
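+ /*
+ * Pick the largest page shift (capped at 31) for which every interior
+ * buffer boundary is aligned and the first buffer's address agrees
+ * with *iova_start in its low-order bits.
+ */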
+ shift = __ffs(mask | 1 << 31);
+
+ buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
+ buffer_list[0].addr &= ~0ull << shift;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ npages = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
+
+ if (!npages)
+ return &mr->ibmr;
+
+ page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
+ if (!page_list) {
+ kfree(mr);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ n = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ for (j = 0;
+ j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
+ ++j)
+ page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
+
+ mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
+ "in PD %x; shift %d, npages %d.\n",
+ (unsigned long long) buffer_list[0].addr,
+ (unsigned long long) *iova_start,
+ to_mpd(pd)->pd_num,
+ shift, npages);
+
+ err = mthca_mr_alloc_phys(to_mdev(pd->device),
+ to_mpd(pd)->pd_num,
+ page_list, shift, npages,
+ *iova_start, total_size,
+ convert_access(acc), mr);
+
+ if (err) {
+ kfree(page_list);
+ kfree(mr);
+ return ERR_PTR(err);
+ }
+
+ kfree(page_list);
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+}
+
+static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata)
+{
+ struct mthca_dev *dev = to_mdev(pd->device);
+ struct ib_umem_chunk *chunk;
+ struct mthca_mr *mr;
+ struct mthca_reg_mr ucmd;
+ u64 *pages;
+ int shift, n, len;
+ int i, j, k;
+ int err = 0;
+ int write_mtt_size;
+
+ if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) {
+ if (!to_mucontext(pd->uobject->context)->reg_mr_warned) {
+ mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n",
+ current->comm);
+ mthca_warn(dev, " Update libmthca to fix this.\n");
+ }
+ ++to_mucontext(pd->uobject->context)->reg_mr_warned;
+ ucmd.mr_attrs = 0;
+ } else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->umem = ib_umem_get(pd->uobject->context, start, length, acc,
+ ucmd.mr_attrs & MTHCA_MR_DMASYNC);
+
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ goto err;
+ }
+
+ shift = ffs(mr->umem->page_size) - 1;
+
+ n = 0;
+ list_for_each_entry(chunk, &mr->umem->chunk_list, list)
+ n += chunk->nents;
+
+ mr->mtt = mthca_alloc_mtt(dev, n);
+ if (IS_ERR(mr->mtt)) {
+ err = PTR_ERR(mr->mtt);
+ goto err_umem;
+ }
+
+ pages = (u64 *) __get_free_page(GFP_KERNEL);
+ if (!pages) {
+ err = -ENOMEM;
+ goto err_mtt;
+ }
+
+ i = n = 0;
+
+ write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
+
+ list_for_each_entry(chunk, &mr->umem->chunk_list, list)
+ for (j = 0; j < chunk->nmap; ++j) {
+ len = sg_dma_len(&chunk->page_list[j]) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(&chunk->page_list[j]) +
+ mr->umem->page_size * k;
+ /*
+ * Be friendly to write_mtt and pass it chunks
+ * of appropriate size.
+ */
+ if (i == write_mtt_size) {
+ err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
+ if (err)
+ goto mtt_done;
+ n += i;
+ i = 0;
+ }
+ }
+ }
+
+ if (i)
+ err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
+mtt_done:
+ free_page((unsigned long) pages);
+ if (err)
+ goto err_mtt;
+
+ err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
+ convert_access(acc), mr);
+
+ if (err)
+ goto err_mtt;
+
+ return &mr->ibmr;
+
+err_mtt:
+ mthca_free_mtt(dev, mr->mtt);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+static int mthca_dereg_mr(struct ib_mr *mr)
+{
+ struct mthca_mr *mmr = to_mmr(mr);
+
+ mthca_free_mr(to_mdev(mr->device), mmr);
+ if (mmr->umem)
+ ib_umem_release(mmr->umem);
+ kfree(mmr);
+
+ return 0;
+}
+
+static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr)
+{
+ struct mthca_fmr *fmr;
+ int err;
+
+ fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
+ if (!fmr)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr);
+ err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
+ convert_access(mr_access_flags), fmr);
+
+ if (err) {
+ kfree(fmr);
+ return ERR_PTR(err);
+ }
+
+ return &fmr->ibmr;
+}
+
+static int mthca_dealloc_fmr(struct ib_fmr *fmr)
+{
+ struct mthca_fmr *mfmr = to_mfmr(fmr);
+ int err;
+
+ err = mthca_free_fmr(to_mdev(fmr->device), mfmr);
+ if (err)
+ return err;
+
+ kfree(mfmr);
+ return 0;
+}
+
+static int mthca_unmap_fmr(struct list_head *fmr_list)
+{
+ struct ib_fmr *fmr;
+ int err;
+ u8 status;
+ struct mthca_dev *mdev = NULL;
+
+ list_for_each_entry(fmr, fmr_list, list) {
+ if (mdev && to_mdev(fmr->device) != mdev)
+ return -EINVAL;
+ mdev = to_mdev(fmr->device);
+ }
+
+ if (!mdev)
+ return 0;
+
+ if (mthca_is_memfree(mdev)) {
+ list_for_each_entry(fmr, fmr_list, list)
+ mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr));
+
+ wmb();
+ } else
+ list_for_each_entry(fmr, fmr_list, list)
+ mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));
+
+ err = mthca_SYNC_TPT(mdev, &status);
+ if (err)
+ return err;
+ if (status)
+ return -EINVAL;
+ return 0;
+}
+
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mthca_dev *dev =
+ container_of(device, struct mthca_dev, ib_dev.dev);
+ return sprintf(buf, "%x\n", dev->rev_id);
+}
+
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mthca_dev *dev =
+ container_of(device, struct mthca_dev, ib_dev.dev);
+ return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32),
+ (int) (dev->fw_ver >> 16) & 0xffff,
+ (int) dev->fw_ver & 0xffff);
+}
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mthca_dev *dev =
+ container_of(device, struct mthca_dev, ib_dev.dev);
+ switch (dev->pdev->device) {
+ case PCI_DEVICE_ID_MELLANOX_TAVOR:
+ return sprintf(buf, "MT23108\n");
+ case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT:
+ return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
+ case PCI_DEVICE_ID_MELLANOX_ARBEL:
+ return sprintf(buf, "MT25208\n");
+ case PCI_DEVICE_ID_MELLANOX_SINAI:
+ case PCI_DEVICE_ID_MELLANOX_SINAI_OLD:
+ return sprintf(buf, "MT25204\n");
+ default:
+ return sprintf(buf, "unknown\n");
+ }
+}
+
+static ssize_t show_board(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mthca_dev *dev =
+ container_of(device, struct mthca_dev, ib_dev.dev);
+ return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct device_attribute *mthca_dev_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_ver,
+ &dev_attr_hca_type,
+ &dev_attr_board_id
+};
+
+static int mthca_init_node_data(struct mthca_dev *dev)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+ u8 status;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
+
+ err = mthca_MAD_IFC(dev, 1, 1,
+ 1, NULL, NULL, in_mad, out_mad,
+ &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
+
+ in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+ err = mthca_MAD_IFC(dev, 1, 1,
+ 1, NULL, NULL, in_mad, out_mad,
+ &status);
+ if (err)
+ goto out;
+ if (status) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (mthca_is_memfree(dev))
+ dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
+ memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+int mthca_register_device(struct mthca_dev *dev)
+{
+ int ret;
+ int i;
+
+ ret = mthca_init_node_data(dev);
+ if (ret)
+ return ret;
+
+ strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
+ dev->ib_dev.owner = THIS_MODULE;
+
+ dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION;
+ dev->ib_dev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
+ dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
+ dev->ib_dev.num_comp_vectors = 1;
+ dev->ib_dev.dma_device = &dev->pdev->dev;
+ dev->ib_dev.query_device = mthca_query_device;
+ dev->ib_dev.query_port = mthca_query_port;
+ dev->ib_dev.modify_device = mthca_modify_device;
+ dev->ib_dev.modify_port = mthca_modify_port;
+ dev->ib_dev.query_pkey = mthca_query_pkey;
+ dev->ib_dev.query_gid = mthca_query_gid;
+ dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext;
+ dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext;
+ dev->ib_dev.mmap = mthca_mmap_uar;
+ dev->ib_dev.alloc_pd = mthca_alloc_pd;
+ dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
+ dev->ib_dev.create_ah = mthca_ah_create;
+ dev->ib_dev.query_ah = mthca_ah_query;
+ dev->ib_dev.destroy_ah = mthca_ah_destroy;
+
+ if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
+ dev->ib_dev.create_srq = mthca_create_srq;
+ dev->ib_dev.modify_srq = mthca_modify_srq;
+ dev->ib_dev.query_srq = mthca_query_srq;
+ dev->ib_dev.destroy_srq = mthca_destroy_srq;
+ dev->ib_dev.uverbs_cmd_mask |=
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+
+ if (mthca_is_memfree(dev))
+ dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
+ else
+ dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv;
+ }
+
+ dev->ib_dev.create_qp = mthca_create_qp;
+ dev->ib_dev.modify_qp = mthca_modify_qp;
+ dev->ib_dev.query_qp = mthca_query_qp;
+ dev->ib_dev.destroy_qp = mthca_destroy_qp;
+ dev->ib_dev.create_cq = mthca_create_cq;
+ dev->ib_dev.resize_cq = mthca_resize_cq;
+ dev->ib_dev.destroy_cq = mthca_destroy_cq;
+ dev->ib_dev.poll_cq = mthca_poll_cq;
+ dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
+ dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
+ dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
+ dev->ib_dev.dereg_mr = mthca_dereg_mr;
+
+ if (dev->mthca_flags & MTHCA_FLAG_FMR) {
+ dev->ib_dev.alloc_fmr = mthca_alloc_fmr;
+ dev->ib_dev.unmap_fmr = mthca_unmap_fmr;
+ dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr;
+ if (mthca_is_memfree(dev))
+ dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
+ else
+ dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
+ }
+
+ dev->ib_dev.attach_mcast = mthca_multicast_attach;
+ dev->ib_dev.detach_mcast = mthca_multicast_detach;
+ dev->ib_dev.process_mad = mthca_process_mad;
+
+ if (mthca_is_memfree(dev)) {
+ dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
+ dev->ib_dev.post_send = mthca_arbel_post_send;
+ dev->ib_dev.post_recv = mthca_arbel_post_receive;
+ } else {
+ dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
+ dev->ib_dev.post_send = mthca_tavor_post_send;
+ dev->ib_dev.post_recv = mthca_tavor_post_receive;
+ }
+
+ mutex_init(&dev->cap_mask_mutex);
+
+ ret = ib_register_device(&dev->ib_dev);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) {
+ ret = device_create_file(&dev->ib_dev.dev,
+ mthca_dev_attributes[i]);
+ if (ret) {
+ ib_unregister_device(&dev->ib_dev);
+ return ret;
+ }
+ }
+
+ mthca_start_catas_poll(dev);
+
+ return 0;
+}
+
+void mthca_unregister_device(struct mthca_dev *dev)
+{
+ mthca_stop_catas_poll(dev);
+ ib_unregister_device(&dev->ib_dev);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
new file mode 100644
index 0000000..c621f87
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MTHCA_PROVIDER_H
+#define MTHCA_PROVIDER_H
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+
+#define MTHCA_MPT_FLAG_ATOMIC (1 << 14)
+#define MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13)
+#define MTHCA_MPT_FLAG_REMOTE_READ (1 << 12)
+#define MTHCA_MPT_FLAG_LOCAL_WRITE (1 << 11)
+#define MTHCA_MPT_FLAG_LOCAL_READ (1 << 10)
+
+struct mthca_buf_list {
+ void *buf;
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+};
+
+union mthca_buf {
+ struct mthca_buf_list direct;
+ struct mthca_buf_list *page_list;
+};
+
+struct mthca_uar {
+ unsigned long pfn;
+ int index;
+};
+
+struct mthca_user_db_table;
+
+struct mthca_ucontext {
+ struct ib_ucontext ibucontext;
+ struct mthca_uar uar;
+ struct mthca_user_db_table *db_tab;
+ int reg_mr_warned;
+};
+
+struct mthca_mtt;
+
+struct mthca_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct mthca_mtt *mtt;
+};
+
+struct mthca_fmr {
+ struct ib_fmr ibmr;
+ struct ib_fmr_attr attr;
+ struct mthca_mtt *mtt;
+ int maps;
+ union {
+ struct {
+ struct mthca_mpt_entry __iomem *mpt;
+ u64 __iomem *mtts;
+ } tavor;
+ struct {
+ struct mthca_mpt_entry *mpt;
+ __be64 *mtts;
+ dma_addr_t dma_handle;
+ } arbel;
+ } mem;
+};
+
+struct mthca_pd {
+ struct ib_pd ibpd;
+ u32 pd_num;
+ atomic_t sqp_count;
+ struct mthca_mr ntmr;
+ int privileged;
+};
+
+struct mthca_eq {
+ struct mthca_dev *dev;
+ int eqn;
+ u32 eqn_mask;
+ u32 cons_index;
+ u16 msi_x_vector;
+ u16 msi_x_entry;
+ int have_irq;
+ int nent;
+ struct mthca_buf_list *page_list;
+ struct mthca_mr mr;
+};
+
+struct mthca_av;
+
+enum mthca_ah_type {
+ MTHCA_AH_ON_HCA,
+ MTHCA_AH_PCI_POOL,
+ MTHCA_AH_KMALLOC
+};
+
+struct mthca_ah {
+ struct ib_ah ibah;
+ enum mthca_ah_type type;
+ u32 key;
+ struct mthca_av *av;
+ dma_addr_t avdma;
+};
+
+/*
+ * Quick description of our CQ/QP locking scheme:
+ *
+ * We have one global lock that protects dev->cq/qp_table. Each
+ * struct mthca_cq/qp also has its own lock. An individual qp lock
+ * may be taken inside of an individual cq lock. Both cqs attached to
+ * a qp may be locked, with the cq with the lower cqn locked first.
+ * No other nesting should be done.
+ *
+ * Each struct mthca_cq/qp also has a ref count, protected by the
+ * corresponding table lock. The pointer from the cq/qp_table to the
+ * struct counts as one reference. This reference is also good for
+ * access through the consumer API, so modifying the CQ/QP etc. doesn't
+ * need to take another reference. Access to a QP because of a
+ * completion being polled does not need a reference either.
+ *
+ * Finally, each struct mthca_cq/qp has a wait_queue_head_t for the
+ * destroy function to sleep on.
+ *
+ * This means that access from the consumer API requires nothing but
+ * taking the struct's lock.
+ *
+ * Access because of a completion event should go as follows:
+ * - lock cq/qp_table and look up struct
+ * - increment ref count in struct
+ * - drop cq/qp_table lock
+ * - lock struct, do your thing, and unlock struct
+ * - decrement ref count; if zero, wake up waiters
+ *
+ * To destroy a CQ/QP, we can do the following:
+ * - lock cq/qp_table
+ * - remove pointer and decrement ref count
+ * - unlock cq/qp_table lock
+ * - wait_event until ref count is zero
+ *
+ * It is the consumer's responsibility to make sure that no QP
+ * operations (WQE posting or state modification) are pending when a
+ * QP is destroyed. Also, the consumer must make sure that calls to
+ * qp_modify are serialized. Similarly, the consumer is responsible
+ * for ensuring that no CQ resize operations are pending when a CQ
+ * is destroyed.
+ *
+ * Possible optimizations (wait for profile data to see if/where we
+ * have locks bouncing between CPUs):
+ * - split cq/qp table lock into n separate (cache-aligned) locks,
+ * indexed (say) by the page in the table
+ * - split QP struct lock into three (one for common info, one for the
+ * send queue and one for the receive queue)
+ */
+
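+/*
+ * A sketch of the event-handler access pattern described above, shown
+ * purely as illustration; it mirrors mthca_qp_event() in mthca_qp.c,
+ * and an analogous pattern is used for CQs:
+ *
+ *     spin_lock(&dev->qp_table.lock);
+ *     qp = mthca_array_get(&dev->qp_table.qp,
+ *                          qpn & (dev->limits.num_qps - 1));
+ *     if (qp)
+ *             ++qp->refcount;
+ *     spin_unlock(&dev->qp_table.lock);
+ *
+ *     if (!qp)
+ *             return;        (bogus QPN, nothing to do)
+ *
+ *     ... lock the struct, handle the event, unlock the struct ...
+ *
+ *     spin_lock(&dev->qp_table.lock);
+ *     if (!--qp->refcount)
+ *             wake_up(&qp->wait);
+ *     spin_unlock(&dev->qp_table.lock);
+ */
+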
+struct mthca_cq_buf {
+ union mthca_buf queue;
+ struct mthca_mr mr;
+ int is_direct;
+};
+
+struct mthca_cq_resize {
+ struct mthca_cq_buf buf;
+ int cqe;
+ enum {
+ CQ_RESIZE_ALLOC,
+ CQ_RESIZE_READY,
+ CQ_RESIZE_SWAPPED
+ } state;
+};
+
+struct mthca_cq {
+ struct ib_cq ibcq;
+ spinlock_t lock;
+ int refcount;
+ int cqn;
+ u32 cons_index;
+ struct mthca_cq_buf buf;
+ struct mthca_cq_resize *resize_buf;
+ int is_kernel;
+
+ /* Next fields are Arbel only */
+ int set_ci_db_index;
+ __be32 *set_ci_db;
+ int arm_db_index;
+ __be32 *arm_db;
+ int arm_sn;
+
+ wait_queue_head_t wait;
+ struct mutex mutex;
+};
+
+struct mthca_srq {
+ struct ib_srq ibsrq;
+ spinlock_t lock;
+ int refcount;
+ int srqn;
+ int max;
+ int max_gs;
+ int wqe_shift;
+ int first_free;
+ int last_free;
+ u16 counter; /* Arbel only */
+ int db_index; /* Arbel only */
+ __be32 *db; /* Arbel only */
+ void *last;
+
+ int is_direct;
+ u64 *wrid;
+ union mthca_buf queue;
+ struct mthca_mr mr;
+
+ wait_queue_head_t wait;
+ struct mutex mutex;
+};
+
+struct mthca_wq {
+ spinlock_t lock;
+ int max;
+ unsigned next_ind;
+ unsigned last_comp;
+ unsigned head;
+ unsigned tail;
+ void *last;
+ int max_gs;
+ int wqe_shift;
+
+ int db_index; /* Arbel only */
+ __be32 *db;
+};
+
+struct mthca_qp {
+ struct ib_qp ibqp;
+ int refcount;
+ u32 qpn;
+ int is_direct;
+ u8 port; /* for SQP and memfree use only */
+ u8 alt_port; /* for memfree use only */
+ u8 transport;
+ u8 state;
+ u8 atomic_rd_en;
+ u8 resp_depth;
+
+ struct mthca_mr mr;
+
+ struct mthca_wq rq;
+ struct mthca_wq sq;
+ enum ib_sig_type sq_policy;
+ int send_wqe_offset;
+ int max_inline_data;
+
+ u64 *wrid;
+ union mthca_buf queue;
+
+ wait_queue_head_t wait;
+ struct mutex mutex;
+};
+
+struct mthca_sqp {
+ struct mthca_qp qp;
+ int pkey_index;
+ u32 qkey;
+ u32 send_psn;
+ struct ib_ud_header ud_header;
+ int header_buf_size;
+ void *header_buf;
+ dma_addr_t header_dma;
+};
+
+static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct mthca_ucontext, ibucontext);
+}
+
+static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
+{
+ return container_of(ibmr, struct mthca_fmr, ibmr);
+}
+
+static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct mthca_mr, ibmr);
+}
+
+static inline struct mthca_pd *to_mpd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct mthca_pd, ibpd);
+}
+
+static inline struct mthca_ah *to_mah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct mthca_ah, ibah);
+}
+
+static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct mthca_cq, ibcq);
+}
+
+static inline struct mthca_srq *to_msrq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct mthca_srq, ibsrq);
+}
+
+static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct mthca_qp, ibqp);
+}
+
+static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp)
+{
+ return container_of(qp, struct mthca_sqp, qp);
+}
+
+#endif /* MTHCA_PROVIDER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
new file mode 100644
index 0000000..f5081bf
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -0,0 +1,2323 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#include <asm/io.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_pack.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+#include "mthca_memfree.h"
+#include "mthca_wqe.h"
+
+enum {
+ MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
+ MTHCA_ACK_REQ_FREQ = 10,
+ MTHCA_FLIGHT_LIMIT = 9,
+ MTHCA_UD_HEADER_SIZE = 72, /* largest UD header possible */
+ MTHCA_INLINE_HEADER_SIZE = 4, /* data segment overhead for inline */
+ MTHCA_INLINE_CHUNK_SIZE = 16 /* inline data segment chunk */
+};
+
+enum {
+ MTHCA_QP_STATE_RST = 0,
+ MTHCA_QP_STATE_INIT = 1,
+ MTHCA_QP_STATE_RTR = 2,
+ MTHCA_QP_STATE_RTS = 3,
+ MTHCA_QP_STATE_SQE = 4,
+ MTHCA_QP_STATE_SQD = 5,
+ MTHCA_QP_STATE_ERR = 6,
+ MTHCA_QP_STATE_DRAINING = 7
+};
+
+enum {
+ MTHCA_QP_ST_RC = 0x0,
+ MTHCA_QP_ST_UC = 0x1,
+ MTHCA_QP_ST_RD = 0x2,
+ MTHCA_QP_ST_UD = 0x3,
+ MTHCA_QP_ST_MLX = 0x7
+};
+
+enum {
+ MTHCA_QP_PM_MIGRATED = 0x3,
+ MTHCA_QP_PM_ARMED = 0x0,
+ MTHCA_QP_PM_REARM = 0x1
+};
+
+enum {
+ /* qp_context flags */
+ MTHCA_QP_BIT_DE = 1 << 8,
+ /* params1 */
+ MTHCA_QP_BIT_SRE = 1 << 15,
+ MTHCA_QP_BIT_SWE = 1 << 14,
+ MTHCA_QP_BIT_SAE = 1 << 13,
+ MTHCA_QP_BIT_SIC = 1 << 4,
+ MTHCA_QP_BIT_SSC = 1 << 3,
+ /* params2 */
+ MTHCA_QP_BIT_RRE = 1 << 15,
+ MTHCA_QP_BIT_RWE = 1 << 14,
+ MTHCA_QP_BIT_RAE = 1 << 13,
+ MTHCA_QP_BIT_RIC = 1 << 4,
+ MTHCA_QP_BIT_RSC = 1 << 3
+};
+
+enum {
+ MTHCA_SEND_DOORBELL_FENCE = 1 << 5
+};
+
+struct mthca_qp_path {
+ __be32 port_pkey;
+ u8 rnr_retry;
+ u8 g_mylmc;
+ __be16 rlid;
+ u8 ackto;
+ u8 mgid_index;
+ u8 static_rate;
+ u8 hop_limit;
+ __be32 sl_tclass_flowlabel;
+ u8 rgid[16];
+} __attribute__((packed));
+
+struct mthca_qp_context {
+ __be32 flags;
+ __be32 tavor_sched_queue; /* Reserved on Arbel */
+ u8 mtu_msgmax;
+ u8 rq_size_stride; /* Reserved on Tavor */
+ u8 sq_size_stride; /* Reserved on Tavor */
+ u8 rlkey_arbel_sched_queue; /* Reserved on Tavor */
+ __be32 usr_page;
+ __be32 local_qpn;
+ __be32 remote_qpn;
+ u32 reserved1[2];
+ struct mthca_qp_path pri_path;
+ struct mthca_qp_path alt_path;
+ __be32 rdd;
+ __be32 pd;
+ __be32 wqe_base;
+ __be32 wqe_lkey;
+ __be32 params1;
+ __be32 reserved2;
+ __be32 next_send_psn;
+ __be32 cqn_snd;
+ __be32 snd_wqe_base_l; /* Next send WQE on Tavor */
+ __be32 snd_db_index; /* (debugging only entries) */
+ __be32 last_acked_psn;
+ __be32 ssn;
+ __be32 params2;
+ __be32 rnr_nextrecvpsn;
+ __be32 ra_buff_indx;
+ __be32 cqn_rcv;
+ __be32 rcv_wqe_base_l; /* Next recv WQE on Tavor */
+ __be32 rcv_db_index; /* (debugging only entries) */
+ __be32 qkey;
+ __be32 srqn;
+ __be32 rmsn;
+ __be16 rq_wqe_counter; /* reserved on Tavor */
+ __be16 sq_wqe_counter; /* reserved on Tavor */
+ u32 reserved3[18];
+} __attribute__((packed));
+
+struct mthca_qp_param {
+ __be32 opt_param_mask;
+ u32 reserved1;
+ struct mthca_qp_context context;
+ u32 reserved2[62];
+} __attribute__((packed));
+
+enum {
+ MTHCA_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
+ MTHCA_QP_OPTPAR_RRE = 1 << 1,
+ MTHCA_QP_OPTPAR_RAE = 1 << 2,
+ MTHCA_QP_OPTPAR_RWE = 1 << 3,
+ MTHCA_QP_OPTPAR_PKEY_INDEX = 1 << 4,
+ MTHCA_QP_OPTPAR_Q_KEY = 1 << 5,
+ MTHCA_QP_OPTPAR_RNR_TIMEOUT = 1 << 6,
+ MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7,
+ MTHCA_QP_OPTPAR_SRA_MAX = 1 << 8,
+ MTHCA_QP_OPTPAR_RRA_MAX = 1 << 9,
+ MTHCA_QP_OPTPAR_PM_STATE = 1 << 10,
+ MTHCA_QP_OPTPAR_PORT_NUM = 1 << 11,
+ MTHCA_QP_OPTPAR_RETRY_COUNT = 1 << 12,
+ MTHCA_QP_OPTPAR_ALT_RNR_RETRY = 1 << 13,
+ MTHCA_QP_OPTPAR_ACK_TIMEOUT = 1 << 14,
+ MTHCA_QP_OPTPAR_RNR_RETRY = 1 << 15,
+ MTHCA_QP_OPTPAR_SCHED_QUEUE = 1 << 16
+};
+
+static const u8 mthca_opcode[] = {
+ [IB_WR_SEND] = MTHCA_OPCODE_SEND,
+ [IB_WR_SEND_WITH_IMM] = MTHCA_OPCODE_SEND_IMM,
+ [IB_WR_RDMA_WRITE] = MTHCA_OPCODE_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = MTHCA_OPCODE_RDMA_WRITE_IMM,
+ [IB_WR_RDMA_READ] = MTHCA_OPCODE_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = MTHCA_OPCODE_ATOMIC_CS,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA,
+};
+
+static int is_sqp(struct mthca_dev *dev, struct mthca_qp *qp)
+{
+ return qp->qpn >= dev->qp_table.sqp_start &&
+ qp->qpn <= dev->qp_table.sqp_start + 3;
+}
+
+static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp)
+{
+ return qp->qpn >= dev->qp_table.sqp_start &&
+ qp->qpn <= dev->qp_table.sqp_start + 1;
+}
+
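+/*
+ * A QP's work queue buffer is either a single contiguous allocation
+ * (qp->is_direct) or a list of pages; receive WQEs start at offset 0
+ * and send WQEs at qp->send_wqe_offset.  The helpers below return the
+ * address of WQE n in either layout.
+ */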
+static void *get_recv_wqe(struct mthca_qp *qp, int n)
+{
+ if (qp->is_direct)
+ return qp->queue.direct.buf + (n << qp->rq.wqe_shift);
+ else
+ return qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].buf +
+ ((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1));
+}
+
+static void *get_send_wqe(struct mthca_qp *qp, int n)
+{
+ if (qp->is_direct)
+ return qp->queue.direct.buf + qp->send_wqe_offset +
+ (n << qp->sq.wqe_shift);
+ else
+ return qp->queue.page_list[(qp->send_wqe_offset +
+ (n << qp->sq.wqe_shift)) >>
+ PAGE_SHIFT].buf +
+ ((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) &
+ (PAGE_SIZE - 1));
+}
+
+static void mthca_wq_reset(struct mthca_wq *wq)
+{
+ wq->next_ind = 0;
+ wq->last_comp = wq->max - 1;
+ wq->head = 0;
+ wq->tail = 0;
+}
+
+void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
+ enum ib_event_type event_type)
+{
+ struct mthca_qp *qp;
+ struct ib_event event;
+
+ spin_lock(&dev->qp_table.lock);
+ qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
+ if (qp)
+ ++qp->refcount;
+ spin_unlock(&dev->qp_table.lock);
+
+ if (!qp) {
+ mthca_warn(dev, "Async event for bogus QP %08x\n", qpn);
+ return;
+ }
+
+ if (event_type == IB_EVENT_PATH_MIG)
+ qp->port = qp->alt_port;
+
+ event.device = &dev->ib_dev;
+ event.event = event_type;
+ event.element.qp = &qp->ibqp;
+ if (qp->ibqp.event_handler)
+ qp->ibqp.event_handler(&event, qp->ibqp.qp_context);
+
+ spin_lock(&dev->qp_table.lock);
+ if (!--qp->refcount)
+ wake_up(&qp->wait);
+ spin_unlock(&dev->qp_table.lock);
+}
+
+static int to_mthca_state(enum ib_qp_state ib_state)
+{
+ switch (ib_state) {
+ case IB_QPS_RESET: return MTHCA_QP_STATE_RST;
+ case IB_QPS_INIT: return MTHCA_QP_STATE_INIT;
+ case IB_QPS_RTR: return MTHCA_QP_STATE_RTR;
+ case IB_QPS_RTS: return MTHCA_QP_STATE_RTS;
+ case IB_QPS_SQD: return MTHCA_QP_STATE_SQD;
+ case IB_QPS_SQE: return MTHCA_QP_STATE_SQE;
+ case IB_QPS_ERR: return MTHCA_QP_STATE_ERR;
+ default: return -1;
+ }
+}
+
+enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS };
+
+static int to_mthca_st(int transport)
+{
+ switch (transport) {
+ case RC: return MTHCA_QP_ST_RC;
+ case UC: return MTHCA_QP_ST_UC;
+ case UD: return MTHCA_QP_ST_UD;
+ case RD: return MTHCA_QP_ST_RD;
+ case MLX: return MTHCA_QP_ST_MLX;
+ default: return -1;
+ }
+}
+
+static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ if (attr_mask & IB_QP_PKEY_INDEX)
+ sqp->pkey_index = attr->pkey_index;
+ if (attr_mask & IB_QP_QKEY)
+ sqp->qkey = attr->qkey;
+ if (attr_mask & IB_QP_SQ_PSN)
+ sqp->send_psn = attr->sq_psn;
+}
+
+static void init_port(struct mthca_dev *dev, int port)
+{
+ int err;
+ u8 status;
+ struct mthca_init_ib_param param;
+
+ memset(&param, 0, sizeof param);
+
+ param.port_width = dev->limits.port_width_cap;
+ param.vl_cap = dev->limits.vl_cap;
+ param.mtu_cap = dev->limits.mtu_cap;
+ param.gid_cap = dev->limits.gid_table_len;
+ param.pkey_cap = dev->limits.pkey_table_len;
+
+ err = mthca_INIT_IB(dev, &param, port, &status);
+ if (err)
+ mthca_warn(dev, "INIT_IB failed, return code %d.\n", err);
+ if (status)
+ mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
+}
+
+static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ u8 dest_rd_atomic;
+ u32 access_flags;
+ u32 hw_access_flags = 0;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ dest_rd_atomic = attr->max_dest_rd_atomic;
+ else
+ dest_rd_atomic = qp->resp_depth;
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ access_flags = attr->qp_access_flags;
+ else
+ access_flags = qp->atomic_rd_en;
+
+ if (!dest_rd_atomic)
+ access_flags &= IB_ACCESS_REMOTE_WRITE;
+
+ if (access_flags & IB_ACCESS_REMOTE_READ)
+ hw_access_flags |= MTHCA_QP_BIT_RRE;
+ if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
+ hw_access_flags |= MTHCA_QP_BIT_RAE;
+ if (access_flags & IB_ACCESS_REMOTE_WRITE)
+ hw_access_flags |= MTHCA_QP_BIT_RWE;
+
+ return cpu_to_be32(hw_access_flags);
+}
+
+static inline enum ib_qp_state to_ib_qp_state(int mthca_state)
+{
+ switch (mthca_state) {
+ case MTHCA_QP_STATE_RST: return IB_QPS_RESET;
+ case MTHCA_QP_STATE_INIT: return IB_QPS_INIT;
+ case MTHCA_QP_STATE_RTR: return IB_QPS_RTR;
+ case MTHCA_QP_STATE_RTS: return IB_QPS_RTS;
+ case MTHCA_QP_STATE_DRAINING:
+ case MTHCA_QP_STATE_SQD: return IB_QPS_SQD;
+ case MTHCA_QP_STATE_SQE: return IB_QPS_SQE;
+ case MTHCA_QP_STATE_ERR: return IB_QPS_ERR;
+ default: return -1;
+ }
+}
+
+static inline enum ib_mig_state to_ib_mig_state(int mthca_mig_state)
+{
+ switch (mthca_mig_state) {
+ case 0: return IB_MIG_ARMED;
+ case 1: return IB_MIG_REARM;
+ case 3: return IB_MIG_MIGRATED;
+ default: return -1;
+ }
+}
+
+static int to_ib_qp_access_flags(int mthca_flags)
+{
+ int ib_flags = 0;
+
+ if (mthca_flags & MTHCA_QP_BIT_RRE)
+ ib_flags |= IB_ACCESS_REMOTE_READ;
+ if (mthca_flags & MTHCA_QP_BIT_RWE)
+ ib_flags |= IB_ACCESS_REMOTE_WRITE;
+ if (mthca_flags & MTHCA_QP_BIT_RAE)
+ ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
+
+ return ib_flags;
+}
+
+static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
+ struct mthca_qp_path *path)
+{
+ memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
+ ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
+
+ if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports)
+ return;
+
+ ib_ah_attr->dlid = be16_to_cpu(path->rlid);
+ ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
+ ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
+ ib_ah_attr->static_rate = mthca_rate_to_ib(dev,
+ path->static_rate & 0xf,
+ ib_ah_attr->port_num);
+ ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
+ if (ib_ah_attr->ah_flags) {
+ ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
+ ib_ah_attr->grh.hop_limit = path->hop_limit;
+ ib_ah_attr->grh.traffic_class =
+ (be32_to_cpu(path->sl_tclass_flowlabel) >> 20) & 0xff;
+ ib_ah_attr->grh.flow_label =
+ be32_to_cpu(path->sl_tclass_flowlabel) & 0xfffff;
+ memcpy(ib_ah_attr->grh.dgid.raw,
+ path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+ }
+}
+
+int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ int err = 0;
+ struct mthca_mailbox *mailbox = NULL;
+ struct mthca_qp_param *qp_param;
+ struct mthca_qp_context *context;
+ int mthca_state;
+ u8 status;
+
+ mutex_lock(&qp->mutex);
+
+ if (qp->state == IB_QPS_RESET) {
+ qp_attr->qp_state = IB_QPS_RESET;
+ goto done;
+ }
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto out;
+ }
+
+ err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox, &status);
+ if (err)
+ goto out_mailbox;
+ if (status) {
+ mthca_warn(dev, "QUERY_QP returned status %02x\n", status);
+ err = -EINVAL;
+ goto out_mailbox;
+ }
+
+ qp_param = mailbox->buf;
+ context = &qp_param->context;
+ mthca_state = be32_to_cpu(context->flags) >> 28;
+
+ qp->state = to_ib_qp_state(mthca_state);
+ qp_attr->qp_state = qp->state;
+ qp_attr->path_mtu = context->mtu_msgmax >> 5;
+ qp_attr->path_mig_state =
+ to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
+ qp_attr->qkey = be32_to_cpu(context->qkey);
+ qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
+ qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff;
+ qp_attr->dest_qp_num = be32_to_cpu(context->remote_qpn) & 0xffffff;
+ qp_attr->qp_access_flags =
+ to_ib_qp_access_flags(be32_to_cpu(context->params2));
+
+ if (qp->transport == RC || qp->transport == UC) {
+ to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+ to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+ qp_attr->alt_pkey_index =
+ be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
+ qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ }
+
+ qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
+ qp_attr->port_num =
+ (be32_to_cpu(context->pri_path.port_pkey) >> 24) & 0x3;
+
+ /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
+ qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING;
+
+ qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
+
+ qp_attr->max_dest_rd_atomic =
+ 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
+ qp_attr->min_rnr_timer =
+ (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
+ qp_attr->timeout = context->pri_path.ackto >> 3;
+ qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
+ qp_attr->rnr_retry = context->pri_path.rnr_retry >> 5;
+ qp_attr->alt_timeout = context->alt_path.ackto >> 3;
+
+done:
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->cap.max_send_wr = qp->sq.max;
+ qp_attr->cap.max_recv_wr = qp->rq.max;
+ qp_attr->cap.max_send_sge = qp->sq.max_gs;
+ qp_attr->cap.max_recv_sge = qp->rq.max_gs;
+ qp_attr->cap.max_inline_data = qp->max_inline_data;
+
+ qp_init_attr->cap = qp_attr->cap;
+
+out_mailbox:
+ mthca_free_mailbox(dev, mailbox);
+
+out:
+ mutex_unlock(&qp->mutex);
+ return err;
+}
+
+static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
+ struct mthca_qp_path *path, u8 port)
+{
+ path->g_mylmc = ah->src_path_bits & 0x7f;
+ path->rlid = cpu_to_be16(ah->dlid);
+ path->static_rate = mthca_get_rate(dev, ah->static_rate, port);
+
+ if (ah->ah_flags & IB_AH_GRH) {
+ if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
+ mthca_dbg(dev, "sgid_index (%u) too large. max is %d\n",
+ ah->grh.sgid_index, dev->limits.gid_table_len-1);
+ return -1;
+ }
+
+ path->g_mylmc |= 1 << 7;
+ path->mgid_index = ah->grh.sgid_index;
+ path->hop_limit = ah->grh.hop_limit;
+ path->sl_tclass_flowlabel =
+ cpu_to_be32((ah->sl << 28) |
+ (ah->grh.traffic_class << 20) |
+ (ah->grh.flow_label));
+ memcpy(path->rgid, ah->grh.dgid.raw, 16);
+ } else
+ path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28);
+
+ return 0;
+}
+
+static int __mthca_modify_qp(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr, int attr_mask,
+ enum ib_qp_state cur_state, enum ib_qp_state new_state)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ struct mthca_mailbox *mailbox;
+ struct mthca_qp_param *qp_param;
+ struct mthca_qp_context *qp_context;
+ u32 sqd_event = 0;
+ u8 status;
+ int err = -EINVAL;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto out;
+ }
+ qp_param = mailbox->buf;
+ qp_context = &qp_param->context;
+ memset(qp_param, 0, sizeof *qp_param);
+
+ qp_context->flags = cpu_to_be32((to_mthca_state(new_state) << 28) |
+ (to_mthca_st(qp->transport) << 16));
+ qp_context->flags |= cpu_to_be32(MTHCA_QP_BIT_DE);
+ if (!(attr_mask & IB_QP_PATH_MIG_STATE))
+ qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
+ else {
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PM_STATE);
+ switch (attr->path_mig_state) {
+ case IB_MIG_MIGRATED:
+ qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
+ break;
+ case IB_MIG_REARM:
+ qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_REARM << 11);
+ break;
+ case IB_MIG_ARMED:
+ qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_ARMED << 11);
+ break;
+ }
+ }
+
+ /* leave tavor_sched_queue as 0 */
+
+ if (qp->transport == MLX || qp->transport == UD)
+ qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11;
+ else if (attr_mask & IB_QP_PATH_MTU) {
+ if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_2048) {
+ mthca_dbg(dev, "path MTU (%u) is invalid\n",
+ attr->path_mtu);
+ goto out_mailbox;
+ }
+ qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;
+ }
+
+ if (mthca_is_memfree(dev)) {
+ if (qp->rq.max)
+ qp_context->rq_size_stride = ilog2(qp->rq.max) << 3;
+ qp_context->rq_size_stride |= qp->rq.wqe_shift - 4;
+
+ if (qp->sq.max)
+ qp_context->sq_size_stride = ilog2(qp->sq.max) << 3;
+ qp_context->sq_size_stride |= qp->sq.wqe_shift - 4;
+ }
+
+ /* leave arbel_sched_queue as 0 */
+
+ if (qp->ibqp.uobject)
+ qp_context->usr_page =
+ cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
+ else
+ qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
+ qp_context->local_qpn = cpu_to_be32(qp->qpn);
+ if (attr_mask & IB_QP_DEST_QPN) {
+ qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
+ }
+
+ if (qp->transport == MLX)
+ qp_context->pri_path.port_pkey |=
+ cpu_to_be32(qp->port << 24);
+ else {
+ if (attr_mask & IB_QP_PORT) {
+ qp_context->pri_path.port_pkey |=
+ cpu_to_be32(attr->port_num << 24);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PORT_NUM);
+ }
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ qp_context->pri_path.port_pkey |=
+ cpu_to_be32(attr->pkey_index);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PKEY_INDEX);
+ }
+
+ if (attr_mask & IB_QP_RNR_RETRY) {
+ qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry =
+ attr->rnr_retry << 5;
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY |
+ MTHCA_QP_OPTPAR_ALT_RNR_RETRY);
+ }
+
+ if (attr_mask & IB_QP_AV) {
+ if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path,
+ attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
+ goto out_mailbox;
+
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
+ }
+
+ if (ibqp->qp_type == IB_QPT_RC &&
+ cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ u8 sched_queue = ibqp->uobject ? 0x2 : 0x1;
+
+ if (mthca_is_memfree(dev))
+ qp_context->rlkey_arbel_sched_queue |= sched_queue;
+ else
+ qp_context->tavor_sched_queue |= cpu_to_be32(sched_queue);
+
+ qp_param->opt_param_mask |=
+ cpu_to_be32(MTHCA_QP_OPTPAR_SCHED_QUEUE);
+ }
+
+ if (attr_mask & IB_QP_TIMEOUT) {
+ qp_context->pri_path.ackto = attr->timeout << 3;
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
+ }
+
+ if (attr_mask & IB_QP_ALT_PATH) {
+ if (attr->alt_pkey_index >= dev->limits.pkey_table_len) {
+ mthca_dbg(dev, "Alternate P_Key index (%u) too large. max is %d\n",
+ attr->alt_pkey_index, dev->limits.pkey_table_len-1);
+ goto out_mailbox;
+ }
+
+ if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) {
+ mthca_dbg(dev, "Alternate port number (%u) is invalid\n",
+ attr->alt_port_num);
+ goto out_mailbox;
+ }
+
+ if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
+ attr->alt_ah_attr.port_num))
+ goto out_mailbox;
+
+ qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
+ attr->alt_port_num << 24);
+ qp_context->alt_path.ackto = attr->alt_timeout << 3;
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH);
+ }
+
+ /* leave rdd as 0 */
+ qp_context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pd_num);
+ /* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */
+ qp_context->wqe_lkey = cpu_to_be32(qp->mr.ibmr.lkey);
+ qp_context->params1 = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) |
+ (MTHCA_FLIGHT_LIMIT << 24) |
+ MTHCA_QP_BIT_SWE);
+ if (qp->sq_policy == IB_SIGNAL_ALL_WR)
+ qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC);
+ if (attr_mask & IB_QP_RETRY_CNT) {
+ qp_context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ if (attr->max_rd_atomic) {
+ qp_context->params1 |=
+ cpu_to_be32(MTHCA_QP_BIT_SRE |
+ MTHCA_QP_BIT_SAE);
+ qp_context->params1 |=
+ cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
+ }
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
+ }
+
+ if (attr_mask & IB_QP_SQ_PSN)
+ qp_context->next_send_psn = cpu_to_be32(attr->sq_psn);
+ qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn);
+
+ if (mthca_is_memfree(dev)) {
+ qp_context->snd_wqe_base_l = cpu_to_be32(qp->send_wqe_offset);
+ qp_context->snd_db_index = cpu_to_be32(qp->sq.db_index);
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ if (attr->max_dest_rd_atomic)
+ qp_context->params2 |=
+ cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
+
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
+ }
+
+ if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
+ qp_context->params2 |= get_hw_access_flags(qp, attr, attr_mask);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
+ MTHCA_QP_OPTPAR_RRE |
+ MTHCA_QP_OPTPAR_RAE);
+ }
+
+ qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
+
+ if (ibqp->srq)
+ qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RIC);
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+ qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT);
+ }
+ if (attr_mask & IB_QP_RQ_PSN)
+ qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
+
+ qp_context->ra_buff_indx =
+ cpu_to_be32(dev->qp_table.rdb_base +
+ ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE <<
+ dev->qp_table.rdb_shift));
+
+ qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn);
+
+ if (mthca_is_memfree(dev))
+ qp_context->rcv_db_index = cpu_to_be32(qp->rq.db_index);
+
+ if (attr_mask & IB_QP_QKEY) {
+ qp_context->qkey = cpu_to_be32(attr->qkey);
+ qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY);
+ }
+
+ if (ibqp->srq)
+ qp_context->srqn = cpu_to_be32(1 << 24 |
+ to_msrq(ibqp->srq)->srqn);
+
+ if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
+ attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY &&
+ attr->en_sqd_async_notify)
+ sqd_event = 1 << 31;
+
+ err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0,
+ mailbox, sqd_event, &status);
+ if (err)
+ goto out_mailbox;
+ if (status) {
+ mthca_warn(dev, "modify QP %d->%d returned status %02x.\n",
+ cur_state, new_state, status);
+ err = -EINVAL;
+ goto out_mailbox;
+ }
+
+ qp->state = new_state;
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ qp->atomic_rd_en = attr->qp_access_flags;
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ qp->resp_depth = attr->max_dest_rd_atomic;
+ if (attr_mask & IB_QP_PORT)
+ qp->port = attr->port_num;
+ if (attr_mask & IB_QP_ALT_PATH)
+ qp->alt_port = attr->alt_port_num;
+
+ if (is_sqp(dev, qp))
+ store_attrs(to_msqp(qp), attr, attr_mask);
+
+ /*
+ * If we moved QP0 to RTR, bring the IB link up; if we moved
+ * QP0 to RESET or ERROR, bring the link back down.
+ */
+ if (is_qp0(dev, qp)) {
+ if (cur_state != IB_QPS_RTR &&
+ new_state == IB_QPS_RTR)
+ init_port(dev, qp->port);
+
+ if (cur_state != IB_QPS_RESET &&
+ cur_state != IB_QPS_ERR &&
+ (new_state == IB_QPS_RESET ||
+ new_state == IB_QPS_ERR))
+ mthca_CLOSE_IB(dev, qp->port, &status);
+ }
+
+ /*
+ * If we moved a kernel QP to RESET, clean up all old CQ
+ * entries and reinitialize the QP.
+ */
+ if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
+ mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
+ qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+ if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
+ mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL);
+
+ mthca_wq_reset(&qp->sq);
+ qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
+
+ mthca_wq_reset(&qp->rq);
+ qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
+
+ if (mthca_is_memfree(dev)) {
+ *qp->sq.db = 0;
+ *qp->rq.db = 0;
+ }
+ }
+
+out_mailbox:
+ mthca_free_mailbox(dev, mailbox);
+out:
+ return err;
+}
+
+int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ enum ib_qp_state cur_state, new_state;
+ int err = -EINVAL;
+
+ mutex_lock(&qp->mutex);
+ if (attr_mask & IB_QP_CUR_STATE) {
+ cur_state = attr->cur_qp_state;
+ } else {
+ spin_lock_irq(&qp->sq.lock);
+ spin_lock(&qp->rq.lock);
+ cur_state = qp->state;
+ spin_unlock(&qp->rq.lock);
+ spin_unlock_irq(&qp->sq.lock);
+ }
+
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+ mthca_dbg(dev, "Bad QP transition (transport %d) "
+ "%d->%d with attr 0x%08x\n",
+ qp->transport, cur_state, new_state,
+ attr_mask);
+ goto out;
+ }
+
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ attr->pkey_index >= dev->limits.pkey_table_len) {
+ mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
+ attr->pkey_index, dev->limits.pkey_table_len-1);
+ goto out;
+ }
+
+ if ((attr_mask & IB_QP_PORT) &&
+ (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
+ mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
+ goto out;
+ }
+
+	if ((attr_mask & IB_QP_MAX_QP_RD_ATOMIC) &&
+	    attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
+ mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
+ attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
+ goto out;
+ }
+
+	if ((attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) &&
+	    attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
+ mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
+ attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
+ goto out;
+ }
+
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ err = 0;
+ goto out;
+ }
+
+ err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+
+out:
+ mutex_unlock(&qp->mutex);
+ return err;
+}
+
+static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz)
+{
+ /*
+ * Calculate the maximum size of WQE s/g segments, excluding
+ * the next segment and other non-data segments.
+ */
+ int max_data_size = desc_sz - sizeof (struct mthca_next_seg);
+
+ switch (qp->transport) {
+ case MLX:
+ max_data_size -= 2 * sizeof (struct mthca_data_seg);
+ break;
+
+ case UD:
+ if (mthca_is_memfree(dev))
+ max_data_size -= sizeof (struct mthca_arbel_ud_seg);
+ else
+ max_data_size -= sizeof (struct mthca_tavor_ud_seg);
+ break;
+
+ default:
+ max_data_size -= sizeof (struct mthca_raddr_seg);
+ break;
+ }
+
+ return max_data_size;
+}
+
+static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size)
+{
+ /* We don't support inline data for kernel QPs (yet). */
+ return pd->ibpd.uobject ? max_data_size - MTHCA_INLINE_HEADER_SIZE : 0;
+}
+
+static void mthca_adjust_qp_caps(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_qp *qp)
+{
+ int max_data_size = mthca_max_data_size(dev, qp,
+ min(dev->limits.max_desc_sz,
+ 1 << qp->sq.wqe_shift));
+
+ qp->max_inline_data = mthca_max_inline_data(pd, max_data_size);
+
+ qp->sq.max_gs = min_t(int, dev->limits.max_sg,
+ max_data_size / sizeof (struct mthca_data_seg));
+ qp->rq.max_gs = min_t(int, dev->limits.max_sg,
+ (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) -
+ sizeof (struct mthca_next_seg)) /
+ sizeof (struct mthca_data_seg));
+}
+
+/*
+ * Allocate and register buffer for WQEs. qp->rq.max, sq.max,
+ * rq.max_gs and sq.max_gs must all be assigned.
+ * mthca_alloc_wqe_buf will calculate rq.wqe_shift and
+ * sq.wqe_shift (as well as send_wqe_offset, is_direct, and
+ * queue)
+ */
+static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_qp *qp)
+{
+ int size;
+ int err = -ENOMEM;
+
+ size = sizeof (struct mthca_next_seg) +
+ qp->rq.max_gs * sizeof (struct mthca_data_seg);
+
+ if (size > dev->limits.max_desc_sz)
+ return -EINVAL;
+
+ for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
+ qp->rq.wqe_shift++)
+ ; /* nothing */
+
+ size = qp->sq.max_gs * sizeof (struct mthca_data_seg);
+ switch (qp->transport) {
+ case MLX:
+ size += 2 * sizeof (struct mthca_data_seg);
+ break;
+
+ case UD:
+ size += mthca_is_memfree(dev) ?
+ sizeof (struct mthca_arbel_ud_seg) :
+ sizeof (struct mthca_tavor_ud_seg);
+ break;
+
+ case UC:
+ size += sizeof (struct mthca_raddr_seg);
+ break;
+
+ case RC:
+ size += sizeof (struct mthca_raddr_seg);
+ /*
+ * An atomic op will require an atomic segment, a
+ * remote address segment and one scatter entry.
+ */
+ size = max_t(int, size,
+ sizeof (struct mthca_atomic_seg) +
+ sizeof (struct mthca_raddr_seg) +
+ sizeof (struct mthca_data_seg));
+ break;
+
+ default:
+ break;
+ }
+
+ /* Make sure that we have enough space for a bind request */
+ size = max_t(int, size, sizeof (struct mthca_bind_seg));
+
+ size += sizeof (struct mthca_next_seg);
+
+ if (size > dev->limits.max_desc_sz)
+ return -EINVAL;
+
+ for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
+ qp->sq.wqe_shift++)
+ ; /* nothing */
+
+ qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
+ 1 << qp->sq.wqe_shift);
+
+ /*
+ * If this is a userspace QP, we don't actually have to
+ * allocate anything. All we need is to calculate the WQE
+ * sizes and the send_wqe_offset, so we're done now.
+ */
+ if (pd->ibpd.uobject)
+ return 0;
+
+ size = PAGE_ALIGN(qp->send_wqe_offset +
+ (qp->sq.max << qp->sq.wqe_shift));
+
+ qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64),
+ GFP_KERNEL);
+ if (!qp->wrid)
+ goto err_out;
+
+ err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE,
+ &qp->queue, &qp->is_direct, pd, 0, &qp->mr);
+ if (err)
+ goto err_out;
+
+ return 0;
+
+err_out:
+ kfree(qp->wrid);
+ return err;
+}
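The two wqe_shift loops above simply round the descriptor size up to a power of two, with a floor of 64 bytes. A minimal equivalent sketch (the helper name is made up for illustration):

	/* Illustrative only: what the wqe_shift loops compute. */
	static int wqe_shift_for(int size)
	{
		int shift = 6;			/* minimum WQE stride is 64 bytes */

		while ((1 << shift) < size)
			++shift;

		return shift;
	}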
+
+static void mthca_free_wqe_buf(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ mthca_buf_free(dev, PAGE_ALIGN(qp->send_wqe_offset +
+ (qp->sq.max << qp->sq.wqe_shift)),
+ &qp->queue, qp->is_direct, &qp->mr);
+ kfree(qp->wrid);
+}
+
+static int mthca_map_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ int ret;
+
+ if (mthca_is_memfree(dev)) {
+ ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
+ if (ret)
+ return ret;
+
+ ret = mthca_table_get(dev, dev->qp_table.eqp_table, qp->qpn);
+ if (ret)
+ goto err_qpc;
+
+ ret = mthca_table_get(dev, dev->qp_table.rdb_table,
+ qp->qpn << dev->qp_table.rdb_shift);
+ if (ret)
+ goto err_eqpc;
+
+ }
+
+ return 0;
+
+err_eqpc:
+ mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
+
+err_qpc:
+ mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
+
+ return ret;
+}
+
+static void mthca_unmap_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ mthca_table_put(dev, dev->qp_table.rdb_table,
+ qp->qpn << dev->qp_table.rdb_shift);
+ mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
+ mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
+}
+
+static int mthca_alloc_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ if (mthca_is_memfree(dev)) {
+ qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
+ qp->qpn, &qp->rq.db);
+ if (qp->rq.db_index < 0)
+ return -ENOMEM;
+
+ qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
+ qp->qpn, &qp->sq.db);
+ if (qp->sq.db_index < 0) {
+ mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+static void mthca_free_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ if (mthca_is_memfree(dev)) {
+ mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
+ mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+ }
+}
+
+static int mthca_alloc_qp_common(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_cq *send_cq,
+ struct mthca_cq *recv_cq,
+ enum ib_sig_type send_policy,
+ struct mthca_qp *qp)
+{
+ int ret;
+ int i;
+ struct mthca_next_seg *next;
+
+ qp->refcount = 1;
+ init_waitqueue_head(&qp->wait);
+ mutex_init(&qp->mutex);
+ qp->state = IB_QPS_RESET;
+ qp->atomic_rd_en = 0;
+ qp->resp_depth = 0;
+ qp->sq_policy = send_policy;
+ mthca_wq_reset(&qp->sq);
+ mthca_wq_reset(&qp->rq);
+
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+
+ ret = mthca_map_memfree(dev, qp);
+ if (ret)
+ return ret;
+
+ ret = mthca_alloc_wqe_buf(dev, pd, qp);
+ if (ret) {
+ mthca_unmap_memfree(dev, qp);
+ return ret;
+ }
+
+ mthca_adjust_qp_caps(dev, pd, qp);
+
+ /*
+ * If this is a userspace QP, we're done now. The doorbells
+ * will be allocated and buffers will be initialized in
+ * userspace.
+ */
+ if (pd->ibpd.uobject)
+ return 0;
+
+ ret = mthca_alloc_memfree(dev, qp);
+ if (ret) {
+ mthca_free_wqe_buf(dev, qp);
+ mthca_unmap_memfree(dev, qp);
+ return ret;
+ }
+
+ if (mthca_is_memfree(dev)) {
+ struct mthca_data_seg *scatter;
+ int size = (sizeof (struct mthca_next_seg) +
+ qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16;
+
+ for (i = 0; i < qp->rq.max; ++i) {
+ next = get_recv_wqe(qp, i);
+ next->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) <<
+ qp->rq.wqe_shift);
+ next->ee_nds = cpu_to_be32(size);
+
+ for (scatter = (void *) (next + 1);
+ (void *) scatter < (void *) next + (1 << qp->rq.wqe_shift);
+ ++scatter)
+ scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
+ }
+
+ for (i = 0; i < qp->sq.max; ++i) {
+ next = get_send_wqe(qp, i);
+ next->nda_op = cpu_to_be32((((i + 1) & (qp->sq.max - 1)) <<
+ qp->sq.wqe_shift) +
+ qp->send_wqe_offset);
+ }
+ } else {
+ for (i = 0; i < qp->rq.max; ++i) {
+ next = get_recv_wqe(qp, i);
+ next->nda_op = htonl((((i + 1) % qp->rq.max) <<
+ qp->rq.wqe_shift) | 1);
+ }
+
+ }
+
+ qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
+ qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
+
+ return 0;
+}
+
+static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
+ struct mthca_pd *pd, struct mthca_qp *qp)
+{
+ int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz);
+
+ /* Sanity check QP size before proceeding */
+ if (cap->max_send_wr > dev->limits.max_wqes ||
+ cap->max_recv_wr > dev->limits.max_wqes ||
+ cap->max_send_sge > dev->limits.max_sg ||
+ cap->max_recv_sge > dev->limits.max_sg ||
+ cap->max_inline_data > mthca_max_inline_data(pd, max_data_size))
+ return -EINVAL;
+
+ /*
+ * For MLX transport we need 2 extra send gather entries:
+ * one for the header and one for the checksum at the end
+ */
+ if (qp->transport == MLX && cap->max_send_sge + 2 > dev->limits.max_sg)
+ return -EINVAL;
+
+ if (mthca_is_memfree(dev)) {
+ qp->rq.max = cap->max_recv_wr ?
+ roundup_pow_of_two(cap->max_recv_wr) : 0;
+ qp->sq.max = cap->max_send_wr ?
+ roundup_pow_of_two(cap->max_send_wr) : 0;
+ } else {
+ qp->rq.max = cap->max_recv_wr;
+ qp->sq.max = cap->max_send_wr;
+ }
+
+ qp->rq.max_gs = cap->max_recv_sge;
+ qp->sq.max_gs = max_t(int, cap->max_send_sge,
+ ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE,
+ MTHCA_INLINE_CHUNK_SIZE) /
+ sizeof (struct mthca_data_seg));
+
+ return 0;
+}
+
+int mthca_alloc_qp(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_cq *send_cq,
+ struct mthca_cq *recv_cq,
+ enum ib_qp_type type,
+ enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
+ struct mthca_qp *qp)
+{
+ int err;
+
+ switch (type) {
+ case IB_QPT_RC: qp->transport = RC; break;
+ case IB_QPT_UC: qp->transport = UC; break;
+ case IB_QPT_UD: qp->transport = UD; break;
+ default: return -EINVAL;
+ }
+
+ err = mthca_set_qp_size(dev, cap, pd, qp);
+ if (err)
+ return err;
+
+ qp->qpn = mthca_alloc(&dev->qp_table.alloc);
+ if (qp->qpn == -1)
+ return -ENOMEM;
+
+ /* initialize port to zero for error-catching. */
+ qp->port = 0;
+
+ err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
+ send_policy, qp);
+ if (err) {
+ mthca_free(&dev->qp_table.alloc, qp->qpn);
+ return err;
+ }
+
+ spin_lock_irq(&dev->qp_table.lock);
+ mthca_array_set(&dev->qp_table.qp,
+ qp->qpn & (dev->limits.num_qps - 1), qp);
+ spin_unlock_irq(&dev->qp_table.lock);
+
+ return 0;
+}
+
+static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+{
+ if (send_cq == recv_cq)
+ spin_lock_irq(&send_cq->lock);
+ else if (send_cq->cqn < recv_cq->cqn) {
+ spin_lock_irq(&send_cq->lock);
+ spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock_irq(&recv_cq->lock);
+ spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
+ }
+}
+
+static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+{
+ if (send_cq == recv_cq)
+ spin_unlock_irq(&send_cq->lock);
+ else if (send_cq->cqn < recv_cq->cqn) {
+ spin_unlock(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+ } else {
+ spin_unlock(&send_cq->lock);
+ spin_unlock_irq(&recv_cq->lock);
+ }
+}
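These helpers take the two CQ locks in a fixed order (lower CQN first), so two paths locking the same pair of CQs can never deadlock. A hedged usage sketch mirroring the pattern in mthca_free_qp() below (the wrapper name is made up):

	static void clear_qp_table_entry(struct mthca_dev *dev, struct mthca_qp *qp,
					 struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
	{
		/* Hold both CQ locks so the CQ poll path never sees a half-updated table. */
		mthca_lock_cqs(send_cq, recv_cq);

		spin_lock(&dev->qp_table.lock);
		mthca_array_clear(&dev->qp_table.qp,
				  qp->qpn & (dev->limits.num_qps - 1));
		spin_unlock(&dev->qp_table.lock);

		mthca_unlock_cqs(send_cq, recv_cq);
	}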
+
+int mthca_alloc_sqp(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_cq *send_cq,
+ struct mthca_cq *recv_cq,
+ enum ib_sig_type send_policy,
+ struct ib_qp_cap *cap,
+ int qpn,
+ int port,
+ struct mthca_sqp *sqp)
+{
+ u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
+ int err;
+
+ sqp->qp.transport = MLX;
+ err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
+ if (err)
+ return err;
+
+ sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
+ sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
+ &sqp->header_dma, GFP_KERNEL);
+ if (!sqp->header_buf)
+ return -ENOMEM;
+
+ spin_lock_irq(&dev->qp_table.lock);
+ if (mthca_array_get(&dev->qp_table.qp, mqpn))
+ err = -EBUSY;
+ else
+ mthca_array_set(&dev->qp_table.qp, mqpn, sqp);
+ spin_unlock_irq(&dev->qp_table.lock);
+
+ if (err)
+ goto err_out;
+
+ sqp->qp.port = port;
+ sqp->qp.qpn = mqpn;
+ sqp->qp.transport = MLX;
+
+ err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
+ send_policy, &sqp->qp);
+ if (err)
+ goto err_out_free;
+
+ atomic_inc(&pd->sqp_count);
+
+ return 0;
+
+ err_out_free:
+ /*
+ * Lock CQs here, so that CQ polling code can do QP lookup
+ * without taking a lock.
+ */
+ mthca_lock_cqs(send_cq, recv_cq);
+
+ spin_lock(&dev->qp_table.lock);
+ mthca_array_clear(&dev->qp_table.qp, mqpn);
+ spin_unlock(&dev->qp_table.lock);
+
+ mthca_unlock_cqs(send_cq, recv_cq);
+
+ err_out:
+ dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
+ sqp->header_buf, sqp->header_dma);
+
+ return err;
+}
+
+static inline int get_qp_refcount(struct mthca_dev *dev, struct mthca_qp *qp)
+{
+ int c;
+
+ spin_lock_irq(&dev->qp_table.lock);
+ c = qp->refcount;
+ spin_unlock_irq(&dev->qp_table.lock);
+
+ return c;
+}
+
+void mthca_free_qp(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ u8 status;
+ struct mthca_cq *send_cq;
+ struct mthca_cq *recv_cq;
+
+ send_cq = to_mcq(qp->ibqp.send_cq);
+ recv_cq = to_mcq(qp->ibqp.recv_cq);
+
+ /*
+ * Lock CQs here, so that CQ polling code can do QP lookup
+ * without taking a lock.
+ */
+ mthca_lock_cqs(send_cq, recv_cq);
+
+ spin_lock(&dev->qp_table.lock);
+ mthca_array_clear(&dev->qp_table.qp,
+ qp->qpn & (dev->limits.num_qps - 1));
+ --qp->refcount;
+ spin_unlock(&dev->qp_table.lock);
+
+ mthca_unlock_cqs(send_cq, recv_cq);
+
+ wait_event(qp->wait, !get_qp_refcount(dev, qp));
+
+ if (qp->state != IB_QPS_RESET)
+ mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
+ NULL, 0, &status);
+
+ /*
+ * If this is a userspace QP, the buffers, MR, CQs and so on
+ * will be cleaned up in userspace, so all we have to do is
+ * unref the mem-free tables and free the QPN in our table.
+ */
+ if (!qp->ibqp.uobject) {
+ mthca_cq_clean(dev, recv_cq, qp->qpn,
+ qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+ if (send_cq != recv_cq)
+ mthca_cq_clean(dev, send_cq, qp->qpn, NULL);
+
+ mthca_free_memfree(dev, qp);
+ mthca_free_wqe_buf(dev, qp);
+ }
+
+ mthca_unmap_memfree(dev, qp);
+
+ if (is_sqp(dev, qp)) {
+ atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
+ dma_free_coherent(&dev->pdev->dev,
+ to_msqp(qp)->header_buf_size,
+ to_msqp(qp)->header_buf,
+ to_msqp(qp)->header_dma);
+ } else
+ mthca_free(&dev->qp_table.alloc, qp->qpn);
+}
+
+/* Create UD header for an MLX send and build a data segment for it */
+static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
+ int ind, struct ib_send_wr *wr,
+ struct mthca_mlx_seg *mlx,
+ struct mthca_data_seg *data)
+{
+ int header_size;
+ int err;
+ u16 pkey;
+
+ ib_ud_header_init(256, /* assume a MAD */
+ mthca_ah_grh_present(to_mah(wr->wr.ud.ah)),
+ &sqp->ud_header);
+
+ err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
+ if (err)
+ return err;
+ mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
+ mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
+ (sqp->ud_header.lrh.destination_lid ==
+ IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) |
+ (sqp->ud_header.lrh.service_level << 8));
+ mlx->rlid = sqp->ud_header.lrh.destination_lid;
+ mlx->vcrc = 0;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ sqp->ud_header.immediate_present = 0;
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ sqp->ud_header.immediate_present = 1;
+ sqp->ud_header.immediate_data = wr->ex.imm_data;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
+ if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
+ sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
+ sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+ if (!sqp->qp.ibqp.qp_num)
+ ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
+ sqp->pkey_index, &pkey);
+ else
+ ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
+ wr->wr.ud.pkey_index, &pkey);
+ sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
+ sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
+ sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
+ sqp->qkey : wr->wr.ud.remote_qkey);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+
+ header_size = ib_ud_header_pack(&sqp->ud_header,
+ sqp->header_buf +
+ ind * MTHCA_UD_HEADER_SIZE);
+
+ data->byte_count = cpu_to_be32(header_size);
+ data->lkey = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
+ data->addr = cpu_to_be64(sqp->header_dma +
+ ind * MTHCA_UD_HEADER_SIZE);
+
+ return 0;
+}
+
+static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
+ struct ib_cq *ib_cq)
+{
+ unsigned cur;
+ struct mthca_cq *cq;
+
+ cur = wq->head - wq->tail;
+ if (likely(cur + nreq < wq->max))
+ return 0;
+
+ cq = to_mcq(ib_cq);
+ spin_lock(&cq->lock);
+ cur = wq->head - wq->tail;
+ spin_unlock(&cq->lock);
+
+ return cur + nreq >= wq->max;
+}
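The check above assumes head and tail are free-running unsigned counters, so their difference (the number of outstanding WQEs) stays correct even across 32-bit wraparound; the locked re-read only exists to pick up completions that raced with the unlocked fast path. A tiny sketch of the wrap-safe arithmetic being relied on (function name made up):

	/* Illustration: unsigned subtraction of free-running counters is wrap-safe. */
	static unsigned int wq_outstanding_example(void)
	{
		unsigned int tail = 0xfffffff0;
		unsigned int head = tail + 20;	/* has wrapped past 2^32 by now */

		return head - tail;		/* still 20 */
	}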
+
+static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
+ u64 remote_addr, u32 rkey)
+{
+ rseg->raddr = cpu_to_be64(remote_addr);
+ rseg->rkey = cpu_to_be32(rkey);
+ rseg->reserved = 0;
+}
+
+static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
+ struct ib_send_wr *wr)
+{
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
+ aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
+ } else {
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+ aseg->compare = 0;
+ }
+}
+
+static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
+ struct ib_send_wr *wr)
+{
+ useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
+ useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
+ useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+}
+
+static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
+ struct ib_send_wr *wr)
+{
+ memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
+ useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+}
+
+int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ void *wqe;
+ void *prev_wqe;
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int i;
+ int size;
+ /*
+ * f0 and size0 are only used if nreq != 0, and they will
+ * always be initialized the first time through the main loop
+ * before nreq is incremented. So nreq cannot become non-zero
+ * without initializing f0 and size0, and they are in fact
+ * never used uninitialized.
+ */
+ int uninitialized_var(size0);
+ u32 uninitialized_var(f0);
+ int ind;
+ u8 op0 = 0;
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ /* XXX check that state is OK to post send */
+
+ ind = qp->sq.next_ind;
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+ mthca_err(dev, "SQ %06x full (%u head, %u tail,"
+ " %d max, %d nreq)\n", qp->qpn,
+ qp->sq.head, qp->sq.tail,
+ qp->sq.max, nreq);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe = get_send_wqe(qp, ind);
+ prev_wqe = qp->sq.last;
+ qp->sq.last = wqe;
+
+ ((struct mthca_next_seg *) wqe)->nda_op = 0;
+ ((struct mthca_next_seg *) wqe)->ee_nds = 0;
+ ((struct mthca_next_seg *) wqe)->flags =
+ ((wr->send_flags & IB_SEND_SIGNALED) ?
+ cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
+ ((wr->send_flags & IB_SEND_SOLICITED) ?
+ cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0) |
+ cpu_to_be32(1);
+ if (wr->opcode == IB_WR_SEND_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+ ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
+
+ wqe += sizeof (struct mthca_next_seg);
+ size = sizeof (struct mthca_next_seg) / 16;
+
+ switch (qp->transport) {
+ case RC:
+ switch (wr->opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+ wr->wr.atomic.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
+
+ set_atomic_seg(wqe, wr);
+ wqe += sizeof (struct mthca_atomic_seg);
+ size += (sizeof (struct mthca_raddr_seg) +
+ sizeof (struct mthca_atomic_seg)) / 16;
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ case IB_WR_RDMA_READ:
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
+ size += sizeof (struct mthca_raddr_seg) / 16;
+ break;
+
+ default:
+ /* No extra segments required for sends */
+ break;
+ }
+
+ break;
+
+ case UC:
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
+ size += sizeof (struct mthca_raddr_seg) / 16;
+ break;
+
+ default:
+ /* No extra segments required for sends */
+ break;
+ }
+
+ break;
+
+ case UD:
+ set_tavor_ud_seg(wqe, wr);
+ wqe += sizeof (struct mthca_tavor_ud_seg);
+ size += sizeof (struct mthca_tavor_ud_seg) / 16;
+ break;
+
+ case MLX:
+ err = build_mlx_header(dev, to_msqp(qp), ind, wr,
+ wqe - sizeof (struct mthca_next_seg),
+ wqe);
+ if (err) {
+ *bad_wr = wr;
+ goto out;
+ }
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ break;
+ }
+
+ if (wr->num_sge > qp->sq.max_gs) {
+ mthca_err(dev, "too many gathers\n");
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ }
+
+ /* Add one more inline data segment for ICRC */
+ if (qp->transport == MLX) {
+ ((struct mthca_data_seg *) wqe)->byte_count =
+ cpu_to_be32((1 << 31) | 4);
+ ((u32 *) wqe)[1] = 0;
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ }
+
+ qp->wrid[ind + qp->rq.max] = wr->wr_id;
+
+ if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
+ mthca_err(dev, "opcode invalid\n");
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ ((struct mthca_next_seg *) prev_wqe)->nda_op =
+ cpu_to_be32(((ind << qp->sq.wqe_shift) +
+ qp->send_wqe_offset) |
+ mthca_opcode[wr->opcode]);
+ wmb();
+ ((struct mthca_next_seg *) prev_wqe)->ee_nds =
+ cpu_to_be32((nreq ? 0 : MTHCA_NEXT_DBD) | size |
+ ((wr->send_flags & IB_SEND_FENCE) ?
+ MTHCA_NEXT_FENCE : 0));
+
+ if (!nreq) {
+ size0 = size;
+ op0 = mthca_opcode[wr->opcode];
+ f0 = wr->send_flags & IB_SEND_FENCE ?
+ MTHCA_SEND_DOORBELL_FENCE : 0;
+ }
+
+ ++ind;
+ if (unlikely(ind >= qp->sq.max))
+ ind -= qp->sq.max;
+ }
+
+out:
+ if (likely(nreq)) {
+ wmb();
+
+ mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) +
+ qp->send_wqe_offset) | f0 | op0,
+ (qp->qpn << 8) | size0,
+ dev->kar + MTHCA_SEND_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ /*
+ * Make sure doorbells don't leak out of SQ spinlock
+ * and reach the HCA out of order:
+ */
+ mmiowb();
+ }
+
+ qp->sq.next_ind = ind;
+ qp->sq.head += nreq;
+
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+ return err;
+}
+
+int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int i;
+ int size;
+ /*
+ * size0 is only used if nreq != 0, and it will always be
+ * initialized the first time through the main loop before
+ * nreq is incremented. So nreq cannot become non-zero
+ * without initializing size0, and it is in fact never used
+ * uninitialized.
+ */
+ int uninitialized_var(size0);
+ int ind;
+ void *wqe;
+ void *prev_wqe;
+
+ spin_lock_irqsave(&qp->rq.lock, flags);
+
+ /* XXX check that state is OK to post receive */
+
+ ind = qp->rq.next_ind;
+
+ for (nreq = 0; wr; wr = wr->next) {
+ if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+ mthca_err(dev, "RQ %06x full (%u head, %u tail,"
+ " %d max, %d nreq)\n", qp->qpn,
+ qp->rq.head, qp->rq.tail,
+ qp->rq.max, nreq);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe = get_recv_wqe(qp, ind);
+ prev_wqe = qp->rq.last;
+ qp->rq.last = wqe;
+
+ ((struct mthca_next_seg *) wqe)->ee_nds =
+ cpu_to_be32(MTHCA_NEXT_DBD);
+ ((struct mthca_next_seg *) wqe)->flags = 0;
+
+ wqe += sizeof (struct mthca_next_seg);
+ size = sizeof (struct mthca_next_seg) / 16;
+
+ if (unlikely(wr->num_sge > qp->rq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ }
+
+ qp->wrid[ind] = wr->wr_id;
+
+ ((struct mthca_next_seg *) prev_wqe)->ee_nds =
+ cpu_to_be32(MTHCA_NEXT_DBD | size);
+
+ if (!nreq)
+ size0 = size;
+
+ ++ind;
+ if (unlikely(ind >= qp->rq.max))
+ ind -= qp->rq.max;
+
+ ++nreq;
+ if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
+ nreq = 0;
+
+ wmb();
+
+ mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
+ qp->qpn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+
+ qp->rq.next_ind = ind;
+ qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
+ }
+ }
+
+out:
+ if (likely(nreq)) {
+ wmb();
+
+ mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
+ qp->qpn << 8 | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ }
+
+ qp->rq.next_ind = ind;
+ qp->rq.head += nreq;
+
+ /*
+ * Make sure doorbells don't leak out of RQ spinlock and reach
+ * the HCA out of order:
+ */
+ mmiowb();
+
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+ return err;
+}
+
+int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ u32 dbhi;
+ void *wqe;
+ void *prev_wqe;
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int i;
+ int size;
+ /*
+ * f0 and size0 are only used if nreq != 0, and they will
+ * always be initialized the first time through the main loop
+ * before nreq is incremented. So nreq cannot become non-zero
+ * without initializing f0 and size0, and they are in fact
+ * never used uninitialized.
+ */
+ int uninitialized_var(size0);
+ u32 uninitialized_var(f0);
+ int ind;
+ u8 op0 = 0;
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ /* XXX check that state is OK to post send */
+
+ ind = qp->sq.head & (qp->sq.max - 1);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
+ nreq = 0;
+
+ dbhi = (MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
+ ((qp->sq.head & 0xffff) << 8) | f0 | op0;
+
+ qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+ *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);
+
+ /*
+ * Make sure doorbell record is written before we
+ * write MMIO send doorbell.
+ */
+ wmb();
+
+ mthca_write64(dbhi, (qp->qpn << 8) | size0,
+ dev->kar + MTHCA_SEND_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ }
+
+ if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+ mthca_err(dev, "SQ %06x full (%u head, %u tail,"
+ " %d max, %d nreq)\n", qp->qpn,
+ qp->sq.head, qp->sq.tail,
+ qp->sq.max, nreq);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe = get_send_wqe(qp, ind);
+ prev_wqe = qp->sq.last;
+ qp->sq.last = wqe;
+
+ ((struct mthca_next_seg *) wqe)->flags =
+ ((wr->send_flags & IB_SEND_SIGNALED) ?
+ cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
+ ((wr->send_flags & IB_SEND_SOLICITED) ?
+ cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0) |
+ ((wr->send_flags & IB_SEND_IP_CSUM) ?
+ cpu_to_be32(MTHCA_NEXT_IP_CSUM | MTHCA_NEXT_TCP_UDP_CSUM) : 0) |
+ cpu_to_be32(1);
+ if (wr->opcode == IB_WR_SEND_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+ ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
+
+ wqe += sizeof (struct mthca_next_seg);
+ size = sizeof (struct mthca_next_seg) / 16;
+
+ switch (qp->transport) {
+ case RC:
+ switch (wr->opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+ wr->wr.atomic.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
+
+ set_atomic_seg(wqe, wr);
+ wqe += sizeof (struct mthca_atomic_seg);
+ size += (sizeof (struct mthca_raddr_seg) +
+ sizeof (struct mthca_atomic_seg)) / 16;
+ break;
+
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
+ size += sizeof (struct mthca_raddr_seg) / 16;
+ break;
+
+ default:
+ /* No extra segments required for sends */
+ break;
+ }
+
+ break;
+
+ case UC:
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ wqe += sizeof (struct mthca_raddr_seg);
+ size += sizeof (struct mthca_raddr_seg) / 16;
+ break;
+
+ default:
+ /* No extra segments required for sends */
+ break;
+ }
+
+ break;
+
+ case UD:
+ set_arbel_ud_seg(wqe, wr);
+ wqe += sizeof (struct mthca_arbel_ud_seg);
+ size += sizeof (struct mthca_arbel_ud_seg) / 16;
+ break;
+
+ case MLX:
+ err = build_mlx_header(dev, to_msqp(qp), ind, wr,
+ wqe - sizeof (struct mthca_next_seg),
+ wqe);
+ if (err) {
+ *bad_wr = wr;
+ goto out;
+ }
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ break;
+ }
+
+ if (wr->num_sge > qp->sq.max_gs) {
+ mthca_err(dev, "too many gathers\n");
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ }
+
+ /* Add one more inline data segment for ICRC */
+ if (qp->transport == MLX) {
+ ((struct mthca_data_seg *) wqe)->byte_count =
+ cpu_to_be32((1 << 31) | 4);
+ ((u32 *) wqe)[1] = 0;
+ wqe += sizeof (struct mthca_data_seg);
+ size += sizeof (struct mthca_data_seg) / 16;
+ }
+
+ qp->wrid[ind + qp->rq.max] = wr->wr_id;
+
+ if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
+ mthca_err(dev, "opcode invalid\n");
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ ((struct mthca_next_seg *) prev_wqe)->nda_op =
+ cpu_to_be32(((ind << qp->sq.wqe_shift) +
+ qp->send_wqe_offset) |
+ mthca_opcode[wr->opcode]);
+ wmb();
+ ((struct mthca_next_seg *) prev_wqe)->ee_nds =
+ cpu_to_be32(MTHCA_NEXT_DBD | size |
+ ((wr->send_flags & IB_SEND_FENCE) ?
+ MTHCA_NEXT_FENCE : 0));
+
+ if (!nreq) {
+ size0 = size;
+ op0 = mthca_opcode[wr->opcode];
+ f0 = wr->send_flags & IB_SEND_FENCE ?
+ MTHCA_SEND_DOORBELL_FENCE : 0;
+ }
+
+ ++ind;
+ if (unlikely(ind >= qp->sq.max))
+ ind -= qp->sq.max;
+ }
+
+out:
+ if (likely(nreq)) {
+ dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0;
+
+ qp->sq.head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+ *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);
+
+ /*
+ * Make sure doorbell record is written before we
+ * write MMIO send doorbell.
+ */
+ wmb();
+
+ mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ }
+
+ /*
+ * Make sure doorbells don't leak out of SQ spinlock and reach
+ * the HCA out of order:
+ */
+ mmiowb();
+
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+ return err;
+}
+
+int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mthca_dev *dev = to_mdev(ibqp->device);
+ struct mthca_qp *qp = to_mqp(ibqp);
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int ind;
+ int i;
+ void *wqe;
+
+ spin_lock_irqsave(&qp->rq.lock, flags);
+
+ /* XXX check that state is OK to post receive */
+
+ ind = qp->rq.head & (qp->rq.max - 1);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+ mthca_err(dev, "RQ %06x full (%u head, %u tail,"
+ " %d max, %d nreq)\n", qp->qpn,
+ qp->rq.head, qp->rq.tail,
+ qp->rq.max, nreq);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe = get_recv_wqe(qp, ind);
+
+ ((struct mthca_next_seg *) wqe)->flags = 0;
+
+ wqe += sizeof (struct mthca_next_seg);
+
+ if (unlikely(wr->num_sge > qp->rq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
+ }
+
+ if (i < qp->rq.max_gs)
+ mthca_set_data_seg_inval(wqe);
+
+ qp->wrid[ind] = wr->wr_id;
+
+ ++ind;
+ if (unlikely(ind >= qp->rq.max))
+ ind -= qp->rq.max;
+ }
+out:
+ if (likely(nreq)) {
+ qp->rq.head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+ *qp->rq.db = cpu_to_be32(qp->rq.head & 0xffff);
+ }
+
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+ return err;
+}
+
+void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
+ int index, int *dbd, __be32 *new_wqe)
+{
+ struct mthca_next_seg *next;
+
+ /*
+ * For SRQs, all receive WQEs generate a CQE, so we're always
+ * at the end of the doorbell chain.
+ */
+ if (qp->ibqp.srq && !is_send) {
+ *new_wqe = 0;
+ return;
+ }
+
+ if (is_send)
+ next = get_send_wqe(qp, index);
+ else
+ next = get_recv_wqe(qp, index);
+
+ *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD));
+ if (next->ee_nds & cpu_to_be32(0x3f))
+ *new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) |
+ (next->ee_nds & cpu_to_be32(0x3f));
+ else
+ *new_wqe = 0;
+}
+
+int mthca_init_qp_table(struct mthca_dev *dev)
+{
+ int err;
+ u8 status;
+ int i;
+
+ spin_lock_init(&dev->qp_table.lock);
+
+ /*
+ * We reserve 2 extra QPs per port for the special QPs. The
+ * special QP for port 1 has to be even, so round up.
+ */
+ dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL;
+ err = mthca_alloc_init(&dev->qp_table.alloc,
+ dev->limits.num_qps,
+ (1 << 24) - 1,
+ dev->qp_table.sqp_start +
+ MTHCA_MAX_PORTS * 2);
+ if (err)
+ return err;
+
+ err = mthca_array_init(&dev->qp_table.qp,
+ dev->limits.num_qps);
+ if (err) {
+ mthca_alloc_cleanup(&dev->qp_table.alloc);
+ return err;
+ }
+
+ for (i = 0; i < 2; ++i) {
+ err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_GSI : IB_QPT_SMI,
+ dev->qp_table.sqp_start + i * 2,
+ &status);
+ if (err)
+ goto err_out;
+ if (status) {
+ mthca_warn(dev, "CONF_SPECIAL_QP returned "
+ "status %02x, aborting.\n",
+ status);
+ err = -EINVAL;
+ goto err_out;
+ }
+ }
+ return 0;
+
+ err_out:
+ for (i = 0; i < 2; ++i)
+ mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
+
+ mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
+ mthca_alloc_cleanup(&dev->qp_table.alloc);
+
+ return err;
+}
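For reference, the special QP numbering reserved above matches the arithmetic in mthca_alloc_sqp(): QP0 (SMI) and QP1 (GSI) for each port map onto consecutive QPNs starting at sqp_start. A minimal sketch of that mapping (helper name made up for illustration):

	/* qpn is 0 for the SMI QP (QP0) and 1 for the GSI QP (QP1). */
	static u32 special_qpn(u32 sqp_start, int qpn, int port)
	{
		return qpn * 2 + sqp_start + port - 1;
	}
	/*
	 * e.g. special_qpn(sqp_start, 0, 1) == sqp_start      (port 1 QP0)
	 *      special_qpn(sqp_start, 1, 2) == sqp_start + 3  (port 2 QP1)
	 */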
+
+void mthca_cleanup_qp_table(struct mthca_dev *dev)
+{
+ int i;
+ u8 status;
+
+ for (i = 0; i < 2; ++i)
+ mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
+
+ mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
+ mthca_alloc_cleanup(&dev->qp_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
new file mode 100644
index 0000000..acb6817
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+
+int mthca_reset(struct mthca_dev *mdev)
+{
+ int i;
+ int err = 0;
+ u32 *hca_header = NULL;
+ u32 *bridge_header = NULL;
+ struct pci_dev *bridge = NULL;
+ int bridge_pcix_cap = 0;
+ int hca_pcie_cap = 0;
+ int hca_pcix_cap = 0;
+
+ u16 devctl;
+ u16 linkctl;
+
+#define MTHCA_RESET_OFFSET 0xf0010
+#define MTHCA_RESET_VALUE swab32(1)
+
+ /*
+ * Reset the chip. This is somewhat ugly because we have to
+ * save off the PCI header before reset and then restore it
+ * after the chip reboots. We skip config space offsets 22
+ * after the chip reboots. We skip config space dwords 22
+ * and 23 since those have a special meaning.
+ * To make matters worse, for Tavor (PCI-X HCA) we have to
+ * find the associated bridge device and save off its PCI
+ * header as well.
+ */
+
+ if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE)) {
+ /* Look for the bridge -- its device ID will be 2 more
+		   than the HCA's device ID. */
+ while ((bridge = pci_get_device(mdev->pdev->vendor,
+ mdev->pdev->device + 2,
+ bridge)) != NULL) {
+ if (bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE &&
+ bridge->subordinate == mdev->pdev->bus) {
+ mthca_dbg(mdev, "Found bridge: %s\n",
+ pci_name(bridge));
+ break;
+ }
+ }
+
+ if (!bridge) {
+ /*
+ * Didn't find a bridge for a Tavor device --
+ * assume we're in no-bridge mode and hope for
+ * the best.
+ */
+ mthca_warn(mdev, "No bridge found for %s\n",
+ pci_name(mdev->pdev));
+ }
+
+ }
+
+	/* For Arbel, do we need to save off the full 4K PCI Express header? */
+ hca_header = kmalloc(256, GFP_KERNEL);
+ if (!hca_header) {
+ err = -ENOMEM;
+ mthca_err(mdev, "Couldn't allocate memory to save HCA "
+ "PCI header, aborting.\n");
+ goto out;
+ }
+
+ for (i = 0; i < 64; ++i) {
+ if (i == 22 || i == 23)
+ continue;
+ if (pci_read_config_dword(mdev->pdev, i * 4, hca_header + i)) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't save HCA "
+ "PCI header, aborting.\n");
+ goto out;
+ }
+ }
+
+ hca_pcix_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
+ hca_pcie_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
+
+ if (bridge) {
+ bridge_header = kmalloc(256, GFP_KERNEL);
+ if (!bridge_header) {
+ err = -ENOMEM;
+ mthca_err(mdev, "Couldn't allocate memory to save HCA "
+ "bridge PCI header, aborting.\n");
+ goto out;
+ }
+
+ for (i = 0; i < 64; ++i) {
+ if (i == 22 || i == 23)
+ continue;
+ if (pci_read_config_dword(bridge, i * 4, bridge_header + i)) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't save HCA bridge "
+ "PCI header, aborting.\n");
+ goto out;
+ }
+ }
+ bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
+ if (!bridge_pcix_cap) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't locate HCA bridge "
+ "PCI-X capability, aborting.\n");
+ goto out;
+ }
+ }
+
+ /* actually hit reset */
+ {
+ void __iomem *reset = ioremap(pci_resource_start(mdev->pdev, 0) +
+ MTHCA_RESET_OFFSET, 4);
+
+ if (!reset) {
+ err = -ENOMEM;
+ mthca_err(mdev, "Couldn't map HCA reset register, "
+ "aborting.\n");
+ goto out;
+ }
+
+ writel(MTHCA_RESET_VALUE, reset);
+ iounmap(reset);
+ }
+
+ /* Docs say to wait one second before accessing device */
+ msleep(1000);
+
+ /* Now wait for PCI device to start responding again */
+ {
+ u32 v;
+ int c = 0;
+
+ for (c = 0; c < 100; ++c) {
+ if (pci_read_config_dword(bridge ? bridge : mdev->pdev, 0, &v)) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't access HCA after reset, "
+ "aborting.\n");
+ goto out;
+ }
+
+ if (v != 0xffffffff)
+ goto good;
+
+ msleep(100);
+ }
+
+ err = -ENODEV;
+ mthca_err(mdev, "PCI device did not come back after reset, "
+ "aborting.\n");
+ goto out;
+ }
+
+good:
+ /* Now restore the PCI headers */
+ if (bridge) {
+ if (pci_write_config_dword(bridge, bridge_pcix_cap + 0x8,
+ bridge_header[(bridge_pcix_cap + 0x8) / 4])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA bridge Upstream "
+ "split transaction control, aborting.\n");
+ goto out;
+ }
+ if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc,
+ bridge_header[(bridge_pcix_cap + 0xc) / 4])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA bridge Downstream "
+ "split transaction control, aborting.\n");
+ goto out;
+ }
+ /*
+ * Bridge control register is at 0x3e, so we'll
+ * naturally restore it last in this loop.
+ */
+ for (i = 0; i < 16; ++i) {
+ if (i * 4 == PCI_COMMAND)
+ continue;
+
+ if (pci_write_config_dword(bridge, i * 4, bridge_header[i])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA bridge reg %x, "
+ "aborting.\n", i);
+ goto out;
+ }
+ }
+
+ if (pci_write_config_dword(bridge, PCI_COMMAND,
+ bridge_header[PCI_COMMAND / 4])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, "
+ "aborting.\n");
+ goto out;
+ }
+ }
+
+ if (hca_pcix_cap) {
+ if (pci_write_config_dword(mdev->pdev, hca_pcix_cap,
+ hca_header[hca_pcix_cap / 4])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA PCI-X "
+ "command register, aborting.\n");
+ goto out;
+ }
+ }
+
+ if (hca_pcie_cap) {
+ devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4];
+ if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_DEVCTL,
+ devctl)) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA PCI Express "
+ "Device Control register, aborting.\n");
+ goto out;
+ }
+ linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4];
+ if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_LNKCTL,
+ linkctl)) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA PCI Express "
+ "Link control register, aborting.\n");
+ goto out;
+ }
+ }
+
+ for (i = 0; i < 16; ++i) {
+ if (i * 4 == PCI_COMMAND)
+ continue;
+
+ if (pci_write_config_dword(mdev->pdev, i * 4, hca_header[i])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA reg %x, "
+ "aborting.\n", i);
+ goto out;
+ }
+ }
+
+ if (pci_write_config_dword(mdev->pdev, PCI_COMMAND,
+ hca_header[PCI_COMMAND / 4])) {
+ err = -ENODEV;
+ mthca_err(mdev, "Couldn't restore HCA COMMAND, "
+ "aborting.\n");
+ goto out;
+ }
+
+out:
+ if (bridge)
+ pci_dev_put(bridge);
+ kfree(bridge_header);
+ kfree(hca_header);
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
new file mode 100644
index 0000000..4fabe62
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -0,0 +1,715 @@
+/*
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+
+#include <asm/io.h>
+
+#include "mthca_dev.h"
+#include "mthca_cmd.h"
+#include "mthca_memfree.h"
+#include "mthca_wqe.h"
+
+enum {
+ MTHCA_MAX_DIRECT_SRQ_SIZE = 4 * PAGE_SIZE
+};
+
+struct mthca_tavor_srq_context {
+ __be64 wqe_base_ds; /* low 6 bits is descriptor size */
+ __be32 state_pd;
+ __be32 lkey;
+ __be32 uar;
+ __be16 limit_watermark;
+ __be16 wqe_cnt;
+ u32 reserved[2];
+};
+
+struct mthca_arbel_srq_context {
+ __be32 state_logsize_srqn;
+ __be32 lkey;
+ __be32 db_index;
+ __be32 logstride_usrpage;
+ __be64 wqe_base;
+ __be32 eq_pd;
+ __be16 limit_watermark;
+ __be16 wqe_cnt;
+ u16 reserved1;
+ __be16 wqe_counter;
+ u32 reserved2[3];
+};
+
+static void *get_wqe(struct mthca_srq *srq, int n)
+{
+ if (srq->is_direct)
+ return srq->queue.direct.buf + (n << srq->wqe_shift);
+ else
+ return srq->queue.page_list[(n << srq->wqe_shift) >> PAGE_SHIFT].buf +
+ ((n << srq->wqe_shift) & (PAGE_SIZE - 1));
+}
+
+/*
+ * Return a pointer to the location within a WQE that we're using as a
+ * link when the WQE is in the free list. We use the imm field
+ * because in the Tavor case, posting a WQE may overwrite the next
+ * segment of the previous WQE, but a receive WQE will never touch the
+ * imm field. This avoids corrupting our free list if the previous
+ * WQE has already completed and been put on the free list when we
+ * post the next WQE.
+ */
+static inline int *wqe_to_link(void *wqe)
+{
+ return (int *) (wqe + offsetof(struct mthca_next_seg, imm));
+}
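Per the comment above, the free list is a singly linked list threaded through the imm fields: it starts at srq->first_free and ends at a -1 sentinel. A purely illustrative walk (helper name made up; the caller would need to hold srq->lock):

	static void srq_dump_free_list(struct mthca_srq *srq)
	{
		int i;

		for (i = srq->first_free; i >= 0; i = *wqe_to_link(get_wqe(srq, i)))
			pr_debug("free SRQ WQE index %d\n", i);
	}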
+
+static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_srq *srq,
+ struct mthca_tavor_srq_context *context)
+{
+ memset(context, 0, sizeof *context);
+
+ context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4));
+ context->state_pd = cpu_to_be32(pd->pd_num);
+ context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
+
+ if (pd->ibpd.uobject)
+ context->uar =
+ cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
+ else
+ context->uar = cpu_to_be32(dev->driver_uar.index);
+}
+
+static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
+ struct mthca_pd *pd,
+ struct mthca_srq *srq,
+ struct mthca_arbel_srq_context *context)
+{
+ int logsize, max;
+
+ memset(context, 0, sizeof *context);
+
+ /*
+ * Put max in a temporary variable to work around gcc bug
+ * triggered by ilog2() on sparc64.
+ */
+ max = srq->max;
+ logsize = ilog2(max);
+ context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn);
+ context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
+ context->db_index = cpu_to_be32(srq->db_index);
+ context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29);
+ if (pd->ibpd.uobject)
+ context->logstride_usrpage |=
+ cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
+ else
+ context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index);
+ context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num);
+}
+
+static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq)
+{
+ mthca_buf_free(dev, srq->max << srq->wqe_shift, &srq->queue,
+ srq->is_direct, &srq->mr);
+ kfree(srq->wrid);
+}
+
+static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
+ struct mthca_srq *srq)
+{
+ struct mthca_data_seg *scatter;
+ void *wqe;
+ int err;
+ int i;
+
+ if (pd->ibpd.uobject)
+ return 0;
+
+ srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL);
+ if (!srq->wrid)
+ return -ENOMEM;
+
+ err = mthca_buf_alloc(dev, srq->max << srq->wqe_shift,
+ MTHCA_MAX_DIRECT_SRQ_SIZE,
+ &srq->queue, &srq->is_direct, pd, 1, &srq->mr);
+ if (err) {
+ kfree(srq->wrid);
+ return err;
+ }
+
+ /*
+ * Now initialize the SRQ buffer so that all of the WQEs are
+ * linked into the list of free WQEs. In addition, set the
+	 * scatter list L_Keys to the sentinel value of 0x100.
+ */
+ for (i = 0; i < srq->max; ++i) {
+ struct mthca_next_seg *next;
+
+ next = wqe = get_wqe(srq, i);
+
+ if (i < srq->max - 1) {
+ *wqe_to_link(wqe) = i + 1;
+ next->nda_op = htonl(((i + 1) << srq->wqe_shift) | 1);
+ } else {
+ *wqe_to_link(wqe) = -1;
+ next->nda_op = 0;
+ }
+
+ for (scatter = wqe + sizeof (struct mthca_next_seg);
+ (void *) scatter < wqe + (1 << srq->wqe_shift);
+ ++scatter)
+ scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
+ }
+
+ srq->last = get_wqe(srq, srq->max - 1);
+
+ return 0;
+}
+
+int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
+ struct ib_srq_attr *attr, struct mthca_srq *srq)
+{
+ struct mthca_mailbox *mailbox;
+ u8 status;
+ int ds;
+ int err;
+
+ /* Sanity check SRQ size before proceeding */
+ if (attr->max_wr > dev->limits.max_srq_wqes ||
+ attr->max_sge > dev->limits.max_srq_sge)
+ return -EINVAL;
+
+ srq->max = attr->max_wr;
+ srq->max_gs = attr->max_sge;
+ srq->counter = 0;
+
+ if (mthca_is_memfree(dev))
+ srq->max = roundup_pow_of_two(srq->max + 1);
+ else
+ srq->max = srq->max + 1;
+
+ ds = max(64UL,
+ roundup_pow_of_two(sizeof (struct mthca_next_seg) +
+ srq->max_gs * sizeof (struct mthca_data_seg)));
+
+ if (!mthca_is_memfree(dev) && (ds > dev->limits.max_desc_sz))
+ return -EINVAL;
+
+ srq->wqe_shift = ilog2(ds);
+
+ srq->srqn = mthca_alloc(&dev->srq_table.alloc);
+ if (srq->srqn == -1)
+ return -ENOMEM;
+
+ if (mthca_is_memfree(dev)) {
+ err = mthca_table_get(dev, dev->srq_table.table, srq->srqn);
+ if (err)
+ goto err_out;
+
+ if (!pd->ibpd.uobject) {
+ srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ,
+ srq->srqn, &srq->db);
+ if (srq->db_index < 0) {
+ err = -ENOMEM;
+ goto err_out_icm;
+ }
+ }
+ }
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto err_out_db;
+ }
+
+ err = mthca_alloc_srq_buf(dev, pd, srq);
+ if (err)
+ goto err_out_mailbox;
+
+ spin_lock_init(&srq->lock);
+ srq->refcount = 1;
+ init_waitqueue_head(&srq->wait);
+ mutex_init(&srq->mutex);
+
+ if (mthca_is_memfree(dev))
+ mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf);
+ else
+ mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf);
+
+ err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn, &status);
+
+ if (err) {
+ mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err);
+ goto err_out_free_buf;
+ }
+ if (status) {
+ mthca_warn(dev, "SW2HW_SRQ returned status 0x%02x\n",
+ status);
+ err = -EINVAL;
+ goto err_out_free_buf;
+ }
+
+ spin_lock_irq(&dev->srq_table.lock);
+ if (mthca_array_set(&dev->srq_table.srq,
+ srq->srqn & (dev->limits.num_srqs - 1),
+ srq)) {
+ spin_unlock_irq(&dev->srq_table.lock);
+ goto err_out_free_srq;
+ }
+ spin_unlock_irq(&dev->srq_table.lock);
+
+ mthca_free_mailbox(dev, mailbox);
+
+ srq->first_free = 0;
+ srq->last_free = srq->max - 1;
+
+ attr->max_wr = srq->max - 1;
+ attr->max_sge = srq->max_gs;
+
+ return 0;
+
+err_out_free_srq:
+ err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status);
+ if (err)
+ mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);
+ else if (status)
+ mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status);
+
+err_out_free_buf:
+ if (!pd->ibpd.uobject)
+ mthca_free_srq_buf(dev, srq);
+
+err_out_mailbox:
+ mthca_free_mailbox(dev, mailbox);
+
+err_out_db:
+ if (!pd->ibpd.uobject && mthca_is_memfree(dev))
+ mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
+
+err_out_icm:
+ mthca_table_put(dev, dev->srq_table.table, srq->srqn);
+
+err_out:
+ mthca_free(&dev->srq_table.alloc, srq->srqn);
+
+ return err;
+}
+
+static inline int get_srq_refcount(struct mthca_dev *dev, struct mthca_srq *srq)
+{
+ int c;
+
+ spin_lock_irq(&dev->srq_table.lock);
+ c = srq->refcount;
+ spin_unlock_irq(&dev->srq_table.lock);
+
+ return c;
+}
+
+void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
+{
+ struct mthca_mailbox *mailbox;
+ int err;
+ u8 status;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox)) {
+ mthca_warn(dev, "No memory for mailbox to free SRQ.\n");
+ return;
+ }
+
+ err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status);
+ if (err)
+ mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);
+ else if (status)
+ mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status);
+
+ spin_lock_irq(&dev->srq_table.lock);
+ mthca_array_clear(&dev->srq_table.srq,
+ srq->srqn & (dev->limits.num_srqs - 1));
+ --srq->refcount;
+ spin_unlock_irq(&dev->srq_table.lock);
+
+ wait_event(srq->wait, !get_srq_refcount(dev, srq));
+
+ if (!srq->ibsrq.uobject) {
+ mthca_free_srq_buf(dev, srq);
+ if (mthca_is_memfree(dev))
+ mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
+ }
+
+ mthca_table_put(dev, dev->srq_table.table, srq->srqn);
+ mthca_free(&dev->srq_table.alloc, srq->srqn);
+ mthca_free_mailbox(dev, mailbox);
+}
+
+int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
+{
+ struct mthca_dev *dev = to_mdev(ibsrq->device);
+ struct mthca_srq *srq = to_msrq(ibsrq);
+ int ret;
+ u8 status;
+
+ /* We don't support resizing SRQs (yet?) */
+ if (attr_mask & IB_SRQ_MAX_WR)
+ return -EINVAL;
+
+ if (attr_mask & IB_SRQ_LIMIT) {
+ u32 max_wr = mthca_is_memfree(dev) ? srq->max - 1 : srq->max;
+ if (attr->srq_limit > max_wr)
+ return -EINVAL;
+
+ mutex_lock(&srq->mutex);
+ ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status);
+ mutex_unlock(&srq->mutex);
+
+ if (ret)
+ return ret;
+ if (status)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+ struct mthca_dev *dev = to_mdev(ibsrq->device);
+ struct mthca_srq *srq = to_msrq(ibsrq);
+ struct mthca_mailbox *mailbox;
+ struct mthca_arbel_srq_context *arbel_ctx;
+ struct mthca_tavor_srq_context *tavor_ctx;
+ u8 status;
+ int err;
+
+ mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox, &status);
+ if (err)
+ goto out;
+
+ if (mthca_is_memfree(dev)) {
+ arbel_ctx = mailbox->buf;
+ srq_attr->srq_limit = be16_to_cpu(arbel_ctx->limit_watermark);
+ } else {
+ tavor_ctx = mailbox->buf;
+ srq_attr->srq_limit = be16_to_cpu(tavor_ctx->limit_watermark);
+ }
+
+ srq_attr->max_wr = srq->max - 1;
+ srq_attr->max_sge = srq->max_gs;
+
+out:
+ mthca_free_mailbox(dev, mailbox);
+
+ return err;
+}
+
+void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
+ enum ib_event_type event_type)
+{
+ struct mthca_srq *srq;
+ struct ib_event event;
+
+ spin_lock(&dev->srq_table.lock);
+ srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1));
+ if (srq)
+ ++srq->refcount;
+ spin_unlock(&dev->srq_table.lock);
+
+ if (!srq) {
+ mthca_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
+ return;
+ }
+
+ if (!srq->ibsrq.event_handler)
+ goto out;
+
+ event.device = &dev->ib_dev;
+ event.event = event_type;
+ event.element.srq = &srq->ibsrq;
+ srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);
+
+out:
+ spin_lock(&dev->srq_table.lock);
+ if (!--srq->refcount)
+ wake_up(&srq->wait);
+ spin_unlock(&dev->srq_table.lock);
+}
+
+/*
+ * This function must be called with IRQs disabled.
+ */
+void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
+{
+ int ind;
+ struct mthca_next_seg *last_free;
+
+ ind = wqe_addr >> srq->wqe_shift;
+
+ spin_lock(&srq->lock);
+
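+ /*
+ * Put the returned WQE back on the tail of the SRQ free list:
+ * point the current tail at it and mark it as the new end of
+ * the chain.
+ */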
+ last_free = get_wqe(srq, srq->last_free);
+ *wqe_to_link(last_free) = ind;
+ last_free->nda_op = htonl((ind << srq->wqe_shift) | 1);
+ *wqe_to_link(get_wqe(srq, ind)) = -1;
+ srq->last_free = ind;
+
+ spin_unlock(&srq->lock);
+}
+
+int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mthca_dev *dev = to_mdev(ibsrq->device);
+ struct mthca_srq *srq = to_msrq(ibsrq);
+ unsigned long flags;
+ int err = 0;
+ int first_ind;
+ int ind;
+ int next_ind;
+ int nreq;
+ int i;
+ void *wqe;
+ void *prev_wqe;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ first_ind = srq->first_free;
+
+ for (nreq = 0; wr; wr = wr->next) {
+ ind = srq->first_free;
+ wqe = get_wqe(srq, ind);
+ next_ind = *wqe_to_link(wqe);
+
+ if (unlikely(next_ind < 0)) {
+ mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
+ prev_wqe = srq->last;
+ srq->last = wqe;
+
+ ((struct mthca_next_seg *) wqe)->ee_nds = 0;
+ /* flags field will always remain 0 */
+
+ wqe += sizeof (struct mthca_next_seg);
+
+ if (unlikely(wr->num_sge > srq->max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ srq->last = prev_wqe;
+ break;
+ }
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
+ }
+
+ if (i < srq->max_gs)
+ mthca_set_data_seg_inval(wqe);
+
+ ((struct mthca_next_seg *) prev_wqe)->ee_nds =
+ cpu_to_be32(MTHCA_NEXT_DBD);
+
+ srq->wrid[ind] = wr->wr_id;
+ srq->first_free = next_ind;
+
+ ++nreq;
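+ /*
+ * Only a limited number of WQEs can be posted with a single
+ * receive doorbell on Tavor, so once this batch reaches the
+ * limit, ring the doorbell and start building a fresh chain.
+ */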
+ if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
+ nreq = 0;
+
+ /*
+ * Make sure that descriptors are written
+ * before doorbell is rung.
+ */
+ wmb();
+
+ mthca_write64(first_ind << srq->wqe_shift, srq->srqn << 8,
+ dev->kar + MTHCA_RECEIVE_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+
+ first_ind = srq->first_free;
+ }
+ }
+
+ if (likely(nreq)) {
+ /*
+ * Make sure that descriptors are written before
+ * doorbell is rung.
+ */
+ wmb();
+
+ mthca_write64(first_ind << srq->wqe_shift, (srq->srqn << 8) | nreq,
+ dev->kar + MTHCA_RECEIVE_DOORBELL,
+ MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+ }
+
+ /*
+ * Make sure doorbells don't leak out of SRQ spinlock and
+ * reach the HCA out of order:
+ */
+ mmiowb();
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+ return err;
+}
+
+int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mthca_dev *dev = to_mdev(ibsrq->device);
+ struct mthca_srq *srq = to_msrq(ibsrq);
+ unsigned long flags;
+ int err = 0;
+ int ind;
+ int next_ind;
+ int nreq;
+ int i;
+ void *wqe;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ ind = srq->first_free;
+ wqe = get_wqe(srq, ind);
+ next_ind = *wqe_to_link(wqe);
+
+ if (unlikely(next_ind < 0)) {
+ mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
+ ((struct mthca_next_seg *) wqe)->ee_nds = 0;
+ /* flags field will always remain 0 */
+
+ wqe += sizeof (struct mthca_next_seg);
+
+ if (unlikely(wr->num_sge > srq->max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ mthca_set_data_seg(wqe, wr->sg_list + i);
+ wqe += sizeof (struct mthca_data_seg);
+ }
+
+ if (i < srq->max_gs)
+ mthca_set_data_seg_inval(wqe);
+
+ srq->wrid[ind] = wr->wr_id;
+ srq->first_free = next_ind;
+ }
+
+ if (likely(nreq)) {
+ srq->counter += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * we write doorbell record.
+ */
+ wmb();
+ *srq->db = cpu_to_be32(srq->counter);
+ }
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+ return err;
+}
+
+int mthca_max_srq_sge(struct mthca_dev *dev)
+{
+ if (mthca_is_memfree(dev))
+ return dev->limits.max_sg;
+
+ /*
+ * SRQ allocations are based on powers of 2 for Tavor,
+ * (although they only need to be multiples of 16 bytes).
+ *
+ * Therefore, we need to base the max number of sg entries on
+ * the largest power of 2 descriptor size that is <= to the
+ * actual max WQE descriptor size, rather than return the
+ * max_sg value given by the firmware (which is based on WQE
+ * sizes as multiples of 16, not powers of 2).
+ *
+ * If SRQ implementation is changed for Tavor to be based on
+ * multiples of 16, the calculation below can be deleted and
+ * the FW max_sg value returned.
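+ *
+ * For example, if max_desc_sz were 512 bytes, the largest power-of-2
+ * descriptor size is also 512, and with 16-byte next and data
+ * segments the limit below works out to (512 - 16) / 16 = 31
+ * scatter/gather entries.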
+ */
+ return min_t(int, dev->limits.max_sg,
+ ((1 << (fls(dev->limits.max_desc_sz) - 1)) -
+ sizeof (struct mthca_next_seg)) /
+ sizeof (struct mthca_data_seg));
+}
+
+int mthca_init_srq_table(struct mthca_dev *dev)
+{
+ int err;
+
+ if (!(dev->mthca_flags & MTHCA_FLAG_SRQ))
+ return 0;
+
+ spin_lock_init(&dev->srq_table.lock);
+
+ err = mthca_alloc_init(&dev->srq_table.alloc,
+ dev->limits.num_srqs,
+ dev->limits.num_srqs - 1,
+ dev->limits.reserved_srqs);
+ if (err)
+ return err;
+
+ err = mthca_array_init(&dev->srq_table.srq,
+ dev->limits.num_srqs);
+ if (err)
+ mthca_alloc_cleanup(&dev->srq_table.alloc);
+
+ return err;
+}
+
+void mthca_cleanup_srq_table(struct mthca_dev *dev)
+{
+ if (!(dev->mthca_flags & MTHCA_FLAG_SRQ))
+ return;
+
+ mthca_array_cleanup(&dev->srq_table.srq, dev->limits.num_srqs);
+ mthca_alloc_cleanup(&dev->srq_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c
new file mode 100644
index 0000000..ca5900c
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_uar.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <asm/page.h> /* PAGE_SHIFT */
+
+#include "mthca_dev.h"
+#include "mthca_memfree.h"
+
+int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar)
+{
+ uar->index = mthca_alloc(&dev->uar_table.alloc);
+ if (uar->index == -1)
+ return -ENOMEM;
+
+ uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
+
+ return 0;
+}
+
+void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar)
+{
+ mthca_free(&dev->uar_table.alloc, uar->index);
+}
+
+int mthca_init_uar_table(struct mthca_dev *dev)
+{
+ int ret;
+
+ ret = mthca_alloc_init(&dev->uar_table.alloc,
+ dev->limits.num_uars,
+ dev->limits.num_uars - 1,
+ dev->limits.reserved_uars + 1);
+ if (ret)
+ return ret;
+
+ ret = mthca_init_db_tab(dev);
+ if (ret)
+ mthca_alloc_cleanup(&dev->uar_table.alloc);
+
+ return ret;
+}
+
+void mthca_cleanup_uar_table(struct mthca_dev *dev)
+{
+ mthca_cleanup_db_tab(dev);
+
+ /* XXX check if any UARs are still allocated? */
+ mthca_alloc_cleanup(&dev->uar_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
new file mode 100644
index 0000000..5fe56e8
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MTHCA_USER_H
+#define MTHCA_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define MTHCA_UVERBS_ABI_VERSION 1
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
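+ *
+ * The explicit "reserved" fields below keep each struct a multiple
+ * of 8 bytes so no implicit tail padding is added, and userspace
+ * addresses (for example the doorbell pages in mthca_create_cq and
+ * mthca_create_srq) are passed as __u64 values.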
+ */
+
+struct mthca_alloc_ucontext_resp {
+ __u32 qp_tab_size;
+ __u32 uarc_size;
+};
+
+struct mthca_alloc_pd_resp {
+ __u32 pdn;
+ __u32 reserved;
+};
+
+struct mthca_reg_mr {
+/*
+ * Mark the memory region with a DMA attribute that causes
+ * in-flight DMA to be flushed when the region is written to:
+ */
+#define MTHCA_MR_DMASYNC 0x1
+ __u32 mr_attrs;
+ __u32 reserved;
+};
+
+struct mthca_create_cq {
+ __u32 lkey;
+ __u32 pdn;
+ __u64 arm_db_page;
+ __u64 set_db_page;
+ __u32 arm_db_index;
+ __u32 set_db_index;
+};
+
+struct mthca_create_cq_resp {
+ __u32 cqn;
+ __u32 reserved;
+};
+
+struct mthca_resize_cq {
+ __u32 lkey;
+ __u32 reserved;
+};
+
+struct mthca_create_srq {
+ __u32 lkey;
+ __u32 db_index;
+ __u64 db_page;
+};
+
+struct mthca_create_srq_resp {
+ __u32 srqn;
+ __u32 reserved;
+};
+
+struct mthca_create_qp {
+ __u32 lkey;
+ __u32 reserved;
+ __u64 sq_db_page;
+ __u64 rq_db_page;
+ __u32 sq_db_index;
+ __u32 rq_db_index;
+};
+
+#endif /* MTHCA_USER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h
new file mode 100644
index 0000000..341a5ae
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_wqe.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MTHCA_WQE_H
+#define MTHCA_WQE_H
+
+#include <linux/types.h>
+
+enum {
+ MTHCA_NEXT_DBD = 1 << 7,
+ MTHCA_NEXT_FENCE = 1 << 6,
+ MTHCA_NEXT_CQ_UPDATE = 1 << 3,
+ MTHCA_NEXT_EVENT_GEN = 1 << 2,
+ MTHCA_NEXT_SOLICIT = 1 << 1,
+ MTHCA_NEXT_IP_CSUM = 1 << 4,
+ MTHCA_NEXT_TCP_UDP_CSUM = 1 << 5,
+
+ MTHCA_MLX_VL15 = 1 << 17,
+ MTHCA_MLX_SLR = 1 << 16
+};
+
+enum {
+ MTHCA_INVAL_LKEY = 0x100,
+ MTHCA_TAVOR_MAX_WQES_PER_RECV_DB = 256,
+ MTHCA_ARBEL_MAX_WQES_PER_SEND_DB = 255
+};
+
+struct mthca_next_seg {
+ __be32 nda_op; /* [31:6] next WQE [4:0] next opcode */
+ __be32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */
+ __be32 flags; /* [3] CQ [2] Event [1] Solicit */
+ __be32 imm; /* immediate data */
+};
+
+struct mthca_tavor_ud_seg {
+ u32 reserved1;
+ __be32 lkey;
+ __be64 av_addr;
+ u32 reserved2[4];
+ __be32 dqpn;
+ __be32 qkey;
+ u32 reserved3[2];
+};
+
+struct mthca_arbel_ud_seg {
+ __be32 av[8];
+ __be32 dqpn;
+ __be32 qkey;
+ u32 reserved[2];
+};
+
+struct mthca_bind_seg {
+ __be32 flags; /* [31] Atomic [30] rem write [29] rem read */
+ u32 reserved;
+ __be32 new_rkey;
+ __be32 lkey;
+ __be64 addr;
+ __be64 length;
+};
+
+struct mthca_raddr_seg {
+ __be64 raddr;
+ __be32 rkey;
+ u32 reserved;
+};
+
+struct mthca_atomic_seg {
+ __be64 swap_add;
+ __be64 compare;
+};
+
+struct mthca_data_seg {
+ __be32 byte_count;
+ __be32 lkey;
+ __be64 addr;
+};
+
+struct mthca_mlx_seg {
+ __be32 nda_op;
+ __be32 nds;
+ __be32 flags; /* [17] VL15 [16] SLR [14:12] static rate
+ [11:8] SL [3] C [2] E */
+ __be16 rlid;
+ __be16 vcrc;
+};
+
+static __always_inline void mthca_set_data_seg(struct mthca_data_seg *dseg,
+ struct ib_sge *sg)
+{
+ dseg->byte_count = cpu_to_be32(sg->length);
+ dseg->lkey = cpu_to_be32(sg->lkey);
+ dseg->addr = cpu_to_be64(sg->addr);
+}
+
+static __always_inline void mthca_set_data_seg_inval(struct mthca_data_seg *dseg)
+{
+ dseg->byte_count = 0;
+ dseg->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
+ dseg->addr = 0;
+}
+
+#endif /* MTHCA_WQE_H */
diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig
new file mode 100644
index 0000000..d449eb6
--- /dev/null
+++ b/drivers/infiniband/hw/nes/Kconfig
@@ -0,0 +1,17 @@
+config INFINIBAND_NES
+ tristate "NetEffect RNIC Driver"
+ depends on PCI && INET && INFINIBAND
+ select LIBCRC32C
+ select INET_LRO
+ ---help---
+ This is a low-level driver for NetEffect RDMA-enabled
+ Network Interface Cards (RNICs).
+
+config INFINIBAND_NES_DEBUG
+ bool "Verbose debugging output"
+ depends on INFINIBAND_NES
+ default n
+ ---help---
+ This option causes the NetEffect RNIC driver to produce debug
+ messages. Select this if you are developing the driver
+ or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile
new file mode 100644
index 0000000..3514851
--- /dev/null
+++ b/drivers/infiniband/hw/nes/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o
+
+iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
new file mode 100644
index 0000000..aa1dc41
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -0,0 +1,1229 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/if_arp.h>
+#include <linux/highmem.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/iw_cm.h>
+
+#include "nes.h"
+
+#include <net/netevent.h>
+#include <net/neighbour.h>
+#include <linux/route.h>
+#include <net/ip_fib.h>
+
+MODULE_AUTHOR("NetEffect");
+MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+int max_mtu = 9000;
+int interrupt_mod_interval = 0;
+
+
+/* Interoperability */
+int mpa_version = 1;
+module_param(mpa_version, int, 0644);
+MODULE_PARM_DESC(mpa_version, "MPA version to be used in MPA Req/Resp (0 or 1)");
+
+/* Interoperability */
+int disable_mpa_crc = 0;
+module_param(disable_mpa_crc, int, 0644);
+MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC");
+
+unsigned int send_first = 0;
+module_param(send_first, int, 0644);
+MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection");
+
+
+unsigned int nes_drv_opt = 0;
+module_param(nes_drv_opt, int, 0644);
+MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters");
+
+unsigned int nes_debug_level = 0;
+module_param_named(debug_level, nes_debug_level, uint, 0644);
+MODULE_PARM_DESC(debug_level, "Enable debug output level");
+
+unsigned int wqm_quanta = 0x10000;
+module_param(wqm_quanta, int, 0644);
+MODULE_PARM_DESC(wqm_quanta, "WQM quanta");
+
+static unsigned int limit_maxrdreqsz;
+module_param(limit_maxrdreqsz, bool, 0644);
+MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes");
+
+LIST_HEAD(nes_adapter_list);
+static LIST_HEAD(nes_dev_list);
+
+atomic_t qps_destroyed;
+
+static unsigned int ee_flsh_adapter;
+static unsigned int sysfs_nonidx_addr;
+static unsigned int sysfs_idx_addr;
+
+static struct pci_device_id nes_pci_table[] = {
+ {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020, PCI_ANY_ID, PCI_ANY_ID},
+ {0}
+};
+
+MODULE_DEVICE_TABLE(pci, nes_pci_table);
+
+static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *);
+static int nes_net_event(struct notifier_block *, unsigned long, void *);
+static int nes_notifiers_registered;
+
+
+static struct notifier_block nes_inetaddr_notifier = {
+ .notifier_call = nes_inetaddr_event
+};
+
+static struct notifier_block nes_net_notifier = {
+ .notifier_call = nes_net_event
+};
+
+
+
+
+/**
+ * nes_inetaddr_event
+ */
+static int nes_inetaddr_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct in_ifaddr *ifa = ptr;
+ struct net_device *event_netdev = ifa->ifa_dev->dev;
+ struct nes_device *nesdev;
+ struct net_device *netdev;
+ struct nes_vnic *nesvnic;
+ unsigned int addr;
+ unsigned int mask;
+
+ addr = ntohl(ifa->ifa_address);
+ mask = ntohl(ifa->ifa_mask);
+ nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address " NIPQUAD_FMT
+ ", netmask " NIPQUAD_FMT ".\n",
+ HIPQUAD(addr), HIPQUAD(mask));
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p. (%s)\n",
+ nesdev, nesdev->netdev[0]->name);
+ netdev = nesdev->netdev[0];
+ nesvnic = netdev_priv(netdev);
+ if (netdev == event_netdev) {
+ if (nesvnic->rdma_enabled == 0) {
+ nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since"
+ " RDMA is not enabled.\n",
+ netdev->name);
+ return NOTIFY_OK;
+ }
+ /* we have ifa->ifa_address/mask here if we need it */
+ switch (event) {
+ case NETDEV_DOWN:
+ nes_debug(NES_DBG_NETDEV, "event:DOWN\n");
+ nes_write_indexed(nesdev,
+ NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), 0);
+
+ nes_manage_arp_cache(netdev, netdev->dev_addr,
+ ntohl(nesvnic->local_ipaddr), NES_ARP_DELETE);
+ nesvnic->local_ipaddr = 0;
+ return NOTIFY_OK;
+ case NETDEV_UP:
+ nes_debug(NES_DBG_NETDEV, "event:UP\n");
+
+ if (nesvnic->local_ipaddr != 0) {
+ nes_debug(NES_DBG_NETDEV, "Interface already has local_ipaddr\n");
+ return NOTIFY_OK;
+ }
+ /* Add the address to the IP table */
+ nesvnic->local_ipaddr = ifa->ifa_address;
+
+ nes_write_indexed(nesdev,
+ NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)),
+ ntohl(ifa->ifa_address));
+ nes_manage_arp_cache(netdev, netdev->dev_addr,
+ ntohl(nesvnic->local_ipaddr), NES_ARP_ADD);
+ return NOTIFY_OK;
+ default:
+ break;
+ }
+ }
+ }
+
+ return NOTIFY_DONE;
+}
+
+
+/**
+ * nes_net_event
+ */
+static int nes_net_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct neighbour *neigh = ptr;
+ struct nes_device *nesdev;
+ struct net_device *netdev;
+ struct nes_vnic *nesvnic;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ /* nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p.\n", nesdev); */
+ netdev = nesdev->netdev[0];
+ nesvnic = netdev_priv(netdev);
+ if (netdev == neigh->dev) {
+ if (nesvnic->rdma_enabled == 0) {
+ nes_debug(NES_DBG_NETDEV, "Skipping device %s since no RDMA\n",
+ netdev->name);
+ } else {
+ if (neigh->nud_state & NUD_VALID) {
+ nes_manage_arp_cache(neigh->dev, neigh->ha,
+ ntohl(*(__be32 *)neigh->primary_key), NES_ARP_ADD);
+ } else {
+ nes_manage_arp_cache(neigh->dev, neigh->ha,
+ ntohl(*(__be32 *)neigh->primary_key), NES_ARP_DELETE);
+ }
+ }
+ return NOTIFY_OK;
+ }
+ }
+ break;
+ default:
+ nes_debug(NES_DBG_NETDEV, "NETEVENT_ %lu undefined\n", event);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+
+/**
+ * nes_add_ref
+ */
+void nes_add_ref(struct ib_qp *ibqp)
+{
+ struct nes_qp *nesqp;
+
+ nesqp = to_nesqp(ibqp);
+ nes_debug(NES_DBG_QP, "Bumping refcount for QP%u. Pre-inc value = %u\n",
+ ibqp->qp_num, atomic_read(&nesqp->refcount));
+ atomic_inc(&nesqp->refcount);
+}
+
+static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
+{
+ unsigned long flags;
+ struct nes_qp *nesqp = cqp_request->cqp_callback_pointer;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 qp_id;
+
+ atomic_inc(&qps_destroyed);
+
+ /* Free the control structures */
+
+ qp_id = nesqp->hwqp.qp_id;
+ if (nesqp->pbl_vbase) {
+ pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
+ nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ nesadapter->free_256pbl++;
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase);
+ nesqp->pbl_vbase = NULL;
+
+ } else {
+ pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
+ nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase);
+ }
+ nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id);
+
+ nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL;
+ kfree(nesqp->allocated_buffer);
+
+}
+
+/**
+ * nes_rem_ref
+ */
+void nes_rem_ref(struct ib_qp *ibqp)
+{
+ u64 u64temp;
+ struct nes_qp *nesqp;
+ struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_cqp_request *cqp_request;
+ u32 opcode;
+
+ nesqp = to_nesqp(ibqp);
+
+ if (atomic_read(&nesqp->refcount) == 0) {
+ printk(KERN_INFO PFX "%s: Reference count already 0 for QP%d, last aeq = 0x%04X.\n",
+ __func__, ibqp->qp_num, nesqp->last_aeq);
+ BUG();
+ }
+
+ if (atomic_dec_and_test(&nesqp->refcount)) {
+ /* Destroy the QP */
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n");
+ return;
+ }
+ cqp_request->waiting = 0;
+ cqp_request->callback = 1;
+ cqp_request->cqp_callback = nes_cqp_rem_ref_callback;
+ cqp_request->cqp_callback_pointer = nesqp;
+ cqp_wqe = &cqp_request->cqp_wqe;
+
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ opcode = NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_IWARP;
+
+ if (nesqp->hte_added) {
+ opcode |= NES_CQP_QP_DEL_HTE;
+ nesqp->hte_added = 0;
+ }
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
+ u64temp = (u64)nesqp->nesqp_context_pbase;
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
+ nes_post_cqp_request(nesdev, cqp_request);
+ }
+}
+
+
+/**
+ * nes_get_qp
+ */
+struct ib_qp *nes_get_qp(struct ib_device *device, int qpn)
+{
+ struct nes_vnic *nesvnic = to_nesvnic(device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+
+ if ((qpn < NES_FIRST_QPN) || (qpn >= (NES_FIRST_QPN + nesadapter->max_qp)))
+ return NULL;
+
+ return &nesadapter->qp_table[qpn - NES_FIRST_QPN]->ibqp;
+}
+
+
+/**
+ * nes_print_macaddr
+ */
+static void nes_print_macaddr(struct net_device *netdev)
+{
+ DECLARE_MAC_BUF(mac);
+
+ nes_debug(NES_DBG_INIT, "%s: %s, IRQ %u\n",
+ netdev->name, print_mac(mac, netdev->dev_addr), netdev->irq);
+}
+
+/**
+ * nes_interrupt - handle interrupts
+ */
+static irqreturn_t nes_interrupt(int irq, void *dev_id)
+{
+ struct nes_device *nesdev = (struct nes_device *)dev_id;
+ int handled = 0;
+ u32 int_mask;
+ u32 int_req;
+ u32 int_stat;
+ u32 intf_int_stat;
+ u32 timer_stat;
+
+ if (nesdev->msi_enabled) {
+ /* No need to read the interrupt pending register if msi is enabled */
+ handled = 1;
+ } else {
+ if (unlikely(nesdev->nesadapter->hw_rev == NE020_REV)) {
+ /* Master interrupt enable provides synchronization for kicking off bottom half
+ when interrupt sharing is going on */
+ int_mask = nes_read32(nesdev->regs + NES_INT_MASK);
+ if (int_mask & 0x80000000) {
+ /* Check interrupt status to see if this might be ours */
+ int_stat = nes_read32(nesdev->regs + NES_INT_STAT);
+ int_req = nesdev->int_req;
+ if (int_stat&int_req) {
+ /* if interesting CEQ or AEQ is pending, claim the interrupt */
+ if ((int_stat&int_req) & (~(NES_INT_TIMER|NES_INT_INTF))) {
+ handled = 1;
+ } else {
+ if (((int_stat & int_req) & NES_INT_TIMER) == NES_INT_TIMER) {
+ /* Timer might be running but might be for another function */
+ timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT);
+ if ((timer_stat & nesdev->timer_int_req) != 0) {
+ handled = 1;
+ }
+ }
+ if ((((int_stat & int_req) & NES_INT_INTF) == NES_INT_INTF) &&
+ (handled == 0)) {
+ intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT);
+ if ((intf_int_stat & nesdev->intf_int_req) != 0) {
+ handled = 1;
+ }
+ }
+ }
+ if (handled) {
+ nes_write32(nesdev->regs+NES_INT_MASK, int_mask & (~0x80000000));
+ int_mask = nes_read32(nesdev->regs+NES_INT_MASK);
+ /* Save off the status to save an additional read */
+ nesdev->int_stat = int_stat;
+ nesdev->napi_isr_ran = 1;
+ }
+ }
+ }
+ } else {
+ handled = nes_read32(nesdev->regs+NES_INT_PENDING);
+ }
+ }
+
+ if (handled) {
+
+ if (nes_napi_isr(nesdev) == 0) {
+ tasklet_schedule(&nesdev->dpc_tasklet);
+
+ }
+ return IRQ_HANDLED;
+ } else {
+ return IRQ_NONE;
+ }
+}
+
+
+/**
+ * nes_probe - Device initialization
+ */
+static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
+{
+ struct net_device *netdev = NULL;
+ struct nes_device *nesdev = NULL;
+ int ret = 0;
+ struct nes_vnic *nesvnic = NULL;
+ void __iomem *mmio_regs = NULL;
+ u8 hw_rev;
+
+ assert(pcidev != NULL);
+ assert(ent != NULL);
+
+ printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n",
+ DRV_VERSION, pci_name(pcidev));
+
+ ret = pci_enable_device(pcidev);
+ if (ret) {
+ printk(KERN_ERR PFX "Unable to enable PCI device. (%s)\n", pci_name(pcidev));
+ goto bail0;
+ }
+
+ nes_debug(NES_DBG_INIT, "BAR0 (@0x%08lX) size = 0x%lX bytes\n",
+ (long unsigned int)pci_resource_start(pcidev, BAR_0),
+ (long unsigned int)pci_resource_len(pcidev, BAR_0));
+ nes_debug(NES_DBG_INIT, "BAR1 (@0x%08lX) size = 0x%lX bytes\n",
+ (long unsigned int)pci_resource_start(pcidev, BAR_1),
+ (long unsigned int)pci_resource_len(pcidev, BAR_1));
+
+ /* Make sure PCI base addr are MMIO */
+ if (!(pci_resource_flags(pcidev, BAR_0) & IORESOURCE_MEM) ||
+ !(pci_resource_flags(pcidev, BAR_1) & IORESOURCE_MEM)) {
+ printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
+ ret = -ENODEV;
+ goto bail1;
+ }
+
+ /* Reserve PCI I/O and memory resources */
+ ret = pci_request_regions(pcidev, DRV_NAME);
+ if (ret) {
+ printk(KERN_ERR PFX "Unable to request regions. (%s)\n", pci_name(pcidev));
+ goto bail1;
+ }
+
+ if ((sizeof(dma_addr_t) > 4)) {
+ ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
+ if (ret < 0) {
+ printk(KERN_ERR PFX "64b DMA mask configuration failed\n");
+ goto bail2;
+ }
+ ret = pci_set_consistent_dma_mask(pcidev, DMA_64BIT_MASK);
+ if (ret) {
+ printk(KERN_ERR PFX "64b DMA consistent mask configuration failed\n");
+ goto bail2;
+ }
+ } else {
+ ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
+ if (ret < 0) {
+ printk(KERN_ERR PFX "32b DMA mask configuration failed\n");
+ goto bail2;
+ }
+ ret = pci_set_consistent_dma_mask(pcidev, DMA_32BIT_MASK);
+ if (ret) {
+ printk(KERN_ERR PFX "32b DMA consistent mask configuration failed\n");
+ goto bail2;
+ }
+ }
+
+ pci_set_master(pcidev);
+
+ /* Allocate hardware structure */
+ nesdev = kzalloc(sizeof(struct nes_device), GFP_KERNEL);
+ if (!nesdev) {
+ printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", pci_name(pcidev));
+ ret = -ENOMEM;
+ goto bail2;
+ }
+
+ nes_debug(NES_DBG_INIT, "Allocated nes device at %p\n", nesdev);
+ nesdev->pcidev = pcidev;
+ pci_set_drvdata(pcidev, nesdev);
+
+ pci_read_config_byte(pcidev, 0x0008, &hw_rev);
+ nes_debug(NES_DBG_INIT, "hw_rev=%u\n", hw_rev);
+
+ spin_lock_init(&nesdev->indexed_regs_lock);
+
+ /* Remap the PCI registers in adapter BAR0 to kernel VA space */
+ mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), sizeof(mmio_regs));
+ if (mmio_regs == NULL) {
+ printk(KERN_ERR PFX "Unable to remap BAR0\n");
+ ret = -EIO;
+ goto bail3;
+ }
+ nesdev->regs = mmio_regs;
+ nesdev->index_reg = 0x50 + (PCI_FUNC(pcidev->devfn)*8) + mmio_regs;
+
+ /* Ensure interrupts are disabled */
+ nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff);
+
+ if (nes_drv_opt & NES_DRV_OPT_ENABLE_MSI) {
+ if (!pci_enable_msi(nesdev->pcidev)) {
+ nesdev->msi_enabled = 1;
+ nes_debug(NES_DBG_INIT, "MSI is enabled for device %s\n",
+ pci_name(pcidev));
+ } else {
+ nes_debug(NES_DBG_INIT, "MSI is disabled by linux for device %s\n",
+ pci_name(pcidev));
+ }
+ } else {
+ nes_debug(NES_DBG_INIT, "MSI not requested due to driver options for device %s\n",
+ pci_name(pcidev));
+ }
+
+ nesdev->csr_start = pci_resource_start(nesdev->pcidev, BAR_0);
+ nesdev->doorbell_region = pci_resource_start(nesdev->pcidev, BAR_1);
+
+ /* Init the adapter */
+ nesdev->nesadapter = nes_init_adapter(nesdev, hw_rev);
+ if (!nesdev->nesadapter) {
+ printk(KERN_ERR PFX "Unable to initialize adapter.\n");
+ ret = -ENOMEM;
+ goto bail5;
+ }
+ nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval;
+ nesdev->nesadapter->wqm_quanta = wqm_quanta;
+
+ /* nesdev->base_doorbell_index =
+ nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */
+ nesdev->base_doorbell_index = 1;
+ nesdev->doorbell_start = nesdev->nesadapter->doorbell_start;
+ if (nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
+ switch (PCI_FUNC(nesdev->pcidev->devfn) %
+ nesdev->nesadapter->port_count) {
+ case 1:
+ nesdev->mac_index = 2;
+ break;
+ case 2:
+ nesdev->mac_index = 1;
+ break;
+ case 3:
+ nesdev->mac_index = 3;
+ break;
+ case 0:
+ default:
+ nesdev->mac_index = 0;
+ }
+ } else {
+ nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) %
+ nesdev->nesadapter->port_count;
+ }
+
+ if ((limit_maxrdreqsz ||
+ ((nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_GLADIUS) &&
+ (hw_rev == NE020_REV1))) &&
+ (pcie_get_readrq(pcidev) > 256)) {
+ if (pcie_set_readrq(pcidev, 256))
+ printk(KERN_ERR PFX "Unable to set max read request"
+ " to 256 bytes\n");
+ else
+ nes_debug(NES_DBG_INIT, "Max read request size set"
+ " to 256 bytes\n");
+ }
+
+ tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev);
+
+ /* bring up the Control QP */
+ if (nes_init_cqp(nesdev)) {
+ ret = -ENODEV;
+ goto bail6;
+ }
+
+ /* Arm the CCQ */
+ nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
+ PCI_FUNC(nesdev->pcidev->devfn));
+ nes_read32(nesdev->regs+NES_CQE_ALLOC);
+
+ /* Enable the interrupts */
+ nesdev->int_req = (0x101 << PCI_FUNC(nesdev->pcidev->devfn)) |
+ (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16));
+ if (PCI_FUNC(nesdev->pcidev->devfn) < 4) {
+ nesdev->int_req |= (1 << (PCI_FUNC(nesdev->mac_index)+24));
+ }
+
+ /* TODO: This really should be the first driver to load, not function 0 */
+ if (PCI_FUNC(nesdev->pcidev->devfn) == 0) {
+ /* pick up PCI and critical errors if the first driver to load */
+ nesdev->intf_int_req = NES_INTF_INT_PCIERR | NES_INTF_INT_CRITERR;
+ nesdev->int_req |= NES_INT_INTF;
+ } else {
+ nesdev->intf_int_req = 0;
+ }
+ nesdev->intf_int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16));
+ nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0, 0);
+ nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 0);
+ nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS2, 0x00001265);
+ nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS4, 0x18021804);
+
+ nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS3, 0x17801790);
+
+ /* deal with both periodic and one_shot */
+ nesdev->timer_int_req = 0x101 << PCI_FUNC(nesdev->pcidev->devfn);
+ nesdev->nesadapter->timer_int_req |= nesdev->timer_int_req;
+ nes_debug(NES_DBG_INIT, "setting int_req for function %u, nesdev = 0x%04X, adapter = 0x%04X\n",
+ PCI_FUNC(nesdev->pcidev->devfn),
+ nesdev->timer_int_req, nesdev->nesadapter->timer_int_req);
+
+ nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
+
+ list_add_tail(&nesdev->list, &nes_dev_list);
+
+ /* Request an interrupt line for the driver */
+ ret = request_irq(pcidev->irq, nes_interrupt, IRQF_SHARED, DRV_NAME, nesdev);
+ if (ret) {
+ printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
+ pci_name(pcidev), pcidev->irq);
+ goto bail65;
+ }
+
+ nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
+
+ if (nes_notifiers_registered == 0) {
+ register_inetaddr_notifier(&nes_inetaddr_notifier);
+ register_netevent_notifier(&nes_net_notifier);
+ }
+ nes_notifiers_registered++;
+
+ /* Initialize network devices */
+ if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL) {
+ goto bail7;
+ }
+
+ /* Register network device */
+ ret = register_netdev(netdev);
+ if (ret) {
+ printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret);
+ nes_netdev_destroy(netdev);
+ goto bail7;
+ }
+
+ nes_print_macaddr(netdev);
+ /* create a CM core for this netdev */
+ nesvnic = netdev_priv(netdev);
+
+ nesdev->netdev_count++;
+ nesdev->nesadapter->netdev_count++;
+
+
+ printk(KERN_ERR PFX "%s: NetEffect RNIC driver successfully loaded.\n",
+ pci_name(pcidev));
+ return 0;
+
+ bail7:
+ printk(KERN_ERR PFX "bail7\n");
+ while (nesdev->netdev_count > 0) {
+ nesdev->netdev_count--;
+ nesdev->nesadapter->netdev_count--;
+
+ unregister_netdev(nesdev->netdev[nesdev->netdev_count]);
+ nes_netdev_destroy(nesdev->netdev[nesdev->netdev_count]);
+ }
+
+ nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n",
+ nesdev->netdev_count, nesdev->nesadapter->netdev_count);
+
+ nes_notifiers_registered--;
+ if (nes_notifiers_registered == 0) {
+ unregister_netevent_notifier(&nes_net_notifier);
+ unregister_inetaddr_notifier(&nes_inetaddr_notifier);
+ }
+
+ list_del(&nesdev->list);
+ nes_destroy_cqp(nesdev);
+
+ bail65:
+ printk(KERN_ERR PFX "bail65\n");
+ free_irq(pcidev->irq, nesdev);
+ if (nesdev->msi_enabled) {
+ pci_disable_msi(pcidev);
+ }
+ bail6:
+ printk(KERN_ERR PFX "bail6\n");
+ tasklet_kill(&nesdev->dpc_tasklet);
+ /* Deallocate the Adapter Structure */
+ nes_destroy_adapter(nesdev->nesadapter);
+
+ bail5:
+ printk(KERN_ERR PFX "bail5\n");
+ iounmap(nesdev->regs);
+
+ bail3:
+ printk(KERN_ERR PFX "bail3\n");
+ kfree(nesdev);
+
+ bail2:
+ pci_release_regions(pcidev);
+
+ bail1:
+ pci_disable_device(pcidev);
+
+ bail0:
+ return ret;
+}
+
+
+/**
+ * nes_remove - unload from kernel
+ */
+static void __devexit nes_remove(struct pci_dev *pcidev)
+{
+ struct nes_device *nesdev = pci_get_drvdata(pcidev);
+ struct net_device *netdev;
+ int netdev_index = 0;
+
+ if (nesdev->netdev_count) {
+ netdev = nesdev->netdev[netdev_index];
+ if (netdev) {
+ netif_stop_queue(netdev);
+ unregister_netdev(netdev);
+ nes_netdev_destroy(netdev);
+
+ nesdev->netdev[netdev_index] = NULL;
+ nesdev->netdev_count--;
+ nesdev->nesadapter->netdev_count--;
+ }
+ }
+
+ nes_notifiers_registered--;
+ if (nes_notifiers_registered == 0) {
+ unregister_netevent_notifier(&nes_net_notifier);
+ unregister_inetaddr_notifier(&nes_inetaddr_notifier);
+ }
+
+ list_del(&nesdev->list);
+ nes_destroy_cqp(nesdev);
+
+ free_irq(pcidev->irq, nesdev);
+ tasklet_kill(&nesdev->dpc_tasklet);
+
+ /* Deallocate the Adapter Structure */
+ nes_destroy_adapter(nesdev->nesadapter);
+
+ if (nesdev->msi_enabled) {
+ pci_disable_msi(pcidev);
+ }
+
+ iounmap(nesdev->regs);
+ kfree(nesdev);
+
+ /* nes_debug(NES_DBG_SHUTDOWN, "calling pci_release_regions.\n"); */
+ pci_release_regions(pcidev);
+ pci_disable_device(pcidev);
+ pci_set_drvdata(pcidev, NULL);
+}
+
+
+static struct pci_driver nes_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = nes_pci_table,
+ .probe = nes_probe,
+ .remove = __devexit_p(nes_remove),
+};
+
+static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf)
+{
+ unsigned int devfn = 0xffffffff;
+ unsigned char bus_number = 0xff;
+ unsigned int i = 0;
+ struct nes_device *nesdev;
+
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ devfn = nesdev->pcidev->devfn;
+ bus_number = nesdev->pcidev->bus->number;
+ break;
+ }
+ i++;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "%x:%x\n", bus_number, devfn);
+}
+
+static ssize_t nes_store_adapter(struct device_driver *ddp,
+ const char *buf, size_t count)
+{
+ char *p = (char *)buf;
+
+ ee_flsh_adapter = simple_strtoul(p, &p, 10);
+ return strnlen(buf, count);
+}
+
+static ssize_t nes_show_ee_cmd(struct device_driver *ddp, char *buf)
+{
+ u32 eeprom_cmd = 0xdead;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ eeprom_cmd = nes_read32(nesdev->regs + NES_EEPROM_COMMAND);
+ break;
+ }
+ i++;
+ }
+ return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_cmd);
+}
+
+static ssize_t nes_store_ee_cmd(struct device_driver *ddp,
+ const char *buf, size_t count)
+{
+ char *p = (char *)buf;
+ u32 val;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
+ val = simple_strtoul(p, &p, 16);
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ nes_write32(nesdev->regs + NES_EEPROM_COMMAND, val);
+ break;
+ }
+ i++;
+ }
+ }
+ return strnlen(buf, count);
+}
+
+static ssize_t nes_show_ee_data(struct device_driver *ddp, char *buf)
+{
+ u32 eeprom_data = 0xdead;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ eeprom_data = nes_read32(nesdev->regs + NES_EEPROM_DATA);
+ break;
+ }
+ i++;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_data);
+}
+
+static ssize_t nes_store_ee_data(struct device_driver *ddp,
+ const char *buf, size_t count)
+{
+ char *p = (char *)buf;
+ u32 val;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
+ val = simple_strtoul(p, &p, 16);
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ nes_write32(nesdev->regs + NES_EEPROM_DATA, val);
+ break;
+ }
+ i++;
+ }
+ }
+ return strnlen(buf, count);
+}
+
+static ssize_t nes_show_flash_cmd(struct device_driver *ddp, char *buf)
+{
+ u32 flash_cmd = 0xdead;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ flash_cmd = nes_read32(nesdev->regs + NES_FLASH_COMMAND);
+ break;
+ }
+ i++;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_cmd);
+}
+
+static ssize_t nes_store_flash_cmd(struct device_driver *ddp,
+ const char *buf, size_t count)
+{
+ char *p = (char *)buf;
+ u32 val;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
+ val = simple_strtoul(p, &p, 16);
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ nes_write32(nesdev->regs + NES_FLASH_COMMAND, val);
+ break;
+ }
+ i++;
+ }
+ }
+ return strnlen(buf, count);
+}
+
+static ssize_t nes_show_flash_data(struct device_driver *ddp, char *buf)
+{
+ u32 flash_data = 0xdead;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ flash_data = nes_read32(nesdev->regs + NES_FLASH_DATA);
+ break;
+ }
+ i++;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_data);
+}
+
+static ssize_t nes_store_flash_data(struct device_driver *ddp,
+ const char *buf, size_t count)
+{
+ char *p = (char *)buf;
+ u32 val;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
+ val = simple_strtoul(p, &p, 16);
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ nes_write32(nesdev->regs + NES_FLASH_DATA, val);
+ break;
+ }
+ i++;
+ }
+ }
+ return strnlen(buf, count);
+}
+
+static ssize_t nes_show_nonidx_addr(struct device_driver *ddp, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_nonidx_addr);
+}
+
+static ssize_t nes_store_nonidx_addr(struct device_driver *ddp,
+ const char *buf, size_t count)
+{
+ char *p = (char *)buf;
+
+ if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X')
+ sysfs_nonidx_addr = simple_strtoul(p, &p, 16);
+
+ return strnlen(buf, count);
+}
+
+static ssize_t nes_show_nonidx_data(struct device_driver *ddp, char *buf)
+{
+ u32 nonidx_data = 0xdead;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ nonidx_data = nes_read32(nesdev->regs + sysfs_nonidx_addr);
+ break;
+ }
+ i++;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "0x%x\n", nonidx_data);
+}
+
+static ssize_t nes_store_nonidx_data(struct device_driver *ddp,
+ const char *buf, size_t count)
+{
+ char *p = (char *)buf;
+ u32 val;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
+ val = simple_strtoul(p, &p, 16);
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ nes_write32(nesdev->regs + sysfs_nonidx_addr, val);
+ break;
+ }
+ i++;
+ }
+ }
+ return strnlen(buf, count);
+}
+
+static ssize_t nes_show_idx_addr(struct device_driver *ddp, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_idx_addr);
+}
+
+static ssize_t nes_store_idx_addr(struct device_driver *ddp,
+ const char *buf, size_t count)
+{
+ char *p = (char *)buf;
+
+ if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X')
+ sysfs_idx_addr = simple_strtoul(p, &p, 16);
+
+ return strnlen(buf, count);
+}
+
+static ssize_t nes_show_idx_data(struct device_driver *ddp, char *buf)
+{
+ u32 idx_data = 0xdead;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ idx_data = nes_read_indexed(nesdev, sysfs_idx_addr);
+ break;
+ }
+ i++;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "0x%x\n", idx_data);
+}
+
+static ssize_t nes_store_idx_data(struct device_driver *ddp,
+ const char *buf, size_t count)
+{
+ char *p = (char *)buf;
+ u32 val;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
+ val = simple_strtoul(p, &p, 16);
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ nes_write_indexed(nesdev, sysfs_idx_addr, val);
+ break;
+ }
+ i++;
+ }
+ }
+ return strnlen(buf, count);
+}
+
+
+/**
+ * nes_show_wqm_quanta
+ */
+static ssize_t nes_show_wqm_quanta(struct device_driver *ddp, char *buf)
+{
+ u32 wqm_quanta_value = 0xdead;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ wqm_quanta_value = nesdev->nesadapter->wqm_quanta;
+ break;
+ }
+ i++;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "0x%X\n", wqm_quanta_value);
+}
+
+
+/**
+ * nes_store_wqm_quanta
+ */
+static ssize_t nes_store_wqm_quanta(struct device_driver *ddp,
+ const char *buf, size_t count)
+{
+ unsigned long wqm_quanta_value;
+ u32 wqm_config1;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ strict_strtoul(buf, 0, &wqm_quanta_value);
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ nesdev->nesadapter->wqm_quanta = wqm_quanta_value;
+ wqm_config1 = nes_read_indexed(nesdev,
+ NES_IDX_WQM_CONFIG1);
+ nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1,
+ ((wqm_quanta_value << 1) |
+ (wqm_config1 & 0x00000001)));
+ break;
+ }
+ i++;
+ }
+ return strnlen(buf, count);
+}
+
+static DRIVER_ATTR(adapter, S_IRUSR | S_IWUSR,
+ nes_show_adapter, nes_store_adapter);
+static DRIVER_ATTR(eeprom_cmd, S_IRUSR | S_IWUSR,
+ nes_show_ee_cmd, nes_store_ee_cmd);
+static DRIVER_ATTR(eeprom_data, S_IRUSR | S_IWUSR,
+ nes_show_ee_data, nes_store_ee_data);
+static DRIVER_ATTR(flash_cmd, S_IRUSR | S_IWUSR,
+ nes_show_flash_cmd, nes_store_flash_cmd);
+static DRIVER_ATTR(flash_data, S_IRUSR | S_IWUSR,
+ nes_show_flash_data, nes_store_flash_data);
+static DRIVER_ATTR(nonidx_addr, S_IRUSR | S_IWUSR,
+ nes_show_nonidx_addr, nes_store_nonidx_addr);
+static DRIVER_ATTR(nonidx_data, S_IRUSR | S_IWUSR,
+ nes_show_nonidx_data, nes_store_nonidx_data);
+static DRIVER_ATTR(idx_addr, S_IRUSR | S_IWUSR,
+ nes_show_idx_addr, nes_store_idx_addr);
+static DRIVER_ATTR(idx_data, S_IRUSR | S_IWUSR,
+ nes_show_idx_data, nes_store_idx_data);
+static DRIVER_ATTR(wqm_quanta, S_IRUSR | S_IWUSR,
+ nes_show_wqm_quanta, nes_store_wqm_quanta);
+
+static int nes_create_driver_sysfs(struct pci_driver *drv)
+{
+ int error;
+ error = driver_create_file(&drv->driver, &driver_attr_adapter);
+ error |= driver_create_file(&drv->driver, &driver_attr_eeprom_cmd);
+ error |= driver_create_file(&drv->driver, &driver_attr_eeprom_data);
+ error |= driver_create_file(&drv->driver, &driver_attr_flash_cmd);
+ error |= driver_create_file(&drv->driver, &driver_attr_flash_data);
+ error |= driver_create_file(&drv->driver, &driver_attr_nonidx_addr);
+ error |= driver_create_file(&drv->driver, &driver_attr_nonidx_data);
+ error |= driver_create_file(&drv->driver, &driver_attr_idx_addr);
+ error |= driver_create_file(&drv->driver, &driver_attr_idx_data);
+ error |= driver_create_file(&drv->driver, &driver_attr_wqm_quanta);
+ return error;
+}
+
+static void nes_remove_driver_sysfs(struct pci_driver *drv)
+{
+ driver_remove_file(&drv->driver, &driver_attr_adapter);
+ driver_remove_file(&drv->driver, &driver_attr_eeprom_cmd);
+ driver_remove_file(&drv->driver, &driver_attr_eeprom_data);
+ driver_remove_file(&drv->driver, &driver_attr_flash_cmd);
+ driver_remove_file(&drv->driver, &driver_attr_flash_data);
+ driver_remove_file(&drv->driver, &driver_attr_nonidx_addr);
+ driver_remove_file(&drv->driver, &driver_attr_nonidx_data);
+ driver_remove_file(&drv->driver, &driver_attr_idx_addr);
+ driver_remove_file(&drv->driver, &driver_attr_idx_data);
+ driver_remove_file(&drv->driver, &driver_attr_wqm_quanta);
+}
+
+/**
+ * nes_init_module - module initialization entry point
+ */
+static int __init nes_init_module(void)
+{
+ int retval;
+ int retval1;
+
+ retval = nes_cm_start();
+ if (retval) {
+ printk(KERN_ERR PFX "Unable to start NetEffect iWARP CM.\n");
+ return retval;
+ }
+ retval = pci_register_driver(&nes_pci_driver);
+ if (retval >= 0) {
+ retval1 = nes_create_driver_sysfs(&nes_pci_driver);
+ if (retval1 < 0)
+ printk(KERN_ERR PFX "Unable to create NetEffect sys files.\n");
+ }
+ return retval;
+}
+
+
+/**
+ * nes_exit_module - module unload entry point
+ */
+static void __exit nes_exit_module(void)
+{
+ nes_cm_stop();
+ nes_remove_driver_sysfs(&nes_pci_driver);
+
+ pci_unregister_driver(&nes_pci_driver);
+}
+
+
+module_init(nes_init_module);
+module_exit(nes_exit_module);
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
new file mode 100644
index 0000000..1595dc7
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NES_H
+#define __NES_H
+
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+#include <linux/crc32c.h>
+
+#include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/iw_cm.h>
+
+#define NES_SEND_FIRST_WRITE
+
+#define QUEUE_DISCONNECTS
+
+#define DRV_BUILD "1"
+
+#define DRV_NAME "iw_nes"
+#define DRV_VERSION "1.0 KO Build " DRV_BUILD
+#define PFX DRV_NAME ": "
+
+/*
+ * NetEffect PCI vendor id and NE010 PCI device id.
+ */
+#ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */
+#define PCI_VENDOR_ID_NETEFFECT 0x1678
+#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100
+#endif
+
+#define NE020_REV 4
+#define NE020_REV1 5
+
+#define BAR_0 0
+#define BAR_1 2
+
+#define RX_BUF_SIZE (1536 + 8)
+#define NES_REG0_SIZE (4 * 1024)
+#define NES_TX_TIMEOUT (6*HZ)
+#define NES_FIRST_QPN 64
+#define NES_SW_CONTEXT_ALIGN 1024
+
+#define NES_NIC_MAX_NICS 16
+#define NES_MAX_ARP_TABLE_SIZE 4096
+
+#define NES_NIC_CEQ_SIZE 8
+/* NICs will be on a separate CQ */
+#define NES_CCEQ_SIZE ((nesadapter->max_cq / nesadapter->port_count) - 32)
+
+#define NES_MAX_PORT_COUNT 4
+
+#define MAX_DPC_ITERATIONS 128
+
+#define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001
+#define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002
+#define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004
+#define NES_DRV_OPT_DISABLE_INTF 0x00000008
+#define NES_DRV_OPT_ENABLE_MSI 0x00000010
+#define NES_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020
+#define NES_DRV_OPT_SUPRESS_OPTION_BC 0x00000040
+#define NES_DRV_OPT_NO_INLINE_DATA 0x00000080
+#define NES_DRV_OPT_DISABLE_INT_MOD 0x00000100
+#define NES_DRV_OPT_DISABLE_VIRT_WQ 0x00000200
+
+#define NES_AEQ_EVENT_TIMEOUT 2500
+#define NES_DISCONNECT_EVENT_TIMEOUT 2000
+
+/* debug levels */
+/* must match userspace */
+#define NES_DBG_HW 0x00000001
+#define NES_DBG_INIT 0x00000002
+#define NES_DBG_ISR 0x00000004
+#define NES_DBG_PHY 0x00000008
+#define NES_DBG_NETDEV 0x00000010
+#define NES_DBG_CM 0x00000020
+#define NES_DBG_CM1 0x00000040
+#define NES_DBG_NIC_RX 0x00000080
+#define NES_DBG_NIC_TX 0x00000100
+#define NES_DBG_CQP 0x00000200
+#define NES_DBG_MMAP 0x00000400
+#define NES_DBG_MR 0x00000800
+#define NES_DBG_PD 0x00001000
+#define NES_DBG_CQ 0x00002000
+#define NES_DBG_QP 0x00004000
+#define NES_DBG_MOD_QP 0x00008000
+#define NES_DBG_AEQ 0x00010000
+#define NES_DBG_IW_RX 0x00020000
+#define NES_DBG_IW_TX 0x00040000
+#define NES_DBG_SHUTDOWN 0x00080000
+#define NES_DBG_RSVD1 0x10000000
+#define NES_DBG_RSVD2 0x20000000
+#define NES_DBG_RSVD3 0x40000000
+#define NES_DBG_RSVD4 0x80000000
+#define NES_DBG_ALL 0xffffffff
+
+#ifdef CONFIG_INFINIBAND_NES_DEBUG
+#define nes_debug(level, fmt, args...) \
+do { \
+ if (level & nes_debug_level) \
+ printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \
+} while (0)
+
+#define assert(expr) \
+do { \
+ if (!(expr)) { \
+ printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \
+ #expr, __FILE__, __func__, __LINE__); \
+ } \
+} while (0)
+
+#define NES_EVENT_TIMEOUT 1200000
+#else
+#define nes_debug(level, fmt, args...)
+#define assert(expr) do {} while (0)
+
+#define NES_EVENT_TIMEOUT 100000
+#endif
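+
+/*
+ * Usage sketch (illustrative only): with CONFIG_INFINIBAND_NES_DEBUG
+ * enabled and NES_DBG_CM set in nes_debug_level, a call such as
+ *
+ *   nes_debug(NES_DBG_CM, "QP%u changed state to %u\n", qp_id, state);
+ *
+ * prints a KERN_ERR line tagged with the calling function and line
+ * number; without the config option the call compiles away to nothing.
+ * qp_id and state are hypothetical locals of the caller.
+ */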
+
+#include "nes_hw.h"
+#include "nes_verbs.h"
+#include "nes_context.h"
+#include "nes_user.h"
+#include "nes_cm.h"
+
+extern int max_mtu;
+#define max_frame_len (max_mtu+ETH_HLEN)
+extern int interrupt_mod_interval;
+extern int nes_if_count;
+extern int mpa_version;
+extern int disable_mpa_crc;
+extern unsigned int send_first;
+extern unsigned int nes_drv_opt;
+extern unsigned int nes_debug_level;
+extern unsigned int wqm_quanta;
+extern struct list_head nes_adapter_list;
+
+extern atomic_t cm_connects;
+extern atomic_t cm_accepts;
+extern atomic_t cm_disconnects;
+extern atomic_t cm_closes;
+extern atomic_t cm_connecteds;
+extern atomic_t cm_connect_reqs;
+extern atomic_t cm_rejects;
+extern atomic_t mod_qp_timouts;
+extern atomic_t qps_created;
+extern atomic_t qps_destroyed;
+extern atomic_t sw_qps_destroyed;
+extern u32 mh_detected;
+extern u32 mh_pauses_sent;
+extern u32 cm_packets_sent;
+extern u32 cm_packets_bounced;
+extern u32 cm_packets_created;
+extern u32 cm_packets_received;
+extern u32 cm_packets_dropped;
+extern u32 cm_packets_retrans;
+extern u32 cm_listens_created;
+extern u32 cm_listens_destroyed;
+extern u32 cm_backlog_drops;
+extern atomic_t cm_loopbacks;
+extern atomic_t cm_nodes_created;
+extern atomic_t cm_nodes_destroyed;
+extern atomic_t cm_accel_dropped_pkts;
+extern atomic_t cm_resets_recvd;
+
+extern u32 int_mod_timer_init;
+extern u32 int_mod_cq_depth_256;
+extern u32 int_mod_cq_depth_128;
+extern u32 int_mod_cq_depth_32;
+extern u32 int_mod_cq_depth_24;
+extern u32 int_mod_cq_depth_16;
+extern u32 int_mod_cq_depth_4;
+extern u32 int_mod_cq_depth_1;
+
+struct nes_device {
+ struct nes_adapter *nesadapter;
+ void __iomem *regs;
+ void __iomem *index_reg;
+ struct pci_dev *pcidev;
+ struct net_device *netdev[NES_NIC_MAX_NICS];
+ u64 link_status_interrupts;
+ struct tasklet_struct dpc_tasklet;
+ spinlock_t indexed_regs_lock;
+ unsigned long csr_start;
+ unsigned long doorbell_region;
+ unsigned long doorbell_start;
+ unsigned long mac_tx_errors;
+ unsigned long mac_pause_frames_sent;
+ unsigned long mac_pause_frames_received;
+ unsigned long mac_rx_errors;
+ unsigned long mac_rx_crc_errors;
+ unsigned long mac_rx_symbol_err_frames;
+ unsigned long mac_rx_jabber_frames;
+ unsigned long mac_rx_oversized_frames;
+ unsigned long mac_rx_short_frames;
+ unsigned long port_rx_discards;
+ unsigned long port_tx_discards;
+ unsigned int mac_index;
+ unsigned int nes_stack_start;
+
+ /* Control Structures */
+ void *cqp_vbase;
+ dma_addr_t cqp_pbase;
+ u32 cqp_mem_size;
+ u8 ceq_index;
+ u8 nic_ceq_index;
+ struct nes_hw_cqp cqp;
+ struct nes_hw_cq ccq;
+ struct list_head cqp_avail_reqs;
+ struct list_head cqp_pending_reqs;
+ struct nes_cqp_request *nes_cqp_requests;
+
+ u32 int_req;
+ u32 int_stat;
+ u32 timer_int_req;
+ u32 timer_only_int_count;
+ u32 intf_int_req;
+ u32 last_mac_tx_pauses;
+ u32 last_used_chunks_tx;
+ struct list_head list;
+
+ u16 base_doorbell_index;
+ u16 currcq_count;
+ u16 deepcq_count;
+ u8 msi_enabled;
+ u8 netdev_count;
+ u8 napi_isr_ran;
+ u8 disable_rx_flow_control;
+ u8 disable_tx_flow_control;
+};
+
+
+static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad)
+{
+ u32 crc_value;
+ crc_value = crc32c(~0, (void *)nes_quad, sizeof (struct nes_v4_quad));
+
+ /*
+ * With commit ef19454b ("[LIB] crc32c: Keep intermediate crc
+ * state in cpu order"), behavior of crc32c changes on
+ * big-endian platforms. Our algorithm expects the previous
+ * behavior; otherwise we have RDMA connection establishment
+ * issue on big-endian.
+ */
+ return cpu_to_le32(crc_value);
+}
+
+static inline void
+set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value)
+{
+ wqe_words[index] = cpu_to_le32((u32) ((unsigned long)value));
+ wqe_words[index + 1] = cpu_to_le32((u32)(upper_32_bits((unsigned long)value)));
+}
+
+static inline void
+set_wqe_32bit_value(__le32 *wqe_words, u32 index, u32 value)
+{
+ wqe_words[index] = cpu_to_le32(value);
+}
+
+static inline void
+nes_fill_init_cqp_wqe(struct nes_hw_cqp_wqe *cqp_wqe, struct nes_device *nesdev)
+{
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_CTX_LOW_IDX,
+ (u64)((unsigned long) &nesdev->cqp));
+ cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0;
+ cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0;
+ cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0;
+ cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_LEN_IDX] = 0;
+ cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_LOW_IDX] = 0;
+ cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PA_LOW_IDX] = 0;
+ cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PA_HIGH_IDX] = 0;
+}
+
+static inline void
+nes_fill_init_qp_wqe(struct nes_hw_qp_wqe *wqe, struct nes_qp *nesqp, u32 head)
+{
+ u32 value;
+ value = ((u32)((unsigned long) nesqp)) | head;
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX,
+ (u32)(upper_32_bits((unsigned long)(nesqp))));
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, value);
+}
+
+/* Read from memory-mapped device */
+static inline u32 nes_read_indexed(struct nes_device *nesdev, u32 reg_index)
+{
+ unsigned long flags;
+ void __iomem *addr = nesdev->index_reg;
+ u32 value;
+
+ spin_lock_irqsave(&nesdev->indexed_regs_lock, flags);
+
+ writel(reg_index, addr);
+ value = readl((void __iomem *)addr + 4);
+
+ spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags);
+ return value;
+}
+
+static inline u32 nes_read32(const void __iomem *addr)
+{
+ return readl(addr);
+}
+
+static inline u16 nes_read16(const void __iomem *addr)
+{
+ return readw(addr);
+}
+
+static inline u8 nes_read8(const void __iomem *addr)
+{
+ return readb(addr);
+}
+
+/* Write to memory-mapped device */
+static inline void nes_write_indexed(struct nes_device *nesdev, u32 reg_index, u32 val)
+{
+ unsigned long flags;
+ void __iomem *addr = nesdev->index_reg;
+
+ spin_lock_irqsave(&nesdev->indexed_regs_lock, flags);
+
+ writel(reg_index, addr);
+ writel(val, (void __iomem *)addr + 4);
+
+ spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags);
+}
+
+static inline void nes_write32(void __iomem *addr, u32 val)
+{
+ writel(val, addr);
+}
+
+static inline void nes_write16(void __iomem *addr, u16 val)
+{
+ writew(val, addr);
+}
+
+static inline void nes_write8(void __iomem *addr, u8 val)
+{
+ writeb(val, addr);
+}
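+
+/*
+ * Access sketch (NES_IDX_EXAMPLE is a hypothetical register offset):
+ * indexed registers are reached through the index/data window at
+ * nesdev->index_reg, serialized by indexed_regs_lock, e.g.
+ *
+ *   u32 val = nes_read_indexed(nesdev, NES_IDX_EXAMPLE);
+ *   nes_write_indexed(nesdev, NES_IDX_EXAMPLE, val | 0x1);
+ *
+ * nes_read32()/nes_write32() and friends touch directly mapped
+ * registers instead.
+ */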
+
+
+
+static inline int nes_alloc_resource(struct nes_adapter *nesadapter,
+ unsigned long *resource_array, u32 max_resources,
+ u32 *req_resource_num, u32 *next)
+{
+ unsigned long flags;
+ u32 resource_num;
+
+ spin_lock_irqsave(&nesadapter->resource_lock, flags);
+
+ resource_num = find_next_zero_bit(resource_array, max_resources, *next);
+ if (resource_num >= max_resources) {
+ resource_num = find_first_zero_bit(resource_array, max_resources);
+ if (resource_num >= max_resources) {
+ printk(KERN_ERR PFX "%s: No available resources.\n", __func__);
+ spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
+ return -EMFILE;
+ }
+ }
+ set_bit(resource_num, resource_array);
+ *next = resource_num+1;
+ if (*next == max_resources) {
+ *next = 0;
+ }
+ spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
+ *req_resource_num = resource_num;
+
+ return 0;
+}
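+
+/*
+ * Caller sketch (the allocated_qps/max_qp/next_qp fields are
+ * illustrative and may not match nes_hw.h exactly): a resource number
+ * is pulled from an adapter-wide bitmap and returned when done, e.g.
+ *
+ *   u32 qp_num;
+ *   int ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps,
+ *                                nesadapter->max_qp, &qp_num,
+ *                                &nesadapter->next_qp);
+ *   if (ret)
+ *           return ret;
+ *   ...
+ *   nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+ */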
+
+static inline int nes_is_resource_allocated(struct nes_adapter *nesadapter,
+ unsigned long *resource_array, u32 resource_num)
+{
+ unsigned long flags;
+ int bit_is_set;
+
+ spin_lock_irqsave(&nesadapter->resource_lock, flags);
+
+ bit_is_set = test_bit(resource_num, resource_array);
+ nes_debug(NES_DBG_HW, "resource_num %u is%s allocated.\n",
+ resource_num, (bit_is_set ? "": " not"));
+ spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
+
+ return bit_is_set;
+}
+
+static inline void nes_free_resource(struct nes_adapter *nesadapter,
+ unsigned long *resource_array, u32 resource_num)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&nesadapter->resource_lock, flags);
+ clear_bit(resource_num, resource_array);
+ spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
+}
+
+static inline struct nes_vnic *to_nesvnic(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct nes_ib_device, ibdev)->nesvnic;
+}
+
+static inline struct nes_pd *to_nespd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct nes_pd, ibpd);
+}
+
+static inline struct nes_ucontext *to_nesucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct nes_ucontext, ibucontext);
+}
+
+static inline struct nes_mr *to_nesmr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct nes_mr, ibmr);
+}
+
+static inline struct nes_mr *to_nesmr_from_ibfmr(struct ib_fmr *ibfmr)
+{
+ return container_of(ibfmr, struct nes_mr, ibfmr);
+}
+
+static inline struct nes_mr *to_nesmw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct nes_mr, ibmw);
+}
+
+static inline struct nes_fmr *to_nesfmr(struct nes_mr *nesmr)
+{
+ return container_of(nesmr, struct nes_fmr, nesmr);
+}
+
+static inline struct nes_cq *to_nescq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct nes_cq, ibcq);
+}
+
+static inline struct nes_qp *to_nesqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct nes_qp, ibqp);
+}
+
+
+
+/* nes.c */
+void nes_add_ref(struct ib_qp *);
+void nes_rem_ref(struct ib_qp *);
+struct ib_qp *nes_get_qp(struct ib_device *, int);
+
+
+/* nes_hw.c */
+struct nes_adapter *nes_init_adapter(struct nes_device *, u8);
+void nes_nic_init_timer_defaults(struct nes_device *, u8);
+void nes_destroy_adapter(struct nes_adapter *);
+int nes_init_cqp(struct nes_device *);
+int nes_init_phy(struct nes_device *);
+int nes_init_nic_qp(struct nes_device *, struct net_device *);
+void nes_destroy_nic_qp(struct nes_vnic *);
+int nes_napi_isr(struct nes_device *);
+void nes_dpc(unsigned long);
+void nes_nic_ce_handler(struct nes_device *, struct nes_hw_nic_cq *);
+void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *);
+int nes_destroy_cqp(struct nes_device *);
+int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
+
+/* nes_nic.c */
+struct net_device *nes_netdev_init(struct nes_device *, void __iomem *);
+void nes_netdev_destroy(struct net_device *);
+int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
+
+/* nes_cm.c */
+void *nes_cm_create(struct net_device *);
+int nes_cm_recv(struct sk_buff *, struct net_device *);
+void nes_update_arp(unsigned char *, u32, u32, u16, u16);
+void nes_manage_arp_cache(struct net_device *, unsigned char *, u32, u32);
+void nes_sock_release(struct nes_qp *, unsigned long *);
+void flush_wqes(struct nes_device *nesdev, struct nes_qp *, u32, u32);
+int nes_manage_apbvt(struct nes_vnic *, u32, u32, u32);
+int nes_cm_disconn(struct nes_qp *);
+void nes_cm_disconn_worker(void *);
+
+/* nes_verbs.c */
+int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32);
+int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
+struct nes_ib_device *nes_init_ofa_device(struct net_device *);
+void nes_destroy_ofa_device(struct nes_ib_device *);
+int nes_register_ofa_device(struct nes_ib_device *);
+
+/* nes_util.c */
+int nes_read_eeprom_values(struct nes_device *, struct nes_adapter *);
+void nes_write_1G_phy_reg(struct nes_device *, u8, u8, u16);
+void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *);
+void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16);
+void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16);
+struct nes_cqp_request *nes_get_cqp_request(struct nes_device *);
+void nes_free_cqp_request(struct nes_device *nesdev,
+ struct nes_cqp_request *cqp_request);
+void nes_put_cqp_request(struct nes_device *nesdev,
+ struct nes_cqp_request *cqp_request);
+void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *);
+int nes_arp_table(struct nes_device *, u32, u8 *, u32);
+void nes_mh_fix(unsigned long);
+void nes_clc(unsigned long);
+void nes_dump_mem(unsigned int, void *, int);
+u32 nes_crc32(u32, u32, u32, u32, u8 *, u32, u32, u32);
+
+#endif /* __NES_H */
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
new file mode 100644
index 0000000..7890be3
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -0,0 +1,3384 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#define TCPOPT_TIMESTAMP 8
+
+#include <asm/atomic.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/notifier.h>
+#include <linux/net.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/time.h>
+#include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/random.h>
+#include <linux/list.h>
+#include <linux/threads.h>
+#include <net/arp.h>
+#include <net/neighbour.h>
+#include <net/route.h>
+#include <net/ip_fib.h>
+
+#include "nes.h"
+
+u32 cm_packets_sent;
+u32 cm_packets_bounced;
+u32 cm_packets_dropped;
+u32 cm_packets_retrans;
+u32 cm_packets_created;
+u32 cm_packets_received;
+u32 cm_listens_created;
+u32 cm_listens_destroyed;
+u32 cm_backlog_drops;
+atomic_t cm_loopbacks;
+atomic_t cm_nodes_created;
+atomic_t cm_nodes_destroyed;
+atomic_t cm_accel_dropped_pkts;
+atomic_t cm_resets_recvd;
+
+static inline int mini_cm_accelerated(struct nes_cm_core *,
+ struct nes_cm_node *);
+static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *,
+ struct nes_vnic *, struct nes_cm_info *);
+static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *);
+static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *,
+ struct nes_vnic *, u16, void *, struct nes_cm_info *);
+static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *);
+static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *,
+ struct nes_cm_node *);
+static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *,
+ struct nes_cm_node *);
+static void mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
+ struct sk_buff *);
+static int mini_cm_dealloc_core(struct nes_cm_core *);
+static int mini_cm_get(struct nes_cm_core *);
+static int mini_cm_set(struct nes_cm_core *, u32, u32);
+
+static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *,
+ void *, u32, void *, u32, u8);
+static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node);
+static int add_ref_cm_node(struct nes_cm_node *);
+static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
+
+static int nes_cm_disconn_true(struct nes_qp *);
+static int nes_cm_post_event(struct nes_cm_event *event);
+static int nes_disconnect(struct nes_qp *nesqp, int abrupt);
+static void nes_disconnect_worker(struct work_struct *work);
+
+static int send_mpa_request(struct nes_cm_node *, struct sk_buff *);
+static int send_syn(struct nes_cm_node *, u32, struct sk_buff *);
+static int send_reset(struct nes_cm_node *, struct sk_buff *);
+static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb);
+static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb);
+static void process_packet(struct nes_cm_node *, struct sk_buff *,
+ struct nes_cm_core *);
+
+static void active_open_err(struct nes_cm_node *, struct sk_buff *, int);
+static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int);
+static void cleanup_retrans_entry(struct nes_cm_node *);
+static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *,
+ enum nes_cm_event_type);
+static void free_retrans_entry(struct nes_cm_node *cm_node);
+static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
+ struct sk_buff *skb, int optionsize, int passive);
+
+/* CM event handler functions */
+static void cm_event_connected(struct nes_cm_event *);
+static void cm_event_connect_error(struct nes_cm_event *);
+static void cm_event_reset(struct nes_cm_event *);
+static void cm_event_mpa_req(struct nes_cm_event *);
+
+static void print_core(struct nes_cm_core *core);
+
+/* External CM API Interface */
+/* instance of function pointers for client API */
+/* set address of this instance to cm_core->cm_ops at cm_core alloc */
+static struct nes_cm_ops nes_cm_api = {
+ mini_cm_accelerated,
+ mini_cm_listen,
+ mini_cm_del_listen,
+ mini_cm_connect,
+ mini_cm_close,
+ mini_cm_accept,
+ mini_cm_reject,
+ mini_cm_recv_pkt,
+ mini_cm_dealloc_core,
+ mini_cm_get,
+ mini_cm_set
+};
+
+static struct nes_cm_core *g_cm_core;
+
+atomic_t cm_connects;
+atomic_t cm_accepts;
+atomic_t cm_disconnects;
+atomic_t cm_closes;
+atomic_t cm_connecteds;
+atomic_t cm_connect_reqs;
+atomic_t cm_rejects;
+
+
+/**
+ * create_event
+ */
+static struct nes_cm_event *create_event(struct nes_cm_node *cm_node,
+ enum nes_cm_event_type type)
+{
+ struct nes_cm_event *event;
+
+ if (!cm_node->cm_id)
+ return NULL;
+
+ /* allocate an empty event */
+ event = kzalloc(sizeof(*event), GFP_ATOMIC);
+
+ if (!event)
+ return NULL;
+
+ event->type = type;
+ event->cm_node = cm_node;
+ event->cm_info.rem_addr = cm_node->rem_addr;
+ event->cm_info.loc_addr = cm_node->loc_addr;
+ event->cm_info.rem_port = cm_node->rem_port;
+ event->cm_info.loc_port = cm_node->loc_port;
+ event->cm_info.cm_id = cm_node->cm_id;
+
+ nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, "
+ "dst_addr=%08x[%x], src_addr=%08x[%x]\n",
+ cm_node, event, type, event->cm_info.loc_addr,
+ event->cm_info.loc_port, event->cm_info.rem_addr,
+ event->cm_info.rem_port);
+
+ nes_cm_post_event(event);
+ return event;
+}
+
+
+/**
+ * send_mpa_request
+ */
+static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb)
+{
+ int ret;
+ if (!skb) {
+ nes_debug(NES_DBG_CM, "skb set to NULL\n");
+ return -1;
+ }
+
+ /* send an MPA Request frame */
+ form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame,
+ cm_node->mpa_frame_size, SET_ACK);
+
+ ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+
+/**
+ * parse_mpa - process a received TCP pkt, we are expecting an
+ * IETF MPA frame
+ */
+static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len)
+{
+ struct ietf_mpa_frame *mpa_frame;
+
+ /* assume req frame is in tcp data payload */
+ if (len < sizeof(struct ietf_mpa_frame)) {
+ nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len);
+ return -1;
+ }
+
+ mpa_frame = (struct ietf_mpa_frame *)buffer;
+ cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len);
+
+ if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) {
+ nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
+ " complete (%x + %x != %x)\n",
+ cm_node->mpa_frame_size, (u32)sizeof(struct ietf_mpa_frame), len);
+ return -1;
+ }
+
+ /* copy entire MPA frame to our cm_node's frame */
+ memcpy(cm_node->mpa_frame_buf, buffer + sizeof(struct ietf_mpa_frame),
+ cm_node->mpa_frame_size);
+
+ return 0;
+}
+
+
+/**
+ * form_cm_frame - get a free packet and build an empty frame
+ * using node info.
+ */
+static struct sk_buff *form_cm_frame(struct sk_buff *skb,
+ struct nes_cm_node *cm_node, void *options, u32 optionsize,
+ void *data, u32 datasize, u8 flags)
+{
+ struct tcphdr *tcph;
+ struct iphdr *iph;
+ struct ethhdr *ethh;
+ u8 *buf;
+ u16 packetsize = sizeof(*iph);
+
+ packetsize += sizeof(*tcph);
+ packetsize += optionsize + datasize;
+
+ memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph));
+
+ skb->len = 0;
+ buf = skb_put(skb, packetsize + ETH_HLEN);
+
+ ethh = (struct ethhdr *) buf;
+ buf += ETH_HLEN;
+
+ iph = (struct iphdr *)buf;
+ buf += sizeof(*iph);
+ tcph = (struct tcphdr *)buf;
+ skb_reset_mac_header(skb);
+ skb_set_network_header(skb, ETH_HLEN);
+ skb_set_transport_header(skb, ETH_HLEN+sizeof(*iph));
+ buf += sizeof(*tcph);
+
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->protocol = htons(0x800);
+ skb->data_len = 0;
+ skb->mac_len = ETH_HLEN;
+
+ memcpy(ethh->h_dest, cm_node->rem_mac, ETH_ALEN);
+ memcpy(ethh->h_source, cm_node->loc_mac, ETH_ALEN);
+ ethh->h_proto = htons(0x0800);
+
+ iph->version = IPVERSION;
+ iph->ihl = 5; /* 5 * 4-byte words, IP header len */
+ iph->tos = 0;
+ iph->tot_len = htons(packetsize);
+ iph->id = htons(++cm_node->tcp_cntxt.loc_id);
+
+ iph->frag_off = htons(0x4000);
+ iph->ttl = 0x40;
+ iph->protocol = 0x06; /* IPPROTO_TCP */
+
+ iph->saddr = htonl(cm_node->loc_addr);
+ iph->daddr = htonl(cm_node->rem_addr);
+
+ tcph->source = htons(cm_node->loc_port);
+ tcph->dest = htons(cm_node->rem_port);
+ tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
+
+ if (flags & SET_ACK) {
+ cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
+ tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
+ tcph->ack = 1;
+ } else
+ tcph->ack_seq = 0;
+
+ if (flags & SET_SYN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->syn = 1;
+ } else
+ cm_node->tcp_cntxt.loc_seq_num += datasize;
+
+ if (flags & SET_FIN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->fin = 1;
+ }
+
+ if (flags & SET_RST)
+ tcph->rst = 1;
+
+ tcph->doff = (u16)((sizeof(*tcph) + optionsize + 3) >> 2);
+ tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
+ tcph->urg_ptr = 0;
+ if (optionsize)
+ memcpy(buf, options, optionsize);
+ buf += optionsize;
+ if (datasize)
+ memcpy(buf, data, datasize);
+
+ skb_shinfo(skb)->nr_frags = 0;
+ cm_packets_created++;
+
+ return skb;
+}
+
+
+/**
+ * print_core - dump a cm core
+ */
+static void print_core(struct nes_cm_core *core)
+{
+ nes_debug(NES_DBG_CM, "---------------------------------------------\n");
+ nes_debug(NES_DBG_CM, "CM Core -- (core = %p )\n", core);
+ if (!core)
+ return;
+ nes_debug(NES_DBG_CM, "---------------------------------------------\n");
+
+ nes_debug(NES_DBG_CM, "State : %u \n", core->state);
+
+ nes_debug(NES_DBG_CM, "Tx Free cnt : %u \n", skb_queue_len(&core->tx_free_list));
+ nes_debug(NES_DBG_CM, "Listen Nodes : %u \n", atomic_read(&core->listen_node_cnt));
+ nes_debug(NES_DBG_CM, "Active Nodes : %u \n", atomic_read(&core->node_cnt));
+
+ nes_debug(NES_DBG_CM, "core : %p \n", core);
+
+ nes_debug(NES_DBG_CM, "-------------- end core ---------------\n");
+}
+
+
+/**
+ * schedule_nes_timer
+ * note - cm_node needs to be protected before calling this. Bracket the call
+ * with: add_ref_cm_node(cm_node); ... rem_ref_cm_node(cm_core, cm_node);
+ */
+int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ enum nes_timer_type type, int send_retrans,
+ int close_when_complete)
+{
+ unsigned long flags;
+ struct nes_cm_core *cm_core;
+ struct nes_timer_entry *new_send;
+ int ret = 0;
+ u32 was_timer_set;
+
+ if (!cm_node)
+ return -EINVAL;
+
+ cm_core = cm_node->cm_core;
+ new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
+ if (!new_send)
+ return -1;
+
+ /* new_send->timetosend = currenttime */
+ new_send->retrycount = NES_DEFAULT_RETRYS;
+ new_send->retranscount = NES_DEFAULT_RETRANS;
+ new_send->skb = skb;
+ new_send->timetosend = jiffies;
+ new_send->type = type;
+ new_send->netdev = cm_node->netdev;
+ new_send->send_retrans = send_retrans;
+ new_send->close_when_complete = close_when_complete;
+
+ if (type == NES_TIMER_TYPE_CLOSE) {
+ new_send->timetosend += (HZ/10);
+ spin_lock_irqsave(&cm_node->recv_list_lock, flags);
+ list_add_tail(&new_send->list, &cm_node->recv_list);
+ spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
+ }
+
+ if (type == NES_TIMER_TYPE_SEND) {
+ new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
+ atomic_inc(&new_send->skb->users);
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ cm_node->send_entry = new_send;
+ add_ref_cm_node(cm_node);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ new_send->timetosend = jiffies + NES_RETRY_TIMEOUT;
+
+ ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev);
+ if (ret != NETDEV_TX_OK) {
+ nes_debug(NES_DBG_CM, "Error sending packet %p "
+ "(jiffies = %lu)\n", new_send, jiffies);
+ atomic_dec(&new_send->skb->users);
+ new_send->timetosend = jiffies;
+ } else {
+ cm_packets_sent++;
+ if (!send_retrans) {
+ cleanup_retrans_entry(cm_node);
+ if (close_when_complete)
+ rem_ref_cm_node(cm_core, cm_node);
+ return ret;
+ }
+ }
+ }
+
+ was_timer_set = timer_pending(&cm_core->tcp_timer);
+
+ if (!was_timer_set) {
+ cm_core->tcp_timer.expires = new_send->timetosend;
+ add_timer(&cm_core->tcp_timer);
+ }
+
+ return ret;
+}
+
+
+/**
+ * nes_cm_timer_tick
+ */
+static void nes_cm_timer_tick(unsigned long pass)
+{
+ unsigned long flags, qplockflags;
+ unsigned long nexttimeout = jiffies + NES_LONG_TIME;
+ struct iw_cm_id *cm_id;
+ struct nes_cm_node *cm_node;
+ struct nes_timer_entry *send_entry, *recv_entry;
+ struct list_head *list_core, *list_core_temp;
+ struct list_head *list_node, *list_node_temp;
+ struct nes_cm_core *cm_core = g_cm_core;
+ struct nes_qp *nesqp;
+ u32 settimer = 0;
+ int ret = NETDEV_TX_OK;
+ enum nes_cm_node_state last_state;
+
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+
+ list_for_each_safe(list_node, list_core_temp,
+ &cm_core->connected_nodes) {
+ cm_node = container_of(list_node, struct nes_cm_node, list);
+ add_ref_cm_node(cm_node);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ spin_lock_irqsave(&cm_node->recv_list_lock, flags);
+ list_for_each_safe(list_core, list_node_temp,
+ &cm_node->recv_list) {
+ recv_entry = container_of(list_core,
+ struct nes_timer_entry, list);
+ if (!recv_entry)
+ break;
+ if (time_after(recv_entry->timetosend, jiffies)) {
+ if (nexttimeout > recv_entry->timetosend ||
+ !settimer) {
+ nexttimeout = recv_entry->timetosend;
+ settimer = 1;
+ }
+ continue;
+ }
+ list_del(&recv_entry->list);
+ cm_id = cm_node->cm_id;
+ spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
+ nesqp = (struct nes_qp *)recv_entry->skb;
+ spin_lock_irqsave(&nesqp->lock, qplockflags);
+ if (nesqp->cm_id) {
+ nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
+ "refcount = %d: HIT A "
+ "NES_TIMER_TYPE_CLOSE with something "
+ "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
+ atomic_read(&nesqp->refcount));
+ nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+ nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
+ nesqp->ibqp_state = IB_QPS_ERR;
+ spin_unlock_irqrestore(&nesqp->lock,
+ qplockflags);
+ nes_cm_disconn(nesqp);
+ } else {
+ spin_unlock_irqrestore(&nesqp->lock,
+ qplockflags);
+ nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
+ "refcount = %d: HIT A "
+ "NES_TIMER_TYPE_CLOSE with nothing "
+ "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
+ atomic_read(&nesqp->refcount));
+ }
+ if (cm_id)
+ cm_id->rem_ref(cm_id);
+
+ kfree(recv_entry);
+ spin_lock_irqsave(&cm_node->recv_list_lock, flags);
+ }
+ spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
+
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ do {
+ send_entry = cm_node->send_entry;
+ if (!send_entry)
+ continue;
+ if (time_after(send_entry->timetosend, jiffies)) {
+ if (cm_node->state != NES_CM_STATE_TSA) {
+ if ((nexttimeout >
+ send_entry->timetosend) ||
+ !settimer) {
+ nexttimeout =
+ send_entry->timetosend;
+ settimer = 1;
+ continue;
+ }
+ } else {
+ free_retrans_entry(cm_node);
+ continue;
+ }
+ }
+
+ if ((cm_node->state == NES_CM_STATE_TSA) ||
+ (cm_node->state == NES_CM_STATE_CLOSED)) {
+ free_retrans_entry(cm_node);
+ continue;
+ }
+
+ if (!send_entry->retranscount ||
+ !send_entry->retrycount) {
+ cm_packets_dropped++;
+ last_state = cm_node->state;
+ cm_node->state = NES_CM_STATE_CLOSED;
+ free_retrans_entry(cm_node);
+ spin_unlock_irqrestore(
+ &cm_node->retrans_list_lock, flags);
+ if (last_state == NES_CM_STATE_SYN_RCVD)
+ rem_ref_cm_node(cm_core, cm_node);
+ else
+ create_event(cm_node,
+ NES_CM_EVENT_ABORTED);
+ spin_lock_irqsave(&cm_node->retrans_list_lock,
+ flags);
+ continue;
+ }
+ atomic_inc(&send_entry->skb->users);
+ cm_packets_retrans++;
+ nes_debug(NES_DBG_CM, "Retransmitting send_entry %p "
+ "for node %p, jiffies = %lu, time to send = "
+ "%lu, retranscount = %u, send_entry->seq_num = "
+ "0x%08X, cm_node->tcp_cntxt.rem_ack_num = "
+ "0x%08X\n", send_entry, cm_node, jiffies,
+ send_entry->timetosend,
+ send_entry->retranscount,
+ send_entry->seq_num,
+ cm_node->tcp_cntxt.rem_ack_num);
+
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock,
+ flags);
+ ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev);
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ if (ret != NETDEV_TX_OK) {
+ nes_debug(NES_DBG_CM, "rexmit failed for "
+ "node=%p\n", cm_node);
+ cm_packets_bounced++;
+ atomic_dec(&send_entry->skb->users);
+ send_entry->retrycount--;
+ nexttimeout = jiffies + NES_SHORT_TIME;
+ settimer = 1;
+ continue;
+ } else {
+ cm_packets_sent++;
+ }
+ nes_debug(NES_DBG_CM, "Packet Sent: retrans count = "
+ "%u, retry count = %u.\n",
+ send_entry->retranscount,
+ send_entry->retrycount);
+ if (send_entry->send_retrans) {
+ send_entry->retranscount--;
+ send_entry->timetosend = jiffies +
+ NES_RETRY_TIMEOUT;
+ if (nexttimeout > send_entry->timetosend ||
+ !settimer) {
+ nexttimeout = send_entry->timetosend;
+ settimer = 1;
+ }
+ } else {
+ int close_when_complete;
+ close_when_complete =
+ send_entry->close_when_complete;
+ nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n",
+ cm_node, cm_node->state);
+ free_retrans_entry(cm_node);
+ if (close_when_complete)
+ rem_ref_cm_node(cm_node->cm_core,
+ cm_node);
+ }
+ } while (0);
+
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ if (ret != NETDEV_TX_OK) {
+ nes_debug(NES_DBG_CM, "rexmit failed for cm_node=%p\n",
+ cm_node);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ if (settimer) {
+ if (!timer_pending(&cm_core->tcp_timer)) {
+ cm_core->tcp_timer.expires = nexttimeout;
+ add_timer(&cm_core->tcp_timer);
+ }
+ }
+}
+
+
+/**
+ * send_syn
+ */
+static int send_syn(struct nes_cm_node *cm_node, u32 sendack,
+ struct sk_buff *skb)
+{
+ int ret;
+ int flags = SET_SYN;
+ char optionsbuffer[sizeof(struct option_mss) +
+ sizeof(struct option_windowscale) + sizeof(struct option_base) +
+ TCP_OPTIONS_PADDING];
+
+ int optionssize = 0;
+ /* Sending MSS option */
+ union all_known_options *options;
+
+ if (!cm_node)
+ return -EINVAL;
+
+ options = (union all_known_options *)&optionsbuffer[optionssize];
+ options->as_mss.optionnum = OPTION_NUMBER_MSS;
+ options->as_mss.length = sizeof(struct option_mss);
+ options->as_mss.mss = htons(cm_node->tcp_cntxt.mss);
+ optionssize += sizeof(struct option_mss);
+
+ options = (union all_known_options *)&optionsbuffer[optionssize];
+ options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE;
+ options->as_windowscale.length = sizeof(struct option_windowscale);
+ options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
+ optionssize += sizeof(struct option_windowscale);
+
+ if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)) {
+ options = (union all_known_options *)&optionsbuffer[optionssize];
+ options->as_base.optionnum = OPTION_NUMBER_WRITE0;
+ options->as_base.length = sizeof(struct option_base);
+ optionssize += sizeof(struct option_base);
+ /* we need the size to be a multiple of 4 */
+ options = (union all_known_options *)&optionsbuffer[optionssize];
+ options->as_end = 1;
+ optionssize += 1;
+ options = (union all_known_options *)&optionsbuffer[optionssize];
+ options->as_end = 1;
+ optionssize += 1;
+ }
+
+ options = (union all_known_options *)&optionsbuffer[optionssize];
+ options->as_end = OPTION_NUMBER_END;
+ optionssize += 1;
+
+ if (!skb)
+ skb = get_free_pkt(cm_node);
+ if (!skb) {
+ nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
+ return -1;
+ }
+
+ if (sendack)
+ flags |= SET_ACK;
+
+ form_cm_frame(skb, cm_node, optionsbuffer, optionssize, NULL, 0, flags);
+ ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
+
+ return ret;
+}
+
+
+/**
+ * send_reset
+ */
+static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb)
+{
+ int ret;
+ int flags = SET_RST | SET_ACK;
+
+ if (!skb)
+ skb = get_free_pkt(cm_node);
+ if (!skb) {
+ nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
+ return -1;
+ }
+
+ form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags);
+ ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 1);
+
+ return ret;
+}
+
+
+/**
+ * send_ack
+ */
+static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb)
+{
+ int ret;
+
+ if (!skb)
+ skb = get_free_pkt(cm_node);
+
+ if (!skb) {
+ nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
+ return -1;
+ }
+
+ form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK);
+ ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 0);
+
+ return ret;
+}
+
+
+/**
+ * send_fin
+ */
+static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
+{
+ int ret;
+
+ /* if we didn't get a frame get one */
+ if (!skb)
+ skb = get_free_pkt(cm_node);
+
+ if (!skb) {
+ nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
+ return -1;
+ }
+
+ form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK | SET_FIN);
+ ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
+
+ return ret;
+}
+
+
+/**
+ * get_free_pkt
+ */
+static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node)
+{
+ struct sk_buff *skb, *new_skb;
+
+ /* check to see if we need to repopulate the free tx pkt queue */
+ if (skb_queue_len(&cm_node->cm_core->tx_free_list) < NES_CM_FREE_PKT_LO_WATERMARK) {
+ while (skb_queue_len(&cm_node->cm_core->tx_free_list) <
+ cm_node->cm_core->free_tx_pkt_max) {
+ /* replace the frame we took, we won't get it back */
+ new_skb = dev_alloc_skb(cm_node->cm_core->mtu);
+ BUG_ON(!new_skb);
+ /* add a replacement frame to the free tx list head */
+ skb_queue_head(&cm_node->cm_core->tx_free_list, new_skb);
+ }
+ }
+
+ skb = skb_dequeue(&cm_node->cm_core->tx_free_list);
+
+ return skb;
+}
+
+
+/**
+ * make_hashkey - generate hash key from node tuple
+ */
+static inline int make_hashkey(u16 loc_port, nes_addr_t loc_addr, u16 rem_port,
+ nes_addr_t rem_addr)
+{
+ u32 hashkey = 0;
+
+ hashkey = loc_addr + rem_addr + loc_port + rem_port;
+ hashkey = (hashkey % NES_CM_HASHTABLE_SIZE);
+
+ return hashkey;
+}
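+
+/*
+ * Worked example (addresses are illustrative): for loc 192.168.1.1:4000
+ * and rem 192.168.1.2:5000 the key is
+ * (0xc0a80101 + 0xc0a80102 + 4000 + 5000) % NES_CM_HASHTABLE_SIZE;
+ * the sum is symmetric, so swapping local and remote yields the same key.
+ */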
+
+
+/**
+ * find_node - find a cm node that matches the reference cm node
+ */
+static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
+ u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
+{
+ unsigned long flags;
+ u32 hashkey;
+ struct list_head *hte;
+ struct nes_cm_node *cm_node;
+
+ /* make a hash index key for this packet */
+ hashkey = make_hashkey(loc_port, loc_addr, rem_port, rem_addr);
+
+ /* get a handle on the hte */
+ hte = &cm_core->connected_nodes;
+
+ nes_debug(NES_DBG_CM, "Searching for an owner node: " NIPQUAD_FMT ":%x from core %p->%p\n",
+ HIPQUAD(loc_addr), loc_port, cm_core, hte);
+
+ /* walk list and find cm_node associated with this session ID */
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_for_each_entry(cm_node, hte, list) {
+ /* compare quad, return node handle if a match */
+ nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n",
+ cm_node->loc_addr, cm_node->loc_port,
+ loc_addr, loc_port,
+ cm_node->rem_addr, cm_node->rem_port,
+ rem_addr, rem_port);
+ if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) &&
+ (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) {
+ add_ref_cm_node(cm_node);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ return cm_node;
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ /* no owner node */
+ return NULL;
+}
+
+
+/**
+ * find_listener - find a cm node listening on this addr-port pair
+ */
+static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
+ nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state)
+{
+ unsigned long flags;
+ struct nes_cm_listener *listen_node;
+
+ /* walk list and find cm_node associated with this session ID */
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
+ /* compare node pair, return node handle if a match */
+ if (((listen_node->loc_addr == dst_addr) ||
+ listen_node->loc_addr == 0x00000000) &&
+ (listen_node->loc_port == dst_port) &&
+ (listener_state & listen_node->listener_state)) {
+ atomic_inc(&listen_node->ref_count);
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+ return listen_node;
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+ nes_debug(NES_DBG_CM, "Unable to find listener for " NIPQUAD_FMT ":%x\n",
+ HIPQUAD(dst_addr), dst_port);
+
+ /* no listener */
+ return NULL;
+}
+
+
+/**
+ * add_hte_node - add a cm node to the hash table
+ */
+static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
+{
+ unsigned long flags;
+ u32 hashkey;
+ struct list_head *hte;
+
+ if (!cm_node || !cm_core)
+ return -EINVAL;
+
+ nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n",
+ cm_node);
+
+ /* first, make an index into our hash table */
+ hashkey = make_hashkey(cm_node->loc_port, cm_node->loc_addr,
+ cm_node->rem_port, cm_node->rem_addr);
+ cm_node->hashkey = hashkey;
+
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+
+ /* get a handle on the hash table element (list head for this slot) */
+ hte = &cm_core->connected_nodes;
+ list_add_tail(&cm_node->list, hte);
+ atomic_inc(&cm_core->ht_node_cnt);
+
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ return 0;
+}
+
+
+/**
+ * mini_cm_dec_refcnt_listen
+ */
+static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
+ struct nes_cm_listener *listener, int free_hanging_nodes)
+{
+ int ret = 1;
+ unsigned long flags;
+ struct list_head *list_pos = NULL;
+ struct list_head *list_temp = NULL;
+ struct nes_cm_node *cm_node = NULL;
+
+ nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, "
+ "refcnt=%d\n", listener, free_hanging_nodes,
+ atomic_read(&listener->ref_count));
+ /* free non-accelerated child nodes for this listener */
+ if (free_hanging_nodes) {
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_for_each_safe(list_pos, list_temp,
+ &g_cm_core->connected_nodes) {
+ cm_node = container_of(list_pos, struct nes_cm_node,
+ list);
+ if ((cm_node->listener == listener) &&
+ (!cm_node->accelerated)) {
+ cleanup_retrans_entry(cm_node);
+ spin_unlock_irqrestore(&cm_core->ht_lock,
+ flags);
+ send_reset(cm_node, NULL);
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ }
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ if (!atomic_dec_return(&listener->ref_count)) {
+ list_del(&listener->list);
+
+ /* decrement our listen node count */
+ atomic_dec(&cm_core->listen_node_cnt);
+
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+ if (listener->nesvnic) {
+ nes_manage_apbvt(listener->nesvnic, listener->loc_port,
+ PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
+ }
+
+ nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
+
+ kfree(listener);
+ listener = NULL;
+ ret = 0;
+ cm_listens_destroyed++;
+ } else {
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+ }
+ if (listener) {
+ if (atomic_read(&listener->pend_accepts_cnt) > 0)
+ nes_debug(NES_DBG_CM, "destroying listener (%p)"
+ " with non-zero pending accepts=%u\n",
+ listener, atomic_read(&listener->pend_accepts_cnt));
+ }
+
+ return ret;
+}
+
+
+/**
+ * mini_cm_del_listen
+ */
+static int mini_cm_del_listen(struct nes_cm_core *cm_core,
+ struct nes_cm_listener *listener)
+{
+ listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE;
+ listener->cm_id = NULL; /* going to be destroyed pretty soon */
+ return mini_cm_dec_refcnt_listen(cm_core, listener, 1);
+}
+
+
+/**
+ * mini_cm_accelerated
+ */
+static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
+ struct nes_cm_node *cm_node)
+{
+ u32 was_timer_set;
+ cm_node->accelerated = 1;
+
+ if (cm_node->accept_pend) {
+ BUG_ON(!cm_node->listener);
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
+ }
+
+ was_timer_set = timer_pending(&cm_core->tcp_timer);
+ if (!was_timer_set) {
+ cm_core->tcp_timer.expires = jiffies + NES_SHORT_TIME;
+ add_timer(&cm_core->tcp_timer);
+ }
+
+ return 0;
+}
+
+
+/**
+ * nes_addr_resolve_neigh
+ */
+static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip)
+{
+ struct rtable *rt;
+ struct flowi fl;
+ struct neighbour *neigh;
+ int rc = -1;
+ DECLARE_MAC_BUF(mac);
+
+ memset(&fl, 0, sizeof fl);
+ fl.nl_u.ip4_u.daddr = htonl(dst_ip);
+ if (ip_route_output_key(&init_net, &rt, &fl)) {
+ printk("%s: ip_route_output_key failed for 0x%08X\n",
+ __func__, dst_ip);
+ return rc;
+ }
+
+ neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, nesvnic->netdev);
+ if (neigh) {
+ if (neigh->nud_state & NUD_VALID) {
+ nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
+ " is %s, Gateway is 0x%08X \n", dst_ip,
+ print_mac(mac, neigh->ha), ntohl(rt->rt_gateway));
+ nes_manage_arp_cache(nesvnic->netdev, neigh->ha,
+ dst_ip, NES_ARP_ADD);
+ rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL,
+ NES_ARP_RESOLVE);
+ }
+ neigh_release(neigh);
+ }
+
+ if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID)))
+ neigh_event_send(rt->u.dst.neighbour, NULL);
+
+ ip_rt_put(rt);
+ return rc;
+}
+
+
+/**
+ * make_cm_node - create a new instance of a cm node
+ */
+static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
+ struct nes_vnic *nesvnic, struct nes_cm_info *cm_info,
+ struct nes_cm_listener *listener)
+{
+ struct nes_cm_node *cm_node;
+ struct timespec ts;
+ int arpindex = 0;
+ struct nes_device *nesdev;
+ struct nes_adapter *nesadapter;
+ DECLARE_MAC_BUF(mac);
+
+ /* create an hte and cm_node for this instance */
+ cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
+ if (!cm_node)
+ return NULL;
+
+ /* set our node specific transport info */
+ cm_node->loc_addr = cm_info->loc_addr;
+ cm_node->rem_addr = cm_info->rem_addr;
+ cm_node->loc_port = cm_info->loc_port;
+ cm_node->rem_port = cm_info->rem_port;
+ cm_node->send_write0 = send_first;
+ nes_debug(NES_DBG_CM, "Make node addresses : loc = " NIPQUAD_FMT
+ ":%x, rem = " NIPQUAD_FMT ":%x\n",
+ HIPQUAD(cm_node->loc_addr), cm_node->loc_port,
+ HIPQUAD(cm_node->rem_addr), cm_node->rem_port);
+ cm_node->listener = listener;
+ cm_node->netdev = nesvnic->netdev;
+ cm_node->cm_id = cm_info->cm_id;
+ memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN);
+
+ nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener,
+ cm_node->cm_id);
+
+ spin_lock_init(&cm_node->retrans_list_lock);
+ INIT_LIST_HEAD(&cm_node->recv_list);
+ spin_lock_init(&cm_node->recv_list_lock);
+
+ cm_node->loopbackpartner = NULL;
+ atomic_set(&cm_node->ref_count, 1);
+ /* associate our parent CM core */
+ cm_node->cm_core = cm_core;
+ cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID;
+ cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
+ cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >>
+ NES_CM_DEFAULT_RCV_WND_SCALE;
+ ts = current_kernel_time();
+ cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
+ cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) -
+ sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
+ cm_node->tcp_cntxt.rcv_nxt = 0;
+ /* get a unique session ID , add thread_id to an upcounter to handle race */
+ atomic_inc(&cm_core->node_cnt);
+ cm_node->conn_type = cm_info->conn_type;
+ cm_node->apbvt_set = 0;
+ cm_node->accept_pend = 0;
+
+ cm_node->nesvnic = nesvnic;
+ /* get some device handles, for arp lookup */
+ nesdev = nesvnic->nesdev;
+ nesadapter = nesdev->nesadapter;
+
+ cm_node->loopbackpartner = NULL;
+ /* get the mac addr for the remote node */
+ arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
+ if (arpindex < 0) {
+ arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr);
+ if (arpindex < 0) {
+ kfree(cm_node);
+ return NULL;
+ }
+ }
+
+ /* copy the mac addr to node context */
+ memcpy(cm_node->rem_mac, nesadapter->arp_table[arpindex].mac_addr, ETH_ALEN);
+ nes_debug(NES_DBG_CM, "Remote mac addr from arp table: %s\n",
+ print_mac(mac, cm_node->rem_mac));
+
+ add_hte_node(cm_core, cm_node);
+ atomic_inc(&cm_nodes_created);
+
+ return cm_node;
+}
+
+
+/**
+ * add_ref_cm_node - take a reference on a cm node
+ */
+static int add_ref_cm_node(struct nes_cm_node *cm_node)
+{
+ atomic_inc(&cm_node->ref_count);
+ return 0;
+}
+
+
+/**
+ * rem_ref_cm_node - drop a reference to a cm node, destroying it on the last put
+ */
+static int rem_ref_cm_node(struct nes_cm_core *cm_core,
+ struct nes_cm_node *cm_node)
+{
+ unsigned long flags, qplockflags;
+ struct nes_timer_entry *recv_entry;
+ struct iw_cm_id *cm_id;
+ struct list_head *list_core, *list_node_temp;
+ struct nes_qp *nesqp;
+
+ if (!cm_node)
+ return -EINVAL;
+
+ spin_lock_irqsave(&cm_node->cm_core->ht_lock, flags);
+ if (atomic_dec_return(&cm_node->ref_count)) {
+ spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
+ return 0;
+ }
+ list_del(&cm_node->list);
+ atomic_dec(&cm_core->ht_node_cnt);
+ spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
+
+ /* if the node is destroyed before connection was accelerated */
+ if (!cm_node->accelerated && cm_node->accept_pend) {
+ BUG_ON(!cm_node->listener);
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
+ }
+ BUG_ON(cm_node->send_entry);
+ spin_lock_irqsave(&cm_node->recv_list_lock, flags);
+ list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) {
+ recv_entry = container_of(list_core, struct nes_timer_entry,
+ list);
+ list_del(&recv_entry->list);
+ cm_id = cm_node->cm_id;
+ spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
+ nesqp = (struct nes_qp *)recv_entry->skb;
+ spin_lock_irqsave(&nesqp->lock, qplockflags);
+ if (nesqp->cm_id) {
+ nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: HIT A "
+ "NES_TIMER_TYPE_CLOSE with something to do!\n",
+ nesqp->hwqp.qp_id, cm_id);
+ nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+ nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
+ nesqp->ibqp_state = IB_QPS_ERR;
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ nes_cm_disconn(nesqp);
+ } else {
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: HIT A "
+ "NES_TIMER_TYPE_CLOSE with nothing to do!\n",
+ nesqp->hwqp.qp_id, cm_id);
+ }
+ cm_id->rem_ref(cm_id);
+
+ kfree(recv_entry);
+ spin_lock_irqsave(&cm_node->recv_list_lock, flags);
+ }
+ spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
+
+ if (cm_node->listener) {
+ mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0);
+ } else {
+ if (cm_node->apbvt_set && cm_node->nesvnic) {
+ nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
+ PCI_FUNC(
+ cm_node->nesvnic->nesdev->pcidev->devfn),
+ NES_MANAGE_APBVT_DEL);
+ }
+ }
+
+ atomic_dec(&cm_core->node_cnt);
+ atomic_inc(&cm_nodes_destroyed);
+ nesqp = cm_node->nesqp;
+ if (nesqp) {
+ nesqp->cm_node = NULL;
+ nes_rem_ref(&nesqp->ibqp);
+ cm_node->nesqp = NULL;
+ }
+
+ cm_node->freed = 1;
+ kfree(cm_node);
+ return 0;
+}
+
+/**
+ * process_options
+ */
+static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc,
+ u32 optionsize, u32 syn_packet)
+{
+ u32 tmp;
+ u32 offset = 0;
+ union all_known_options *all_options;
+ char got_mss_option = 0;
+
+ while (offset < optionsize) {
+ all_options = (union all_known_options *)(optionsloc + offset);
+ switch (all_options->as_base.optionnum) {
+ case OPTION_NUMBER_END:
+ offset = optionsize;
+ break;
+ case OPTION_NUMBER_NONE:
+ offset += 1;
+ continue;
+ case OPTION_NUMBER_MSS:
+ nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d "
+ "Size: %d\n", __func__,
+ all_options->as_mss.length, offset, optionsize);
+ got_mss_option = 1;
+ if (all_options->as_mss.length != 4) {
+ return 1;
+ } else {
+ tmp = ntohs(all_options->as_mss.mss);
+ if (tmp > 0 && tmp <
+ cm_node->tcp_cntxt.mss)
+ cm_node->tcp_cntxt.mss = tmp;
+ }
+ break;
+ case OPTION_NUMBER_WINDOW_SCALE:
+ cm_node->tcp_cntxt.snd_wscale =
+ all_options->as_windowscale.shiftcount;
+ break;
+ case OPTION_NUMBER_WRITE0:
+ cm_node->send_write0 = 1;
+ break;
+ default:
+ nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n",
+ all_options->as_base.optionnum);
+ break;
+ }
+ offset += all_options->as_base.length;
+ }
+ if ((!got_mss_option) && (syn_packet))
+ cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS;
+ return 0;
+}
+
+static void drop_packet(struct sk_buff *skb)
+{
+ atomic_inc(&cm_accel_dropped_pkts);
+ dev_kfree_skb_any(skb);
+}
+
+static void handle_fin_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
+{
+ atomic_inc(&cm_resets_recvd);
+ nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
+ "refcnt=%d\n", cm_node, cm_node->state,
+ atomic_read(&cm_node->ref_count));
+ cm_node->tcp_cntxt.rcv_nxt++;
+ cleanup_retrans_entry(cm_node);
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_RCVD:
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_MPAREQ_SENT:
+ cm_node->state = NES_CM_STATE_LAST_ACK;
+ send_fin(cm_node, skb);
+ break;
+ case NES_CM_STATE_FIN_WAIT1:
+ cm_node->state = NES_CM_STATE_CLOSING;
+ send_ack(cm_node, skb);
+ break;
+ case NES_CM_STATE_FIN_WAIT2:
+ cm_node->state = NES_CM_STATE_TIME_WAIT;
+ send_ack(cm_node, skb);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ break;
+ case NES_CM_STATE_TSA:
+ default:
+ nes_debug(NES_DBG_CM, "Error Rcvd FIN for node-%p state = %d\n",
+ cm_node, cm_node->state);
+ drop_packet(skb);
+ break;
+ }
+}
+
+
+static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
+{
+
+ int reset = 0; /* whether to send reset in case of err.. */
+ atomic_inc(&cm_resets_recvd);
+ nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u."
+ " refcnt=%d\n", cm_node, cm_node->state,
+ atomic_read(&cm_node->ref_count));
+ cleanup_retrans_entry(cm_node);
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_MPAREQ_SENT:
+ nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
+ "listener=%p state=%d\n", __func__, __LINE__, cm_node,
+ cm_node->listener, cm_node->state);
+ active_open_err(cm_node, skb, reset);
+ break;
+ /* For PASSIVE open states, remove the cm_node event */
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_SYN_RCVD:
+ case NES_CM_STATE_LISTENING:
+ nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__);
+ passive_open_err(cm_node, skb, reset);
+ break;
+ case NES_CM_STATE_TSA:
+ default:
+ break;
+ }
+}
+
+static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ enum nes_cm_event_type type)
+{
+
+ int ret;
+ int datasize = skb->len;
+ u8 *dataloc = skb->data;
+ ret = parse_mpa(cm_node, dataloc, datasize);
+ if (ret < 0) {
+ nes_debug(NES_DBG_CM, "didn't like MPA Request\n");
+ if (type == NES_CM_EVENT_CONNECTED) {
+ nes_debug(NES_DBG_CM, "%s[%u] create abort for "
+ "cm_node=%p listener=%p state=%d\n", __func__,
+ __LINE__, cm_node, cm_node->listener,
+ cm_node->state);
+ active_open_err(cm_node, skb, 1);
+ } else {
+ passive_open_err(cm_node, skb, 1);
+ }
+ } else {
+ cleanup_retrans_entry(cm_node);
+ dev_kfree_skb_any(skb);
+ if (type == NES_CM_EVENT_CONNECTED)
+ cm_node->state = NES_CM_STATE_TSA;
+ create_event(cm_node, type);
+
+ }
+ return ;
+}
+
+static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb)
+{
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_MPAREQ_SENT:
+ nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
+ "listener=%p state=%d\n", __func__, __LINE__, cm_node,
+ cm_node->listener, cm_node->state);
+ active_open_err(cm_node, skb, 1);
+ break;
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_SYN_RCVD:
+ passive_open_err(cm_node, skb, 1);
+ break;
+ case NES_CM_STATE_TSA:
+ default:
+ drop_packet(skb);
+ }
+}
+
+static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph,
+ struct sk_buff *skb)
+{
+ int err;
+
+ err = (ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num) ? 0 : 1;
+ if (err)
+ active_open_err(cm_node, skb, 1);
+
+ return err;
+}
+
+static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph,
+ struct sk_buff *skb)
+{
+ int err = 0;
+ u32 seq;
+ u32 ack_seq;
+ u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
+ u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
+ u32 rcv_wnd;
+ seq = ntohl(tcph->seq);
+ ack_seq = ntohl(tcph->ack_seq);
+ rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
+ if (ack_seq != loc_seq_num)
+ err = 1;
+ else if ((seq + rcv_wnd) < rcv_nxt)
+ err = 1;
+ if (err) {
+ nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
+ "listener=%p state=%d\n", __func__, __LINE__, cm_node,
+ cm_node->listener, cm_node->state);
+ indicate_pkt_err(cm_node, skb);
+ nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X "
+ "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt,
+ rcv_wnd);
+ }
+ return err;
+}
+
+/*
+ * handle_syn_pkt() is for the passive node. The SYN packet is received when a
+ * node is created with a listener, or it may come in as a retransmitted
+ * packet, in which case it is simply dropped.
+ */
+
+static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
+{
+ int ret;
+ u32 inc_sequence;
+ int optionsize;
+
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+ skb_pull(skb, tcph->doff << 2);
+ inc_sequence = ntohl(tcph->seq);
+
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_MPAREQ_SENT:
+ /* Rcvd syn on active open connection*/
+ active_open_err(cm_node, skb, 1);
+ break;
+ case NES_CM_STATE_LISTENING:
+ /* Passive OPEN */
+ cm_node->accept_pend = 1;
+ atomic_inc(&cm_node->listener->pend_accepts_cnt);
+ if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
+ cm_node->listener->backlog) {
+ nes_debug(NES_DBG_CM, "drop syn due to backlog "
+ "pressure \n");
+ cm_backlog_drops++;
+ passive_open_err(cm_node, skb, 0);
+ break;
+ }
+ ret = handle_tcp_options(cm_node, tcph, skb, optionsize,
+ 1);
+ if (ret) {
+ passive_open_err(cm_node, skb, 0);
+ /* drop pkt */
+ break;
+ }
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+ BUG_ON(cm_node->send_entry);
+ cm_node->state = NES_CM_STATE_SYN_RCVD;
+ send_syn(cm_node, 1, skb);
+ break;
+ case NES_CM_STATE_TSA:
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_FIN_WAIT1:
+ case NES_CM_STATE_FIN_WAIT2:
+ case NES_CM_STATE_MPAREQ_RCVD:
+ case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_CLOSING:
+ case NES_CM_STATE_UNKNOWN:
+ case NES_CM_STATE_CLOSED:
+ default:
+ drop_packet(skb);
+ break;
+ }
+}
+
+static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
+{
+
+ int ret;
+ u32 inc_sequence;
+ int optionsize;
+
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+ skb_pull(skb, tcph->doff << 2);
+ inc_sequence = ntohl(tcph->seq);
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_SENT:
+ /* active open */
+ if (check_syn(cm_node, tcph, skb))
+ return;
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ /* setup options */
+ ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0);
+ if (ret) {
+ nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n",
+ cm_node);
+ break;
+ }
+ cleanup_retrans_entry(cm_node);
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+ send_mpa_request(cm_node, skb);
+ cm_node->state = NES_CM_STATE_MPAREQ_SENT;
+ break;
+ case NES_CM_STATE_MPAREQ_RCVD:
+ /* passive open, so should not be here */
+ passive_open_err(cm_node, skb, 1);
+ break;
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_FIN_WAIT1:
+ case NES_CM_STATE_FIN_WAIT2:
+ case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_TSA:
+ case NES_CM_STATE_CLOSING:
+ case NES_CM_STATE_UNKNOWN:
+ case NES_CM_STATE_CLOSED:
+ case NES_CM_STATE_MPAREQ_SENT:
+ default:
+ drop_packet(skb);
+ break;
+ }
+}
+
+static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
+{
+ int datasize = 0;
+ u32 inc_sequence;
+ u32 rem_seq_ack;
+ u32 rem_seq;
+ if (check_seq(cm_node, tcph, skb))
+ return;
+
+ skb_pull(skb, tcph->doff << 2);
+ inc_sequence = ntohl(tcph->seq);
+ rem_seq = ntohl(tcph->seq);
+ rem_seq_ack = ntohl(tcph->ack_seq);
+ datasize = skb->len;
+
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_RCVD:
+ /* Passive OPEN */
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ cm_node->state = NES_CM_STATE_ESTABLISHED;
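+ /* any data riding on this ACK is the start of the peer's MPA request */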
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ cm_node->state = NES_CM_STATE_MPAREQ_RCVD;
+ handle_rcv_mpa(cm_node, skb, NES_CM_EVENT_MPA_REQ);
+ } else { /* rcvd ACK only */
+ dev_kfree_skb_any(skb);
+ cleanup_retrans_entry(cm_node);
+ }
+ break;
+ case NES_CM_STATE_ESTABLISHED:
+ /* Passive OPEN */
+ /* We expect mpa frame to be received only */
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ cm_node->state = NES_CM_STATE_MPAREQ_RCVD;
+ handle_rcv_mpa(cm_node, skb,
+ NES_CM_EVENT_MPA_REQ);
+ } else
+ drop_packet(skb);
+ break;
+ case NES_CM_STATE_MPAREQ_SENT:
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ handle_rcv_mpa(cm_node, skb, NES_CM_EVENT_CONNECTED);
+ } else { /* Could be just an ack pkt.. */
+ cleanup_retrans_entry(cm_node);
+ dev_kfree_skb_any(skb);
+ }
+ break;
+ case NES_CM_STATE_FIN_WAIT1:
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_FIN_WAIT2:
+ case NES_CM_STATE_TSA:
+ case NES_CM_STATE_CLOSED:
+ case NES_CM_STATE_MPAREQ_RCVD:
+ case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_CLOSING:
+ case NES_CM_STATE_UNKNOWN:
+ default:
+ drop_packet(skb);
+ break;
+ }
+}
+
+
+
+static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
+ struct sk_buff *skb, int optionsize, int passive)
+{
+ u8 *optionsloc = (u8 *)&tcph[1];
+ if (optionsize) {
+ if (process_options(cm_node, optionsloc, optionsize,
+ (u32)tcph->syn)) {
+ nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n",
+ __func__, cm_node);
+ if (passive)
+ passive_open_err(cm_node, skb, 0);
+ else
+ active_open_err(cm_node, skb, 0);
+ return 1;
+ }
+ }
+
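+ /* track the peer's advertised send window, applying its window scale */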
+ cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
+ cm_node->tcp_cntxt.snd_wscale;
+
+ if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
+ cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
+ return 0;
+}
+
+/*
+ * active_open_err() sends a reset if the reset flag is set.
+ * It also posts an ABORTED event.
+ */
+
+static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ int reset)
+{
+ cleanup_retrans_entry(cm_node);
+ if (reset) {
+ nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, "
+ "state=%d\n", cm_node, cm_node->state);
+ add_ref_cm_node(cm_node);
+ send_reset(cm_node, skb);
+ } else
+ dev_kfree_skb_any(skb);
+
+ cm_node->state = NES_CM_STATE_CLOSED;
+ create_event(cm_node, NES_CM_EVENT_ABORTED);
+}
+
+/*
+ * passive_open_err() either sends a reset or frees the skb and
+ * removes the cm_node.
+ */
+
+static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ int reset)
+{
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ if (reset) {
+ nes_debug(NES_DBG_CM, "passive_open_err sending RST for "
+ "cm_node=%p state =%d\n", cm_node, cm_node->state);
+ send_reset(cm_node, skb);
+ } else {
+ dev_kfree_skb_any(skb);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ }
+}
+
+/*
+ * free_retrans_entry() assumes that the retrans_list_lock has been
+ * acquired before it is called.
+ */
+static void free_retrans_entry(struct nes_cm_node *cm_node)
+{
+ struct nes_timer_entry *send_entry;
+ send_entry = cm_node->send_entry;
+ if (send_entry) {
+ cm_node->send_entry = NULL;
+ dev_kfree_skb_any(send_entry->skb);
+ kfree(send_entry);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ }
+}
+
+static void cleanup_retrans_entry(struct nes_cm_node *cm_node)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ free_retrans_entry(cm_node);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+}
+
+/**
+ * process_packet
+ * Classify the incoming TCP segment and dispatch it to the handler for
+ * its type; each handler is responsible for consuming or freeing the skb.
+ */
+static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct nes_cm_core *cm_core)
+{
+ enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN;
+ struct tcphdr *tcph = tcp_hdr(skb);
+ skb_pull(skb, ip_hdr(skb)->ihl << 2);
+
+ nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d "
+ "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn,
+ tcph->ack, tcph->rst, tcph->fin);
+
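+ /* classify by TCP flags: RST first, then SYN/SYN+ACK, FIN, plain ACK */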
+ if (tcph->rst)
+ pkt_type = NES_PKT_TYPE_RST;
+ else if (tcph->syn) {
+ pkt_type = NES_PKT_TYPE_SYN;
+ if (tcph->ack)
+ pkt_type = NES_PKT_TYPE_SYNACK;
+ } else if (tcph->fin)
+ pkt_type = NES_PKT_TYPE_FIN;
+ else if (tcph->ack)
+ pkt_type = NES_PKT_TYPE_ACK;
+
+ switch (pkt_type) {
+ case NES_PKT_TYPE_SYN:
+ handle_syn_pkt(cm_node, skb, tcph);
+ break;
+ case NES_PKT_TYPE_SYNACK:
+ handle_synack_pkt(cm_node, skb, tcph);
+ break;
+ case NES_PKT_TYPE_ACK:
+ handle_ack_pkt(cm_node, skb, tcph);
+ break;
+ case NES_PKT_TYPE_RST:
+ handle_rst_pkt(cm_node, skb, tcph);
+ break;
+ case NES_PKT_TYPE_FIN:
+ handle_fin_pkt(cm_node, skb, tcph);
+ break;
+ default:
+ drop_packet(skb);
+ break;
+ }
+}
+
+/**
+ * mini_cm_listen - create a listen node with params
+ */
+static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
+ struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
+{
+ struct nes_cm_listener *listener;
+ unsigned long flags;
+
+ nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
+ cm_info->loc_addr, cm_info->loc_port);
+
+ /* cannot have multiple matching listeners */
+ listener = find_listener(cm_core, htonl(cm_info->loc_addr),
+ htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE);
+ if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
+ /* find_listener() already incremented the ref count, so drop it */
+ atomic_dec(&listener->ref_count);
+ nes_debug(NES_DBG_CM, "Not creating listener since it already exists\n");
+ return NULL;
+ }
+
+ if (!listener) {
+ /* create a CM listen node (1/2 node to compare incoming traffic to) */
+ listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
+ if (!listener) {
+ nes_debug(NES_DBG_CM, "Not creating listener: memory allocation failed\n");
+ return NULL;
+ }
+
+ listener->loc_addr = htonl(cm_info->loc_addr);
+ listener->loc_port = htons(cm_info->loc_port);
+ listener->reused_node = 0;
+
+ atomic_set(&listener->ref_count, 1);
+ } else {
+ /* passive case; find_listener() already took a reference */
+ listener->reused_node = 1;
+ }
+
+ listener->cm_id = cm_info->cm_id;
+ atomic_set(&listener->pend_accepts_cnt, 0);
+ listener->cm_core = cm_core;
+ listener->nesvnic = nesvnic;
+ atomic_inc(&cm_core->node_cnt);
+
+ listener->conn_type = cm_info->conn_type;
+ listener->backlog = cm_info->backlog;
+ listener->listener_state = NES_CM_LISTENER_ACTIVE_STATE;
+
+ if (!listener->reused_node) {
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_add(&listener->list, &cm_core->listen_list.list);
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+ atomic_inc(&cm_core->listen_node_cnt);
+ }
+
+ nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x,"
+ " listener = %p, backlog = %d, cm_id = %p.\n",
+ cm_info->loc_addr, cm_info->loc_port,
+ listener, listener->backlog, listener->cm_id);
+
+ return listener;
+}
+
+
+/**
+ * mini_cm_connect - make a connection node with params
+ */
+static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
+ struct nes_vnic *nesvnic, u16 private_data_len,
+ void *private_data, struct nes_cm_info *cm_info)
+{
+ int ret = 0;
+ struct nes_cm_node *cm_node;
+ struct nes_cm_listener *loopbackremotelistener;
+ struct nes_cm_node *loopbackremotenode;
+ struct nes_cm_info loopback_cm_info;
+ u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + private_data_len;
+ struct ietf_mpa_frame *mpa_frame = NULL;
+
+ /* create a CM connection node */
+ cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL);
+ if (!cm_node)
+ return NULL;
+ mpa_frame = &cm_node->mpa_frame;
+ strcpy(mpa_frame->key, IEFT_MPA_KEY_REQ);
+ mpa_frame->flags = IETF_MPA_FLAGS_CRC;
+ mpa_frame->rev = IETF_MPA_VERSION;
+ mpa_frame->priv_data_len = htons(private_data_len);
+
+ /* set our node side to client (active) side */
+ cm_node->tcp_cntxt.client = 1;
+ cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
+
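+ /* loopback: both ends are local, so cross-link the two cm_nodes */
+ /* directly instead of exchanging packets on the wire */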
+ if (cm_info->loc_addr == cm_info->rem_addr) {
+ loopbackremotelistener = find_listener(cm_core,
+ ntohl(nesvnic->local_ipaddr), cm_node->rem_port,
+ NES_CM_LISTENER_ACTIVE_STATE);
+ if (loopbackremotelistener == NULL) {
+ create_event(cm_node, NES_CM_EVENT_ABORTED);
+ } else {
+ atomic_inc(&cm_loopbacks);
+ loopback_cm_info = *cm_info;
+ loopback_cm_info.loc_port = cm_info->rem_port;
+ loopback_cm_info.rem_port = cm_info->loc_port;
+ loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
+ loopbackremotenode = make_cm_node(cm_core, nesvnic,
+ &loopback_cm_info, loopbackremotelistener);
+ loopbackremotenode->loopbackpartner = cm_node;
+ loopbackremotenode->tcp_cntxt.rcv_wscale =
+ NES_CM_DEFAULT_RCV_WND_SCALE;
+ cm_node->loopbackpartner = loopbackremotenode;
+ memcpy(loopbackremotenode->mpa_frame_buf, private_data,
+ private_data_len);
+ loopbackremotenode->mpa_frame_size = private_data_len;
+
+ /* we are done handling this state. */
+ /* set node to a TSA state */
+ cm_node->state = NES_CM_STATE_TSA;
+ cm_node->tcp_cntxt.rcv_nxt =
+ loopbackremotenode->tcp_cntxt.loc_seq_num;
+ loopbackremotenode->tcp_cntxt.rcv_nxt =
+ cm_node->tcp_cntxt.loc_seq_num;
+ cm_node->tcp_cntxt.max_snd_wnd =
+ loopbackremotenode->tcp_cntxt.rcv_wnd;
+ loopbackremotenode->tcp_cntxt.max_snd_wnd =
+ cm_node->tcp_cntxt.rcv_wnd;
+ cm_node->tcp_cntxt.snd_wnd =
+ loopbackremotenode->tcp_cntxt.rcv_wnd;
+ loopbackremotenode->tcp_cntxt.snd_wnd =
+ cm_node->tcp_cntxt.rcv_wnd;
+ cm_node->tcp_cntxt.snd_wscale =
+ loopbackremotenode->tcp_cntxt.rcv_wscale;
+ loopbackremotenode->tcp_cntxt.snd_wscale =
+ cm_node->tcp_cntxt.rcv_wscale;
+
+ create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ);
+ }
+ return cm_node;
+ }
+
+ /* set our node side to client (active) side */
+ cm_node->tcp_cntxt.client = 1;
+ /* init our MPA frame ptr */
+ memcpy(mpa_frame->priv_data, private_data, private_data_len);
+
+ cm_node->mpa_frame_size = mpa_frame_size;
+
+ /* send a syn and goto syn sent state */
+ cm_node->state = NES_CM_STATE_SYN_SENT;
+ ret = send_syn(cm_node, 0, NULL);
+
+ if (ret) {
+ /* error sending the syn; free up the cm_node struct */
+ nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest "
+ "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n",
+ cm_node->rem_addr, cm_node->rem_port, cm_node,
+ cm_node->cm_id);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ cm_node = NULL;
+ }
+
+ if (cm_node)
+ nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X,"
+ "port=0x%04x, cm_node=%p, cm_id = %p.\n",
+ cm_node->rem_addr, cm_node->rem_port, cm_node,
+ cm_node->cm_id);
+
+ return cm_node;
+}
+
+
+/**
+ * mini_cm_accept - accept a connection
+ * This function is never called
+ */
+static int mini_cm_accept(struct nes_cm_core *cm_core,
+ struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node)
+{
+ return 0;
+}
+
+
+/**
+ * mini_cm_reject - reject and teardown a connection
+ */
+static int mini_cm_reject(struct nes_cm_core *cm_core,
+ struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node)
+{
+ int ret = 0;
+
+ nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n",
+ __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state);
+
+ if (cm_node->tcp_cntxt.client)
+ return ret;
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSED;
+
+ ret = send_reset(cm_node, NULL);
+ return ret;
+}
+
+
+/**
+ * mini_cm_close
+ */
+static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
+{
+ int ret = 0;
+
+ if (!cm_core || !cm_node)
+ return -EINVAL;
+
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_RCVD:
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_ACCEPTING:
+ case NES_CM_STATE_MPAREQ_SENT:
+ case NES_CM_STATE_MPAREQ_RCVD:
+ cleanup_retrans_entry(cm_node);
+ send_reset(cm_node, NULL);
+ break;
+ case NES_CM_STATE_CLOSE_WAIT:
+ cm_node->state = NES_CM_STATE_LAST_ACK;
+ send_fin(cm_node, NULL);
+ break;
+ case NES_CM_STATE_FIN_WAIT1:
+ case NES_CM_STATE_FIN_WAIT2:
+ case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_TIME_WAIT:
+ case NES_CM_STATE_CLOSING:
+ ret = -1;
+ break;
+ case NES_CM_STATE_LISTENING:
+ case NES_CM_STATE_UNKNOWN:
+ case NES_CM_STATE_INITED:
+ case NES_CM_STATE_CLOSED:
+ ret = rem_ref_cm_node(cm_core, cm_node);
+ break;
+ case NES_CM_STATE_TSA:
+ if (cm_node->send_entry)
+ printk(KERN_ERR "ERROR Close got called from STATE_TSA "
+ "send_entry=%p\n", cm_node->send_entry);
+ ret = rem_ref_cm_node(cm_core, cm_node);
+ break;
+ }
+ return ret;
+}
+
+
+/**
+ * mini_cm_recv_pkt - receive an Ethernet packet and process it through the
+ * CM node state machine
+ */
+static void mini_cm_recv_pkt(struct nes_cm_core *cm_core,
+ struct nes_vnic *nesvnic, struct sk_buff *skb)
+{
+ struct nes_cm_node *cm_node = NULL;
+ struct nes_cm_listener *listener = NULL;
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ struct nes_cm_info nfo;
+
+ if (!skb)
+ return;
+ if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) {
+ dev_kfree_skb_any(skb);
+ return;
+ }
+
+ iph = (struct iphdr *)skb->data;
+ tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ skb_set_transport_header(skb, sizeof(*tcph));
+ if (!tcph) {
+ dev_kfree_skb_any(skb);
+ return;
+ }
+ skb->len = ntohs(iph->tot_len);
+
+ nfo.loc_addr = ntohl(iph->daddr);
+ nfo.loc_port = ntohs(tcph->dest);
+ nfo.rem_addr = ntohl(iph->saddr);
+ nfo.rem_port = ntohs(tcph->source);
+
+ nes_debug(NES_DBG_CM, "Received packet: dest=" NIPQUAD_FMT
+ ":0x%04X src=" NIPQUAD_FMT ":0x%04X\n",
+ NIPQUAD(iph->daddr), tcph->dest,
+ NIPQUAD(iph->saddr), tcph->source);
+
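+ /* look up an existing connection node; if none is found, only a */
+ /* bare SYN destined for an active listener may create one */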
+ do {
+ cm_node = find_node(cm_core,
+ nfo.rem_port, nfo.rem_addr,
+ nfo.loc_port, nfo.loc_addr);
+
+ if (!cm_node) {
+ /* only a SYN (passive open) is accepted here */
+ if ((!tcph->syn) || (tcph->ack)) {
+ cm_packets_dropped++;
+ break;
+ }
+ listener = find_listener(cm_core, nfo.loc_addr,
+ nfo.loc_port,
+ NES_CM_LISTENER_ACTIVE_STATE);
+ if (listener) {
+ nfo.cm_id = listener->cm_id;
+ nfo.conn_type = listener->conn_type;
+ } else {
+ nes_debug(NES_DBG_CM, "Unable to find listener "
+ "for the pkt\n");
+ cm_packets_dropped++;
+ dev_kfree_skb_any(skb);
+ break;
+ }
+
+ cm_node = make_cm_node(cm_core, nesvnic, &nfo,
+ listener);
+ if (!cm_node) {
+ nes_debug(NES_DBG_CM, "Unable to allocate "
+ "node\n");
+ cm_packets_dropped++;
+ atomic_dec(&listener->ref_count);
+ dev_kfree_skb_any(skb);
+ break;
+ }
+ if (!tcph->rst && !tcph->fin) {
+ cm_node->state = NES_CM_STATE_LISTENING;
+ } else {
+ cm_packets_dropped++;
+ rem_ref_cm_node(cm_core, cm_node);
+ dev_kfree_skb_any(skb);
+ break;
+ }
+ add_ref_cm_node(cm_node);
+ } else if (cm_node->state == NES_CM_STATE_TSA) {
+ rem_ref_cm_node(cm_core, cm_node);
+ atomic_inc(&cm_accel_dropped_pkts);
+ dev_kfree_skb_any(skb);
+ break;
+ }
+ process_packet(cm_node, skb, cm_core);
+ rem_ref_cm_node(cm_core, cm_node);
+ } while (0);
+}
+
+
+/**
+ * nes_cm_alloc_core - allocate a top level instance of a cm core
+ */
+static struct nes_cm_core *nes_cm_alloc_core(void)
+{
+ int i;
+
+ struct nes_cm_core *cm_core;
+ struct sk_buff *skb = NULL;
+
+ /* setup the CM core */
+ /* alloc top level core control structure */
+ cm_core = kzalloc(sizeof(*cm_core), GFP_KERNEL);
+ if (!cm_core)
+ return NULL;
+
+ INIT_LIST_HEAD(&cm_core->connected_nodes);
+ init_timer(&cm_core->tcp_timer);
+ cm_core->tcp_timer.function = nes_cm_timer_tick;
+
+ cm_core->mtu = NES_CM_DEFAULT_MTU;
+ cm_core->state = NES_CM_STATE_INITED;
+ cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS;
+
+ atomic_set(&cm_core->events_posted, 0);
+
+ /* init the packet lists */
+ skb_queue_head_init(&cm_core->tx_free_list);
+
+ for (i = 0; i < NES_CM_DEFAULT_FRAME_CNT; i++) {
+ skb = dev_alloc_skb(cm_core->mtu);
+ if (!skb) {
+ kfree(cm_core);
+ return NULL;
+ }
+ /* add 'raw' skb to free frame list */
+ skb_queue_head(&cm_core->tx_free_list, skb);
+ }
+
+ cm_core->api = &nes_cm_api;
+
+ spin_lock_init(&cm_core->ht_lock);
+ spin_lock_init(&cm_core->listen_list_lock);
+
+ INIT_LIST_HEAD(&cm_core->listen_list.list);
+
+ nes_debug(NES_DBG_CM, "Init CM Core completed -- cm_core=%p\n", cm_core);
+
+ nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n");
+ cm_core->event_wq = create_singlethread_workqueue("nesewq");
+ cm_core->post_event = nes_cm_post_event;
+ nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n");
+ cm_core->disconn_wq = create_singlethread_workqueue("nesdwq");
+
+ print_core(cm_core);
+ return cm_core;
+}
+
+
+/**
+ * mini_cm_dealloc_core - deallocate a top level instance of a cm core
+ */
+static int mini_cm_dealloc_core(struct nes_cm_core *cm_core)
+{
+ nes_debug(NES_DBG_CM, "De-Alloc CM Core (%p)\n", cm_core);
+
+ if (!cm_core)
+ return -EINVAL;
+
+ barrier();
+
+ if (timer_pending(&cm_core->tcp_timer)) {
+ del_timer(&cm_core->tcp_timer);
+ }
+
+ destroy_workqueue(cm_core->event_wq);
+ destroy_workqueue(cm_core->disconn_wq);
+ nes_debug(NES_DBG_CM, "\n");
+ kfree(cm_core);
+
+ return 0;
+}
+
+
+/**
+ * mini_cm_get
+ */
+static int mini_cm_get(struct nes_cm_core *cm_core)
+{
+ return cm_core->state;
+}
+
+
+/**
+ * mini_cm_set
+ */
+static int mini_cm_set(struct nes_cm_core *cm_core, u32 type, u32 value)
+{
+ int ret = 0;
+
+ switch (type) {
+ case NES_CM_SET_PKT_SIZE:
+ cm_core->mtu = value;
+ break;
+ case NES_CM_SET_FREE_PKT_Q_SIZE:
+ cm_core->free_tx_pkt_max = value;
+ break;
+ default:
+ /* unknown set option */
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+
+/**
+ * nes_cm_init_tsa_conn - set up the HW QP context; MPA frames must have
+ * been successfully exchanged before this is called
+ */
+static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_node)
+{
+ int ret = 0;
+
+ if (!nesqp)
+ return -EINVAL;
+
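+ /* load the negotiated TCP state from the cm_node into the HW QP */
+ /* context so the adapter can take over the connection */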
+ nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 |
+ NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG |
+ NES_QPCONTEXT_MISC_DROS);
+
+ if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale)
+ nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE);
+
+ nesqp->nesqp_context->misc2 |= cpu_to_le32(64 << NES_QPCONTEXT_MISC2_TTL_SHIFT);
+
+ nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16);
+
+ nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32(
+ (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT);
+
+ nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
+ (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) &
+ NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK);
+
+ nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
+ (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) &
+ NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK);
+
+ nesqp->nesqp_context->keepalive = cpu_to_le32(0x80);
+ nesqp->nesqp_context->ts_recent = 0;
+ nesqp->nesqp_context->ts_age = 0;
+ nesqp->nesqp_context->snd_nxt = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+ nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd);
+ nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
+ nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd <<
+ cm_node->tcp_cntxt.rcv_wscale);
+ nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+ nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+ nesqp->nesqp_context->srtt = 0;
+ nesqp->nesqp_context->rttvar = cpu_to_le32(0x6);
+ nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000);
+ nesqp->nesqp_context->cwnd = cpu_to_le32(2*cm_node->tcp_cntxt.mss);
+ nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
+ nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+ nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd);
+
+ nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X,"
+ " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n",
+ nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
+ le32_to_cpu(nesqp->nesqp_context->snd_nxt),
+ cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale),
+ le32_to_cpu(nesqp->nesqp_context->rcv_wnd),
+ le32_to_cpu(nesqp->nesqp_context->misc));
+ nes_debug(NES_DBG_CM, " snd_wnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd));
+ nes_debug(NES_DBG_CM, " snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd));
+ nes_debug(NES_DBG_CM, " max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd));
+
+ nes_debug(NES_DBG_CM, "Change cm_node state to TSA\n");
+ cm_node->state = NES_CM_STATE_TSA;
+
+ return ret;
+}
+
+
+/**
+ * nes_cm_disconn
+ */
+int nes_cm_disconn(struct nes_qp *nesqp)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&nesqp->lock, flags);
+ if (nesqp->disconn_pending == 0) {
+ nesqp->disconn_pending++;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ /* init our disconnect work element */
+ INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker);
+
+ queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work);
+ } else
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+
+ return 0;
+}
+
+
+/**
+ * nes_disconnect_worker
+ */
+static void nes_disconnect_worker(struct work_struct *work)
+{
+ struct nes_qp *nesqp = container_of(work, struct nes_qp, disconn_work);
+
+ nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n",
+ nesqp->last_aeq, nesqp->hwqp.qp_id);
+ nes_cm_disconn_true(nesqp);
+}
+
+
+/**
+ * nes_cm_disconn_true
+ */
+static int nes_cm_disconn_true(struct nes_qp *nesqp)
+{
+ unsigned long flags;
+ int ret = 0;
+ struct iw_cm_id *cm_id;
+ struct iw_cm_event cm_event;
+ struct nes_vnic *nesvnic;
+ u16 last_ae;
+ u8 original_hw_tcp_state;
+ u8 original_ibqp_state;
+ u8 issued_disconnect_reset = 0;
+
+ if (!nesqp) {
+ nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n");
+ return -1;
+ }
+
+ spin_lock_irqsave(&nesqp->lock, flags);
+ cm_id = nesqp->cm_id;
+ /* make sure we haven't already closed this connection */
+ if (!cm_id) {
+ nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n",
+ nesqp->hwqp.qp_id);
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ return -1;
+ }
+
+ nesvnic = to_nesvnic(nesqp->ibqp.device);
+ nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id);
+
+ original_hw_tcp_state = nesqp->hw_tcp_state;
+ original_ibqp_state = nesqp->ibqp_state;
+ last_ae = nesqp->last_aeq;
+
+
+ nes_debug(NES_DBG_CM, "set ibqp_state=%u\n", nesqp->ibqp_state);
+
+ if ((nesqp->cm_id) && (cm_id->event_handler)) {
+ if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
+ ((original_ibqp_state == IB_QPS_RTS) &&
+ (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+ atomic_inc(&cm_disconnects);
+ cm_event.event = IW_CM_EVENT_DISCONNECT;
+ if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) {
+ issued_disconnect_reset = 1;
+ cm_event.status = IW_CM_EVENT_STATUS_RESET;
+ nes_debug(NES_DBG_CM, "Generating a CM "
+ "Disconnect Event (status reset) for "
+ "QP%u, cm_id = %p. \n",
+ nesqp->hwqp.qp_id, cm_id);
+ } else
+ cm_event.status = IW_CM_EVENT_STATUS_OK;
+
+ cm_event.local_addr = cm_id->local_addr;
+ cm_event.remote_addr = cm_id->remote_addr;
+ cm_event.private_data = NULL;
+ cm_event.private_data_len = 0;
+
+ nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event"
+ " for QP%u, SQ Head = %u, SQ Tail = %u. "
+ "cm_id = %p, refcount = %u.\n",
+ nesqp->hwqp.qp_id, nesqp->hwqp.sq_head,
+ nesqp->hwqp.sq_tail, cm_id,
+ atomic_read(&nesqp->refcount));
+
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ ret = cm_id->event_handler(cm_id, &cm_event);
+ if (ret)
+ nes_debug(NES_DBG_CM, "OFA CM event_handler "
+ "returned, ret=%d\n", ret);
+ spin_lock_irqsave(&nesqp->lock, flags);
+ }
+
+ nesqp->disconn_pending = 0;
+ /* There might have been another AE while the lock was released */
+ original_hw_tcp_state = nesqp->hw_tcp_state;
+ original_ibqp_state = nesqp->ibqp_state;
+ last_ae = nesqp->last_aeq;
+
+ if ((issued_disconnect_reset == 0) && (nesqp->cm_id) &&
+ ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
+ (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
+ (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
+ (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+ atomic_inc(&cm_closes);
+ nesqp->cm_id = NULL;
+ nesqp->in_disconnect = 0;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ nes_disconnect(nesqp, 1);
+
+ cm_id->provider_data = nesqp;
+ /* Send up the close complete event */
+ cm_event.event = IW_CM_EVENT_CLOSE;
+ cm_event.status = IW_CM_EVENT_STATUS_OK;
+ cm_event.provider_data = cm_id->provider_data;
+ cm_event.local_addr = cm_id->local_addr;
+ cm_event.remote_addr = cm_id->remote_addr;
+ cm_event.private_data = NULL;
+ cm_event.private_data_len = 0;
+
+ ret = cm_id->event_handler(cm_id, &cm_event);
+ if (ret) {
+ nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
+ }
+
+ cm_id->rem_ref(cm_id);
+
+ spin_lock_irqsave(&nesqp->lock, flags);
+ if (nesqp->flush_issued == 0) {
+ nesqp->flush_issued = 1;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ flush_wqes(nesvnic->nesdev, nesqp,
+ NES_CQP_FLUSH_RQ, 1);
+ } else
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ } else {
+ cm_id = nesqp->cm_id;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ /* check to see if the inbound reset beat the outbound reset */
+ if ((!cm_id) && (last_ae==NES_AEQE_AEID_RESET_SENT)) {
+ nes_debug(NES_DBG_CM, "QP%u: Decing refcount "
+ "due to inbound reset beating the "
+ "outbound reset.\n", nesqp->hwqp.qp_id);
+ }
+ }
+ } else {
+ nesqp->disconn_pending = 0;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ }
+
+ return 0;
+}
+
+
+/**
+ * nes_disconnect
+ */
+static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
+{
+ int ret = 0;
+ struct nes_vnic *nesvnic;
+ struct nes_device *nesdev;
+ struct nes_ib_device *nesibdev;
+
+ nesvnic = to_nesvnic(nesqp->ibqp.device);
+ if (!nesvnic)
+ return -EINVAL;
+
+ nesdev = nesvnic->nesdev;
+ nesibdev = nesvnic->nesibdev;
+
+ nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
+ atomic_read(&nesvnic->netdev->refcnt));
+
+ if (nesqp->active_conn) {
+
+ /* indicate this connection is NOT active */
+ nesqp->active_conn = 0;
+ } else {
+ /* Need to free the Last Streaming Mode Message */
+ if (nesqp->ietf_frame) {
+ if (nesqp->lsmm_mr)
+ nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr);
+ pci_free_consistent(nesdev->pcidev,
+ nesqp->private_data_len+sizeof(struct ietf_mpa_frame),
+ nesqp->ietf_frame, nesqp->ietf_frame_pbase);
+ }
+ }
+
+ /* close the CM node down if it is still active */
+ if (nesqp->cm_node) {
+ nes_debug(NES_DBG_CM, "Call close API\n");
+
+ g_cm_core->api->close(g_cm_core, nesqp->cm_node);
+ }
+
+ return ret;
+}
+
+
+/**
+ * nes_accept
+ */
+int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ u64 u64temp;
+ struct ib_qp *ibqp;
+ struct nes_qp *nesqp;
+ struct nes_vnic *nesvnic;
+ struct nes_device *nesdev;
+ struct nes_cm_node *cm_node;
+ struct nes_adapter *adapter;
+ struct ib_qp_attr attr;
+ struct iw_cm_event cm_event;
+ struct nes_hw_qp_wqe *wqe;
+ struct nes_v4_quad nes_quad;
+ struct nes_ib_device *nesibdev;
+ struct ib_mr *ibmr = NULL;
+ struct ib_phys_buf ibphysbuf;
+ struct nes_pd *nespd;
+ u32 crc_value;
+ int ret;
+
+ ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
+ if (!ibqp)
+ return -EINVAL;
+
+ /* get all our handles */
+ nesqp = to_nesqp(ibqp);
+ nesvnic = to_nesvnic(nesqp->ibqp.device);
+ nesdev = nesvnic->nesdev;
+ adapter = nesdev->nesadapter;
+
+ cm_node = (struct nes_cm_node *)cm_id->provider_data;
+ nes_debug(NES_DBG_CM, "nes_accept: cm_node= %p nesvnic=%p, netdev=%p,"
+ "%s\n", cm_node, nesvnic, nesvnic->netdev,
+ nesvnic->netdev->name);
+
+ /* associate the node with the QP */
+ nesqp->cm_node = (void *)cm_node;
+ cm_node->nesqp = nesqp;
+ nes_add_ref(&nesqp->ibqp);
+
+ nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n",
+ nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener);
+ atomic_inc(&cm_accepts);
+
+ nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
+ atomic_read(&nesvnic->netdev->refcnt));
+
+ /* allocate the ietf frame and space for private data */
+ nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
+ sizeof(struct ietf_mpa_frame) + conn_param->private_data_len,
+ &nesqp->ietf_frame_pbase);
+
+ if (!nesqp->ietf_frame) {
+ nes_debug(NES_DBG_CM, "Unable to allocate memory for private "
+ "data\n");
+ return -ENOMEM;
+ }
+
+
+ /* setup the MPA frame */
+ nesqp->private_data_len = conn_param->private_data_len;
+ memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
+
+ memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data,
+ conn_param->private_data_len);
+
+ nesqp->ietf_frame->priv_data_len =
+ cpu_to_be16(conn_param->private_data_len);
+ nesqp->ietf_frame->rev = mpa_version;
+ nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC;
+
+ /* setup our first outgoing iWarp send WQE (the IETF frame response) */
+ wqe = &nesqp->hwqp.sq_vbase[0];
+
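+ /* for a non-loopback connection, register the MPA reply buffer with */
+ /* the HW and build the first SQ WQE so it is sent as the LSMM */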
+ if (cm_id->remote_addr.sin_addr.s_addr !=
+ cm_id->local_addr.sin_addr.s_addr) {
+ u64temp = (unsigned long)nesqp;
+ nesibdev = nesvnic->nesibdev;
+ nespd = nesqp->nespd;
+ ibphysbuf.addr = nesqp->ietf_frame_pbase;
+ ibphysbuf.size = conn_param->private_data_len +
+ sizeof(struct ietf_mpa_frame);
+ ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
+ &ibphysbuf, 1,
+ IB_ACCESS_LOCAL_WRITE,
+ (u64 *)&nesqp->ietf_frame);
+ if (!ibmr) {
+ nes_debug(NES_DBG_CM, "Unable to register memory region "
+ "for LSMM for cm_node = %p\n",
+ cm_node);
+ return -ENOMEM;
+ }
+
+ ibmr->pd = &nespd->ibpd;
+ ibmr->device = nespd->ibpd.device;
+ nesqp->lsmm_mr = ibmr;
+
+ u64temp |= NES_SW_CONTEXT_ALIGN>>1;
+ set_wqe_64bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
+ u64temp);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
+ cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING |
+ NES_IWARP_SQ_WQE_WRPDU);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] =
+ cpu_to_le32(conn_param->private_data_len +
+ sizeof(struct ietf_mpa_frame));
+ set_wqe_64bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
+ (u64)nesqp->ietf_frame);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] =
+ cpu_to_le32(conn_param->private_data_len +
+ sizeof(struct ietf_mpa_frame));
+ wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey;
+
+ nesqp->nesqp_context->ird_ord_sizes |=
+ cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
+ NES_QPCONTEXT_ORDIRD_WRPDU);
+ } else {
+ nesqp->nesqp_context->ird_ord_sizes |=
+ cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
+ NES_QPCONTEXT_ORDIRD_WRPDU |
+ NES_QPCONTEXT_ORDIRD_ALSMM));
+ }
+ nesqp->skip_lsmm = 1;
+
+
+ /* Cache the cm_id in the qp */
+ nesqp->cm_id = cm_id;
+ cm_node->cm_id = cm_id;
+
+ /* nesqp->cm_node = (void *)cm_id->provider_data; */
+ cm_id->provider_data = nesqp;
+ nesqp->active_conn = 0;
+
+ if (cm_node->state == NES_CM_STATE_TSA)
+ nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n",
+ cm_node);
+
+ nes_cm_init_tsa_conn(nesqp, cm_node);
+
+ nesqp->nesqp_context->tcpPorts[0] =
+ cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
+ nesqp->nesqp_context->tcpPorts[1] =
+ cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
+
+ if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
+ nesqp->nesqp_context->ip0 =
+ cpu_to_le32(ntohl(nesvnic->local_ipaddr));
+ else
+ nesqp->nesqp_context->ip0 =
+ cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
+
+ nesqp->nesqp_context->misc2 |= cpu_to_le32(
+ (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
+ NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
+
+ nesqp->nesqp_context->arp_index_vlan |=
+ cpu_to_le32(nes_arp_table(nesdev,
+ le32_to_cpu(nesqp->nesqp_context->ip0), NULL,
+ NES_ARP_RESOLVE) << 16);
+
+ nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
+ jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
+
+ nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id);
+
+ nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(
+ ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT));
+ nesqp->nesqp_context->ird_ord_sizes |=
+ cpu_to_le32((u32)conn_param->ord);
+
+ memset(&nes_quad, 0, sizeof(nes_quad));
+ nes_quad.DstIpAdrIndex =
+ cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
+ if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
+ nes_quad.SrcIpadr = nesvnic->local_ipaddr;
+ else
+ nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
+ nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
+ nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
+
+ /* Produce hash key */
+ crc_value = get_crc_value(&nes_quad);
+ nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
+ nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n",
+ nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask);
+
+ nesqp->hte_index &= adapter->hte_index_mask;
+ nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
+
+ cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
+
+ nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = "
+ "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + "
+ "private data length=%zu.\n", nesqp->hwqp.qp_id,
+ ntohl(cm_id->remote_addr.sin_addr.s_addr),
+ ntohs(cm_id->remote_addr.sin_port),
+ ntohl(cm_id->local_addr.sin_addr.s_addr),
+ ntohs(cm_id->local_addr.sin_port),
+ le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
+ le32_to_cpu(nesqp->nesqp_context->snd_nxt),
+ conn_param->private_data_len +
+ sizeof(struct ietf_mpa_frame));
+
+ attr.qp_state = IB_QPS_RTS;
+ nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
+
+ /* notify OF layer that accept event was successful */
+ cm_id->add_ref(cm_id);
+
+ cm_event.event = IW_CM_EVENT_ESTABLISHED;
+ cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED;
+ cm_event.provider_data = (void *)nesqp;
+ cm_event.local_addr = cm_id->local_addr;
+ cm_event.remote_addr = cm_id->remote_addr;
+ cm_event.private_data = NULL;
+ cm_event.private_data_len = 0;
+ ret = cm_id->event_handler(cm_id, &cm_event);
+ if (cm_node->loopbackpartner) {
+ cm_node->loopbackpartner->mpa_frame_size =
+ nesqp->private_data_len;
+ /* copy entire MPA frame to our cm_node's frame */
+ memcpy(cm_node->loopbackpartner->mpa_frame_buf,
+ nesqp->ietf_frame->priv_data, nesqp->private_data_len);
+ create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED);
+ }
+ if (ret)
+ printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
+ "ret=%d\n", __func__, __LINE__, ret);
+
+ return 0;
+}
+
+
+/**
+ * nes_reject
+ */
+int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+ struct nes_cm_node *cm_node;
+ struct nes_cm_core *cm_core;
+
+ atomic_inc(&cm_rejects);
+ cm_node = (struct nes_cm_node *) cm_id->provider_data;
+ cm_core = cm_node->cm_core;
+ cm_node->mpa_frame_size = sizeof(struct ietf_mpa_frame) + pdata_len;
+
+ strcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP);
+ memcpy(&cm_node->mpa_frame.priv_data, pdata, pdata_len);
+
+ cm_node->mpa_frame.priv_data_len = cpu_to_be16(pdata_len);
+ cm_node->mpa_frame.rev = mpa_version;
+ cm_node->mpa_frame.flags = IETF_MPA_FLAGS_CRC | IETF_MPA_FLAGS_REJECT;
+
+ cm_core->api->reject(cm_core, &cm_node->mpa_frame, cm_node);
+
+ return 0;
+}
+
+
+/**
+ * nes_connect
+ * setup and launch cm connect node
+ */
+int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ struct ib_qp *ibqp;
+ struct nes_qp *nesqp;
+ struct nes_vnic *nesvnic;
+ struct nes_device *nesdev;
+ struct nes_cm_node *cm_node;
+ struct nes_cm_info cm_info;
+
+ ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
+ if (!ibqp)
+ return -EINVAL;
+ nesqp = to_nesqp(ibqp);
+ if (!nesqp)
+ return -EINVAL;
+ nesvnic = to_nesvnic(nesqp->ibqp.device);
+ if (!nesvnic)
+ return -EINVAL;
+ nesdev = nesvnic->nesdev;
+ if (!nesdev)
+ return -EINVAL;
+
+ nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
+ "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
+ ntohl(nesvnic->local_ipaddr),
+ ntohl(cm_id->remote_addr.sin_addr.s_addr),
+ ntohs(cm_id->remote_addr.sin_port),
+ ntohl(cm_id->local_addr.sin_addr.s_addr),
+ ntohs(cm_id->local_addr.sin_port));
+
+ atomic_inc(&cm_connects);
+ nesqp->active_conn = 1;
+
+ /* cache the cm_id in the qp */
+ nesqp->cm_id = cm_id;
+
+ cm_id->provider_data = nesqp;
+
+ nesqp->private_data_len = conn_param->private_data_len;
+ nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
+ nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
+ nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
+ conn_param->private_data_len);
+
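+ /* for non-loopback connections, add the local TCP port to the HW APBVT */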
+ if (cm_id->local_addr.sin_addr.s_addr !=
+ cm_id->remote_addr.sin_addr.s_addr)
+ nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
+ PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+
+ /* set up the connection params for the node */
+ cm_info.loc_addr = htonl(cm_id->local_addr.sin_addr.s_addr);
+ cm_info.loc_port = htons(cm_id->local_addr.sin_port);
+ cm_info.rem_addr = htonl(cm_id->remote_addr.sin_addr.s_addr);
+ cm_info.rem_port = htons(cm_id->remote_addr.sin_port);
+ cm_info.cm_id = cm_id;
+ cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+
+ cm_id->add_ref(cm_id);
+
+ /* create a connect CM node connection */
+ cm_node = g_cm_core->api->connect(g_cm_core, nesvnic,
+ conn_param->private_data_len, (void *)conn_param->private_data,
+ &cm_info);
+ if (!cm_node) {
+ if (cm_id->local_addr.sin_addr.s_addr !=
+ cm_id->remote_addr.sin_addr.s_addr)
+ nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
+ PCI_FUNC(nesdev->pcidev->devfn),
+ NES_MANAGE_APBVT_DEL);
+
+ cm_id->rem_ref(cm_id);
+ return -ENOMEM;
+ }
+
+ cm_node->apbvt_set = 1;
+ nesqp->cm_node = cm_node;
+ cm_node->nesqp = nesqp;
+ nes_add_ref(&nesqp->ibqp);
+
+ return 0;
+}
+
+
+/**
+ * nes_create_listen
+ */
+int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+ struct nes_vnic *nesvnic;
+ struct nes_cm_listener *cm_node;
+ struct nes_cm_info cm_info;
+ struct nes_adapter *adapter;
+ int err;
+
+
+ nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n",
+ cm_id, ntohs(cm_id->local_addr.sin_port));
+
+ nesvnic = to_nesvnic(cm_id->device);
+ if (!nesvnic)
+ return -EINVAL;
+ adapter = nesvnic->nesdev->nesadapter;
+ nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n",
+ nesvnic, nesvnic->netdev, nesvnic->netdev->name);
+
+ nes_debug(NES_DBG_CM, "nesvnic->local_ipaddr=0x%08x, sin_addr.s_addr=0x%08x\n",
+ nesvnic->local_ipaddr, cm_id->local_addr.sin_addr.s_addr);
+
+ /* setup listen params in our api call struct */
+ cm_info.loc_addr = nesvnic->local_ipaddr;
+ cm_info.loc_port = cm_id->local_addr.sin_port;
+ cm_info.backlog = backlog;
+ cm_info.cm_id = cm_id;
+
+ cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+
+
+ cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
+ if (!cm_node) {
+ printk(KERN_ERR "%s[%u] Error returned from listen API call\n",
+ __func__, __LINE__);
+ return -ENOMEM;
+ }
+
+ cm_id->provider_data = cm_node;
+
+ if (!cm_node->reused_node) {
+ err = nes_manage_apbvt(nesvnic,
+ ntohs(cm_id->local_addr.sin_port),
+ PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
+ NES_MANAGE_APBVT_ADD);
+ if (err) {
+ printk(KERN_ERR "nes_manage_apbvt call returned %d.\n",
+ err);
+ g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node);
+ return err;
+ }
+ cm_listens_created++;
+ }
+
+ cm_id->add_ref(cm_id);
+ cm_id->provider_data = (void *)cm_node;
+
+
+ return 0;
+}
+
+
+/**
+ * nes_destroy_listen
+ */
+int nes_destroy_listen(struct iw_cm_id *cm_id)
+{
+ if (cm_id->provider_data)
+ g_cm_core->api->stop_listener(g_cm_core, cm_id->provider_data);
+ else
+ nes_debug(NES_DBG_CM, "cm_id->provider_data was NULL\n");
+
+ cm_id->rem_ref(cm_id);
+
+ return 0;
+}
+
+
+/**
+ * nes_cm_recv
+ */
+int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice)
+{
+ cm_packets_received++;
+ if ((g_cm_core) && (g_cm_core->api)) {
+ g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
+ } else {
+ nes_debug(NES_DBG_CM, "Unable to process packet for CM,"
+ " cm is not setup properly.\n");
+ }
+
+ return 0;
+}
+
+
+/**
+ * nes_cm_start
+ * Start and init a cm core module
+ */
+int nes_cm_start(void)
+{
+ nes_debug(NES_DBG_CM, "\n");
+ /* create the primary CM core, pass this handle to subsequent core inits */
+ g_cm_core = nes_cm_alloc_core();
+ if (g_cm_core) {
+ return 0;
+ } else {
+ return -ENOMEM;
+ }
+}
+
+
+/**
+ * nes_cm_stop
+ * stop and dealloc all cm core instances
+ */
+int nes_cm_stop(void)
+{
+ g_cm_core->api->destroy_cm_core(g_cm_core);
+ return 0;
+}
+
+
+/**
+ * cm_event_connected
+ * handle a connected event, setup QPs and HW
+ */
+static void cm_event_connected(struct nes_cm_event *event)
+{
+ u64 u64temp;
+ struct nes_qp *nesqp;
+ struct nes_vnic *nesvnic;
+ struct nes_device *nesdev;
+ struct nes_cm_node *cm_node;
+ struct nes_adapter *nesadapter;
+ struct ib_qp_attr attr;
+ struct iw_cm_id *cm_id;
+ struct iw_cm_event cm_event;
+ struct nes_hw_qp_wqe *wqe;
+ struct nes_v4_quad nes_quad;
+ u32 crc_value;
+ int ret;
+
+ /* get all our handles */
+ cm_node = event->cm_node;
+ cm_id = cm_node->cm_id;
+ nes_debug(NES_DBG_CM, "cm_event_connected - %p - cm_id = %p\n", cm_node, cm_id);
+ nesqp = (struct nes_qp *)cm_id->provider_data;
+ nesvnic = to_nesvnic(nesqp->ibqp.device);
+ nesdev = nesvnic->nesdev;
+ nesadapter = nesdev->nesadapter;
+
+ if (nesqp->destroyed) {
+ return;
+ }
+ atomic_inc(&cm_connecteds);
+ nes_debug(NES_DBG_CM, "QP%u attempting to connect to 0x%08X:0x%04X on"
+ " local port 0x%04X. jiffies = %lu.\n",
+ nesqp->hwqp.qp_id,
+ ntohl(cm_id->remote_addr.sin_addr.s_addr),
+ ntohs(cm_id->remote_addr.sin_port),
+ ntohs(cm_id->local_addr.sin_port),
+ jiffies);
+
+ nes_cm_init_tsa_conn(nesqp, cm_node);
+
+ /* set the QP tsa context */
+ nesqp->nesqp_context->tcpPorts[0] =
+ cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
+ nesqp->nesqp_context->tcpPorts[1] =
+ cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
+ if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
+ nesqp->nesqp_context->ip0 =
+ cpu_to_le32(ntohl(nesvnic->local_ipaddr));
+ else
+ nesqp->nesqp_context->ip0 =
+ cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
+
+ nesqp->nesqp_context->misc2 |= cpu_to_le32(
+ (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
+ NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
+ nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32(
+ nes_arp_table(nesdev,
+ le32_to_cpu(nesqp->nesqp_context->ip0),
+ NULL, NES_ARP_RESOLVE) << 16);
+ nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
+ jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
+ nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id);
+ nesqp->nesqp_context->ird_ord_sizes |=
+ cpu_to_le32((u32)1 <<
+ NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
+
+ /* Adjust tail for not having an LSMM */
+ nesqp->hwqp.sq_tail = 1;
+
+#if defined(NES_SEND_FIRST_WRITE)
+ if (cm_node->send_write0) {
+ nes_debug(NES_DBG_CM, "Sending first write.\n");
+ wqe = &nesqp->hwqp.sq_vbase[0];
+ u64temp = (unsigned long)nesqp;
+ u64temp |= NES_SW_CONTEXT_ALIGN>>1;
+ set_wqe_64bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
+ cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
+
+ /* use the reserved spot on the WQ for the extra first WQE */
+ nesqp->nesqp_context->ird_ord_sizes &=
+ cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
+ NES_QPCONTEXT_ORDIRD_WRPDU |
+ NES_QPCONTEXT_ORDIRD_ALSMM));
+ nesqp->skip_lsmm = 1;
+ nesqp->hwqp.sq_tail = 0;
+ nes_write32(nesdev->regs + NES_WQE_ALLOC,
+ (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
+ }
+#endif
+
+ memset(&nes_quad, 0, sizeof(nes_quad));
+
+ nes_quad.DstIpAdrIndex =
+ cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
+ if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
+ nes_quad.SrcIpadr = nesvnic->local_ipaddr;
+ else
+ nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
+ nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
+ nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
+
+ /* Produce hash key */
+ crc_value = get_crc_value(&nes_quad);
+ nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
+ nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n",
+ nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
+
+ nesqp->hte_index &= nesadapter->hte_index_mask;
+ nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
+
+ nesqp->ietf_frame = &cm_node->mpa_frame;
+ nesqp->private_data_len = (u8) cm_node->mpa_frame_size;
+ cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
+
+ /* notify OF layer we successfully created the requested connection */
+ cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+ cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED;
+ cm_event.provider_data = cm_id->provider_data;
+ cm_event.local_addr.sin_family = AF_INET;
+ cm_event.local_addr.sin_port = cm_id->local_addr.sin_port;
+ cm_event.remote_addr = cm_id->remote_addr;
+
+ cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
+ cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size;
+
+ cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr;
+ ret = cm_id->event_handler(cm_id, &cm_event);
+ nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
+
+ if (ret)
+ printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
+ "ret=%d\n", __func__, __LINE__, ret);
+ attr.qp_state = IB_QPS_RTS;
+ nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
+
+ nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = "
+ "%lu\n", nesqp->hwqp.qp_id, jiffies);
+
+ return;
+}
+
+
+/**
+ * cm_event_connect_error
+ */
+static void cm_event_connect_error(struct nes_cm_event *event)
+{
+ struct nes_qp *nesqp;
+ struct iw_cm_id *cm_id;
+ struct iw_cm_event cm_event;
+ /* struct nes_cm_info cm_info; */
+ int ret;
+
+ if (!event->cm_node)
+ return;
+
+ cm_id = event->cm_node->cm_id;
+ if (!cm_id) {
+ return;
+ }
+
+ nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id);
+ nesqp = cm_id->provider_data;
+
+ if (!nesqp) {
+ return;
+ }
+
+ /* notify OF layer about this connection error event */
+ /* cm_id->rem_ref(cm_id); */
+ nesqp->cm_id = NULL;
+ cm_id->provider_data = NULL;
+ cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+ cm_event.status = IW_CM_EVENT_STATUS_REJECTED;
+ cm_event.provider_data = cm_id->provider_data;
+ cm_event.local_addr = cm_id->local_addr;
+ cm_event.remote_addr = cm_id->remote_addr;
+ cm_event.private_data = NULL;
+ cm_event.private_data_len = 0;
+
+ nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, "
+ "remote_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr,
+ cm_event.remote_addr.sin_addr.s_addr);
+
+ ret = cm_id->event_handler(cm_id, &cm_event);
+ nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
+ if (ret)
+ printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
+ "ret=%d\n", __func__, __LINE__, ret);
+ cm_id->rem_ref(cm_id);
+
+ rem_ref_cm_node(event->cm_node->cm_core, event->cm_node);
+ return;
+}
+
+
+/**
+ * cm_event_reset
+ */
+static void cm_event_reset(struct nes_cm_event *event)
+{
+ struct nes_qp *nesqp;
+ struct iw_cm_id *cm_id;
+ struct iw_cm_event cm_event;
+ /* struct nes_cm_info cm_info; */
+ int ret;
+
+ if (!event->cm_node)
+ return;
+
+ if (!event->cm_node->cm_id)
+ return;
+
+ cm_id = event->cm_node->cm_id;
+
+ nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id);
+ nesqp = cm_id->provider_data;
+
+ nesqp->cm_id = NULL;
+ /* cm_id->provider_data = NULL; */
+ cm_event.event = IW_CM_EVENT_DISCONNECT;
+ cm_event.status = IW_CM_EVENT_STATUS_RESET;
+ cm_event.provider_data = cm_id->provider_data;
+ cm_event.local_addr = cm_id->local_addr;
+ cm_event.remote_addr = cm_id->remote_addr;
+ cm_event.private_data = NULL;
+ cm_event.private_data_len = 0;
+
+ ret = cm_id->event_handler(cm_id, &cm_event);
+ nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
+
+
+ /* notify OF layer about this connection error event */
+ cm_id->rem_ref(cm_id);
+
+ return;
+}
+
+
+/**
+ * cm_event_mpa_req
+ */
+static void cm_event_mpa_req(struct nes_cm_event *event)
+{
+ struct iw_cm_id *cm_id;
+ struct iw_cm_event cm_event;
+ int ret;
+ struct nes_cm_node *cm_node;
+
+ cm_node = event->cm_node;
+ if (!cm_node)
+ return;
+ cm_id = cm_node->cm_id;
+
+ atomic_inc(&cm_connect_reqs);
+ nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
+ cm_node, cm_id, jiffies);
+
+ cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
+ cm_event.status = IW_CM_EVENT_STATUS_OK;
+ cm_event.provider_data = (void *)cm_node;
+
+ cm_event.local_addr.sin_family = AF_INET;
+ cm_event.local_addr.sin_port = htons(event->cm_info.loc_port);
+ cm_event.local_addr.sin_addr.s_addr = htonl(event->cm_info.loc_addr);
+
+ cm_event.remote_addr.sin_family = AF_INET;
+ cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port);
+ cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr);
+
+ cm_event.private_data = cm_node->mpa_frame_buf;
+ cm_event.private_data_len = (u8) cm_node->mpa_frame_size;
+
+ ret = cm_id->event_handler(cm_id, &cm_event);
+ if (ret)
+ printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n",
+ __func__, __LINE__, ret);
+
+ return;
+}
+
+
+static void nes_cm_event_handler(struct work_struct *);
+
+/**
+ * nes_cm_post_event
+ * post an event to the cm event handler
+ */
+static int nes_cm_post_event(struct nes_cm_event *event)
+{
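+ /* hold references on the cm_node and cm_id until the worker */
+ /* has handled the event */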
+ atomic_inc(&event->cm_node->cm_core->events_posted);
+ add_ref_cm_node(event->cm_node);
+ event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
+ INIT_WORK(&event->event_work, nes_cm_event_handler);
+ nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n",
+ event->cm_node, event);
+
+ queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
+
+ nes_debug(NES_DBG_CM, "Exit\n");
+ return 0;
+}
+
+
+/**
+ * nes_cm_event_handler
+ * worker function to handle cm events
+ * will free instance of nes_cm_event
+ */
+static void nes_cm_event_handler(struct work_struct *work)
+{
+ struct nes_cm_event *event = container_of(work, struct nes_cm_event,
+ event_work);
+ struct nes_cm_core *cm_core;
+
+ if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core))
+ return;
+
+ cm_core = event->cm_node->cm_core;
+ nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n",
+ event, event->type, atomic_read(&cm_core->events_posted));
+
+ switch (event->type) {
+ case NES_CM_EVENT_MPA_REQ:
+ cm_event_mpa_req(event);
+ nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n",
+ event->cm_node);
+ break;
+ case NES_CM_EVENT_RESET:
+ nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n",
+ event->cm_node);
+ cm_event_reset(event);
+ break;
+ case NES_CM_EVENT_CONNECTED:
+ if ((!event->cm_node->cm_id) ||
+ (event->cm_node->state != NES_CM_STATE_TSA))
+ break;
+ cm_event_connected(event);
+ nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n");
+ break;
+ case NES_CM_EVENT_ABORTED:
+ if ((!event->cm_node->cm_id) ||
+ (event->cm_node->state == NES_CM_STATE_TSA))
+ break;
+ cm_event_connect_error(event);
+ nes_debug(NES_DBG_CM, "CM Event: ABORTED\n");
+ break;
+ case NES_CM_EVENT_DROPPED_PKT:
+ nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n");
+ break;
+ default:
+ nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n");
+ break;
+ }
+
+ atomic_dec(&cm_core->events_posted);
+ event->cm_info.cm_id->rem_ref(event->cm_info.cm_id);
+ rem_ref_cm_node(cm_core, event->cm_node);
+ kfree(event);
+
+ return;
+}
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
new file mode 100644
index 0000000..367b3d2
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef NES_CM_H
+#define NES_CM_H
+
+#define QUEUE_EVENTS
+
+#define NES_MANAGE_APBVT_DEL 0
+#define NES_MANAGE_APBVT_ADD 1
+
+/* IETF MPA -- defines, enums, structs */
+#define IEFT_MPA_KEY_REQ "MPA ID Req Frame"
+#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
+#define IETF_MPA_KEY_SIZE 16
+#define IETF_MPA_VERSION 1
+
+enum ietf_mpa_flags {
+ IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
+ IETF_MPA_FLAGS_CRC = 0x40, /* CRC in use */
+ IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */
+};
+
+struct ietf_mpa_frame {
+ u8 key[IETF_MPA_KEY_SIZE];
+ u8 flags;
+ u8 rev;
+ __be16 priv_data_len;
+ u8 priv_data[0];
+};
+
+#define ietf_mpa_req_resp_frame ietf_mpa_frame
+
+struct nes_v4_quad {
+ u32 rsvd0;
+ __le32 DstIpAdrIndex; /* Only most significant 5 bits are valid */
+ __be32 SrcIpadr;
+ __be16 TcpPorts[2]; /* src is low, dest is high */
+};
+
+struct nes_cm_node;
+enum nes_timer_type {
+ NES_TIMER_TYPE_SEND,
+ NES_TIMER_TYPE_RECV,
+ NES_TIMER_NODE_CLEANUP,
+ NES_TIMER_TYPE_CLOSE,
+};
+
+#define MAX_NES_IFS 4
+
+#define SET_ACK 1
+#define SET_SYN 2
+#define SET_FIN 4
+#define SET_RST 8
+
+#define TCP_OPTIONS_PADDING 3
+
+struct option_base {
+ u8 optionnum;
+ u8 length;
+};
+
+enum option_numbers {
+ OPTION_NUMBER_END,
+ OPTION_NUMBER_NONE,
+ OPTION_NUMBER_MSS,
+ OPTION_NUMBER_WINDOW_SCALE,
+ OPTION_NUMBER_SACK_PERM,
+ OPTION_NUMBER_SACK,
+ OPTION_NUMBER_WRITE0 = 0xbc
+};
+
+struct option_mss {
+ u8 optionnum;
+ u8 length;
+ __be16 mss;
+};
+
+struct option_windowscale {
+ u8 optionnum;
+ u8 length;
+ u8 shiftcount;
+};
+
+union all_known_options {
+ char as_end;
+ struct option_base as_base;
+ struct option_mss as_mss;
+ struct option_windowscale as_windowscale;
+};
+
+struct nes_timer_entry {
+ struct list_head list;
+ unsigned long timetosend; /* jiffies */
+ struct sk_buff *skb;
+ u32 type;
+ u32 retrycount;
+ u32 retranscount;
+ u32 context;
+ u32 seq_num;
+ u32 send_retrans;
+ int close_when_complete;
+ struct net_device *netdev;
+};
+
+#define NES_DEFAULT_RETRYS 64
+#define NES_DEFAULT_RETRANS 8
+#ifdef CONFIG_INFINIBAND_NES_DEBUG
+#define NES_RETRY_TIMEOUT (1000*HZ/1000)
+#else
+#define NES_RETRY_TIMEOUT (3000*HZ/1000)
+#endif
+#define NES_SHORT_TIME (10)
+#define NES_LONG_TIME (2000*HZ/1000)
+
+#define NES_CM_HASHTABLE_SIZE 1024
+#define NES_CM_TCP_TIMER_INTERVAL 3000
+#define NES_CM_DEFAULT_MTU 1540
+#define NES_CM_DEFAULT_FRAME_CNT 10
+#define NES_CM_THREAD_STACK_SIZE 256
+#define NES_CM_DEFAULT_RCV_WND 64240 /* before we know that window scaling is allowed */
+#define NES_CM_DEFAULT_RCV_WND_SCALED 256960 /* after we know that window scaling is allowed */
+#define NES_CM_DEFAULT_RCV_WND_SCALE 2
+#define NES_CM_DEFAULT_FREE_PKTS 0x000A
+#define NES_CM_FREE_PKT_LO_WATERMARK 2
+
+#define NES_CM_DEFAULT_MSS 536
+
+#define NES_CM_DEF_SEQ 0x159bf75f
+#define NES_CM_DEF_LOCAL_ID 0x3b47
+
+#define NES_CM_DEF_SEQ2 0x18ed5740
+#define NES_CM_DEF_LOCAL_ID2 0xb807
+
+typedef u32 nes_addr_t;
+
+#define nes_cm_tsa_context nes_qp_context
+
+struct nes_qp;
+
+/* cm node transition states */
+enum nes_cm_node_state {
+ NES_CM_STATE_UNKNOWN,
+ NES_CM_STATE_INITED,
+ NES_CM_STATE_LISTENING,
+ NES_CM_STATE_SYN_RCVD,
+ NES_CM_STATE_SYN_SENT,
+ NES_CM_STATE_ONE_SIDE_ESTABLISHED,
+ NES_CM_STATE_ESTABLISHED,
+ NES_CM_STATE_ACCEPTING,
+ NES_CM_STATE_MPAREQ_SENT,
+ NES_CM_STATE_MPAREQ_RCVD,
+ NES_CM_STATE_TSA,
+ NES_CM_STATE_FIN_WAIT1,
+ NES_CM_STATE_FIN_WAIT2,
+ NES_CM_STATE_CLOSE_WAIT,
+ NES_CM_STATE_TIME_WAIT,
+ NES_CM_STATE_LAST_ACK,
+ NES_CM_STATE_CLOSING,
+ NES_CM_STATE_CLOSED
+};
+
+enum nes_tcpip_pkt_type {
+ NES_PKT_TYPE_UNKNOWN,
+ NES_PKT_TYPE_SYN,
+ NES_PKT_TYPE_SYNACK,
+ NES_PKT_TYPE_ACK,
+ NES_PKT_TYPE_FIN,
+ NES_PKT_TYPE_RST
+};
+
+
+/* type of nes connection */
+enum nes_cm_conn_type {
+ NES_CM_IWARP_CONN_TYPE,
+};
+
+/* CM context params */
+struct nes_cm_tcp_context {
+ u8 client;
+
+ u32 loc_seq_num;
+ u32 loc_ack_num;
+ u32 rem_ack_num;
+ u32 rcv_nxt;
+
+ u32 loc_id;
+ u32 rem_id;
+
+ u32 snd_wnd;
+ u32 max_snd_wnd;
+
+ u32 rcv_wnd;
+ u32 mss;
+ u8 snd_wscale;
+ u8 rcv_wscale;
+
+ struct nes_cm_tsa_context tsa_cntxt;
+ struct timeval sent_ts;
+};
+
+
+enum nes_cm_listener_state {
+ NES_CM_LISTENER_PASSIVE_STATE=1,
+ NES_CM_LISTENER_ACTIVE_STATE=2,
+ NES_CM_LISTENER_EITHER_STATE=3
+};
+
+struct nes_cm_listener {
+ struct list_head list;
+ struct nes_cm_core *cm_core;
+ u8 loc_mac[ETH_ALEN];
+ nes_addr_t loc_addr;
+ u16 loc_port;
+ struct iw_cm_id *cm_id;
+ enum nes_cm_conn_type conn_type;
+ atomic_t ref_count;
+ struct nes_vnic *nesvnic;
+ atomic_t pend_accepts_cnt;
+ int backlog;
+ enum nes_cm_listener_state listener_state;
+ u32 reused_node;
+};
+
+/* per connection node and node state information */
+struct nes_cm_node {
+ u32 hashkey;
+
+ nes_addr_t loc_addr, rem_addr;
+ u16 loc_port, rem_port;
+
+ u8 loc_mac[ETH_ALEN];
+ u8 rem_mac[ETH_ALEN];
+
+ enum nes_cm_node_state state;
+ struct nes_cm_tcp_context tcp_cntxt;
+ struct nes_cm_core *cm_core;
+ struct sk_buff_head resend_list;
+ atomic_t ref_count;
+ struct net_device *netdev;
+
+ struct nes_cm_node *loopbackpartner;
+
+ struct nes_timer_entry *send_entry;
+
+ spinlock_t retrans_list_lock;
+ struct list_head recv_list;
+ spinlock_t recv_list_lock;
+
+ int send_write0;
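+ /*
+ * The parsed MPA header and the raw MTU-sized receive buffer share storage:
+ * mpa_frame is simply a view onto the start of mpa_frame_buf.
+ */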
+ union {
+ struct ietf_mpa_frame mpa_frame;
+ u8 mpa_frame_buf[NES_CM_DEFAULT_MTU];
+ };
+ u16 mpa_frame_size;
+ struct iw_cm_id *cm_id;
+ struct list_head list;
+ int accelerated;
+ struct nes_cm_listener *listener;
+ enum nes_cm_conn_type conn_type;
+ struct nes_vnic *nesvnic;
+ int apbvt_set;
+ int accept_pend;
+ int freed;
+ struct nes_qp *nesqp;
+};
+
+/* structure for client or CM to fill when making CM api calls. */
+/* - only need to set relevant data, based on op. */
+struct nes_cm_info {
+ union {
+ struct iw_cm_id *cm_id;
+ struct net_device *netdev;
+ };
+
+ u16 loc_port;
+ u16 rem_port;
+ nes_addr_t loc_addr;
+ nes_addr_t rem_addr;
+
+ enum nes_cm_conn_type conn_type;
+ int backlog;
+};
+
+/* CM event codes */
+enum nes_cm_event_type {
+ NES_CM_EVENT_UNKNOWN,
+ NES_CM_EVENT_ESTABLISHED,
+ NES_CM_EVENT_MPA_REQ,
+ NES_CM_EVENT_MPA_CONNECT,
+ NES_CM_EVENT_MPA_ACCEPT,
+ NES_CM_EVENT_MPA_ESTABLISHED,
+ NES_CM_EVENT_CONNECTED,
+ NES_CM_EVENT_CLOSED,
+ NES_CM_EVENT_RESET,
+ NES_CM_EVENT_DROPPED_PKT,
+ NES_CM_EVENT_CLOSE_IMMED,
+ NES_CM_EVENT_CLOSE_HARD,
+ NES_CM_EVENT_CLOSE_CLEAN,
+ NES_CM_EVENT_ABORTED,
+ NES_CM_EVENT_SEND_FIRST
+};
+
+/* event to post to CM event handler */
+struct nes_cm_event {
+ enum nes_cm_event_type type;
+
+ struct nes_cm_info cm_info;
+ struct work_struct event_work;
+ struct nes_cm_node *cm_node;
+};
+
+struct nes_cm_core {
+ enum nes_cm_node_state state;
+
+ atomic_t listen_node_cnt;
+ struct nes_cm_node listen_list;
+ spinlock_t listen_list_lock;
+
+ u32 mtu;
+ u32 free_tx_pkt_max;
+ u32 rx_pkt_posted;
+ struct sk_buff_head tx_free_list;
+ atomic_t ht_node_cnt;
+ struct list_head connected_nodes;
+ /* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */
+ spinlock_t ht_lock;
+
+ struct timer_list tcp_timer;
+
+ struct nes_cm_ops *api;
+
+ int (*post_event)(struct nes_cm_event *event);
+ atomic_t events_posted;
+ struct workqueue_struct *event_wq;
+ struct workqueue_struct *disconn_wq;
+
+ atomic_t node_cnt;
+ u64 aborted_connects;
+ u32 options;
+
+ struct nes_cm_node *current_listen_node;
+};
+
+
+#define NES_CM_SET_PKT_SIZE (1 << 1)
+#define NES_CM_SET_FREE_PKT_Q_SIZE (1 << 2)
+
+/* CM ops/API for client interface */
+struct nes_cm_ops {
+ int (*accelerated)(struct nes_cm_core *, struct nes_cm_node *);
+ struct nes_cm_listener * (*listen)(struct nes_cm_core *, struct nes_vnic *,
+ struct nes_cm_info *);
+ int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *);
+ struct nes_cm_node * (*connect)(struct nes_cm_core *,
+ struct nes_vnic *, u16, void *,
+ struct nes_cm_info *);
+ int (*close)(struct nes_cm_core *, struct nes_cm_node *);
+ int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *,
+ struct nes_cm_node *);
+ int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *,
+ struct nes_cm_node *);
+ void (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
+ struct sk_buff *);
+ int (*destroy_cm_core)(struct nes_cm_core *);
+ int (*get)(struct nes_cm_core *);
+ int (*set)(struct nes_cm_core *, u32, u32);
+};
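+
+/*
+ * nes_cm_ops is the CM core's dispatch table: callers reach the connection
+ * manager through cm_core->api->listen/connect/accept/..., presumably so an
+ * alternate CM core could be plugged in without touching the callers.
+ */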
+
+int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *,
+ enum nes_timer_type, int, int);
+
+int nes_cm_disconn(struct nes_qp *);
+
+int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
+int nes_reject(struct iw_cm_id *, const void *, u8);
+int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
+int nes_create_listen(struct iw_cm_id *, int);
+int nes_destroy_listen(struct iw_cm_id *);
+
+int nes_cm_recv(struct sk_buff *, struct net_device *);
+int nes_cm_start(void);
+int nes_cm_stop(void);
+
+#endif /* NES_CM_H */
diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h
new file mode 100644
index 0000000..da9daba
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_context.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef NES_CONTEXT_H
+#define NES_CONTEXT_H
+
+struct nes_qp_context {
+ __le32 misc;
+ __le32 cqs;
+ __le32 sq_addr_low;
+ __le32 sq_addr_high;
+ __le32 rq_addr_low;
+ __le32 rq_addr_high;
+ __le32 misc2;
+ __le16 tcpPorts[2];
+ __le32 ip0;
+ __le32 ip1;
+ __le32 ip2;
+ __le32 ip3;
+ __le32 mss;
+ __le32 arp_index_vlan;
+ __le32 tcp_state_flow_label;
+ __le32 pd_index_wscale;
+ __le32 keepalive;
+ u32 ts_recent;
+ u32 ts_age;
+ __le32 snd_nxt;
+ __le32 snd_wnd;
+ __le32 rcv_nxt;
+ __le32 rcv_wnd;
+ __le32 snd_max;
+ __le32 snd_una;
+ u32 srtt;
+ __le32 rttvar;
+ __le32 ssthresh;
+ __le32 cwnd;
+ __le32 snd_wl1;
+ __le32 snd_wl2;
+ __le32 max_snd_wnd;
+ __le32 ts_val_delta;
+ u32 retransmit;
+ u32 probe_cnt;
+ u32 hte_index;
+ __le32 q2_addr_low;
+ __le32 q2_addr_high;
+ __le32 ird_index;
+ u32 Rsvd3;
+ __le32 ird_ord_sizes;
+ u32 mrkr_offset;
+ __le32 aeq_token_low;
+ __le32 aeq_token_high;
+};
+
+/* QP Context Misc Field */
+
+#define NES_QPCONTEXT_MISC_IWARP_VER_MASK 0x00000003
+#define NES_QPCONTEXT_MISC_IWARP_VER_SHIFT 0
+#define NES_QPCONTEXT_MISC_EFB_SIZE_MASK 0x000000C0
+#define NES_QPCONTEXT_MISC_EFB_SIZE_SHIFT 6
+#define NES_QPCONTEXT_MISC_RQ_SIZE_MASK 0x00000300
+#define NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT 8
+#define NES_QPCONTEXT_MISC_SQ_SIZE_MASK 0x00000c00
+#define NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT 10
+#define NES_QPCONTEXT_MISC_PCI_FCN_MASK 0x00007000
+#define NES_QPCONTEXT_MISC_PCI_FCN_SHIFT 12
+#define NES_QPCONTEXT_MISC_DUP_ACKS_MASK 0x00070000
+#define NES_QPCONTEXT_MISC_DUP_ACKS_SHIFT 16
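+
+/*
+ * Illustrative use of the mask/shift pairs above (not taken from this patch):
+ * a field is packed with something like
+ *	misc |= (val << NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT) &
+ *		NES_QPCONTEXT_MISC_SQ_SIZE_MASK;
+ * and unpacked with (misc & MASK) >> SHIFT.
+ */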
+
+enum nes_qp_context_misc_bits {
+ NES_QPCONTEXT_MISC_RX_WQE_SIZE = 0x00000004,
+ NES_QPCONTEXT_MISC_IPV4 = 0x00000008,
+ NES_QPCONTEXT_MISC_DO_NOT_FRAG = 0x00000010,
+ NES_QPCONTEXT_MISC_INSERT_VLAN = 0x00000020,
+ NES_QPCONTEXT_MISC_DROS = 0x00008000,
+ NES_QPCONTEXT_MISC_WSCALE = 0x00080000,
+ NES_QPCONTEXT_MISC_KEEPALIVE = 0x00100000,
+ NES_QPCONTEXT_MISC_TIMESTAMP = 0x00200000,
+ NES_QPCONTEXT_MISC_SACK = 0x00400000,
+ NES_QPCONTEXT_MISC_RDMA_WRITE_EN = 0x00800000,
+ NES_QPCONTEXT_MISC_RDMA_READ_EN = 0x01000000,
+ NES_QPCONTEXT_MISC_WBIND_EN = 0x10000000,
+ NES_QPCONTEXT_MISC_FAST_REGISTER_EN = 0x20000000,
+ NES_QPCONTEXT_MISC_PRIV_EN = 0x40000000,
+ NES_QPCONTEXT_MISC_NO_NAGLE = 0x80000000
+};
+
+enum nes_qp_acc_wq_sizes {
+ HCONTEXT_TSA_WQ_SIZE_4 = 0,
+ HCONTEXT_TSA_WQ_SIZE_32 = 1,
+ HCONTEXT_TSA_WQ_SIZE_128 = 2,
+ HCONTEXT_TSA_WQ_SIZE_512 = 3
+};
+
+/* QP Context Misc2 Fields */
+#define NES_QPCONTEXT_MISC2_TTL_MASK 0x000000ff
+#define NES_QPCONTEXT_MISC2_TTL_SHIFT 0
+#define NES_QPCONTEXT_MISC2_HOP_LIMIT_MASK 0x000000ff
+#define NES_QPCONTEXT_MISC2_HOP_LIMIT_SHIFT 0
+#define NES_QPCONTEXT_MISC2_LIMIT_MASK 0x00000300
+#define NES_QPCONTEXT_MISC2_LIMIT_SHIFT 8
+#define NES_QPCONTEXT_MISC2_NIC_INDEX_MASK 0x0000fc00
+#define NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT 10
+#define NES_QPCONTEXT_MISC2_SRC_IP_MASK 0x001f0000
+#define NES_QPCONTEXT_MISC2_SRC_IP_SHIFT 16
+#define NES_QPCONTEXT_MISC2_TOS_MASK 0xff000000
+#define NES_QPCONTEXT_MISC2_TOS_SHIFT 24
+#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_MASK 0xff000000
+#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_SHIFT 24
+
+/* QP Context Tcp State/Flow Label Fields */
+#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_MASK 0x000fffff
+#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_SHIFT 0
+#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_MASK 0xf0000000
+#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT 28
+
+enum nes_qp_tcp_state {
+ NES_QPCONTEXT_TCPSTATE_CLOSED = 1,
+ NES_QPCONTEXT_TCPSTATE_EST = 5,
+ NES_QPCONTEXT_TCPSTATE_TIME_WAIT = 11,
+};
+
+/* QP Context PD Index/wscale Fields */
+#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK 0x0000000f
+#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT 0
+#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK 0x00000f00
+#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT 8
+#define NES_QPCONTEXT_PDWSCALE_PDINDEX_MASK 0xffff0000
+#define NES_QPCONTEXT_PDWSCALE_PDINDEX_SHIFT 16
+
+/* QP Context Keepalive Fields */
+#define NES_QPCONTEXT_KEEPALIVE_DELTA_MASK 0x0000ffff
+#define NES_QPCONTEXT_KEEPALIVE_DELTA_SHIFT 0
+#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_MASK 0x00ff0000
+#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_SHIFT 16
+#define NES_QPCONTEXT_KEEPALIVE_INTV_MASK 0xff000000
+#define NES_QPCONTEXT_KEEPALIVE_INTV_SHIFT 24
+
+/* QP Context ORD/IRD Fields */
+#define NES_QPCONTEXT_ORDIRD_ORDSIZE_MASK 0x0000007f
+#define NES_QPCONTEXT_ORDIRD_ORDSIZE_SHIFT 0
+#define NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK 0x00030000
+#define NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT 16
+#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_MASK 0x30000000
+#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT 28
+
+enum nes_ord_ird_bits {
+ NES_QPCONTEXT_ORDIRD_WRPDU = 0x02000000,
+ NES_QPCONTEXT_ORDIRD_LSMM_PRESENT = 0x04000000,
+ NES_QPCONTEXT_ORDIRD_ALSMM = 0x08000000,
+ NES_QPCONTEXT_ORDIRD_AAH = 0x40000000,
+ NES_QPCONTEXT_ORDIRD_RNMC = 0x80000000
+};
+
+enum nes_iwarp_qp_state {
+ NES_QPCONTEXT_IWARP_STATE_NONEXIST = 0,
+ NES_QPCONTEXT_IWARP_STATE_IDLE = 1,
+ NES_QPCONTEXT_IWARP_STATE_RTS = 2,
+ NES_QPCONTEXT_IWARP_STATE_CLOSING = 3,
+ NES_QPCONTEXT_IWARP_STATE_TERMINATE = 5,
+ NES_QPCONTEXT_IWARP_STATE_ERROR = 6
+};
+
+
+#endif /* NES_CONTEXT_H */
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
new file mode 100644
index 0000000..7c49cc8
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -0,0 +1,3387 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+#include <linux/inet_lro.h>
+
+#include "nes.h"
+
+static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR;
+module_param(nes_lro_max_aggr, uint, 0444);
+MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation");
+
+static u32 crit_err_count;
+u32 int_mod_timer_init;
+u32 int_mod_cq_depth_256;
+u32 int_mod_cq_depth_128;
+u32 int_mod_cq_depth_32;
+u32 int_mod_cq_depth_24;
+u32 int_mod_cq_depth_16;
+u32 int_mod_cq_depth_4;
+u32 int_mod_cq_depth_1;
+static const u8 nes_max_critical_error_count = 100;
+#include "nes_cm.h"
+
+static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq);
+static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count);
+static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
+ struct nes_adapter *nesadapter, u8 OneG_Mode);
+static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq);
+static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq);
+static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq);
+static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
+ struct nes_hw_aeqe *aeqe);
+static void process_critical_error(struct nes_device *nesdev);
+static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
+static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
+
+#ifdef CONFIG_INFINIBAND_NES_DEBUG
+static unsigned char *nes_iwarp_state_str[] = {
+ "Non-Existant",
+ "Idle",
+ "RTS",
+ "Closing",
+ "RSVD1",
+ "Terminate",
+ "Error",
+ "RSVD2",
+};
+
+static unsigned char *nes_tcp_state_str[] = {
+ "Non-Existant",
+ "Closed",
+ "Listen",
+ "SYN Sent",
+ "SYN Rcvd",
+ "Established",
+ "Close Wait",
+ "FIN Wait 1",
+ "Closing",
+ "Last Ack",
+ "FIN Wait 2",
+ "Time Wait",
+ "RSVD1",
+ "RSVD2",
+ "RSVD3",
+ "RSVD4",
+};
+#endif
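+
+/*
+ * The two string tables above are indexed directly by the hardware iWARP
+ * state (enum nes_iwarp_qp_state) and TCP state (enum nes_qp_tcp_state)
+ * values when debug messages are printed elsewhere in the driver.
+ */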
+
+
+/**
+ * nes_nic_init_timer_defaults - set interrupt moderation defaults
+ * Loads the shared tuning timer with its min/max values and the CQE-count
+ * thresholds, using the larger thresholds when jumbomode is set.
+ */
+void nes_nic_init_timer_defaults(struct nes_device *nesdev, u8 jumbomode)
+{
+ unsigned long flags;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
+
+ spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
+
+ shared_timer->timer_in_use_min = NES_NIC_FAST_TIMER_LOW;
+ shared_timer->timer_in_use_max = NES_NIC_FAST_TIMER_HIGH;
+ if (jumbomode) {
+ shared_timer->threshold_low = DEFAULT_JUMBO_NES_QL_LOW;
+ shared_timer->threshold_target = DEFAULT_JUMBO_NES_QL_TARGET;
+ shared_timer->threshold_high = DEFAULT_JUMBO_NES_QL_HIGH;
+ } else {
+ shared_timer->threshold_low = DEFAULT_NES_QL_LOW;
+ shared_timer->threshold_target = DEFAULT_NES_QL_TARGET;
+ shared_timer->threshold_high = DEFAULT_NES_QL_HIGH;
+ }
+
+ /* todo use netdev->mtu to set thresholds */
+ spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
+}
+
+
+/**
+ * nes_nic_init_timer - initialize the shared periodic timer
+ * Resets the tuning state on first use and programs the periodic control
+ * register whenever the timer value has changed.
+ */
+static void nes_nic_init_timer(struct nes_device *nesdev)
+{
+ unsigned long flags;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
+
+ spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
+
+ if (shared_timer->timer_in_use_old == 0) {
+ nesdev->deepcq_count = 0;
+ shared_timer->timer_direction_upward = 0;
+ shared_timer->timer_direction_downward = 0;
+ shared_timer->timer_in_use = NES_NIC_FAST_TIMER;
+ shared_timer->timer_in_use_old = 0;
+
+ }
+ if (shared_timer->timer_in_use != shared_timer->timer_in_use_old) {
+ shared_timer->timer_in_use_old = shared_timer->timer_in_use;
+ nes_write32(nesdev->regs+NES_PERIODIC_CONTROL,
+ 0x80000000 | ((u32)(shared_timer->timer_in_use*8)));
+ }
+ /* todo use netdev->mtu to set thresholds */
+ spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
+}
+
+
+/**
+ * nes_nic_tune_timer - adapt the interrupt moderation timer
+ * Compares this interval's CQE count against the low/target/high thresholds
+ * and nudges the timer up or down, clamped to those bounds.
+ */
+static void nes_nic_tune_timer(struct nes_device *nesdev)
+{
+ unsigned long flags;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
+ u16 cq_count = nesdev->currcq_count;
+
+ spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
+
+ if (shared_timer->cq_count_old <= cq_count)
+ shared_timer->cq_direction_downward = 0;
+ else
+ shared_timer->cq_direction_downward++;
+ shared_timer->cq_count_old = cq_count;
+ if (shared_timer->cq_direction_downward > NES_NIC_CQ_DOWNWARD_TREND) {
+ if (cq_count <= shared_timer->threshold_low &&
+ shared_timer->threshold_low > 4) {
+ shared_timer->threshold_low = shared_timer->threshold_low / 2;
+ shared_timer->cq_direction_downward = 0;
+ nesdev->currcq_count = 0;
+ spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
+ return;
+ }
+ }
+
+ if (cq_count > 1) {
+ nesdev->deepcq_count += cq_count;
+ if (cq_count <= shared_timer->threshold_low) { /* increase timer gently */
+ shared_timer->timer_direction_upward++;
+ shared_timer->timer_direction_downward = 0;
+ } else if (cq_count <= shared_timer->threshold_target) { /* balanced */
+ shared_timer->timer_direction_upward = 0;
+ shared_timer->timer_direction_downward = 0;
+ } else if (cq_count <= shared_timer->threshold_high) { /* decrease timer gently */
+ shared_timer->timer_direction_downward++;
+ shared_timer->timer_direction_upward = 0;
+ } else if (cq_count <= (shared_timer->threshold_high) * 2) {
+ shared_timer->timer_in_use -= 2;
+ shared_timer->timer_direction_upward = 0;
+ shared_timer->timer_direction_downward++;
+ } else {
+ shared_timer->timer_in_use -= 4;
+ shared_timer->timer_direction_upward = 0;
+ shared_timer->timer_direction_downward++;
+ }
+
+ if (shared_timer->timer_direction_upward > 3) { /* using history */
+ shared_timer->timer_in_use += 3;
+ shared_timer->timer_direction_upward = 0;
+ shared_timer->timer_direction_downward = 0;
+ }
+ if (shared_timer->timer_direction_downward > 5) { /* using history */
+ shared_timer->timer_in_use -= 4;
+ shared_timer->timer_direction_downward = 0;
+ shared_timer->timer_direction_upward = 0;
+ }
+ }
+
+ /* boundary checking */
+ if (shared_timer->timer_in_use > shared_timer->threshold_high)
+ shared_timer->timer_in_use = shared_timer->threshold_high;
+ else if (shared_timer->timer_in_use < shared_timer->threshold_low)
+ shared_timer->timer_in_use = shared_timer->threshold_low;
+
+ nesdev->currcq_count = 0;
+
+ spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
+}
+
+
+/**
+ * nes_init_adapter - initialize adapter
+ */
+struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev)
+{
+ struct nes_adapter *nesadapter = NULL;
+ unsigned long num_pds;
+ u32 u32temp;
+ u32 port_count;
+ u16 max_rq_wrs;
+ u16 max_sq_wrs;
+ u32 max_mr;
+ u32 max_256pbl;
+ u32 max_4kpbl;
+ u32 max_qp;
+ u32 max_irrq;
+ u32 max_cq;
+ u32 hte_index_mask;
+ u32 adapter_size;
+ u32 arp_table_size;
+ u16 vendor_id;
+ u8 OneG_Mode;
+ u8 func_index;
+
+ /* search the list of existing adapters */
+ list_for_each_entry(nesadapter, &nes_adapter_list, list) {
+ nes_debug(NES_DBG_INIT, "Searching Adapter list for PCI devfn = 0x%X,"
+ " adapter PCI slot/bus = %u/%u, pci devices PCI slot/bus = %u/%u, .\n",
+ nesdev->pcidev->devfn,
+ PCI_SLOT(nesadapter->devfn),
+ nesadapter->bus_number,
+ PCI_SLOT(nesdev->pcidev->devfn),
+ nesdev->pcidev->bus->number );
+ if ((PCI_SLOT(nesadapter->devfn) == PCI_SLOT(nesdev->pcidev->devfn)) &&
+ (nesadapter->bus_number == nesdev->pcidev->bus->number)) {
+ nesadapter->ref_count++;
+ return nesadapter;
+ }
+ }
+
+ /* no adapter found */
+ num_pds = pci_resource_len(nesdev->pcidev, BAR_1) >> PAGE_SHIFT;
+ if ((hw_rev != NE020_REV) && (hw_rev != NE020_REV1)) {
+ nes_debug(NES_DBG_INIT, "NE020 driver detected unknown hardware revision 0x%x\n",
+ hw_rev);
+ return NULL;
+ }
+
+ nes_debug(NES_DBG_INIT, "Determine Soft Reset, QP_control=0x%x, CPU0=0x%x, CPU1=0x%x, CPU2=0x%x\n",
+ nes_read_indexed(nesdev, NES_IDX_QP_CONTROL + PCI_FUNC(nesdev->pcidev->devfn) * 8),
+ nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS),
+ nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 4),
+ nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 8));
+
+ nes_debug(NES_DBG_INIT, "Reset and init NE020\n");
+
+
+ if ((port_count = nes_reset_adapter_ne020(nesdev, &OneG_Mode)) == 0)
+ return NULL;
+
+ max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE);
+ nes_debug(NES_DBG_INIT, "QP_CTX_SIZE=%u\n", max_qp);
+
+ u32temp = nes_read_indexed(nesdev, NES_IDX_QUAD_HASH_TABLE_SIZE);
+ if (max_qp > ((u32)1 << (u32temp & 0x001f))) {
+ nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to hash table size = 0x%08X\n",
+ max_qp, u32temp);
+ max_qp = (u32)1 << (u32temp & 0x001f);
+ }
+
+ hte_index_mask = ((u32)1 << ((u32temp & 0x001f)+1))-1;
+ nes_debug(NES_DBG_INIT, "Max QP = %u, hte_index_mask = 0x%08X.\n",
+ max_qp, hte_index_mask);
+
+ u32temp = nes_read_indexed(nesdev, NES_IDX_IRRQ_COUNT);
+
+ max_irrq = 1 << (u32temp & 0x001f);
+
+ if (max_qp > max_irrq) {
+ max_qp = max_irrq;
+ nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to Available Q1s.\n",
+ max_qp);
+ }
+
+ /* there should be no reason to allocate more pds than qps */
+ if (num_pds > max_qp)
+ num_pds = max_qp;
+
+ u32temp = nes_read_indexed(nesdev, NES_IDX_MRT_SIZE);
+ max_mr = (u32)8192 << (u32temp & 0x7);
+
+ u32temp = nes_read_indexed(nesdev, NES_IDX_PBL_REGION_SIZE);
+ max_256pbl = (u32)1 << (u32temp & 0x0000001f);
+ max_4kpbl = (u32)1 << ((u32temp >> 16) & 0x0000001f);
+ max_cq = nes_read_indexed(nesdev, NES_IDX_CQ_CTX_SIZE);
+
+ u32temp = nes_read_indexed(nesdev, NES_IDX_ARP_CACHE_SIZE);
+ arp_table_size = 1 << u32temp;
+
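+ /*
+ * Everything below is carved from a single kzalloc(): the nes_adapter
+ * struct itself, then one bitmap each for QPs, MRs, CQs, PDs, and ARP
+ * entries, then the qp_table pointer array. The matching pointer setup
+ * happens further down once the allocation succeeds.
+ */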
+ adapter_size = (sizeof(struct nes_adapter) +
+ (sizeof(unsigned long)-1)) & (~(sizeof(unsigned long)-1));
+ adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp);
+ adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr);
+ adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_cq);
+ adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(num_pds);
+ adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(arp_table_size);
+ adapter_size += sizeof(struct nes_qp **) * max_qp;
+
+ /* allocate a new adapter struct */
+ nesadapter = kzalloc(adapter_size, GFP_KERNEL);
+ if (nesadapter == NULL) {
+ return NULL;
+ }
+
+ nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n",
+ nesadapter, (u32)sizeof(struct nes_adapter), adapter_size);
+
+ if (nes_read_eeprom_values(nesdev, nesadapter)) {
+ printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
+ kfree(nesadapter);
+ return NULL;
+ }
+
+ if (nes_init_serdes(nesdev, hw_rev, port_count, nesadapter,
+ OneG_Mode)) {
+ kfree(nesadapter);
+ return NULL;
+ }
+ nes_init_csr_ne020(nesdev, hw_rev, port_count);
+
+ memset(nesadapter->pft_mcast_map, 255,
+ sizeof nesadapter->pft_mcast_map);
+
+ /* populate the new nesadapter */
+ nesadapter->devfn = nesdev->pcidev->devfn;
+ nesadapter->bus_number = nesdev->pcidev->bus->number;
+ nesadapter->ref_count = 1;
+ nesadapter->timer_int_req = 0xffff0000;
+ nesadapter->OneG_Mode = OneG_Mode;
+ nesadapter->doorbell_start = nesdev->doorbell_region;
+
+ /* nesadapter->tick_delta = clk_divisor; */
+ nesadapter->hw_rev = hw_rev;
+ nesadapter->port_count = port_count;
+
+ nesadapter->max_qp = max_qp;
+ nesadapter->hte_index_mask = hte_index_mask;
+ nesadapter->max_irrq = max_irrq;
+ nesadapter->max_mr = max_mr;
+ nesadapter->max_256pbl = max_256pbl - 1;
+ nesadapter->max_4kpbl = max_4kpbl - 1;
+ nesadapter->max_cq = max_cq;
+ nesadapter->free_256pbl = max_256pbl - 1;
+ nesadapter->free_4kpbl = max_4kpbl - 1;
+ nesadapter->max_pd = num_pds;
+ nesadapter->arp_table_size = arp_table_size;
+
+ nesadapter->et_pkt_rate_low = NES_TIMER_ENABLE_LIMIT;
+ if (nes_drv_opt & NES_DRV_OPT_DISABLE_INT_MOD) {
+ nesadapter->et_use_adaptive_rx_coalesce = 0;
+ nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT;
+ nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval;
+ } else {
+ nesadapter->et_use_adaptive_rx_coalesce = 1;
+ nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC;
+ nesadapter->et_rx_coalesce_usecs_irq = 0;
+ printk(PFX "%s: Using Adaptive Interrupt Moderation\n", __func__);
+ }
+ /* Setup and enable the periodic timer */
+ if (nesadapter->et_rx_coalesce_usecs_irq)
+ nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x80000000 |
+ ((u32)(nesadapter->et_rx_coalesce_usecs_irq * 8)));
+ else
+ nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x00000000);
+
+ nesadapter->base_pd = 1;
+
+ nesadapter->device_cap_flags =
+ IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
+
+ nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
+ [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
+ nesadapter->allocated_cqs = &nesadapter->allocated_qps[BITS_TO_LONGS(max_qp)];
+ nesadapter->allocated_mrs = &nesadapter->allocated_cqs[BITS_TO_LONGS(max_cq)];
+ nesadapter->allocated_pds = &nesadapter->allocated_mrs[BITS_TO_LONGS(max_mr)];
+ nesadapter->allocated_arps = &nesadapter->allocated_pds[BITS_TO_LONGS(num_pds)];
+ nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
+
+
+ /* mark the usual suspect QPs and CQs as in use */
+ for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) {
+ set_bit(u32temp, nesadapter->allocated_qps);
+ set_bit(u32temp, nesadapter->allocated_cqs);
+ }
+
+ for (u32temp = 0; u32temp < 20; u32temp++)
+ set_bit(u32temp, nesadapter->allocated_pds);
+ u32temp = nes_read_indexed(nesdev, NES_IDX_QP_MAX_CFG_SIZES);
+
+ max_rq_wrs = ((u32temp >> 8) & 3);
+ switch (max_rq_wrs) {
+ case 0:
+ max_rq_wrs = 4;
+ break;
+ case 1:
+ max_rq_wrs = 16;
+ break;
+ case 2:
+ max_rq_wrs = 32;
+ break;
+ case 3:
+ max_rq_wrs = 512;
+ break;
+ }
+
+ max_sq_wrs = (u32temp & 3);
+ switch (max_sq_wrs) {
+ case 0:
+ max_sq_wrs = 4;
+ break;
+ case 1:
+ max_sq_wrs = 16;
+ break;
+ case 2:
+ max_sq_wrs = 32;
+ break;
+ case 3:
+ max_sq_wrs = 512;
+ break;
+ }
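+ /*
+ * NES_IDX_QP_MAX_CFG_SIZES packs the supported RQ and SQ depths as 2-bit
+ * codes (0/1/2/3 -> 4/16/32/512 WRs); the switches above decode them and
+ * the smaller of the two becomes max_qp_wr.
+ */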
+ nesadapter->max_qp_wr = min(max_rq_wrs, max_sq_wrs);
+ nesadapter->max_irrq_wr = (u32temp >> 16) & 3;
+
+ nesadapter->max_sge = 4;
+ nesadapter->max_cqe = 32767;
+
+ if (nes_read_eeprom_values(nesdev, nesadapter)) {
+ printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
+ kfree(nesadapter);
+ return NULL;
+ }
+
+ u32temp = nes_read_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG);
+ nes_write_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG,
+ (u32temp & 0xff000000) | (nesadapter->tcp_timer_core_clk_divisor & 0x00ffffff));
+
+ /* setup port configuration */
+ if (nesadapter->port_count == 1) {
+ nesadapter->log_port = 0x00000000;
+ if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT)
+ nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000002);
+ else
+ nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
+ } else {
+ if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
+ nesadapter->log_port = 0x000000D8;
+ } else {
+ if (nesadapter->port_count == 2)
+ nesadapter->log_port = 0x00000044;
+ else
+ nesadapter->log_port = 0x000000e4;
+ }
+ nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
+ }
+
+ nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT,
+ nesadapter->log_port);
+ nes_debug(NES_DBG_INIT, "Probe time, LOG2PHY=%u\n",
+ nes_read_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT));
+
+ spin_lock_init(&nesadapter->resource_lock);
+ spin_lock_init(&nesadapter->phy_lock);
+ spin_lock_init(&nesadapter->pbl_lock);
+ spin_lock_init(&nesadapter->periodic_timer_lock);
+
+ INIT_LIST_HEAD(&nesadapter->nesvnic_list[0]);
+ INIT_LIST_HEAD(&nesadapter->nesvnic_list[1]);
+ INIT_LIST_HEAD(&nesadapter->nesvnic_list[2]);
+ INIT_LIST_HEAD(&nesadapter->nesvnic_list[3]);
+
+ if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) {
+ u32 pcs_control_status0, pcs_control_status1;
+ u32 reset_value;
+ u32 i = 0;
+ u32 int_cnt = 0;
+ u32 ext_cnt = 0;
+ unsigned long flags;
+ u32 j = 0;
+
+ pcs_control_status0 = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0);
+ pcs_control_status1 = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+
+ for (i = 0; i < NES_MAX_LINK_CHECK; i++) {
+ pcs_control_status0 = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0);
+ pcs_control_status1 = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+ if ((0x0F000100 == (pcs_control_status0 & 0x0F000100))
+ || (0x0F000100 == (pcs_control_status1 & 0x0F000100)))
+ int_cnt++;
+ msleep(1);
+ }
+ if (int_cnt > 1) {
+ spin_lock_irqsave(&nesadapter->phy_lock, flags);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088);
+ mh_detected++;
+ reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
+ reset_value |= 0x0000003d;
+ nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
+
+ while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
+ & 0x00000040) != 0x00000040) && (j++ < 5000));
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+
+ pcs_control_status0 = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0);
+ pcs_control_status1 = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+
+ for (i = 0; i < NES_MAX_LINK_CHECK; i++) {
+ pcs_control_status0 = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0);
+ pcs_control_status1 = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+ if ((0x0F000100 == (pcs_control_status0 & 0x0F000100))
+ || (0x0F000100 == (pcs_control_status1 & 0x0F000100))) {
+ if (++ext_cnt > int_cnt) {
+ spin_lock_irqsave(&nesadapter->phy_lock, flags);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1,
+ 0x0000F0C8);
+ mh_detected++;
+ reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
+ reset_value |= 0x0000003d;
+ nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
+
+ while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
+ & 0x00000040) != 0x00000040) && (j++ < 5000));
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+ break;
+ }
+ }
+ msleep(1);
+ }
+ }
+ }
+
+ if (nesadapter->hw_rev == NE020_REV) {
+ init_timer(&nesadapter->mh_timer);
+ nesadapter->mh_timer.function = nes_mh_fix;
+ nesadapter->mh_timer.expires = jiffies + (HZ/5); /* 200 ms */
+ nesadapter->mh_timer.data = (unsigned long)nesdev;
+ add_timer(&nesadapter->mh_timer);
+ } else {
+ nes_write32(nesdev->regs+NES_INTF_INT_STAT, 0x0f000000);
+ }
+
+ init_timer(&nesadapter->lc_timer);
+ nesadapter->lc_timer.function = nes_clc;
+ nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */
+ nesadapter->lc_timer.data = (unsigned long)nesdev;
+ add_timer(&nesadapter->lc_timer);
+
+ list_add_tail(&nesadapter->list, &nes_adapter_list);
+
+ for (func_index = 0; func_index < 8; func_index++) {
+ pci_bus_read_config_word(nesdev->pcidev->bus,
+ PCI_DEVFN(PCI_SLOT(nesdev->pcidev->devfn),
+ func_index), 0, &vendor_id);
+ if (vendor_id == 0xffff)
+ break;
+ }
+ nes_debug(NES_DBG_INIT, "%s %d functions found for %s.\n", __func__,
+ func_index, pci_name(nesdev->pcidev));
+ nesadapter->adapter_fcn_count = func_index;
+
+ return nesadapter;
+}
+
+
+/**
+ * nes_reset_adapter_ne020 - soft reset the NE020
+ * Issues the full and per-port soft resets, waits for the internal CPU and
+ * SERDES lanes to become ready, and returns the port count (0 on failure).
+ */
+static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode)
+{
+ u32 port_count;
+ u32 u32temp;
+ u32 i;
+
+ u32temp = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
+ port_count = ((u32temp & 0x00000300) >> 8) + 1;
+ /* TODO: assuming that both SERDES are set the same for now */
+ *OneG_Mode = (u32temp & 0x00003c00) ? 0 : 1;
+ nes_debug(NES_DBG_INIT, "Initial Software Reset = 0x%08X, port_count=%u\n",
+ u32temp, port_count);
+ if (*OneG_Mode)
+ nes_debug(NES_DBG_INIT, "Running in 1G mode.\n");
+ u32temp &= 0xff00ffc0;
+ switch (port_count) {
+ case 1:
+ u32temp |= 0x00ee0000;
+ break;
+ case 2:
+ u32temp |= 0x00cc0000;
+ break;
+ case 4:
+ u32temp |= 0x00000000;
+ break;
+ default:
+ return 0;
+ }
+
+ /* check and do full reset if needed */
+ if (nes_read_indexed(nesdev, NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))) {
+ nes_debug(NES_DBG_INIT, "Issuing Full Soft reset = 0x%08X\n", u32temp | 0xd);
+ nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd);
+
+ i = 0;
+ while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000)
+ mdelay(1);
+ if (i >= 10000) {
+ nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n");
+ return 0;
+ }
+
+ i = 0;
+ while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000)
+ mdelay(1);
+ if (i >= 10000) {
+ printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n",
+ nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS));
+ return 0;
+ }
+ }
+
+ /* port reset */
+ switch (port_count) {
+ case 1:
+ u32temp |= 0x00ee0010;
+ break;
+ case 2:
+ u32temp |= 0x00cc0030;
+ break;
+ case 4:
+ u32temp |= 0x00000030;
+ break;
+ }
+
+ nes_debug(NES_DBG_INIT, "Issuing Port Soft reset = 0x%08X\n", u32temp | 0xd);
+ nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd);
+
+ i = 0;
+ while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000)
+ mdelay(1);
+ if (i >= 10000) {
+ nes_debug(NES_DBG_INIT, "Did not see port soft reset done.\n");
+ return 0;
+ }
+
+ /* serdes 0 */
+ i = 0;
+ while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
+ & 0x0000000f)) != 0x0000000f) && i++ < 5000)
+ mdelay(1);
+ if (i >= 5000) {
+ nes_debug(NES_DBG_INIT, "Serdes 0 not ready, status=%x\n", u32temp);
+ return 0;
+ }
+
+ /* serdes 1 */
+ if (port_count > 1) {
+ i = 0;
+ while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1)
+ & 0x0000000f)) != 0x0000000f) && i++ < 5000)
+ mdelay(1);
+ if (i >= 5000) {
+ nes_debug(NES_DBG_INIT, "Serdes 1 not ready, status=%x\n", u32temp);
+ return 0;
+ }
+ }
+
+ return port_count;
+}
+
+
+/**
+ * nes_init_serdes - initialize the SERDES lanes
+ * Programs SERDES 0, and SERDES 1 when enough ports are present, according
+ * to hardware revision, PHY type, and 1G/10G mode.
+ */
+static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
+ struct nes_adapter *nesadapter, u8 OneG_Mode)
+{
+ int i;
+ u32 u32temp;
+ u32 serdes_common_control;
+
+ if (hw_rev != NE020_REV) {
+ /* init serdes 0 */
+
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
+ if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
+ serdes_common_control = nes_read_indexed(nesdev,
+ NES_IDX_ETH_SERDES_COMMON_CONTROL0);
+ serdes_common_control |= 0x000000100;
+ nes_write_indexed(nesdev,
+ NES_IDX_ETH_SERDES_COMMON_CONTROL0,
+ serdes_common_control);
+ } else if (!OneG_Mode) {
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000);
+ }
+ if (((port_count > 1) &&
+ (nesadapter->phy_type[0] != NES_PHY_TYPE_PUMA_1G)) ||
+ ((port_count > 2) &&
+ (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G))) {
+ /* init serdes 1 */
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF);
+ if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
+ serdes_common_control = nes_read_indexed(nesdev,
+ NES_IDX_ETH_SERDES_COMMON_CONTROL1);
+ serdes_common_control |= 0x000000100;
+ nes_write_indexed(nesdev,
+ NES_IDX_ETH_SERDES_COMMON_CONTROL1,
+ serdes_common_control);
+ } else if (!OneG_Mode) {
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000);
+ }
+ }
+ } else {
+ /* init serdes 0 */
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008);
+ i = 0;
+ while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
+ & 0x0000000f)) != 0x0000000f) && i++ < 5000)
+ mdelay(1);
+ if (i >= 5000) {
+ nes_debug(NES_DBG_PHY, "Init: serdes 0 not ready, status=%x\n", u32temp);
+ return 1;
+ }
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000);
+ if (OneG_Mode)
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222);
+ else
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222);
+
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff);
+ if (port_count > 1) {
+ /* init serdes 1 */
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x00000048);
+ i = 0;
+ while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1)
+ & 0x0000000f)) != 0x0000000f) && (i++ < 5000))
+ mdelay(1);
+ if (i >= 5000) {
+ printk("%s: Init: serdes 1 not ready, status=%x\n", __func__, u32temp);
+ /* return 1; */
+ }
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x000bdef7);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE1, 0x9ce73000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE1, 0x0ff00000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET1, 0x00000000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS1, 0x00000000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1, 0x00000000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL1, 0xf0002222);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000ff);
+ }
+ }
+ return 0;
+}
+
+
+/**
+ * nes_init_csr_ne020
+ * Initialize registers for ne020 hardware
+ */
+static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count)
+{
+ u32 u32temp;
+
+ nes_debug(NES_DBG_INIT, "port_count=%d\n", port_count);
+
+ nes_write_indexed(nesdev, 0x000001E4, 0x00000007);
+ /* nes_write_indexed(nesdev, 0x000001E8, 0x000208C4); */
+ nes_write_indexed(nesdev, 0x000001E8, 0x00020874);
+ nes_write_indexed(nesdev, 0x000001D8, 0x00048002);
+ /* nes_write_indexed(nesdev, 0x000001D8, 0x0004B002); */
+ nes_write_indexed(nesdev, 0x000001FC, 0x00050005);
+ nes_write_indexed(nesdev, 0x00000600, 0x55555555);
+ nes_write_indexed(nesdev, 0x00000604, 0x55555555);
+
+ /* TODO: move these MAC register settings to NIC bringup */
+ nes_write_indexed(nesdev, 0x00002000, 0x00000001);
+ nes_write_indexed(nesdev, 0x00002004, 0x00000001);
+ nes_write_indexed(nesdev, 0x00002008, 0x0000FFFF);
+ nes_write_indexed(nesdev, 0x0000200C, 0x00000001);
+ nes_write_indexed(nesdev, 0x00002010, 0x000003c1);
+ nes_write_indexed(nesdev, 0x0000201C, 0x75345678);
+ if (port_count > 1) {
+ nes_write_indexed(nesdev, 0x00002200, 0x00000001);
+ nes_write_indexed(nesdev, 0x00002204, 0x00000001);
+ nes_write_indexed(nesdev, 0x00002208, 0x0000FFFF);
+ nes_write_indexed(nesdev, 0x0000220C, 0x00000001);
+ nes_write_indexed(nesdev, 0x00002210, 0x000003c1);
+ nes_write_indexed(nesdev, 0x0000221C, 0x75345678);
+ nes_write_indexed(nesdev, 0x00000908, 0x20000001);
+ }
+ if (port_count > 2) {
+ nes_write_indexed(nesdev, 0x00002400, 0x00000001);
+ nes_write_indexed(nesdev, 0x00002404, 0x00000001);
+ nes_write_indexed(nesdev, 0x00002408, 0x0000FFFF);
+ nes_write_indexed(nesdev, 0x0000240C, 0x00000001);
+ nes_write_indexed(nesdev, 0x00002410, 0x000003c1);
+ nes_write_indexed(nesdev, 0x0000241C, 0x75345678);
+ nes_write_indexed(nesdev, 0x00000910, 0x20000001);
+
+ nes_write_indexed(nesdev, 0x00002600, 0x00000001);
+ nes_write_indexed(nesdev, 0x00002604, 0x00000001);
+ nes_write_indexed(nesdev, 0x00002608, 0x0000FFFF);
+ nes_write_indexed(nesdev, 0x0000260C, 0x00000001);
+ nes_write_indexed(nesdev, 0x00002610, 0x000003c1);
+ nes_write_indexed(nesdev, 0x0000261C, 0x75345678);
+ nes_write_indexed(nesdev, 0x00000918, 0x20000001);
+ }
+
+ nes_write_indexed(nesdev, 0x00005000, 0x00018000);
+ /* nes_write_indexed(nesdev, 0x00005000, 0x00010000); */
+ nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, (wqm_quanta << 1) |
+ 0x00000001);
+ nes_write_indexed(nesdev, 0x00005008, 0x1F1F1F1F);
+ nes_write_indexed(nesdev, 0x00005010, 0x1F1F1F1F);
+ nes_write_indexed(nesdev, 0x00005018, 0x1F1F1F1F);
+ nes_write_indexed(nesdev, 0x00005020, 0x1F1F1F1F);
+ nes_write_indexed(nesdev, 0x00006090, 0xFFFFFFFF);
+
+ /* TODO: move this to code, get from EEPROM */
+ nes_write_indexed(nesdev, 0x00000900, 0x20000001);
+ nes_write_indexed(nesdev, 0x000060C0, 0x0000028e);
+ nes_write_indexed(nesdev, 0x000060C8, 0x00000020);
+
+ nes_write_indexed(nesdev, 0x000001EC, 0x7b2625a0);
+ /* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */
+
+ if (hw_rev != NE020_REV) {
+ u32temp = nes_read_indexed(nesdev, 0x000008e8);
+ u32temp |= 0x80000000;
+ nes_write_indexed(nesdev, 0x000008e8, u32temp);
+ u32temp = nes_read_indexed(nesdev, 0x000021f8);
+ u32temp &= 0x7fffffff;
+ u32temp |= 0x7fff0010;
+ nes_write_indexed(nesdev, 0x000021f8, u32temp);
+ }
+}
+
+
+/**
+ * nes_destroy_adapter - destroy the adapter structure
+ */
+void nes_destroy_adapter(struct nes_adapter *nesadapter)
+{
+ struct nes_adapter *tmp_adapter;
+
+ list_for_each_entry(tmp_adapter, &nes_adapter_list, list) {
+ nes_debug(NES_DBG_SHUTDOWN, "Nes Adapter list entry = 0x%p.\n",
+ tmp_adapter);
+ }
+
+ nesadapter->ref_count--;
+ if (!nesadapter->ref_count) {
+ if (nesadapter->hw_rev == NE020_REV) {
+ del_timer(&nesadapter->mh_timer);
+ }
+ del_timer(&nesadapter->lc_timer);
+
+ list_del(&nesadapter->list);
+ kfree(nesadapter);
+ }
+}
+
+
+/**
+ * nes_init_cqp - create the control QP and its queues
+ * Carves the CQP SQ, CCQ, CEQs, and AEQ out of one DMA allocation, posts
+ * the create WQEs, and polls until the hardware reports them complete.
+ */
+int nes_init_cqp(struct nes_device *nesdev)
+{
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_hw_cqp_qp_context *cqp_qp_context;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_hw_ceq *ceq;
+ struct nes_hw_ceq *nic_ceq;
+ struct nes_hw_aeq *aeq;
+ void *vmem;
+ dma_addr_t pmem;
+ u32 count=0;
+ u32 cqp_head;
+ u64 u64temp;
+ u32 u32temp;
+
+ /* allocate CQP memory */
+ /* Need to add max_cq to the aeq size once cq overflow checking is added back */
+ /* SQ is 512 byte aligned, others are 256 byte aligned */
+ nesdev->cqp_mem_size = 512 +
+ (sizeof(struct nes_hw_cqp_wqe) * NES_CQP_SQ_SIZE) +
+ (sizeof(struct nes_hw_cqe) * NES_CCQ_SIZE) +
+ max(((u32)sizeof(struct nes_hw_ceqe) * NES_CCEQ_SIZE), (u32)256) +
+ max(((u32)sizeof(struct nes_hw_ceqe) * NES_NIC_CEQ_SIZE), (u32)256) +
+ (sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) +
+ sizeof(struct nes_hw_cqp_qp_context);
+
+ nesdev->cqp_vbase = pci_alloc_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
+ &nesdev->cqp_pbase);
+ if (!nesdev->cqp_vbase) {
+ nes_debug(NES_DBG_INIT, "Unable to allocate memory for host descriptor rings\n");
+ return -ENOMEM;
+ }
+ memset(nesdev->cqp_vbase, 0, nesdev->cqp_mem_size);
+
+ /* Allocate twice as many CQP requests as the SQ size */
+ nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) *
+ 2 * NES_CQP_SQ_SIZE, GFP_KERNEL);
+ if (nesdev->nes_cqp_requests == NULL) {
+ nes_debug(NES_DBG_INIT, "Unable to allocate memory CQP request entries.\n");
+ pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase,
+ nesdev->cqp.sq_pbase);
+ return -ENOMEM;
+ }
+
+ nes_debug(NES_DBG_INIT, "Allocated CQP structures at %p (phys = %016lX), size = %u.\n",
+ nesdev->cqp_vbase, (unsigned long)nesdev->cqp_pbase, nesdev->cqp_mem_size);
+
+ spin_lock_init(&nesdev->cqp.lock);
+ init_waitqueue_head(&nesdev->cqp.waitq);
+
+ /* Setup Various Structures */
+ vmem = (void *)(((unsigned long)nesdev->cqp_vbase + (512 - 1)) &
+ ~(unsigned long)(512 - 1));
+ pmem = (dma_addr_t)(((unsigned long long)nesdev->cqp_pbase + (512 - 1)) &
+ ~(unsigned long long)(512 - 1));
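+ /*
+ * (addr + (align - 1)) & ~(align - 1) rounds both the virtual and DMA
+ * addresses up to the next 512-byte boundary, the alignment the CQP SQ
+ * requires; the 256-byte-aligned queues are then laid out after it.
+ */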
+
+ nesdev->cqp.sq_vbase = vmem;
+ nesdev->cqp.sq_pbase = pmem;
+ nesdev->cqp.sq_size = NES_CQP_SQ_SIZE;
+ nesdev->cqp.sq_head = 0;
+ nesdev->cqp.sq_tail = 0;
+ nesdev->cqp.qp_id = PCI_FUNC(nesdev->pcidev->devfn);
+
+ vmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size);
+ pmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size);
+
+ nesdev->ccq.cq_vbase = vmem;
+ nesdev->ccq.cq_pbase = pmem;
+ nesdev->ccq.cq_size = NES_CCQ_SIZE;
+ nesdev->ccq.cq_head = 0;
+ nesdev->ccq.ce_handler = nes_cqp_ce_handler;
+ nesdev->ccq.cq_number = PCI_FUNC(nesdev->pcidev->devfn);
+
+ vmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size);
+ pmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size);
+
+ nesdev->ceq_index = PCI_FUNC(nesdev->pcidev->devfn);
+ ceq = &nesadapter->ceq[nesdev->ceq_index];
+ ceq->ceq_vbase = vmem;
+ ceq->ceq_pbase = pmem;
+ ceq->ceq_size = NES_CCEQ_SIZE;
+ ceq->ceq_head = 0;
+
+ vmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256);
+ pmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256);
+
+ nesdev->nic_ceq_index = PCI_FUNC(nesdev->pcidev->devfn) + 8;
+ nic_ceq = &nesadapter->ceq[nesdev->nic_ceq_index];
+ nic_ceq->ceq_vbase = vmem;
+ nic_ceq->ceq_pbase = pmem;
+ nic_ceq->ceq_size = NES_NIC_CEQ_SIZE;
+ nic_ceq->ceq_head = 0;
+
+ vmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256);
+ pmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256);
+
+ aeq = &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)];
+ aeq->aeq_vbase = vmem;
+ aeq->aeq_pbase = pmem;
+ aeq->aeq_size = nesadapter->max_qp;
+ aeq->aeq_head = 0;
+
+ /* Setup QP Context */
+ vmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size);
+ pmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size);
+
+ cqp_qp_context = vmem;
+ cqp_qp_context->context_words[0] =
+ cpu_to_le32((PCI_FUNC(nesdev->pcidev->devfn) << 12) + (2 << 10));
+ cqp_qp_context->context_words[1] = 0;
+ cqp_qp_context->context_words[2] = cpu_to_le32((u32)nesdev->cqp.sq_pbase);
+ cqp_qp_context->context_words[3] = cpu_to_le32(((u64)nesdev->cqp.sq_pbase) >> 32);
+
+
+ /* Write the address to Create CQP */
+ if (sizeof(dma_addr_t) > 4) {
+ nes_write_indexed(nesdev,
+ NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8),
+ ((u64)pmem) >> 32);
+ } else {
+ nes_write_indexed(nesdev,
+ NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8), 0);
+ }
+ nes_write_indexed(nesdev,
+ NES_IDX_CREATE_CQP_LOW + (PCI_FUNC(nesdev->pcidev->devfn) * 8),
+ (u32)pmem);
+
+ INIT_LIST_HEAD(&nesdev->cqp_avail_reqs);
+ INIT_LIST_HEAD(&nesdev->cqp_pending_reqs);
+
+ for (count = 0; count < 2*NES_CQP_SQ_SIZE; count++) {
+ init_waitqueue_head(&nesdev->nes_cqp_requests[count].waitq);
+ list_add_tail(&nesdev->nes_cqp_requests[count].list, &nesdev->cqp_avail_reqs);
+ }
+
+ /* Write Create CCQ WQE */
+ cqp_head = nesdev->cqp.sq_head++;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+ (NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
+ NES_CQP_CQ_CHK_OVERFLOW | ((u32)nesdev->ccq.cq_size << 16)));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+ (nesdev->ccq.cq_number |
+ ((u32)nesdev->ceq_index << 16)));
+ u64temp = (u64)nesdev->ccq.cq_pbase;
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
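+ /*
+ * The kernel address of nesdev->ccq is stored in the CQ context words
+ * shifted right by one bit (low word = bits 1..32, high word = bits
+ * 33..63), presumably because the hardware reserves the low bit; the
+ * completion handler is expected to reassemble it the same way.
+ */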
+ u64temp = (unsigned long)&nesdev->ccq;
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] =
+ cpu_to_le32((u32)(u64temp >> 1));
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
+ cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
+
+ /* Write Create CEQ WQE */
+ cqp_head = nesdev->cqp.sq_head++;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+ (NES_CQP_CREATE_CEQ + ((u32)nesdev->ceq_index << 8)));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, ceq->ceq_size);
+ u64temp = (u64)ceq->ceq_pbase;
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+
+ /* Write Create AEQ WQE */
+ cqp_head = nesdev->cqp.sq_head++;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+ (NES_CQP_CREATE_AEQ + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8)));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX, aeq->aeq_size);
+ u64temp = (u64)aeq->aeq_pbase;
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+
+ /* Write Create NIC CEQ WQE */
+ cqp_head = nesdev->cqp.sq_head++;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+ (NES_CQP_CREATE_CEQ + ((u32)nesdev->nic_ceq_index << 8)));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, nic_ceq->ceq_size);
+ u64temp = (u64)nic_ceq->ceq_pbase;
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+
+ /* Poll until CCQP done */
+ count = 0;
+ do {
+ if (count++ > 1000) {
+ printk(KERN_ERR PFX "Error creating CQP\n");
+ pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
+ nesdev->cqp_vbase, nesdev->cqp_pbase);
+ return -1;
+ }
+ udelay(10);
+ } while (!(nes_read_indexed(nesdev,
+ NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn) * 8)) & (1 << 8)));
+
+ nes_debug(NES_DBG_INIT, "CQP Status = 0x%08X\n", nes_read_indexed(nesdev,
+ NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
+
+ u32temp = 0x04800000;
+ nes_write32(nesdev->regs+NES_WQE_ALLOC, u32temp | nesdev->cqp.qp_id);
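+ /*
+ * The WQE-allocate doorbell appears to encode the number of WQEs being
+ * posted in its upper bits: 0x04800000 here for the four create WQEs
+ * above, versus 0x05800000 ("5 WQEs") in nes_destroy_cqp() below.
+ */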
+
+ /* wait for the CCQ, CEQ, and AEQ to get created */
+ count = 0;
+ do {
+ if (count++ > 1000) {
+ printk(KERN_ERR PFX "Error creating CCQ, CEQ, and AEQ\n");
+ pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
+ nesdev->cqp_vbase, nesdev->cqp_pbase);
+ return -1;
+ }
+ udelay(10);
+ } while (((nes_read_indexed(nesdev,
+ NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15<<8)) != (15<<8)));
+
+ /* dump the QP status value */
+ nes_debug(NES_DBG_INIT, "QP Status = 0x%08X\n", nes_read_indexed(nesdev,
+ NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
+
+ nesdev->cqp.sq_tail++;
+
+ return 0;
+}
+
+
+/**
+ * nes_destroy_cqp - tear down the control QP and its queues
+ * Posts destroy WQEs for the AEQ, CEQs, CCQ, and CQP itself, waits for the
+ * hardware to finish, then frees the request array and DMA region.
+ */
+int nes_destroy_cqp(struct nes_device *nesdev)
+{
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ u32 count = 0;
+ u32 cqp_head;
+ unsigned long flags;
+
+ do {
+ if (count++ > 1000)
+ break;
+ udelay(10);
+ } while (nesdev->cqp.sq_head != nesdev->cqp.sq_tail);
+
+ /* Reset CCQ */
+ nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_RESET |
+ nesdev->ccq.cq_number);
+
+ /* Disable device interrupts */
+ nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff);
+
+ spin_lock_irqsave(&nesdev->cqp.lock, flags);
+
+ /* Destroy the AEQ */
+ cqp_head = nesdev->cqp.sq_head++;
+ nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_AEQ |
+ ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8));
+ cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0;
+
+ /* Destroy the NIC CEQ */
+ cqp_head = nesdev->cqp.sq_head++;
+ nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ |
+ ((u32)nesdev->nic_ceq_index << 8));
+
+ /* Destroy the CEQ */
+ cqp_head = nesdev->cqp.sq_head++;
+ nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ |
+ (nesdev->ceq_index << 8));
+
+ /* Destroy the CCQ */
+ cqp_head = nesdev->cqp.sq_head++;
+ nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CQ);
+ cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->ccq.cq_number |
+ ((u32)nesdev->ceq_index << 16));
+
+ /* Destroy CQP */
+ cqp_head = nesdev->cqp.sq_head++;
+ nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_QP |
+ NES_CQP_QP_TYPE_CQP);
+ cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->cqp.qp_id);
+
+ barrier();
+ /* Ring doorbell (5 WQEs) */
+ nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id);
+
+ spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+
+ /* wait for the CCQ, CEQ, and AEQ to get destroyed */
+ count = 0;
+ do {
+ if (count++ > 1000) {
+ printk(KERN_ERR PFX "Function%d: Error destroying CCQ, CEQ, and AEQ\n",
+ PCI_FUNC(nesdev->pcidev->devfn));
+ break;
+ }
+ udelay(10);
+ } while (((nes_read_indexed(nesdev,
+ NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15 << 8)) != 0));
+
+ /* dump the QP status value */
+ nes_debug(NES_DBG_SHUTDOWN, "Function%d: QP Status = 0x%08X\n",
+ PCI_FUNC(nesdev->pcidev->devfn),
+ nes_read_indexed(nesdev,
+ NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
+
+ kfree(nesdev->nes_cqp_requests);
+
+ /* Free the control structures */
+ pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase,
+ nesdev->cqp.sq_pbase);
+
+ return 0;
+}
+
+
+/**
+ * nes_init_phy - initialize the Ethernet PHY
+ * Handles the 1G copper PHY (reset, interrupt mask, flow control) as well
+ * as the 10G IRIS/ARGUS PHYs (MDIO setup and AMCC QT2505 configuration).
+ */
+int nes_init_phy(struct nes_device *nesdev)
+{
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 counter = 0;
+ u32 sds_common_control0;
+ u32 mac_index = nesdev->mac_index;
+ u32 tx_config = 0;
+ u16 phy_data;
+ u32 temp_phy_data = 0;
+ u32 temp_phy_data2 = 0;
+ u32 i = 0;
+
+ if ((nesadapter->OneG_Mode) &&
+ (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
+ nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index);
+ if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) {
+ printk(PFX "%s: Programming mdc config for 1G\n", __func__);
+ tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
+ tx_config &= 0xFFFFFFE3;
+ tx_config |= 0x04;
+ nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
+ }
+
+ nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy data from register 1 phy address %u = 0x%X.\n",
+ nesadapter->phy_index[mac_index], phy_data);
+ nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000);
+
+ /* Reset the PHY */
+ nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], 0x8000);
+ udelay(100);
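+ /*
+ * Bit 15 of MII register 0 (BMCR) is the self-clearing reset bit, so the
+ * loop below polls until the PHY drops it.
+ */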
+ counter = 0;
+ do {
+ nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data);
+ if (counter++ > 100) break;
+ } while (phy_data & 0x8000);
+
+ /* Setting no phy loopback */
+ phy_data &= 0xbfff;
+ phy_data |= 0x1140;
+ nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data);
+ nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data);
+
+ nes_read_1G_phy_reg(nesdev, 0x17, nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy data from register 0x17 = 0x%X.\n", phy_data);
+
+ nes_read_1G_phy_reg(nesdev, 0x1e, nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy data from register 0x1e = 0x%X.\n", phy_data);
+
+ /* Setting the interrupt mask */
+ nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data);
+ nes_write_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], 0xffee);
+
+ nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data);
+
+ /* turning on flow control */
+ nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data);
+ nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index],
+ (phy_data & ~(0x03E0)) | 0xc00);
+ /* nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index],
+ phy_data | 0xc00); */
+ nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data);
+
+ nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data);
+ /* Clear Half duplex */
+ nes_write_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index],
+ phy_data & ~(0x0100));
+ nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data);
+
+ nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
+ nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300);
+ } else {
+ if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) ||
+ (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
+ /* setup 10G MDIO operation */
+ tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
+ tx_config &= 0xFFFFFFE3;
+ tx_config |= 0x15;
+ nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
+ }
+ if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ mdelay(10);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+ temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+
+			/*
+			 * if firmware is already running (e.g. from a
+			 * driver unload/reload), don't do anything.
+			 */
+ if (temp_phy_data == temp_phy_data2) {
+ /* configure QT2505 AMCC PHY */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0x0000, 0x8000);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0000);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc302, 0x0044);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc318, 0x0052);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc319, 0x0008);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc31a, 0x0098);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0026, 0x0E00);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0001);
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0028, 0xA528);
+
+ /*
+ * remove micro from reset; chip boots from ROM,
+ * uploads EEPROM f/w image, uC executes f/w
+ */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0002);
+
+ /*
+ * wait for heart beat to start to
+ * know loading is done
+ */
+ counter = 0;
+ do {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if (counter++ > 1000) {
+ nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from heartbeat check <this is bad!!!> \n");
+ break;
+ }
+ mdelay(100);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+ temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ } while ((temp_phy_data2 == temp_phy_data));
+
+ /*
+ * wait for tracking to start to know
+ * f/w is good to go
+ */
+ counter = 0;
+ do {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7fd);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if (counter++ > 1000) {
+ nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from status check <this is bad!!!> \n");
+ break;
+ }
+ mdelay(1000);
+ /*
+ * nes_debug(NES_DBG_PHY, "AMCC PHY- phy_status not ready yet = 0x%02X\n",
+ * temp_phy_data);
+ */
+ } while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70));
+
+ /* set LOS Control invert RXLOSB_I_PADINV */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd003, 0x0000);
+ /* set LOS Control to mask of RXLOSB_I */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc314, 0x0042);
+ /* set LED1 to input mode (LED1 and LED2 share same LED) */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd006, 0x0007);
+ /* set LED2 to RX link_status and activity */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd007, 0x000A);
+ /* set LED3 to RX link_status */
+ nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd008, 0x0009);
+
+ /*
+ * reset the res-calibration on t2
+ * serdes; ensures it is stable after
+ * the amcc phy is stable
+ */
+
+ sds_common_control0 = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0);
+ sds_common_control0 |= 0x1;
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0);
+
+ /* release the res-calibration reset */
+ sds_common_control0 &= 0xfffffffe;
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0);
+
+ i = 0;
+ while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
+ && (i++ < 5000)) {
+ /* mdelay(1); */
+ }
+
+				/*
+				 * wait for link training to finish before
+				 * moving on, or we will get an interrupt storm
+				 */
+ counter = 0;
+ do {
+ temp_phy_data = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+ (0x200 * (nesdev->mac_index & 1)));
+ if (counter++ > 1000) {
+						nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from link train wait <this is bad, link didn't train!!!>\n");
+ break;
+ }
+ mdelay(1);
+ } while (((temp_phy_data & 0x0f1f0000) != 0x0f0f0000));
+ }
+ }
+ }
+ return 0;
+}
+
+
+/**
+ * nes_replenish_nic_rq - allocate receive skbs and repost them to the NIC RQ
+ */
+static void nes_replenish_nic_rq(struct nes_vnic *nesvnic)
+{
+ unsigned long flags;
+ dma_addr_t bus_address;
+ struct sk_buff *skb;
+ struct nes_hw_nic_rq_wqe *nic_rqe;
+ struct nes_hw_nic *nesnic;
+ struct nes_device *nesdev;
+ u32 rx_wqes_posted = 0;
+
+ nesnic = &nesvnic->nic;
+ nesdev = nesvnic->nesdev;
+ spin_lock_irqsave(&nesnic->rq_lock, flags);
+ if (nesnic->replenishing_rq !=0) {
+ if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) &&
+ (atomic_read(&nesvnic->rx_skb_timer_running) == 0)) {
+ atomic_set(&nesvnic->rx_skb_timer_running, 1);
+ spin_unlock_irqrestore(&nesnic->rq_lock, flags);
+ nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2); /* 1/2 second */
+ add_timer(&nesvnic->rq_wqes_timer);
+ } else
+ spin_unlock_irqrestore(&nesnic->rq_lock, flags);
+ return;
+ }
+ nesnic->replenishing_rq = 1;
+ spin_unlock_irqrestore(&nesnic->rq_lock, flags);
+ do {
+ skb = dev_alloc_skb(nesvnic->max_frame_size);
+ if (skb) {
+ skb->dev = nesvnic->netdev;
+
+ bus_address = pci_map_single(nesdev->pcidev,
+ skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+
+ nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_head];
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] =
+ cpu_to_le32(nesvnic->max_frame_size);
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] =
+ cpu_to_le32((u32)bus_address);
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] =
+ cpu_to_le32((u32)((u64)bus_address >> 32));
+ nesnic->rx_skb[nesnic->rq_head] = skb;
+ nesnic->rq_head++;
+ nesnic->rq_head &= nesnic->rq_size - 1;
+ atomic_dec(&nesvnic->rx_skbs_needed);
+ barrier();
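+			/* the WQE_ALLOC doorbell count field is 8 bits wide, so post receive WQEs in batches of at most 255 */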
+ if (++rx_wqes_posted == 255) {
+ nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id);
+ rx_wqes_posted = 0;
+ }
+ } else {
+ spin_lock_irqsave(&nesnic->rq_lock, flags);
+ if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) &&
+ (atomic_read(&nesvnic->rx_skb_timer_running) == 0)) {
+ atomic_set(&nesvnic->rx_skb_timer_running, 1);
+ spin_unlock_irqrestore(&nesnic->rq_lock, flags);
+ nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2); /* 1/2 second */
+ add_timer(&nesvnic->rq_wqes_timer);
+ } else
+ spin_unlock_irqrestore(&nesnic->rq_lock, flags);
+ break;
+ }
+ } while (atomic_read(&nesvnic->rx_skbs_needed));
+ barrier();
+ if (rx_wqes_posted)
+ nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id);
+ nesnic->replenishing_rq = 0;
+}
+
+
+/**
+ * nes_rq_wqes_timeout - timer callback that retries NIC RQ replenishment
+ */
+static void nes_rq_wqes_timeout(unsigned long parm)
+{
+ struct nes_vnic *nesvnic = (struct nes_vnic *)parm;
+ printk("%s: Timer fired.\n", __func__);
+ atomic_set(&nesvnic->rx_skb_timer_running, 0);
+ if (atomic_read(&nesvnic->rx_skbs_needed))
+ nes_replenish_nic_rq(nesvnic);
+}
+
+
+static int nes_lro_get_skb_hdr(struct sk_buff *skb, void **iphdr,
+ void **tcph, u64 *hdr_flags, void *priv)
+{
+ unsigned int ip_len;
+ struct iphdr *iph;
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ if (iph->protocol != IPPROTO_TCP)
+ return -1;
+ ip_len = ip_hdrlen(skb);
+ skb_set_transport_header(skb, ip_len);
+ *tcph = tcp_hdr(skb);
+
+ *hdr_flags = LRO_IPV4 | LRO_TCP;
+ *iphdr = iph;
+ return 0;
+}
+
+
+/**
+ * nes_init_nic_qp - allocate and create the NIC QP and CQ for a net_device
+ */
+int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
+{
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_hw_nic_sq_wqe *nic_sqe;
+ struct nes_hw_nic_qp_context *nic_context;
+ struct sk_buff *skb;
+ struct nes_hw_nic_rq_wqe *nic_rqe;
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ unsigned long flags;
+ void *vmem;
+ dma_addr_t pmem;
+ u64 u64temp;
+ int ret;
+ u32 cqp_head;
+ u32 counter;
+ u32 wqe_count;
+ u8 jumbomode=0;
+
+ /* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */
+ nesvnic->nic_mem_size = 256 +
+ (NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag)) +
+ (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe)) +
+ (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe)) +
+ (NES_NIC_WQ_SIZE * 2 * sizeof(struct nes_hw_nic_cqe)) +
+ sizeof(struct nes_hw_nic_qp_context);
+
+ nesvnic->nic_vbase = pci_alloc_consistent(nesdev->pcidev, nesvnic->nic_mem_size,
+ &nesvnic->nic_pbase);
+ if (!nesvnic->nic_vbase) {
+ nes_debug(NES_DBG_INIT, "Unable to allocate memory for NIC host descriptor rings\n");
+ return -ENOMEM;
+ }
+ memset(nesvnic->nic_vbase, 0, nesvnic->nic_mem_size);
+ nes_debug(NES_DBG_INIT, "Allocated NIC QP structures at %p (phys = %016lX), size = %u.\n",
+ nesvnic->nic_vbase, (unsigned long)nesvnic->nic_pbase, nesvnic->nic_mem_size);
+
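+	/* carve the descriptor memory on a 256-byte-aligned boundary */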
+ vmem = (void *)(((unsigned long)nesvnic->nic_vbase + (256 - 1)) &
+ ~(unsigned long)(256 - 1));
+ pmem = (dma_addr_t)(((unsigned long long)nesvnic->nic_pbase + (256 - 1)) &
+ ~(unsigned long long)(256 - 1));
+
+ /* Setup the first Fragment buffers */
+ nesvnic->nic.first_frag_vbase = vmem;
+
+ for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) {
+ nesvnic->nic.frag_paddr[counter] = pmem;
+ pmem += sizeof(struct nes_first_frag);
+ }
+
+ /* setup the SQ */
+ vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag));
+
+ nesvnic->nic.sq_vbase = (void *)vmem;
+ nesvnic->nic.sq_pbase = pmem;
+ nesvnic->nic.sq_head = 0;
+ nesvnic->nic.sq_tail = 0;
+ nesvnic->nic.sq_size = NES_NIC_WQ_SIZE;
+ for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) {
+ nic_sqe = &nesvnic->nic.sq_vbase[counter];
+ nic_sqe->wqe_words[NES_NIC_SQ_WQE_MISC_IDX] =
+ cpu_to_le32(NES_NIC_SQ_WQE_DISABLE_CHKSUM |
+ NES_NIC_SQ_WQE_COMPLETION);
+ nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX] =
+ cpu_to_le32((u32)NES_FIRST_FRAG_SIZE << 16);
+ nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX] =
+ cpu_to_le32((u32)nesvnic->nic.frag_paddr[counter]);
+ nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX] =
+ cpu_to_le32((u32)((u64)nesvnic->nic.frag_paddr[counter] >> 32));
+ }
+
+ nesvnic->get_cqp_request = nes_get_cqp_request;
+ nesvnic->post_cqp_request = nes_post_cqp_request;
+ nesvnic->mcrq_mcast_filter = NULL;
+
+ spin_lock_init(&nesvnic->nic.sq_lock);
+ spin_lock_init(&nesvnic->nic.rq_lock);
+
+ /* setup the RQ */
+ vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe));
+ pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe));
+
+
+ nesvnic->nic.rq_vbase = vmem;
+ nesvnic->nic.rq_pbase = pmem;
+ nesvnic->nic.rq_head = 0;
+ nesvnic->nic.rq_tail = 0;
+ nesvnic->nic.rq_size = NES_NIC_WQ_SIZE;
+
+ /* setup the CQ */
+ vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe));
+ pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe));
+
+ if (nesdev->nesadapter->netdev_count > 2)
+ nesvnic->mcrq_qp_id = nesvnic->nic_index + 32;
+ else
+ nesvnic->mcrq_qp_id = nesvnic->nic.qp_id + 4;
+
+ nesvnic->nic_cq.cq_vbase = vmem;
+ nesvnic->nic_cq.cq_pbase = pmem;
+ nesvnic->nic_cq.cq_head = 0;
+ nesvnic->nic_cq.cq_size = NES_NIC_WQ_SIZE * 2;
+
+ nesvnic->nic_cq.ce_handler = nes_nic_napi_ce_handler;
+
+ /* Send CreateCQ request to CQP */
+ spin_lock_irqsave(&nesdev->cqp.lock, flags);
+ cqp_head = nesdev->cqp.sq_head;
+
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(
+ NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
+ ((u32)nesvnic->nic_cq.cq_size << 16));
+ cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(
+ nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16));
+ u64temp = (u64)nesvnic->nic_cq.cq_pbase;
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
+ u64temp = (unsigned long)&nesvnic->nic_cq;
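+	/* the CQ context pointer is stored shifted right by one bit; nes_process_ceq shifts it back before use */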
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1));
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
+ cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
+ if (++cqp_head >= nesdev->cqp.sq_size)
+ cqp_head = 0;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+ /* Send CreateQP request to CQP */
+ nic_context = (void *)(&nesvnic->nic_cq.cq_vbase[nesvnic->nic_cq.cq_size]);
+ nic_context->context_words[NES_NIC_CTX_MISC_IDX] =
+ cpu_to_le32((u32)NES_NIC_CTX_SIZE |
+ ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12));
+ nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n",
+ nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE),
+ nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE));
+ if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0) {
+ nic_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE);
+ }
+
+ u64temp = (u64)nesvnic->nic.sq_pbase;
+ nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+ nic_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
+ u64temp = (u64)nesvnic->nic.rq_pbase;
+ nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+ nic_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
+
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP |
+ NES_CQP_QP_TYPE_NIC);
+ cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesvnic->nic.qp_id);
+ u64temp = (u64)nesvnic->nic_cq.cq_pbase +
+ (nesvnic->nic_cq.cq_size * sizeof(struct nes_hw_nic_cqe));
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
+
+ if (++cqp_head >= nesdev->cqp.sq_size)
+ cqp_head = 0;
+ nesdev->cqp.sq_head = cqp_head;
+
+ barrier();
+
+ /* Ring doorbell (2 WQEs) */
+ nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
+
+ spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+ nes_debug(NES_DBG_INIT, "Waiting for create NIC QP%u to complete.\n",
+ nesvnic->nic.qp_id);
+
+ ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
+ NES_EVENT_TIMEOUT);
+ nes_debug(NES_DBG_INIT, "Create NIC QP%u completed, wait_event_timeout ret = %u.\n",
+ nesvnic->nic.qp_id, ret);
+ if (!ret) {
+ nes_debug(NES_DBG_INIT, "NIC QP%u create timeout expired\n", nesvnic->nic.qp_id);
+ pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase,
+ nesvnic->nic_pbase);
+ return -EIO;
+ }
+
+ /* Populate the RQ */
+ for (counter = 0; counter < (NES_NIC_WQ_SIZE - 1); counter++) {
+ skb = dev_alloc_skb(nesvnic->max_frame_size);
+ if (!skb) {
+ nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name);
+
+ nes_destroy_nic_qp(nesvnic);
+ return -ENOMEM;
+ }
+
+ skb->dev = netdev;
+
+ pmem = pci_map_single(nesdev->pcidev, skb->data,
+ nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+
+ nic_rqe = &nesvnic->nic.rq_vbase[counter];
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size);
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32));
+ nesvnic->nic.rx_skb[counter] = skb;
+ }
+
+ wqe_count = NES_NIC_WQ_SIZE - 1;
+ nesvnic->nic.rq_head = wqe_count;
+ barrier();
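+	/* ring the RQ doorbell in batches of at most 255 WQEs (8-bit count field) */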
+ do {
+ counter = min(wqe_count, ((u32)255));
+ wqe_count -= counter;
+ nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter << 24) | nesvnic->nic.qp_id);
+ } while (wqe_count);
+ init_timer(&nesvnic->rq_wqes_timer);
+ nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout;
+ nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic;
+ nes_debug(NES_DBG_INIT, "NAPI support Enabled\n");
+	if (nesdev->nesadapter->et_use_adaptive_rx_coalesce) {
+ nes_nic_init_timer(nesdev);
+ if (netdev->mtu > 1500)
+ jumbomode = 1;
+ nes_nic_init_timer_defaults(nesdev, jumbomode);
+ }
+ nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr;
+ nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS;
+ nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc;
+ nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr;
+ nesvnic->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID;
+ nesvnic->lro_mgr.dev = netdev;
+ nesvnic->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY;
+ nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+ return 0;
+}
+
+
+/**
+ * nes_destroy_nic_qp - free posted buffers and destroy the NIC QP and CQ
+ */
+void nes_destroy_nic_qp(struct nes_vnic *nesvnic)
+{
+ u64 u64temp;
+ dma_addr_t bus_address;
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_hw_nic_sq_wqe *nic_sqe;
+ struct nes_hw_nic_rq_wqe *nic_rqe;
+ __le16 *wqe_fragment_length;
+ u16 wqe_fragment_index;
+ u64 wqe_frag;
+ u32 cqp_head;
+ unsigned long flags;
+ int ret;
+
+ /* Free remaining NIC receive buffers */
+ while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) {
+ nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail];
+ wqe_frag = (u64)le32_to_cpu(
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
+ wqe_frag |= ((u64)le32_to_cpu(
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX]))<<32;
+ pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag,
+ nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+ dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]);
+ nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1);
+ }
+
+ /* Free remaining NIC transmit buffers */
+ while (nesvnic->nic.sq_head != nesvnic->nic.sq_tail) {
+ nic_sqe = &nesvnic->nic.sq_vbase[nesvnic->nic.sq_tail];
+ wqe_fragment_index = 1;
+ wqe_fragment_length = (__le16 *)
+ &nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
+ /* bump past the vlan tag */
+ wqe_fragment_length++;
+ if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) {
+ u64temp = (u64)le32_to_cpu(
+ nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+
+ wqe_fragment_index*2]);
+ u64temp += ((u64)le32_to_cpu(
+ nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX
+ + wqe_fragment_index*2]))<<32;
+ bus_address = (dma_addr_t)u64temp;
+ if (test_and_clear_bit(nesvnic->nic.sq_tail,
+ nesvnic->nic.first_frag_overflow)) {
+ pci_unmap_single(nesdev->pcidev,
+ bus_address,
+ le16_to_cpu(wqe_fragment_length[
+ wqe_fragment_index++]),
+ PCI_DMA_TODEVICE);
+ }
+ for (; wqe_fragment_index < 5; wqe_fragment_index++) {
+ if (wqe_fragment_length[wqe_fragment_index]) {
+ u64temp = le32_to_cpu(
+ nic_sqe->wqe_words[
+ NES_NIC_SQ_WQE_FRAG0_LOW_IDX+
+ wqe_fragment_index*2]);
+ u64temp += ((u64)le32_to_cpu(
+ nic_sqe->wqe_words[
+ NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+
+ wqe_fragment_index*2]))<<32;
+ bus_address = (dma_addr_t)u64temp;
+ pci_unmap_page(nesdev->pcidev,
+ bus_address,
+ le16_to_cpu(
+ wqe_fragment_length[
+ wqe_fragment_index]),
+ PCI_DMA_TODEVICE);
+ } else
+ break;
+ }
+ }
+ if (nesvnic->nic.tx_skb[nesvnic->nic.sq_tail])
+ dev_kfree_skb(
+ nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]);
+
+		nesvnic->nic.sq_tail++;
+		nesvnic->nic.sq_tail &= nesvnic->nic.sq_size - 1;
+ }
+
+ spin_lock_irqsave(&nesdev->cqp.lock, flags);
+
+ /* Destroy NIC QP */
+ cqp_head = nesdev->cqp.sq_head;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+ (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+ nesvnic->nic.qp_id);
+
+ if (++cqp_head >= nesdev->cqp.sq_size)
+ cqp_head = 0;
+
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+
+ /* Destroy NIC CQ */
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+ (NES_CQP_DESTROY_CQ | ((u32)nesvnic->nic_cq.cq_size << 16)));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+ (nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16)));
+
+ if (++cqp_head >= nesdev->cqp.sq_size)
+ cqp_head = 0;
+
+ nesdev->cqp.sq_head = cqp_head;
+ barrier();
+
+ /* Ring doorbell (2 WQEs) */
+ nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
+
+ spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+ nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u,"
+ " cqp.sq_tail=%u, cqp.sq_size=%u\n",
+ cqp_head, nesdev->cqp.sq_head,
+ nesdev->cqp.sq_tail, nesdev->cqp.sq_size);
+
+ ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
+ NES_EVENT_TIMEOUT);
+
+ nes_debug(NES_DBG_SHUTDOWN, "Destroy NIC QP returned, wait_event_timeout ret = %u, cqp_head=%u,"
+ " cqp.sq_head=%u, cqp.sq_tail=%u\n",
+ ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
+ if (!ret) {
+ nes_debug(NES_DBG_SHUTDOWN, "NIC QP%u destroy timeout expired\n",
+ nesvnic->nic.qp_id);
+ }
+
+ pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase,
+ nesvnic->nic_pbase);
+}
+
+/**
+ * nes_napi_isr - handle NIC-only interrupts inline when NAPI is active
+ */
+int nes_napi_isr(struct nes_device *nesdev)
+{
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 int_stat;
+
+ if (nesdev->napi_isr_ran) {
+ /* interrupt status has already been read in ISR */
+ int_stat = nesdev->int_stat;
+ } else {
+ int_stat = nes_read32(nesdev->regs + NES_INT_STAT);
+ nesdev->int_stat = int_stat;
+ nesdev->napi_isr_ran = 1;
+ }
+
+ int_stat &= nesdev->int_req;
+	/* if only NIC completion interrupts are pending, process them here, else wait for the DPC */
+ if ((int_stat) && ((int_stat & 0x0000ff00) == int_stat)) {
+ nesdev->napi_isr_ran = 0;
+ nes_write32(nesdev->regs + NES_INT_STAT,
+ (int_stat &
+ ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3)));
+
+ /* Process the CEQs */
+ nes_process_ceq(nesdev, &nesdev->nesadapter->ceq[nesdev->nic_ceq_index]);
+
+ if (unlikely((((nesadapter->et_rx_coalesce_usecs_irq) &&
+ (!nesadapter->et_use_adaptive_rx_coalesce)) ||
+ ((nesadapter->et_use_adaptive_rx_coalesce) &&
+ (nesdev->deepcq_count > nesadapter->et_pkt_rate_low))))) {
+ if ((nesdev->int_req & NES_INT_TIMER) == 0) {
+ /* Enable Periodic timer interrupts */
+ nesdev->int_req |= NES_INT_TIMER;
+ /* ack any pending periodic timer interrupts so we don't get an immediate interrupt */
+ /* TODO: need to also ack other unused periodic timer values, get from nesadapter */
+ nes_write32(nesdev->regs+NES_TIMER_STAT,
+ nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req));
+ nes_write32(nesdev->regs+NES_INTF_INT_MASK,
+ ~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER));
+ }
+
+			if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) {
+ nes_nic_init_timer(nesdev);
+ }
+ /* Enable interrupts, except CEQs */
+ nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
+ } else {
+ /* Enable interrupts, make sure timer is off */
+ nesdev->int_req &= ~NES_INT_TIMER;
+ nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
+ nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
+ }
+ nesdev->deepcq_count = 0;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static void process_critical_error(struct nes_device *nesdev)
+{
+ u32 debug_error;
+ u32 nes_idx_debug_error_masks0 = 0;
+ u16 error_module = 0;
+
+ debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS);
+ printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n",
+ (u16)debug_error);
+ nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS,
+ 0x01010000 | (debug_error & 0x0000ffff));
+ if (crit_err_count++ > 10)
+ nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17);
+ error_module = (u16) (debug_error & 0x1F00) >> 8;
+ if (++nesdev->nesadapter->crit_error_count[error_module-1] >=
+ nes_max_critical_error_count) {
+ printk(KERN_ERR PFX "Masking off critical error for module "
+ "0x%02X\n", (u16)error_module);
+ nes_idx_debug_error_masks0 = nes_read_indexed(nesdev,
+ NES_IDX_DEBUG_ERROR_MASKS0);
+ nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0,
+ nes_idx_debug_error_masks0 | (1 << error_module));
+ }
+}
+/**
+ * nes_dpc - deferred interrupt processing for CEQ, AEQ, MAC and timer events
+ */
+void nes_dpc(unsigned long param)
+{
+ struct nes_device *nesdev = (struct nes_device *)param;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 counter;
+ u32 loop_counter = 0;
+ u32 int_status_bit;
+ u32 int_stat;
+ u32 timer_stat;
+ u32 temp_int_stat;
+ u32 intf_int_stat;
+ u32 processed_intf_int = 0;
+ u16 processed_timer_int = 0;
+ u16 completion_ints = 0;
+ u16 timer_ints = 0;
+
+ /* nes_debug(NES_DBG_ISR, "\n"); */
+
+ do {
+ timer_stat = 0;
+ if (nesdev->napi_isr_ran) {
+ nesdev->napi_isr_ran = 0;
+ int_stat = nesdev->int_stat;
+ } else
+ int_stat = nes_read32(nesdev->regs+NES_INT_STAT);
+ if (processed_intf_int != 0)
+ int_stat &= nesdev->int_req & ~NES_INT_INTF;
+ else
+ int_stat &= nesdev->int_req;
+ if (processed_timer_int == 0) {
+ processed_timer_int = 1;
+ if (int_stat & NES_INT_TIMER) {
+ timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT);
+ if ((timer_stat & nesdev->timer_int_req) == 0) {
+ int_stat &= ~NES_INT_TIMER;
+ }
+ }
+ } else {
+ int_stat &= ~NES_INT_TIMER;
+ }
+
+ if (int_stat) {
+ if (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0|
+ NES_INT_MAC1|NES_INT_MAC2 | NES_INT_MAC3)) {
+ /* Ack the interrupts */
+ nes_write32(nesdev->regs+NES_INT_STAT,
+ (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0|
+ NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3)));
+ }
+
+ temp_int_stat = int_stat;
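+			/* bits 0-15 of the interrupt status are the per-CEQ completion interrupts */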
+ for (counter = 0, int_status_bit = 1; counter < 16; counter++) {
+ if (int_stat & int_status_bit) {
+ nes_process_ceq(nesdev, &nesadapter->ceq[counter]);
+ temp_int_stat &= ~int_status_bit;
+ completion_ints = 1;
+ }
+ if (!(temp_int_stat & 0x0000ffff))
+ break;
+ int_status_bit <<= 1;
+ }
+
+ /* Process the AEQ for this pci function */
+ int_status_bit = 1 << (16 + PCI_FUNC(nesdev->pcidev->devfn));
+ if (int_stat & int_status_bit) {
+ nes_process_aeq(nesdev, &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)]);
+ }
+
+ /* Process the MAC interrupt for this pci function */
+ int_status_bit = 1 << (24 + nesdev->mac_index);
+ if (int_stat & int_status_bit) {
+ nes_process_mac_intr(nesdev, nesdev->mac_index);
+ }
+
+ if (int_stat & NES_INT_TIMER) {
+ if (timer_stat & nesdev->timer_int_req) {
+ nes_write32(nesdev->regs + NES_TIMER_STAT,
+ (timer_stat & nesdev->timer_int_req) |
+ ~(nesdev->nesadapter->timer_int_req));
+ timer_ints = 1;
+ }
+ }
+
+ if (int_stat & NES_INT_INTF) {
+ processed_intf_int = 1;
+ intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT);
+ intf_int_stat &= nesdev->intf_int_req;
+ if (NES_INTF_INT_CRITERR & intf_int_stat) {
+ process_critical_error(nesdev);
+ }
+ if (NES_INTF_INT_PCIERR & intf_int_stat) {
+ printk(KERN_ERR PFX "PCI Error reported by device!!!\n");
+ BUG();
+ }
+ if (NES_INTF_INT_AEQ_OFLOW & intf_int_stat) {
+ printk(KERN_ERR PFX "AEQ Overflow reported by device!!!\n");
+ BUG();
+ }
+ nes_write32(nesdev->regs+NES_INTF_INT_STAT, intf_int_stat);
+ }
+
+ if (int_stat & NES_INT_TSW) {
+ }
+ }
+		/* don't let the INTF, timer, or MAC interrupt bits keep us in the loop */
+		int_stat &= ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 |
+			NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3);
+ } while ((int_stat != 0) && (loop_counter++ < MAX_DPC_ITERATIONS));
+
+ if (timer_ints == 1) {
+ if ((nesadapter->et_rx_coalesce_usecs_irq) || (nesadapter->et_use_adaptive_rx_coalesce)) {
+ if (completion_ints == 0) {
+ nesdev->timer_only_int_count++;
+ if (nesdev->timer_only_int_count>=nesadapter->timer_int_limit) {
+ nesdev->timer_only_int_count = 0;
+ nesdev->int_req &= ~NES_INT_TIMER;
+ nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
+ nes_write32(nesdev->regs + NES_INT_MASK, ~nesdev->int_req);
+ } else {
+ nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
+ }
+ } else {
+				if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) {
+ nes_nic_init_timer(nesdev);
+ }
+ nesdev->timer_only_int_count = 0;
+ nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
+ }
+ } else {
+ nesdev->timer_only_int_count = 0;
+ nesdev->int_req &= ~NES_INT_TIMER;
+ nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
+ nes_write32(nesdev->regs+NES_TIMER_STAT,
+ nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req));
+ nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
+ }
+ } else {
+ if ( (completion_ints == 1) &&
+ (((nesadapter->et_rx_coalesce_usecs_irq) &&
+ (!nesadapter->et_use_adaptive_rx_coalesce)) ||
+ ((nesdev->deepcq_count > nesadapter->et_pkt_rate_low) &&
+ (nesadapter->et_use_adaptive_rx_coalesce) )) ) {
+ /* nes_debug(NES_DBG_ISR, "Enabling periodic timer interrupt.\n" ); */
+ nesdev->timer_only_int_count = 0;
+ nesdev->int_req |= NES_INT_TIMER;
+ nes_write32(nesdev->regs+NES_TIMER_STAT,
+ nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req));
+ nes_write32(nesdev->regs+NES_INTF_INT_MASK,
+ ~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER));
+ nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
+ } else {
+ nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
+ }
+ }
+ nesdev->deepcq_count = 0;
+}
+
+
+/**
+ * nes_process_ceq - walk the CEQ and call the handler for each completed CQ
+ */
+static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq)
+{
+ u64 u64temp;
+ struct nes_hw_cq *cq;
+ u32 head;
+ u32 ceq_size;
+
+ /* nes_debug(NES_DBG_CQ, "\n"); */
+ head = ceq->ceq_head;
+ ceq_size = ceq->ceq_size;
+
+ do {
+ if (le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]) &
+ NES_CEQE_VALID) {
+ u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]))) << 32) |
+ ((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_LOW_IDX])));
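+			/* the CQ context was stored shifted right by one bit when the CQ was created; restore the pointer */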
+ u64temp <<= 1;
+ cq = *((struct nes_hw_cq **)&u64temp);
+ /* nes_debug(NES_DBG_CQ, "pCQ = %p\n", cq); */
+ barrier();
+ ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX] = 0;
+
+ /* call the event handler */
+ cq->ce_handler(nesdev, cq);
+
+ if (++head >= ceq_size)
+ head = 0;
+ } else {
+ break;
+ }
+
+ } while (1);
+
+ ceq->ceq_head = head;
+}
+
+
+/**
+ * nes_process_aeq - walk the AEQ and dispatch asynchronous events
+ */
+static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq)
+{
+ /* u64 u64temp; */
+ u32 head;
+ u32 aeq_size;
+ u32 aeqe_misc;
+ u32 aeqe_cq_id;
+ struct nes_hw_aeqe volatile *aeqe;
+
+ head = aeq->aeq_head;
+ aeq_size = aeq->aeq_size;
+
+ do {
+ aeqe = &aeq->aeq_vbase[head];
+ if ((le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]) & NES_AEQE_VALID) == 0)
+ break;
+ aeqe_misc = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+ aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]);
+ if (aeqe_misc & (NES_AEQE_QP|NES_AEQE_CQ)) {
+ if (aeqe_cq_id >= NES_FIRST_QPN) {
+ /* dealing with an accelerated QP related AE */
+ /*
+ * u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX]))) << 32) |
+ * ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX])));
+ */
+ nes_process_iwarp_aeqe(nesdev, (struct nes_hw_aeqe *)aeqe);
+ } else {
+ /* TODO: dealing with a CQP related AE */
+ nes_debug(NES_DBG_AEQ, "Processing CQP related AE, misc = 0x%04X\n",
+ (u16)(aeqe_misc >> 16));
+ }
+ }
+
+ aeqe->aeqe_words[NES_AEQE_MISC_IDX] = 0;
+
+ if (++head >= aeq_size)
+ head = 0;
+	} while (1);
+ aeq->aeq_head = head;
+}
+
+static void nes_reset_link(struct nes_device *nesdev, u32 mac_index)
+{
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 reset_value;
+ u32 i=0;
+ u32 u32temp;
+
+ if (nesadapter->hw_rev == NE020_REV) {
+ return;
+ }
+ mh_detected++;
+
+ reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
+
+ if ((mac_index == 0) || ((mac_index == 1) && (nesadapter->OneG_Mode)))
+ reset_value |= 0x0000001d;
+ else
+ reset_value |= 0x0000002d;
+
+ if (4 <= (nesadapter->link_interrupt_count[mac_index] / ((u16)NES_MAX_LINK_INTERRUPTS))) {
+ if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) {
+ nesadapter->link_interrupt_count[0] = 0;
+ nesadapter->link_interrupt_count[1] = 0;
+ u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
+ if (0x00000040 & u32temp)
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088);
+ else
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8);
+
+ reset_value |= 0x0000003d;
+ }
+ nesadapter->link_interrupt_count[mac_index] = 0;
+ }
+
+ nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
+
+ while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
+ & 0x00000040) != 0x00000040) && (i++ < 5000));
+
+ if (0x0000003d == (reset_value & 0x0000003d)) {
+ u32 pcs_control_status0, pcs_control_status1;
+
+ for (i = 0; i < 10; i++) {
+ pcs_control_status0 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0);
+ pcs_control_status1 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+ if (((0x0F000000 == (pcs_control_status0 & 0x0F000000))
+ && (pcs_control_status0 & 0x00100000))
+ || ((0x0F000000 == (pcs_control_status1 & 0x0F000000))
+ && (pcs_control_status1 & 0x00100000)))
+ continue;
+ else
+ break;
+ }
+ if (10 == i) {
+ u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
+ if (0x00000040 & u32temp)
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088);
+ else
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8);
+
+ nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
+
+ while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET)
+ & 0x00000040) != 0x00000040) && (i++ < 5000));
+ }
+ }
+}
+
+/**
+ * nes_process_mac_intr - handle a MAC/PHY interrupt and update link state
+ */
+static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
+{
+ unsigned long flags;
+ u32 pcs_control_status;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_vnic *nesvnic;
+ u32 mac_status;
+ u32 mac_index = nesdev->mac_index;
+ u32 u32temp;
+ u16 phy_data;
+ u16 temp_phy_data;
+ u32 pcs_val = 0x0f0f0000;
+ u32 pcs_mask = 0x0f1f0000;
+
+ spin_lock_irqsave(&nesadapter->phy_lock, flags);
+ if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) {
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+ return;
+ }
+ nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_INTERRUPT;
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+
+ /* ack the MAC interrupt */
+ mac_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200));
+ /* Clear the interrupt */
+ nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200), mac_status);
+
+ nes_debug(NES_DBG_PHY, "MAC%u interrupt status = 0x%X.\n", mac_number, mac_status);
+
+ if (mac_status & (NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT)) {
+ nesdev->link_status_interrupts++;
+ if (0 == (++nesadapter->link_interrupt_count[mac_index] % ((u16)NES_MAX_LINK_INTERRUPTS))) {
+ spin_lock_irqsave(&nesadapter->phy_lock, flags);
+ nes_reset_link(nesdev, mac_index);
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+ }
+ /* read the PHY interrupt status register */
+ if ((nesadapter->OneG_Mode) &&
+ (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
+ do {
+ nes_read_1G_phy_reg(nesdev, 0x1a,
+ nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1a = 0x%X.\n",
+ nesadapter->phy_index[mac_index], phy_data);
+ } while (phy_data&0x8000);
+
+ temp_phy_data = 0;
+ do {
+ nes_read_1G_phy_reg(nesdev, 0x11,
+ nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy%d data from register 0x11 = 0x%X.\n",
+ nesadapter->phy_index[mac_index], phy_data);
+ if (temp_phy_data == phy_data)
+ break;
+ temp_phy_data = phy_data;
+ } while (1);
+
+ nes_read_1G_phy_reg(nesdev, 0x1e,
+ nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1e = 0x%X.\n",
+ nesadapter->phy_index[mac_index], phy_data);
+
+ nes_read_1G_phy_reg(nesdev, 1,
+ nesadapter->phy_index[mac_index], &phy_data);
+ nes_debug(NES_DBG_PHY, "1G phy%u data from register 1 = 0x%X\n",
+ nesadapter->phy_index[mac_index], phy_data);
+
+ if (temp_phy_data & 0x1000) {
+ nes_debug(NES_DBG_PHY, "The Link is up according to the PHY\n");
+ phy_data = 4;
+ } else {
+ nes_debug(NES_DBG_PHY, "The Link is down according to the PHY\n");
+ }
+ }
+ nes_debug(NES_DBG_PHY, "Eth SERDES Common Status: 0=0x%08X, 1=0x%08X\n",
+ nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0),
+ nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0+0x200));
+
+ if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_PUMA_1G) {
+ switch (mac_index) {
+ case 1:
+ case 3:
+ pcs_control_status = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+ break;
+ default:
+ pcs_control_status = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0);
+ break;
+ }
+ } else {
+ pcs_control_status = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200));
+ pcs_control_status = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200));
+ }
+
+ nes_debug(NES_DBG_PHY, "PCS PHY Control/Status%u: 0x%08X\n",
+ mac_index, pcs_control_status);
+ if ((nesadapter->OneG_Mode) &&
+ (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
+ u32temp = 0x01010000;
+ if (nesadapter->port_count > 2) {
+ u32temp |= 0x02020000;
+ }
+ if ((pcs_control_status & u32temp)!= u32temp) {
+ phy_data = 0;
+ nes_debug(NES_DBG_PHY, "PCS says the link is down\n");
+ }
+ } else {
+ switch (nesadapter->phy_type[mac_index]) {
+ case NES_PHY_TYPE_IRIS:
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ u32temp = 20;
+ do {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+ phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if ((phy_data == temp_phy_data) || (!(--u32temp)))
+ break;
+ temp_phy_data = phy_data;
+ } while (1);
+ nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
+ __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
+ break;
+
+ case NES_PHY_TYPE_ARGUS:
+ /* clear the alarms */
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc002);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc005);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc006);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005);
+ /* check link status */
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ u32temp = 100;
+ do {
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+
+ phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if ((phy_data == temp_phy_data) || (!(--u32temp)))
+ break;
+ temp_phy_data = phy_data;
+ } while (1);
+				nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
+					__func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
+ break;
+
+ case NES_PHY_TYPE_PUMA_1G:
+ if (mac_index < 2)
+ pcs_val = pcs_mask = 0x01010000;
+ else
+ pcs_val = pcs_mask = 0x02020000;
+ /* fall through */
+ default:
+ phy_data = (pcs_val == (pcs_control_status & pcs_mask)) ? 0x4 : 0x0;
+ break;
+ }
+ }
+
+ if (phy_data & 0x0004) {
+ nesadapter->mac_link_down[mac_index] = 0;
+ list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
+ nes_debug(NES_DBG_PHY, "The Link is UP!!. linkup was %d\n",
+ nesvnic->linkup);
+ if (nesvnic->linkup == 0) {
+ printk(PFX "The Link is now up for port %s, netdev %p.\n",
+ nesvnic->netdev->name, nesvnic->netdev);
+ if (netif_queue_stopped(nesvnic->netdev))
+ netif_start_queue(nesvnic->netdev);
+ nesvnic->linkup = 1;
+ netif_carrier_on(nesvnic->netdev);
+ }
+ }
+ } else {
+ nesadapter->mac_link_down[mac_index] = 1;
+ list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
+ nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n",
+ nesvnic->linkup);
+ if (nesvnic->linkup == 1) {
+ printk(PFX "The Link is now down for port %s, netdev %p.\n",
+ nesvnic->netdev->name, nesvnic->netdev);
+ if (!(netif_queue_stopped(nesvnic->netdev)))
+ netif_stop_queue(nesvnic->netdev);
+ nesvnic->linkup = 0;
+ netif_carrier_off(nesvnic->netdev);
+ }
+ }
+ }
+ }
+
+ nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_IDLE;
+}
+
+
+
+static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
+{
+ struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq);
+
+ netif_rx_schedule(nesdev->netdev[nesvnic->netdev_index], &nesvnic->napi);
+}
+
+
+/*
+ * MAX_RQES_TO_PROCESS defines the maximum number of receive completions to
+ * handle before getting out of nes_nic_ce_handler
+ */
+#define MAX_RQES_TO_PROCESS 384
+
+/**
+ * nes_nic_ce_handler - process NIC send and receive completions
+ */
+void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
+{
+ u64 u64temp;
+ dma_addr_t bus_address;
+ struct nes_hw_nic *nesnic;
+ struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq);
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_hw_nic_rq_wqe *nic_rqe;
+ struct nes_hw_nic_sq_wqe *nic_sqe;
+ struct sk_buff *skb;
+ struct sk_buff *rx_skb;
+ __le16 *wqe_fragment_length;
+ u32 head;
+ u32 cq_size;
+ u32 rx_pkt_size;
+ u32 cqe_count=0;
+ u32 cqe_errv;
+ u32 cqe_misc;
+ u16 wqe_fragment_index = 1; /* first fragment (0) is used by copy buffer */
+ u16 vlan_tag;
+ u16 pkt_type;
+ u16 rqes_processed = 0;
+ u8 sq_cqes = 0;
+ u8 nes_use_lro = 0;
+
+ head = cq->cq_head;
+ cq_size = cq->cq_size;
+ cq->cqes_pending = 1;
+ if (nesvnic->netdev->features & NETIF_F_LRO)
+ nes_use_lro = 1;
+ do {
+ if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) &
+ NES_NIC_CQE_VALID) {
+ nesnic = &nesvnic->nic;
+ cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]);
+ if (cqe_misc & NES_NIC_CQE_SQ) {
+ sq_cqes++;
+ wqe_fragment_index = 1;
+ nic_sqe = &nesnic->sq_vbase[nesnic->sq_tail];
+ skb = nesnic->tx_skb[nesnic->sq_tail];
+ wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
+ /* bump past the vlan tag */
+ wqe_fragment_length++;
+ if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) {
+ u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX +
+ wqe_fragment_index * 2]);
+ u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX +
+ wqe_fragment_index * 2])) << 32;
+ bus_address = (dma_addr_t)u64temp;
+ if (test_and_clear_bit(nesnic->sq_tail, nesnic->first_frag_overflow)) {
+ pci_unmap_single(nesdev->pcidev,
+ bus_address,
+ le16_to_cpu(wqe_fragment_length[wqe_fragment_index++]),
+ PCI_DMA_TODEVICE);
+ }
+ for (; wqe_fragment_index < 5; wqe_fragment_index++) {
+ if (wqe_fragment_length[wqe_fragment_index]) {
+ u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX +
+ wqe_fragment_index * 2]);
+ u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX
+ + wqe_fragment_index * 2])) <<32;
+ bus_address = (dma_addr_t)u64temp;
+ pci_unmap_page(nesdev->pcidev,
+ bus_address,
+ le16_to_cpu(wqe_fragment_length[wqe_fragment_index]),
+ PCI_DMA_TODEVICE);
+ } else
+ break;
+ }
+ if (skb)
+ dev_kfree_skb_any(skb);
+ }
+ nesnic->sq_tail++;
+ nesnic->sq_tail &= nesnic->sq_size-1;
+ if (sq_cqes > 128) {
+ barrier();
+ /* restart the queue if it had been stopped */
+ if (netif_queue_stopped(nesvnic->netdev))
+ netif_wake_queue(nesvnic->netdev);
+ sq_cqes = 0;
+ }
+ } else {
+ rqes_processed ++;
+
+ cq->rx_cqes_completed++;
+ cq->rx_pkts_indicated++;
+ rx_pkt_size = cqe_misc & 0x0000ffff;
+ nic_rqe = &nesnic->rq_vbase[nesnic->rq_tail];
+ /* Get the skb */
+ rx_skb = nesnic->rx_skb[nesnic->rq_tail];
+ nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_tail];
+ bus_address = (dma_addr_t)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
+ bus_address += ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32;
+ pci_unmap_single(nesdev->pcidev, bus_address,
+ nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+ /* rx_skb->tail = rx_skb->data + rx_pkt_size; */
+ /* rx_skb->len = rx_pkt_size; */
+ rx_skb->len = 0; /* TODO: see if this is necessary */
+ skb_put(rx_skb, rx_pkt_size);
+ rx_skb->protocol = eth_type_trans(rx_skb, nesvnic->netdev);
+ nesnic->rq_tail++;
+ nesnic->rq_tail &= nesnic->rq_size - 1;
+
+ atomic_inc(&nesvnic->rx_skbs_needed);
+ if (atomic_read(&nesvnic->rx_skbs_needed) > (nesvnic->nic.rq_size>>1)) {
+ nes_write32(nesdev->regs+NES_CQE_ALLOC,
+ cq->cq_number | (cqe_count << 16));
+ /* nesadapter->tune_timer.cq_count += cqe_count; */
+ nesdev->currcq_count += cqe_count;
+ cqe_count = 0;
+ nes_replenish_nic_rq(nesvnic);
+ }
+ pkt_type = (u16)(le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX]));
+ cqe_errv = (cqe_misc & NES_NIC_CQE_ERRV_MASK) >> NES_NIC_CQE_ERRV_SHIFT;
+ rx_skb->ip_summed = CHECKSUM_NONE;
+
+ if ((NES_PKT_TYPE_TCPV4_BITS == (pkt_type & NES_PKT_TYPE_TCPV4_MASK)) ||
+ (NES_PKT_TYPE_UDPV4_BITS == (pkt_type & NES_PKT_TYPE_UDPV4_MASK))) {
+ if ((cqe_errv &
+ (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR |
+ NES_NIC_ERRV_BITS_IPH_ERR | NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) {
+ if (nesvnic->rx_checksum_disabled == 0) {
+ rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+ } else
+ nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet."
+ " errv = 0x%X, pkt_type = 0x%X.\n",
+ nesvnic->netdev->name, cqe_errv, pkt_type);
+
+ } else if ((pkt_type & NES_PKT_TYPE_IPV4_MASK) == NES_PKT_TYPE_IPV4_BITS) {
+ if ((cqe_errv &
+ (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_IPH_ERR |
+ NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) {
+ if (nesvnic->rx_checksum_disabled == 0) {
+ rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
+ /* nes_debug(NES_DBG_CQ, "%s: Reporting successfully checksummed IPv4 packet.\n",
+ nesvnic->netdev->name); */
+ }
+ } else
+					nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed IPv4 packet."
+ " errv = 0x%X, pkt_type = 0x%X.\n",
+ nesvnic->netdev->name, cqe_errv, pkt_type);
+ }
+ /* nes_debug(NES_DBG_CQ, "pkt_type=%x, APBVT_MASK=%x\n",
+ pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */
+
+ if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) {
+ nes_cm_recv(rx_skb, nesvnic->netdev);
+ } else {
+ if ((cqe_misc & NES_NIC_CQE_TAG_VALID) && (nesvnic->vlan_grp != NULL)) {
+ vlan_tag = (u16)(le32_to_cpu(
+ cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])
+ >> 16);
+ nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
+ nesvnic->netdev->name, vlan_tag);
+ if (nes_use_lro)
+ lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb,
+ nesvnic->vlan_grp, vlan_tag, NULL);
+ else
+ nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
+ } else {
+ if (nes_use_lro)
+ lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
+ else
+ nes_netif_rx(rx_skb);
+ }
+ }
+
+ nesvnic->netdev->last_rx = jiffies;
+ /* nesvnic->netstats.rx_packets++; */
+ /* nesvnic->netstats.rx_bytes += rx_pkt_size; */
+ }
+
+ cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0;
+ /* Accounting... */
+ cqe_count++;
+ if (++head >= cq_size)
+ head = 0;
+ if (cqe_count == 255) {
+ /* Replenish Nic CQ */
+ nes_write32(nesdev->regs+NES_CQE_ALLOC,
+ cq->cq_number | (cqe_count << 16));
+ /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */
+ nesdev->currcq_count += cqe_count;
+ cqe_count = 0;
+ }
+
+ if (cq->rx_cqes_completed >= nesvnic->budget)
+ break;
+ } else {
+ cq->cqes_pending = 0;
+ break;
+ }
+
+ } while (1);
+
+ if (nes_use_lro)
+ lro_flush_all(&nesvnic->lro_mgr);
+ if (sq_cqes) {
+ barrier();
+ /* restart the queue if it had been stopped */
+ if (netif_queue_stopped(nesvnic->netdev))
+ netif_wake_queue(nesvnic->netdev);
+ }
+ cq->cq_head = head;
+ /* nes_debug(NES_DBG_CQ, "CQ%u Processed = %u cqes, new head = %u.\n",
+ cq->cq_number, cqe_count, cq->cq_head); */
+ cq->cqe_allocs_pending = cqe_count;
+	if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) {
+ /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */
+ nesdev->currcq_count += cqe_count;
+ nes_nic_tune_timer(nesdev);
+ }
+ if (atomic_read(&nesvnic->rx_skbs_needed))
+ nes_replenish_nic_rq(nesvnic);
+}
+
+
+/**
+ * nes_cqp_ce_handler - process CQP completions and issue deferred CQP requests
+ */
+static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
+{
+ u64 u64temp;
+ unsigned long flags;
+ struct nes_hw_cqp *cqp = NULL;
+ struct nes_cqp_request *cqp_request;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ u32 head;
+ u32 cq_size;
+ u32 cqe_count=0;
+ u32 error_code;
+ /* u32 counter; */
+
+ head = cq->cq_head;
+ cq_size = cq->cq_size;
+
+ do {
+ /* process the CQE */
+ /* nes_debug(NES_DBG_CQP, "head=%u cqe_words=%08X\n", head,
+ le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])); */
+
+ if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
+ u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head].
+ cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) |
+ ((u64)(le32_to_cpu(cq->cq_vbase[head].
+ cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX])));
+ cqp = *((struct nes_hw_cqp **)&u64temp);
+
+ error_code = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX]);
+ if (error_code) {
+ nes_debug(NES_DBG_CQP, "Bad Completion code for opcode 0x%02X from CQP,"
+ " Major/Minor codes = 0x%04X:%04X.\n",
+ le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])&0x3f,
+ (u16)(error_code >> 16),
+ (u16)error_code);
+ nes_debug(NES_DBG_CQP, "cqp: qp_id=%u, sq_head=%u, sq_tail=%u\n",
+ cqp->qp_id, cqp->sq_head, cqp->sq_tail);
+ }
+
+ u64temp = (((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
+ wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
+ ((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
+ wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX])));
+ cqp_request = *((struct nes_cqp_request **)&u64temp);
+ if (cqp_request) {
+ if (cqp_request->waiting) {
+ /* nes_debug(NES_DBG_CQP, "%s: Waking up requestor\n"); */
+ cqp_request->major_code = (u16)(error_code >> 16);
+ cqp_request->minor_code = (u16)error_code;
+ barrier();
+ cqp_request->request_done = 1;
+ wake_up(&cqp_request->waitq);
+ nes_put_cqp_request(nesdev, cqp_request);
+ } else {
+ if (cqp_request->callback)
+ cqp_request->cqp_callback(nesdev, cqp_request);
+ nes_free_cqp_request(nesdev, cqp_request);
+ }
+ } else {
+ wake_up(&nesdev->cqp.waitq);
+ }
+
+ cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
+ nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (1 << 16));
+ if (++cqp->sq_tail >= cqp->sq_size)
+ cqp->sq_tail = 0;
+
+ /* Accounting... */
+ cqe_count++;
+ if (++head >= cq_size)
+ head = 0;
+ } else {
+ break;
+ }
+ } while (1);
+ cq->cq_head = head;
+
+ spin_lock_irqsave(&nesdev->cqp.lock, flags);
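+	/* requeue deferred CQP requests while the CQP SQ is not full (one slot is always kept free) */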
+ while ((!list_empty(&nesdev->cqp_pending_reqs)) &&
+ ((((nesdev->cqp.sq_tail+nesdev->cqp.sq_size)-nesdev->cqp.sq_head) &
+ (nesdev->cqp.sq_size - 1)) != 1)) {
+ cqp_request = list_entry(nesdev->cqp_pending_reqs.next,
+ struct nes_cqp_request, list);
+ list_del_init(&cqp_request->list);
+ head = nesdev->cqp.sq_head++;
+ nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
+ cqp_wqe = &nesdev->cqp.sq_vbase[head];
+ memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
+ barrier();
+ cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] =
+ cpu_to_le32((u32)((unsigned long)cqp_request));
+ cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] =
+ cpu_to_le32((u32)(upper_32_bits((unsigned long)cqp_request)));
+ nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) put on CQPs SQ wqe%u.\n",
+ cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head);
+ /* Ring doorbell (1 WQEs) */
+ barrier();
+ nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
+ }
+ spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+
+ /* Arm the CCQ */
+ nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
+ cq->cq_number);
+ nes_read32(nesdev->regs+NES_CQE_ALLOC);
+}
+
+
+/**
+ * nes_process_iwarp_aeqe - handle an asynchronous event for an iWARP QP
+ */
+static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
+ struct nes_hw_aeqe *aeqe)
+{
+ u64 context;
+ u64 aeqe_context = 0;
+ unsigned long flags;
+ struct nes_qp *nesqp;
+ int resource_allocated;
+ /* struct iw_cm_id *cm_id; */
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct ib_event ibevent;
+ /* struct iw_cm_event cm_event; */
+ u32 aeq_info;
+ u32 next_iwarp_state = 0;
+ u16 async_event_id;
+ u8 tcp_state;
+ u8 iwarp_state;
+
+ nes_debug(NES_DBG_AEQ, "\n");
+ aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+ if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) {
+ context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
+ context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
+ } else {
+ aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
+ aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
+ context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
+ aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
+ BUG_ON(!context);
+ }
+
+ async_event_id = (u16)aeq_info;
+ tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
+ iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
+ nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p,"
+ " Tcp state = %s, iWARP state = %s\n",
+ async_event_id,
+ le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe,
+ nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]);
+
+ switch (async_event_id) {
+ case NES_AEQE_AEID_LLP_FIN_RECEIVED:
+ nesqp = *((struct nes_qp **)&context);
+ if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
+ nesqp->cm_id->add_ref(nesqp->cm_id);
+ schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp,
+ NES_TIMER_TYPE_CLOSE, 1, 0);
+ nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d),"
+ " need ae to finish up, original_last_aeq = 0x%04X."
+ " last_aeq = 0x%04X, scheduling timer. TCP state = %d\n",
+ nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+ async_event_id, nesqp->last_aeq, tcp_state);
+ }
+ if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
+ (nesqp->ibqp_state != IB_QPS_RTS)) {
+ /* FIN Received but tcp state or IB state moved on,
+ should expect a close complete */
+ return;
+ }
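+ /* fall through */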
+ case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
+ case NES_AEQE_AEID_LLP_CONNECTION_RESET:
+ case NES_AEQE_AEID_TERMINATE_SENT:
+ case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
+ case NES_AEQE_AEID_RESET_SENT:
+ nesqp = *((struct nes_qp **)&context);
+ if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
+ tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+ }
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+
+ if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
+ (tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) {
+ nesqp->hte_added = 0;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u to remove hte\n",
+ nesqp->hwqp.qp_id);
+ nes_hw_modify_qp(nesdev, nesqp,
+ NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE, 0);
+ spin_lock_irqsave(&nesqp->lock, flags);
+ }
+
+ if ((nesqp->ibqp_state == IB_QPS_RTS) &&
+ ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
+ (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+ switch (nesqp->hw_iwarp_state) {
+ case NES_AEQE_IWARP_STATE_RTS:
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
+ nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
+ break;
+ case NES_AEQE_IWARP_STATE_TERMINATE:
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
+ nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
+ if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
+ next_iwarp_state |= 0x02000000;
+ nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+ }
+ break;
+ default:
+ next_iwarp_state = 0;
+ }
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ if (next_iwarp_state) {
+ nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
+ " also added another reference\n",
+ nesqp->hwqp.qp_id, next_iwarp_state);
+ nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
+ }
+ nes_cm_disconn(nesqp);
+ } else {
+ if (async_event_id == NES_AEQE_AEID_LLP_FIN_RECEIVED) {
+ /* FIN Received but ib state not RTS,
+ close complete will be on its way */
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000;
+ nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+ nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
+ " also added another reference\n",
+ nesqp->hwqp.qp_id, next_iwarp_state);
+ nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
+ }
+ nes_cm_disconn(nesqp);
+ }
+ break;
+ case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
+ nesqp = *((struct nes_qp **)&context);
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TERMINATE_RECEIVED"
+ " event on QP%u \n Q2 Data:\n",
+ nesqp->hwqp.qp_id);
+ if (nesqp->ibqp.event_handler) {
+ ibevent.device = nesqp->ibqp.device;
+ ibevent.element.qp = &nesqp->ibqp;
+ ibevent.event = IB_EVENT_QP_FATAL;
+ nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+ }
+ if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
+ ((nesqp->ibqp_state == IB_QPS_RTS)&&
+ (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+ nes_cm_disconn(nesqp);
+ } else {
+ nesqp->in_disconnect = 0;
+ wake_up(&nesqp->kick_waitq);
+ }
+ break;
+ case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
+ nesqp = *((struct nes_qp **)&context);
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR;
+ nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+ nesqp->last_aeq = async_event_id;
+ if (nesqp->cm_id) {
+ nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
+ " event on QP%u, remote IP = 0x%08X \n",
+ nesqp->hwqp.qp_id,
+ ntohl(nesqp->cm_id->remote_addr.sin_addr.s_addr));
+ } else {
+ nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
+ " event on QP%u \n",
+ nesqp->hwqp.qp_id);
+ }
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_RESET;
+ nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
+ if (nesqp->ibqp.event_handler) {
+ ibevent.device = nesqp->ibqp.device;
+ ibevent.element.qp = &nesqp->ibqp;
+ ibevent.event = IB_EVENT_QP_FATAL;
+ nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+ }
+ break;
+ case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
+ if (NES_AEQE_INBOUND_RDMA&aeq_info) {
+ nesqp = nesadapter->qp_table[le32_to_cpu(
+ aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
+ } else {
+ /* TODO: get the actual WQE and mask off wqe index */
+ context &= ~((u64)511);
+ nesqp = *((struct nes_qp **)&context);
+ }
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_BAD_STAG_INDEX event on QP%u\n",
+ nesqp->hwqp.qp_id);
+ if (nesqp->ibqp.event_handler) {
+ ibevent.device = nesqp->ibqp.device;
+ ibevent.element.qp = &nesqp->ibqp;
+ ibevent.event = IB_EVENT_QP_ACCESS_ERR;
+ nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+ }
+ break;
+ case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
+ nesqp = *((struct nes_qp **)&context);
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_UNALLOCATED_STAG event on QP%u\n",
+ nesqp->hwqp.qp_id);
+ if (nesqp->ibqp.event_handler) {
+ ibevent.device = nesqp->ibqp.device;
+ ibevent.element.qp = &nesqp->ibqp;
+ ibevent.event = IB_EVENT_QP_ACCESS_ERR;
+ nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+ }
+ break;
+ case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
+ nesqp = nesadapter->qp_table[le32_to_cpu(aeqe->aeqe_words
+ [NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_PRIV_OPERATION_DENIED event on QP%u,"
+ " nesqp = %p, AE reported %p\n",
+ nesqp->hwqp.qp_id, nesqp, *((struct nes_qp **)&context));
+ if (nesqp->ibqp.event_handler) {
+ ibevent.device = nesqp->ibqp.device;
+ ibevent.element.qp = &nesqp->ibqp;
+ ibevent.event = IB_EVENT_QP_ACCESS_ERR;
+ nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+ }
+ break;
+ case NES_AEQE_AEID_CQ_OPERATION_ERROR:
+ context <<= 1;
+ nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n",
+ le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), (void *)(unsigned long)context);
+ resource_allocated = nes_is_resource_allocated(nesadapter, nesadapter->allocated_cqs,
+ le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
+ if (resource_allocated) {
+ printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n",
+ __func__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
+ }
+ break;
+ case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+ nesqp = nesadapter->qp_table[le32_to_cpu(
+ aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG"
+ "_FOR_AVAILABLE_BUFFER event on QP%u\n",
+ nesqp->hwqp.qp_id);
+ if (nesqp->ibqp.event_handler) {
+ ibevent.device = nesqp->ibqp.device;
+ ibevent.element.qp = &nesqp->ibqp;
+ ibevent.event = IB_EVENT_QP_ACCESS_ERR;
+ nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+ }
+ /* tell cm to disconnect, cm will queue work to thread */
+ nes_cm_disconn(nesqp);
+ break;
+ case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+ nesqp = *((struct nes_qp **)&context);
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_INVALID_MSN"
+ "_NO_BUFFER_AVAILABLE event on QP%u\n",
+ nesqp->hwqp.qp_id);
+ if (nesqp->ibqp.event_handler) {
+ ibevent.device = nesqp->ibqp.device;
+ ibevent.element.qp = &nesqp->ibqp;
+ ibevent.event = IB_EVENT_QP_FATAL;
+ nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+ }
+ /* tell cm to disconnect, cm will queue work to thread */
+ nes_cm_disconn(nesqp);
+ break;
+ case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
+ nesqp = *((struct nes_qp **)&context);
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR"
+ " event on QP%u \n Q2 Data:\n",
+ nesqp->hwqp.qp_id);
+ if (nesqp->ibqp.event_handler) {
+ ibevent.device = nesqp->ibqp.device;
+ ibevent.element.qp = &nesqp->ibqp;
+ ibevent.event = IB_EVENT_QP_FATAL;
+ nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+ }
+ /* tell cm to disconnect, cm will queue work to thread */
+ nes_cm_disconn(nesqp);
+ break;
+ /* TODO: additional AEs need to be here */
+ case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
+ nesqp = *((struct nes_qp **)&context);
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ if (nesqp->ibqp.event_handler) {
+ ibevent.device = nesqp->ibqp.device;
+ ibevent.element.qp = &nesqp->ibqp;
+ ibevent.event = IB_EVENT_QP_ACCESS_ERR;
+ nesqp->ibqp.event_handler(&ibevent,
+ nesqp->ibqp.qp_context);
+ }
+ nes_cm_disconn(nesqp);
+ break;
+ default:
+ nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n",
+ async_event_id);
+ break;
+ }
+
+}
+
+
+/**
+ * nes_iwarp_ce_handler - acknowledge a completion event on an iWARP CQ and invoke its completion handler
+ */
+void nes_iwarp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *hw_cq)
+{
+ struct nes_cq *nescq = container_of(hw_cq, struct nes_cq, hw_cq);
+
+ /* nes_debug(NES_DBG_CQ, "Processing completion event for iWARP CQ%u.\n",
+ nescq->hw_cq.cq_number); */
+ nes_write32(nesdev->regs+NES_CQ_ACK, nescq->hw_cq.cq_number);
+
+ if (nescq->ibcq.comp_handler)
+ nescq->ibcq.comp_handler(&nescq->ibcq, nescq->ibcq.cq_context);
+
+ return;
+}
+
+
+/**
+ * nes_manage_apbvt - add or remove an APBVT entry for an accelerated local port via the CQP
+ */
+int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port,
+ u32 nic_index, u32 add_port)
+{
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_cqp_request *cqp_request;
+ int ret = 0;
+ u16 major_code;
+
+ /* Send manage APBVT request to CQP */
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n");
+ return -ENOMEM;
+ }
+ cqp_request->waiting = 1;
+ cqp_wqe = &cqp_request->cqp_wqe;
+
+ nes_debug(NES_DBG_QP, "%s APBVT for local port=%u(0x%04x), nic_index=%u\n",
+ (add_port == NES_MANAGE_APBVT_ADD) ? "ADD" : "DEL",
+ accel_local_port, accel_local_port, nic_index);
+
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, (NES_CQP_MANAGE_APBVT |
+ ((add_port == NES_MANAGE_APBVT_ADD) ? NES_CQP_APBVT_ADD : 0)));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+ ((nic_index << NES_CQP_APBVT_NIC_SHIFT) | accel_local_port));
+
+ nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n");
+
+ atomic_set(&cqp_request->refcount, 2);
+ nes_post_cqp_request(nesdev, cqp_request);
+
+ if (add_port == NES_MANAGE_APBVT_ADD)
+ ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
+ NES_EVENT_TIMEOUT);
+ nes_debug(NES_DBG_QP, "Completed, ret=%u, CQP Major:Minor codes = 0x%04X:0x%04X\n",
+ ret, cqp_request->major_code, cqp_request->minor_code);
+ major_code = cqp_request->major_code;
+
+ nes_put_cqp_request(nesdev, cqp_request);
+
+ if (!ret)
+ return -ETIME;
+ else if (major_code)
+ return -EIO;
+ else
+ return 0;
+}
+
+
+/**
+ * nes_manage_arp_cache - update the ARP table and the hardware ARP cache entry
+ */
+void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr,
+ u32 ip_addr, u32 action)
+{
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev;
+ struct nes_cqp_request *cqp_request;
+ int arp_index;
+
+ nesdev = nesvnic->nesdev;
+ arp_index = nes_arp_table(nesdev, ip_addr, mac_addr, action);
+ if (arp_index == -1) {
+ return;
+ }
+
+ /* update the ARP entry */
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ nes_debug(NES_DBG_NETDEV, "Failed to get a cqp_request.\n");
+ return;
+ }
+ cqp_request->waiting = 0;
+ cqp_wqe = &cqp_request->cqp_wqe;
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(
+ NES_CQP_MANAGE_ARP_CACHE | NES_CQP_ARP_PERM);
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(
+ (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_CQP_ARP_AEQ_INDEX_SHIFT);
+ cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(arp_index);
+
+ if (action == NES_ARP_ADD) {
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_ARP_VALID);
+ cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = cpu_to_le32(
+ (((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) |
+ (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]);
+ cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32(
+ (((u32)mac_addr[0]) << 16) | (u32)mac_addr[1]);
+ } else {
+ cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = 0;
+ cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = 0;
+ }
+
+ nes_debug(NES_DBG_NETDEV, "Not waiting for CQP, cqp.sq_head=%u, cqp.sq_tail=%u\n",
+ nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
+
+ atomic_set(&cqp_request->refcount, 1);
+ nes_post_cqp_request(nesdev, cqp_request);
+}
+
+
+/**
+ * flush_wqes - post a CQP request to flush a QP's work queue entries
+ */
+void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
+ u32 which_wq, u32 wait_completion)
+{
+ struct nes_cqp_request *cqp_request;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ int ret;
+
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n");
+ return;
+ }
+ if (wait_completion) {
+ cqp_request->waiting = 1;
+ atomic_set(&cqp_request->refcount, 2);
+ } else {
+ cqp_request->waiting = 0;
+ }
+ cqp_wqe = &cqp_request->cqp_wqe;
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
+ cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq);
+ cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id);
+
+ nes_post_cqp_request(nesdev, cqp_request);
+
+ if (wait_completion) {
+ /* Wait for CQP */
+ ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
+ NES_EVENT_TIMEOUT);
+ nes_debug(NES_DBG_QP, "Flush QP WQEs completed, ret=%u,"
+ " CQP Major:Minor codes = 0x%04X:0x%04X\n",
+ ret, cqp_request->major_code, cqp_request->minor_code);
+ nes_put_cqp_request(nesdev, cqp_request);
+ }
+}
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
new file mode 100644
index 0000000..bc0b4de
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -0,0 +1,1222 @@
+/*
+* Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenIB.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*/
+
+#ifndef __NES_HW_H
+#define __NES_HW_H
+
+#include <linux/inet_lro.h>
+
+#define NES_PHY_TYPE_1G 2
+#define NES_PHY_TYPE_IRIS 3
+#define NES_PHY_TYPE_ARGUS 4
+#define NES_PHY_TYPE_PUMA_1G 5
+#define NES_PHY_TYPE_PUMA_10G 6
+#define NES_PHY_TYPE_GLADIUS 7
+
+#define NES_MULTICAST_PF_MAX 8
+
+enum pci_regs {
+ NES_INT_STAT = 0x0000,
+ NES_INT_MASK = 0x0004,
+ NES_INT_PENDING = 0x0008,
+ NES_INTF_INT_STAT = 0x000C,
+ NES_INTF_INT_MASK = 0x0010,
+ NES_TIMER_STAT = 0x0014,
+ NES_PERIODIC_CONTROL = 0x0018,
+ NES_ONE_SHOT_CONTROL = 0x001C,
+ NES_EEPROM_COMMAND = 0x0020,
+ NES_EEPROM_DATA = 0x0024,
+ NES_FLASH_COMMAND = 0x0028,
+ NES_FLASH_DATA = 0x002C,
+ NES_SOFTWARE_RESET = 0x0030,
+ NES_CQ_ACK = 0x0034,
+ NES_WQE_ALLOC = 0x0040,
+ NES_CQE_ALLOC = 0x0044,
+};
+
+enum indexed_regs {
+ NES_IDX_CREATE_CQP_LOW = 0x0000,
+ NES_IDX_CREATE_CQP_HIGH = 0x0004,
+ NES_IDX_QP_CONTROL = 0x0040,
+ NES_IDX_FLM_CONTROL = 0x0080,
+ NES_IDX_INT_CPU_STATUS = 0x00a0,
+ NES_IDX_GPIO_CONTROL = 0x00f0,
+ NES_IDX_GPIO_DATA = 0x00f4,
+ NES_IDX_TCP_CONFIG0 = 0x01e4,
+ NES_IDX_TCP_TIMER_CONFIG = 0x01ec,
+ NES_IDX_TCP_NOW = 0x01f0,
+ NES_IDX_QP_MAX_CFG_SIZES = 0x0200,
+ NES_IDX_QP_CTX_SIZE = 0x0218,
+ NES_IDX_TCP_TIMER_SIZE0 = 0x0238,
+ NES_IDX_TCP_TIMER_SIZE1 = 0x0240,
+ NES_IDX_ARP_CACHE_SIZE = 0x0258,
+ NES_IDX_CQ_CTX_SIZE = 0x0260,
+ NES_IDX_MRT_SIZE = 0x0278,
+ NES_IDX_PBL_REGION_SIZE = 0x0280,
+ NES_IDX_IRRQ_COUNT = 0x02b0,
+ NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x02f0,
+ NES_IDX_RX_WINDOW_BUFFER_SIZE = 0x0300,
+ NES_IDX_DST_IP_ADDR = 0x0400,
+ NES_IDX_PCIX_DIAG = 0x08e8,
+ NES_IDX_MPP_DEBUG = 0x0a00,
+ NES_IDX_PORT_RX_DISCARDS = 0x0a30,
+ NES_IDX_PORT_TX_DISCARDS = 0x0a34,
+ NES_IDX_MPP_LB_DEBUG = 0x0b00,
+ NES_IDX_DENALI_CTL_22 = 0x1058,
+ NES_IDX_MAC_TX_CONTROL = 0x2000,
+ NES_IDX_MAC_TX_CONFIG = 0x2004,
+ NES_IDX_MAC_TX_PAUSE_QUANTA = 0x2008,
+ NES_IDX_MAC_RX_CONTROL = 0x200c,
+ NES_IDX_MAC_RX_CONFIG = 0x2010,
+ NES_IDX_MAC_EXACT_MATCH_BOTTOM = 0x201c,
+ NES_IDX_MAC_MDIO_CONTROL = 0x2084,
+ NES_IDX_MAC_TX_OCTETS_LOW = 0x2100,
+ NES_IDX_MAC_TX_OCTETS_HIGH = 0x2104,
+ NES_IDX_MAC_TX_FRAMES_LOW = 0x2108,
+ NES_IDX_MAC_TX_FRAMES_HIGH = 0x210c,
+ NES_IDX_MAC_TX_PAUSE_FRAMES = 0x2118,
+ NES_IDX_MAC_TX_ERRORS = 0x2138,
+ NES_IDX_MAC_RX_OCTETS_LOW = 0x213c,
+ NES_IDX_MAC_RX_OCTETS_HIGH = 0x2140,
+ NES_IDX_MAC_RX_FRAMES_LOW = 0x2144,
+ NES_IDX_MAC_RX_FRAMES_HIGH = 0x2148,
+ NES_IDX_MAC_RX_BC_FRAMES_LOW = 0x214c,
+ NES_IDX_MAC_RX_MC_FRAMES_HIGH = 0x2150,
+ NES_IDX_MAC_RX_PAUSE_FRAMES = 0x2154,
+ NES_IDX_MAC_RX_SHORT_FRAMES = 0x2174,
+ NES_IDX_MAC_RX_OVERSIZED_FRAMES = 0x2178,
+ NES_IDX_MAC_RX_JABBER_FRAMES = 0x217c,
+ NES_IDX_MAC_RX_CRC_ERR_FRAMES = 0x2180,
+ NES_IDX_MAC_RX_LENGTH_ERR_FRAMES = 0x2184,
+ NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES = 0x2188,
+ NES_IDX_MAC_INT_STATUS = 0x21f0,
+ NES_IDX_MAC_INT_MASK = 0x21f4,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 = 0x2800,
+ NES_IDX_PHY_PCS_CONTROL_STATUS1 = 0x2a00,
+ NES_IDX_ETH_SERDES_COMMON_CONTROL0 = 0x2808,
+ NES_IDX_ETH_SERDES_COMMON_CONTROL1 = 0x2a08,
+ NES_IDX_ETH_SERDES_COMMON_STATUS0 = 0x280c,
+ NES_IDX_ETH_SERDES_COMMON_STATUS1 = 0x2a0c,
+ NES_IDX_ETH_SERDES_TX_EMP0 = 0x2810,
+ NES_IDX_ETH_SERDES_TX_EMP1 = 0x2a10,
+ NES_IDX_ETH_SERDES_TX_DRIVE0 = 0x2814,
+ NES_IDX_ETH_SERDES_TX_DRIVE1 = 0x2a14,
+ NES_IDX_ETH_SERDES_RX_MODE0 = 0x2818,
+ NES_IDX_ETH_SERDES_RX_MODE1 = 0x2a18,
+ NES_IDX_ETH_SERDES_RX_SIGDET0 = 0x281c,
+ NES_IDX_ETH_SERDES_RX_SIGDET1 = 0x2a1c,
+ NES_IDX_ETH_SERDES_BYPASS0 = 0x2820,
+ NES_IDX_ETH_SERDES_BYPASS1 = 0x2a20,
+ NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0 = 0x2824,
+ NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1 = 0x2a24,
+ NES_IDX_ETH_SERDES_RX_EQ_CONTROL0 = 0x2828,
+ NES_IDX_ETH_SERDES_RX_EQ_CONTROL1 = 0x2a28,
+ NES_IDX_ETH_SERDES_RX_EQ_STATUS0 = 0x282c,
+ NES_IDX_ETH_SERDES_RX_EQ_STATUS1 = 0x2a2c,
+ NES_IDX_ETH_SERDES_CDR_RESET0 = 0x2830,
+ NES_IDX_ETH_SERDES_CDR_RESET1 = 0x2a30,
+ NES_IDX_ETH_SERDES_CDR_CONTROL0 = 0x2834,
+ NES_IDX_ETH_SERDES_CDR_CONTROL1 = 0x2a34,
+ NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0 = 0x2838,
+ NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1 = 0x2a38,
+ NES_IDX_ENDNODE0_NSTAT_RX_DISCARD = 0x3080,
+ NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO = 0x3000,
+ NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI = 0x3004,
+ NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO = 0x3008,
+ NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI = 0x300c,
+ NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO = 0x7000,
+ NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI = 0x7004,
+ NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO = 0x7008,
+ NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI = 0x700c,
+ NES_IDX_WQM_CONFIG1 = 0x5004,
+ NES_IDX_CM_CONFIG = 0x5100,
+ NES_IDX_NIC_LOGPORT_TO_PHYPORT = 0x6000,
+ NES_IDX_NIC_PHYPORT_TO_USW = 0x6008,
+ NES_IDX_NIC_ACTIVE = 0x6010,
+ NES_IDX_NIC_UNICAST_ALL = 0x6018,
+ NES_IDX_NIC_MULTICAST_ALL = 0x6020,
+ NES_IDX_NIC_MULTICAST_ENABLE = 0x6028,
+ NES_IDX_NIC_BROADCAST_ON = 0x6030,
+ NES_IDX_USED_CHUNKS_TX = 0x60b0,
+ NES_IDX_TX_POOL_SIZE = 0x60b8,
+ NES_IDX_QUAD_HASH_TABLE_SIZE = 0x6148,
+ NES_IDX_PERFECT_FILTER_LOW = 0x6200,
+ NES_IDX_PERFECT_FILTER_HIGH = 0x6204,
+ NES_IDX_IPV4_TCP_REXMITS = 0x7080,
+ NES_IDX_DEBUG_ERROR_CONTROL_STATUS = 0x913c,
+ NES_IDX_DEBUG_ERROR_MASKS0 = 0x9140,
+ NES_IDX_DEBUG_ERROR_MASKS1 = 0x9144,
+ NES_IDX_DEBUG_ERROR_MASKS2 = 0x9148,
+ NES_IDX_DEBUG_ERROR_MASKS3 = 0x914c,
+ NES_IDX_DEBUG_ERROR_MASKS4 = 0x9150,
+ NES_IDX_DEBUG_ERROR_MASKS5 = 0x9154,
+};
+
+#define NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE 1
+#define NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE (1 << 17)
+
+enum nes_cqp_opcodes {
+ NES_CQP_CREATE_QP = 0x00,
+ NES_CQP_MODIFY_QP = 0x01,
+ NES_CQP_DESTROY_QP = 0x02,
+ NES_CQP_CREATE_CQ = 0x03,
+ NES_CQP_MODIFY_CQ = 0x04,
+ NES_CQP_DESTROY_CQ = 0x05,
+ NES_CQP_ALLOCATE_STAG = 0x09,
+ NES_CQP_REGISTER_STAG = 0x0a,
+ NES_CQP_QUERY_STAG = 0x0b,
+ NES_CQP_REGISTER_SHARED_STAG = 0x0c,
+ NES_CQP_DEALLOCATE_STAG = 0x0d,
+ NES_CQP_MANAGE_ARP_CACHE = 0x0f,
+ NES_CQP_SUSPEND_QPS = 0x11,
+ NES_CQP_UPLOAD_CONTEXT = 0x13,
+ NES_CQP_CREATE_CEQ = 0x16,
+ NES_CQP_DESTROY_CEQ = 0x18,
+ NES_CQP_CREATE_AEQ = 0x19,
+ NES_CQP_DESTROY_AEQ = 0x1b,
+ NES_CQP_LMI_ACCESS = 0x20,
+ NES_CQP_FLUSH_WQES = 0x22,
+ NES_CQP_MANAGE_APBVT = 0x23
+};
+
+enum nes_cqp_wqe_word_idx {
+ NES_CQP_WQE_OPCODE_IDX = 0,
+ NES_CQP_WQE_ID_IDX = 1,
+ NES_CQP_WQE_COMP_CTX_LOW_IDX = 2,
+ NES_CQP_WQE_COMP_CTX_HIGH_IDX = 3,
+ NES_CQP_WQE_COMP_SCRATCH_LOW_IDX = 4,
+ NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX = 5,
+};
+
+enum nes_cqp_cq_wqeword_idx {
+ NES_CQP_CQ_WQE_PBL_LOW_IDX = 6,
+ NES_CQP_CQ_WQE_PBL_HIGH_IDX = 7,
+ NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX = 8,
+ NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX = 9,
+ NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX = 10,
+};
+
+enum nes_cqp_stag_wqeword_idx {
+ NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX = 1,
+ NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX = 6,
+ NES_CQP_STAG_WQE_LEN_LOW_IDX = 7,
+ NES_CQP_STAG_WQE_STAG_IDX = 8,
+ NES_CQP_STAG_WQE_VA_LOW_IDX = 10,
+ NES_CQP_STAG_WQE_VA_HIGH_IDX = 11,
+ NES_CQP_STAG_WQE_PA_LOW_IDX = 12,
+ NES_CQP_STAG_WQE_PA_HIGH_IDX = 13,
+ NES_CQP_STAG_WQE_PBL_LEN_IDX = 14
+};
+
+#define NES_CQP_OP_IWARP_STATE_SHIFT 28
+
+enum nes_cqp_qp_bits {
+ NES_CQP_QP_ARP_VALID = (1<<8),
+ NES_CQP_QP_WINBUF_VALID = (1<<9),
+ NES_CQP_QP_CONTEXT_VALID = (1<<10),
+ NES_CQP_QP_ORD_VALID = (1<<11),
+ NES_CQP_QP_WINBUF_DATAIND_EN = (1<<12),
+ NES_CQP_QP_VIRT_WQS = (1<<13),
+ NES_CQP_QP_DEL_HTE = (1<<14),
+ NES_CQP_QP_CQS_VALID = (1<<15),
+ NES_CQP_QP_TYPE_TSA = 0,
+ NES_CQP_QP_TYPE_IWARP = (1<<16),
+ NES_CQP_QP_TYPE_CQP = (4<<16),
+ NES_CQP_QP_TYPE_NIC = (5<<16),
+ NES_CQP_QP_MSS_CHG = (1<<20),
+ NES_CQP_QP_STATIC_RESOURCES = (1<<21),
+ NES_CQP_QP_IGNORE_MW_BOUND = (1<<22),
+ NES_CQP_QP_VWQ_USE_LMI = (1<<23),
+ NES_CQP_QP_IWARP_STATE_IDLE = (1<<NES_CQP_OP_IWARP_STATE_SHIFT),
+ NES_CQP_QP_IWARP_STATE_RTS = (2<<NES_CQP_OP_IWARP_STATE_SHIFT),
+ NES_CQP_QP_IWARP_STATE_CLOSING = (3<<NES_CQP_OP_IWARP_STATE_SHIFT),
+ NES_CQP_QP_IWARP_STATE_TERMINATE = (5<<NES_CQP_OP_IWARP_STATE_SHIFT),
+ NES_CQP_QP_IWARP_STATE_ERROR = (6<<NES_CQP_OP_IWARP_STATE_SHIFT),
+ NES_CQP_QP_IWARP_STATE_MASK = (7<<NES_CQP_OP_IWARP_STATE_SHIFT),
+ NES_CQP_QP_RESET = (1<<31),
+};
+
+enum nes_cqp_qp_wqe_word_idx {
+ NES_CQP_QP_WQE_CONTEXT_LOW_IDX = 6,
+ NES_CQP_QP_WQE_CONTEXT_HIGH_IDX = 7,
+ NES_CQP_QP_WQE_NEW_MSS_IDX = 15,
+};
+
+enum nes_nic_ctx_bits {
+ NES_NIC_CTX_RQ_SIZE_32 = (3<<8),
+ NES_NIC_CTX_RQ_SIZE_512 = (3<<8),
+ NES_NIC_CTX_SQ_SIZE_32 = (1<<10),
+ NES_NIC_CTX_SQ_SIZE_512 = (3<<10),
+};
+
+enum nes_nic_qp_ctx_word_idx {
+ NES_NIC_CTX_MISC_IDX = 0,
+ NES_NIC_CTX_SQ_LOW_IDX = 2,
+ NES_NIC_CTX_SQ_HIGH_IDX = 3,
+ NES_NIC_CTX_RQ_LOW_IDX = 4,
+ NES_NIC_CTX_RQ_HIGH_IDX = 5,
+};
+
+enum nes_cqp_cq_bits {
+ NES_CQP_CQ_CEQE_MASK = (1<<9),
+ NES_CQP_CQ_CEQ_VALID = (1<<10),
+ NES_CQP_CQ_RESIZE = (1<<11),
+ NES_CQP_CQ_CHK_OVERFLOW = (1<<12),
+ NES_CQP_CQ_4KB_CHUNK = (1<<14),
+ NES_CQP_CQ_VIRT = (1<<15),
+};
+
+enum nes_cqp_stag_bits {
+ NES_CQP_STAG_VA_TO = (1<<9),
+ NES_CQP_STAG_DEALLOC_PBLS = (1<<10),
+ NES_CQP_STAG_PBL_BLK_SIZE = (1<<11),
+ NES_CQP_STAG_MR = (1<<13),
+ NES_CQP_STAG_RIGHTS_LOCAL_READ = (1<<16),
+ NES_CQP_STAG_RIGHTS_LOCAL_WRITE = (1<<17),
+ NES_CQP_STAG_RIGHTS_REMOTE_READ = (1<<18),
+ NES_CQP_STAG_RIGHTS_REMOTE_WRITE = (1<<19),
+ NES_CQP_STAG_RIGHTS_WINDOW_BIND = (1<<20),
+ NES_CQP_STAG_REM_ACC_EN = (1<<21),
+ NES_CQP_STAG_LEAVE_PENDING = (1<<31),
+};
+
+enum nes_cqp_ceq_wqeword_idx {
+ NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX = 1,
+ NES_CQP_CEQ_WQE_PBL_LOW_IDX = 6,
+ NES_CQP_CEQ_WQE_PBL_HIGH_IDX = 7,
+};
+
+enum nes_cqp_ceq_bits {
+ NES_CQP_CEQ_4KB_CHUNK = (1<<14),
+ NES_CQP_CEQ_VIRT = (1<<15),
+};
+
+enum nes_cqp_aeq_wqeword_idx {
+ NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX = 1,
+ NES_CQP_AEQ_WQE_PBL_LOW_IDX = 6,
+ NES_CQP_AEQ_WQE_PBL_HIGH_IDX = 7,
+};
+
+enum nes_cqp_aeq_bits {
+ NES_CQP_AEQ_4KB_CHUNK = (1<<14),
+ NES_CQP_AEQ_VIRT = (1<<15),
+};
+
+enum nes_cqp_lmi_wqeword_idx {
+ NES_CQP_LMI_WQE_LMI_OFFSET_IDX = 1,
+ NES_CQP_LMI_WQE_FRAG_LOW_IDX = 8,
+ NES_CQP_LMI_WQE_FRAG_HIGH_IDX = 9,
+ NES_CQP_LMI_WQE_FRAG_LEN_IDX = 10,
+};
+
+enum nes_cqp_arp_wqeword_idx {
+ NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX = 6,
+ NES_CQP_ARP_WQE_MAC_HIGH_IDX = 7,
+ NES_CQP_ARP_WQE_REACHABILITY_MAX_IDX = 1,
+};
+
+enum nes_cqp_upload_wqeword_idx {
+ NES_CQP_UPLOAD_WQE_CTXT_LOW_IDX = 6,
+ NES_CQP_UPLOAD_WQE_CTXT_HIGH_IDX = 7,
+ NES_CQP_UPLOAD_WQE_HTE_IDX = 8,
+};
+
+enum nes_cqp_arp_bits {
+ NES_CQP_ARP_VALID = (1<<8),
+ NES_CQP_ARP_PERM = (1<<9),
+};
+
+enum nes_cqp_flush_bits {
+ NES_CQP_FLUSH_SQ = (1<<30),
+ NES_CQP_FLUSH_RQ = (1<<31),
+};
+
+enum nes_cqe_opcode_bits {
+ NES_CQE_STAG_VALID = (1<<6),
+ NES_CQE_ERROR = (1<<7),
+ NES_CQE_SQ = (1<<8),
+ NES_CQE_SE = (1<<9),
+ NES_CQE_PSH = (1<<29),
+ NES_CQE_FIN = (1<<30),
+ NES_CQE_VALID = (1<<31),
+};
+
+
+enum nes_cqe_word_idx {
+ NES_CQE_PAYLOAD_LENGTH_IDX = 0,
+ NES_CQE_COMP_COMP_CTX_LOW_IDX = 2,
+ NES_CQE_COMP_COMP_CTX_HIGH_IDX = 3,
+ NES_CQE_INV_STAG_IDX = 4,
+ NES_CQE_QP_ID_IDX = 5,
+ NES_CQE_ERROR_CODE_IDX = 6,
+ NES_CQE_OPCODE_IDX = 7,
+};
+
+enum nes_ceqe_word_idx {
+ NES_CEQE_CQ_CTX_LOW_IDX = 0,
+ NES_CEQE_CQ_CTX_HIGH_IDX = 1,
+};
+
+enum nes_ceqe_status_bit {
+ NES_CEQE_VALID = (1<<31),
+};
+
+enum nes_int_bits {
+ NES_INT_CEQ0 = (1<<0),
+ NES_INT_CEQ1 = (1<<1),
+ NES_INT_CEQ2 = (1<<2),
+ NES_INT_CEQ3 = (1<<3),
+ NES_INT_CEQ4 = (1<<4),
+ NES_INT_CEQ5 = (1<<5),
+ NES_INT_CEQ6 = (1<<6),
+ NES_INT_CEQ7 = (1<<7),
+ NES_INT_CEQ8 = (1<<8),
+ NES_INT_CEQ9 = (1<<9),
+ NES_INT_CEQ10 = (1<<10),
+ NES_INT_CEQ11 = (1<<11),
+ NES_INT_CEQ12 = (1<<12),
+ NES_INT_CEQ13 = (1<<13),
+ NES_INT_CEQ14 = (1<<14),
+ NES_INT_CEQ15 = (1<<15),
+ NES_INT_AEQ0 = (1<<16),
+ NES_INT_AEQ1 = (1<<17),
+ NES_INT_AEQ2 = (1<<18),
+ NES_INT_AEQ3 = (1<<19),
+ NES_INT_AEQ4 = (1<<20),
+ NES_INT_AEQ5 = (1<<21),
+ NES_INT_AEQ6 = (1<<22),
+ NES_INT_AEQ7 = (1<<23),
+ NES_INT_MAC0 = (1<<24),
+ NES_INT_MAC1 = (1<<25),
+ NES_INT_MAC2 = (1<<26),
+ NES_INT_MAC3 = (1<<27),
+ NES_INT_TSW = (1<<28),
+ NES_INT_TIMER = (1<<29),
+ NES_INT_INTF = (1<<30),
+};
+
+enum nes_intf_int_bits {
+ NES_INTF_INT_PCIERR = (1<<0),
+ NES_INTF_PERIODIC_TIMER = (1<<2),
+ NES_INTF_ONE_SHOT_TIMER = (1<<3),
+ NES_INTF_INT_CRITERR = (1<<14),
+ NES_INTF_INT_AEQ0_OFLOW = (1<<16),
+ NES_INTF_INT_AEQ1_OFLOW = (1<<17),
+ NES_INTF_INT_AEQ2_OFLOW = (1<<18),
+ NES_INTF_INT_AEQ3_OFLOW = (1<<19),
+ NES_INTF_INT_AEQ4_OFLOW = (1<<20),
+ NES_INTF_INT_AEQ5_OFLOW = (1<<21),
+ NES_INTF_INT_AEQ6_OFLOW = (1<<22),
+ NES_INTF_INT_AEQ7_OFLOW = (1<<23),
+ NES_INTF_INT_AEQ_OFLOW = (0xff<<16),
+};
+
+enum nes_mac_int_bits {
+ NES_MAC_INT_LINK_STAT_CHG = (1<<1),
+ NES_MAC_INT_XGMII_EXT = (1<<2),
+ NES_MAC_INT_TX_UNDERFLOW = (1<<6),
+ NES_MAC_INT_TX_ERROR = (1<<7),
+};
+
+enum nes_cqe_allocate_bits {
+ NES_CQE_ALLOC_INC_SELECT = (1<<28),
+ NES_CQE_ALLOC_NOTIFY_NEXT = (1<<29),
+ NES_CQE_ALLOC_NOTIFY_SE = (1<<30),
+ NES_CQE_ALLOC_RESET = (1<<31),
+};
+
+enum nes_nic_rq_wqe_word_idx {
+ NES_NIC_RQ_WQE_LENGTH_1_0_IDX = 0,
+ NES_NIC_RQ_WQE_LENGTH_3_2_IDX = 1,
+ NES_NIC_RQ_WQE_FRAG0_LOW_IDX = 2,
+ NES_NIC_RQ_WQE_FRAG0_HIGH_IDX = 3,
+ NES_NIC_RQ_WQE_FRAG1_LOW_IDX = 4,
+ NES_NIC_RQ_WQE_FRAG1_HIGH_IDX = 5,
+ NES_NIC_RQ_WQE_FRAG2_LOW_IDX = 6,
+ NES_NIC_RQ_WQE_FRAG2_HIGH_IDX = 7,
+ NES_NIC_RQ_WQE_FRAG3_LOW_IDX = 8,
+ NES_NIC_RQ_WQE_FRAG3_HIGH_IDX = 9,
+};
+
+enum nes_nic_sq_wqe_word_idx {
+ NES_NIC_SQ_WQE_MISC_IDX = 0,
+ NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX = 1,
+ NES_NIC_SQ_WQE_LSO_INFO_IDX = 2,
+ NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX = 3,
+ NES_NIC_SQ_WQE_LENGTH_2_1_IDX = 4,
+ NES_NIC_SQ_WQE_LENGTH_4_3_IDX = 5,
+ NES_NIC_SQ_WQE_FRAG0_LOW_IDX = 6,
+ NES_NIC_SQ_WQE_FRAG0_HIGH_IDX = 7,
+ NES_NIC_SQ_WQE_FRAG1_LOW_IDX = 8,
+ NES_NIC_SQ_WQE_FRAG1_HIGH_IDX = 9,
+ NES_NIC_SQ_WQE_FRAG2_LOW_IDX = 10,
+ NES_NIC_SQ_WQE_FRAG2_HIGH_IDX = 11,
+ NES_NIC_SQ_WQE_FRAG3_LOW_IDX = 12,
+ NES_NIC_SQ_WQE_FRAG3_HIGH_IDX = 13,
+ NES_NIC_SQ_WQE_FRAG4_LOW_IDX = 14,
+ NES_NIC_SQ_WQE_FRAG4_HIGH_IDX = 15,
+};
+
+enum nes_iwarp_sq_wqe_word_idx {
+ NES_IWARP_SQ_WQE_MISC_IDX = 0,
+ NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX = 1,
+ NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX = 2,
+ NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX = 3,
+ NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX = 4,
+ NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX = 5,
+ NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX = 7,
+ NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX = 8,
+ NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX = 9,
+ NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX = 10,
+ NES_IWARP_SQ_WQE_RDMA_STAG_IDX = 11,
+ NES_IWARP_SQ_WQE_IMM_DATA_START_IDX = 12,
+ NES_IWARP_SQ_WQE_FRAG0_LOW_IDX = 16,
+ NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX = 17,
+ NES_IWARP_SQ_WQE_LENGTH0_IDX = 18,
+ NES_IWARP_SQ_WQE_STAG0_IDX = 19,
+ NES_IWARP_SQ_WQE_FRAG1_LOW_IDX = 20,
+ NES_IWARP_SQ_WQE_FRAG1_HIGH_IDX = 21,
+ NES_IWARP_SQ_WQE_LENGTH1_IDX = 22,
+ NES_IWARP_SQ_WQE_STAG1_IDX = 23,
+ NES_IWARP_SQ_WQE_FRAG2_LOW_IDX = 24,
+ NES_IWARP_SQ_WQE_FRAG2_HIGH_IDX = 25,
+ NES_IWARP_SQ_WQE_LENGTH2_IDX = 26,
+ NES_IWARP_SQ_WQE_STAG2_IDX = 27,
+ NES_IWARP_SQ_WQE_FRAG3_LOW_IDX = 28,
+ NES_IWARP_SQ_WQE_FRAG3_HIGH_IDX = 29,
+ NES_IWARP_SQ_WQE_LENGTH3_IDX = 30,
+ NES_IWARP_SQ_WQE_STAG3_IDX = 31,
+};
+
+enum nes_iwarp_sq_bind_wqe_word_idx {
+ NES_IWARP_SQ_BIND_WQE_MR_IDX = 6,
+ NES_IWARP_SQ_BIND_WQE_MW_IDX = 7,
+ NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX = 8,
+ NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX = 9,
+ NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX = 10,
+ NES_IWARP_SQ_BIND_WQE_VA_FBO_HIGH_IDX = 11,
+};
+
+enum nes_iwarp_sq_fmr_wqe_word_idx {
+ NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX = 7,
+ NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX = 8,
+ NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX = 9,
+ NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX = 10,
+ NES_IWARP_SQ_FMR_WQE_VA_FBO_HIGH_IDX = 11,
+ NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX = 12,
+ NES_IWARP_SQ_FMR_WQE_PBL_ADDR_HIGH_IDX = 13,
+ NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14,
+};
+
+enum nes_iwarp_sq_locinv_wqe_word_idx {
+ NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6,
+};
+
+
+enum nes_iwarp_rq_wqe_word_idx {
+ NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1,
+ NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2,
+ NES_IWARP_RQ_WQE_COMP_CTX_HIGH_IDX = 3,
+ NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX = 4,
+ NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX = 5,
+ NES_IWARP_RQ_WQE_FRAG0_LOW_IDX = 8,
+ NES_IWARP_RQ_WQE_FRAG0_HIGH_IDX = 9,
+ NES_IWARP_RQ_WQE_LENGTH0_IDX = 10,
+ NES_IWARP_RQ_WQE_STAG0_IDX = 11,
+ NES_IWARP_RQ_WQE_FRAG1_LOW_IDX = 12,
+ NES_IWARP_RQ_WQE_FRAG1_HIGH_IDX = 13,
+ NES_IWARP_RQ_WQE_LENGTH1_IDX = 14,
+ NES_IWARP_RQ_WQE_STAG1_IDX = 15,
+ NES_IWARP_RQ_WQE_FRAG2_LOW_IDX = 16,
+ NES_IWARP_RQ_WQE_FRAG2_HIGH_IDX = 17,
+ NES_IWARP_RQ_WQE_LENGTH2_IDX = 18,
+ NES_IWARP_RQ_WQE_STAG2_IDX = 19,
+ NES_IWARP_RQ_WQE_FRAG3_LOW_IDX = 20,
+ NES_IWARP_RQ_WQE_FRAG3_HIGH_IDX = 21,
+ NES_IWARP_RQ_WQE_LENGTH3_IDX = 22,
+ NES_IWARP_RQ_WQE_STAG3_IDX = 23,
+};
+
+enum nes_nic_sq_wqe_bits {
+ NES_NIC_SQ_WQE_PHDR_CS_READY = (1<<21),
+ NES_NIC_SQ_WQE_LSO_ENABLE = (1<<22),
+ NES_NIC_SQ_WQE_TAGVALUE_ENABLE = (1<<23),
+ NES_NIC_SQ_WQE_DISABLE_CHKSUM = (1<<30),
+ NES_NIC_SQ_WQE_COMPLETION = (1<<31),
+};
+
+enum nes_nic_cqe_word_idx {
+ NES_NIC_CQE_ACCQP_ID_IDX = 0,
+ NES_NIC_CQE_TAG_PKT_TYPE_IDX = 2,
+ NES_NIC_CQE_MISC_IDX = 3,
+};
+
+#define NES_PKT_TYPE_APBVT_BITS 0xC112
+#define NES_PKT_TYPE_APBVT_MASK 0xff3e
+
+#define NES_PKT_TYPE_PVALID_BITS 0x10000000
+#define NES_PKT_TYPE_PVALID_MASK 0x30000000
+
+#define NES_PKT_TYPE_TCPV4_BITS 0x0110
+#define NES_PKT_TYPE_TCPV4_MASK 0x3f30
+
+#define NES_PKT_TYPE_UDPV4_BITS 0x0210
+#define NES_PKT_TYPE_UDPV4_MASK 0x3f30
+
+#define NES_PKT_TYPE_IPV4_BITS 0x0010
+#define NES_PKT_TYPE_IPV4_MASK 0x3f30
+
+#define NES_PKT_TYPE_OTHER_BITS 0x0000
+#define NES_PKT_TYPE_OTHER_MASK 0x0030
+
+#define NES_NIC_CQE_ERRV_SHIFT 16
+enum nes_nic_ev_bits {
+ NES_NIC_ERRV_BITS_MODE = (1<<0),
+ NES_NIC_ERRV_BITS_IPV4_CSUM_ERR = (1<<1),
+ NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR = (1<<2),
+ NES_NIC_ERRV_BITS_WQE_OVERRUN = (1<<3),
+ NES_NIC_ERRV_BITS_IPH_ERR = (1<<4),
+};
+
+enum nes_nic_cqe_bits {
+ NES_NIC_CQE_ERRV_MASK = (0xff<<NES_NIC_CQE_ERRV_SHIFT),
+ NES_NIC_CQE_SQ = (1<<24),
+ NES_NIC_CQE_ACCQP_PORT = (1<<28),
+ NES_NIC_CQE_ACCQP_VALID = (1<<29),
+ NES_NIC_CQE_TAG_VALID = (1<<30),
+ NES_NIC_CQE_VALID = (1<<31),
+};
+
+enum nes_aeqe_word_idx {
+ NES_AEQE_COMP_CTXT_LOW_IDX = 0,
+ NES_AEQE_COMP_CTXT_HIGH_IDX = 1,
+ NES_AEQE_COMP_QP_CQ_ID_IDX = 2,
+ NES_AEQE_MISC_IDX = 3,
+};
+
+enum nes_aeqe_bits {
+ NES_AEQE_QP = (1<<16),
+ NES_AEQE_CQ = (1<<17),
+ NES_AEQE_SQ = (1<<18),
+ NES_AEQE_INBOUND_RDMA = (1<<19),
+ NES_AEQE_IWARP_STATE_MASK = (7<<20),
+ NES_AEQE_TCP_STATE_MASK = (0xf<<24),
+ NES_AEQE_VALID = (1<<31),
+};
+
+#define NES_AEQE_IWARP_STATE_SHIFT 20
+#define NES_AEQE_TCP_STATE_SHIFT 24
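+
+/*
+ * The low 16 bits of the AEQE misc word carry the async event id; bits 16-19
+ * are the QP/CQ/SQ/inbound-RDMA flags, bits 20-22 the iWARP state and bits
+ * 24-27 the TCP state (see the masks and shifts above). nes_process_iwarp_aeqe()
+ * decodes the word using these definitions.
+ */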
+
+enum nes_aeqe_iwarp_state {
+ NES_AEQE_IWARP_STATE_NON_EXISTANT = 0,
+ NES_AEQE_IWARP_STATE_IDLE = 1,
+ NES_AEQE_IWARP_STATE_RTS = 2,
+ NES_AEQE_IWARP_STATE_CLOSING = 3,
+ NES_AEQE_IWARP_STATE_TERMINATE = 5,
+ NES_AEQE_IWARP_STATE_ERROR = 6
+};
+
+enum nes_aeqe_tcp_state {
+ NES_AEQE_TCP_STATE_NON_EXISTANT = 0,
+ NES_AEQE_TCP_STATE_CLOSED = 1,
+ NES_AEQE_TCP_STATE_LISTEN = 2,
+ NES_AEQE_TCP_STATE_SYN_SENT = 3,
+ NES_AEQE_TCP_STATE_SYN_RCVD = 4,
+ NES_AEQE_TCP_STATE_ESTABLISHED = 5,
+ NES_AEQE_TCP_STATE_CLOSE_WAIT = 6,
+ NES_AEQE_TCP_STATE_FIN_WAIT_1 = 7,
+ NES_AEQE_TCP_STATE_CLOSING = 8,
+ NES_AEQE_TCP_STATE_LAST_ACK = 9,
+ NES_AEQE_TCP_STATE_FIN_WAIT_2 = 10,
+ NES_AEQE_TCP_STATE_TIME_WAIT = 11
+};
+
+enum nes_aeqe_aeid {
+ NES_AEQE_AEID_AMP_UNALLOCATED_STAG = 0x0102,
+ NES_AEQE_AEID_AMP_INVALID_STAG = 0x0103,
+ NES_AEQE_AEID_AMP_BAD_QP = 0x0104,
+ NES_AEQE_AEID_AMP_BAD_PD = 0x0105,
+ NES_AEQE_AEID_AMP_BAD_STAG_KEY = 0x0106,
+ NES_AEQE_AEID_AMP_BAD_STAG_INDEX = 0x0107,
+ NES_AEQE_AEID_AMP_BOUNDS_VIOLATION = 0x0108,
+ NES_AEQE_AEID_AMP_RIGHTS_VIOLATION = 0x0109,
+ NES_AEQE_AEID_AMP_TO_WRAP = 0x010a,
+ NES_AEQE_AEID_AMP_FASTREG_SHARED = 0x010b,
+ NES_AEQE_AEID_AMP_FASTREG_VALID_STAG = 0x010c,
+ NES_AEQE_AEID_AMP_FASTREG_MW_STAG = 0x010d,
+ NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS = 0x010e,
+ NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW = 0x010f,
+ NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH = 0x0110,
+ NES_AEQE_AEID_AMP_INVALIDATE_SHARED = 0x0111,
+ NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS = 0x0112,
+ NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS = 0x0113,
+ NES_AEQE_AEID_AMP_MWBIND_VALID_STAG = 0x0114,
+ NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG = 0x0115,
+ NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG = 0x0116,
+ NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG = 0x0117,
+ NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS = 0x0118,
+ NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS = 0x0119,
+ NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT = 0x011a,
+ NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED = 0x011b,
+ NES_AEQE_AEID_BAD_CLOSE = 0x0201,
+ NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE = 0x0202,
+ NES_AEQE_AEID_CQ_OPERATION_ERROR = 0x0203,
+ NES_AEQE_AEID_PRIV_OPERATION_DENIED = 0x0204,
+ NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO = 0x0205,
+ NES_AEQE_AEID_STAG_ZERO_INVALID = 0x0206,
+ NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN = 0x0301,
+ NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID = 0x0302,
+ NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER = 0x0303,
+ NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION = 0x0304,
+ NES_AEQE_AEID_DDP_UBE_INVALID_MO = 0x0305,
+ NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE = 0x0306,
+ NES_AEQE_AEID_DDP_UBE_INVALID_QN = 0x0307,
+ NES_AEQE_AEID_DDP_NO_L_BIT = 0x0308,
+ NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION = 0x0311,
+ NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE = 0x0312,
+ NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST = 0x0313,
+ NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP = 0x0314,
+ NES_AEQE_AEID_INVALID_ARP_ENTRY = 0x0401,
+ NES_AEQE_AEID_INVALID_TCP_OPTION_RCVD = 0x0402,
+ NES_AEQE_AEID_STALE_ARP_ENTRY = 0x0403,
+ NES_AEQE_AEID_LLP_CLOSE_COMPLETE = 0x0501,
+ NES_AEQE_AEID_LLP_CONNECTION_RESET = 0x0502,
+ NES_AEQE_AEID_LLP_FIN_RECEIVED = 0x0503,
+ NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH = 0x0504,
+ NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR = 0x0505,
+ NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE = 0x0506,
+ NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL = 0x0507,
+ NES_AEQE_AEID_LLP_SYN_RECEIVED = 0x0508,
+ NES_AEQE_AEID_LLP_TERMINATE_RECEIVED = 0x0509,
+ NES_AEQE_AEID_LLP_TOO_MANY_RETRIES = 0x050a,
+ NES_AEQE_AEID_LLP_TOO_MANY_KEEPALIVE_RETRIES = 0x050b,
+ NES_AEQE_AEID_RESET_SENT = 0x0601,
+ NES_AEQE_AEID_TERMINATE_SENT = 0x0602,
+ NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC = 0x0700
+};
+
+enum nes_iwarp_sq_opcodes {
+ NES_IWARP_SQ_WQE_WRPDU = (1<<15),
+ NES_IWARP_SQ_WQE_PSH = (1<<21),
+ NES_IWARP_SQ_WQE_STREAMING = (1<<23),
+ NES_IWARP_SQ_WQE_IMM_DATA = (1<<28),
+ NES_IWARP_SQ_WQE_READ_FENCE = (1<<29),
+ NES_IWARP_SQ_WQE_LOCAL_FENCE = (1<<30),
+ NES_IWARP_SQ_WQE_SIGNALED_COMPL = (1<<31),
+};
+
+enum nes_iwarp_sq_wqe_bits {
+ NES_IWARP_SQ_OP_RDMAW = 0,
+ NES_IWARP_SQ_OP_RDMAR = 1,
+ NES_IWARP_SQ_OP_SEND = 3,
+ NES_IWARP_SQ_OP_SENDINV = 4,
+ NES_IWARP_SQ_OP_SENDSE = 5,
+ NES_IWARP_SQ_OP_SENDSEINV = 6,
+ NES_IWARP_SQ_OP_BIND = 8,
+ NES_IWARP_SQ_OP_FAST_REG = 9,
+ NES_IWARP_SQ_OP_LOCINV = 10,
+ NES_IWARP_SQ_OP_RDMAR_LOCINV = 11,
+ NES_IWARP_SQ_OP_NOP = 12,
+};
+
+#define NES_EEPROM_READ_REQUEST (1<<16)
+#define NES_MAC_ADDR_VALID (1<<20)
+
+/*
+ * NES index registers init values.
+ */
+struct nes_init_values {
+ u32 index;
+ u32 data;
+ u8 wrt;
+};
+
+/*
+ * NES registers in BAR0.
+ */
+struct nes_pci_regs {
+ u32 int_status;
+ u32 int_mask;
+ u32 int_pending;
+ u32 intf_int_status;
+ u32 intf_int_mask;
+ u32 other_regs[59]; /* pad out to 256 bytes for now */
+};
+
+#define NES_CQP_SQ_SIZE 128
+#define NES_CCQ_SIZE 128
+#define NES_NIC_WQ_SIZE 512
+#define NES_NIC_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_512) | (NES_NIC_CTX_SQ_SIZE_512))
+#define NES_NIC_BACK_STORE 0x00038000
+
+struct nes_device;
+
+struct nes_hw_nic_qp_context {
+ __le32 context_words[6];
+};
+
+struct nes_hw_nic_sq_wqe {
+ __le32 wqe_words[16];
+};
+
+struct nes_hw_nic_rq_wqe {
+ __le32 wqe_words[16];
+};
+
+struct nes_hw_nic_cqe {
+ __le32 cqe_words[4];
+};
+
+struct nes_hw_cqp_qp_context {
+ __le32 context_words[4];
+};
+
+struct nes_hw_cqp_wqe {
+ __le32 wqe_words[16];
+};
+
+struct nes_hw_qp_wqe {
+ __le32 wqe_words[32];
+};
+
+struct nes_hw_cqe {
+ __le32 cqe_words[8];
+};
+
+struct nes_hw_ceqe {
+ __le32 ceqe_words[2];
+};
+
+struct nes_hw_aeqe {
+ __le32 aeqe_words[4];
+};
+
+struct nes_cqp_request {
+ union {
+ u64 cqp_callback_context;
+ void *cqp_callback_pointer;
+ };
+ wait_queue_head_t waitq;
+ struct nes_hw_cqp_wqe cqp_wqe;
+ struct list_head list;
+ atomic_t refcount;
+ void (*cqp_callback)(struct nes_device *nesdev, struct nes_cqp_request *cqp_request);
+ u16 major_code;
+ u16 minor_code;
+ u8 waiting;
+ u8 request_done;
+ u8 dynamic;
+ u8 callback;
+};
+
+struct nes_hw_cqp {
+ struct nes_hw_cqp_wqe *sq_vbase;
+ dma_addr_t sq_pbase;
+ spinlock_t lock;
+ wait_queue_head_t waitq;
+ u16 qp_id;
+ u16 sq_head;
+ u16 sq_tail;
+ u16 sq_size;
+};
+
+#define NES_FIRST_FRAG_SIZE 128
+struct nes_first_frag {
+ u8 buffer[NES_FIRST_FRAG_SIZE];
+};
+
+struct nes_hw_nic {
+ struct nes_first_frag *first_frag_vbase; /* virtual address of first frags */
+ struct nes_hw_nic_sq_wqe *sq_vbase; /* virtual address of sq */
+ struct nes_hw_nic_rq_wqe *rq_vbase; /* virtual address of rq */
+ struct sk_buff *tx_skb[NES_NIC_WQ_SIZE];
+ struct sk_buff *rx_skb[NES_NIC_WQ_SIZE];
+ dma_addr_t frag_paddr[NES_NIC_WQ_SIZE];
+ unsigned long first_frag_overflow[BITS_TO_LONGS(NES_NIC_WQ_SIZE)];
+ dma_addr_t sq_pbase; /* PCI memory for host rings */
+ dma_addr_t rq_pbase; /* PCI memory for host rings */
+
+ u16 qp_id;
+ u16 sq_head;
+ u16 sq_tail;
+ u16 sq_size;
+ u16 rq_head;
+ u16 rq_tail;
+ u16 rq_size;
+ u8 replenishing_rq;
+ u8 reserved;
+
+ spinlock_t sq_lock;
+ spinlock_t rq_lock;
+};
+
+struct nes_hw_nic_cq {
+ struct nes_hw_nic_cqe volatile *cq_vbase; /* PCI memory for host rings */
+ void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_nic_cq *cq);
+ dma_addr_t cq_pbase; /* PCI memory for host rings */
+ int rx_cqes_completed;
+ int cqe_allocs_pending;
+ int rx_pkts_indicated;
+ u16 cq_head;
+ u16 cq_size;
+ u16 cq_number;
+ u8 cqes_pending;
+};
+
+struct nes_hw_qp {
+ struct nes_hw_qp_wqe *sq_vbase; /* PCI memory for host rings */
+ struct nes_hw_qp_wqe *rq_vbase; /* PCI memory for host rings */
+ void *q2_vbase; /* PCI memory for host rings */
+ dma_addr_t sq_pbase; /* PCI memory for host rings */
+ dma_addr_t rq_pbase; /* PCI memory for host rings */
+ dma_addr_t q2_pbase; /* PCI memory for host rings */
+ u32 qp_id;
+ u16 sq_head;
+ u16 sq_tail;
+ u16 sq_size;
+ u16 rq_head;
+ u16 rq_tail;
+ u16 rq_size;
+ u8 rq_encoded_size;
+ u8 sq_encoded_size;
+};
+
+struct nes_hw_cq {
+ struct nes_hw_cqe *cq_vbase; /* PCI memory for host rings */
+ void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_cq *cq);
+ dma_addr_t cq_pbase; /* PCI memory for host rings */
+ u16 cq_head;
+ u16 cq_size;
+ u16 cq_number;
+};
+
+struct nes_hw_ceq {
+ struct nes_hw_ceqe volatile *ceq_vbase; /* PCI memory for host rings */
+ dma_addr_t ceq_pbase; /* PCI memory for host rings */
+ u16 ceq_head;
+ u16 ceq_size;
+};
+
+struct nes_hw_aeq {
+ struct nes_hw_aeqe volatile *aeq_vbase; /* PCI memory for host rings */
+ dma_addr_t aeq_pbase; /* PCI memory for host rings */
+ u16 aeq_head;
+ u16 aeq_size;
+};
+
+struct nic_qp_map {
+ u8 qpid;
+ u8 nic_index;
+ u8 logical_port;
+ u8 is_hnic;
+};
+
+#define NES_CQP_ARP_AEQ_INDEX_MASK 0x000f0000
+#define NES_CQP_ARP_AEQ_INDEX_SHIFT 16
+
+#define NES_CQP_APBVT_ADD 0x00008000
+#define NES_CQP_APBVT_NIC_SHIFT 16
+
+#define NES_ARP_ADD 1
+#define NES_ARP_DELETE 2
+#define NES_ARP_RESOLVE 3
+
+#define NES_MAC_SW_IDLE 0
+#define NES_MAC_SW_INTERRUPT 1
+#define NES_MAC_SW_MH 2
+
+struct nes_arp_entry {
+ u32 ip_addr;
+ u8 mac_addr[ETH_ALEN];
+};
+
+#define NES_NIC_FAST_TIMER 96
+#define NES_NIC_FAST_TIMER_LOW 40
+#define NES_NIC_FAST_TIMER_HIGH 1000
+#define DEFAULT_NES_QL_HIGH 256
+#define DEFAULT_NES_QL_LOW 16
+#define DEFAULT_NES_QL_TARGET 64
+#define DEFAULT_JUMBO_NES_QL_LOW 12
+#define DEFAULT_JUMBO_NES_QL_TARGET 40
+#define DEFAULT_JUMBO_NES_QL_HIGH 128
+#define NES_NIC_CQ_DOWNWARD_TREND 16
+#define NES_PFT_SIZE 48
+
+struct nes_hw_tune_timer {
+ /* u16 cq_count; */
+ u16 threshold_low;
+ u16 threshold_target;
+ u16 threshold_high;
+ u16 timer_in_use;
+ u16 timer_in_use_old;
+ u16 timer_in_use_min;
+ u16 timer_in_use_max;
+ u8 timer_direction_upward;
+ u8 timer_direction_downward;
+ u16 cq_count_old;
+ u8 cq_direction_downward;
+};
+
+#define NES_TIMER_INT_LIMIT 2
+#define NES_TIMER_INT_LIMIT_DYNAMIC 10
+#define NES_TIMER_ENABLE_LIMIT 4
+#define NES_MAX_LINK_INTERRUPTS 128
+#define NES_MAX_LINK_CHECK 200
+#define NES_MAX_LRO_DESCRIPTORS 32
+#define NES_LRO_MAX_AGGR 64
+
+struct nes_adapter {
+ u64 fw_ver;
+ unsigned long *allocated_qps;
+ unsigned long *allocated_cqs;
+ unsigned long *allocated_mrs;
+ unsigned long *allocated_pds;
+ unsigned long *allocated_arps;
+ struct nes_qp **qp_table;
+ struct workqueue_struct *work_q;
+
+ struct list_head list;
+ struct list_head active_listeners;
+ /* list of the netdev's associated with each logical port */
+ struct list_head nesvnic_list[4];
+
+ struct timer_list mh_timer;
+ struct timer_list lc_timer;
+ struct work_struct work;
+ spinlock_t resource_lock;
+ spinlock_t phy_lock;
+ spinlock_t pbl_lock;
+ spinlock_t periodic_timer_lock;
+
+ struct nes_arp_entry arp_table[NES_MAX_ARP_TABLE_SIZE];
+
+ /* Adapter CEQ and AEQs */
+ struct nes_hw_ceq ceq[16];
+ struct nes_hw_aeq aeq[8];
+
+ struct nes_hw_tune_timer tune_timer;
+
+ unsigned long doorbell_start;
+
+ u32 hw_rev;
+ u32 vendor_id;
+ u32 vendor_part_id;
+ u32 device_cap_flags;
+ u32 tick_delta;
+ u32 timer_int_req;
+ u32 arp_table_size;
+ u32 next_arp_index;
+
+ u32 max_mr;
+ u32 max_256pbl;
+ u32 max_4kpbl;
+ u32 free_256pbl;
+ u32 free_4kpbl;
+ u32 max_mr_size;
+ u32 max_qp;
+ u32 next_qp;
+ u32 max_irrq;
+ u32 max_qp_wr;
+ u32 max_sge;
+ u32 max_cq;
+ u32 next_cq;
+ u32 max_cqe;
+ u32 max_pd;
+ u32 base_pd;
+ u32 next_pd;
+ u32 hte_index_mask;
+
+ /* EEPROM information */
+ u32 rx_pool_size;
+ u32 tx_pool_size;
+ u32 rx_threshold;
+ u32 tcp_timer_core_clk_divisor;
+ u32 iwarp_config;
+ u32 cm_config;
+ u32 sws_timer_config;
+ u32 tcp_config1;
+ u32 wqm_wat;
+ u32 core_clock;
+ u32 firmware_version;
+
+ u32 nic_rx_eth_route_err;
+
+ u32 et_rx_coalesce_usecs;
+ u32 et_rx_max_coalesced_frames;
+ u32 et_rx_coalesce_usecs_irq;
+ u32 et_rx_max_coalesced_frames_irq;
+ u32 et_pkt_rate_low;
+ u32 et_rx_coalesce_usecs_low;
+ u32 et_rx_max_coalesced_frames_low;
+ u32 et_pkt_rate_high;
+ u32 et_rx_coalesce_usecs_high;
+ u32 et_rx_max_coalesced_frames_high;
+ u32 et_rate_sample_interval;
+ u32 timer_int_limit;
+ u32 wqm_quanta;
+
+ /* Adapter base MAC address */
+ u32 mac_addr_low;
+ u16 mac_addr_high;
+
+ u16 firmware_eeprom_offset;
+ u16 software_eeprom_offset;
+
+ u16 max_irrq_wr;
+
+ /* pd config for each port */
+ u16 pd_config_size[4];
+ u16 pd_config_base[4];
+
+ u16 link_interrupt_count[4];
+ u8 crit_error_count[32];
+
+ /* the phy index for each port */
+ u8 phy_index[4];
+ u8 mac_sw_state[4];
+ u8 mac_link_down[4];
+ u8 phy_type[4];
+ u8 log_port;
+
+ /* PCI information */
+ unsigned int devfn;
+ unsigned char bus_number;
+ unsigned char OneG_Mode;
+
+ unsigned char ref_count;
+ u8 netdev_count;
+ u8 netdev_max; /* from host nic address count in EEPROM */
+ u8 port_count;
+ u8 virtwq;
+ u8 et_use_adaptive_rx_coalesce;
+ u8 adapter_fcn_count;
+ u8 pft_mcast_map[NES_PFT_SIZE];
+};
+
+struct nes_pbl {
+ u64 *pbl_vbase;
+ dma_addr_t pbl_pbase;
+ struct page *page;
+ unsigned long user_base;
+ u32 pbl_size;
+ struct list_head list;
+ /* TODO: need to add list for two level tables */
+};
+
+struct nes_listener {
+ struct work_struct work;
+ struct workqueue_struct *wq;
+ struct nes_vnic *nesvnic;
+ struct iw_cm_id *cm_id;
+ struct list_head list;
+ unsigned long socket;
+ u8 accept_failed;
+};
+
+struct nes_ib_device;
+
+struct nes_vnic {
+ struct nes_ib_device *nesibdev;
+ u64 sq_full;
+ u64 sq_locked;
+ u64 tso_requests;
+ u64 segmented_tso_requests;
+ u64 linearized_skbs;
+ u64 tx_sw_dropped;
+ u64 endnode_nstat_rx_discard;
+ u64 endnode_nstat_rx_octets;
+ u64 endnode_nstat_rx_frames;
+ u64 endnode_nstat_tx_octets;
+ u64 endnode_nstat_tx_frames;
+ u64 endnode_ipv4_tcp_retransmits;
+ /* void *mem; */
+ struct nes_device *nesdev;
+ struct net_device *netdev;
+ struct vlan_group *vlan_grp;
+ atomic_t rx_skbs_needed;
+ atomic_t rx_skb_timer_running;
+ int budget;
+ u32 msg_enable;
+ /* u32 tx_avail; */
+ __be32 local_ipaddr;
+ struct napi_struct napi;
+ spinlock_t tx_lock; /* could use netdev tx lock? */
+ struct timer_list rq_wqes_timer;
+ u32 nic_mem_size;
+ void *nic_vbase;
+ dma_addr_t nic_pbase;
+ struct nes_hw_nic nic;
+ struct nes_hw_nic_cq nic_cq;
+ u32 mcrq_qp_id;
+ struct nes_ucontext *mcrq_ucontext;
+ struct nes_cqp_request *(*get_cqp_request)(struct nes_device *nesdev);
+ void (*post_cqp_request)(struct nes_device *, struct nes_cqp_request *);
+ int (*mcrq_mcast_filter)(struct nes_vnic *nesvnic, __u8 *dmi_addr);
+ struct net_device_stats netstats;
+ /* used to put the netdev on the adapters logical port list */
+ struct list_head list;
+ u16 max_frame_size;
+ u8 netdev_open;
+ u8 linkup;
+ u8 logical_port;
+ u8 netdev_index; /* might not be needed, indexes nesdev->netdev */
+ u8 perfect_filter_index;
+ u8 nic_index;
+ u8 qp_nic_index[4];
+ u8 next_qp_nic_index;
+ u8 of_device_registered;
+ u8 rdma_enabled;
+ u8 rx_checksum_disabled;
+ u32 lro_max_aggr;
+ struct net_lro_mgr lro_mgr;
+ struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS];
+};
+
+struct nes_ib_device {
+ struct ib_device ibdev;
+ struct nes_vnic *nesvnic;
+
+ /* Virtual RNIC Limits */
+ u32 max_mr;
+ u32 max_qp;
+ u32 max_cq;
+ u32 max_pd;
+ u32 num_mr;
+ u32 num_qp;
+ u32 num_cq;
+ u32 num_pd;
+};
+
+#define nes_vlan_rx vlan_hwaccel_receive_skb
+#define nes_netif_rx netif_receive_skb
+
+#endif /* __NES_HW_H */
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
new file mode 100644
index 0000000..7303586
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -0,0 +1,1806 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/ethtool.h>
+#include <net/tcp.h>
+
+#include <net/inet_common.h>
+#include <linux/inet.h>
+
+#include "nes.h"
+
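+/*
+ * Per-PCI-function NIC QP maps; each entry lists {qpid, nic_index,
+ * logical_port, is_hnic} as defined by struct nic_qp_map in nes_hw.h.
+ */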
+static struct nic_qp_map nic_qp_mapping_0[] = {
+ {16,0,0,1},{24,4,0,0},{28,8,0,0},{32,12,0,0},
+ {20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0},
+ {18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0},
+ {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_1[] = {
+ {18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0},
+ {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_2[] = {
+ {20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_3[] = {
+ {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_4[] = {
+ {28,8,0,0},{32,12,0,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_5[] = {
+ {29,9,1,0},{33,13,1,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_6[] = {
+ {30,10,2,0},{34,14,2,0}
+};
+
+static struct nic_qp_map nic_qp_mapping_7[] = {
+ {31,11,3,0},{35,15,3,0}
+};
+
+static struct nic_qp_map *nic_qp_mapping_per_function[] = {
+ nic_qp_mapping_0, nic_qp_mapping_1, nic_qp_mapping_2, nic_qp_mapping_3,
+ nic_qp_mapping_4, nic_qp_mapping_5, nic_qp_mapping_6, nic_qp_mapping_7
+};
+
+static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
+ | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
+static int debug = -1;
+static int nics_per_function = 1;
+
+/**
+ * nes_netdev_poll - NAPI poll handler for the NIC completion queue
+ */
+static int nes_netdev_poll(struct napi_struct *napi, int budget)
+{
+ struct nes_vnic *nesvnic = container_of(napi, struct nes_vnic, napi);
+ struct net_device *netdev = nesvnic->netdev;
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_hw_nic_cq *nescq = &nesvnic->nic_cq;
+
+ nesvnic->budget = budget;
+ nescq->cqes_pending = 0;
+ nescq->rx_cqes_completed = 0;
+ nescq->cqe_allocs_pending = 0;
+ nescq->rx_pkts_indicated = 0;
+
+ nes_nic_ce_handler(nesdev, nescq);
+
+ if (nescq->cqes_pending == 0) {
+ netif_rx_complete(netdev, napi);
+ /* clear out completed cqes and arm */
+ nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
+ nescq->cq_number | (nescq->cqe_allocs_pending << 16));
+ nes_read32(nesdev->regs+NES_CQE_ALLOC);
+ } else {
+ /* clear out completed cqes but don't arm */
+ nes_write32(nesdev->regs+NES_CQE_ALLOC,
+ nescq->cq_number | (nescq->cqe_allocs_pending << 16));
+ nes_debug(NES_DBG_NETDEV, "%s: exiting with work pending\n",
+ nesvnic->netdev->name);
+ }
+ return nescq->rx_pkts_indicated;
+}
+
+
+/**
+ * nes_netdev_open - Activate the network interface; ifconfig
+ * ethx up.
+ */
+static int nes_netdev_open(struct net_device *netdev)
+{
+ u32 macaddr_low;
+ u16 macaddr_high;
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ int ret;
+ int i;
+ struct nes_vnic *first_nesvnic = NULL;
+ u32 nic_active_bit;
+ u32 nic_active;
+ struct list_head *list_pos, *list_temp;
+
+ assert(nesdev != NULL);
+
+ if (nesvnic->netdev_open == 1)
+ return 0;
+
+ if (netif_msg_ifup(nesvnic))
+ printk(KERN_INFO PFX "%s: enabling interface\n", netdev->name);
+
+ ret = nes_init_nic_qp(nesdev, netdev);
+ if (ret) {
+ return ret;
+ }
+
+ netif_carrier_off(netdev);
+ netif_stop_queue(netdev);
+
+ if ((!nesvnic->of_device_registered) && (nesvnic->rdma_enabled)) {
+ nesvnic->nesibdev = nes_init_ofa_device(netdev);
+ if (nesvnic->nesibdev == NULL) {
+ printk(KERN_ERR PFX "%s: nesvnic->nesibdev alloc failed", netdev->name);
+ } else {
+ nesvnic->nesibdev->nesvnic = nesvnic;
+ ret = nes_register_ofa_device(nesvnic->nesibdev);
+ if (ret) {
+ printk(KERN_ERR PFX "%s: Unable to register RDMA device, ret = %d\n",
+ netdev->name, ret);
+ }
+ }
+ }
+ /* Set packet filters */
+ nic_active_bit = 1 << nesvnic->nic_index;
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE);
+ nic_active |= nic_active_bit;
+ nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active);
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE);
+ nic_active |= nic_active_bit;
+ nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active);
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON);
+ nic_active |= nic_active_bit;
+ nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active);
+
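+ /* Split the interface MAC address into the 16-bit high and 32-bit low register halves */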
+ macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
+ macaddr_high += (u16)netdev->dev_addr[1];
+
+ macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
+ macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
+ macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
+ macaddr_low += (u32)netdev->dev_addr[5];
+
+ /* Program the various MAC regs */
+ for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
+ if (nesvnic->qp_nic_index[i] == 0xf) {
+ break;
+ }
+ nes_debug(NES_DBG_NETDEV, "i=%d, perfect filter table index= %d, PERF FILTER LOW"
+ " (Addr:%08X) = %08X, HIGH = %08X.\n",
+ i, nesvnic->qp_nic_index[i],
+ NES_IDX_PERFECT_FILTER_LOW+
+ (nesvnic->qp_nic_index[i] * 8),
+ macaddr_low,
+ (u32)macaddr_high | NES_MAC_ADDR_VALID |
+ ((((u32)nesvnic->nic_index) << 16)));
+ nes_write_indexed(nesdev,
+ NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8),
+ macaddr_low);
+ nes_write_indexed(nesdev,
+ NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8),
+ (u32)macaddr_high | NES_MAC_ADDR_VALID |
+ ((((u32)nesvnic->nic_index) << 16)));
+ }
+
+
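+ /* Arm the NIC CQ so the next completion generates an interrupt */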
+ nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
+ nesvnic->nic_cq.cq_number);
+ nes_read32(nesdev->regs+NES_CQE_ALLOC);
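+ /* Look for another vnic on this MAC that is already open; if none, unmask the MAC interrupts here */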
+ list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) {
+ first_nesvnic = container_of(list_pos, struct nes_vnic, list);
+ if (first_nesvnic->netdev_open == 1)
+ break;
+ }
+ if (first_nesvnic->netdev_open == 0) {
+ nes_debug(NES_DBG_INIT, "Setting up MAC interrupt mask.\n");
+ nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index),
+ ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT |
+ NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR));
+ first_nesvnic = nesvnic;
+ }
+ if (first_nesvnic->linkup) {
+ /* Enable network packets */
+ nesvnic->linkup = 1;
+ netif_start_queue(netdev);
+ netif_carrier_on(netdev);
+ }
+ napi_enable(&nesvnic->napi);
+ nesvnic->netdev_open = 1;
+
+ return 0;
+}
+
+
+/**
+ * nes_netdev_stop - Deactivate the network interface (ifconfig ethX down).
+ */
+static int nes_netdev_stop(struct net_device *netdev)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ u32 nic_active_mask;
+ u32 nic_active;
+ struct nes_vnic *first_nesvnic = NULL;
+ struct list_head *list_pos, *list_temp;
+
+ nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n",
+ nesvnic, nesdev, netdev, netdev->name);
+ if (nesvnic->netdev_open == 0)
+ return 0;
+
+ if (netif_msg_ifdown(nesvnic))
+ printk(KERN_INFO PFX "%s: disabling interface\n", netdev->name);
+
+ /* Disable network packets */
+ napi_disable(&nesvnic->napi);
+ netif_stop_queue(netdev);
+ list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) {
+ first_nesvnic = container_of(list_pos, struct nes_vnic, list);
+ if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic))
+ break;
+ }
+
+ if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic) &&
+ (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) !=
+ PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) {
+ nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+
+ (0x200*nesdev->mac_index), 0xffffffff);
+ nes_write_indexed(first_nesvnic->nesdev,
+ NES_IDX_MAC_INT_MASK+
+ (0x200*first_nesvnic->nesdev->mac_index),
+ ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT |
+ NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR));
+ } else {
+ nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff);
+ }
+
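+ /* Clear this vnic's perfect filter entry and remove it from the packet-filter enables */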
+ nic_active_mask = ~((u32)(1 << nesvnic->nic_index));
+ nes_write_indexed(nesdev, NES_IDX_PERFECT_FILTER_HIGH+
+ (nesvnic->perfect_filter_index*8), 0);
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE);
+ nic_active &= nic_active_mask;
+ nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active);
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
+ nic_active &= nic_active_mask;
+ nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE);
+ nic_active &= nic_active_mask;
+ nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active);
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
+ nic_active &= nic_active_mask;
+ nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON);
+ nic_active &= nic_active_mask;
+ nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active);
+
+
+ if (nesvnic->of_device_registered) {
+ nes_destroy_ofa_device(nesvnic->nesibdev);
+ nesvnic->nesibdev = NULL;
+ nesvnic->of_device_registered = 0;
+ }
+ nes_destroy_nic_qp(nesvnic);
+
+ nesvnic->netdev_open = 0;
+
+ return 0;
+}
+
+
+/**
+ * nes_nic_send - Build and post a single NIC SQ WQE for one skb
+ */
+static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_hw_nic *nesnic = &nesvnic->nic;
+ struct nes_hw_nic_sq_wqe *nic_sqe;
+ struct tcphdr *tcph;
+ __le16 *wqe_fragment_length;
+ u32 wqe_misc;
+ u16 wqe_fragment_index = 1; /* first fragment (0) is used by copy buffer */
+ u16 skb_fragment_index;
+ dma_addr_t bus_address;
+
+ nic_sqe = &nesnic->sq_vbase[nesnic->sq_head];
+ wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
+
+ /* setup the VLAN tag if present */
+ if (vlan_tx_tag_present(skb)) {
+ nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n",
+ netdev->name, vlan_tx_tag_get(skb));
+ wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE;
+ wqe_fragment_length[0] = (__force __le16) vlan_tx_tag_get(skb);
+ } else
+ wqe_misc = 0;
+
+ /* bump past the vlan tag */
+ wqe_fragment_length++;
+ /* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */
+
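+ /* Choose hardware checksum/LSO treatment based on the skb checksum state */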
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ tcph = tcp_hdr(skb);
+ if (skb_is_gso(skb)) {
+ /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... seg size = %u\n",
+ netdev->name, skb_is_gso(skb)); */
+ wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE |
+ NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb);
+ set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
+ ((u32)tcph->doff) |
+ (((u32)(((unsigned char *)tcph) - skb->data)) << 4));
+ } else {
+ wqe_misc |= NES_NIC_SQ_WQE_COMPLETION;
+ }
+ } else { /* CHECKSUM_HW */
+ wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM | NES_NIC_SQ_WQE_COMPLETION;
+ }
+
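+ /* Record the total frame length and copy the start of the head into fragment 0's bounce buffer */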
+ set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX,
+ skb->len);
+ memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer,
+ skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), skb_headlen(skb)));
+ wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE),
+ skb_headlen(skb)));
+ wqe_fragment_length[1] = 0;
+ if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) {
+ if ((skb_shinfo(skb)->nr_frags + 1) > 4) {
+ nes_debug(NES_DBG_NIC_TX, "%s: Packet with %u fragments not sent, skb_headlen=%u\n",
+ netdev->name, skb_shinfo(skb)->nr_frags + 2, skb_headlen(skb));
+ kfree_skb(skb);
+ nesvnic->tx_sw_dropped++;
+ return NETDEV_TX_LOCKED;
+ }
+ set_bit(nesnic->sq_head, nesnic->first_frag_overflow);
+ bus_address = pci_map_single(nesdev->pcidev, skb->data + NES_FIRST_FRAG_SIZE,
+ skb_headlen(skb) - NES_FIRST_FRAG_SIZE, PCI_DMA_TODEVICE);
+ wqe_fragment_length[wqe_fragment_index++] =
+ cpu_to_le16(skb_headlen(skb) - NES_FIRST_FRAG_SIZE);
+ wqe_fragment_length[wqe_fragment_index] = 0;
+ set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
+ ((u64)(bus_address)));
+ nesnic->tx_skb[nesnic->sq_head] = skb;
+ }
+
+ if (skb_headlen(skb) == skb->len) {
+ if (skb_headlen(skb) <= NES_FIRST_FRAG_SIZE) {
+ nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_2_1_IDX] = 0;
+ nesnic->tx_skb[nesnic->sq_head] = NULL;
+ dev_kfree_skb(skb);
+ }
+ } else {
+ /* Deal with Fragments */
+ nesnic->tx_skb[nesnic->sq_head] = skb;
+ for (skb_fragment_index = 0; skb_fragment_index < skb_shinfo(skb)->nr_frags;
+ skb_fragment_index++) {
+ bus_address = pci_map_page( nesdev->pcidev,
+ skb_shinfo(skb)->frags[skb_fragment_index].page,
+ skb_shinfo(skb)->frags[skb_fragment_index].page_offset,
+ skb_shinfo(skb)->frags[skb_fragment_index].size,
+ PCI_DMA_TODEVICE);
+ wqe_fragment_length[wqe_fragment_index] =
+ cpu_to_le16(skb_shinfo(skb)->frags[skb_fragment_index].size);
+ set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index),
+ bus_address);
+ wqe_fragment_index++;
+ if (wqe_fragment_index < 5)
+ wqe_fragment_length[wqe_fragment_index] = 0;
+ }
+ }
+
+ set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX, wqe_misc);
+ nesnic->sq_head++;
+ nesnic->sq_head &= nesnic->sq_size - 1;
+
+ return NETDEV_TX_OK;
+}
+
+
+/**
+ * nes_netdev_start_xmit - Transmit entry point; splits oversized TSO requests across multiple WQEs
+ */
+static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_hw_nic *nesnic = &nesvnic->nic;
+ struct nes_hw_nic_sq_wqe *nic_sqe;
+ struct tcphdr *tcph;
+ /* struct udphdr *udph; */
+#define NES_MAX_TSO_FRAGS MAX_SKB_FRAGS
+ /* 64K segment plus overflow on each side */
+ dma_addr_t tso_bus_address[NES_MAX_TSO_FRAGS];
+ dma_addr_t bus_address;
+ u32 tso_frag_index;
+ u32 tso_frag_count;
+ u32 tso_wqe_length;
+ u32 curr_tcp_seq;
+ u32 wqe_count=1;
+ u32 send_rc;
+ struct iphdr *iph;
+ unsigned long flags;
+ __le16 *wqe_fragment_length;
+ u32 nr_frags;
+ u32 original_first_length;
+ /* u64 *wqe_fragment_address; */
+ /* first fragment (0) is used by copy buffer */
+ u16 wqe_fragment_index=1;
+ u16 hoffset;
+ u16 nhoffset;
+ u16 wqes_needed;
+ u16 wqes_available;
+ u32 old_head;
+ u32 wqe_misc;
+
+ /*
+ * nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
+ * " (%u frags), tso_size=%u\n",
+ * netdev->name, skb->len, skb_headlen(skb),
+ * skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
+ */
+
+ if (!netif_carrier_ok(netdev))
+ return NETDEV_TX_OK;
+
+ if (netif_queue_stopped(netdev))
+ return NETDEV_TX_BUSY;
+
+ local_irq_save(flags);
+ if (!spin_trylock(&nesnic->sq_lock)) {
+ local_irq_restore(flags);
+ nesvnic->sq_locked++;
+ return NETDEV_TX_LOCKED;
+ }
+
+ /* Check if SQ is full */
+ if ((((nesnic->sq_tail+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) == 1) {
+ if (!netif_queue_stopped(netdev)) {
+ netif_stop_queue(netdev);
+ barrier();
+ if ((((((volatile u16)nesnic->sq_tail)+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) != 1) {
+ netif_start_queue(netdev);
+ goto sq_no_longer_full;
+ }
+ }
+ nesvnic->sq_full++;
+ spin_unlock_irqrestore(&nesnic->sq_lock, flags);
+ return NETDEV_TX_BUSY;
+ }
+
+sq_no_longer_full:
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) {
+ nr_frags++;
+ }
+ /* Check if too many fragments */
+ if (unlikely((nr_frags > 4))) {
+ if (skb_is_gso(skb)) {
+ nesvnic->segmented_tso_requests++;
+ nesvnic->tso_requests++;
+ old_head = nesnic->sq_head;
+ /* Basically 4 fragments available per WQE with extended fragments */
+ wqes_needed = nr_frags >> 2;
+ wqes_needed += (nr_frags&3)?1:0;
+ wqes_available = (((nesnic->sq_tail+nesnic->sq_size)-nesnic->sq_head) - 1) &
+ (nesnic->sq_size - 1);
+
+ if (unlikely(wqes_needed > wqes_available)) {
+ if (!netif_queue_stopped(netdev)) {
+ netif_stop_queue(netdev);
+ barrier();
+ wqes_available = (((((volatile u16)nesnic->sq_tail)+nesnic->sq_size)-nesnic->sq_head) - 1) &
+ (nesnic->sq_size - 1);
+ if (wqes_needed <= wqes_available) {
+ netif_start_queue(netdev);
+ goto tso_sq_no_longer_full;
+ }
+ }
+ nesvnic->sq_full++;
+ spin_unlock_irqrestore(&nesnic->sq_lock, flags);
+ nes_debug(NES_DBG_NIC_TX, "%s: HNIC SQ full- TSO request has too many frags!\n",
+ netdev->name);
+ return NETDEV_TX_BUSY;
+ }
+tso_sq_no_longer_full:
+ /* Map all the buffers */
+ for (tso_frag_count=0; tso_frag_count < skb_shinfo(skb)->nr_frags;
+ tso_frag_count++) {
+ tso_bus_address[tso_frag_count] = pci_map_page( nesdev->pcidev,
+ skb_shinfo(skb)->frags[tso_frag_count].page,
+ skb_shinfo(skb)->frags[tso_frag_count].page_offset,
+ skb_shinfo(skb)->frags[tso_frag_count].size,
+ PCI_DMA_TODEVICE);
+ }
+
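+ /* Capture the TCP/IP header offsets and the total header length carried in fragment 0 of each WQE */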
+ tso_frag_index = 0;
+ curr_tcp_seq = ntohl(tcp_hdr(skb)->seq);
+ hoffset = skb_transport_header(skb) - skb->data;
+ nhoffset = skb_network_header(skb) - skb->data;
+ original_first_length = hoffset + ((((struct tcphdr *)skb_transport_header(skb))->doff)<<2);
+
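+ /* Build one WQE per group of fragments, re-copying the headers and fixing up the TCP flags and sequence number for each segment */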
+ for (wqe_count=0; wqe_count<((u32)wqes_needed); wqe_count++) {
+ tso_wqe_length = 0;
+ nic_sqe = &nesnic->sq_vbase[nesnic->sq_head];
+ wqe_fragment_length =
+ (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
+ /* setup the VLAN tag if present */
+ if (vlan_tx_tag_present(skb)) {
+ nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n",
+ netdev->name, vlan_tx_tag_get(skb) );
+ wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE;
+ wqe_fragment_length[0] = (__force __le16) vlan_tx_tag_get(skb);
+ } else
+ wqe_misc = 0;
+
+ /* bump past the vlan tag */
+ wqe_fragment_length++;
+
+ /* Assumes header totally fits in allocated buffer and is in first fragment */
+ if (original_first_length > NES_FIRST_FRAG_SIZE) {
+ nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n",
+ original_first_length, NES_FIRST_FRAG_SIZE);
+ nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
+ " (%u frags), tso_size=%u\n",
+ netdev->name,
+ skb->len, skb_headlen(skb),
+ skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
+ }
+ memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer,
+ skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE),
+ original_first_length));
+ iph = (struct iphdr *)
+ (&nesnic->first_frag_vbase[nesnic->sq_head].buffer[nhoffset]);
+ tcph = (struct tcphdr *)
+ (&nesnic->first_frag_vbase[nesnic->sq_head].buffer[hoffset]);
+ if ((wqe_count+1)!=(u32)wqes_needed) {
+ tcph->fin = 0;
+ tcph->psh = 0;
+ tcph->rst = 0;
+ tcph->urg = 0;
+ }
+ if (wqe_count) {
+ tcph->syn = 0;
+ }
+ tcph->seq = htonl(curr_tcp_seq);
+ wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE),
+ original_first_length));
+
+ wqe_fragment_index = 1;
+ if ((wqe_count==0) && (skb_headlen(skb) > original_first_length)) {
+ set_bit(nesnic->sq_head, nesnic->first_frag_overflow);
+ bus_address = pci_map_single(nesdev->pcidev, skb->data + original_first_length,
+ skb_headlen(skb) - original_first_length, PCI_DMA_TODEVICE);
+ wqe_fragment_length[wqe_fragment_index++] =
+ cpu_to_le16(skb_headlen(skb) - original_first_length);
+ wqe_fragment_length[wqe_fragment_index] = 0;
+ set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
+ bus_address);
+ tso_wqe_length += skb_headlen(skb) -
+ original_first_length;
+ }
+ while (wqe_fragment_index < 5) {
+ wqe_fragment_length[wqe_fragment_index] =
+ cpu_to_le16(skb_shinfo(skb)->frags[tso_frag_index].size);
+ set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index),
+ (u64)tso_bus_address[tso_frag_index]);
+ wqe_fragment_index++;
+ tso_wqe_length += skb_shinfo(skb)->frags[tso_frag_index++].size;
+ if (wqe_fragment_index < 5)
+ wqe_fragment_length[wqe_fragment_index] = 0;
+ if (tso_frag_index == tso_frag_count)
+ break;
+ }
+ if ((wqe_count+1) == (u32)wqes_needed) {
+ nesnic->tx_skb[nesnic->sq_head] = skb;
+ } else {
+ nesnic->tx_skb[nesnic->sq_head] = NULL;
+ }
+ wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb);
+ if ((tso_wqe_length + original_first_length) > skb_is_gso(skb)) {
+ wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE;
+ } else {
+ iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset);
+ }
+
+ set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX,
+ wqe_misc);
+ set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
+ ((u32)tcph->doff) | (((u32)hoffset) << 4));
+
+ set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX,
+ tso_wqe_length + original_first_length);
+ curr_tcp_seq += tso_wqe_length;
+ nesnic->sq_head++;
+ nesnic->sq_head &= nesnic->sq_size-1;
+ }
+ } else {
+ nesvnic->linearized_skbs++;
+ hoffset = skb_transport_header(skb) - skb->data;
+ nhoffset = skb_network_header(skb) - skb->data;
+ skb_linearize(skb);
+ skb_set_transport_header(skb, hoffset);
+ skb_set_network_header(skb, nhoffset);
+ send_rc = nes_nic_send(skb, netdev);
+ if (send_rc != NETDEV_TX_OK) {
+ spin_unlock_irqrestore(&nesnic->sq_lock, flags);
+ return NETDEV_TX_OK;
+ }
+ }
+ } else {
+ send_rc = nes_nic_send(skb, netdev);
+ if (send_rc != NETDEV_TX_OK) {
+ spin_unlock_irqrestore(&nesnic->sq_lock, flags);
+ return NETDEV_TX_OK;
+ }
+ }
+
+ barrier();
+
+ if (wqe_count)
+ nes_write32(nesdev->regs+NES_WQE_ALLOC,
+ (wqe_count << 24) | (1 << 23) | nesvnic->nic.qp_id);
+
+ netdev->trans_start = jiffies;
+ spin_unlock_irqrestore(&nesnic->sq_lock, flags);
+
+ return NETDEV_TX_OK;
+}
+
+
+/**
+ * nes_netdev_get_stats - Return netdev statistics, folding in the hardware counters
+ */
+static struct net_device_stats *nes_netdev_get_stats(struct net_device *netdev)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ u64 u64temp;
+ u32 u32temp;
+
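+ /* Fold the hardware endnode and MAC counters into the netdev statistics */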
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_RX_DISCARD + (nesvnic->nic_index*0x200));
+ nesvnic->netstats.rx_dropped += u32temp;
+ nesvnic->endnode_nstat_rx_discard += u32temp;
+
+ u64temp = (u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO + (nesvnic->nic_index*0x200));
+ u64temp += ((u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32;
+
+ nesvnic->endnode_nstat_rx_octets += u64temp;
+ nesvnic->netstats.rx_bytes += u64temp;
+
+ u64temp = (u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO + (nesvnic->nic_index*0x200));
+ u64temp += ((u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32;
+
+ nesvnic->endnode_nstat_rx_frames += u64temp;
+ nesvnic->netstats.rx_packets += u64temp;
+
+ u64temp = (u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO + (nesvnic->nic_index*0x200));
+ u64temp += ((u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32;
+
+ nesvnic->endnode_nstat_tx_octets += u64temp;
+ nesvnic->netstats.tx_bytes += u64temp;
+
+ u64temp = (u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO + (nesvnic->nic_index*0x200));
+ u64temp += ((u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32;
+
+ nesvnic->endnode_nstat_tx_frames += u64temp;
+ nesvnic->netstats.tx_packets += u64temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_RX_SHORT_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->netstats.rx_dropped += u32temp;
+ nesvnic->nesdev->mac_rx_errors += u32temp;
+ nesvnic->nesdev->mac_rx_short_frames += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_RX_OVERSIZED_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->netstats.rx_dropped += u32temp;
+ nesvnic->nesdev->mac_rx_errors += u32temp;
+ nesvnic->nesdev->mac_rx_oversized_frames += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_RX_JABBER_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->netstats.rx_dropped += u32temp;
+ nesvnic->nesdev->mac_rx_errors += u32temp;
+ nesvnic->nesdev->mac_rx_jabber_frames += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->netstats.rx_dropped += u32temp;
+ nesvnic->nesdev->mac_rx_errors += u32temp;
+ nesvnic->nesdev->mac_rx_symbol_err_frames += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_RX_LENGTH_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->netstats.rx_length_errors += u32temp;
+ nesvnic->nesdev->mac_rx_errors += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_RX_CRC_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->nesdev->mac_rx_errors += u32temp;
+ nesvnic->nesdev->mac_rx_crc_errors += u32temp;
+ nesvnic->netstats.rx_crc_errors += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_TX_ERRORS + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->nesdev->mac_tx_errors += u32temp;
+ nesvnic->netstats.tx_errors += u32temp;
+
+ return &nesvnic->netstats;
+}
+
+
+/**
+ * nes_netdev_tx_timeout
+ */
+static void nes_netdev_tx_timeout(struct net_device *netdev)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+
+ if (netif_msg_timer(nesvnic))
+ nes_debug(NES_DBG_NIC_TX, "%s: tx timeout\n", netdev->name);
+}
+
+
+/**
+ * nes_netdev_set_mac_address
+ */
+static int nes_netdev_set_mac_address(struct net_device *netdev, void *p)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct sockaddr *mac_addr = p;
+ int i;
+ u32 macaddr_low;
+ u16 macaddr_high;
+ DECLARE_MAC_BUF(mac);
+
+ if (!is_valid_ether_addr(mac_addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len);
+ printk(KERN_INFO PFX "%s: Address length = %d, Address = %s\n",
+ __func__, netdev->addr_len, print_mac(mac, mac_addr->sa_data));
+ macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
+ macaddr_high += (u16)netdev->dev_addr[1];
+ macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
+ macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
+ macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
+ macaddr_low += (u32)netdev->dev_addr[5];
+
+ for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
+ if (nesvnic->qp_nic_index[i] == 0xf) {
+ break;
+ }
+ nes_write_indexed(nesdev,
+ NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8),
+ macaddr_low);
+ nes_write_indexed(nesdev,
+ NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8),
+ (u32)macaddr_high | NES_MAC_ADDR_VALID |
+ ((((u32)nesvnic->nic_index) << 16)));
+ }
+ return 0;
+}
+
+
+/**
+ * nes_netdev_set_multicast_list - Program promiscuous/multicast filtering for this vnic
+ */
+static void nes_netdev_set_multicast_list(struct net_device *netdev)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
+ struct dev_mc_list *multicast_addr;
+ u32 nic_active_bit;
+ u32 nic_active;
+ u32 perfect_filter_register_address;
+ u32 macaddr_low;
+ u16 macaddr_high;
+ u8 mc_all_on = 0;
+ u8 mc_index;
+ int mc_nic_index = -1;
+ u8 pft_entries_preallocated = max(nesadapter->adapter_fcn_count *
+ nics_per_function, 4);
+ u8 max_pft_entries_avaiable = NES_PFT_SIZE - pft_entries_preallocated;
+ unsigned long flags;
+
+ spin_lock_irqsave(&nesadapter->resource_lock, flags);
+ nic_active_bit = 1 << nesvnic->nic_index;
+
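+ /* Select promiscuous, all-multicast, or filtered reception for this NIC index */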
+ if (netdev->flags & IFF_PROMISC) {
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
+ nic_active |= nic_active_bit;
+ nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
+ nic_active |= nic_active_bit;
+ nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
+ mc_all_on = 1;
+ } else if ((netdev->flags & IFF_ALLMULTI) ||
+ (nesvnic->nic_index > 3)) {
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
+ nic_active |= nic_active_bit;
+ nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
+ nic_active &= ~nic_active_bit;
+ nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
+ mc_all_on = 1;
+ } else {
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
+ nic_active &= ~nic_active_bit;
+ nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
+ nic_active &= ~nic_active_bit;
+ nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
+ }
+
+ nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscous = %d, All Multicast = %d.\n",
+ netdev->mc_count, !!(netdev->flags & IFF_PROMISC),
+ !!(netdev->flags & IFF_ALLMULTI));
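+ /* Program individual multicast addresses into the free perfect filter entries */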
+ if (!mc_all_on) {
+ multicast_addr = netdev->mc_list;
+ perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW +
+ pft_entries_preallocated * 0x8;
+ for (mc_index = 0; mc_index < max_pft_entries_avaiable;
+ mc_index++) {
+ while (multicast_addr && nesvnic->mcrq_mcast_filter &&
+ ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic,
+ multicast_addr->dmi_addr)) == 0)) {
+ multicast_addr = multicast_addr->next;
+ }
+ if (mc_nic_index < 0)
+ mc_nic_index = nesvnic->nic_index;
+ while (nesadapter->pft_mcast_map[mc_index] < 16 &&
+ nesadapter->pft_mcast_map[mc_index] !=
+ nesvnic->nic_index &&
+ mc_index < max_pft_entries_avaiable) {
+ nes_debug(NES_DBG_NIC_RX,
+ "mc_index=%d skipping nic_index=%d, used for=%d\n",
+ mc_index, nesvnic->nic_index,
+ nesadapter->pft_mcast_map[mc_index]);
+ mc_index++;
+ }
+ if (mc_index >= max_pft_entries_avaiable)
+ break;
+ if (multicast_addr) {
+ DECLARE_MAC_BUF(mac);
+ nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %s to register 0x%04X nic_idx=%d\n",
+ print_mac(mac, multicast_addr->dmi_addr),
+ perfect_filter_register_address+(mc_index * 8),
+ mc_nic_index);
+ macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8;
+ macaddr_high += (u16)multicast_addr->dmi_addr[1];
+ macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24;
+ macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16;
+ macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8;
+ macaddr_low += (u32)multicast_addr->dmi_addr[5];
+ nes_write_indexed(nesdev,
+ perfect_filter_register_address+(mc_index * 8),
+ macaddr_low);
+ nes_write_indexed(nesdev,
+ perfect_filter_register_address+4+(mc_index * 8),
+ (u32)macaddr_high | NES_MAC_ADDR_VALID |
+ ((((u32)(1<<mc_nic_index)) << 16)));
+ multicast_addr = multicast_addr->next;
+ nesadapter->pft_mcast_map[mc_index] =
+ nesvnic->nic_index;
+ } else {
+ nes_debug(NES_DBG_NIC_RX, "Clearing MC Address at register 0x%04X\n",
+ perfect_filter_register_address+(mc_index * 8));
+ nes_write_indexed(nesdev,
+ perfect_filter_register_address+4+(mc_index * 8),
+ 0);
+ nesadapter->pft_mcast_map[mc_index] = 255;
+ }
+ }
+ /* PFT is not large enough */
+ if (multicast_addr && multicast_addr->next) {
+ nic_active = nes_read_indexed(nesdev,
+ NES_IDX_NIC_MULTICAST_ALL);
+ nic_active |= nic_active_bit;
+ nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL,
+ nic_active);
+ nic_active = nes_read_indexed(nesdev,
+ NES_IDX_NIC_UNICAST_ALL);
+ nic_active &= ~nic_active_bit;
+ nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL,
+ nic_active);
+ }
+ }
+
+ spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
+}
+
+
+/**
+ * nes_netdev_change_mtu - Change the MTU, restarting the interface if it is running
+ */
+static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ int ret = 0;
+ u8 jumbomode = 0;
+ u32 nic_active;
+ u32 nic_active_bit;
+ u32 uc_all_active;
+ u32 mc_all_active;
+
+ if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu))
+ return -EINVAL;
+
+ netdev->mtu = new_mtu;
+ nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN;
+
+ if (netdev->mtu > 1500) {
+ jumbomode=1;
+ }
+ nes_nic_init_timer_defaults(nesdev, jumbomode);
+
+ if (netif_running(netdev)) {
+ nic_active_bit = 1 << nesvnic->nic_index;
+ mc_all_active = nes_read_indexed(nesdev,
+ NES_IDX_NIC_MULTICAST_ALL) & nic_active_bit;
+ uc_all_active = nes_read_indexed(nesdev,
+ NES_IDX_NIC_UNICAST_ALL) & nic_active_bit;
+
+ nes_netdev_stop(netdev);
+ nes_netdev_open(netdev);
+
+ nic_active = nes_read_indexed(nesdev,
+ NES_IDX_NIC_MULTICAST_ALL);
+ nic_active |= mc_all_active;
+ nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL,
+ nic_active);
+
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
+ nic_active |= uc_all_active;
+ nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
+ }
+
+ return ret;
+}
+
+static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = {
+ "Link Change Interrupts",
+ "Linearized SKBs",
+ "T/GSO Requests",
+ "Pause Frames Sent",
+ "Pause Frames Received",
+ "Internal Routing Errors",
+ "SQ SW Dropped SKBs",
+ "SQ Locked",
+ "SQ Full",
+ "Segmented TSO Requests",
+ "Rx Symbol Errors",
+ "Rx Jabber Errors",
+ "Rx Oversized Frames",
+ "Rx Short Frames",
+ "Endnode Rx Discards",
+ "Endnode Rx Octets",
+ "Endnode Rx Frames",
+ "Endnode Tx Octets",
+ "Endnode Tx Frames",
+ "mh detected",
+ "mh pauses",
+ "Retransmission Count",
+ "CM Connects",
+ "CM Accepts",
+ "Disconnects",
+ "Connected Events",
+ "Connect Requests",
+ "CM Rejects",
+ "ModifyQP Timeouts",
+ "CreateQPs",
+ "SW DestroyQPs",
+ "DestroyQPs",
+ "CM Closes",
+ "CM Packets Sent",
+ "CM Packets Bounced",
+ "CM Packets Created",
+ "CM Packets Rcvd",
+ "CM Packets Dropped",
+ "CM Packets Retrans",
+ "CM Listens Created",
+ "CM Listens Destroyed",
+ "CM Backlog Drops",
+ "CM Loopbacks",
+ "CM Nodes Created",
+ "CM Nodes Destroyed",
+ "CM Accel Drops",
+ "CM Resets Received",
+ "Timer Inits",
+ "CQ Depth 1",
+ "CQ Depth 4",
+ "CQ Depth 16",
+ "CQ Depth 24",
+ "CQ Depth 32",
+ "CQ Depth 128",
+ "CQ Depth 256",
+ "LRO aggregated",
+ "LRO flushed",
+ "LRO no_desc",
+};
+
+#define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset)
+
+/**
+ * nes_netdev_get_rx_csum
+ */
+static u32 nes_netdev_get_rx_csum (struct net_device *netdev)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+
+ if (nesvnic->rx_checksum_disabled)
+ return 0;
+ else
+ return 1;
+}
+
+
+/**
+ * nes_netdev_set_rx_csum
+ */
+static int nes_netdev_set_rx_csum(struct net_device *netdev, u32 enable)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+
+ if (enable)
+ nesvnic->rx_checksum_disabled = 0;
+ else
+ nesvnic->rx_checksum_disabled = 1;
+ return 0;
+}
+
+
+/**
+ * nes_netdev_get_stats_count
+ */
+static int nes_netdev_get_stats_count(struct net_device *netdev)
+{
+ return NES_ETHTOOL_STAT_COUNT;
+}
+
+
+/**
+ * nes_netdev_get_strings
+ */
+static void nes_netdev_get_strings(struct net_device *netdev, u32 stringset,
+ u8 *ethtool_strings)
+{
+ if (stringset == ETH_SS_STATS)
+ memcpy(ethtool_strings,
+ &nes_ethtool_stringset,
+ sizeof(nes_ethtool_stringset));
+}
+
+
+/**
+ * nes_netdev_get_ethtool_stats
+ */
+static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *target_ethtool_stats, u64 *target_stat_values)
+{
+ u64 u64temp;
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ u32 nic_count;
+ u32 u32temp;
+
+ target_ethtool_stats->n_stats = NES_ETHTOOL_STAT_COUNT;
+ target_stat_values[0] = nesvnic->nesdev->link_status_interrupts;
+ target_stat_values[1] = nesvnic->linearized_skbs;
+ target_stat_values[2] = nesvnic->tso_requests;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_TX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->nesdev->mac_pause_frames_sent += u32temp;
+ target_stat_values[3] = nesvnic->nesdev->mac_pause_frames_sent;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_RX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->nesdev->mac_pause_frames_received += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_PORT_RX_DISCARDS + (nesvnic->nesdev->mac_index*0x40));
+ nesvnic->nesdev->port_rx_discards += u32temp;
+ nesvnic->netstats.rx_dropped += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_PORT_TX_DISCARDS + (nesvnic->nesdev->mac_index*0x40));
+ nesvnic->nesdev->port_tx_discards += u32temp;
+ nesvnic->netstats.tx_dropped += u32temp;
+
+ for (nic_count = 0; nic_count < NES_MAX_PORT_COUNT; nic_count++) {
+ if (nesvnic->qp_nic_index[nic_count] == 0xf)
+ break;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_RX_DISCARD +
+ (nesvnic->qp_nic_index[nic_count]*0x200));
+ nesvnic->netstats.rx_dropped += u32temp;
+ nesvnic->endnode_nstat_rx_discard += u32temp;
+
+ u64temp = (u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO +
+ (nesvnic->qp_nic_index[nic_count]*0x200));
+ u64temp += ((u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI +
+ (nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
+
+ nesvnic->endnode_nstat_rx_octets += u64temp;
+ nesvnic->netstats.rx_bytes += u64temp;
+
+ u64temp = (u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO +
+ (nesvnic->qp_nic_index[nic_count]*0x200));
+ u64temp += ((u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI +
+ (nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
+
+ nesvnic->endnode_nstat_rx_frames += u64temp;
+ nesvnic->netstats.rx_packets += u64temp;
+
+ u64temp = (u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO +
+ (nesvnic->qp_nic_index[nic_count]*0x200));
+ u64temp += ((u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI +
+ (nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
+
+ nesvnic->endnode_nstat_tx_octets += u64temp;
+ nesvnic->netstats.tx_bytes += u64temp;
+
+ u64temp = (u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO +
+ (nesvnic->qp_nic_index[nic_count]*0x200));
+ u64temp += ((u64)nes_read_indexed(nesdev,
+ NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI +
+ (nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
+
+ nesvnic->endnode_nstat_tx_frames += u64temp;
+ nesvnic->netstats.tx_packets += u64temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_IPV4_TCP_REXMITS + (nesvnic->qp_nic_index[nic_count]*0x200));
+ nesvnic->endnode_ipv4_tcp_retransmits += u32temp;
+ }
+
+ target_stat_values[4] = nesvnic->nesdev->mac_pause_frames_received;
+ target_stat_values[5] = nesdev->nesadapter->nic_rx_eth_route_err;
+ target_stat_values[6] = nesvnic->tx_sw_dropped;
+ target_stat_values[7] = nesvnic->sq_locked;
+ target_stat_values[8] = nesvnic->sq_full;
+ target_stat_values[9] = nesvnic->segmented_tso_requests;
+ target_stat_values[10] = nesvnic->nesdev->mac_rx_symbol_err_frames;
+ target_stat_values[11] = nesvnic->nesdev->mac_rx_jabber_frames;
+ target_stat_values[12] = nesvnic->nesdev->mac_rx_oversized_frames;
+ target_stat_values[13] = nesvnic->nesdev->mac_rx_short_frames;
+ target_stat_values[14] = nesvnic->endnode_nstat_rx_discard;
+ target_stat_values[15] = nesvnic->endnode_nstat_rx_octets;
+ target_stat_values[16] = nesvnic->endnode_nstat_rx_frames;
+ target_stat_values[17] = nesvnic->endnode_nstat_tx_octets;
+ target_stat_values[18] = nesvnic->endnode_nstat_tx_frames;
+ target_stat_values[19] = mh_detected;
+ target_stat_values[20] = mh_pauses_sent;
+ target_stat_values[21] = nesvnic->endnode_ipv4_tcp_retransmits;
+ target_stat_values[22] = atomic_read(&cm_connects);
+ target_stat_values[23] = atomic_read(&cm_accepts);
+ target_stat_values[24] = atomic_read(&cm_disconnects);
+ target_stat_values[25] = atomic_read(&cm_connecteds);
+ target_stat_values[26] = atomic_read(&cm_connect_reqs);
+ target_stat_values[27] = atomic_read(&cm_rejects);
+ target_stat_values[28] = atomic_read(&mod_qp_timouts);
+ target_stat_values[29] = atomic_read(&qps_created);
+ target_stat_values[30] = atomic_read(&sw_qps_destroyed);
+ target_stat_values[31] = atomic_read(&qps_destroyed);
+ target_stat_values[32] = atomic_read(&cm_closes);
+ target_stat_values[33] = cm_packets_sent;
+ target_stat_values[34] = cm_packets_bounced;
+ target_stat_values[35] = cm_packets_created;
+ target_stat_values[36] = cm_packets_received;
+ target_stat_values[37] = cm_packets_dropped;
+ target_stat_values[38] = cm_packets_retrans;
+ target_stat_values[39] = cm_listens_created;
+ target_stat_values[40] = cm_listens_destroyed;
+ target_stat_values[41] = cm_backlog_drops;
+ target_stat_values[42] = atomic_read(&cm_loopbacks);
+ target_stat_values[43] = atomic_read(&cm_nodes_created);
+ target_stat_values[44] = atomic_read(&cm_nodes_destroyed);
+ target_stat_values[45] = atomic_read(&cm_accel_dropped_pkts);
+ target_stat_values[46] = atomic_read(&cm_resets_recvd);
+ target_stat_values[47] = int_mod_timer_init;
+ target_stat_values[48] = int_mod_cq_depth_1;
+ target_stat_values[49] = int_mod_cq_depth_4;
+ target_stat_values[50] = int_mod_cq_depth_16;
+ target_stat_values[51] = int_mod_cq_depth_24;
+ target_stat_values[52] = int_mod_cq_depth_32;
+ target_stat_values[53] = int_mod_cq_depth_128;
+ target_stat_values[54] = int_mod_cq_depth_256;
+ target_stat_values[55] = nesvnic->lro_mgr.stats.aggregated;
+ target_stat_values[56] = nesvnic->lro_mgr.stats.flushed;
+ target_stat_values[57] = nesvnic->lro_mgr.stats.no_desc;
+
+}
+
+
+/**
+ * nes_netdev_get_drvinfo
+ */
+static void nes_netdev_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
+
+ strcpy(drvinfo->driver, DRV_NAME);
+ strcpy(drvinfo->bus_info, pci_name(nesvnic->nesdev->pcidev));
+ sprintf(drvinfo->fw_version, "%u.%u", nesadapter->firmware_version>>16,
+ nesadapter->firmware_version & 0x000000ff);
+ strcpy(drvinfo->version, DRV_VERSION);
+ drvinfo->n_stats = nes_netdev_get_stats_count(netdev);
+ drvinfo->testinfo_len = 0;
+ drvinfo->eedump_len = 0;
+ drvinfo->regdump_len = 0;
+}
+
+
+/**
+ * nes_netdev_set_coalesce
+ */
+static int nes_netdev_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *et_coalesce)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
+ unsigned long flags;
+
+ spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
+ if (et_coalesce->rx_max_coalesced_frames_low) {
+ shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low;
+ }
+ if (et_coalesce->rx_max_coalesced_frames_irq) {
+ shared_timer->threshold_target = et_coalesce->rx_max_coalesced_frames_irq;
+ }
+ if (et_coalesce->rx_max_coalesced_frames_high) {
+ shared_timer->threshold_high = et_coalesce->rx_max_coalesced_frames_high;
+ }
+ if (et_coalesce->rx_coalesce_usecs_low) {
+ shared_timer->timer_in_use_min = et_coalesce->rx_coalesce_usecs_low;
+ }
+ if (et_coalesce->rx_coalesce_usecs_high) {
+ shared_timer->timer_in_use_max = et_coalesce->rx_coalesce_usecs_high;
+ }
+ spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
+
+ /* using this to drive total interrupt moderation */
+ nesadapter->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq;
+ if (et_coalesce->use_adaptive_rx_coalesce) {
+ nesadapter->et_use_adaptive_rx_coalesce = 1;
+ nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC;
+ nesadapter->et_rx_coalesce_usecs_irq = 0;
+ if (et_coalesce->pkt_rate_low) {
+ nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low;
+ }
+ } else {
+ nesadapter->et_use_adaptive_rx_coalesce = 0;
+ nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT;
+ if (nesadapter->et_rx_coalesce_usecs_irq) {
+ nes_write32(nesdev->regs+NES_PERIODIC_CONTROL,
+ 0x80000000 | ((u32)(nesadapter->et_rx_coalesce_usecs_irq*8)));
+ }
+ }
+ return 0;
+}
+
+
+/**
+ * nes_netdev_get_coalesce
+ */
+static int nes_netdev_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *et_coalesce)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct ethtool_coalesce temp_et_coalesce;
+ struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
+ unsigned long flags;
+
+ memset(&temp_et_coalesce, 0, sizeof(temp_et_coalesce));
+ temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq;
+ temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce;
+ temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval;
+ temp_et_coalesce.pkt_rate_low = nesadapter->et_pkt_rate_low;
+ spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
+ temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low;
+ temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target;
+ temp_et_coalesce.rx_max_coalesced_frames_high = shared_timer->threshold_high;
+ temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min;
+ temp_et_coalesce.rx_coalesce_usecs_high = shared_timer->timer_in_use_max;
+ if (nesadapter->et_use_adaptive_rx_coalesce) {
+ temp_et_coalesce.rx_coalesce_usecs_irq = shared_timer->timer_in_use;
+ }
+ spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
+ memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce));
+ return 0;
+}
+
+
+/**
+ * nes_netdev_get_pauseparam
+ */
+static void nes_netdev_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *et_pauseparam)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+
+ et_pauseparam->autoneg = 0;
+ et_pauseparam->rx_pause = (nesvnic->nesdev->disable_rx_flow_control == 0) ? 1:0;
+ et_pauseparam->tx_pause = (nesvnic->nesdev->disable_tx_flow_control == 0) ? 1:0;
+}
+
+
+/**
+ * nes_netdev_set_pauseparam
+ */
+static int nes_netdev_set_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *et_pauseparam)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ u32 u32temp;
+
+ if (et_pauseparam->autoneg) {
+ /* TODO: should return unsupported */
+ return 0;
+ }
+ if ((et_pauseparam->tx_pause == 1) && (nesdev->disable_tx_flow_control == 1)) {
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200));
+ u32temp |= NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE;
+ nes_write_indexed(nesdev,
+ NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp);
+ nesdev->disable_tx_flow_control = 0;
+ } else if ((et_pauseparam->tx_pause == 0) && (nesdev->disable_tx_flow_control == 0)) {
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200));
+ u32temp &= ~NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE;
+ nes_write_indexed(nesdev,
+ NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp);
+ nesdev->disable_tx_flow_control = 1;
+ }
+ if ((et_pauseparam->rx_pause == 1) && (nesdev->disable_rx_flow_control == 1)) {
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40));
+ u32temp &= ~NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE;
+ nes_write_indexed(nesdev,
+ NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40), u32temp);
+ nesdev->disable_rx_flow_control = 0;
+ } else if ((et_pauseparam->rx_pause == 0) && (nesdev->disable_rx_flow_control == 0)) {
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40));
+ u32temp |= NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE;
+ nes_write_indexed(nesdev,
+ NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40), u32temp);
+ nesdev->disable_rx_flow_control = 1;
+ }
+
+ return 0;
+}
+
+
+/**
+ * nes_netdev_get_settings
+ */
+static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u16 phy_data;
+
+ et_cmd->duplex = DUPLEX_FULL;
+ et_cmd->port = PORT_MII;
+
+ if (nesadapter->OneG_Mode) {
+ et_cmd->speed = SPEED_1000;
+ if (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
+ et_cmd->supported = SUPPORTED_1000baseT_Full;
+ et_cmd->advertising = ADVERTISED_1000baseT_Full;
+ et_cmd->autoneg = AUTONEG_DISABLE;
+ et_cmd->transceiver = XCVR_INTERNAL;
+ et_cmd->phy_address = nesdev->mac_index;
+ } else {
+ et_cmd->supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg;
+ et_cmd->advertising = ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg;
+ nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], &phy_data);
+ if (phy_data & 0x1000)
+ et_cmd->autoneg = AUTONEG_ENABLE;
+ else
+ et_cmd->autoneg = AUTONEG_DISABLE;
+ et_cmd->transceiver = XCVR_EXTERNAL;
+ et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
+ }
+ } else {
+ if ((nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) ||
+ (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_ARGUS)) {
+ et_cmd->transceiver = XCVR_EXTERNAL;
+ et_cmd->port = PORT_FIBRE;
+ et_cmd->supported = SUPPORTED_FIBRE;
+ et_cmd->advertising = ADVERTISED_FIBRE;
+ et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
+ } else {
+ et_cmd->transceiver = XCVR_INTERNAL;
+ et_cmd->supported = SUPPORTED_10000baseT_Full;
+ et_cmd->advertising = ADVERTISED_10000baseT_Full;
+ et_cmd->phy_address = nesdev->mac_index;
+ }
+ et_cmd->speed = SPEED_10000;
+ et_cmd->autoneg = AUTONEG_DISABLE;
+ }
+ et_cmd->maxtxpkt = 511;
+ et_cmd->maxrxpkt = 511;
+ return 0;
+}
+
+
+/**
+ * nes_netdev_set_settings
+ */
+static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u16 phy_data;
+
+ if ((nesadapter->OneG_Mode) &&
+ (nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G)) {
+ nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index],
+ &phy_data);
+ if (et_cmd->autoneg) {
+ /* Turn on Full duplex, Autoneg, and restart autonegotiation */
+ phy_data |= 0x1300;
+ } else {
+ /* Turn off autoneg */
+ phy_data &= ~0x1000;
+ }
+ nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index],
+ phy_data);
+ }
+
+ return 0;
+}
+
+
+static struct ethtool_ops nes_ethtool_ops = {
+ .get_link = ethtool_op_get_link,
+ .get_settings = nes_netdev_get_settings,
+ .set_settings = nes_netdev_set_settings,
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .get_rx_csum = nes_netdev_get_rx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .get_strings = nes_netdev_get_strings,
+ .get_stats_count = nes_netdev_get_stats_count,
+ .get_ethtool_stats = nes_netdev_get_ethtool_stats,
+ .get_drvinfo = nes_netdev_get_drvinfo,
+ .get_coalesce = nes_netdev_get_coalesce,
+ .set_coalesce = nes_netdev_set_coalesce,
+ .get_pauseparam = nes_netdev_get_pauseparam,
+ .set_pauseparam = nes_netdev_set_pauseparam,
+ .set_tx_csum = ethtool_op_set_tx_csum,
+ .set_rx_csum = nes_netdev_set_rx_csum,
+ .set_sg = ethtool_op_set_sg,
+ .get_tso = ethtool_op_get_tso,
+ .set_tso = ethtool_op_set_tso,
+ .get_flags = ethtool_op_get_flags,
+ .set_flags = ethtool_op_set_flags,
+};
+
+
+static void nes_netdev_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 u32temp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&nesadapter->phy_lock, flags);
+ nesvnic->vlan_grp = grp;
+
+ nes_debug(NES_DBG_NETDEV, "%s: %s\n", __func__, netdev->name);
+
+ /* Enable/Disable VLAN Stripping */
+ u32temp = nes_read_indexed(nesdev, NES_IDX_PCIX_DIAG);
+ if (grp)
+ u32temp &= 0xfdffffff;
+ else
+ u32temp |= 0x02000000;
+
+ nes_write_indexed(nesdev, NES_IDX_PCIX_DIAG, u32temp);
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+}
+
+
+/**
+ * nes_netdev_init - initialize network device
+ */
+struct net_device *nes_netdev_init(struct nes_device *nesdev,
+ void __iomem *mmio_addr)
+{
+ u64 u64temp;
+ struct nes_vnic *nesvnic = NULL;
+ struct net_device *netdev;
+ struct nic_qp_map *curr_qp_map;
+ u32 u32temp;
+ u16 phy_data;
+ u16 temp_phy_data;
+
+ netdev = alloc_etherdev(sizeof(struct nes_vnic));
+ if (!netdev) {
+ printk(KERN_ERR PFX "nesvnic etherdev alloc failed");
+ return NULL;
+ }
+
+ nes_debug(NES_DBG_INIT, "netdev = %p, %s\n", netdev, netdev->name);
+
+ SET_NETDEV_DEV(netdev, &nesdev->pcidev->dev);
+
+ nesvnic = netdev_priv(netdev);
+ memset(nesvnic, 0, sizeof(*nesvnic));
+
+ netdev->open = nes_netdev_open;
+ netdev->stop = nes_netdev_stop;
+ netdev->hard_start_xmit = nes_netdev_start_xmit;
+ netdev->get_stats = nes_netdev_get_stats;
+ netdev->tx_timeout = nes_netdev_tx_timeout;
+ netdev->set_mac_address = nes_netdev_set_mac_address;
+ netdev->set_multicast_list = nes_netdev_set_multicast_list;
+ netdev->change_mtu = nes_netdev_change_mtu;
+ netdev->watchdog_timeo = NES_TX_TIMEOUT;
+ netdev->irq = nesdev->pcidev->irq;
+ netdev->mtu = ETH_DATA_LEN;
+ netdev->hard_header_len = ETH_HLEN;
+ netdev->addr_len = ETH_ALEN;
+ netdev->type = ARPHRD_ETHER;
+ netdev->features = NETIF_F_HIGHDMA;
+ netdev->ethtool_ops = &nes_ethtool_ops;
+ netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128);
+ nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n");
+ netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
+ netdev->vlan_rx_register = nes_netdev_vlan_rx_register;
+ netdev->features |= NETIF_F_LLTX;
+
+ /* Fill in the port structure */
+ nesvnic->netdev = netdev;
+ nesvnic->nesdev = nesdev;
+ nesvnic->msg_enable = netif_msg_init(debug, default_msg);
+ nesvnic->netdev_index = nesdev->netdev_count;
+ nesvnic->perfect_filter_index = nesdev->nesadapter->netdev_count;
+ nesvnic->max_frame_size = netdev->mtu + netdev->hard_header_len + VLAN_HLEN;
+
+ curr_qp_map = nic_qp_mapping_per_function[PCI_FUNC(nesdev->pcidev->devfn)];
+ nesvnic->nic.qp_id = curr_qp_map[nesdev->netdev_count].qpid;
+ nesvnic->nic_index = curr_qp_map[nesdev->netdev_count].nic_index;
+ nesvnic->logical_port = curr_qp_map[nesdev->netdev_count].logical_port;
+
+ /* Setup the burned in MAC address */
+ u64temp = (u64)nesdev->nesadapter->mac_addr_low;
+ u64temp += ((u64)nesdev->nesadapter->mac_addr_high) << 32;
+ u64temp += nesvnic->nic_index;
+ netdev->dev_addr[0] = (u8)(u64temp>>40);
+ netdev->dev_addr[1] = (u8)(u64temp>>32);
+ netdev->dev_addr[2] = (u8)(u64temp>>24);
+ netdev->dev_addr[3] = (u8)(u64temp>>16);
+ netdev->dev_addr[4] = (u8)(u64temp>>8);
+ netdev->dev_addr[5] = (u8)u64temp;
+ memcpy(netdev->perm_addr, netdev->dev_addr, 6);
+
+ if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV)) {
+ netdev->features |= NETIF_F_GSO | NETIF_F_TSO | NETIF_F_SG | NETIF_F_IP_CSUM;
+ } else {
+ netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+ }
+
+ nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d,"
+ " nic_index = %d, logical_port = %d, mac_index = %d.\n",
+ nesvnic, (unsigned long)netdev->features, nesvnic->nic.qp_id,
+ nesvnic->nic_index, nesvnic->logical_port, nesdev->mac_index);
+
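+ /* Assign the NIC QP indices this vnic owns, based on the port and PCI function counts */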
+ if (nesvnic->nesdev->nesadapter->port_count == 1 &&
+ nesvnic->nesdev->nesadapter->adapter_fcn_count == 1) {
+
+ nesvnic->qp_nic_index[0] = nesvnic->nic_index;
+ nesvnic->qp_nic_index[1] = nesvnic->nic_index + 1;
+ if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) {
+ nesvnic->qp_nic_index[2] = 0xf;
+ nesvnic->qp_nic_index[3] = 0xf;
+ } else {
+ nesvnic->qp_nic_index[2] = nesvnic->nic_index + 2;
+ nesvnic->qp_nic_index[3] = nesvnic->nic_index + 3;
+ }
+ } else {
+ if (nesvnic->nesdev->nesadapter->port_count == 2 ||
+ (nesvnic->nesdev->nesadapter->port_count == 1 &&
+ nesvnic->nesdev->nesadapter->adapter_fcn_count == 2)) {
+ nesvnic->qp_nic_index[0] = nesvnic->nic_index;
+ nesvnic->qp_nic_index[1] = nesvnic->nic_index
+ + 2;
+ nesvnic->qp_nic_index[2] = 0xf;
+ nesvnic->qp_nic_index[3] = 0xf;
+ } else {
+ nesvnic->qp_nic_index[0] = nesvnic->nic_index;
+ nesvnic->qp_nic_index[1] = 0xf;
+ nesvnic->qp_nic_index[2] = 0xf;
+ nesvnic->qp_nic_index[3] = 0xf;
+ }
+ }
+ nesvnic->next_qp_nic_index = 0;
+
+ if (nesdev->netdev_count == 0) {
+ nesvnic->rdma_enabled = 1;
+ } else {
+ nesvnic->rdma_enabled = 0;
+ }
+ nesvnic->nic_cq.cq_number = nesvnic->nic.qp_id;
+ spin_lock_init(&nesvnic->tx_lock);
+ nesdev->netdev[nesdev->netdev_count] = netdev;
+
+ nes_debug(NES_DBG_INIT, "Adding nesvnic (%p) to the adapters nesvnic_list for MAC%d.\n",
+ nesvnic, nesdev->mac_index);
+ list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]);
+
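+ /* For the first netdev on the function driving this MAC, set up the PHY and record the initial link state */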
+ if ((nesdev->netdev_count == 0) &&
+ ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) ||
+ ((nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) &&
+ (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) ||
+ ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) {
+ /*
+ * nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n",
+ * NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesvnic->logical_port & 1)));
+ */
+ u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+ (0x200 * (nesdev->mac_index & 1)));
+ if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G) {
+ u32temp |= 0x00200000;
+ nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+ (0x200 * (nesdev->mac_index & 1)), u32temp);
+ }
+
+ u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+ (0x200 * (nesdev->mac_index & 1)));
+
+ if ((u32temp&0x0f1f0000) == 0x0f0f0000) {
+ if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) {
+ nes_init_phy(nesdev);
+ nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
+ temp_phy_data = (u16)nes_read_indexed(nesdev,
+ NES_IDX_MAC_MDIO_CONTROL);
+ u32temp = 20;
+ do {
+ nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
+ phy_data = (u16)nes_read_indexed(nesdev,
+ NES_IDX_MAC_MDIO_CONTROL);
+ if ((phy_data == temp_phy_data) || (!(--u32temp)))
+ break;
+ temp_phy_data = phy_data;
+ } while (1);
+ if (phy_data & 4) {
+ nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
+ nesvnic->linkup = 1;
+ } else {
+ nes_debug(NES_DBG_INIT, "The Link is DOWN!!.\n");
+ }
+ } else {
+ nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
+ nesvnic->linkup = 1;
+ }
+ } else if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
+ nes_debug(NES_DBG_INIT, "mac_index=%d, logical_port=%d, u32temp=0x%04X, PCI_FUNC=%d\n",
+ nesdev->mac_index, nesvnic->logical_port, u32temp, PCI_FUNC(nesdev->pcidev->devfn));
+ if (((nesdev->mac_index < 2) && ((u32temp&0x01010000) == 0x01010000)) ||
+ ((nesdev->mac_index > 1) && ((u32temp&0x02020000) == 0x02020000))) {
+ nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
+ nesvnic->linkup = 1;
+ }
+ }
+ /* clear the MAC interrupt status, assumes direct logical to physical mapping */
+ u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index));
+ nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp);
+ nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp);
+
+ if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_IRIS)
+ nes_init_phy(nesdev);
+
+ }
+
+ return netdev;
+}
+
+
+/**
+ * nes_netdev_destroy - destroy network device structure
+ */
+void nes_netdev_destroy(struct net_device *netdev)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+
+ /* make sure 'stop' method is called by Linux stack */
+ /* nes_netdev_stop(netdev); */
+
+ list_del(&nesvnic->list);
+
+ if (nesvnic->of_device_registered) {
+ nes_destroy_ofa_device(nesvnic->nesibdev);
+ }
+
+ free_netdev(netdev);
+}
+
+
+/**
+ * nes_nic_cm_xmit -- CM calls this to send out pkts
+ */
+int nes_nic_cm_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ int ret;
+
+ skb->dev = netdev;
+ ret = dev_queue_xmit(skb);
+ if (ret) {
+ nes_debug(NES_DBG_CM, "Bad return code from dev_queue_xmit %d\n", ret);
+ }
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h
new file mode 100644
index 0000000..e64306b
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_user.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect. All rights reserved.
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef NES_USER_H
+#define NES_USER_H
+
+#include <linux/types.h>
+
+#define NES_ABI_USERSPACE_VER 1
+#define NES_ABI_KERNEL_VER 1
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct nes_alloc_ucontext_req {
+ __u32 reserved32;
+ __u8 userspace_ver;
+ __u8 reserved8[3];
+};
+
+struct nes_alloc_ucontext_resp {
+ __u32 max_pds; /* maximum pds allowed for this user process */
+ __u32 max_qps; /* maximum qps allowed for this user process */
+ __u32 wq_size; /* size of the WQs (sq+rq) allocated to the mmaped area */
+ __u8 virtwq; /* flag to indicate if virtual WQ are to be used or not */
+ __u8 kernel_ver;
+ __u8 reserved[2];
+};
+
+struct nes_alloc_pd_resp {
+ __u32 pd_id;
+ __u32 mmap_db_index;
+};
+
+struct nes_create_cq_req {
+ __u64 user_cq_buffer;
+ __u32 mcrqf;
+ __u8 reserved[4];
+};
+
+struct nes_create_qp_req {
+ __u64 user_wqe_buffers;
+};
+
+enum iwnes_memreg_type {
+ IWNES_MEMREG_TYPE_MEM = 0x0000,
+ IWNES_MEMREG_TYPE_QP = 0x0001,
+ IWNES_MEMREG_TYPE_CQ = 0x0002,
+ IWNES_MEMREG_TYPE_MW = 0x0003,
+ IWNES_MEMREG_TYPE_FMR = 0x0004,
+};
+
+struct nes_mem_reg_req {
+ __u32 reg_type; /* indicates if id is memory, QP or CQ */
+ __u32 reserved;
+};
+
+struct nes_create_cq_resp {
+ __u32 cq_id;
+ __u32 cq_size;
+ __u32 mmap_db_index;
+ __u32 reserved;
+};
+
+struct nes_create_qp_resp {
+ __u32 qp_id;
+ __u32 actual_sq_size;
+ __u32 actual_rq_size;
+ __u32 mmap_sq_db_index;
+ __u32 mmap_rq_db_index;
+ __u32 nes_drv_opt;
+};
+
+#endif /* NES_USER_H */
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
new file mode 100644
index 0000000..fb8cbd7
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -0,0 +1,938 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include "nes.h"
+
+
+
+static u16 nes_read16_eeprom(void __iomem *addr, u16 offset);
+
+u32 mh_detected;
+u32 mh_pauses_sent;
+
+/**
+ * nes_read_eeprom_values -
+ */
+int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesadapter)
+{
+ u32 mac_addr_low;
+ u16 mac_addr_high;
+ u16 eeprom_data;
+ u16 eeprom_offset;
+ u16 next_section_address;
+ u16 sw_section_ver;
+ u8 major_ver = 0;
+ u8 minor_ver = 0;
+
+ /* TODO: deal with EEPROM endian issues */
+ if (nesadapter->firmware_eeprom_offset == 0) {
+ /* Read the EEPROM Parameters */
+ eeprom_data = nes_read16_eeprom(nesdev->regs, 0);
+ nes_debug(NES_DBG_HW, "EEPROM Offset 0 = 0x%04X\n", eeprom_data);
+ eeprom_offset = 2 + (((eeprom_data & 0x007f) << 3) <<
+ ((eeprom_data & 0x0080) >> 7));
+ nes_debug(NES_DBG_HW, "Firmware Offset = 0x%04X\n", eeprom_offset);
+ nesadapter->firmware_eeprom_offset = eeprom_offset;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4);
+ if (eeprom_data != 0x5746) {
+ nes_debug(NES_DBG_HW, "Not a valid Firmware Image = 0x%04X\n", eeprom_data);
+ return -1;
+ }
+
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+ nes_debug(NES_DBG_HW, "EEPROM Offset %u = 0x%04X\n",
+ eeprom_offset + 2, eeprom_data);
+ eeprom_offset += ((eeprom_data & 0x00ff) << 3) << ((eeprom_data & 0x0100) >> 8);
+ nes_debug(NES_DBG_HW, "Software Offset = 0x%04X\n", eeprom_offset);
+ nesadapter->software_eeprom_offset = eeprom_offset;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4);
+ if (eeprom_data != 0x5753) {
+ printk("Not a valid Software Image = 0x%04X\n", eeprom_data);
+ return -1;
+ }
+ sw_section_ver = nes_read16_eeprom(nesdev->regs, nesadapter->software_eeprom_offset + 6);
+ nes_debug(NES_DBG_HW, "Software section version number = 0x%04X\n",
+ sw_section_ver);
+
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+ nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
+ eeprom_offset + 2, eeprom_data);
+ next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) <<
+ ((eeprom_data & 0x0100) >> 8));
+ eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
+ if (eeprom_data != 0x414d) {
+ nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n",
+ eeprom_data);
+ goto no_fw_rev;
+ }
+ eeprom_offset = next_section_address;
+
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+ nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
+ eeprom_offset + 2, eeprom_data);
+ next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) <<
+ ((eeprom_data & 0x0100) >> 8));
+ eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
+ if (eeprom_data != 0x4f52) {
+ nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x4f52 but was 0x%04X\n",
+ eeprom_data);
+ goto no_fw_rev;
+ }
+ eeprom_offset = next_section_address;
+
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+ nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
+ eeprom_offset + 2, eeprom_data);
+ next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
+ eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
+ if (eeprom_data != 0x5746) {
+ nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5746 but was 0x%04X\n",
+ eeprom_data);
+ goto no_fw_rev;
+ }
+ eeprom_offset = next_section_address;
+
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+ nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
+ eeprom_offset + 2, eeprom_data);
+ next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
+ eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
+ if (eeprom_data != 0x5753) {
+ nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5753 but was 0x%04X\n",
+ eeprom_data);
+ goto no_fw_rev;
+ }
+ eeprom_offset = next_section_address;
+
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+ nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
+ eeprom_offset + 2, eeprom_data);
+ next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
+ eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
+ if (eeprom_data != 0x414d) {
+ nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n",
+ eeprom_data);
+ goto no_fw_rev;
+ }
+ eeprom_offset = next_section_address;
+
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
+ nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
+ eeprom_offset + 2, eeprom_data);
+ next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
+ eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
+ if (eeprom_data != 0x464e) {
+ nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x464e but was 0x%04X\n",
+ eeprom_data);
+ goto no_fw_rev;
+ }
+ eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 8);
+ printk(PFX "Firmware version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data);
+ major_ver = (u8)(eeprom_data >> 8);
+ minor_ver = (u8)(eeprom_data);
+
+ if (nes_drv_opt & NES_DRV_OPT_DISABLE_VIRT_WQ) {
+ nes_debug(NES_DBG_HW, "Virtual WQs have been disabled\n");
+ } else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && (major_ver != 255))) {
+ nesadapter->virtwq = 1;
+ }
+ nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) +
+ (u32)((u8)eeprom_data);
+
+no_fw_rev:
+ /* eeprom is valid */
+ eeprom_offset = nesadapter->software_eeprom_offset;
+ eeprom_offset += 8;
+ nesadapter->netdev_max = (u8)nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ eeprom_offset += 2;
+ mac_addr_high = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ eeprom_offset += 2;
+ mac_addr_low = (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ eeprom_offset += 2;
+ mac_addr_low <<= 16;
+ mac_addr_low += (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nes_debug(NES_DBG_HW, "Base MAC Address = 0x%04X%08X\n",
+ mac_addr_high, mac_addr_low);
+ nes_debug(NES_DBG_HW, "MAC Address count = %u\n", nesadapter->netdev_max);
+
+ nesadapter->mac_addr_low = mac_addr_low;
+ nesadapter->mac_addr_high = mac_addr_high;
+
+ /* Read the Phy Type array */
+ eeprom_offset += 10;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nesadapter->phy_type[0] = (u8)(eeprom_data >> 8);
+ nesadapter->phy_type[1] = (u8)eeprom_data;
+
+ /* Read the port array */
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nesadapter->phy_type[2] = (u8)(eeprom_data >> 8);
+ nesadapter->phy_type[3] = (u8)eeprom_data;
+ /* port_count is set by soft reset reg */
+ nes_debug(NES_DBG_HW, "port_count = %u, port 0 -> %u, port 1 -> %u,"
+ " port 2 -> %u, port 3 -> %u\n",
+ nesadapter->port_count,
+ nesadapter->phy_type[0], nesadapter->phy_type[1],
+ nesadapter->phy_type[2], nesadapter->phy_type[3]);
+
+ /* Read PD config array */
+ eeprom_offset += 10;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nesadapter->pd_config_size[0] = eeprom_data;
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nesadapter->pd_config_base[0] = eeprom_data;
+ nes_debug(NES_DBG_HW, "PD0 config, size=0x%04x, base=0x%04x\n",
+ nesadapter->pd_config_size[0], nesadapter->pd_config_base[0]);
+
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nesadapter->pd_config_size[1] = eeprom_data;
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nesadapter->pd_config_base[1] = eeprom_data;
+ nes_debug(NES_DBG_HW, "PD1 config, size=0x%04x, base=0x%04x\n",
+ nesadapter->pd_config_size[1], nesadapter->pd_config_base[1]);
+
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nesadapter->pd_config_size[2] = eeprom_data;
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nesadapter->pd_config_base[2] = eeprom_data;
+ nes_debug(NES_DBG_HW, "PD2 config, size=0x%04x, base=0x%04x\n",
+ nesadapter->pd_config_size[2], nesadapter->pd_config_base[2]);
+
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nesadapter->pd_config_size[3] = eeprom_data;
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nesadapter->pd_config_base[3] = eeprom_data;
+ nes_debug(NES_DBG_HW, "PD3 config, size=0x%04x, base=0x%04x\n",
+ nesadapter->pd_config_size[3], nesadapter->pd_config_base[3]);
+
+ /* Read Rx Pool Size */
+ eeprom_offset += 22; /* 46 */
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ eeprom_offset += 2;
+ nesadapter->rx_pool_size = (((u32)eeprom_data) << 16) +
+ nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nes_debug(NES_DBG_HW, "rx_pool_size = 0x%08X\n", nesadapter->rx_pool_size);
+
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ eeprom_offset += 2;
+ nesadapter->tx_pool_size = (((u32)eeprom_data) << 16) +
+ nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nes_debug(NES_DBG_HW, "tx_pool_size = 0x%08X\n", nesadapter->tx_pool_size);
+
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ eeprom_offset += 2;
+ nesadapter->rx_threshold = (((u32)eeprom_data) << 16) +
+ nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nes_debug(NES_DBG_HW, "rx_threshold = 0x%08X\n", nesadapter->rx_threshold);
+
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ eeprom_offset += 2;
+ nesadapter->tcp_timer_core_clk_divisor = (((u32)eeprom_data) << 16) +
+ nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nes_debug(NES_DBG_HW, "tcp_timer_core_clk_divisor = 0x%08X\n",
+ nesadapter->tcp_timer_core_clk_divisor);
+
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ eeprom_offset += 2;
+ nesadapter->iwarp_config = (((u32)eeprom_data) << 16) +
+ nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nes_debug(NES_DBG_HW, "iwarp_config = 0x%08X\n", nesadapter->iwarp_config);
+
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ eeprom_offset += 2;
+ nesadapter->cm_config = (((u32)eeprom_data) << 16) +
+ nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nes_debug(NES_DBG_HW, "cm_config = 0x%08X\n", nesadapter->cm_config);
+
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ eeprom_offset += 2;
+ nesadapter->sws_timer_config = (((u32)eeprom_data) << 16) +
+ nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nes_debug(NES_DBG_HW, "sws_timer_config = 0x%08X\n", nesadapter->sws_timer_config);
+
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ eeprom_offset += 2;
+ nesadapter->tcp_config1 = (((u32)eeprom_data) << 16) +
+ nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nes_debug(NES_DBG_HW, "tcp_config1 = 0x%08X\n", nesadapter->tcp_config1);
+
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ eeprom_offset += 2;
+ nesadapter->wqm_wat = (((u32)eeprom_data) << 16) +
+ nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nes_debug(NES_DBG_HW, "wqm_wat = 0x%08X\n", nesadapter->wqm_wat);
+
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ eeprom_offset += 2;
+ nesadapter->core_clock = (((u32)eeprom_data) << 16) +
+ nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nes_debug(NES_DBG_HW, "core_clock = 0x%08X\n", nesadapter->core_clock);
+
+ if ((sw_section_ver) && (nesadapter->hw_rev != NE020_REV)) {
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nesadapter->phy_index[0] = (eeprom_data & 0xff00)>>8;
+ nesadapter->phy_index[1] = eeprom_data & 0x00ff;
+ eeprom_offset += 2;
+ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
+ nesadapter->phy_index[2] = (eeprom_data & 0xff00)>>8;
+ nesadapter->phy_index[3] = eeprom_data & 0x00ff;
+ } else {
+ nesadapter->phy_index[0] = 4;
+ nesadapter->phy_index[1] = 5;
+ nesadapter->phy_index[2] = 6;
+ nesadapter->phy_index[3] = 7;
+ }
+ nes_debug(NES_DBG_HW, "Phy address map = 0 > %u, 1 > %u, 2 > %u, 3 > %u\n",
+ nesadapter->phy_index[0],nesadapter->phy_index[1],
+ nesadapter->phy_index[2],nesadapter->phy_index[3]);
+ }
+
+ return 0;
+}
+
+
+/**
+ * nes_read16_eeprom
+ */
+static u16 nes_read16_eeprom(void __iomem *addr, u16 offset)
+{
+ writel(NES_EEPROM_READ_REQUEST + (offset >> 1),
+ (void __iomem *)addr + NES_EEPROM_COMMAND);
+
+ do {
+ } while (readl((void __iomem *)addr + NES_EEPROM_COMMAND) &
+ NES_EEPROM_READ_REQUEST);
+
+ return readw((void __iomem *)addr + NES_EEPROM_DATA);
+}
+
+
+/**
+ * nes_write_1G_phy_reg
+ */
+void nes_write_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 data)
+{
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 u32temp;
+ u32 counter;
+ unsigned long flags;
+
+ spin_lock_irqsave(&nesadapter->phy_lock, flags);
+
+ nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
+ 0x50020000 | data | ((u32)phy_reg << 18) | ((u32)phy_addr << 23));
+ for (counter = 0; counter < 100 ; counter++) {
+ udelay(30);
+ u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
+ if (u32temp & 1) {
+ /* nes_debug(NES_DBG_PHY, "Phy interrupt status = 0x%X.\n", u32temp); */
+ nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
+ break;
+ }
+ }
+ if (!(u32temp & 1))
+ nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
+ u32temp);
+
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+}
+
+
+/**
+ * nes_read_1G_phy_reg
+ * Reads a 1G PHY register over MDIO and returns the result in *data
+ * (unlike nes_read_10G_phy_reg(), which only issues the read).
+ */
+void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 *data)
+{
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 u32temp;
+ u32 counter;
+ unsigned long flags;
+
+ /* nes_debug(NES_DBG_PHY, "phy addr = %d, mac_index = %d\n",
+ phy_addr, nesdev->mac_index); */
+ spin_lock_irqsave(&nesadapter->phy_lock, flags);
+
+ nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
+ 0x60020000 | ((u32)phy_reg << 18) | ((u32)phy_addr << 23));
+ for (counter = 0; counter < 100 ; counter++) {
+ udelay(30);
+ u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
+ if (u32temp & 1) {
+ /* nes_debug(NES_DBG_PHY, "Phy interrupt status = 0x%X.\n", u32temp); */
+ nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
+ break;
+ }
+ }
+ if (!(u32temp & 1)) {
+ nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
+ u32temp);
+ *data = 0xffff;
+ } else {
+ *data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ }
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+}
+
+
+/**
+ * nes_write_10G_phy_reg
+ */
+void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_addr, u8 dev_addr, u16 phy_reg,
+ u16 data)
+{
+ u32 port_addr;
+ u32 u32temp;
+ u32 counter;
+
+ port_addr = phy_addr;
+
+ /* set address */
+ nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
+ 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23));
+ for (counter = 0; counter < 100 ; counter++) {
+ udelay(30);
+ u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
+ if (u32temp & 1) {
+ nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
+ break;
+ }
+ }
+ if (!(u32temp & 1))
+ nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
+ u32temp);
+
+ /* set data */
+ nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
+ 0x10020000 | (u32)data | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23));
+ for (counter = 0; counter < 100 ; counter++) {
+ udelay(30);
+ u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
+ if (u32temp & 1) {
+ nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
+ break;
+ }
+ }
+ if (!(u32temp & 1))
+ nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
+ u32temp);
+}
+
+
+/**
+ * nes_read_10G_phy_reg
+ * This routine only issues the read, the data must be read
+ * separately.
+ */
+void nes_read_10G_phy_reg(struct nes_device *nesdev, u8 phy_addr, u8 dev_addr, u16 phy_reg)
+{
+ u32 port_addr;
+ u32 u32temp;
+ u32 counter;
+
+ port_addr = phy_addr;
+
+ /* set address */
+ nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
+ 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23));
+ for (counter = 0; counter < 100 ; counter++) {
+ udelay(30);
+ u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
+ if (u32temp & 1) {
+ nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
+ break;
+ }
+ }
+ if (!(u32temp & 1))
+ nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
+ u32temp);
+
+ /* issue read */
+ nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
+ 0x30020000 | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23));
+ for (counter = 0; counter < 100 ; counter++) {
+ udelay(30);
+ u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
+ if (u32temp & 1) {
+ nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
+ break;
+ }
+ }
+ if (!(u32temp & 1))
+ nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
+ u32temp);
+}
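+
+/*
+ * Hypothetical caller sketch (for illustration, not part of this patch):
+ * because nes_read_10G_phy_reg() only issues the read, the result is
+ * expected to be picked up afterwards from the MDIO control register:
+ *
+ *	u16 phy_data;
+ *
+ *	nes_read_10G_phy_reg(nesdev, phy_addr, dev_addr, phy_reg);
+ *	phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ */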
+
+
+/**
+ * nes_get_cqp_request
+ */
+struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev)
+{
+ unsigned long flags;
+ struct nes_cqp_request *cqp_request = NULL;
+
+ if (!list_empty(&nesdev->cqp_avail_reqs)) {
+ spin_lock_irqsave(&nesdev->cqp.lock, flags);
+ cqp_request = list_entry(nesdev->cqp_avail_reqs.next,
+ struct nes_cqp_request, list);
+ list_del_init(&cqp_request->list);
+ spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+ } else {
+ cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_KERNEL);
+ if (cqp_request) {
+ cqp_request->dynamic = 1;
+ INIT_LIST_HEAD(&cqp_request->list);
+ }
+ }
+
+ if (cqp_request) {
+ init_waitqueue_head(&cqp_request->waitq);
+ cqp_request->waiting = 0;
+ cqp_request->request_done = 0;
+ cqp_request->callback = 0;
+ nes_debug(NES_DBG_CQP, "Got cqp request %p from the available list \n",
+ cqp_request);
+ } else
+ printk(KERN_ERR PFX "%s: Could not allocate a CQP request.\n",
+ __func__);
+
+ return cqp_request;
+}
+
+void nes_free_cqp_request(struct nes_device *nesdev,
+ struct nes_cqp_request *cqp_request)
+{
+ unsigned long flags;
+
+ nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
+ cqp_request,
+ le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f);
+
+ if (cqp_request->dynamic) {
+ kfree(cqp_request);
+ } else {
+ spin_lock_irqsave(&nesdev->cqp.lock, flags);
+ list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+ spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+ }
+}
+
+void nes_put_cqp_request(struct nes_device *nesdev,
+ struct nes_cqp_request *cqp_request)
+{
+ if (atomic_dec_and_test(&cqp_request->refcount))
+ nes_free_cqp_request(nesdev, cqp_request);
+}
+
+/**
+ * nes_post_cqp_request
+ */
+void nes_post_cqp_request(struct nes_device *nesdev,
+ struct nes_cqp_request *cqp_request)
+{
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ unsigned long flags;
+ u32 cqp_head;
+ u64 u64temp;
+
+ spin_lock_irqsave(&nesdev->cqp.lock, flags);
+
+ if (((((nesdev->cqp.sq_tail+(nesdev->cqp.sq_size*2))-nesdev->cqp.sq_head) &
+ (nesdev->cqp.sq_size - 1)) != 1)
+ && (list_empty(&nesdev->cqp_pending_reqs))) {
+ cqp_head = nesdev->cqp.sq_head++;
+ nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
+ barrier();
+ u64temp = (unsigned long)cqp_request;
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_SCRATCH_LOW_IDX,
+ u64temp);
+ nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ,"
+ " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u,"
+ " waiting = %d, refcount = %d.\n",
+ le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f,
+ le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request,
+ nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size,
+ cqp_request->waiting, atomic_read(&cqp_request->refcount));
+ barrier();
+
+ /* Ring doorbell (1 WQEs) */
+ nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
+
+ barrier();
+ } else {
+ nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X), line 1 = 0x%08X"
+ " put on the pending queue.\n",
+ cqp_request,
+ le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f,
+ le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_ID_IDX]));
+ list_add_tail(&cqp_request->list, &nesdev->cqp_pending_reqs);
+ }
+
+ spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+
+ return;
+}
+
+
+/**
+ * nes_arp_table
+ */
+int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 action)
+{
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ int arp_index;
+ int err = 0;
+
+ for (arp_index = 0; (u32) arp_index < nesadapter->arp_table_size; arp_index++) {
+ if (nesadapter->arp_table[arp_index].ip_addr == ip_addr)
+ break;
+ }
+
+ if (action == NES_ARP_ADD) {
+ if (arp_index != nesadapter->arp_table_size) {
+ return -1;
+ }
+
+ arp_index = 0;
+ err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps,
+ nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index);
+ if (err) {
+ nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err);
+ return err;
+ }
+ nes_debug(NES_DBG_NETDEV, "ADD, arp_index=%d\n", arp_index);
+
+ nesadapter->arp_table[arp_index].ip_addr = ip_addr;
+ memcpy(nesadapter->arp_table[arp_index].mac_addr, mac_addr, ETH_ALEN);
+ return arp_index;
+ }
+
+ /* DELETE or RESOLVE */
+ if (arp_index == nesadapter->arp_table_size) {
+ nes_debug(NES_DBG_NETDEV, "MAC for " NIPQUAD_FMT " not in ARP table - cannot %s\n",
+ HIPQUAD(ip_addr),
+ action == NES_ARP_RESOLVE ? "resolve" : "delete");
+ return -1;
+ }
+
+ if (action == NES_ARP_RESOLVE) {
+ nes_debug(NES_DBG_NETDEV, "RESOLVE, arp_index=%d\n", arp_index);
+ return arp_index;
+ }
+
+ if (action == NES_ARP_DELETE) {
+ nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index);
+ nesadapter->arp_table[arp_index].ip_addr = 0;
+ memset(nesadapter->arp_table[arp_index].mac_addr, 0x00, ETH_ALEN);
+ nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index);
+ return arp_index;
+ }
+
+ return -1;
+}
+
+
+/**
+ * nes_mh_fix
+ */
+void nes_mh_fix(unsigned long parm)
+{
+ unsigned long flags;
+ struct nes_device *nesdev = (struct nes_device *)parm;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_vnic *nesvnic;
+ u32 used_chunks_tx;
+ u32 temp_used_chunks_tx;
+ u32 temp_last_used_chunks_tx;
+ u32 used_chunks_mask;
+ u32 mac_tx_frames_low;
+ u32 mac_tx_frames_high;
+ u32 mac_tx_pauses;
+ u32 serdes_status;
+ u32 reset_value;
+ u32 tx_control;
+ u32 tx_config;
+ u32 tx_pause_quanta;
+ u32 rx_control;
+ u32 rx_config;
+ u32 mac_exact_match;
+ u32 mpp_debug;
+ u32 i=0;
+ u32 chunks_tx_progress = 0;
+
+ spin_lock_irqsave(&nesadapter->phy_lock, flags);
+ if ((nesadapter->mac_sw_state[0] != NES_MAC_SW_IDLE) || (nesadapter->mac_link_down[0])) {
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+ goto no_mh_work;
+ }
+ nesadapter->mac_sw_state[0] = NES_MAC_SW_MH;
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+ do {
+ mac_tx_frames_low = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_LOW);
+ mac_tx_frames_high = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_HIGH);
+ mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES);
+ used_chunks_tx = nes_read_indexed(nesdev, NES_IDX_USED_CHUNKS_TX);
+ nesdev->mac_pause_frames_sent += mac_tx_pauses;
+ used_chunks_mask = 0;
+ temp_used_chunks_tx = used_chunks_tx;
+ temp_last_used_chunks_tx = nesdev->last_used_chunks_tx;
+
+ if (nesdev->netdev[0]) {
+ nesvnic = netdev_priv(nesdev->netdev[0]);
+ } else {
+ break;
+ }
+
+ for (i=0; i<4; i++) {
+ used_chunks_mask <<= 8;
+ if (nesvnic->qp_nic_index[i] != 0xff) {
+ used_chunks_mask |= 0xff;
+ if ((temp_used_chunks_tx&0xff)<(temp_last_used_chunks_tx&0xff)) {
+ chunks_tx_progress = 1;
+ }
+ }
+ temp_used_chunks_tx >>= 8;
+ temp_last_used_chunks_tx >>= 8;
+ }
+ if ((mac_tx_frames_low) || (mac_tx_frames_high) ||
+ (!(used_chunks_tx&used_chunks_mask)) ||
+ (!(nesdev->last_used_chunks_tx&used_chunks_mask)) ||
+ (chunks_tx_progress) ) {
+ nesdev->last_used_chunks_tx = used_chunks_tx;
+ break;
+ }
+ nesdev->last_used_chunks_tx = used_chunks_tx;
+ barrier();
+
+ nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000005);
+ mh_pauses_sent++;
+ mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES);
+ if (mac_tx_pauses) {
+ nesdev->mac_pause_frames_sent += mac_tx_pauses;
+ break;
+ }
+
+ tx_control = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONTROL);
+ tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
+ tx_pause_quanta = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA);
+ rx_control = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONTROL);
+ rx_config = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONFIG);
+ mac_exact_match = nes_read_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM);
+ mpp_debug = nes_read_indexed(nesdev, NES_IDX_MPP_DEBUG);
+
+ /* one last ditch effort to avoid a false positive */
+ mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES);
+ if (mac_tx_pauses) {
+ nesdev->last_mac_tx_pauses = nesdev->mac_pause_frames_sent;
+ nes_debug(NES_DBG_HW, "failsafe caught slow outbound pause\n");
+ break;
+ }
+ mh_detected++;
+
+ nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000000);
+ nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, 0x00000000);
+ reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
+
+ nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value | 0x0000001d);
+
+ while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
+ & 0x00000040) != 0x00000040) && (i++ < 5000)) {
+ /* mdelay(1); */
+ }
+
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008);
+ serdes_status = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0);
+
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000);
+ if (nesadapter->OneG_Mode) {
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222);
+ } else {
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222);
+ }
+ serdes_status = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_STATUS0);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff);
+
+ nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, tx_control);
+ nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
+ nes_write_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA, tx_pause_quanta);
+ nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONTROL, rx_control);
+ nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONFIG, rx_config);
+ nes_write_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM, mac_exact_match);
+ nes_write_indexed(nesdev, NES_IDX_MPP_DEBUG, mpp_debug);
+
+ } while (0);
+
+ nesadapter->mac_sw_state[0] = NES_MAC_SW_IDLE;
+no_mh_work:
+ nesdev->nesadapter->mh_timer.expires = jiffies + (HZ/5);
+ add_timer(&nesdev->nesadapter->mh_timer);
+}
+
+/**
+ * nes_clc
+ */
+void nes_clc(unsigned long parm)
+{
+ unsigned long flags;
+ struct nes_device *nesdev = (struct nes_device *)parm;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+
+ spin_lock_irqsave(&nesadapter->phy_lock, flags);
+ nesadapter->link_interrupt_count[0] = 0;
+ nesadapter->link_interrupt_count[1] = 0;
+ nesadapter->link_interrupt_count[2] = 0;
+ nesadapter->link_interrupt_count[3] = 0;
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+
+ nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */
+ add_timer(&nesadapter->lc_timer);
+}
+
+
+/**
+ * nes_dump_mem
+ */
+void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length)
+{
+ char xlate[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+ 'a', 'b', 'c', 'd', 'e', 'f'};
+ char *ptr;
+ char hex_buf[80];
+ char ascii_buf[20];
+ int num_char;
+ int num_ascii;
+ int num_hex;
+
+ if (!(nes_debug_level & dump_debug_level)) {
+ return;
+ }
+
+ ptr = addr;
+ if (length > 0x100) {
+ nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100);
+ length = 0x100;
+ }
+ nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", ptr, length, length);
+
+ memset(ascii_buf, 0, 20);
+ memset(hex_buf, 0, 80);
+
+ num_ascii = 0;
+ num_hex = 0;
+ for (num_char = 0; num_char < length; num_char++) {
+ if (num_ascii == 8) {
+ ascii_buf[num_ascii++] = ' ';
+ hex_buf[num_hex++] = '-';
+ hex_buf[num_hex++] = ' ';
+ }
+
+ if (*ptr < 0x20 || *ptr > 0x7e)
+ ascii_buf[num_ascii++] = '.';
+ else
+ ascii_buf[num_ascii++] = *ptr;
+ hex_buf[num_hex++] = xlate[((*ptr & 0xf0) >> 4)];
+ hex_buf[num_hex++] = xlate[*ptr & 0x0f];
+ hex_buf[num_hex++] = ' ';
+ ptr++;
+
+ if (num_ascii >= 17) {
+ /* output line and reset */
+ nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf);
+ memset(ascii_buf, 0, 20);
+ memset(hex_buf, 0, 80);
+ num_ascii = 0;
+ num_hex = 0;
+ }
+ }
+
+ /* output the rest */
+ if (num_ascii) {
+ while (num_ascii < 17) {
+ if (num_ascii == 8) {
+ hex_buf[num_hex++] = ' ';
+ hex_buf[num_hex++] = ' ';
+ }
+ hex_buf[num_hex++] = ' ';
+ hex_buf[num_hex++] = ' ';
+ hex_buf[num_hex++] = ' ';
+ num_ascii++;
+ }
+
+ nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf);
+ }
+}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
new file mode 100644
index 0000000..e0f658d
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -0,0 +1,3801 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/highmem.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "nes.h"
+
+#include <rdma/ib_umem.h>
+
+atomic_t mod_qp_timouts;
+atomic_t qps_created;
+atomic_t sw_qps_destroyed;
+
+static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
+
+/**
+ * nes_alloc_mw
+ */
+static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd)
+{
+ struct nes_pd *nespd = to_nespd(ibpd);
+ struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_cqp_request *cqp_request;
+ struct nes_mr *nesmr;
+ struct ib_mw *ibmw;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ int ret;
+ u32 stag;
+ u32 stag_index = 0;
+ u32 next_stag_index = 0;
+ u32 driver_key = 0;
+ u8 stag_key = 0;
+
+ get_random_bytes(&next_stag_index, sizeof(next_stag_index));
+ stag_key = (u8)next_stag_index;
+
+ driver_key = 0;
+
+ next_stag_index >>= 8;
+ next_stag_index %= nesadapter->max_mr;
+
+ ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
+ nesadapter->max_mr, &stag_index, &next_stag_index);
+ if (ret) {
+ return ERR_PTR(ret);
+ }
+
+ nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
+ if (!nesmr) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ stag = stag_index << 8;
+ stag |= driver_key;
+ stag += (u32)stag_key;
+
+ nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n",
+ stag, stag_index);
+
+ /* Register the region with the adapter */
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ kfree(nesmr);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ cqp_request->waiting = 1;
+ cqp_wqe = &cqp_request->cqp_wqe;
+
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
+ cpu_to_le32( NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_RIGHTS_REMOTE_READ |
+ NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_VA_TO |
+ NES_CQP_STAG_REM_ACC_EN);
+
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+
+ atomic_set(&cqp_request->refcount, 2);
+ nes_post_cqp_request(nesdev, cqp_request);
+
+ /* Wait for CQP */
+ ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
+ NES_EVENT_TIMEOUT);
+ nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
+ " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+ stag, ret, cqp_request->major_code, cqp_request->minor_code);
+ if ((!ret) || (cqp_request->major_code)) {
+ nes_put_cqp_request(nesdev, cqp_request);
+ kfree(nesmr);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ if (!ret) {
+ return ERR_PTR(-ETIME);
+ } else {
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+ nes_put_cqp_request(nesdev, cqp_request);
+
+ nesmr->ibmw.rkey = stag;
+ nesmr->mode = IWNES_MEMREG_TYPE_MW;
+ ibmw = &nesmr->ibmw;
+ nesmr->pbl_4k = 0;
+ nesmr->pbls_used = 0;
+
+ return ibmw;
+}
+
+
+/**
+ * nes_dealloc_mw
+ */
+static int nes_dealloc_mw(struct ib_mw *ibmw)
+{
+ struct nes_mr *nesmr = to_nesmw(ibmw);
+ struct nes_vnic *nesvnic = to_nesvnic(ibmw->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_cqp_request *cqp_request;
+ int err = 0;
+ int ret;
+
+ /* Deallocate the window with the adapter */
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
+ return -ENOMEM;
+ }
+ cqp_request->waiting = 1;
+ cqp_wqe = &cqp_request->cqp_wqe;
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, NES_CQP_DEALLOCATE_STAG);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey);
+
+ atomic_set(&cqp_request->refcount, 2);
+ nes_post_cqp_request(nesdev, cqp_request);
+
+ /* Wait for CQP */
+ nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n",
+ ibmw->rkey);
+ ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
+ NES_EVENT_TIMEOUT);
+ nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u,"
+ " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+ ret, cqp_request->major_code, cqp_request->minor_code);
+ if (!ret)
+ err = -ETIME;
+ else if (cqp_request->major_code)
+ err = -EIO;
+
+ nes_put_cqp_request(nesdev, cqp_request);
+
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ (ibmw->rkey & 0x0fffff00) >> 8);
+ kfree(nesmr);
+
+ return err;
+}
+
+
+/**
+ * nes_bind_mw
+ */
+static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
+ struct ib_mw_bind *ibmw_bind)
+{
+ u64 u64temp;
+ struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ /* struct nes_mr *nesmr = to_nesmw(ibmw); */
+ struct nes_qp *nesqp = to_nesqp(ibqp);
+ struct nes_hw_qp_wqe *wqe;
+ unsigned long flags = 0;
+ u32 head;
+ u32 wqe_misc = 0;
+ u32 qsize;
+
+ if (nesqp->ibqp_state > IB_QPS_RTS)
+ return -EINVAL;
+
+ spin_lock_irqsave(&nesqp->lock, flags);
+
+ head = nesqp->hwqp.sq_head;
+ qsize = nesqp->hwqp.sq_tail;
+
+ /* Check for SQ overflow */
+ if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ return -EINVAL;
+ }
+
+ wqe = &nesqp->hwqp.sq_vbase[head];
+ /* nes_debug(NES_DBG_MR, "processing sq wqe at %p, head = %u.\n", wqe, head); */
+ nes_fill_init_qp_wqe(wqe, nesqp, head);
+ u64temp = ibmw_bind->wr_id;
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp);
+ wqe_misc = NES_IWARP_SQ_OP_BIND;
+
+ wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+
+ if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
+ wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
+
+ if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_WRITE) {
+ wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
+ }
+ if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_READ) {
+ wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
+ }
+
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX, ibmw_bind->mr->lkey);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
+ ibmw_bind->length);
+ wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
+ u64temp = (u64)ibmw_bind->addr;
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);
+
+ head++;
+ if (head >= qsize)
+ head = 0;
+
+ nesqp->hwqp.sq_head = head;
+ barrier();
+
+ nes_write32(nesdev->regs+NES_WQE_ALLOC,
+ (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
+
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+
+ return 0;
+}
+
+
+/**
+ * nes_alloc_fmr
+ */
+static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd,
+ int ibmr_access_flags,
+ struct ib_fmr_attr *ibfmr_attr)
+{
+ unsigned long flags;
+ struct nes_pd *nespd = to_nespd(ibpd);
+ struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_fmr *nesfmr;
+ struct nes_cqp_request *cqp_request;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ int ret;
+ u32 stag;
+ u32 stag_index = 0;
+ u32 next_stag_index = 0;
+ u32 driver_key = 0;
+ u32 opcode = 0;
+ u8 stag_key = 0;
+ int i=0;
+ struct nes_vpbl vpbl;
+
+ get_random_bytes(&next_stag_index, sizeof(next_stag_index));
+ stag_key = (u8)next_stag_index;
+
+ driver_key = 0;
+
+ next_stag_index >>= 8;
+ next_stag_index %= nesadapter->max_mr;
+
+ ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
+ nesadapter->max_mr, &stag_index, &next_stag_index);
+ if (ret) {
+ goto failed_resource_alloc;
+ }
+
+ nesfmr = kzalloc(sizeof(*nesfmr), GFP_KERNEL);
+ if (!nesfmr) {
+ ret = -ENOMEM;
+ goto failed_fmr_alloc;
+ }
+
+ nesfmr->nesmr.mode = IWNES_MEMREG_TYPE_FMR;
+ if (ibfmr_attr->max_pages == 1) {
+ /* use zero length PBL */
+ nesfmr->nesmr.pbl_4k = 0;
+ nesfmr->nesmr.pbls_used = 0;
+ } else if (ibfmr_attr->max_pages <= 32) {
+ /* use PBL 256 */
+ nesfmr->nesmr.pbl_4k = 0;
+ nesfmr->nesmr.pbls_used = 1;
+ } else if (ibfmr_attr->max_pages <= 512) {
+ /* use 4K PBLs */
+ nesfmr->nesmr.pbl_4k = 1;
+ nesfmr->nesmr.pbls_used = 1;
+ } else {
+ /* use two level 4K PBLs */
+ /* add support for two level 256B PBLs */
+ nesfmr->nesmr.pbl_4k = 1;
+ nesfmr->nesmr.pbls_used = 1 + (ibfmr_attr->max_pages >> 9) +
+ ((ibfmr_attr->max_pages & 511) ? 1 : 0);
+ }
+ /* Register the region with the adapter */
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+
+ /* track PBL resources */
+ if (nesfmr->nesmr.pbls_used != 0) {
+ if (nesfmr->nesmr.pbl_4k) {
+ if (nesfmr->nesmr.pbls_used > nesadapter->free_4kpbl) {
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ ret = -ENOMEM;
+ goto failed_vpbl_avail;
+ } else {
+ nesadapter->free_4kpbl -= nesfmr->nesmr.pbls_used;
+ }
+ } else {
+ if (nesfmr->nesmr.pbls_used > nesadapter->free_256pbl) {
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ ret = -ENOMEM;
+ goto failed_vpbl_avail;
+ } else {
+ nesadapter->free_256pbl -= nesfmr->nesmr.pbls_used;
+ }
+ }
+ }
+
+ /* one level pbl */
+ if (nesfmr->nesmr.pbls_used == 0) {
+ nesfmr->root_vpbl.pbl_vbase = NULL;
+ nes_debug(NES_DBG_MR, "zero level pbl \n");
+ } else if (nesfmr->nesmr.pbls_used == 1) {
+ /* can change it to kmalloc & dma_map_single */
+ nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+ &nesfmr->root_vpbl.pbl_pbase);
+ if (!nesfmr->root_vpbl.pbl_vbase) {
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ ret = -ENOMEM;
+ goto failed_vpbl_alloc;
+ }
+ nesfmr->leaf_pbl_cnt = 0;
+ nes_debug(NES_DBG_MR, "one level pbl, root_vpbl.pbl_vbase=%p \n",
+ nesfmr->root_vpbl.pbl_vbase);
+ }
+ /* two level pbl */
+ else {
+ nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
+ &nesfmr->root_vpbl.pbl_pbase);
+ if (!nesfmr->root_vpbl.pbl_vbase) {
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ ret = -ENOMEM;
+ goto failed_vpbl_alloc;
+ }
+
+ nesfmr->leaf_pbl_cnt = nesfmr->nesmr.pbls_used-1;
+ nesfmr->root_vpbl.leaf_vpbl = kzalloc(sizeof(*nesfmr->root_vpbl.leaf_vpbl)*1024, GFP_ATOMIC);
+ if (!nesfmr->root_vpbl.leaf_vpbl) {
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ ret = -ENOMEM;
+ goto failed_leaf_vpbl_alloc;
+ }
+
+ nes_debug(NES_DBG_MR, "two level pbl, root_vpbl.pbl_vbase=%p"
+ " leaf_pbl_cnt=%d root_vpbl.leaf_vpbl=%p\n",
+ nesfmr->root_vpbl.pbl_vbase, nesfmr->leaf_pbl_cnt, nesfmr->root_vpbl.leaf_vpbl);
+
+ for (i=0; i<nesfmr->leaf_pbl_cnt; i++)
+ nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase = NULL;
+
+ for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {
+ vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+ &vpbl.pbl_pbase);
+
+ if (!vpbl.pbl_vbase) {
+ ret = -ENOMEM;
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ goto failed_leaf_vpbl_pages_alloc;
+ }
+
+ nesfmr->root_vpbl.pbl_vbase[i].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
+ nesfmr->root_vpbl.pbl_vbase[i].pa_high = cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
+ nesfmr->root_vpbl.leaf_vpbl[i] = vpbl;
+
+ nes_debug(NES_DBG_MR, "pbase_low=0x%x, pbase_high=0x%x, vpbl=%p\n",
+ nesfmr->root_vpbl.pbl_vbase[i].pa_low,
+ nesfmr->root_vpbl.pbl_vbase[i].pa_high,
+ &nesfmr->root_vpbl.leaf_vpbl[i]);
+ }
+ }
+ nesfmr->ib_qp = NULL;
+ nesfmr->access_rights = 0;
+
+ stag = stag_index << 8;
+ stag |= driver_key;
+ stag += (u32)stag_key;
+
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
+ ret = -ENOMEM;
+ goto failed_leaf_vpbl_pages_alloc;
+ }
+ cqp_request->waiting = 1;
+ cqp_wqe = &cqp_request->cqp_wqe;
+
+ nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n",
+ stag, stag_index);
+
+ opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR;
+
+ if (nesfmr->nesmr.pbl_4k == 1)
+ opcode |= NES_CQP_STAG_PBL_BLK_SIZE;
+
+ if (ibmr_access_flags & IB_ACCESS_REMOTE_WRITE) {
+ opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE |
+ NES_CQP_STAG_RIGHTS_LOCAL_WRITE | NES_CQP_STAG_REM_ACC_EN;
+ nesfmr->access_rights |=
+ NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_RIGHTS_LOCAL_WRITE |
+ NES_CQP_STAG_REM_ACC_EN;
+ }
+
+ if (ibmr_access_flags & IB_ACCESS_REMOTE_READ) {
+ opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ |
+ NES_CQP_STAG_RIGHTS_LOCAL_READ | NES_CQP_STAG_REM_ACC_EN;
+ nesfmr->access_rights |=
+ NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_RIGHTS_LOCAL_READ |
+ NES_CQP_STAG_REM_ACC_EN;
+ }
+
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+
+ cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] =
+ cpu_to_le32((nesfmr->nesmr.pbls_used>1) ?
+ (nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used);
+
+ atomic_set(&cqp_request->refcount, 2);
+ nes_post_cqp_request(nesdev, cqp_request);
+
+ /* Wait for CQP */
+ ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
+ NES_EVENT_TIMEOUT);
+ nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
+ " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+ stag, ret, cqp_request->major_code, cqp_request->minor_code);
+
+ if ((!ret) || (cqp_request->major_code)) {
+ nes_put_cqp_request(nesdev, cqp_request);
+ ret = (!ret) ? -ETIME : -EIO;
+ goto failed_leaf_vpbl_pages_alloc;
+ }
+ nes_put_cqp_request(nesdev, cqp_request);
+ nesfmr->nesmr.ibfmr.lkey = stag;
+ nesfmr->nesmr.ibfmr.rkey = stag;
+ nesfmr->attr = *ibfmr_attr;
+
+ return &nesfmr->nesmr.ibfmr;
+
+ failed_leaf_vpbl_pages_alloc:
+ /* unroll all allocated pages */
+ for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {
+ if (nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase) {
+ pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase,
+ nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase);
+ }
+ }
+ if (nesfmr->root_vpbl.leaf_vpbl)
+ kfree(nesfmr->root_vpbl.leaf_vpbl);
+
+ failed_leaf_vpbl_alloc:
+ if (nesfmr->leaf_pbl_cnt == 0) {
+ if (nesfmr->root_vpbl.pbl_vbase)
+ pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase,
+ nesfmr->root_vpbl.pbl_pbase);
+ } else
+ pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase,
+ nesfmr->root_vpbl.pbl_pbase);
+
+ failed_vpbl_alloc:
+ if (nesfmr->nesmr.pbls_used != 0) {
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ if (nesfmr->nesmr.pbl_4k)
+ nesadapter->free_4kpbl += nesfmr->nesmr.pbls_used;
+ else
+ nesadapter->free_256pbl += nesfmr->nesmr.pbls_used;
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ }
+
+failed_vpbl_avail:
+ kfree(nesfmr);
+
+ failed_fmr_alloc:
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+
+ failed_resource_alloc:
+ return ERR_PTR(ret);
+}
+
+
+/**
+ * nes_dealloc_fmr
+ */
+static int nes_dealloc_fmr(struct ib_fmr *ibfmr)
+{
+ unsigned long flags;
+ struct nes_mr *nesmr = to_nesmr_from_ibfmr(ibfmr);
+ struct nes_fmr *nesfmr = to_nesfmr(nesmr);
+ struct nes_vnic *nesvnic = to_nesvnic(ibfmr->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ int i = 0;
+
+ /* free the resources */
+ if (nesfmr->leaf_pbl_cnt == 0) {
+ /* single PBL case */
+ if (nesfmr->root_vpbl.pbl_vbase)
+ pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase,
+ nesfmr->root_vpbl.pbl_pbase);
+ } else {
+ for (i = 0; i < nesfmr->leaf_pbl_cnt; i++) {
+ pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase,
+ nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase);
+ }
+ kfree(nesfmr->root_vpbl.leaf_vpbl);
+ pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase,
+ nesfmr->root_vpbl.pbl_pbase);
+ }
+ nesmr->ibmw.device = ibfmr->device;
+ nesmr->ibmw.pd = ibfmr->pd;
+ nesmr->ibmw.rkey = ibfmr->rkey;
+ nesmr->ibmw.uobject = NULL;
+
+ if (nesfmr->nesmr.pbls_used != 0) {
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ if (nesfmr->nesmr.pbl_4k) {
+ nesadapter->free_4kpbl += nesfmr->nesmr.pbls_used;
+ WARN_ON(nesadapter->free_4kpbl > nesadapter->max_4kpbl);
+ } else {
+ nesadapter->free_256pbl += nesfmr->nesmr.pbls_used;
+ WARN_ON(nesadapter->free_256pbl > nesadapter->max_256pbl);
+ }
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ }
+
+ return nes_dealloc_mw(&nesmr->ibmw);
+}
+
+
+/**
+ * nes_map_phys_fmr
+ */
+static int nes_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova)
+{
+ return 0;
+}
+
+
+/**
+ * nes_unmap_fmr
+ */
+static int nes_unmap_fmr(struct list_head *ibfmr_list)
+{
+ return 0;
+}
+
+
+
+/**
+ * nes_query_device
+ */
+static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
+{
+ struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_ib_device *nesibdev = nesvnic->nesibdev;
+
+ memset(props, 0, sizeof(*props));
+ memcpy(&props->sys_image_guid, nesvnic->netdev->dev_addr, 6);
+
+ props->fw_ver = nesdev->nesadapter->fw_ver;
+ props->device_cap_flags = nesdev->nesadapter->device_cap_flags;
+ props->vendor_id = nesdev->nesadapter->vendor_id;
+ props->vendor_part_id = nesdev->nesadapter->vendor_part_id;
+ props->hw_ver = nesdev->nesadapter->hw_rev;
+ props->max_mr_size = 0x80000000;
+ props->max_qp = nesibdev->max_qp;
+ props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2;
+ props->max_sge = nesdev->nesadapter->max_sge;
+ props->max_cq = nesibdev->max_cq;
+ props->max_cqe = nesdev->nesadapter->max_cqe - 1;
+ props->max_mr = nesibdev->max_mr;
+ props->max_mw = nesibdev->max_mr;
+ props->max_pd = nesibdev->max_pd;
+ props->max_sge_rd = 1;
+ switch (nesdev->nesadapter->max_irrq_wr) {
+ case 0:
+ props->max_qp_rd_atom = 1;
+ break;
+ case 1:
+ props->max_qp_rd_atom = 4;
+ break;
+ case 2:
+ props->max_qp_rd_atom = 16;
+ break;
+ case 3:
+ props->max_qp_rd_atom = 32;
+ break;
+ default:
+ props->max_qp_rd_atom = 0;
+ }
+ props->max_qp_init_rd_atom = props->max_qp_wr;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->max_map_per_fmr = 1;
+
+ return 0;
+}
+
+
+/**
+ * nes_query_port
+ */
+static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props)
+{
+ memset(props, 0, sizeof(*props));
+
+ props->max_mtu = IB_MTU_2048;
+ props->active_mtu = IB_MTU_2048;
+ props->lid = 1;
+ props->lmc = 0;
+ props->sm_lid = 0;
+ props->sm_sl = 0;
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = 0;
+ props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+ props->gid_tbl_len = 1;
+ props->pkey_tbl_len = 1;
+ props->qkey_viol_cntr = 0;
+ props->active_width = IB_WIDTH_4X;
+ props->active_speed = 1;
+ props->max_msg_sz = 0x80000000;
+
+ return 0;
+}
+
+
+/**
+ * nes_modify_port
+ */
+static int nes_modify_port(struct ib_device *ibdev, u8 port,
+ int port_modify_mask, struct ib_port_modify *props)
+{
+ return 0;
+}
+
+
+/**
+ * nes_query_pkey
+ */
+static int nes_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+ *pkey = 0;
+ return 0;
+}
+
+
+/**
+ * nes_query_gid
+ */
+static int nes_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *gid)
+{
+ struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+
+ memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+ memcpy(&(gid->raw[0]), nesvnic->netdev->dev_addr, 6);
+
+ return 0;
+}
+
+
+/**
+ * nes_alloc_ucontext - Allocate the user context data structure. This keeps track
+ * of all objects associated with a particular user-mode client.
+ */
+static struct ib_ucontext *nes_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_alloc_ucontext_req req;
+ struct nes_alloc_ucontext_resp uresp;
+ struct nes_ucontext *nes_ucontext;
+ struct nes_ib_device *nesibdev = nesvnic->nesibdev;
+
+
+ if (ib_copy_from_udata(&req, udata, sizeof(struct nes_alloc_ucontext_req))) {
+ printk(KERN_ERR PFX "Invalid structure size on allocate user context.\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (req.userspace_ver != NES_ABI_USERSPACE_VER) {
+ printk(KERN_ERR PFX "Invalid userspace driver version detected. Detected version %d, should be %d\n",
+ req.userspace_ver, NES_ABI_USERSPACE_VER);
+ return ERR_PTR(-EINVAL);
+ }
+
+
+ memset(&uresp, 0, sizeof uresp);
+
+ uresp.max_qps = nesibdev->max_qp;
+ uresp.max_pds = nesibdev->max_pd;
+ uresp.wq_size = nesdev->nesadapter->max_qp_wr * 2;
+ uresp.virtwq = nesadapter->virtwq;
+ uresp.kernel_ver = NES_ABI_KERNEL_VER;
+
+ nes_ucontext = kzalloc(sizeof *nes_ucontext, GFP_KERNEL);
+ if (!nes_ucontext)
+ return ERR_PTR(-ENOMEM);
+
+ nes_ucontext->nesdev = nesdev;
+ nes_ucontext->mmap_wq_offset = uresp.max_pds;
+ nes_ucontext->mmap_cq_offset = nes_ucontext->mmap_wq_offset +
+ ((sizeof(struct nes_hw_qp_wqe) * uresp.max_qps * 2) + PAGE_SIZE-1) /
+ PAGE_SIZE;
+
+
+ if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
+ kfree(nes_ucontext);
+ return ERR_PTR(-EFAULT);
+ }
+
+ INIT_LIST_HEAD(&nes_ucontext->cq_reg_mem_list);
+ INIT_LIST_HEAD(&nes_ucontext->qp_reg_mem_list);
+ atomic_set(&nes_ucontext->usecnt, 1);
+ return &nes_ucontext->ibucontext;
+}
+
+
+/**
+ * nes_dealloc_ucontext
+ */
+static int nes_dealloc_ucontext(struct ib_ucontext *context)
+{
+ /* struct nes_vnic *nesvnic = to_nesvnic(context->device); */
+ /* struct nes_device *nesdev = nesvnic->nesdev; */
+ struct nes_ucontext *nes_ucontext = to_nesucontext(context);
+
+ if (!atomic_dec_and_test(&nes_ucontext->usecnt))
+ return 0;
+ kfree(nes_ucontext);
+ return 0;
+}
+
+
+/**
+ * nes_mmap
+ */
+static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ unsigned long index;
+ struct nes_vnic *nesvnic = to_nesvnic(context->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ /* struct nes_adapter *nesadapter = nesdev->nesadapter; */
+ struct nes_ucontext *nes_ucontext;
+ struct nes_qp *nesqp;
+
+ nes_ucontext = to_nesucontext(context);
+
+
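+	/* Dispatch on the page offset: values at or above mmap_wq_offset map a
+	 * QP's work queue pages, anything below maps a user doorbell page. */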
+ if (vma->vm_pgoff >= nes_ucontext->mmap_wq_offset) {
+ index = (vma->vm_pgoff - nes_ucontext->mmap_wq_offset) * PAGE_SIZE;
+ index /= ((sizeof(struct nes_hw_qp_wqe) * nesdev->nesadapter->max_qp_wr * 2) +
+ PAGE_SIZE-1) & (~(PAGE_SIZE-1));
+ if (!test_bit(index, nes_ucontext->allocated_wqs)) {
+ nes_debug(NES_DBG_MMAP, "wq %lu not allocated\n", index);
+ return -EFAULT;
+ }
+ nesqp = nes_ucontext->mmap_nesqp[index];
+ if (nesqp == NULL) {
+ nes_debug(NES_DBG_MMAP, "wq %lu has a NULL QP base.\n", index);
+ return -EFAULT;
+ }
+ if (remap_pfn_range(vma, vma->vm_start,
+ virt_to_phys(nesqp->hwqp.sq_vbase) >> PAGE_SHIFT,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot)) {
+ nes_debug(NES_DBG_MMAP, "remap_pfn_range failed.\n");
+ return -EAGAIN;
+ }
+ vma->vm_private_data = nesqp;
+ return 0;
+ } else {
+ index = vma->vm_pgoff;
+ if (!test_bit(index, nes_ucontext->allocated_doorbells))
+ return -EFAULT;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ if (io_remap_pfn_range(vma, vma->vm_start,
+ (nesdev->doorbell_start +
+ ((nes_ucontext->mmap_db_index[index] - nesdev->base_doorbell_index) * 4096))
+ >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+ vma->vm_private_data = nes_ucontext;
+ return 0;
+ }
+
+ return -ENOSYS;
+}
+
+
+/**
+ * nes_alloc_pd
+ */
+static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context, struct ib_udata *udata)
+{
+ struct nes_pd *nespd;
+ struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_ucontext *nesucontext;
+ struct nes_alloc_pd_resp uresp;
+ u32 pd_num = 0;
+ int err;
+
+ nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n",
+ nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context,
+ atomic_read(&nesvnic->netdev->refcnt));
+
+ err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds,
+ nesadapter->max_pd, &pd_num, &nesadapter->next_pd);
+ if (err) {
+ return ERR_PTR(err);
+ }
+
+ nespd = kzalloc(sizeof (struct nes_pd), GFP_KERNEL);
+ if (!nespd) {
+ nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n",
+ nespd, nesvnic->nesibdev->ibdev.name);
+
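+	/* pd_id reserves 2^(PAGE_SHIFT-12) ids per allocated index (one host
+	 * page worth of 4KB doorbell regions) on top of the adapter's base PD;
+	 * nes_dealloc_pd() reverses the shift. */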
+ nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd;
+
+ if (context) {
+ nesucontext = to_nesucontext(context);
+ nespd->mmap_db_index = find_next_zero_bit(nesucontext->allocated_doorbells,
+ NES_MAX_USER_DB_REGIONS, nesucontext->first_free_db);
+		nes_debug(NES_DBG_PD, "find_next_zero_bit on doorbells returned %u, mapping pd_id %u.\n",
+ nespd->mmap_db_index, nespd->pd_id);
+ if (nespd->mmap_db_index >= NES_MAX_USER_DB_REGIONS) {
+ nes_debug(NES_DBG_PD, "mmap_db_index > MAX\n");
+ nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
+ kfree(nespd);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ uresp.pd_id = nespd->pd_id;
+ uresp.mmap_db_index = nespd->mmap_db_index;
+ if (ib_copy_to_udata(udata, &uresp, sizeof (struct nes_alloc_pd_resp))) {
+ nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
+ kfree(nespd);
+ return ERR_PTR(-EFAULT);
+ }
+
+ set_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells);
+ nesucontext->mmap_db_index[nespd->mmap_db_index] = nespd->pd_id;
+ nesucontext->first_free_db = nespd->mmap_db_index + 1;
+ }
+
+ nes_debug(NES_DBG_PD, "PD%u structure located @%p.\n", nespd->pd_id, nespd);
+ return &nespd->ibpd;
+}
+
+
+/**
+ * nes_dealloc_pd
+ */
+static int nes_dealloc_pd(struct ib_pd *ibpd)
+{
+ struct nes_ucontext *nesucontext;
+ struct nes_pd *nespd = to_nespd(ibpd);
+ struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+
+ if ((ibpd->uobject) && (ibpd->uobject->context)) {
+ nesucontext = to_nesucontext(ibpd->uobject->context);
+ nes_debug(NES_DBG_PD, "Clearing bit %u from allocated doorbells\n",
+ nespd->mmap_db_index);
+ clear_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells);
+ nesucontext->mmap_db_index[nespd->mmap_db_index] = 0;
+ if (nesucontext->first_free_db > nespd->mmap_db_index) {
+ nesucontext->first_free_db = nespd->mmap_db_index;
+ }
+ }
+
+ nes_debug(NES_DBG_PD, "Deallocating PD%u structure located @%p.\n",
+ nespd->pd_id, nespd);
+ nes_free_resource(nesadapter, nesadapter->allocated_pds,
+ (nespd->pd_id-nesadapter->base_pd)>>(PAGE_SHIFT-12));
+ kfree(nespd);
+
+ return 0;
+}
+
+
+/**
+ * nes_create_ah
+ */
+static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+
+/**
+ * nes_destroy_ah
+ */
+static int nes_destroy_ah(struct ib_ah *ah)
+{
+ return -ENOSYS;
+}
+
+
+/**
+ * nes_get_encoded_size
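+ *
+ * Rounds *size up to the next supported WQE count (32, 128 or 512) and
+ * returns the corresponding hardware encoding (1, 2 or 3); returns 0 if
+ * the requested size is larger than 512.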
+ */
+static inline u8 nes_get_encoded_size(int *size)
+{
+ u8 encoded_size = 0;
+ if (*size <= 32) {
+ *size = 32;
+ encoded_size = 1;
+ } else if (*size <= 128) {
+ *size = 128;
+ encoded_size = 2;
+ } else if (*size <= 512) {
+ *size = 512;
+ encoded_size = 3;
+ }
+ return (encoded_size);
+}
+
+
+
+/**
+ * nes_setup_virt_qp
+ */
+static int nes_setup_virt_qp(struct nes_qp *nesqp, struct nes_pbl *nespbl,
+ struct nes_vnic *nesvnic, int sq_size, int rq_size)
+{
+ unsigned long flags;
+ void *mem;
+ __le64 *pbl = NULL;
+ __le64 *tpbl;
+ __le64 *pblbuffer;
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 pbl_entries;
+ u8 rq_pbl_entries;
+ u8 sq_pbl_entries;
+
+ pbl_entries = nespbl->pbl_size >> 3;
+ nes_debug(NES_DBG_QP, "Userspace PBL, pbl_size=%u, pbl_entries = %d pbl_vbase=%p, pbl_pbase=%lx\n",
+ nespbl->pbl_size, pbl_entries,
+ (void *)nespbl->pbl_vbase,
+ (unsigned long) nespbl->pbl_pbase);
+ pbl = (__le64 *) nespbl->pbl_vbase; /* points to first pbl entry */
+	/* Now set the SQ and RQ base addresses from the userspace PBL: the
+	 * SQ page entries come first, followed by the RQ page entries. */
+ rq_pbl_entries = (rq_size * sizeof(struct nes_hw_qp_wqe)) >> 12;
+ sq_pbl_entries = (sq_size * sizeof(struct nes_hw_qp_wqe)) >> 12;
+ nesqp->hwqp.sq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32);
+ if (!nespbl->page) {
+		nes_debug(NES_DBG_QP, "QP nespbl->page is NULL\n");
+ kfree(nespbl);
+ return -ENOMEM;
+ }
+
+ nesqp->hwqp.sq_vbase = kmap(nespbl->page);
+ nesqp->page = nespbl->page;
+ if (!nesqp->hwqp.sq_vbase) {
+ nes_debug(NES_DBG_QP, "QP sq_vbase kmap failed\n");
+ kfree(nespbl);
+ return -ENOMEM;
+ }
+
+ /* Now to get to sq.. we need to calculate how many */
+ /* PBL entries were used by the rq.. */
+ pbl += sq_pbl_entries;
+ nesqp->hwqp.rq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32);
+ /* nesqp->hwqp.rq_vbase = bus_to_virt(*pbl); */
+ /*nesqp->hwqp.rq_vbase = phys_to_virt(*pbl); */
+
+ nes_debug(NES_DBG_QP, "QP sq_vbase= %p sq_pbase=%lx rq_vbase=%p rq_pbase=%lx\n",
+ nesqp->hwqp.sq_vbase, (unsigned long) nesqp->hwqp.sq_pbase,
+ nesqp->hwqp.rq_vbase, (unsigned long) nesqp->hwqp.rq_pbase);
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ if (!nesadapter->free_256pbl) {
+ pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
+ nespbl->pbl_pbase);
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ kunmap(nesqp->page);
+ kfree(nespbl);
+ return -ENOMEM;
+ }
+ nesadapter->free_256pbl--;
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+
+ nesqp->pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 256, &nesqp->pbl_pbase);
+ pblbuffer = nesqp->pbl_vbase;
+ if (!nesqp->pbl_vbase) {
+ /* memory allocated during nes_reg_user_mr() */
+ pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
+ nespbl->pbl_pbase);
+ kfree(nespbl);
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ nesadapter->free_256pbl++;
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ kunmap(nesqp->page);
+ return -ENOMEM;
+ }
+ memset(nesqp->pbl_vbase, 0, 256);
+ /* fill in the page address in the pbl buffer.. */
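+	/* The userspace PBL lists the SQ pages first, then the RQ pages; place
+	 * the SQ entries in the second half of the 256-byte hardware PBL
+	 * (entry 16 onward) and the RQ entries in the first half. */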
+ tpbl = pblbuffer + 16;
+ pbl = (__le64 *)nespbl->pbl_vbase;
+ while (sq_pbl_entries--)
+ *tpbl++ = *pbl++;
+ tpbl = pblbuffer;
+ while (rq_pbl_entries--)
+ *tpbl++ = *pbl++;
+
+ /* done with memory allocated during nes_reg_user_mr() */
+ pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
+ nespbl->pbl_pbase);
+ kfree(nespbl);
+
+ nesqp->qp_mem_size =
+ max((u32)sizeof(struct nes_qp_context), ((u32)256)) + 256; /* this is Q2 */
+ /* Round up to a multiple of a page */
+ nesqp->qp_mem_size += PAGE_SIZE - 1;
+ nesqp->qp_mem_size &= ~(PAGE_SIZE - 1);
+
+ mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size,
+ &nesqp->hwqp.q2_pbase);
+
+ if (!mem) {
+ pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase);
+ nesqp->pbl_vbase = NULL;
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ nesadapter->free_256pbl++;
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ kunmap(nesqp->page);
+ return -ENOMEM;
+ }
+ nesqp->hwqp.q2_vbase = mem;
+ mem += 256;
+ memset(nesqp->hwqp.q2_vbase, 0, 256);
+ nesqp->nesqp_context = mem;
+ memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context));
+ nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256;
+
+ return 0;
+}
+
+
+/**
+ * nes_setup_mmap_qp
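+ *
+ * Lays out the SQ WQEs, RQ WQEs, 256-byte Q2 area and QP context in one
+ * page-aligned pci_alloc_consistent() allocation.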
+ */
+static int nes_setup_mmap_qp(struct nes_qp *nesqp, struct nes_vnic *nesvnic,
+ int sq_size, int rq_size)
+{
+ void *mem;
+ struct nes_device *nesdev = nesvnic->nesdev;
+
+ nesqp->qp_mem_size = (sizeof(struct nes_hw_qp_wqe) * sq_size) +
+ (sizeof(struct nes_hw_qp_wqe) * rq_size) +
+ max((u32)sizeof(struct nes_qp_context), ((u32)256)) +
+ 256; /* this is Q2 */
+ /* Round up to a multiple of a page */
+ nesqp->qp_mem_size += PAGE_SIZE - 1;
+ nesqp->qp_mem_size &= ~(PAGE_SIZE - 1);
+
+ mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size,
+ &nesqp->hwqp.sq_pbase);
+ if (!mem)
+ return -ENOMEM;
+ nes_debug(NES_DBG_QP, "PCI consistent memory for "
+ "host descriptor rings located @ %p (pa = 0x%08lX.) size = %u.\n",
+ mem, (unsigned long)nesqp->hwqp.sq_pbase, nesqp->qp_mem_size);
+
+ memset(mem, 0, nesqp->qp_mem_size);
+
+ nesqp->hwqp.sq_vbase = mem;
+ mem += sizeof(struct nes_hw_qp_wqe) * sq_size;
+
+ nesqp->hwqp.rq_vbase = mem;
+ nesqp->hwqp.rq_pbase = nesqp->hwqp.sq_pbase +
+ sizeof(struct nes_hw_qp_wqe) * sq_size;
+ mem += sizeof(struct nes_hw_qp_wqe) * rq_size;
+
+ nesqp->hwqp.q2_vbase = mem;
+ nesqp->hwqp.q2_pbase = nesqp->hwqp.rq_pbase +
+ sizeof(struct nes_hw_qp_wqe) * rq_size;
+ mem += 256;
+ memset(nesqp->hwqp.q2_vbase, 0, 256);
+
+ nesqp->nesqp_context = mem;
+ nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256;
+ memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context));
+ return 0;
+}
+
+
+/**
+ * nes_free_qp_mem() is to free up the qp's pci_alloc_consistent() memory.
+ */
+static inline void nes_free_qp_mem(struct nes_device *nesdev,
+ struct nes_qp *nesqp, int virt_wqs)
+{
+ unsigned long flags;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ if (!virt_wqs) {
+ pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
+ nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase);
+	} else {
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ nesadapter->free_256pbl++;
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
+		pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase);
+ nesqp->pbl_vbase = NULL;
+ kunmap(nesqp->page);
+ }
+}
+
+
+/**
+ * nes_create_qp
+ */
+static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr, struct ib_udata *udata)
+{
+	u64 u64temp = 0;
+ u64 u64nesqp = 0;
+ struct nes_pd *nespd = to_nespd(ibpd);
+ struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_qp *nesqp;
+ struct nes_cq *nescq;
+ struct nes_ucontext *nes_ucontext;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_cqp_request *cqp_request;
+ struct nes_create_qp_req req;
+ struct nes_create_qp_resp uresp;
+ struct nes_pbl *nespbl = NULL;
+ u32 qp_num = 0;
+ u32 opcode = 0;
+ /* u32 counter = 0; */
+ void *mem;
+ unsigned long flags;
+ int ret;
+ int err;
+ int virt_wqs = 0;
+ int sq_size;
+ int rq_size;
+ u8 sq_encoded_size;
+ u8 rq_encoded_size;
+ /* int counter; */
+
+ if (init_attr->create_flags)
+ return ERR_PTR(-EINVAL);
+
+ atomic_inc(&qps_created);
+ switch (init_attr->qp_type) {
+ case IB_QPT_RC:
+ if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) {
+ init_attr->cap.max_inline_data = 0;
+ } else {
+ init_attr->cap.max_inline_data = 64;
+ }
+ sq_size = init_attr->cap.max_send_wr;
+ rq_size = init_attr->cap.max_recv_wr;
+
+ /* check if the encoded sizes are OK or not... */
+ sq_encoded_size = nes_get_encoded_size(&sq_size);
+ rq_encoded_size = nes_get_encoded_size(&rq_size);
+
+ if ((!sq_encoded_size) || (!rq_encoded_size)) {
+ nes_debug(NES_DBG_QP, "ERROR bad rq (%u) or sq (%u) size\n",
+ rq_size, sq_size);
+ return ERR_PTR(-EINVAL);
+ }
+
+		init_attr->cap.max_send_wr = sq_size - 2;
+		init_attr->cap.max_recv_wr = rq_size - 1;
+ nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size);
+
+ ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps,
+ nesadapter->max_qp, &qp_num, &nesadapter->next_qp);
+ if (ret) {
+ return ERR_PTR(ret);
+ }
+
+ /* Need 512 (actually now 1024) byte alignment on this structure */
+ mem = kzalloc(sizeof(*nesqp)+NES_SW_CONTEXT_ALIGN-1, GFP_KERNEL);
+ if (!mem) {
+ nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+ nes_debug(NES_DBG_QP, "Unable to allocate QP\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ u64nesqp = (unsigned long)mem;
+ u64nesqp += ((u64)NES_SW_CONTEXT_ALIGN) - 1;
+ u64temp = ((u64)NES_SW_CONTEXT_ALIGN) - 1;
+ u64nesqp &= ~u64temp;
+ nesqp = (struct nes_qp *)(unsigned long)u64nesqp;
+ /* nes_debug(NES_DBG_QP, "nesqp=%p, allocated buffer=%p. Rounded to closest %u\n",
+ nesqp, mem, NES_SW_CONTEXT_ALIGN); */
+ nesqp->allocated_buffer = mem;
+
+ if (udata) {
+ if (ib_copy_from_udata(&req, udata, sizeof(struct nes_create_qp_req))) {
+ nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+ kfree(nesqp->allocated_buffer);
+ nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n");
+				return ERR_PTR(-EFAULT);
+ }
+ if (req.user_wqe_buffers) {
+ virt_wqs = 1;
+ }
+ if ((ibpd->uobject) && (ibpd->uobject->context)) {
+ nesqp->user_mode = 1;
+ nes_ucontext = to_nesucontext(ibpd->uobject->context);
+ if (virt_wqs) {
+ err = 1;
+ list_for_each_entry(nespbl, &nes_ucontext->qp_reg_mem_list, list) {
+						if (nespbl->user_base == (unsigned long)req.user_wqe_buffers) {
+ list_del(&nespbl->list);
+ err = 0;
+ nes_debug(NES_DBG_QP, "Found PBL for virtual QP. nespbl=%p. user_base=0x%lx\n",
+ nespbl, nespbl->user_base);
+ break;
+ }
+ }
+ if (err) {
+ nes_debug(NES_DBG_QP, "Didn't Find PBL for virtual QP. address = %llx.\n",
+ (long long unsigned int)req.user_wqe_buffers);
+ nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+ kfree(nesqp->allocated_buffer);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+
+ nes_ucontext = to_nesucontext(ibpd->uobject->context);
+ nesqp->mmap_sq_db_index =
+ find_next_zero_bit(nes_ucontext->allocated_wqs,
+ NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq);
+			/* nes_debug(NES_DBG_QP, "find_next_zero_bit on wqs returned %u\n",
+					nespd->mmap_db_index); */
+ if (nesqp->mmap_sq_db_index >= NES_MAX_USER_WQ_REGIONS) {
+ nes_debug(NES_DBG_QP,
+ "db index > max user regions, failing create QP\n");
+ nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+ if (virt_wqs) {
+ pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
+ nespbl->pbl_pbase);
+ kfree(nespbl);
+ }
+ kfree(nesqp->allocated_buffer);
+ return ERR_PTR(-ENOMEM);
+ }
+ set_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs);
+ nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = nesqp;
+ nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index + 1;
+ } else {
+ nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+ kfree(nesqp->allocated_buffer);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+ err = (!virt_wqs) ? nes_setup_mmap_qp(nesqp, nesvnic, sq_size, rq_size) :
+ nes_setup_virt_qp(nesqp, nespbl, nesvnic, sq_size, rq_size);
+ if (err) {
+			nes_debug(NES_DBG_QP,
+					"error getting qp mem, code = %d\n", err);
+ nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+ kfree(nesqp->allocated_buffer);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ nesqp->hwqp.sq_size = sq_size;
+ nesqp->hwqp.sq_encoded_size = sq_encoded_size;
+ nesqp->hwqp.sq_head = 1;
+ nesqp->hwqp.rq_size = rq_size;
+ nesqp->hwqp.rq_encoded_size = rq_encoded_size;
+ /* nes_debug(NES_DBG_QP, "nesqp->nesqp_context_pbase = %p\n",
+ (void *)nesqp->nesqp_context_pbase);
+ */
+ nesqp->hwqp.qp_id = qp_num;
+ nesqp->ibqp.qp_num = nesqp->hwqp.qp_id;
+ nesqp->nespd = nespd;
+
+ nescq = to_nescq(init_attr->send_cq);
+ nesqp->nesscq = nescq;
+ nescq = to_nescq(init_attr->recv_cq);
+ nesqp->nesrcq = nescq;
+
+ nesqp->nesqp_context->misc |= cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) <<
+ NES_QPCONTEXT_MISC_PCI_FCN_SHIFT);
+ nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.rq_encoded_size <<
+ NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT);
+ nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.sq_encoded_size <<
+ NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT);
+ if (!udata) {
+ nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_PRIV_EN);
+ nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_FAST_REGISTER_EN);
+ }
+ nesqp->nesqp_context->cqs = cpu_to_le32(nesqp->nesscq->hw_cq.cq_number +
+ ((u32)nesqp->nesrcq->hw_cq.cq_number << 16));
+ u64temp = (u64)nesqp->hwqp.sq_pbase;
+ nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp);
+ nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
+
+
+ if (!virt_wqs) {
+ u64temp = (u64)nesqp->hwqp.sq_pbase;
+ nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp);
+ nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
+ u64temp = (u64)nesqp->hwqp.rq_pbase;
+ nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp);
+ nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
+ } else {
+ u64temp = (u64)nesqp->pbl_pbase;
+ nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp);
+ nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
+ }
+
+ /* nes_debug(NES_DBG_QP, "next_qp_nic_index=%u, using nic_index=%d\n",
+ nesvnic->next_qp_nic_index,
+ nesvnic->qp_nic_index[nesvnic->next_qp_nic_index]); */
+ spin_lock_irqsave(&nesdev->cqp.lock, flags);
+ nesqp->nesqp_context->misc2 |= cpu_to_le32(
+ (u32)nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] <<
+ NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT);
+ nesvnic->next_qp_nic_index++;
+ if ((nesvnic->next_qp_nic_index > 3) ||
+ (nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] == 0xf)) {
+ nesvnic->next_qp_nic_index = 0;
+ }
+ spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+
+ nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32((u32)nesqp->nespd->pd_id << 16);
+ u64temp = (u64)nesqp->hwqp.q2_pbase;
+ nesqp->nesqp_context->q2_addr_low = cpu_to_le32((u32)u64temp);
+ nesqp->nesqp_context->q2_addr_high = cpu_to_le32((u32)(u64temp >> 32));
+ nesqp->nesqp_context->aeq_token_low = cpu_to_le32((u32)((unsigned long)(nesqp)));
+ nesqp->nesqp_context->aeq_token_high = cpu_to_le32((u32)(upper_32_bits((unsigned long)(nesqp))));
+ nesqp->nesqp_context->ird_ord_sizes = cpu_to_le32(NES_QPCONTEXT_ORDIRD_ALSMM |
+ ((((u32)nesadapter->max_irrq_wr) <<
+ NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT) & NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK));
+ if (disable_mpa_crc) {
+ nes_debug(NES_DBG_QP, "Disabling MPA crc checking due to module option.\n");
+ nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(NES_QPCONTEXT_ORDIRD_RNMC);
+ }
+
+
+ /* Create the QP */
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ nes_debug(NES_DBG_QP, "Failed to get a cqp_request\n");
+ nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+			nes_free_qp_mem(nesdev, nesqp, virt_wqs);
+ kfree(nesqp->allocated_buffer);
+ return ERR_PTR(-ENOMEM);
+ }
+ cqp_request->waiting = 1;
+ cqp_wqe = &cqp_request->cqp_wqe;
+
+ if (!virt_wqs) {
+ opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP |
+ NES_CQP_QP_IWARP_STATE_IDLE;
+ } else {
+ opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_VIRT_WQS |
+ NES_CQP_QP_IWARP_STATE_IDLE;
+ }
+ opcode |= NES_CQP_QP_CQS_VALID;
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
+
+ u64temp = (u64)nesqp->nesqp_context_pbase;
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
+
+ atomic_set(&cqp_request->refcount, 2);
+ nes_post_cqp_request(nesdev, cqp_request);
+
+ /* Wait for CQP */
+ nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n",
+ nesqp->hwqp.qp_id);
+ ret = wait_event_timeout(cqp_request->waitq,
+ (cqp_request->request_done != 0), NES_EVENT_TIMEOUT);
+ nes_debug(NES_DBG_QP, "Create iwarp QP%u completed, wait_event_timeout ret=%u,"
+ " nesdev->cqp_head = %u, nesdev->cqp.sq_tail = %u,"
+ " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+ nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail,
+ cqp_request->major_code, cqp_request->minor_code);
+ if ((!ret) || (cqp_request->major_code)) {
+ nes_put_cqp_request(nesdev, cqp_request);
+ nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+			nes_free_qp_mem(nesdev, nesqp, virt_wqs);
+ kfree(nesqp->allocated_buffer);
+ if (!ret) {
+ return ERR_PTR(-ETIME);
+ } else {
+ return ERR_PTR(-EIO);
+ }
+ }
+
+ nes_put_cqp_request(nesdev, cqp_request);
+
+ if (ibpd->uobject) {
+ uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index;
+ uresp.actual_sq_size = sq_size;
+ uresp.actual_rq_size = rq_size;
+ uresp.qp_id = nesqp->hwqp.qp_id;
+ uresp.nes_drv_opt = nes_drv_opt;
+ if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
+ nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
+				nes_free_qp_mem(nesdev, nesqp, virt_wqs);
+ kfree(nesqp->allocated_buffer);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+
+		nes_debug(NES_DBG_QP, "QP%u structure located @%p. Size = %u.\n",
+ nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp));
+ spin_lock_init(&nesqp->lock);
+ init_waitqueue_head(&nesqp->state_waitq);
+ init_waitqueue_head(&nesqp->kick_waitq);
+ nes_add_ref(&nesqp->ibqp);
+ break;
+ default:
+ nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* update the QP table */
+ nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
+ nes_debug(NES_DBG_QP, "netdev refcnt=%u\n",
+ atomic_read(&nesvnic->netdev->refcnt));
+
+ return &nesqp->ibqp;
+}
+
+
+/**
+ * nes_destroy_qp
+ */
+static int nes_destroy_qp(struct ib_qp *ibqp)
+{
+ struct nes_qp *nesqp = to_nesqp(ibqp);
+ /* struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); */
+ struct nes_ucontext *nes_ucontext;
+ struct ib_qp_attr attr;
+ struct iw_cm_id *cm_id;
+ struct iw_cm_event cm_event;
+ int ret;
+
+ atomic_inc(&sw_qps_destroyed);
+ nesqp->destroyed = 1;
+
+ /* Blow away the connection if it exists. */
+ if (nesqp->ibqp_state >= IB_QPS_INIT && nesqp->ibqp_state <= IB_QPS_RTS) {
+ /* if (nesqp->ibqp_state == IB_QPS_RTS) { */
+ attr.qp_state = IB_QPS_ERR;
+ nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
+ }
+
+ if (((nesqp->ibqp_state == IB_QPS_INIT) ||
+ (nesqp->ibqp_state == IB_QPS_RTR)) && (nesqp->cm_id)) {
+ cm_id = nesqp->cm_id;
+ cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+ cm_event.status = IW_CM_EVENT_STATUS_TIMEOUT;
+ cm_event.local_addr = cm_id->local_addr;
+ cm_event.remote_addr = cm_id->remote_addr;
+ cm_event.private_data = NULL;
+ cm_event.private_data_len = 0;
+
+ nes_debug(NES_DBG_QP, "Generating a CM Timeout Event for "
+ "QP%u. cm_id = %p, refcount = %u. \n",
+ nesqp->hwqp.qp_id, cm_id, atomic_read(&nesqp->refcount));
+
+ cm_id->rem_ref(cm_id);
+ ret = cm_id->event_handler(cm_id, &cm_event);
+ if (ret)
+ nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret);
+ }
+
+
+ if (nesqp->user_mode) {
+ if ((ibqp->uobject)&&(ibqp->uobject->context)) {
+ nes_ucontext = to_nesucontext(ibqp->uobject->context);
+ clear_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs);
+ nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = NULL;
+ if (nes_ucontext->first_free_wq > nesqp->mmap_sq_db_index) {
+ nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index;
+ }
+ }
+ if (nesqp->pbl_pbase)
+ kunmap(nesqp->page);
+ }
+
+ nes_rem_ref(&nesqp->ibqp);
+ return 0;
+}
+
+
+/**
+ * nes_create_cq
+ */
+static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
+ int comp_vector,
+ struct ib_ucontext *context, struct ib_udata *udata)
+{
+ u64 u64temp;
+ struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_cq *nescq;
+ struct nes_ucontext *nes_ucontext = NULL;
+ struct nes_cqp_request *cqp_request;
+ void *mem = NULL;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_pbl *nespbl = NULL;
+ struct nes_create_cq_req req;
+ struct nes_create_cq_resp resp;
+ u32 cq_num = 0;
+ u32 opcode = 0;
+ u32 pbl_entries = 1;
+ int err;
+ unsigned long flags;
+ int ret;
+
+ err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs,
+ nesadapter->max_cq, &cq_num, &nesadapter->next_cq);
+ if (err) {
+ return ERR_PTR(err);
+ }
+
+ nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL);
+ if (!nescq) {
+ nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+ nes_debug(NES_DBG_CQ, "Unable to allocate nes_cq struct\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ nescq->hw_cq.cq_size = max(entries + 1, 5);
+ nescq->hw_cq.cq_number = cq_num;
+ nescq->ibcq.cqe = nescq->hw_cq.cq_size - 1;
+
+
+ if (context) {
+ nes_ucontext = to_nesucontext(context);
+ if (ib_copy_from_udata(&req, udata, sizeof (struct nes_create_cq_req))) {
+ nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+ kfree(nescq);
+ return ERR_PTR(-EFAULT);
+ }
+ nesvnic->mcrq_ucontext = nes_ucontext;
+ nes_ucontext->mcrqf = req.mcrqf;
+ if (nes_ucontext->mcrqf) {
+ if (nes_ucontext->mcrqf & 0x80000000)
+ nescq->hw_cq.cq_number = nesvnic->nic.qp_id + 28 + 2 * ((nes_ucontext->mcrqf & 0xf) - 1);
+ else if (nes_ucontext->mcrqf & 0x40000000)
+ nescq->hw_cq.cq_number = nes_ucontext->mcrqf & 0xffff;
+ else
+ nescq->hw_cq.cq_number = nesvnic->mcrq_qp_id + nes_ucontext->mcrqf-1;
+ nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+ }
+ nes_debug(NES_DBG_CQ, "CQ Virtual Address = %08lX, size = %u.\n",
+ (unsigned long)req.user_cq_buffer, entries);
+ err = 1;
+ list_for_each_entry(nespbl, &nes_ucontext->cq_reg_mem_list, list) {
+			if (nespbl->user_base == (unsigned long)req.user_cq_buffer) {
+ list_del(&nespbl->list);
+ err = 0;
+ nes_debug(NES_DBG_CQ, "Found PBL for virtual CQ. nespbl=%p.\n",
+ nespbl);
+ break;
+ }
+ }
+ if (err) {
+ nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+ kfree(nescq);
+ return ERR_PTR(-EFAULT);
+ }
+
+ pbl_entries = nespbl->pbl_size >> 3;
+ nescq->cq_mem_size = 0;
+ } else {
+ nescq->cq_mem_size = nescq->hw_cq.cq_size * sizeof(struct nes_hw_cqe);
+ nes_debug(NES_DBG_CQ, "Attempting to allocate pci memory (%u entries, %u bytes) for CQ%u.\n",
+ entries, nescq->cq_mem_size, nescq->hw_cq.cq_number);
+
+ /* allocate the physical buffer space */
+ mem = pci_alloc_consistent(nesdev->pcidev, nescq->cq_mem_size,
+ &nescq->hw_cq.cq_pbase);
+ if (!mem) {
+ printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n");
+ nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+ kfree(nescq);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ memset(mem, 0, nescq->cq_mem_size);
+ nescq->hw_cq.cq_vbase = mem;
+ nescq->hw_cq.cq_head = 0;
+ nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n",
+ nescq->hw_cq.cq_number, nescq->hw_cq.cq_vbase,
+ (u32)nescq->hw_cq.cq_pbase);
+ }
+
+ nescq->hw_cq.ce_handler = nes_iwarp_ce_handler;
+ spin_lock_init(&nescq->lock);
+
+ /* send CreateCQ request to CQP */
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n");
+ if (!context)
+ pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
+ nescq->hw_cq.cq_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+ kfree(nescq);
+ return ERR_PTR(-ENOMEM);
+ }
+ cqp_request->waiting = 1;
+ cqp_wqe = &cqp_request->cqp_wqe;
+
+ opcode = NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
+ NES_CQP_CQ_CHK_OVERFLOW |
+ NES_CQP_CQ_CEQE_MASK | ((u32)nescq->hw_cq.cq_size << 16);
+
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+
+ if (pbl_entries != 1) {
+ if (pbl_entries > 32) {
+ /* use 4k pbl */
+ nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries);
+ if (nesadapter->free_4kpbl == 0) {
+ if (cqp_request->dynamic) {
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ kfree(cqp_request);
+ } else {
+ list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ }
+ if (!context)
+ pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
+ nescq->hw_cq.cq_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+ kfree(nescq);
+ return ERR_PTR(-ENOMEM);
+ } else {
+ opcode |= (NES_CQP_CQ_VIRT | NES_CQP_CQ_4KB_CHUNK);
+ nescq->virtual_cq = 2;
+ nesadapter->free_4kpbl--;
+ }
+ } else {
+ /* use 256 byte pbl */
+ nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries);
+ if (nesadapter->free_256pbl == 0) {
+ if (cqp_request->dynamic) {
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ kfree(cqp_request);
+ } else {
+ list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ }
+ if (!context)
+ pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
+ nescq->hw_cq.cq_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+ kfree(nescq);
+ return ERR_PTR(-ENOMEM);
+ } else {
+ opcode |= NES_CQP_CQ_VIRT;
+ nescq->virtual_cq = 1;
+ nesadapter->free_256pbl--;
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+ (nescq->hw_cq.cq_number | ((u32)nesdev->ceq_index << 16)));
+
+ if (context) {
+ if (pbl_entries != 1)
+ u64temp = (u64)nespbl->pbl_pbase;
+ else
+ u64temp = le64_to_cpu(nespbl->pbl_vbase[0]);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX,
+ nes_ucontext->mmap_db_index[0]);
+ } else {
+ u64temp = (u64)nescq->hw_cq.cq_pbase;
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
+ }
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
+ u64temp = (u64)(unsigned long)&nescq->hw_cq;
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] =
+ cpu_to_le32((u32)(u64temp >> 1));
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
+ cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
+
+ atomic_set(&cqp_request->refcount, 2);
+ nes_post_cqp_request(nesdev, cqp_request);
+
+ /* Wait for CQP */
+ nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n",
+ nescq->hw_cq.cq_number);
+ ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
+ NES_EVENT_TIMEOUT * 2);
+ nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n",
+ nescq->hw_cq.cq_number, ret);
+ if ((!ret) || (cqp_request->major_code)) {
+ nes_put_cqp_request(nesdev, cqp_request);
+ if (!context)
+ pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
+ nescq->hw_cq.cq_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+ kfree(nescq);
+ return ERR_PTR(-EIO);
+ }
+ nes_put_cqp_request(nesdev, cqp_request);
+
+ if (context) {
+ /* free the nespbl */
+ pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
+ nespbl->pbl_pbase);
+ kfree(nespbl);
+ resp.cq_id = nescq->hw_cq.cq_number;
+ resp.cq_size = nescq->hw_cq.cq_size;
+ resp.mmap_db_index = 0;
+ if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+ nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
+ kfree(nescq);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+
+ return &nescq->ibcq;
+}
+
+
+/**
+ * nes_destroy_cq
+ */
+static int nes_destroy_cq(struct ib_cq *ib_cq)
+{
+ struct nes_cq *nescq;
+ struct nes_device *nesdev;
+ struct nes_vnic *nesvnic;
+ struct nes_adapter *nesadapter;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_cqp_request *cqp_request;
+ unsigned long flags;
+ u32 opcode = 0;
+ int ret;
+
+ if (ib_cq == NULL)
+ return 0;
+
+ nescq = to_nescq(ib_cq);
+ nesvnic = to_nesvnic(ib_cq->device);
+ nesdev = nesvnic->nesdev;
+ nesadapter = nesdev->nesadapter;
+
+ nes_debug(NES_DBG_CQ, "Destroy CQ%u\n", nescq->hw_cq.cq_number);
+
+ /* Send DestroyCQ request to CQP */
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n");
+ return -ENOMEM;
+ }
+ cqp_request->waiting = 1;
+ cqp_wqe = &cqp_request->cqp_wqe;
+ opcode = NES_CQP_DESTROY_CQ | (nescq->hw_cq.cq_size << 16);
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ if (nescq->virtual_cq == 1) {
+ nesadapter->free_256pbl++;
+ if (nesadapter->free_256pbl > nesadapter->max_256pbl) {
+ printk(KERN_ERR PFX "%s: free 256B PBLs(%u) has exceeded the max(%u)\n",
+ __func__, nesadapter->free_256pbl, nesadapter->max_256pbl);
+ }
+ } else if (nescq->virtual_cq == 2) {
+ nesadapter->free_4kpbl++;
+ if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) {
+ printk(KERN_ERR PFX "%s: free 4K PBLs(%u) has exceeded the max(%u)\n",
+ __func__, nesadapter->free_4kpbl, nesadapter->max_4kpbl);
+ }
+ opcode |= NES_CQP_CQ_4KB_CHUNK;
+ }
+
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+ (nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16)));
+ nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number);
+ atomic_set(&cqp_request->refcount, 2);
+ nes_post_cqp_request(nesdev, cqp_request);
+
+ /* Wait for CQP */
+ nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n",
+ nescq->hw_cq.cq_number);
+ ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
+ NES_EVENT_TIMEOUT);
+ nes_debug(NES_DBG_CQ, "Destroy iWARP CQ%u completed, wait_event_timeout ret = %u,"
+ " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+ nescq->hw_cq.cq_number, ret, cqp_request->major_code,
+ cqp_request->minor_code);
+ if (!ret) {
+ nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n",
+ nescq->hw_cq.cq_number);
+ ret = -ETIME;
+ } else if (cqp_request->major_code) {
+ nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n",
+ nescq->hw_cq.cq_number);
+ ret = -EIO;
+ } else {
+ ret = 0;
+ }
+ nes_put_cqp_request(nesdev, cqp_request);
+
+ if (nescq->cq_mem_size)
+ pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size,
+ nescq->hw_cq.cq_vbase, nescq->hw_cq.cq_pbase);
+ kfree(nescq);
+
+ return ret;
+}
+
+
+/**
+ * nes_reg_mr
+ */
+static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
+ u32 stag, u64 region_length, struct nes_root_vpbl *root_vpbl,
+ dma_addr_t single_buffer, u16 pbl_count, u16 residual_page_count,
+ int acc, u64 *iova_start)
+{
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_cqp_request *cqp_request;
+ unsigned long flags;
+ int ret;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ /* int count; */
+ u32 opcode = 0;
+ u16 major_code;
+
+ /* Register the region with the adapter */
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
+ return -ENOMEM;
+ }
+ cqp_request->waiting = 1;
+ cqp_wqe = &cqp_request->cqp_wqe;
+
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ /* track PBL resources */
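+	/* A two-level registration consumes one 4KB root PBL plus pbl_count
+	 * 4KB leaves; a single leaf with more than 32 residual pages needs a
+	 * 4KB PBL, otherwise a 256-byte PBL is sufficient. */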
+ if (pbl_count != 0) {
+ if (pbl_count > 1) {
+ /* Two level PBL */
+ if ((pbl_count+1) > nesadapter->free_4kpbl) {
+ nes_debug(NES_DBG_MR, "Out of 4KB Pbls for two level request.\n");
+ if (cqp_request->dynamic) {
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ kfree(cqp_request);
+ } else {
+ list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ }
+ return -ENOMEM;
+ } else {
+ nesadapter->free_4kpbl -= pbl_count+1;
+ }
+ } else if (residual_page_count > 32) {
+ if (pbl_count > nesadapter->free_4kpbl) {
+ nes_debug(NES_DBG_MR, "Out of 4KB Pbls.\n");
+ if (cqp_request->dynamic) {
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ kfree(cqp_request);
+ } else {
+ list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ }
+ return -ENOMEM;
+ } else {
+ nesadapter->free_4kpbl -= pbl_count;
+ }
+ } else {
+ if (pbl_count > nesadapter->free_256pbl) {
+ nes_debug(NES_DBG_MR, "Out of 256B Pbls.\n");
+ if (cqp_request->dynamic) {
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ kfree(cqp_request);
+ } else {
+ list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ }
+ return -ENOMEM;
+ } else {
+ nesadapter->free_256pbl -= pbl_count;
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+
+ opcode = NES_CQP_REGISTER_STAG | NES_CQP_STAG_RIGHTS_LOCAL_READ |
+ NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR;
+ if (acc & IB_ACCESS_LOCAL_WRITE)
+ opcode |= NES_CQP_STAG_RIGHTS_LOCAL_WRITE;
+ if (acc & IB_ACCESS_REMOTE_WRITE)
+ opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_REM_ACC_EN;
+ if (acc & IB_ACCESS_REMOTE_READ)
+ opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_REM_ACC_EN;
+ if (acc & IB_ACCESS_MW_BIND)
+ opcode |= NES_CQP_STAG_RIGHTS_WINDOW_BIND | NES_CQP_STAG_REM_ACC_EN;
+
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, *iova_start);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, region_length);
+
+ cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] =
+ cpu_to_le32((u32)(region_length >> 8) & 0xff000000);
+ cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |=
+ cpu_to_le32(nespd->pd_id & 0x00007fff);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+
+ if (pbl_count == 0) {
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, single_buffer);
+ } else {
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, root_vpbl->pbl_pbase);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, pbl_count);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX,
+ (((pbl_count - 1) * 4096) + (residual_page_count*8)));
+
+ if ((pbl_count > 1) || (residual_page_count > 32))
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE);
+ }
+ barrier();
+
+ atomic_set(&cqp_request->refcount, 2);
+ nes_post_cqp_request(nesdev, cqp_request);
+
+ /* Wait for CQP */
+ ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
+ NES_EVENT_TIMEOUT);
+ nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
+ " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+ stag, ret, cqp_request->major_code, cqp_request->minor_code);
+ major_code = cqp_request->major_code;
+ nes_put_cqp_request(nesdev, cqp_request);
+
+	if (!ret)
+		return -ETIME;
+	else if (major_code)
+		return -EIO;
+
+	return 0;
+}
+
+
+/**
+ * nes_reg_phys_mr
+ */
+static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
+ struct ib_phys_buf *buffer_list, int num_phys_buf, int acc,
+ u64 * iova_start)
+{
+ u64 region_length;
+ struct nes_pd *nespd = to_nespd(ib_pd);
+ struct nes_vnic *nesvnic = to_nesvnic(ib_pd->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_mr *nesmr;
+ struct ib_mr *ibmr;
+ struct nes_vpbl vpbl;
+ struct nes_root_vpbl root_vpbl;
+ u32 stag;
+ u32 i;
+ u32 stag_index = 0;
+ u32 next_stag_index = 0;
+ u32 driver_key = 0;
+ u32 root_pbl_index = 0;
+ u32 cur_pbl_index = 0;
+ int err = 0, pbl_depth = 0;
+ int ret = 0;
+ u16 pbl_count = 0;
+ u8 single_page = 1;
+ u8 stag_key = 0;
+
+ pbl_depth = 0;
+ region_length = 0;
+ vpbl.pbl_vbase = NULL;
+ root_vpbl.pbl_vbase = NULL;
+ root_vpbl.pbl_pbase = 0;
+
+ get_random_bytes(&next_stag_index, sizeof(next_stag_index));
+ stag_key = (u8)next_stag_index;
+
+ driver_key = 0;
+
+ next_stag_index >>= 8;
+ next_stag_index %= nesadapter->max_mr;
+ if (num_phys_buf > (1024*512)) {
+ return ERR_PTR(-E2BIG);
+ }
+
+ err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
+ &stag_index, &next_stag_index);
+ if (err) {
+ return ERR_PTR(err);
+ }
+
+ nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
+ if (!nesmr) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ return ERR_PTR(-ENOMEM);
+ }
+
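+	/* Build the leaf/root PBL tree: each 4KB leaf holds 512 page addresses,
+	 * a new leaf is started every 512 buffers, and an 8KB root PBL (up to
+	 * 1024 leaf pointers) is allocated once a second leaf is needed. */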
+ for (i = 0; i < num_phys_buf; i++) {
+
+ if ((i & 0x01FF) == 0) {
+ if (root_pbl_index == 1) {
+ /* Allocate the root PBL */
+ root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
+ &root_vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
+ root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
+ if (!root_vpbl.pbl_vbase) {
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+ vpbl.pbl_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ kfree(nesmr);
+ return ERR_PTR(-ENOMEM);
+ }
+ root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL);
+ if (!root_vpbl.leaf_vpbl) {
+ pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
+ root_vpbl.pbl_pbase);
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+ vpbl.pbl_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ kfree(nesmr);
+ return ERR_PTR(-ENOMEM);
+ }
+ root_vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
+ root_vpbl.pbl_vbase[0].pa_high =
+ cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
+ root_vpbl.leaf_vpbl[0] = vpbl;
+ }
+ /* Allocate a 4K buffer for the PBL */
+ vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+ &vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
+ vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
+ if (!vpbl.pbl_vbase) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ ibmr = ERR_PTR(-ENOMEM);
+ kfree(nesmr);
+ goto reg_phys_err;
+ }
+ /* Fill in the root table */
+ if (1 <= root_pbl_index) {
+ root_vpbl.pbl_vbase[root_pbl_index].pa_low =
+ cpu_to_le32((u32)vpbl.pbl_pbase);
+ root_vpbl.pbl_vbase[root_pbl_index].pa_high =
+ cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
+ root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
+ }
+ root_pbl_index++;
+ cur_pbl_index = 0;
+ }
+ if (buffer_list[i].addr & ~PAGE_MASK) {
+ /* TODO: Unwind allocated buffers */
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
+ (unsigned int) buffer_list[i].addr);
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_phys_err;
+ }
+
+ if (!buffer_list[i].size) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_phys_err;
+ }
+
+ region_length += buffer_list[i].size;
+ if ((i != 0) && (single_page)) {
+ if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr)
+ single_page = 0;
+ }
+ vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr);
+ vpbl.pbl_vbase[cur_pbl_index++].pa_high =
+ cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32)));
+ }
+
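+	/* Compose the STag: allocated index in bits 31:8, 8-bit key derived
+	 * from the random seed in bits 7:0 (driver_key is 0 here). */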
+ stag = stag_index << 8;
+ stag |= driver_key;
+ stag += (u32)stag_key;
+
+ nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%016lX,"
+ " length = 0x%016lX, index = 0x%08X\n",
+ stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index);
+
+ region_length -= (*iova_start)&PAGE_MASK;
+
+ /* Make the leaf PBL the root if only one PBL */
+ if (root_pbl_index == 1) {
+ root_vpbl.pbl_pbase = vpbl.pbl_pbase;
+ }
+
+ if (single_page) {
+ pbl_count = 0;
+ } else {
+ pbl_count = root_pbl_index;
+ }
+ ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
+ buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start);
+
+ if (ret == 0) {
+ nesmr->ibmr.rkey = stag;
+ nesmr->ibmr.lkey = stag;
+ nesmr->mode = IWNES_MEMREG_TYPE_MEM;
+ ibmr = &nesmr->ibmr;
+ nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 1 : 0;
+ nesmr->pbls_used = pbl_count;
+ if (pbl_count > 1) {
+ nesmr->pbls_used++;
+ }
+ } else {
+ kfree(nesmr);
+ ibmr = ERR_PTR(-ENOMEM);
+ }
+
+ reg_phys_err:
+ /* free the resources */
+ if (root_pbl_index == 1) {
+ /* single PBL case */
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
+ } else {
+ for (i=0; i<root_pbl_index; i++) {
+ pci_free_consistent(nesdev->pcidev, 4096, root_vpbl.leaf_vpbl[i].pbl_vbase,
+ root_vpbl.leaf_vpbl[i].pbl_pbase);
+ }
+ kfree(root_vpbl.leaf_vpbl);
+ pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
+ root_vpbl.pbl_pbase);
+ }
+
+ return ibmr;
+}
+
+
+/**
+ * nes_get_dma_mr
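+ *
+ * Provides the DMA memory region by registering a single physical region
+ * spanning addresses 0 through 0xffffffffff.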
+ */
+static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct ib_phys_buf bl;
+ u64 kva = 0;
+
+ nes_debug(NES_DBG_MR, "\n");
+
+ bl.size = (u64)0xffffffffffULL;
+ bl.addr = 0;
+ return nes_reg_phys_mr(pd, &bl, 1, acc, &kva);
+}
+
+
+/**
+ * nes_reg_user_mr
+ */
+static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata)
+{
+ u64 iova_start;
+ __le64 *pbl;
+ u64 region_length;
+ dma_addr_t last_dma_addr = 0;
+ dma_addr_t first_dma_addr = 0;
+ struct nes_pd *nespd = to_nespd(pd);
+ struct nes_vnic *nesvnic = to_nesvnic(pd->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct ib_mr *ibmr = ERR_PTR(-EINVAL);
+ struct ib_umem_chunk *chunk;
+ struct nes_ucontext *nes_ucontext;
+ struct nes_pbl *nespbl;
+ struct nes_mr *nesmr;
+ struct ib_umem *region;
+ struct nes_mem_reg_req req;
+ struct nes_vpbl vpbl;
+ struct nes_root_vpbl root_vpbl;
+ int nmap_index, page_index;
+ int page_count = 0;
+ int err, pbl_depth = 0;
+ int chunk_pages;
+ int ret;
+ u32 stag;
+ u32 stag_index = 0;
+ u32 next_stag_index;
+ u32 driver_key;
+ u32 root_pbl_index = 0;
+ u32 cur_pbl_index = 0;
+ u32 skip_pages;
+ u16 pbl_count;
+ u8 single_page = 1;
+ u8 stag_key;
+
+ region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+ if (IS_ERR(region)) {
+ return (struct ib_mr *)region;
+ }
+
+ nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u,"
+ " offset = %u, page size = %u.\n",
+ (unsigned long int)start, (unsigned long int)virt, (u32)length,
+ region->offset, region->page_size);
+
+ skip_pages = ((u32)region->offset) >> 12;
+
+ if (ib_copy_from_udata(&req, udata, sizeof(req)))
+ return ERR_PTR(-EFAULT);
+ nes_debug(NES_DBG_MR, "Memory Registration type = %08X.\n", req.reg_type);
+
+ switch (req.reg_type) {
+ case IWNES_MEMREG_TYPE_MEM:
+ pbl_depth = 0;
+ region_length = 0;
+ vpbl.pbl_vbase = NULL;
+ root_vpbl.pbl_vbase = NULL;
+ root_vpbl.pbl_pbase = 0;
+
+ get_random_bytes(&next_stag_index, sizeof(next_stag_index));
+ stag_key = (u8)next_stag_index;
+
+ driver_key = next_stag_index & 0x70000000;
+
+ next_stag_index >>= 8;
+ next_stag_index %= nesadapter->max_mr;
+
+ err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
+ nesadapter->max_mr, &stag_index, &next_stag_index);
+ if (err) {
+ ib_umem_release(region);
+ return ERR_PTR(err);
+ }
+
+ nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
+ if (!nesmr) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ return ERR_PTR(-ENOMEM);
+ }
+ nesmr->region = region;
+
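+		/* Walk the pinned umem and build the PBL tree the same way as
+		 * nes_reg_phys_mr(): 512 page addresses per 4KB leaf, with an
+		 * 8KB root PBL allocated once a second leaf is required. */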
+ list_for_each_entry(chunk, &region->chunk_list, list) {
+ nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n",
+ chunk->nents, chunk->nmap);
+ for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
+ if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
+ (unsigned int) sg_dma_address(&chunk->page_list[nmap_index]));
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_user_mr_err;
+ }
+
+ if (!sg_dma_len(&chunk->page_list[nmap_index])) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ stag_index);
+ nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_user_mr_err;
+ }
+
+ region_length += sg_dma_len(&chunk->page_list[nmap_index]);
+ chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
+ region_length -= skip_pages << 12;
+				for (page_index = skip_pages; page_index < chunk_pages; page_index++) {
+					skip_pages = 0;
+					if ((page_count != 0) && (((page_count << 12) - (region->offset & (4096 - 1))) >= region->length))
+						goto enough_pages;
+					if ((page_count & 0x01FF) == 0) {
+ if (page_count >= 1024 * 512) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter,
+ nesadapter->allocated_mrs, stag_index);
+ kfree(nesmr);
+ ibmr = ERR_PTR(-E2BIG);
+ goto reg_user_mr_err;
+ }
+ if (root_pbl_index == 1) {
+ root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
+ 8192, &root_vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
+ root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
+ if (!root_vpbl.pbl_vbase) {
+ ib_umem_release(region);
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+ vpbl.pbl_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ stag_index);
+ kfree(nesmr);
+ ibmr = ERR_PTR(-ENOMEM);
+ goto reg_user_mr_err;
+ }
+ root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024,
+ GFP_KERNEL);
+ if (!root_vpbl.leaf_vpbl) {
+ ib_umem_release(region);
+ pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
+ root_vpbl.pbl_pbase);
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+ vpbl.pbl_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ stag_index);
+ kfree(nesmr);
+ ibmr = ERR_PTR(-ENOMEM);
+ goto reg_user_mr_err;
+ }
+ root_vpbl.pbl_vbase[0].pa_low =
+ cpu_to_le32((u32)vpbl.pbl_pbase);
+ root_vpbl.pbl_vbase[0].pa_high =
+ cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
+ root_vpbl.leaf_vpbl[0] = vpbl;
+ }
+ vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+ &vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
+ vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
+ if (!vpbl.pbl_vbase) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ ibmr = ERR_PTR(-ENOMEM);
+ kfree(nesmr);
+ goto reg_user_mr_err;
+ }
+ if (1 <= root_pbl_index) {
+ root_vpbl.pbl_vbase[root_pbl_index].pa_low =
+ cpu_to_le32((u32)vpbl.pbl_pbase);
+ root_vpbl.pbl_vbase[root_pbl_index].pa_high =
+ cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
+ root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
+ }
+ root_pbl_index++;
+ cur_pbl_index = 0;
+ }
+ if (single_page) {
+ if (page_count != 0) {
+ if ((last_dma_addr+4096) !=
+ (sg_dma_address(&chunk->page_list[nmap_index])+
+ (page_index*4096)))
+ single_page = 0;
+ last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
+ (page_index*4096);
+ } else {
+ first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
+ (page_index*4096);
+ last_dma_addr = first_dma_addr;
+ }
+ }
+
+ vpbl.pbl_vbase[cur_pbl_index].pa_low =
+ cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+
+ (page_index*4096)));
+ vpbl.pbl_vbase[cur_pbl_index].pa_high =
+ cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+
+ (page_index*4096))) >> 32)));
+ cur_pbl_index++;
+ page_count++;
+ }
+ }
+ }
+ enough_pages:
+ nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x,"
+ " stag_key=0x%08x\n",
+ stag_index, driver_key, stag_key);
+ stag = stag_index << 8;
+ stag |= driver_key;
+ stag += (u32)stag_key;
+ if (stag == 0) {
+ stag = 1;
+ }
+
+ iova_start = virt;
+ /* Make the leaf PBL the root if only one PBL */
+ if (root_pbl_index == 1) {
+ root_vpbl.pbl_pbase = vpbl.pbl_pbase;
+ }
+
+ if (single_page) {
+ pbl_count = 0;
+ } else {
+ pbl_count = root_pbl_index;
+ first_dma_addr = 0;
+ }
+ nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%08X, length = 0x%08X,"
+ " index = 0x%08X, region->length=0x%08llx, pbl_count = %u\n",
+ stag, (unsigned int)iova_start,
+ (unsigned int)region_length, stag_index,
+ (unsigned long long)region->length, pbl_count);
+ ret = nes_reg_mr( nesdev, nespd, stag, region->length, &root_vpbl,
+ first_dma_addr, pbl_count, (u16)cur_pbl_index, acc, &iova_start);
+
+ nes_debug(NES_DBG_MR, "ret=%d\n", ret);
+
+ if (ret == 0) {
+ nesmr->ibmr.rkey = stag;
+ nesmr->ibmr.lkey = stag;
+ nesmr->mode = IWNES_MEMREG_TYPE_MEM;
+ ibmr = &nesmr->ibmr;
+ nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 1 : 0;
+ nesmr->pbls_used = pbl_count;
+ if (pbl_count > 1) {
+ nesmr->pbls_used++;
+ }
+ } else {
+ ib_umem_release(region);
+ kfree(nesmr);
+ ibmr = ERR_PTR(-ENOMEM);
+ }
+
+ reg_user_mr_err:
+ /* free the resources */
+ if (root_pbl_index == 1) {
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+ vpbl.pbl_pbase);
+ } else {
+ for (page_index=0; page_index<root_pbl_index; page_index++) {
+ pci_free_consistent(nesdev->pcidev, 4096,
+ root_vpbl.leaf_vpbl[page_index].pbl_vbase,
+ root_vpbl.leaf_vpbl[page_index].pbl_pbase);
+ }
+ kfree(root_vpbl.leaf_vpbl);
+ pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
+ root_vpbl.pbl_pbase);
+ }
+
+ nes_debug(NES_DBG_MR, "Leaving, ibmr=%p", ibmr);
+
+ return ibmr;
+ case IWNES_MEMREG_TYPE_QP:
+ case IWNES_MEMREG_TYPE_CQ:
+ nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL);
+ if (!nespbl) {
+ nes_debug(NES_DBG_MR, "Unable to allocate PBL\n");
+ ib_umem_release(region);
+ return ERR_PTR(-ENOMEM);
+ }
+ nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
+ if (!nesmr) {
+ ib_umem_release(region);
+ kfree(nespbl);
+ nes_debug(NES_DBG_MR, "Unable to allocate nesmr\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ nesmr->region = region;
+ nes_ucontext = to_nesucontext(pd->uobject->context);
+ pbl_depth = region->length >> 12;
+ pbl_depth += (region->length & (4096-1)) ? 1 : 0;
+ nespbl->pbl_size = pbl_depth*sizeof(u64);
+ if (req.reg_type == IWNES_MEMREG_TYPE_QP) {
+ nes_debug(NES_DBG_MR, "Attempting to allocate QP PBL memory");
+ } else {
+			nes_debug(NES_DBG_MR, "Attempting to allocate CQ PBL memory");
+ }
+
+ nes_debug(NES_DBG_MR, " %u bytes, %u entries.\n",
+ nespbl->pbl_size, pbl_depth);
+ pbl = pci_alloc_consistent(nesdev->pcidev, nespbl->pbl_size,
+ &nespbl->pbl_pbase);
+ if (!pbl) {
+ ib_umem_release(region);
+ kfree(nesmr);
+ kfree(nespbl);
+ nes_debug(NES_DBG_MR, "Unable to allocate PBL memory\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ nespbl->pbl_vbase = (u64 *)pbl;
+ nespbl->user_base = start;
+ nes_debug(NES_DBG_MR, "Allocated PBL memory, %u bytes, pbl_pbase=%lx,"
+ " pbl_vbase=%p user_base=0x%lx\n",
+ nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase,
+ (void *) nespbl->pbl_vbase, nespbl->user_base);
+
+ list_for_each_entry(chunk, &region->chunk_list, list) {
+ for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
+ chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
+ chunk_pages += (sg_dma_len(&chunk->page_list[nmap_index]) & (4096-1)) ? 1 : 0;
+ nespbl->page = sg_page(&chunk->page_list[0]);
+ for (page_index=0; page_index<chunk_pages; page_index++) {
+ ((__le32 *)pbl)[0] = cpu_to_le32((u32)
+ (sg_dma_address(&chunk->page_list[nmap_index])+
+ (page_index*4096)));
+ ((__le32 *)pbl)[1] = cpu_to_le32(((u64)
+ (sg_dma_address(&chunk->page_list[nmap_index])+
+ (page_index*4096)))>>32);
+ nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
+ (unsigned long long)*pbl,
+ le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
+ pbl++;
+ }
+ }
+ }
+ if (req.reg_type == IWNES_MEMREG_TYPE_QP) {
+ list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list);
+ } else {
+ list_add_tail(&nespbl->list, &nes_ucontext->cq_reg_mem_list);
+ }
+ nesmr->ibmr.rkey = -1;
+ nesmr->ibmr.lkey = -1;
+ nesmr->mode = req.reg_type;
+ return &nesmr->ibmr;
+ }
+
+ return ERR_PTR(-ENOSYS);
+}
+
+
+/**
+ * nes_dereg_mr
+ */
+static int nes_dereg_mr(struct ib_mr *ib_mr)
+{
+ struct nes_mr *nesmr = to_nesmr(ib_mr);
+ struct nes_vnic *nesvnic = to_nesvnic(ib_mr->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_cqp_request *cqp_request;
+ unsigned long flags;
+ int ret;
+ u16 major_code;
+ u16 minor_code;
+
+ if (nesmr->region) {
+ ib_umem_release(nesmr->region);
+ }
+ if (nesmr->mode != IWNES_MEMREG_TYPE_MEM) {
+ kfree(nesmr);
+ return 0;
+ }
+
+ /* Deallocate the region with the adapter */
+
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
+ return -ENOMEM;
+ }
+ cqp_request->waiting = 1;
+ cqp_wqe = &cqp_request->cqp_wqe;
+
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
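+ /* Return this MR's PBLs to the adapter's free pool (4KB or 256-byte, whichever type it used) */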
+ if (nesmr->pbls_used != 0) {
+ if (nesmr->pbl_4k) {
+ nesadapter->free_4kpbl += nesmr->pbls_used;
+ if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) {
+ printk(KERN_ERR PFX "free 4KB PBLs(%u) has exceeded the max(%u)\n",
+ nesadapter->free_4kpbl, nesadapter->max_4kpbl);
+ }
+ } else {
+ nesadapter->free_256pbl += nesmr->pbls_used;
+ if (nesadapter->free_256pbl > nesadapter->max_256pbl) {
+ printk(KERN_ERR PFX "free 256B PBLs(%u) has exceeded the max(%u)\n",
+ nesadapter->free_256pbl, nesadapter->max_256pbl);
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+ NES_CQP_DEALLOCATE_STAG | NES_CQP_STAG_VA_TO |
+ NES_CQP_STAG_DEALLOC_PBLS | NES_CQP_STAG_MR);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey);
+
+ atomic_set(&cqp_request->refcount, 2);
+ nes_post_cqp_request(nesdev, cqp_request);
+
+ /* Wait for CQP */
+ nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey);
+ ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
+ NES_EVENT_TIMEOUT);
+ nes_debug(NES_DBG_MR, "Deallocate STag 0x%08X completed, wait_event_timeout ret = %u,"
+ " CQP Major:Minor codes = 0x%04X:0x%04X\n",
+ ib_mr->rkey, ret, cqp_request->major_code, cqp_request->minor_code);
+
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ (ib_mr->rkey & 0x0fffff00) >> 8);
+
+ kfree(nesmr);
+
+ major_code = cqp_request->major_code;
+ minor_code = cqp_request->minor_code;
+
+ nes_put_cqp_request(nesdev, cqp_request);
+
+ if (!ret) {
+ nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag,"
+ " ib_mr=%p, rkey = 0x%08X\n",
+ ib_mr, ib_mr->rkey);
+ return -ETIME;
+ } else if (major_code) {
+ nes_debug(NES_DBG_MR, "Error (0x%04X:0x%04X) while attempting"
+ " to destroy STag, ib_mr=%p, rkey = 0x%08X\n",
+ major_code, minor_code, ib_mr, ib_mr->rkey);
+ return -EIO;
+ } else
+ return 0;
+}
+
+
+/**
+ * show_rev
+ */
+static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nes_ib_device *nesibdev =
+ container_of(dev, struct nes_ib_device, ibdev.dev);
+ struct nes_vnic *nesvnic = nesibdev->nesvnic;
+
+ nes_debug(NES_DBG_INIT, "\n");
+ return sprintf(buf, "%x\n", nesvnic->nesdev->nesadapter->hw_rev);
+}
+
+
+/**
+ * show_fw_ver
+ */
+static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nes_ib_device *nesibdev =
+ container_of(dev, struct nes_ib_device, ibdev.dev);
+ struct nes_vnic *nesvnic = nesibdev->nesvnic;
+
+ nes_debug(NES_DBG_INIT, "\n");
+ return sprintf(buf, "%x.%x.%x\n",
+ (int)(nesvnic->nesdev->nesadapter->fw_ver >> 32),
+ (int)(nesvnic->nesdev->nesadapter->fw_ver >> 16) & 0xffff,
+ (int)(nesvnic->nesdev->nesadapter->fw_ver & 0xffff));
+}
+
+
+/**
+ * show_hca
+ */
+static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ nes_debug(NES_DBG_INIT, "\n");
+ return sprintf(buf, "NES020\n");
+}
+
+
+/**
+ * show_board
+ */
+static ssize_t show_board(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ nes_debug(NES_DBG_INIT, "\n");
+ return sprintf(buf, "%.*s\n", 32, "NES020 Board ID");
+}
+
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct device_attribute *nes_dev_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_ver,
+ &dev_attr_hca_type,
+ &dev_attr_board_id
+};
+
+
+/**
+ * nes_query_qp
+ */
+static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+ struct nes_qp *nesqp = to_nesqp(ibqp);
+
+ nes_debug(NES_DBG_QP, "\n");
+
+ attr->qp_access_flags = 0;
+ attr->cap.max_send_wr = nesqp->hwqp.sq_size;
+ attr->cap.max_recv_wr = nesqp->hwqp.rq_size;
+ attr->cap.max_recv_sge = 1;
+ if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) {
+ init_attr->cap.max_inline_data = 0;
+ } else {
+ init_attr->cap.max_inline_data = 64;
+ }
+
+ init_attr->event_handler = nesqp->ibqp.event_handler;
+ init_attr->qp_context = nesqp->ibqp.qp_context;
+ init_attr->send_cq = nesqp->ibqp.send_cq;
+ init_attr->recv_cq = nesqp->ibqp.recv_cq;
+ init_attr->srq = nesqp->ibqp.srq;
+ init_attr->cap = attr->cap;
+
+ return 0;
+}
+
+
+/**
+ * nes_hw_modify_qp
+ */
+int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
+ u32 next_iwarp_state, u32 wait_completion)
+{
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ /* struct iw_cm_id *cm_id = nesqp->cm_id; */
+ /* struct iw_cm_event cm_event; */
+ struct nes_cqp_request *cqp_request;
+ int ret;
+ u16 major_code;
+
+ nes_debug(NES_DBG_MOD_QP, "QP%u, refcount=%d\n",
+ nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
+
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ nes_debug(NES_DBG_MOD_QP, "Failed to get a cqp_request.\n");
+ return -ENOMEM;
+ }
+ if (wait_completion) {
+ cqp_request->waiting = 1;
+ } else {
+ cqp_request->waiting = 0;
+ }
+ cqp_wqe = &cqp_request->cqp_wqe;
+
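+ /* The requested iWARP state rides in the opcode word of the CQP modify-QP WQE */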
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+ NES_CQP_MODIFY_QP | NES_CQP_QP_TYPE_IWARP | next_iwarp_state);
+ nes_debug(NES_DBG_MOD_QP, "using next_iwarp_state=%08x, wqe_words=%08x\n",
+ next_iwarp_state, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]));
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase);
+
+ atomic_set(&cqp_request->refcount, 2);
+ nes_post_cqp_request(nesdev, cqp_request);
+
+ /* Wait for CQP */
+ if (wait_completion) {
+ /* nes_debug(NES_DBG_MOD_QP, "Waiting for modify iWARP QP%u to complete.\n",
+ nesqp->hwqp.qp_id); */
+ ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
+ NES_EVENT_TIMEOUT);
+ nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u completed, wait_event_timeout ret=%u, "
+ "CQP Major:Minor codes = 0x%04X:0x%04X.\n",
+ nesqp->hwqp.qp_id, ret, cqp_request->major_code, cqp_request->minor_code);
+ major_code = cqp_request->major_code;
+ if (major_code) {
+ nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u failed"
+ "CQP Major:Minor codes = 0x%04X:0x%04X, intended next state = 0x%08X.\n",
+ nesqp->hwqp.qp_id, cqp_request->major_code,
+ cqp_request->minor_code, next_iwarp_state);
+ }
+
+ nes_put_cqp_request(nesdev, cqp_request);
+
+ if (!ret)
+ return -ETIME;
+ else if (major_code)
+ return -EIO;
+ else
+ return 0;
+ } else {
+ return 0;
+ }
+}
+
+
+/**
+ * nes_modify_qp
+ */
+int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct nes_qp *nesqp = to_nesqp(ibqp);
+ struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ /* u32 cqp_head; */
+ /* u32 counter; */
+ u32 next_iwarp_state = 0;
+ int err;
+ unsigned long qplockflags;
+ int ret;
+ u16 original_last_aeq;
+ u8 issue_modify_qp = 0;
+ u8 issue_disconnect = 0;
+ u8 dont_wait = 0;
+
+ nes_debug(NES_DBG_MOD_QP, "QP%u: QP State=%u, cur QP State=%u,"
+ " iwarp_state=0x%X, refcount=%d\n",
+ nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state,
+ nesqp->iwarp_state, atomic_read(&nesqp->refcount));
+
+ spin_lock_irqsave(&nesqp->lock, qplockflags);
+
+ nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X,"
+ " QP Access Flags=0x%X, attr_mask = 0x%0x\n",
+ nesqp->hwqp.qp_id, nesqp->hw_iwarp_state,
+ nesqp->hw_tcp_state, attr->qp_access_flags, attr_mask);
+
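+ /* Map the requested IB QP state onto the hardware iWARP/CQP state and decide whether a modify and/or disconnect is needed */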
+ if (attr_mask & IB_QP_STATE) {
+ switch (attr->qp_state) {
+ case IB_QPS_INIT:
+ nes_debug(NES_DBG_MOD_QP, "QP%u: new state = init\n",
+ nesqp->hwqp.qp_id);
+ if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) {
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ return -EINVAL;
+ }
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
+ issue_modify_qp = 1;
+ break;
+ case IB_QPS_RTR:
+ nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rtr\n",
+ nesqp->hwqp.qp_id);
+ if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) {
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ return -EINVAL;
+ }
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
+ issue_modify_qp = 1;
+ break;
+ case IB_QPS_RTS:
+ nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rts\n",
+ nesqp->hwqp.qp_id);
+ if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_RTS) {
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ return -EINVAL;
+ }
+ if (nesqp->cm_id == NULL) {
+ nes_debug(NES_DBG_MOD_QP, "QP%u: Failing attempt to move QP to RTS without a CM_ID. \n",
+ nesqp->hwqp.qp_id );
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ return -EINVAL;
+ }
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS;
+ if (nesqp->iwarp_state != NES_CQP_QP_IWARP_STATE_RTS)
+ next_iwarp_state |= NES_CQP_QP_CONTEXT_VALID |
+ NES_CQP_QP_ARP_VALID | NES_CQP_QP_ORD_VALID;
+ issue_modify_qp = 1;
+ nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_ESTABLISHED;
+ nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_RTS;
+ nesqp->hte_added = 1;
+ break;
+ case IB_QPS_SQD:
+ issue_modify_qp = 1;
+ nes_debug(NES_DBG_MOD_QP, "QP%u: new state=closing. SQ head=%u, SQ tail=%u\n",
+ nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail);
+ if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ return 0;
+ } else {
+ if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
+ nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing"
+ " ignored due to current iWARP state\n",
+ nesqp->hwqp.qp_id);
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ return -EINVAL;
+ }
+ if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) {
+ nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing"
+ " already done based on hw state.\n",
+ nesqp->hwqp.qp_id);
+ issue_modify_qp = 0;
+ nesqp->in_disconnect = 0;
+ }
+ switch (nesqp->hw_iwarp_state) {
+ case NES_AEQE_IWARP_STATE_CLOSING:
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
+ break;
+ case NES_AEQE_IWARP_STATE_TERMINATE:
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
+ break;
+ case NES_AEQE_IWARP_STATE_ERROR:
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
+ break;
+ default:
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
+ nesqp->in_disconnect = 1;
+ nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
+ break;
+ }
+ }
+ break;
+ case IB_QPS_SQE:
+ nes_debug(NES_DBG_MOD_QP, "QP%u: new state = terminate\n",
+ nesqp->hwqp.qp_id);
+ if (nesqp->iwarp_state >= (u32)NES_CQP_QP_IWARP_STATE_TERMINATE) {
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ return -EINVAL;
+ }
+ /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
+ nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
+ issue_modify_qp = 1;
+ nesqp->in_disconnect = 1;
+ break;
+ case IB_QPS_ERR:
+ case IB_QPS_RESET:
+ if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) {
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ return -EINVAL;
+ }
+ nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n",
+ nesqp->hwqp.qp_id);
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
+ /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
+ if (nesqp->hte_added) {
+ nes_debug(NES_DBG_MOD_QP, "set CQP_QP_DEL_HTE\n");
+ next_iwarp_state |= NES_CQP_QP_DEL_HTE;
+ nesqp->hte_added = 0;
+ }
+ if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) &&
+ (nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) {
+ next_iwarp_state |= NES_CQP_QP_RESET;
+ nesqp->in_disconnect = 1;
+ } else {
+ nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n",
+ nesqp->hwqp.qp_id, nesqp->hw_tcp_state);
+ dont_wait = 1;
+ }
+ issue_modify_qp = 1;
+ nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR;
+ break;
+ default:
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ return -EINVAL;
+ }
+
+ nesqp->ibqp_state = attr->qp_state;
+ if (((nesqp->iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) ==
+ (u32)NES_CQP_QP_IWARP_STATE_RTS) &&
+ ((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) >
+ (u32)NES_CQP_QP_IWARP_STATE_RTS)) {
+ nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK;
+ nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n",
+ nesqp->iwarp_state);
+ issue_disconnect = 1;
+ } else {
+ nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK;
+ nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n",
+ nesqp->iwarp_state);
+ }
+ }
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS) {
+ if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE) {
+ nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN |
+ NES_QPCONTEXT_MISC_RDMA_READ_EN);
+ issue_modify_qp = 1;
+ }
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) {
+ nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN);
+ issue_modify_qp = 1;
+ }
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) {
+ nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_READ_EN);
+ issue_modify_qp = 1;
+ }
+ if (attr->qp_access_flags & IB_ACCESS_MW_BIND) {
+ nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WBIND_EN);
+ issue_modify_qp = 1;
+ }
+
+ if (nesqp->user_mode) {
+ nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN |
+ NES_QPCONTEXT_MISC_RDMA_READ_EN);
+ issue_modify_qp = 1;
+ }
+ }
+
+ original_last_aeq = nesqp->last_aeq;
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+
+ nes_debug(NES_DBG_MOD_QP, "issue_modify_qp=%u\n", issue_modify_qp);
+
+ ret = 0;
+
+
+ if (issue_modify_qp) {
+ nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n");
+ ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 1);
+ if (ret)
+ nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)"
+ " failed for QP%u.\n",
+ next_iwarp_state, nesqp->hwqp.qp_id);
+
+ }
+
+ if ((issue_modify_qp) && (nesqp->ibqp_state > IB_QPS_RTS)) {
+ nes_debug(NES_DBG_MOD_QP, "QP%u Issued ModifyQP refcount (%d),"
+ " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
+ nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+ original_last_aeq, nesqp->last_aeq);
+ if ((!ret) ||
+ ((original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) &&
+ (ret))) {
+ if (dont_wait) {
+ if (nesqp->cm_id && nesqp->hw_tcp_state != 0) {
+ nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d),"
+ " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
+ nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+ original_last_aeq, nesqp->last_aeq);
+ /* this one is for the cm_disconnect thread */
+ spin_lock_irqsave(&nesqp->lock, qplockflags);
+ nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+ nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ nes_cm_disconn(nesqp);
+ } else {
+ nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n",
+ nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
+ }
+ } else {
+ spin_lock_irqsave(&nesqp->lock, qplockflags);
+ if (nesqp->cm_id) {
+ /* These two are for the timer thread */
+ if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
+ nesqp->cm_id->add_ref(nesqp->cm_id);
+ nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d),"
+ " need ae to finish up, original_last_aeq = 0x%04X."
+ " last_aeq = 0x%04X, scheduling timer.\n",
+ nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+ original_last_aeq, nesqp->last_aeq);
+ schedule_nes_timer(nesqp->cm_node, (struct sk_buff *) nesqp, NES_TIMER_TYPE_CLOSE, 1, 0);
+ }
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ } else {
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d),"
+ " need ae to finish up, original_last_aeq = 0x%04X."
+ " last_aeq = 0x%04X.\n",
+ nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+ original_last_aeq, nesqp->last_aeq);
+ }
+ }
+ } else {
+ nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up,"
+ " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
+ nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+ original_last_aeq, nesqp->last_aeq);
+ }
+ } else {
+ nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up,"
+ " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
+ nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
+ original_last_aeq, nesqp->last_aeq);
+ }
+
+ err = 0;
+
+ nes_debug(NES_DBG_MOD_QP, "QP%u Leaving, refcount=%d\n",
+ nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
+
+ return err;
+}
+
+
+/**
+ * nes_multicast_attach
+ */
+static int nes_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ nes_debug(NES_DBG_INIT, "\n");
+ return -ENOSYS;
+}
+
+
+/**
+ * nes_multicast_detach
+ */
+static int nes_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ nes_debug(NES_DBG_INIT, "\n");
+ return -ENOSYS;
+}
+
+
+/**
+ * nes_process_mad
+ */
+static int nes_process_mad(struct ib_device *ibdev, int mad_flags,
+ u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ nes_debug(NES_DBG_INIT, "\n");
+ return -ENOSYS;
+}
+
+static inline void
+fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselkey)
+{
+ int sge_index;
+ int total_payload_length = 0;
+ for (sge_index = 0; sge_index < ib_wr->num_sge; sge_index++) {
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX+(sge_index*4),
+ ib_wr->sg_list[sge_index].addr);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_LENGTH0_IDX + (sge_index*4),
+ ib_wr->sg_list[sge_index].length);
+ if (uselkey)
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX + (sge_index*4),
+ (ib_wr->sg_list[sge_index].lkey));
+ else
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX + (sge_index*4), 0);
+
+ total_payload_length += ib_wr->sg_list[sge_index].length;
+ }
+ nes_debug(NES_DBG_IW_TX, "UC UC UC, sending total_payload_length=%u \n",
+ total_payload_length);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+ total_payload_length);
+}
+
+/**
+ * nes_post_send
+ */
+static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
+ struct ib_send_wr **bad_wr)
+{
+ u64 u64temp;
+ unsigned long flags = 0;
+ struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_qp *nesqp = to_nesqp(ibqp);
+ struct nes_hw_qp_wqe *wqe;
+ int err;
+ u32 qsize = nesqp->hwqp.sq_size;
+ u32 head;
+ u32 wqe_misc;
+ u32 wqe_count;
+ u32 counter;
+ u32 total_payload_length;
+
+ err = 0;
+ wqe_misc = 0;
+ wqe_count = 0;
+ total_payload_length = 0;
+
+ if (nesqp->ibqp_state > IB_QPS_RTS)
+ return -EINVAL;
+
+ spin_lock_irqsave(&nesqp->lock, flags);
+
+ head = nesqp->hwqp.sq_head;
+
+ while (ib_wr) {
+ /* Check for SQ overflow: the ring is full when head is one slot behind tail (mod qsize); the 2 * qsize bias keeps the unsigned arithmetic non-negative */
+ if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
+ err = -EINVAL;
+ break;
+ }
+
+ wqe = &nesqp->hwqp.sq_vbase[head];
+ /* nes_debug(NES_DBG_IW_TX, "processing sq wqe for QP%u at %p, head = %u.\n",
+ nesqp->hwqp.qp_id, wqe, head); */
+ nes_fill_init_qp_wqe(wqe, nesqp, head);
+ u64temp = (u64)(ib_wr->wr_id);
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX,
+ u64temp);
+ switch (ib_wr->opcode) {
+ case IB_WR_SEND:
+ if (ib_wr->send_flags & IB_SEND_SOLICITED) {
+ wqe_misc = NES_IWARP_SQ_OP_SENDSE;
+ } else {
+ wqe_misc = NES_IWARP_SQ_OP_SEND;
+ }
+ if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
+ err = -EINVAL;
+ break;
+ }
+ if (ib_wr->send_flags & IB_SEND_FENCE) {
+ wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+ }
+ if ((ib_wr->send_flags & IB_SEND_INLINE) &&
+ ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
+ (ib_wr->sg_list[0].length <= 64)) {
+ memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
+ (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+ ib_wr->sg_list[0].length);
+ wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
+ } else {
+ fill_wqe_sg_send(wqe, ib_wr, 1);
+ }
+
+ break;
+ case IB_WR_RDMA_WRITE:
+ wqe_misc = NES_IWARP_SQ_OP_RDMAW;
+ if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
+ nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
+ ib_wr->num_sge,
+ nesdev->nesadapter->max_sge);
+ err = -EINVAL;
+ break;
+ }
+ if (ib_wr->send_flags & IB_SEND_FENCE) {
+ wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+ }
+
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
+ ib_wr->wr.rdma.rkey);
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
+ ib_wr->wr.rdma.remote_addr);
+
+ if ((ib_wr->send_flags & IB_SEND_INLINE) &&
+ ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
+ (ib_wr->sg_list[0].length <= 64)) {
+ memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
+ (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+ ib_wr->sg_list[0].length);
+ wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
+ } else {
+ fill_wqe_sg_send(wqe, ib_wr, 1);
+ }
+ wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
+ wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
+ break;
+ case IB_WR_RDMA_READ:
+ /* iWARP only supports 1 sge for RDMA reads */
+ if (ib_wr->num_sge > 1) {
+ nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
+ ib_wr->num_sge);
+ err = -EINVAL;
+ break;
+ }
+ wqe_misc = NES_IWARP_SQ_OP_RDMAR;
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
+ ib_wr->wr.rdma.remote_addr);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
+ ib_wr->wr.rdma.rkey);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
+ ib_wr->sg_list->length);
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
+ ib_wr->sg_list->addr);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
+ ib_wr->sg_list->lkey);
+ break;
+ default:
+ /* error */
+ err = -EINVAL;
+ break;
+ }
+
+ if (ib_wr->send_flags & IB_SEND_SIGNALED) {
+ wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
+ }
+ wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc);
+
+ ib_wr = ib_wr->next;
+ head++;
+ wqe_count++;
+ if (head >= qsize)
+ head = 0;
+
+ }
+
+ nesqp->hwqp.sq_head = head;
+ barrier();
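+ /* Ring the WQE allocate doorbell; the number of new WQEs (at most 255 per write) goes in the upper byte along with the QP number */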
+ while (wqe_count) {
+ counter = min(wqe_count, ((u32)255));
+ wqe_count -= counter;
+ nes_write32(nesdev->regs + NES_WQE_ALLOC,
+ (counter << 24) | 0x00800000 | nesqp->hwqp.qp_id);
+ }
+
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+
+ if (err)
+ *bad_wr = ib_wr;
+ return err;
+}
+
+
+/**
+ * nes_post_recv
+ */
+static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
+ struct ib_recv_wr **bad_wr)
+{
+ u64 u64temp;
+ unsigned long flags = 0;
+ struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_qp *nesqp = to_nesqp(ibqp);
+ struct nes_hw_qp_wqe *wqe;
+ int err = 0;
+ int sge_index;
+ u32 qsize = nesqp->hwqp.rq_size;
+ u32 head;
+ u32 wqe_count = 0;
+ u32 counter;
+ u32 total_payload_length;
+
+ if (nesqp->ibqp_state > IB_QPS_RTS)
+ return -EINVAL;
+
+ spin_lock_irqsave(&nesqp->lock, flags);
+
+ head = nesqp->hwqp.rq_head;
+
+ while (ib_wr) {
+ if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
+ err = -EINVAL;
+ break;
+ }
+ /* Check for RQ overflow */
+ if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) {
+ err = -EINVAL;
+ break;
+ }
+
+ nes_debug(NES_DBG_IW_RX, "ibwr sge count = %u.\n", ib_wr->num_sge);
+ wqe = &nesqp->hwqp.rq_vbase[head];
+
+ /* nes_debug(NES_DBG_IW_RX, "QP%u:processing rq wqe at %p, head = %u.\n",
+ nesqp->hwqp.qp_id, wqe, head); */
+ nes_fill_init_qp_wqe(wqe, nesqp, head);
+ u64temp = (u64)(ib_wr->wr_id);
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX,
+ u64temp);
+ total_payload_length = 0;
+ for (sge_index=0; sge_index < ib_wr->num_sge; sge_index++) {
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_FRAG0_LOW_IDX+(sge_index*4),
+ ib_wr->sg_list[sge_index].addr);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_LENGTH0_IDX+(sge_index*4),
+ ib_wr->sg_list[sge_index].length);
+ set_wqe_32bit_value(wqe->wqe_words,NES_IWARP_RQ_WQE_STAG0_IDX+(sge_index*4),
+ ib_wr->sg_list[sge_index].lkey);
+
+ total_payload_length += ib_wr->sg_list[sge_index].length;
+ }
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX,
+ total_payload_length);
+
+ ib_wr = ib_wr->next;
+ head++;
+ wqe_count++;
+ if (head >= qsize)
+ head = 0;
+ }
+
+ nesqp->hwqp.rq_head = head;
+ barrier();
+ while (wqe_count) {
+ counter = min(wqe_count, ((u32)255));
+ wqe_count -= counter;
+ nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter<<24) | nesqp->hwqp.qp_id);
+ }
+
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+
+ if (err)
+ *bad_wr = ib_wr;
+ return err;
+}
+
+
+/**
+ * nes_poll_cq
+ */
+static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+ u64 u64temp;
+ u64 wrid;
+ /* u64 u64temp; */
+ unsigned long flags = 0;
+ struct nes_vnic *nesvnic = to_nesvnic(ibcq->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_cq *nescq = to_nescq(ibcq);
+ struct nes_qp *nesqp;
+ struct nes_hw_cqe cqe;
+ u32 head;
+ u32 wq_tail;
+ u32 cq_size;
+ u32 cqe_count = 0;
+ u32 wqe_index;
+ u32 u32temp;
+ /* u32 counter; */
+
+ nes_debug(NES_DBG_CQ, "\n");
+
+ spin_lock_irqsave(&nescq->lock, flags);
+
+ head = nescq->hw_cq.cq_head;
+ cq_size = nescq->hw_cq.cq_size;
+
+ while (cqe_count < num_entries) {
+ if (le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
+ NES_CQE_VALID) {
+ /*
+ * Make sure we read CQ entry contents *after*
+ * we've checked the valid bit.
+ */
+ rmb();
+
+ cqe = nescq->hw_cq.cq_vbase[head];
+ nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
+ u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+ wqe_index = u32temp &
+ (nesdev->nesadapter->max_qp_wr - 1);
+ u32temp &= ~(NES_SW_CONTEXT_ALIGN-1);
+ /* Parse the CQE: the completion context is the nes_qp pointer with the WQE index packed into its low-order bits (either RQ or SQ) */
+ u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
+ ((u64)u32temp);
+ nesqp = *((struct nes_qp **)&u64temp);
+ memset(entry, 0, sizeof *entry);
+ if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) {
+ entry->status = IB_WC_SUCCESS;
+ } else {
+ entry->status = IB_WC_WR_FLUSH_ERR;
+ }
+
+ entry->qp = &nesqp->ibqp;
+ entry->src_qp = nesqp->hwqp.qp_id;
+
+ if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) {
+ if (nesqp->skip_lsmm) {
+ nesqp->skip_lsmm = 0;
+ wq_tail = nesqp->hwqp.sq_tail++;
+ }
+
+ /* Working on a SQ Completion*/
+ wq_tail = wqe_index;
+ nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
+ wrid = (((u64)(le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+ wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
+ ((u64)(le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+ wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX])));
+ entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+ wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]);
+
+ switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+ wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) {
+ case NES_IWARP_SQ_OP_RDMAW:
+ nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n");
+ entry->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case NES_IWARP_SQ_OP_RDMAR:
+ nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n");
+ entry->opcode = IB_WC_RDMA_READ;
+ entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+ wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]);
+ break;
+ case NES_IWARP_SQ_OP_SENDINV:
+ case NES_IWARP_SQ_OP_SENDSEINV:
+ case NES_IWARP_SQ_OP_SEND:
+ case NES_IWARP_SQ_OP_SENDSE:
+ nes_debug(NES_DBG_CQ, "Operation = Send.\n");
+ entry->opcode = IB_WC_SEND;
+ break;
+ }
+ } else {
+ /* Working on a RQ Completion*/
+ wq_tail = wqe_index;
+ nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
+ entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]);
+ wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) |
+ ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32);
+ entry->opcode = IB_WC_RECV;
+ }
+ entry->wr_id = wrid;
+
+ if (++head >= cq_size)
+ head = 0;
+ cqe_count++;
+ nescq->polled_completions++;
+ if ((nescq->polled_completions > (cq_size / 2)) ||
+ (nescq->polled_completions == 255)) {
+ nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes"
+ " are pending %u of %u.\n",
+ nescq->hw_cq.cq_number, nescq->polled_completions, cq_size);
+ nes_write32(nesdev->regs+NES_CQE_ALLOC,
+ nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
+ nescq->polled_completions = 0;
+ }
+ entry++;
+ } else
+ break;
+ }
+
+ if (nescq->polled_completions) {
+ nes_write32(nesdev->regs+NES_CQE_ALLOC,
+ nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
+ nescq->polled_completions = 0;
+ }
+
+ nescq->hw_cq.cq_head = head;
+ nes_debug(NES_DBG_CQ, "Reporting %u completions for CQ%u.\n",
+ cqe_count, nescq->hw_cq.cq_number);
+
+ spin_unlock_irqrestore(&nescq->lock, flags);
+
+ return cqe_count;
+}
+
+
+/**
+ * nes_req_notify_cq
+ */
+static int nes_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
+{
+ struct nes_vnic *nesvnic = to_nesvnic(ibcq->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_cq *nescq = to_nescq(ibcq);
+ u32 cq_arm;
+
+ nes_debug(NES_DBG_CQ, "Requesting notification for CQ%u.\n",
+ nescq->hw_cq.cq_number);
+
+ cq_arm = nescq->hw_cq.cq_number;
+ if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP)
+ cq_arm |= NES_CQE_ALLOC_NOTIFY_NEXT;
+ else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
+ cq_arm |= NES_CQE_ALLOC_NOTIFY_SE;
+ else
+ return -EINVAL;
+
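+ /* Write the arm request to the CQE allocate register and read it back to flush the doorbell write */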
+ nes_write32(nesdev->regs+NES_CQE_ALLOC, cq_arm);
+ nes_read32(nesdev->regs+NES_CQE_ALLOC);
+
+ return 0;
+}
+
+
+/**
+ * nes_init_ofa_device
+ */
+struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
+{
+ struct nes_ib_device *nesibdev;
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+
+ nesibdev = (struct nes_ib_device *)ib_alloc_device(sizeof(struct nes_ib_device));
+ if (nesibdev == NULL) {
+ return NULL;
+ }
+ strlcpy(nesibdev->ibdev.name, "nes%d", IB_DEVICE_NAME_MAX);
+ nesibdev->ibdev.owner = THIS_MODULE;
+
+ nesibdev->ibdev.node_type = RDMA_NODE_RNIC;
+ memset(&nesibdev->ibdev.node_guid, 0, sizeof(nesibdev->ibdev.node_guid));
+ memcpy(&nesibdev->ibdev.node_guid, netdev->dev_addr, 6);
+
+ nesibdev->ibdev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
+ (1ull << IB_USER_VERBS_CMD_BIND_MW) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_MW) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND);
+
+ nesibdev->ibdev.phys_port_cnt = 1;
+ nesibdev->ibdev.num_comp_vectors = 1;
+ nesibdev->ibdev.dma_device = &nesdev->pcidev->dev;
+ nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev;
+ nesibdev->ibdev.query_device = nes_query_device;
+ nesibdev->ibdev.query_port = nes_query_port;
+ nesibdev->ibdev.modify_port = nes_modify_port;
+ nesibdev->ibdev.query_pkey = nes_query_pkey;
+ nesibdev->ibdev.query_gid = nes_query_gid;
+ nesibdev->ibdev.alloc_ucontext = nes_alloc_ucontext;
+ nesibdev->ibdev.dealloc_ucontext = nes_dealloc_ucontext;
+ nesibdev->ibdev.mmap = nes_mmap;
+ nesibdev->ibdev.alloc_pd = nes_alloc_pd;
+ nesibdev->ibdev.dealloc_pd = nes_dealloc_pd;
+ nesibdev->ibdev.create_ah = nes_create_ah;
+ nesibdev->ibdev.destroy_ah = nes_destroy_ah;
+ nesibdev->ibdev.create_qp = nes_create_qp;
+ nesibdev->ibdev.modify_qp = nes_modify_qp;
+ nesibdev->ibdev.query_qp = nes_query_qp;
+ nesibdev->ibdev.destroy_qp = nes_destroy_qp;
+ nesibdev->ibdev.create_cq = nes_create_cq;
+ nesibdev->ibdev.destroy_cq = nes_destroy_cq;
+ nesibdev->ibdev.poll_cq = nes_poll_cq;
+ nesibdev->ibdev.get_dma_mr = nes_get_dma_mr;
+ nesibdev->ibdev.reg_phys_mr = nes_reg_phys_mr;
+ nesibdev->ibdev.reg_user_mr = nes_reg_user_mr;
+ nesibdev->ibdev.dereg_mr = nes_dereg_mr;
+ nesibdev->ibdev.alloc_mw = nes_alloc_mw;
+ nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
+ nesibdev->ibdev.bind_mw = nes_bind_mw;
+
+ nesibdev->ibdev.alloc_fmr = nes_alloc_fmr;
+ nesibdev->ibdev.unmap_fmr = nes_unmap_fmr;
+ nesibdev->ibdev.dealloc_fmr = nes_dealloc_fmr;
+ nesibdev->ibdev.map_phys_fmr = nes_map_phys_fmr;
+
+ nesibdev->ibdev.attach_mcast = nes_multicast_attach;
+ nesibdev->ibdev.detach_mcast = nes_multicast_detach;
+ nesibdev->ibdev.process_mad = nes_process_mad;
+
+ nesibdev->ibdev.req_notify_cq = nes_req_notify_cq;
+ nesibdev->ibdev.post_send = nes_post_send;
+ nesibdev->ibdev.post_recv = nes_post_recv;
+
+ nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL);
+ if (nesibdev->ibdev.iwcm == NULL) {
+ ib_dealloc_device(&nesibdev->ibdev);
+ return NULL;
+ }
+ nesibdev->ibdev.iwcm->add_ref = nes_add_ref;
+ nesibdev->ibdev.iwcm->rem_ref = nes_rem_ref;
+ nesibdev->ibdev.iwcm->get_qp = nes_get_qp;
+ nesibdev->ibdev.iwcm->connect = nes_connect;
+ nesibdev->ibdev.iwcm->accept = nes_accept;
+ nesibdev->ibdev.iwcm->reject = nes_reject;
+ nesibdev->ibdev.iwcm->create_listen = nes_create_listen;
+ nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen;
+
+ return nesibdev;
+}
+
+
+/**
+ * nes_destroy_ofa_device
+ */
+void nes_destroy_ofa_device(struct nes_ib_device *nesibdev)
+{
+ if (nesibdev == NULL)
+ return;
+
+ nes_unregister_ofa_device(nesibdev);
+
+ kfree(nesibdev->ibdev.iwcm);
+ ib_dealloc_device(&nesibdev->ibdev);
+}
+
+
+/**
+ * nes_register_ofa_device
+ */
+int nes_register_ofa_device(struct nes_ib_device *nesibdev)
+{
+ struct nes_vnic *nesvnic = nesibdev->nesvnic;
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ int i, ret;
+
+ ret = ib_register_device(&nesvnic->nesibdev->ibdev);
+ if (ret) {
+ return ret;
+ }
+
+ /* Get the resources allocated to this device */
+ nesibdev->max_cq = (nesadapter->max_cq-NES_FIRST_QPN) / nesadapter->port_count;
+ nesibdev->max_mr = nesadapter->max_mr / nesadapter->port_count;
+ nesibdev->max_qp = (nesadapter->max_qp-NES_FIRST_QPN) / nesadapter->port_count;
+ nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count;
+
+ for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) {
+ ret = device_create_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]);
+ if (ret) {
+ while (i > 0) {
+ i--;
+ device_remove_file(&nesibdev->ibdev.dev,
+ nes_dev_attributes[i]);
+ }
+ ib_unregister_device(&nesibdev->ibdev);
+ return ret;
+ }
+ }
+
+ nesvnic->of_device_registered = 1;
+
+ return 0;
+}
+
+
+/**
+ * nes_unregister_ofa_device
+ */
+static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev)
+{
+ struct nes_vnic *nesvnic = nesibdev->nesvnic;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) {
+ device_remove_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]);
+ }
+
+ if (nesvnic->of_device_registered) {
+ ib_unregister_device(&nesibdev->ibdev);
+ }
+
+ nesvnic->of_device_registered = 0;
+}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
new file mode 100644
index 0000000..ae0ca9b
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef NES_VERBS_H
+#define NES_VERBS_H
+
+struct nes_device;
+
+#define NES_MAX_USER_DB_REGIONS 4096
+#define NES_MAX_USER_WQ_REGIONS 4096
+
+struct nes_ucontext {
+ struct ib_ucontext ibucontext;
+ struct nes_device *nesdev;
+ unsigned long mmap_wq_offset;
+ unsigned long mmap_cq_offset; /* to be removed */
+ int index; /* rnic index (minor) */
+ unsigned long allocated_doorbells[BITS_TO_LONGS(NES_MAX_USER_DB_REGIONS)];
+ u16 mmap_db_index[NES_MAX_USER_DB_REGIONS];
+ u16 first_free_db;
+ unsigned long allocated_wqs[BITS_TO_LONGS(NES_MAX_USER_WQ_REGIONS)];
+ struct nes_qp *mmap_nesqp[NES_MAX_USER_WQ_REGIONS];
+ u16 first_free_wq;
+ struct list_head cq_reg_mem_list;
+ struct list_head qp_reg_mem_list;
+ u32 mcrqf;
+ atomic_t usecnt;
+};
+
+struct nes_pd {
+ struct ib_pd ibpd;
+ u16 pd_id;
+ atomic_t sqp_count;
+ u16 mmap_db_index;
+};
+
+struct nes_mr {
+ union {
+ struct ib_mr ibmr;
+ struct ib_mw ibmw;
+ struct ib_fmr ibfmr;
+ };
+ struct ib_umem *region;
+ u16 pbls_used;
+ u8 mode;
+ u8 pbl_4k;
+};
+
+struct nes_hw_pb {
+ __le32 pa_low;
+ __le32 pa_high;
+};
+
+struct nes_vpbl {
+ dma_addr_t pbl_pbase;
+ struct nes_hw_pb *pbl_vbase;
+};
+
+struct nes_root_vpbl {
+ dma_addr_t pbl_pbase;
+ struct nes_hw_pb *pbl_vbase;
+ struct nes_vpbl *leaf_vpbl;
+};
+
+struct nes_fmr {
+ struct nes_mr nesmr;
+ u32 leaf_pbl_cnt;
+ struct nes_root_vpbl root_vpbl;
+ struct ib_qp *ib_qp;
+ int access_rights;
+ struct ib_fmr_attr attr;
+};
+
+struct nes_av;
+
+struct nes_cq {
+ struct ib_cq ibcq;
+ struct nes_hw_cq hw_cq;
+ u32 polled_completions;
+ u32 cq_mem_size;
+ spinlock_t lock;
+ u8 virtual_cq;
+ u8 pad[3];
+};
+
+struct nes_wq {
+ spinlock_t lock;
+};
+
+struct iw_cm_id;
+struct ietf_mpa_frame;
+
+struct nes_qp {
+ struct ib_qp ibqp;
+ void *allocated_buffer;
+ struct iw_cm_id *cm_id;
+ struct workqueue_struct *wq;
+ struct work_struct disconn_work;
+ struct nes_cq *nesscq;
+ struct nes_cq *nesrcq;
+ struct nes_pd *nespd;
+ void *cm_node; /* handle of the node this QP is associated with */
+ struct ietf_mpa_frame *ietf_frame;
+ dma_addr_t ietf_frame_pbase;
+ wait_queue_head_t state_waitq;
+ struct ib_mr *lsmm_mr;
+ unsigned long socket;
+ struct nes_hw_qp hwqp;
+ struct work_struct work;
+ struct work_struct ae_work;
+ enum ib_qp_state ibqp_state;
+ u32 iwarp_state;
+ u32 hte_index;
+ u32 last_aeq;
+ u32 qp_mem_size;
+ atomic_t refcount;
+ atomic_t close_timer_started;
+ u32 mmap_sq_db_index;
+ u32 mmap_rq_db_index;
+ spinlock_t lock;
+ struct nes_qp_context *nesqp_context;
+ dma_addr_t nesqp_context_pbase;
+ void *pbl_vbase;
+ dma_addr_t pbl_pbase;
+ struct page *page;
+ wait_queue_head_t kick_waitq;
+ u16 in_disconnect;
+ u16 private_data_len;
+ u8 active_conn;
+ u8 skip_lsmm;
+ u8 user_mode;
+ u8 hte_added;
+ u8 hw_iwarp_state;
+ u8 flush_issued;
+ u8 hw_tcp_state;
+ u8 disconn_pending;
+ u8 destroyed;
+};
+#endif /* NES_VERBS_H */