diff options
author | hselasky <hselasky@FreeBSD.org> | 2014-09-23 12:37:01 +0000 |
---|---|---|
committer | hselasky <hselasky@FreeBSD.org> | 2014-09-23 12:37:01 +0000 |
commit | 512a43f91c2230c87d24426072957ff31f5ae88e (patch) | |
tree | 5d34fd35d43dfcaa4ecfece82f628789398a10d1 /sys/ofed/drivers | |
parent | 6ca5e81a1632f8d4048ee0028c9081129e51d3bc (diff) | |
download | FreeBSD-src-512a43f91c2230c87d24426072957ff31f5ae88e.zip FreeBSD-src-512a43f91c2230c87d24426072957ff31f5ae88e.tar.gz |
Hardware driver update from Mellanox Technologies, including:
- improved performance
- better stability
- new features
- bugfixes
Supported HCAs:
- ConnectX-2
- ConnectX-3
- ConnectX-3 Pro
Sponsored by: Mellanox Technologies
MFC after: 1 week
Diffstat (limited to 'sys/ofed/drivers')
42 files changed, 8600 insertions, 3312 deletions
diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/mad.c b/sys/ofed/drivers/infiniband/hw/mlx4/mad.c index b35cf1c..74bbf5c 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/mad.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/mad.c @@ -1081,7 +1081,7 @@ static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num) if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num, - MLX4_EQ_PORT_INFO_LID_CHANGE_MASK); + MLX4_EQ_PORT_INFO_LID_CHANGE_MASK, 0, 0); } static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num) @@ -1093,7 +1093,7 @@ static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num) if (!dev->sriov.is_going_down) { mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0); mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num, - MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK); + MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK, 0, 0); } } mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER); @@ -1191,7 +1191,7 @@ void handle_port_mgmt_change_event(struct work_struct *work) /*if master, notify all slaves*/ if (mlx4_is_master(dev->dev)) mlx4_gen_slaves_port_mgt_ev(dev->dev, port, - MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK); + MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK, 0, 0); } if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK) diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/main.c b/sys/ofed/drivers/infiniband/hw/mlx4/main.c index 1e72826..343b193 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/main.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/main.c @@ -1005,7 +1005,7 @@ static int flow_spec_to_net_rule(struct ib_device *dev, struct ib_flow_spec *flo case IB_FLOW_IB_UC: spec_l2->id = MLX4_NET_TRANS_RULE_ID_IB; if(flow_spec->l2_id.ib_uc.qpn) { - spec_l2->ib.r_u_qpn = cpu_to_be32(flow_spec->l2_id.ib_uc.qpn); + spec_l2->ib.l3_qpn = cpu_to_be32(flow_spec->l2_id.ib_uc.qpn); spec_l2->ib.qpn_msk = cpu_to_be32(0xffffff); } break; @@ -2013,7 +2013,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) for (i = 0; i < ibdev->num_ports; ++i) { if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { - err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]); + err = mlx4_counter_alloc(ibdev->dev, i + 1, &ibdev->counters[i]); if (err) ibdev->counters[i] = -1; } else @@ -2112,7 +2112,7 @@ err_steer_qp_release: err_counter: for (; i; --i) if (ibdev->counters[i - 1] != -1) - mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]); + mlx4_counter_free(ibdev->dev, i, ibdev->counters[i - 1]); err_map: iounmap(ibdev->priv_uar.map); @@ -2200,7 +2200,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) iounmap(ibdev->priv_uar.map); for (p = 0; p < ibdev->num_ports; ++p) if (ibdev->counters[p] != -1) - mlx4_counter_free(ibdev->dev, ibdev->counters[p]); + mlx4_counter_free(ibdev->dev, p + 1, ibdev->counters[p]); mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) mlx4_CLOSE_PORT(dev, p); diff --git a/sys/ofed/drivers/infiniband/hw/mlx4/qp.c b/sys/ofed/drivers/infiniband/hw/mlx4/qp.c index 4c7d819..c5ebe6b 100644 --- a/sys/ofed/drivers/infiniband/hw/mlx4/qp.c +++ b/sys/ofed/drivers/infiniband/hw/mlx4/qp.c @@ -2679,10 +2679,10 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq static __be32 convert_access(int acc) { - return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) | - (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) | - (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | + return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) | + (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); } @@ -2709,10 +2709,12 @@ static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) { - iseg->flags = 0; - iseg->mem_key = cpu_to_be32(rkey); - iseg->guest_id = 0; - iseg->pa = 0; + iseg->mem_key = cpu_to_be32(rkey); + + iseg->reserved1 = 0; + iseg->reserved2 = 0; + iseg->reserved3[0] = 0; + iseg->reserved3[1] = 0; } static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 339e305..10ac933 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -240,7 +240,7 @@ ipoib_ib_handle_rx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc) */ if (unlikely(!ipoib_alloc_rx_mb(priv, wr_id))) { memcpy(&priv->rx_ring[wr_id], &saverx, sizeof(saverx)); - dev->if_iqdrops++; + if_inc_counter(dev, IFCOUNTER_IQDROPS, 1); goto repost; } diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c index 46cd5d1..9f33797 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -745,7 +745,7 @@ ipoib_vlan_start(struct ifnet *dev) if (mb == NULL) break; m_freem(mb); - dev->if_oerrors++; + if_inc_counter(dev, IFCOUNTER_OERRORS, 1); } } @@ -1452,7 +1452,7 @@ ipoib_input(struct ifnet *ifp, struct mbuf *m) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; - ifp->if_imcasts++; + if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } ipoib_demux(ifp, m, ntohs(eh->proto)); diff --git a/sys/ofed/drivers/net/mlx4/alloc.c b/sys/ofed/drivers/net/mlx4/alloc.c index b444bbd..da36b34 100644 --- a/sys/ofed/drivers/net/mlx4/alloc.c +++ b/sys/ofed/drivers/net/mlx4/alloc.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,7 +34,7 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/mm.h> -#include <linux/bitops.h> +#include <linux/module.h> #include <linux/dma-mapping.h> #include <linux/vmalloc.h> @@ -70,9 +70,9 @@ u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap) return obj; } -void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj) +void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr) { - mlx4_bitmap_free_range(bitmap, obj, 1); + mlx4_bitmap_free_range(bitmap, obj, 1, use_rr); } static unsigned long find_aligned_range(unsigned long *bitmap, @@ -148,11 +148,17 @@ u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap) return bitmap->avail; } -void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt) +void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt, + int use_rr) { obj &= bitmap->max + bitmap->reserved_top - 1; spin_lock(&bitmap->lock); + if (!use_rr) { + bitmap->last = min(bitmap->last, obj); + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; + } bitmap_clear(bitmap->table, obj, cnt); bitmap->avail += cnt; spin_unlock(&bitmap->lock); diff --git a/sys/ofed/drivers/net/mlx4/catas.c b/sys/ofed/drivers/net/mlx4/catas.c index 185129a..f62c16c 100644 --- a/sys/ofed/drivers/net/mlx4/catas.c +++ b/sys/ofed/drivers/net/mlx4/catas.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,10 +34,11 @@ #include <linux/workqueue.h> #include <linux/module.h> -#include "mlx4.h" +#include <asm/byteorder.h> -#define MLX4_CATAS_POLL_INTERVAL (5 * HZ) +#include "mlx4.h" +#define MLX4_CATAS_POLL_INTERVAL (5 * HZ) static DEFINE_SPINLOCK(catas_lock); @@ -156,11 +157,13 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev) del_timer_sync(&priv->catas_err.timer); - if (priv->catas_err.map) + if (priv->catas_err.map) { iounmap(priv->catas_err.map); + priv->catas_err.map = NULL; + } spin_lock_irq(&catas_lock); - list_del(&priv->catas_err.list); + list_del_init(&priv->catas_err.list); spin_unlock_irq(&catas_lock); } diff --git a/sys/ofed/drivers/net/mlx4/cmd.c b/sys/ofed/drivers/net/mlx4/cmd.c index edbde9c..828009d 100644 --- a/sys/ofed/drivers/net/mlx4/cmd.c +++ b/sys/ofed/drivers/net/mlx4/cmd.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -34,14 +34,17 @@ #include <linux/sched.h> #include <linux/slab.h> +#include <linux/module.h> #include <linux/pci.h> #include <linux/errno.h> #include <linux/mlx4/cmd.h> +#include <linux/mlx4/device.h> #include <linux/semaphore.h> #include <rdma/ib_smi.h> #include <asm/io.h> +#include <linux/ktime.h> #include "mlx4.h" #include "fw.h" @@ -110,6 +113,14 @@ enum { GO_BIT_TIMEOUT_MSECS = 10000 }; +enum mlx4_vlan_transition { + MLX4_VLAN_TRANSITION_VST_VST = 0, + MLX4_VLAN_TRANSITION_VST_VGT = 1, + MLX4_VLAN_TRANSITION_VGT_VST = 2, + MLX4_VLAN_TRANSITION_VGT_VGT = 3, +}; + + struct mlx4_cmd_context { struct completion done; int result; @@ -152,6 +163,131 @@ static int mlx4_status_to_errno(u8 status) return trans_table[status]; } +static const char *cmd_to_str(u16 cmd) +{ + switch (cmd) { + case MLX4_CMD_SYS_EN: return "SYS_EN"; + case MLX4_CMD_SYS_DIS: return "SYS_DIS"; + case MLX4_CMD_MAP_FA: return "MAP_FA"; + case MLX4_CMD_UNMAP_FA: return "UNMAP_FA"; + case MLX4_CMD_RUN_FW: return "RUN_FW"; + case MLX4_CMD_MOD_STAT_CFG: return "MOD_STAT_CFG"; + case MLX4_CMD_QUERY_DEV_CAP: return "QUERY_DEV_CAP"; + case MLX4_CMD_QUERY_FW: return "QUERY_FW"; + case MLX4_CMD_ENABLE_LAM: return "ENABLE_LAM"; + case MLX4_CMD_DISABLE_LAM: return "DISABLE_LAM"; + case MLX4_CMD_QUERY_DDR: return "QUERY_DDR"; + case MLX4_CMD_QUERY_ADAPTER: return "QUERY_ADAPTER"; + case MLX4_CMD_INIT_HCA: return "INIT_HCA"; + case MLX4_CMD_CLOSE_HCA: return "CLOSE_HCA"; + case MLX4_CMD_INIT_PORT: return "INIT_PORT"; + case MLX4_CMD_CLOSE_PORT: return "CLOSE_PORT"; + case MLX4_CMD_QUERY_HCA: return "QUERY_HCA"; + case MLX4_CMD_QUERY_PORT: return "QUERY_PORT"; + case MLX4_CMD_SENSE_PORT: return "SENSE_PORT"; + case MLX4_CMD_HW_HEALTH_CHECK: return "HW_HEALTH_CHECK"; + case MLX4_CMD_SET_PORT: return "SET_PORT"; + case MLX4_CMD_SET_NODE: return "SET_NODE"; + case MLX4_CMD_QUERY_FUNC: return "QUERY_FUNC"; + case MLX4_CMD_MAP_ICM: return "MAP_ICM"; + case MLX4_CMD_UNMAP_ICM: return "UNMAP_ICM"; + case MLX4_CMD_MAP_ICM_AUX: return "MAP_ICM_AUX"; + case MLX4_CMD_UNMAP_ICM_AUX: return "UNMAP_ICM_AUX"; + case MLX4_CMD_SET_ICM_SIZE: return "SET_ICM_SIZE"; + /*master notify fw on finish for slave's flr*/ + case MLX4_CMD_INFORM_FLR_DONE: return "INFORM_FLR_DONE"; + case MLX4_CMD_GET_OP_REQ: return "GET_OP_REQ"; + + /* TPT commands */ + case MLX4_CMD_SW2HW_MPT: return "SW2HW_MPT"; + case MLX4_CMD_QUERY_MPT: return "QUERY_MPT"; + case MLX4_CMD_HW2SW_MPT: return "HW2SW_MPT"; + case MLX4_CMD_READ_MTT: return "READ_MTT"; + case MLX4_CMD_WRITE_MTT: return "WRITE_MTT"; + case MLX4_CMD_SYNC_TPT: return "SYNC_TPT"; + + /* EQ commands */ + case MLX4_CMD_MAP_EQ: return "MAP_EQ"; + case MLX4_CMD_SW2HW_EQ: return "SW2HW_EQ"; + case MLX4_CMD_HW2SW_EQ: return "HW2SW_EQ"; + case MLX4_CMD_QUERY_EQ: return "QUERY_EQ"; + + /* CQ commands */ + case MLX4_CMD_SW2HW_CQ: return "SW2HW_CQ"; + case MLX4_CMD_HW2SW_CQ: return "HW2SW_CQ"; + case MLX4_CMD_QUERY_CQ: return "QUERY_CQ:"; + case MLX4_CMD_MODIFY_CQ: return "MODIFY_CQ:"; + + /* SRQ commands */ + case MLX4_CMD_SW2HW_SRQ: return "SW2HW_SRQ"; + case MLX4_CMD_HW2SW_SRQ: return "HW2SW_SRQ"; + case MLX4_CMD_QUERY_SRQ: return "QUERY_SRQ"; + case MLX4_CMD_ARM_SRQ: return "ARM_SRQ"; + + /* QP/EE commands */ + case MLX4_CMD_RST2INIT_QP: return "RST2INIT_QP"; + case MLX4_CMD_INIT2RTR_QP: return "INIT2RTR_QP"; + case MLX4_CMD_RTR2RTS_QP: return "RTR2RTS_QP"; + case MLX4_CMD_RTS2RTS_QP: return "RTS2RTS_QP"; + case MLX4_CMD_SQERR2RTS_QP: return "SQERR2RTS_QP"; + case MLX4_CMD_2ERR_QP: return "2ERR_QP"; + case MLX4_CMD_RTS2SQD_QP: return "RTS2SQD_QP"; + case MLX4_CMD_SQD2SQD_QP: return "SQD2SQD_QP"; + case MLX4_CMD_SQD2RTS_QP: return "SQD2RTS_QP"; + case MLX4_CMD_2RST_QP: return "2RST_QP"; + case MLX4_CMD_QUERY_QP: return "QUERY_QP"; + case MLX4_CMD_INIT2INIT_QP: return "INIT2INIT_QP"; + case MLX4_CMD_SUSPEND_QP: return "SUSPEND_QP"; + case MLX4_CMD_UNSUSPEND_QP: return "UNSUSPEND_QP"; + /* special QP and management commands */ + case MLX4_CMD_CONF_SPECIAL_QP: return "CONF_SPECIAL_QP"; + case MLX4_CMD_MAD_IFC: return "MAD_IFC"; + + /* multicast commands */ + case MLX4_CMD_READ_MCG: return "READ_MCG"; + case MLX4_CMD_WRITE_MCG: return "WRITE_MCG"; + case MLX4_CMD_MGID_HASH: return "MGID_HASH"; + + /* miscellaneous commands */ + case MLX4_CMD_DIAG_RPRT: return "DIAG_RPRT"; + case MLX4_CMD_NOP: return "NOP"; + case MLX4_CMD_ACCESS_MEM: return "ACCESS_MEM"; + case MLX4_CMD_SET_VEP: return "SET_VEP"; + + /* Ethernet specific commands */ + case MLX4_CMD_SET_VLAN_FLTR: return "SET_VLAN_FLTR"; + case MLX4_CMD_SET_MCAST_FLTR: return "SET_MCAST_FLTR"; + case MLX4_CMD_DUMP_ETH_STATS: return "DUMP_ETH_STATS"; + + /* Communication channel commands */ + case MLX4_CMD_ARM_COMM_CHANNEL: return "ARM_COMM_CHANNEL"; + case MLX4_CMD_GEN_EQE: return "GEN_EQE"; + + /* virtual commands */ + case MLX4_CMD_ALLOC_RES: return "ALLOC_RES"; + case MLX4_CMD_FREE_RES: return "FREE_RES"; + case MLX4_CMD_MCAST_ATTACH: return "MCAST_ATTACH"; + case MLX4_CMD_UCAST_ATTACH: return "UCAST_ATTACH"; + case MLX4_CMD_PROMISC: return "PROMISC"; + case MLX4_CMD_QUERY_FUNC_CAP: return "QUERY_FUNC_CAP"; + case MLX4_CMD_QP_ATTACH: return "QP_ATTACH"; + + /* debug commands */ + case MLX4_CMD_QUERY_DEBUG_MSG: return "QUERY_DEBUG_MSG"; + case MLX4_CMD_SET_DEBUG_MSG: return "SET_DEBUG_MSG"; + + /* statistics commands */ + case MLX4_CMD_QUERY_IF_STAT: return "QUERY_IF_STAT"; + case MLX4_CMD_SET_IF_STAT: return "SET_IF_STAT"; + + /* register/delete flow steering network rules */ + case MLX4_QP_FLOW_STEERING_ATTACH: return "QP_FLOW_STEERING_ATTACH"; + case MLX4_QP_FLOW_STEERING_DETACH: return "QP_FLOW_STEERING_DETACH"; + case MLX4_FLOW_STEERING_IB_UC_QP_RANGE: return "FLOW_STEERING_IB_UC_QP_RANGE"; + default: return "OTHER"; + } +} + static u8 mlx4_errno_to_status(int errno) { switch (errno) { @@ -244,6 +380,17 @@ static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, down(&cmd->event_sem); + end = msecs_to_jiffies(timeout) + jiffies; + while (comm_pending(dev) && time_before(jiffies, end)) + cond_resched(); + if (comm_pending(dev)) { + mlx4_warn(dev, "mlx4_comm_cmd_wait: Comm channel " + "is not idle. My toggle is %d (op: 0x%x)\n", + mlx4_priv(dev)->cmd.comm_toggle, op); + up(&cmd->event_sem); + return -EAGAIN; + } + spin_lock(&cmd->context_lock); BUG_ON(cmd->free_head < 0); context = &cmd->context[cmd->free_head]; @@ -255,12 +402,8 @@ static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, mlx4_comm_cmd_post(dev, op, param); - if (!wait_for_completion_timeout(&context->done, - msecs_to_jiffies(timeout))) { - mlx4_warn(dev, "communication channel command 0x%x timed out\n", op); - err = -EBUSY; - goto out; - } + /* In slave, wait unconditionally for completion */ + wait_for_completion(&context->done); err = context->result; if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) { @@ -309,14 +452,29 @@ static int cmd_pending(struct mlx4_dev *dev) !!(status & swab32(1 << HCR_T_BIT))); } -static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, - u32 in_modifier, u8 op_modifier, u16 op, u16 token, - int event) +static int get_status(struct mlx4_dev *dev, u32 *status, int *go_bit, + int *t_bit) +{ + if (pci_channel_offline(dev->pdev)) + return -EIO; + + *status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); + *t_bit = !!(*status & swab32(1 << HCR_T_BIT)); + *go_bit = !!(*status & swab32(1 << HCR_GO_BIT)); + + return 0; +} + +static int mlx4_cmd_post(struct mlx4_dev *dev, struct timespec *ts1, + u64 in_param, u64 out_param, u32 in_modifier, + u8 op_modifier, u16 op, u16 token, int event) { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; u32 __iomem *hcr = cmd->hcr; int ret = -EAGAIN; unsigned long end; + int err, go_bit = 0, t_bit = 0; + u32 status = 0; mutex_lock(&cmd->hcr_mutex); @@ -363,6 +521,9 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), hcr + 4); __raw_writel((__force u32) cpu_to_be32(token << 16), hcr + 5); + if (ts1) + ktime_get_ts(ts1); + /* __raw_writel may not order writes. */ wmb(); @@ -383,6 +544,15 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, ret = 0; out: + if (ret) { + err = get_status(dev, &status, &go_bit, &t_bit); + mlx4_warn(dev, "Could not post command %s (0x%x): ret=%d, " + "in_param=0x%llx, in_mod=0x%x, op_mod=0x%x, " + "get_status err=%d, status_reg=0x%x, go_bit=%d, " + "t_bit=%d, toggle=0x%x\n", cmd_to_str(op), op, ret, + (unsigned long long) in_param, in_modifier, op_modifier, err, status, + go_bit, t_bit, cmd->toggle); + } mutex_unlock(&cmd->hcr_mutex); return ret; } @@ -439,7 +609,7 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, ret = mlx4_status_to_errno(vhcr->status); } else mlx4_err(dev, "failed execution of VHCR_POST command" - "opcode 0x%x\n", op); + "opcode %s (0x%x)\n", cmd_to_str(op), op); } mutex_unlock(&priv->cmd.slave_cmd_mutex); @@ -467,7 +637,7 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, goto out; } - err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, + err = mlx4_cmd_post(dev, NULL, in_param, out_param ? *out_param : 0, in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0); if (err) goto out; @@ -487,7 +657,8 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, } if (cmd_pending(dev)) { - mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); + mlx4_warn(dev, "command %s (0x%x) timed out (go bit not cleared)\n", + cmd_to_str(op), op); err = -ETIMEDOUT; goto out; } @@ -502,8 +673,8 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24; err = mlx4_status_to_errno(stat); if (err) - mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", - op, stat); + mlx4_err(dev, "command %s (0x%x) failed: fw status = 0x%x\n", + cmd_to_str(op), op, stat); out: up(&priv->cmd.poll_sem); @@ -527,19 +698,6 @@ void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param) complete(&context->done); } -static int get_status(struct mlx4_dev *dev, u32 *status, int *go_bit, - int *t_bit) -{ - if (pci_channel_offline(dev->pdev)) - return -EIO; - - *status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); - *t_bit = !!(*status & swab32(1 << HCR_T_BIT)); - *go_bit = !!(*status & swab32(1 << HCR_GO_BIT)); - - return 0; -} - static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout) @@ -549,6 +707,12 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int err = 0; int go_bit = 0, t_bit = 0, stat_err; u32 status = 0; + struct timespec ts1, ts2; + ktime_t t1, t2, delta; + s64 ds; + + if (out_is_imm && !out_param) + return -EINVAL; down(&cmd->event_sem); @@ -561,29 +725,38 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, init_completion(&context->done); - err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, + err = mlx4_cmd_post(dev, &ts1, in_param, out_param ? *out_param : 0, in_modifier, op_modifier, op, context->token, 1); - if (err) { - mlx4_warn(dev, "command 0x%x could not be posted (%d)\n", - op, err); + if (err) goto out; - } if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { stat_err = get_status(dev, &status, &go_bit, &t_bit); - mlx4_warn(dev, "command 0x%x timed out: " - "get_status err=%d, status=0x%x, go_bit=%d, " - "t_bit=%d, toggle=0x%x\n", op, stat_err, status, - go_bit, t_bit, mlx4_priv(dev)->cmd.toggle); + mlx4_warn(dev, "command %s (0x%x) timed out: in_param=0x%llx, " + "in_mod=0x%x, op_mod=0x%x, get_status err=%d, " + "status_reg=0x%x, go_bit=%d, t_bit=%d, toggle=0x%x\n" + , cmd_to_str(op), op, (unsigned long long) in_param, in_modifier, + op_modifier, stat_err, status, go_bit, t_bit, + mlx4_priv(dev)->cmd.toggle); err = -EBUSY; goto out; } + if (mlx4_debug_level & MLX4_DEBUG_MASK_CMD_TIME) { + ktime_get_ts(&ts2); + t1 = timespec_to_ktime(ts1); + t2 = timespec_to_ktime(ts2); + delta = ktime_sub(t2, t1); + ds = ktime_to_ns(delta); + pr_info("mlx4: fw exec time for %s is %lld nsec\n", cmd_to_str(op), (long long) ds); + } err = context->result; if (err) { - mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", - op, context->fw_status); + mlx4_err(dev, "command %s (0x%x) failed: in_param=0x%llx, " + "in_mod=0x%x, op_mod=0x%x, fw status = 0x%x\n", + cmd_to_str(op), op, (unsigned long long) in_param, in_modifier, + op_modifier, context->fw_status); goto out; } @@ -640,7 +813,7 @@ static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr, (slave & ~0x7f) | (size & 0xff)) { mlx4_err(dev, "Bad access mem params - slave_addr:0x%llx " "master_addr:0x%llx slave_id:%d size:%d\n", - (long long)slave_addr, (long long)master_addr, slave, size); + (unsigned long long) slave_addr, (unsigned long long) master_addr, slave, size); return -EINVAL; } @@ -813,6 +986,24 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); } +static int MLX4_CMD_DIAG_RPRT_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + return -EPERM; +} + +static int MLX4_CMD_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + return -EPERM; +} + int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -950,6 +1141,16 @@ static struct mlx4_cmd_info cmd_info[] = { .wrapper = NULL }, { + .opcode = MLX4_CMD_DIAG_RPRT, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .skip_err_print = true, + .verify = NULL, + .wrapper = MLX4_CMD_DIAG_RPRT_wrapper + }, + { .opcode = MLX4_CMD_NOP, .has_inbox = false, .has_outbox = false, @@ -1247,6 +1448,16 @@ static struct mlx4_cmd_info cmd_info[] = { .wrapper = mlx4_GEN_QP_wrapper }, { + .opcode = MLX4_CMD_UPDATE_QP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .skip_err_print = true, + .verify = NULL, + .wrapper = MLX4_CMD_UPDATE_QP_wrapper + }, + { .opcode = MLX4_CMD_CONF_SPECIAL_QP, .has_inbox = false, .has_outbox = false, @@ -1348,6 +1559,17 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper }, + /* wol commands */ + { + .opcode = MLX4_CMD_MOD_STAT_CFG, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .skip_err_print = true, + .verify = NULL, + .wrapper = mlx4_MOD_STAT_CFG_wrapper + }, }; static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, @@ -1401,8 +1623,8 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, } } if (!cmd) { - mlx4_err(dev, "Unknown command:0x%x accepted from slave:%d\n", - vhcr->op, slave); + mlx4_err(dev, "unparavirt command: %s (0x%x) accepted from slave:%d\n", + cmd_to_str(vhcr->op), vhcr->op, slave); vhcr_cmd->status = CMD_STAT_BAD_PARAM; goto out_status; } @@ -1420,8 +1642,8 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, if (mlx4_ACCESS_MEM(dev, inbox->dma, slave, vhcr->in_param, MLX4_MAILBOX_SIZE, 1)) { - mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n", - __func__, cmd->opcode); + mlx4_err(dev, "%s: Failed reading inbox for cmd %s (0x%x)\n", + __func__, cmd_to_str(cmd->opcode), cmd->opcode); vhcr_cmd->status = CMD_STAT_INTERNAL_ERR; goto out_status; } @@ -1429,9 +1651,9 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, /* Apply permission and bound checks if applicable */ if (cmd->verify && cmd->verify(dev, slave, vhcr, inbox)) { - mlx4_warn(dev, "Command:0x%x from slave: %d failed protection " - "checks for resource_id:%d\n", vhcr->op, slave, - vhcr->in_modifier); + mlx4_warn(dev, "Command %s (0x%x) from slave: %d failed protection " + "checks for resource_id: %d\n", cmd_to_str(vhcr->op), + vhcr->op, slave, vhcr->in_modifier); vhcr_cmd->status = CMD_STAT_BAD_OP; goto out_status; } @@ -1470,9 +1692,13 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, } if (err) { - mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with" - " error:%d, status %d\n", - vhcr->op, slave, vhcr->errno, err); + if (!cmd->skip_err_print) + mlx4_warn(dev, "vhcr command %s (0x%x) slave:%d " + "in_param 0x%llx in_mod=0x%x, op_mod=0x%x " + "failed with error:%d, status %d\n", + cmd_to_str(vhcr->op), vhcr->op, slave, + (unsigned long long) vhcr->in_param, vhcr->in_modifier, + vhcr->op_modifier, vhcr->errno, err); vhcr_cmd->status = mlx4_errno_to_status(err); goto out_status; } @@ -1487,7 +1713,7 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, /* If we failed to write back the outbox after the *command was successfully executed, we must fail this * slave, as it is now in undefined state */ - mlx4_err(dev, "%s:Failed writing outbox\n", __func__); + mlx4_err(dev, "%s: Failed writing outbox\n", __func__); goto out; } } @@ -1516,6 +1742,75 @@ out: return ret; } +static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, + int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_vport_state *vp_admin; + struct mlx4_vf_immed_vlan_work *work; + int err; + int admin_vlan_ix = NO_INDX; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + + if (vp_oper->state.default_vlan == vp_admin->default_vlan && + vp_oper->state.default_qos == vp_admin->default_qos) + return 0; + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; + + if (vp_oper->state.default_vlan != vp_admin->default_vlan) { + if (MLX4_VGT != vp_admin->default_vlan) { + err = __mlx4_register_vlan(&priv->dev, port, + vp_admin->default_vlan, + &admin_vlan_ix); + if (err) { + mlx4_warn((&priv->dev), + "No vlan resources slave %d, port %d\n", + slave, port); + return err; + } + } else { + admin_vlan_ix = NO_INDX; + } + work->flags |= MLX4_VF_IMMED_VLAN_FLAG_VLAN; + mlx4_dbg((&(priv->dev)), + "alloc vlan %d idx %d slave %d port %d\n", + (int)(vp_admin->default_vlan), + admin_vlan_ix, slave, port); + } + + /* save original vlan ix and vlan id */ + work->orig_vlan_id = vp_oper->state.default_vlan; + work->orig_vlan_ix = vp_oper->vlan_idx; + + /* handle new qos */ + if (vp_oper->state.default_qos != vp_admin->default_qos) + work->flags |= MLX4_VF_IMMED_VLAN_FLAG_QOS; + + if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN) + vp_oper->vlan_idx = admin_vlan_ix; + + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + + /* iterate over QPs owned by this slave, using UPDATE_QP */ + work->port = port; + work->slave = slave; + work->qos = vp_oper->state.default_qos; + work->vlan_id = vp_oper->state.default_vlan; + work->vlan_ix = vp_oper->vlan_idx; + work->priv = priv; + INIT_WORK(&work->work, mlx4_vf_immed_vlan_work_handler); + queue_work(priv->mfunc.master.comm_wq, &work->work); + + return 0; +} + + static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) { int port, err; @@ -1527,7 +1822,7 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; vp_oper->state = *vp_admin; if (MLX4_VGT != vp_admin->default_vlan) { - err = mlx4_register_vlan(&priv->dev, port, + err = __mlx4_register_vlan(&priv->dev, port, vp_admin->default_vlan, &(vp_oper->vlan_idx)); if (err) { vp_oper->vlan_idx = NO_INDX; @@ -1548,12 +1843,12 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) err = vp_oper->mac_idx; vp_oper->mac_idx = NO_INDX; mlx4_warn((&priv->dev), - "No mac resorces slave %d, port %d\n", + "No mac resources slave %d, port %d\n", slave, port); return err; } mlx4_dbg((&(priv->dev)), "alloc mac %llx idx %d slave %d port %d\n", - (long long)vp_oper->state.mac, vp_oper->mac_idx, slave, port); + (unsigned long long) vp_oper->state.mac, vp_oper->mac_idx, slave, port); } } return 0; @@ -1599,6 +1894,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, if (cmd == MLX4_COMM_CMD_RESET) { mlx4_warn(dev, "Received reset from slave:%d\n", slave); slave_state[slave].active = false; + slave_state[slave].old_vlan_api = false; mlx4_master_deactivate_admin_state(priv, slave); for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) { slave_state[slave].event_eq[i].eqn = -1; @@ -1619,7 +1915,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, /*command from slave in the middle of FLR*/ if (cmd != MLX4_COMM_CMD_RESET && MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) { - mlx4_warn(dev, "slave:%d is Trying to run cmd(0x%x) " + mlx4_warn(dev, "slave:%d is Trying to run cmd (0x%x) " "in the middle of FLR\n", slave, cmd); return; } @@ -1630,7 +1926,6 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, goto reset_slave; slave_state[slave].vhcr_dma = ((u64) param) << 48; priv->mfunc.master.slave_state[slave].cookie = 0; - mutex_init(&priv->mfunc.master.gen_eqe_mutex[slave]); break; case MLX4_COMM_CMD_VHCR1: if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR0) @@ -1658,7 +1953,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_master_process_vhcr(dev, slave, NULL)) { - mlx4_err(dev, "Failed processing vhcr for slave:%d," + mlx4_err(dev, "Failed processing vhcr for slave: %d," " resetting slave.\n", slave); mutex_unlock(&priv->cmd.slave_cmd_mutex); goto reset_slave; @@ -1666,7 +1961,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, mutex_unlock(&priv->cmd.slave_cmd_mutex); break; default: - mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave); + mlx4_warn(dev, "Bad comm cmd: %d from slave: %d\n", cmd, slave); goto reset_slave; } spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); @@ -1676,8 +1971,8 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, is_going_down = 1; spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); if (is_going_down) { - mlx4_warn(dev, "Slave is going down aborting command(%d)" - " executing from slave:%d\n", + mlx4_warn(dev, "Slave is going down aborting command (%d)" + " executing from slave: %d\n", cmd, slave); return; } @@ -1696,8 +1991,6 @@ reset_slave: spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); /*with slave in the middle of flr, no need to clean resources again.*/ inform_slave_state: - memset(&slave_state[slave].event_eq, 0, - sizeof(struct mlx4_slave_event_eq_info)); __raw_writel((__force u32) cpu_to_be32(reply), &priv->mfunc.comm[slave].slave_read); wmb(); @@ -1751,7 +2044,10 @@ void mlx4_master_comm_channel(struct work_struct *work) comm_cmd >> 16 & 0xff, comm_cmd & 0xffff, toggle); ++served; - } + } else + mlx4_err(dev, "slave %d out of sync." + " read toggle %d, write toggle %d.\n", slave, slt, + toggle); } } @@ -1759,6 +2055,19 @@ void mlx4_master_comm_channel(struct work_struct *work) mlx4_warn(dev, "Got command event with bitmask from %d slaves" " but %d were served\n", reported, served); +} +/* master command processing */ +void mlx4_master_arm_comm_channel(struct work_struct *work) +{ + struct mlx4_mfunc_master_ctx *master = + container_of(work, + struct mlx4_mfunc_master_ctx, + arm_comm_work); + struct mlx4_mfunc *mfunc = + container_of(master, struct mlx4_mfunc, master); + struct mlx4_priv *priv = + container_of(mfunc, struct mlx4_priv, mfunc); + struct mlx4_dev *dev = &priv->dev; if (mlx4_ARM_COMM_CHANNEL(dev)) mlx4_warn(dev, "Failed to arm comm channel events\n"); @@ -1839,6 +2148,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) for (i = 0; i < dev->num_slaves; ++i) { s_state = &priv->mfunc.master.slave_state[i]; s_state->last_cmd = MLX4_COMM_CMD_RESET; + mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]); for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j) s_state->event_eq[j].eqn = -1; __raw_writel((__force u32) 0, @@ -1868,6 +2178,8 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD; INIT_WORK(&priv->mfunc.master.comm_work, mlx4_master_comm_channel); + INIT_WORK(&priv->mfunc.master.arm_comm_work, + mlx4_master_arm_comm_channel); INIT_WORK(&priv->mfunc.master.slave_event_work, mlx4_gen_slave_eqe); INIT_WORK(&priv->mfunc.master.slave_flr_event_work, @@ -2081,6 +2393,8 @@ struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) return ERR_PTR(-ENOMEM); } + memset(mailbox->buf, 0, MLX4_MAILBOX_SIZE); + return mailbox; } EXPORT_SYMBOL_GPL(mlx4_alloc_cmd_mailbox); @@ -2101,23 +2415,32 @@ u32 mlx4_comm_get_version(void) return ((u32) CMD_CHAN_IF_REV << 8) | (u32) CMD_CHAN_VER; } +static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) +{ + if ((vf < 0) || (vf >= dev->num_vfs)) { + mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", vf, dev->num_vfs); + return -EINVAL; + } + return (vf+1); +} + int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *s_info; + int slave; if (!mlx4_is_master(dev)) return -EPROTONOSUPPORT; - if ((vf <= 0) || (vf > dev->num_vfs)) { - mlx4_err(dev, "Bad vf number:%d (max vf activated: %d)\n", vf, dev->num_vfs); + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) return -EINVAL; - } - s_info = &priv->mfunc.master.vf_admin[vf].vport[port]; + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; s_info->mac = mlx4_mac_to_u64(mac); mlx4_info(dev, "default mac on vf %d port %d to %llX will take afect only after vf restart\n", - vf, port, (long long)s_info->mac); + vf, port, (unsigned long long) s_info->mac); return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); @@ -2125,40 +2448,145 @@ EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) { struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_vport_state *s_info; + struct mlx4_vport_oper_state *vf_oper; + struct mlx4_vport_state *vf_admin; + int slave; if ((!mlx4_is_master(dev)) || - !(dev->caps.flags & MLX4_DEV_CAP_FLAG_ESWITCH_SUPPORT)) + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VLAN_CONTROL)) return -EPROTONOSUPPORT; - if ((vf <= 0) || (vf > dev->num_vfs) || (vlan > 4095) || (qos > 7)) + if ((vlan > 4095) || (qos > 7)) + return -EINVAL; + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) return -EINVAL; - s_info = &priv->mfunc.master.vf_admin[vf].vport[port]; + vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + if ((0 == vlan) && (0 == qos)) - s_info->default_vlan = MLX4_VGT; + vf_admin->default_vlan = MLX4_VGT; else - s_info->default_vlan = vlan; - s_info->default_qos = qos; + vf_admin->default_vlan = vlan; + vf_admin->default_qos = qos; + + if (priv->mfunc.master.slave_state[slave].active && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) { + mlx4_info(dev, "updating vf %d port %d config params immediately\n", + vf, port); + mlx4_master_immediate_activate_vlan_qos(priv, slave, port); + } return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan); + /* mlx4_get_slave_default_vlan - + * retrun true if VST ( default vlan) + * if VST will fill vlan & qos (if not NULL) */ +bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, u16 *vlan, u8 *qos) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_priv *priv; + + priv = mlx4_priv(dev); + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + + if (MLX4_VGT != vp_oper->state.default_vlan) { + if (vlan) + *vlan = vp_oper->state.default_vlan; + if (qos) + *qos = vp_oper->state.default_qos; + return true; + } + return false; +} +EXPORT_SYMBOL_GPL(mlx4_get_slave_default_vlan); + int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *s_info; + int slave; if ((!mlx4_is_master(dev)) || - !(dev->caps.flags & MLX4_DEV_CAP_FLAG_ESWITCH_SUPPORT)) + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FSM)) return -EPROTONOSUPPORT; - if ((vf <= 0) || (vf > dev->num_vfs)) + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) return -EINVAL; - s_info = &priv->mfunc.master.vf_admin[vf].vport[port]; + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; s_info->spoofchk = setting; return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_spoofchk); + +int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_vport_state *s_info; + struct mlx4_vport_oper_state *vp_oper; + int slave; + u8 link_stat_event; + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + switch (link_state) { + case IFLA_VF_LINK_STATE_AUTO: + /* get link curent state */ + if (!priv->sense.do_sense_port[port]) + link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE; + else + link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_DOWN; + break; + + case IFLA_VF_LINK_STATE_ENABLE: + link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE; + break; + + case IFLA_VF_LINK_STATE_DISABLE: + link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_DOWN; + break; + + default: + mlx4_warn(dev, "unknown value for link_state %02x on slave %d port %d\n", + link_state, slave, port); + return -EINVAL; + }; + /* update the admin & oper state on the link state */ + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + s_info->link_state = link_state; + vp_oper->state.link_state = link_state; + + /* send event */ + mlx4_gen_port_state_change_eqe(dev, slave, port, link_stat_event); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state); + +int mlx4_get_vf_link_state(struct mlx4_dev *dev, int port, int vf) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_vport_state *s_info; + int slave; + + if (!mlx4_is_master(dev)) + return -EPROTONOSUPPORT; + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + + return s_info->link_state; +} +EXPORT_SYMBOL_GPL(mlx4_get_vf_link_state); + diff --git a/sys/ofed/drivers/net/mlx4/cq.c b/sys/ofed/drivers/net/mlx4/cq.c index f87025a..b133ae6 100644 --- a/sys/ofed/drivers/net/mlx4/cq.c +++ b/sys/ofed/drivers/net/mlx4/cq.c @@ -2,7 +2,7 @@ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -35,7 +35,7 @@ */ #include <linux/hardirq.h> - +#include <linux/module.h> #include <linux/mlx4/cmd.h> #include <linux/mlx4/cq.h> @@ -56,12 +56,14 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn) struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; struct mlx4_cq *cq; - spin_lock(&cq_table->lock); + read_lock(&cq_table->cq_table_lock); + cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree, cqn & (dev->caps.num_cqs - 1)); if (cq) atomic_inc(&cq->refcount); - spin_unlock(&cq_table->lock); + + read_unlock(&cq_table->cq_table_lock); if (!cq) { mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn); @@ -81,13 +83,13 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; struct mlx4_cq *cq; - spin_lock(&cq_table->lock); + read_lock(&cq_table->cq_table_lock); cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1)); if (cq) atomic_inc(&cq->refcount); - spin_unlock(&cq_table->lock); + read_unlock(&cq_table->cq_table_lock); if (!cq) { mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn); @@ -220,7 +222,7 @@ err_put: mlx4_table_put(dev, &cq_table->table, *cqn); err_out: - mlx4_bitmap_free(&cq_table->bitmap, *cqn); + mlx4_bitmap_free(&cq_table->bitmap, *cqn, MLX4_NO_RR); return err; } @@ -250,7 +252,7 @@ void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) mlx4_table_put(dev, &cq_table->cmpt_table, cqn); mlx4_table_put(dev, &cq_table->table, cqn); - mlx4_bitmap_free(&cq_table->bitmap, cqn); + mlx4_bitmap_free(&cq_table->bitmap, cqn, MLX4_NO_RR); } static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) @@ -269,23 +271,6 @@ static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) __mlx4_cq_free_icm(dev, cqn); } -static int mlx4_find_least_loaded_vector(struct mlx4_priv *priv) -{ - int i; - int index = 0; - int min = priv->eq_table.eq[0].load; - - for (i = 1; i < priv->dev.caps.num_comp_vectors; i++) { - if (priv->eq_table.eq[i].load < min) { - index = i; - min = priv->eq_table.eq[i].load; - } - } - - return index; -} - - int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, unsigned vector, int collapsed, @@ -298,24 +283,20 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, u64 mtt_addr; int err; - cq->vector = (vector == MLX4_LEAST_ATTACHED_VECTOR) ? - mlx4_find_least_loaded_vector(priv) : vector; - - if (cq->vector > dev->caps.num_comp_vectors + dev->caps.comp_pool) { + if (vector > dev->caps.num_comp_vectors + dev->caps.comp_pool) return -EINVAL; - } + + cq->vector = vector; err = mlx4_cq_alloc_icm(dev, &cq->cqn); - if (err) { + if (err) return err; - } spin_lock_irq(&cq_table->lock); err = radix_tree_insert(&cq_table->tree, cq->cqn, cq); spin_unlock_irq(&cq_table->lock); - if (err){ + if (err) goto err_icm; - } mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { @@ -331,7 +312,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, cq_context->flags |= cpu_to_be32(1 << 19); cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index); - cq_context->comp_eqn = priv->eq_table.eq[cq->vector].eqn; + cq_context->comp_eqn = priv->eq_table.eq[vector].eqn; cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; mtt_addr = mlx4_mtt_addr(dev, mtt); @@ -344,7 +325,6 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, if (err) goto err_radix; - priv->eq_table.eq[cq->vector].load++; cq->cons_index = 0; cq->arm_sn = 1; cq->uar = uar; @@ -378,8 +358,6 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) if (err) mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); - - priv->eq_table.eq[cq->vector].load--; synchronize_irq(priv->eq_table.eq[cq->vector].irq); spin_lock_irq(&cq_table->lock); @@ -400,6 +378,7 @@ int mlx4_init_cq_table(struct mlx4_dev *dev) int err; spin_lock_init(&cq_table->lock); + rwlock_init(&cq_table->cq_table_lock); INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); if (mlx4_is_slave(dev)) return 0; diff --git a/sys/ofed/drivers/net/mlx4/en_cq.c b/sys/ofed/drivers/net/mlx4/en_cq.c index 9783e23..be043ce 100644 --- a/sys/ofed/drivers/net/mlx4/en_cq.c +++ b/sys/ofed/drivers/net/mlx4/en_cq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -31,12 +31,13 @@ * */ -#include "mlx4_en.h" - #include <linux/mlx4/cq.h> #include <linux/mlx4/qp.h> #include <linux/mlx4/cmd.h> +#include "mlx4_en.h" + + static void mlx4_en_cq_event(struct mlx4_cq *cq, enum mlx4_event event) { return; @@ -44,52 +45,72 @@ static void mlx4_en_cq_event(struct mlx4_cq *cq, enum mlx4_event event) int mlx4_en_create_cq(struct mlx4_en_priv *priv, - struct mlx4_en_cq *cq, - int entries, int ring, enum cq_type mode) + struct mlx4_en_cq **pcq, + int entries, int ring, enum cq_type mode, + int node) { struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_cq *cq; int err; + cq = kzalloc_node(sizeof(struct mlx4_en_cq), GFP_KERNEL, node); + if (!cq) { + cq = kzalloc(sizeof(struct mlx4_en_cq), GFP_KERNEL); + if (!cq) { + en_err(priv, "Failed to allocate CW struture\n"); + return -ENOMEM; + } + } + cq->size = entries; + cq->buf_size = cq->size * mdev->dev->caps.cqe_size; + cq->tq = taskqueue_create_fast("mlx4_en_que", M_NOWAIT, - taskqueue_thread_enqueue, &cq->tq); - if (mode == RX) { - cq->buf_size = cq->size * sizeof(struct mlx4_cqe); - cq->vector = (ring + priv->port) % - mdev->dev->caps.num_comp_vectors; + taskqueue_thread_enqueue, &cq->tq); + if (mode == RX) { TASK_INIT(&cq->cq_task, 0, mlx4_en_rx_que, cq); taskqueue_start_threads(&cq->tq, 1, PI_NET, "%s rx cq", - if_name(priv->dev)); + if_name(priv->dev)); + } else { - cq->buf_size = sizeof(struct mlx4_cqe); - cq->vector = MLX4_LEAST_ATTACHED_VECTOR; TASK_INIT(&cq->cq_task, 0, mlx4_en_tx_que, cq); taskqueue_start_threads(&cq->tq, 1, PI_NET, "%s tx cq", - if_name(priv->dev)); + if_name(priv->dev)); } cq->ring = ring; cq->is_tx = mode; - mtx_init(&cq->lock.m, "mlx4 cq", NULL, MTX_DEF); + spin_lock_init(&cq->lock); err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, cq->buf_size, 2 * PAGE_SIZE); if (err) - return err; + goto err_cq; err = mlx4_en_map_buffer(&cq->wqres.buf); if (err) - mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); - else - cq->buf = (struct mlx4_cqe *) cq->wqres.buf.direct.buf; + goto err_res; + + cq->buf = (struct mlx4_cqe *) cq->wqres.buf.direct.buf; + *pcq = cq; + return 0; + +err_res: + mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); +err_cq: + kfree(cq); return err; } -int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) + +int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, + int cq_idx) { struct mlx4_en_dev *mdev = priv->mdev; - int err; + int err = 0; + char name[25]; + int timestamp_en = 0; cq->dev = mdev->pndev[priv->port]; cq->mcq.set_ci_db = cq->wqres.db.db; @@ -98,52 +119,83 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) *cq->mcq.arm_db = 0; memset(cq->buf, 0, cq->buf_size); - if (!cq->is_tx) - cq->size = priv->rx_ring[cq->ring].actual_size; - + if (cq->is_tx == RX) { + if (mdev->dev->caps.comp_pool) { + if (!cq->vector) { + sprintf(name, "%s-%d", if_name(priv->dev), + cq->ring); + /* Set IRQ for specific name (per ring) */ + if (mlx4_assign_eq(mdev->dev, name, &cq->vector)) { + cq->vector = (cq->ring + 1 + priv->port) + % mdev->dev->caps.num_comp_vectors; + mlx4_warn(mdev, "Failed Assigning an EQ to " + "%s ,Falling back to legacy EQ's\n", + name); + } + } + } else { + cq->vector = (cq->ring + 1 + priv->port) % + mdev->dev->caps.num_comp_vectors; + } + } else { + struct mlx4_en_cq *rx_cq; + /* + * For TX we use the same irq per + * ring we assigned for the RX + */ + cq_idx = cq_idx % priv->rx_ring_num; + rx_cq = priv->rx_cq[cq_idx]; + cq->vector = rx_cq->vector; + } - err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar, - cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx, 0); - if (err) { + if (!cq->is_tx) + cq->size = priv->rx_ring[cq->ring]->actual_size; + err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, + &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq, + cq->vector, 0, timestamp_en); + if (err) return err; - } cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.event = mlx4_en_cq_event; - if (cq->is_tx) { - init_timer(&cq->timer); - cq->timer.function = mlx4_en_poll_tx_cq; - cq->timer.data = (unsigned long) cq; - } + if (cq->is_tx) { + init_timer(&cq->timer); + cq->timer.function = mlx4_en_poll_tx_cq; + cq->timer.data = (unsigned long) cq; + } + return 0; } -void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) +void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) { struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_cq *cq = *pcq; taskqueue_drain(cq->tq, &cq->cq_task); taskqueue_free(cq->tq); mlx4_en_unmap_buffer(&cq->wqres.buf); mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); - cq->buf_size = 0; - cq->buf = NULL; - mtx_destroy(&cq->lock.m); + if (priv->mdev->dev->caps.comp_pool && cq->vector) + mlx4_release_eq(priv->mdev->dev, cq->vector); + kfree(cq); + *pcq = NULL; } void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) { - struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_dev *mdev = priv->mdev; - taskqueue_drain(cq->tq, &cq->cq_task); - if (cq->is_tx) - del_timer(&cq->timer); + taskqueue_drain(cq->tq, &cq->cq_task); + if (cq->is_tx) + del_timer(&cq->timer); - mlx4_cq_free(mdev->dev, &cq->mcq); + mlx4_cq_free(mdev->dev, &cq->mcq); } + /* Set rx cq moderation parameters */ int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) { diff --git a/sys/ofed/drivers/net/mlx4/en_ethtool.c b/sys/ofed/drivers/net/mlx4/en_ethtool.c index 9279944..6569a53 100644 --- a/sys/ofed/drivers/net/mlx4/en_ethtool.c +++ b/sys/ofed/drivers/net/mlx4/en_ethtool.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,26 +34,57 @@ #include <linux/kernel.h> #include <linux/ethtool.h> #include <linux/netdevice.h> -#include <linux/if_vlan.h> +#include <linux/mlx4/driver.h> +#include <linux/in.h> +#include <net/ip.h> +#include <linux/bitmap.h> #include "mlx4_en.h" #include "en_port.h" +#define EN_ETHTOOL_QP_ATTACH (1ull << 63) + +union mlx4_ethtool_flow_union { + struct ethtool_tcpip4_spec tcp_ip4_spec; + struct ethtool_tcpip4_spec udp_ip4_spec; + struct ethtool_tcpip4_spec sctp_ip4_spec; + struct ethtool_ah_espip4_spec ah_ip4_spec; + struct ethtool_ah_espip4_spec esp_ip4_spec; + struct ethtool_usrip4_spec usr_ip4_spec; + struct ethhdr ether_spec; + __u8 hdata[52]; +}; -static void mlx4_en_update_lro_stats(struct mlx4_en_priv *priv) -{ - int i; +struct mlx4_ethtool_flow_ext { + __u8 padding[2]; + unsigned char h_dest[ETH_ALEN]; + __be16 vlan_etype; + __be16 vlan_tci; + __be32 data[2]; +}; - priv->port_stats.lro_aggregated = 0; - priv->port_stats.lro_flushed = 0; - priv->port_stats.lro_no_desc = 0; +struct mlx4_ethtool_rx_flow_spec { + __u32 flow_type; + union mlx4_ethtool_flow_union h_u; + struct mlx4_ethtool_flow_ext h_ext; + union mlx4_ethtool_flow_union m_u; + struct mlx4_ethtool_flow_ext m_ext; + __u64 ring_cookie; + __u32 location; +}; - for (i = 0; i < priv->rx_ring_num; i++) { - priv->port_stats.lro_aggregated += priv->rx_ring[i].lro.stats.aggregated; - priv->port_stats.lro_flushed += priv->rx_ring[i].lro.stats.flushed; - priv->port_stats.lro_no_desc += priv->rx_ring[i].lro.stats.no_desc; - } -} +struct mlx4_ethtool_rxnfc { + __u32 cmd; + __u32 flow_type; + __u64 data; + struct mlx4_ethtool_rx_flow_spec fs; + __u32 rule_cnt; + __u32 rule_locs[0]; +}; + +#ifndef FLOW_MAC_EXT +#define FLOW_MAC_EXT 0x40000000 +#endif static void mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) @@ -61,104 +92,142 @@ mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - sprintf(drvinfo->driver, DRV_NAME " (%s)", mdev->dev->board_id); - strncpy(drvinfo->version, DRV_VERSION " (" DRV_RELDATE ")", 32); - sprintf(drvinfo->fw_version, "%d.%d.%d", + strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, DRV_VERSION " (" DRV_RELDATE ")", + sizeof(drvinfo->version)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", (u16) (mdev->dev->caps.fw_ver >> 32), (u16) ((mdev->dev->caps.fw_ver >> 16) & 0xffff), (u16) (mdev->dev->caps.fw_ver & 0xffff)); - strncpy(drvinfo->bus_info, pci_name(mdev->dev->pdev), 32); + strlcpy(drvinfo->bus_info, pci_name(mdev->dev->pdev), + sizeof(drvinfo->bus_info)); drvinfo->n_stats = 0; drvinfo->regdump_len = 0; drvinfo->eedump_len = 0; } -static u32 mlx4_en_get_tso(struct net_device *dev) -{ - return (dev->features & NETIF_F_TSO) != 0; -} - -static int mlx4_en_set_tso(struct net_device *dev, u32 data) -{ - struct mlx4_en_priv *priv = netdev_priv(dev); - - if (data) { - if (!priv->mdev->LSO_support) - return -EPERM; - dev->features |= (NETIF_F_TSO | NETIF_F_TSO6); -#ifdef HAVE_NETDEV_VLAN_FEATURES - dev->vlan_features |= (NETIF_F_TSO | NETIF_F_TSO6); -#else - if (priv->vlgrp) { - int i; - struct net_device *vdev; - for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { - vdev = vlan_group_get_device(priv->vlgrp, i); - if (vdev) { - vdev->features |= (NETIF_F_TSO | NETIF_F_TSO6); - vlan_group_set_device(priv->vlgrp, i, vdev); - } - } - } -#endif - } else { - dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); -#ifdef HAVE_NETDEV_VLAN_FEATURES - dev->vlan_features &= ~(NETIF_F_TSO | NETIF_F_TSO6); -#else - if (priv->vlgrp) { - int i; - struct net_device *vdev; - for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { - vdev = vlan_group_get_device(priv->vlgrp, i); - if (vdev) { - vdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); - vlan_group_set_device(priv->vlgrp, i, vdev); - } - } - } -#endif - } - return 0; -} - -static u32 mlx4_en_get_rx_csum(struct net_device *dev) -{ - struct mlx4_en_priv *priv = netdev_priv(dev); - return priv->rx_csum; -} - -static int mlx4_en_set_rx_csum(struct net_device *dev, u32 data) -{ - struct mlx4_en_priv *priv = netdev_priv(dev); - priv->rx_csum = (data != 0); - return 0; -} - static const char main_strings[][ETH_GSTRING_LEN] = { - "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors", - "tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions", - "rx_length_errors", "rx_over_errors", "rx_crc_errors", - "rx_frame_errors", "rx_fifo_errors", "rx_missed_errors", - "tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors", - "tx_heartbeat_errors", "tx_window_errors", + /* packet statistics */ + "rx_packets", + "rx_bytes", + "rx_multicast_packets", + "rx_broadcast_packets", + "rx_errors", + "rx_dropped", + "rx_length_errors", + "rx_over_errors", + "rx_crc_errors", + "rx_jabbers", + "rx_in_range_length_error", + "rx_out_range_length_error", + "rx_lt_64_bytes_packets", + "rx_127_bytes_packets", + "rx_255_bytes_packets", + "rx_511_bytes_packets", + "rx_1023_bytes_packets", + "rx_1518_bytes_packets", + "rx_1522_bytes_packets", + "rx_1548_bytes_packets", + "rx_gt_1548_bytes_packets", + "tx_packets", + "tx_bytes", + "tx_multicast_packets", + "tx_broadcast_packets", + "tx_errors", + "tx_dropped", + "tx_lt_64_bytes_packets", + "tx_127_bytes_packets", + "tx_255_bytes_packets", + "tx_511_bytes_packets", + "tx_1023_bytes_packets", + "tx_1518_bytes_packets", + "tx_1522_bytes_packets", + "tx_1548_bytes_packets", + "tx_gt_1548_bytes_packets", + "rx_prio_0_packets", "rx_prio_0_bytes", + "rx_prio_1_packets", "rx_prio_1_bytes", + "rx_prio_2_packets", "rx_prio_2_bytes", + "rx_prio_3_packets", "rx_prio_3_bytes", + "rx_prio_4_packets", "rx_prio_4_bytes", + "rx_prio_5_packets", "rx_prio_5_bytes", + "rx_prio_6_packets", "rx_prio_6_bytes", + "rx_prio_7_packets", "rx_prio_7_bytes", + "rx_novlan_packets", "rx_novlan_bytes", + "tx_prio_0_packets", "tx_prio_0_bytes", + "tx_prio_1_packets", "tx_prio_1_bytes", + "tx_prio_2_packets", "tx_prio_2_bytes", + "tx_prio_3_packets", "tx_prio_3_bytes", + "tx_prio_4_packets", "tx_prio_4_bytes", + "tx_prio_5_packets", "tx_prio_5_bytes", + "tx_prio_6_packets", "tx_prio_6_bytes", + "tx_prio_7_packets", "tx_prio_7_bytes", + "tx_novlan_packets", "tx_novlan_bytes", + + /* flow control statistics */ + "rx_pause_prio_0", "rx_pause_duration_prio_0", + "rx_pause_transition_prio_0", "tx_pause_prio_0", + "tx_pause_duration_prio_0", "tx_pause_transition_prio_0", + "rx_pause_prio_1", "rx_pause_duration_prio_1", + "rx_pause_transition_prio_1", "tx_pause_prio_1", + "tx_pause_duration_prio_1", "tx_pause_transition_prio_1", + "rx_pause_prio_2", "rx_pause_duration_prio_2", + "rx_pause_transition_prio_2", "tx_pause_prio_2", + "tx_pause_duration_prio_2", "tx_pause_transition_prio_2", + "rx_pause_prio_3", "rx_pause_duration_prio_3", + "rx_pause_transition_prio_3", "tx_pause_prio_3", + "tx_pause_duration_prio_3", "tx_pause_transition_prio_3", + "rx_pause_prio_4", "rx_pause_duration_prio_4", + "rx_pause_transition_prio_4", "tx_pause_prio_4", + "tx_pause_duration_prio_4", "tx_pause_transition_prio_4", + "rx_pause_prio_5", "rx_pause_duration_prio_5", + "rx_pause_transition_prio_5", "tx_pause_prio_5", + "tx_pause_duration_prio_5", "tx_pause_transition_prio_5", + "rx_pause_prio_6", "rx_pause_duration_prio_6", + "rx_pause_transition_prio_6", "tx_pause_prio_6", + "tx_pause_duration_prio_6", "tx_pause_transition_prio_6", + "rx_pause_prio_7", "rx_pause_duration_prio_7", + "rx_pause_transition_prio_7", "tx_pause_prio_7", + "tx_pause_duration_prio_7", "tx_pause_transition_prio_7", + + /* VF statistics */ + "rx_packets", + "rx_bytes", + "rx_multicast_packets", + "rx_broadcast_packets", + "rx_errors", + "rx_dropped", + "tx_packets", + "tx_bytes", + "tx_multicast_packets", + "tx_broadcast_packets", + "tx_errors", + + /* VPort statistics */ + "vport_rx_unicast_packets", + "vport_rx_unicast_bytes", + "vport_rx_multicast_packets", + "vport_rx_multicast_bytes", + "vport_rx_broadcast_packets", + "vport_rx_broadcast_bytes", + "vport_rx_dropped", + "vport_rx_errors", + "vport_tx_unicast_packets", + "vport_tx_unicast_bytes", + "vport_tx_multicast_packets", + "vport_tx_multicast_bytes", + "vport_tx_broadcast_packets", + "vport_tx_broadcast_bytes", + "vport_tx_errors", /* port statistics */ - "lro_aggregated", "lro_flushed", "lro_no_desc", "tso_packets", - "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed", + "tx_tso_packets", + "tx_queue_stopped", "tx_wake_queue", "tx_timeout", "rx_alloc_failed", "rx_csum_good", "rx_csum_none", "tx_chksum_offload", - - /* packet statistics */ - "broadcast", "rx_prio_0", "rx_prio_1", "rx_prio_2", "rx_prio_3", - "rx_prio_4", "rx_prio_5", "rx_prio_6", "rx_prio_7", "tx_prio_0", - "tx_prio_1", "tx_prio_2", "tx_prio_3", "tx_prio_4", "tx_prio_5", - "tx_prio_6", "tx_prio_7", }; -#define NUM_MAIN_STATS 21 -#define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + NUM_PERF_STATS) static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= { - "Interupt Test", + "Interrupt Test", "Link Test", "Speed Test", "Register Test", @@ -178,56 +247,265 @@ static void mlx4_en_set_msglevel(struct net_device *dev, u32 val) static void mlx4_en_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { - wol->supported = 0; - wol->wolopts = 0; + struct mlx4_en_priv *priv = netdev_priv(netdev); + int err = 0; + u64 config = 0; + u64 mask; + + if ((priv->port < 1) || (priv->port > 2)) { + en_err(priv, "Failed to get WoL information\n"); + return; + } + + mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 : + MLX4_DEV_CAP_FLAG_WOL_PORT2; + + if (!(priv->mdev->dev->caps.flags & mask)) { + wol->supported = 0; + wol->wolopts = 0; + return; + } + + err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); + if (err) { + en_err(priv, "Failed to get WoL information\n"); + return; + } + + if (config & MLX4_EN_WOL_MAGIC) + wol->supported = WAKE_MAGIC; + else + wol->supported = 0; + + if (config & MLX4_EN_WOL_ENABLED) + wol->wolopts = WAKE_MAGIC; + else + wol->wolopts = 0; +} + +static int mlx4_en_set_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + u64 config = 0; + int err = 0; + u64 mask; + + if ((priv->port < 1) || (priv->port > 2)) + return -EOPNOTSUPP; + + mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 : + MLX4_DEV_CAP_FLAG_WOL_PORT2; + + if (!(priv->mdev->dev->caps.flags & mask)) + return -EOPNOTSUPP; + + if (wol->supported & ~WAKE_MAGIC) + return -EINVAL; - return; + err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); + if (err) { + en_err(priv, "Failed to get WoL info, unable to modify\n"); + return err; + } + + if (wol->wolopts & WAKE_MAGIC) { + config |= MLX4_EN_WOL_DO_MODIFY | MLX4_EN_WOL_ENABLED | + MLX4_EN_WOL_MAGIC; + } else { + config &= ~(MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC); + config |= MLX4_EN_WOL_DO_MODIFY; + } + + err = mlx4_wol_write(priv->mdev->dev, config, priv->port); + if (err) + en_err(priv, "Failed to set WoL information\n"); + + return err; +} + +struct bitmap_sim_iterator { + bool advance_array; + unsigned long *stats_bitmap; + unsigned int count; + unsigned int j; +}; + +static inline void bitmap_sim_iterator_init(struct bitmap_sim_iterator *h, + unsigned long *stats_bitmap, + int count) +{ + h->j = 0; + h->advance_array = !bitmap_empty(stats_bitmap, count); + h->count = h->advance_array ? bitmap_weight(stats_bitmap, count) + : count; + h->stats_bitmap = stats_bitmap; } -static int mlx4_en_get_sset_count(struct net_device *dev, int sset) +static inline int bitmap_sim_iterator_test(struct bitmap_sim_iterator *h) +{ + return !h->advance_array ? 1 : test_bit(h->j, h->stats_bitmap); +} + +static inline int bitmap_sim_iterator_inc(struct bitmap_sim_iterator *h) +{ + return h->j++; +} + +static inline unsigned int bitmap_sim_iterator_count( + struct bitmap_sim_iterator *h) +{ + return h->count; +} + +int mlx4_en_get_sset_count(struct net_device *dev, int sset) { struct mlx4_en_priv *priv = netdev_priv(dev); + struct bitmap_sim_iterator it; + + int num_of_stats = NUM_ALL_STATS - + ((priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) ? 0 : NUM_FLOW_STATS); + + bitmap_sim_iterator_init(&it, priv->stats_bitmap, num_of_stats); switch (sset) { case ETH_SS_STATS: - return NUM_ALL_STATS + - (priv->tx_ring_num + priv->rx_ring_num) * 2; + return bitmap_sim_iterator_count(&it) + + (priv->tx_ring_num * 2) + +#ifdef LL_EXTENDED_STATS + (priv->rx_ring_num * 5); +#else + (priv->rx_ring_num * 2); +#endif case ETH_SS_TEST: - return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.loopback_support) * 2; + return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags + & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2; default: return -EOPNOTSUPP; } } -static void mlx4_en_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, uint64_t *data) +void mlx4_en_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) { struct mlx4_en_priv *priv = netdev_priv(dev); int index = 0; int i; + struct bitmap_sim_iterator it; + + int num_of_stats = NUM_ALL_STATS - + ((priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) ? 0 : NUM_FLOW_STATS); + + bitmap_sim_iterator_init(&it, priv->stats_bitmap, num_of_stats); + + if (!data || !priv->port_up) + return; spin_lock_bh(&priv->stats_lock); - mlx4_en_update_lro_stats(priv); + for (i = 0; i < NUM_PKT_STATS; i++, + bitmap_sim_iterator_inc(&it)) + if (bitmap_sim_iterator_test(&it)) + data[index++] = + ((unsigned long *)&priv->pkstats)[i]; + for (i = 0; i < NUM_FLOW_STATS; i++, + bitmap_sim_iterator_inc(&it)) + if (priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) + if (bitmap_sim_iterator_test(&it)) + data[index++] = + ((u64 *)&priv->flowstats)[i]; + for (i = 0; i < NUM_VF_STATS; i++, + bitmap_sim_iterator_inc(&it)) + if (bitmap_sim_iterator_test(&it)) + data[index++] = + ((unsigned long *)&priv->vf_stats)[i]; + for (i = 0; i < NUM_VPORT_STATS; i++, + bitmap_sim_iterator_inc(&it)) + if (bitmap_sim_iterator_test(&it)) + data[index++] = + ((unsigned long *)&priv->vport_stats)[i]; + for (i = 0; i < NUM_PORT_STATS; i++, + bitmap_sim_iterator_inc(&it)) + if (bitmap_sim_iterator_test(&it)) + data[index++] = + ((unsigned long *)&priv->port_stats)[i]; - for (i = 0; i < NUM_MAIN_STATS; i++) - data[index++] = ((unsigned long *) &priv->stats)[i]; - for (i = 0; i < NUM_PORT_STATS; i++) - data[index++] = ((unsigned long *) &priv->port_stats)[i]; for (i = 0; i < priv->tx_ring_num; i++) { - data[index++] = priv->tx_ring[i].packets; - data[index++] = priv->tx_ring[i].bytes; + data[index++] = priv->tx_ring[i]->packets; + data[index++] = priv->tx_ring[i]->bytes; } for (i = 0; i < priv->rx_ring_num; i++) { - data[index++] = priv->rx_ring[i].packets; - data[index++] = priv->rx_ring[i].bytes; + data[index++] = priv->rx_ring[i]->packets; + data[index++] = priv->rx_ring[i]->bytes; +#ifdef LL_EXTENDED_STATS + data[index++] = priv->rx_ring[i]->yields; + data[index++] = priv->rx_ring[i]->misses; + data[index++] = priv->rx_ring[i]->cleaned; +#endif } - for (i = 0; i < NUM_PKT_STATS; i++) - data[index++] = ((unsigned long *) &priv->pkstats)[i]; spin_unlock_bh(&priv->stats_lock); } +void mlx4_en_restore_ethtool_stats(struct mlx4_en_priv *priv, u64 *data) +{ + int index = 0; + int i; + struct bitmap_sim_iterator it; + + int num_of_stats = NUM_ALL_STATS - + ((priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) ? 0 : NUM_FLOW_STATS); + + bitmap_sim_iterator_init(&it, priv->stats_bitmap, num_of_stats); + + if (!data || !priv->port_up) + return; + + spin_lock_bh(&priv->stats_lock); + + for (i = 0; i < NUM_PKT_STATS; i++, + bitmap_sim_iterator_inc(&it)) + if (bitmap_sim_iterator_test(&it)) + ((unsigned long *)&priv->pkstats)[i] = + data[index++]; + for (i = 0; i < NUM_FLOW_STATS; i++, + bitmap_sim_iterator_inc(&it)) + if (priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) + if (bitmap_sim_iterator_test(&it)) + ((u64 *)&priv->flowstats)[i] = + data[index++]; + for (i = 0; i < NUM_VF_STATS; i++, + bitmap_sim_iterator_inc(&it)) + if (bitmap_sim_iterator_test(&it)) + ((unsigned long *)&priv->vf_stats)[i] = + data[index++]; + for (i = 0; i < NUM_VPORT_STATS; i++, + bitmap_sim_iterator_inc(&it)) + if (bitmap_sim_iterator_test(&it)) + ((unsigned long *)&priv->vport_stats)[i] = + data[index++]; + for (i = 0; i < NUM_PORT_STATS; i++, + bitmap_sim_iterator_inc(&it)) + if (bitmap_sim_iterator_test(&it)) + ((unsigned long *)&priv->port_stats)[i] = + data[index++]; + + for (i = 0; i < priv->tx_ring_num; i++) { + priv->tx_ring[i]->packets = data[index++]; + priv->tx_ring[i]->bytes = data[index++]; + } + for (i = 0; i < priv->rx_ring_num; i++) { + priv->rx_ring[i]->packets = data[index++]; + priv->rx_ring[i]->bytes = data[index++]; + } + spin_unlock_bh(&priv->stats_lock); +} + static void mlx4_en_self_test(struct net_device *dev, struct ethtool_test *etest, u64 *buf) { @@ -239,24 +517,47 @@ static void mlx4_en_get_strings(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); int index = 0; - int i; + int i, k; + struct bitmap_sim_iterator it; + + int num_of_stats = NUM_ALL_STATS - + ((priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) ? 0 : NUM_FLOW_STATS); + + bitmap_sim_iterator_init(&it, priv->stats_bitmap, num_of_stats); switch (stringset) { case ETH_SS_TEST: for (i = 0; i < MLX4_EN_NUM_SELF_TEST - 2; i++) strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]); - if (priv->mdev->dev->caps.loopback_support) + if (priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) for (; i < MLX4_EN_NUM_SELF_TEST; i++) strcpy(data + i * ETH_GSTRING_LEN, mlx4_en_test_names[i]); break; case ETH_SS_STATS: /* Add main counters */ - for (i = 0; i < NUM_MAIN_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]); - for (i = 0; i< NUM_PORT_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, - main_strings[i + NUM_MAIN_STATS]); + for (i = 0; i < NUM_PKT_STATS; i++, + bitmap_sim_iterator_inc(&it)) + if (bitmap_sim_iterator_test(&it)) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[i]); + + for (k = 0; k < NUM_FLOW_STATS; k++, + bitmap_sim_iterator_inc(&it)) + if (priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) + if (bitmap_sim_iterator_test(&it)) + strcpy(data + (index++) * + ETH_GSTRING_LEN, + main_strings[i + k]); + + for (; (i + k) < num_of_stats; i++, + bitmap_sim_iterator_inc(&it)) + if (bitmap_sim_iterator_test(&it)) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[i + k]); + for (i = 0; i < priv->tx_ring_num; i++) { sprintf(data + (index++) * ETH_GSTRING_LEN, "tx%d_packets", i); @@ -268,32 +569,79 @@ static void mlx4_en_get_strings(struct net_device *dev, "rx%d_packets", i); sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_bytes", i); +#ifdef LL_EXTENDED_STATS + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_napi_yield", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_misses", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_cleaned", i); +#endif } - for (i = 0; i< NUM_PKT_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, - main_strings[i + NUM_MAIN_STATS + NUM_PORT_STATS]); break; } } +static u32 mlx4_en_autoneg_get(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + u32 autoneg = AUTONEG_DISABLE; + + if ((mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP) && + priv->port_state.autoneg) { + autoneg = AUTONEG_ENABLE; + } + + return autoneg; +} + static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct mlx4_en_priv *priv = netdev_priv(dev); int trans_type; - cmd->autoneg = AUTONEG_DISABLE; - cmd->supported = SUPPORTED_10000baseT_Full; - cmd->advertising = ADVERTISED_10000baseT_Full; + /* SUPPORTED_1000baseT_Half isn't supported */ + cmd->supported = SUPPORTED_1000baseT_Full + |SUPPORTED_10000baseT_Full; + + cmd->advertising = ADVERTISED_1000baseT_Full + |ADVERTISED_10000baseT_Full; + + cmd->supported |= SUPPORTED_1000baseKX_Full + |SUPPORTED_10000baseKX4_Full + |SUPPORTED_10000baseKR_Full + |SUPPORTED_10000baseR_FEC + |SUPPORTED_40000baseKR4_Full + |SUPPORTED_40000baseCR4_Full + |SUPPORTED_40000baseSR4_Full + |SUPPORTED_40000baseLR4_Full; + + /* ADVERTISED_1000baseT_Half isn't advertised */ + cmd->advertising |= ADVERTISED_1000baseKX_Full + |ADVERTISED_10000baseKX4_Full + |ADVERTISED_10000baseKR_Full + |ADVERTISED_10000baseR_FEC + |ADVERTISED_40000baseKR4_Full + |ADVERTISED_40000baseCR4_Full + |ADVERTISED_40000baseSR4_Full + |ADVERTISED_40000baseLR4_Full; if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) return -ENOMEM; + cmd->autoneg = mlx4_en_autoneg_get(dev); + if (cmd->autoneg == AUTONEG_ENABLE) { + cmd->supported |= SUPPORTED_Autoneg; + cmd->advertising |= ADVERTISED_Autoneg; + } + trans_type = priv->port_state.transciver; if (netif_carrier_ok(dev)) { - cmd->speed = priv->port_state.link_speed; + ethtool_cmd_speed_set(cmd, priv->port_state.link_speed); cmd->duplex = DUPLEX_FULL; } else { - cmd->speed = -1; + ethtool_cmd_speed_set(cmd, -1); cmd->duplex = -1; } @@ -314,13 +662,38 @@ static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) return 0; } +static const char *mlx4_en_duplex_to_string(int duplex) +{ + switch (duplex) { + case DUPLEX_FULL: + return "FULL"; + case DUPLEX_HALF: + return "HALF"; + default: + break; + } + return "UNKNOWN"; +} + static int mlx4_en_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - if ((cmd->autoneg == AUTONEG_ENABLE) || - (cmd->speed != SPEED_10000) || (cmd->duplex != DUPLEX_FULL)) - return -EINVAL; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_port_state *port_state = &priv->port_state; + + if ((cmd->autoneg != port_state->autoneg) || + (ethtool_cmd_speed(cmd) != port_state->link_speed) || + (cmd->duplex != DUPLEX_FULL)) { + en_info(priv, "Changing port state properties (auto-negotiation" + " , speed/duplex) is not supported. Current:" + " auto-negotiation=%d speed/duplex=%d/%s\n", + port_state->autoneg, port_state->link_speed, + mlx4_en_duplex_to_string(DUPLEX_FULL)); + return -EOPNOTSUPP; + } - /* Nothing to change */ + /* User provided same port state properties that are currently set. + * Nothing to change + */ return 0; } @@ -329,8 +702,8 @@ static int mlx4_en_get_coalesce(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); - coal->tx_coalesce_usecs = 0; - coal->tx_max_coalesced_frames = 0; + coal->tx_coalesce_usecs = priv->tx_usecs; + coal->tx_max_coalesced_frames = priv->tx_frames; coal->rx_coalesce_usecs = priv->rx_usecs; coal->rx_max_coalesced_frames = priv->rx_frames; @@ -359,6 +732,21 @@ static int mlx4_en_set_coalesce(struct net_device *dev, MLX4_EN_RX_COAL_TIME : coal->rx_coalesce_usecs; + /* Setting TX coalescing parameters */ + if (coal->tx_coalesce_usecs != priv->tx_usecs || + coal->tx_max_coalesced_frames != priv->tx_frames) { + priv->tx_usecs = coal->tx_coalesce_usecs; + priv->tx_frames = coal->tx_max_coalesced_frames; + if (priv->port_up) { + for (i = 0; i < priv->tx_ring_num; i++) { + priv->tx_cq[i]->moder_cnt = priv->tx_frames; + priv->tx_cq[i]->moder_time = priv->tx_usecs; + if (mlx4_en_set_cq_moder(priv, priv->tx_cq[i])) + en_warn(priv, "Failed changing moderation for TX cq %d\n", i); + } + } + } + /* Set adaptive coalescing params */ priv->pkt_rate_low = coal->pkt_rate_low; priv->rx_usecs_low = coal->rx_coalesce_usecs_low; @@ -369,14 +757,17 @@ static int mlx4_en_set_coalesce(struct net_device *dev, if (priv->adaptive_rx_coal) return 0; - for (i = 0; i < priv->rx_ring_num; i++) { - priv->rx_cq[i].moder_cnt = priv->rx_frames; - priv->rx_cq[i].moder_time = priv->rx_usecs; - priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; - err = mlx4_en_set_cq_moder(priv, &priv->rx_cq[i]); - if (err) - return err; + if (priv->port_up) { + for (i = 0; i < priv->rx_ring_num; i++) { + priv->rx_cq[i]->moder_cnt = priv->rx_frames; + priv->rx_cq[i]->moder_time = priv->rx_usecs; + priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; + err = mlx4_en_set_cq_moder(priv, priv->rx_cq[i]); + if (err) + return err; + } } + return 0; } @@ -387,10 +778,13 @@ static int mlx4_en_set_pauseparam(struct net_device *dev, struct mlx4_en_dev *mdev = priv->mdev; int err; + if (pause->autoneg) + return -EOPNOTSUPP; + priv->prof->tx_pause = pause->tx_pause != 0; priv->prof->rx_pause = pause->rx_pause != 0; err = mlx4_SET_PORT_general(mdev->dev, priv->port, - priv->rx_mb_size + ETH_FCS_LEN, + priv->rx_skb_size + ETH_FCS_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, @@ -408,6 +802,41 @@ static void mlx4_en_get_pauseparam(struct net_device *dev, pause->tx_pause = priv->prof->tx_pause; pause->rx_pause = priv->prof->rx_pause; + pause->autoneg = mlx4_en_autoneg_get(dev); +} + +/* rtnl lock must be taken before calling */ +int mlx4_en_pre_config(struct mlx4_en_priv *priv) +{ +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; + + if (!priv->dev->rx_cpu_rmap) + return 0; + + /* Disable RFS events + * Must have all RFS jobs flushed before freeing resources + */ + rmap = priv->dev->rx_cpu_rmap; + priv->dev->rx_cpu_rmap = NULL; + + rtnl_unlock(); + free_irq_cpu_rmap(rmap); + rtnl_lock(); + + if (priv->dev->rx_cpu_rmap) + return -EBUSY; /* another configuration completed while lock + * was free + */ + + /* Make sure all currently running filter_work are being processed + * Other work will return immediatly because of disable_rfs + */ + flush_workqueue(priv->mdev->workqueue); + +#endif + + return 0; } static int mlx4_en_set_ringparam(struct net_device *dev, @@ -418,7 +847,11 @@ static int mlx4_en_set_ringparam(struct net_device *dev, u32 rx_size, tx_size; int port_up = 0; int err = 0; - int i; + int i, n_stats; + u64 *data = NULL; + + if (!priv->port_up) + return -ENOMEM; if (param->rx_jumbo_pending || param->rx_mini_pending) return -EINVAL; @@ -430,10 +863,13 @@ static int mlx4_en_set_ringparam(struct net_device *dev, tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE); tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE); - if (rx_size == (priv->port_up ? priv->rx_ring[0].actual_size : - priv->rx_ring[0].size) && - tx_size == priv->tx_ring[0].size) + if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size : + priv->rx_ring[0]->size) && + tx_size == priv->tx_ring[0]->size) return 0; + err = mlx4_en_pre_config(priv); + if (err) + return err; mutex_lock(&mdev->state_lock); if (priv->port_up) { @@ -441,6 +877,14 @@ static int mlx4_en_set_ringparam(struct net_device *dev, mlx4_en_stop_port(dev); } + /* Cache port statistics */ + n_stats = mlx4_en_get_sset_count(dev, ETH_SS_STATS); + if (n_stats > 0) { + data = kmalloc(n_stats * sizeof(u64), GFP_KERNEL); + if (data) + mlx4_en_get_ethtool_stats(dev, NULL, data); + } + mlx4_en_free_resources(priv); priv->prof->tx_ring_size = tx_size; @@ -451,22 +895,30 @@ static int mlx4_en_set_ringparam(struct net_device *dev, en_err(priv, "Failed reallocating port resources\n"); goto out; } + + /* Restore port statistics */ + if (n_stats > 0 && data) + mlx4_en_restore_ethtool_stats(priv, data); + if (port_up) { err = mlx4_en_start_port(dev); - if (err) + if (err) { en_err(priv, "Failed starting port\n"); - } - - for (i = 0; i < priv->rx_ring_num; i++) { - priv->rx_cq[i].moder_cnt = priv->rx_frames; - priv->rx_cq[i].moder_time = priv->rx_usecs; - priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; - err = mlx4_en_set_cq_moder(priv, &priv->rx_cq[i]); - if (err) goto out; + } + + for (i = 0; i < priv->rx_ring_num; i++) { + priv->rx_cq[i]->moder_cnt = priv->rx_frames; + priv->rx_cq[i]->moder_time = priv->rx_usecs; + priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; + err = mlx4_en_set_cq_moder(priv, priv->rx_cq[i]); + if (err) + goto out; + } } out: + kfree(data); mutex_unlock(&mdev->state_lock); return err; } @@ -476,29 +928,665 @@ static void mlx4_en_get_ringparam(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); + if (!priv->port_up) + return; + memset(param, 0, sizeof(*param)); param->rx_max_pending = MLX4_EN_MAX_RX_SIZE; param->tx_max_pending = MLX4_EN_MAX_TX_SIZE; param->rx_pending = priv->port_up ? - priv->rx_ring[0].actual_size : priv->rx_ring[0].size; - param->tx_pending = priv->tx_ring[0].size; + priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size; + param->tx_pending = priv->tx_ring[0]->size; +} + +static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + return priv->rx_ring_num; +} + +static int mlx4_en_get_rxfh_indir(struct net_device *dev, u32 *ring_index) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_rss_map *rss_map = &priv->rss_map; + int rss_rings; + size_t n = priv->rx_ring_num; + int err = 0; + + rss_rings = priv->prof->rss_rings ?: priv->rx_ring_num; + rss_rings = 1 << ilog2(rss_rings); + + while (n--) { + ring_index[n] = rss_map->qps[n % rss_rings].qpn - + rss_map->base_qpn; + } + + return err; +} + +static int mlx4_en_set_rxfh_indir(struct net_device *dev, + const u32 *ring_index) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int port_up = 0; + int err = 0; + int i; + int rss_rings = 0; + + /* Calculate RSS table size and make sure flows are spread evenly + * between rings + */ + for (i = 0; i < priv->rx_ring_num; i++) { + if (i > 0 && !ring_index[i] && !rss_rings) + rss_rings = i; + + if (ring_index[i] != (i % (rss_rings ?: priv->rx_ring_num))) + return -EINVAL; + } + + if (!rss_rings) + rss_rings = priv->rx_ring_num; + + /* RSS table size must be an order of 2 */ + if (!is_power_of_2(rss_rings)) + return -EINVAL; + + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev); + } + + priv->prof->rss_rings = rss_rings; + + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) + en_err(priv, "Failed starting port\n"); + } + + mutex_unlock(&mdev->state_lock); + return err; +} + +#define all_zeros_or_all_ones(field) \ + ((field) == 0 || (field) == (__force typeof(field))-1) + +static int mlx4_en_validate_flow(struct net_device *dev, + struct mlx4_ethtool_rxnfc *cmd) +{ + struct ethtool_usrip4_spec *l3_mask; + struct ethtool_tcpip4_spec *l4_mask; + struct ethhdr *eth_mask; + + if (cmd->fs.location >= MAX_NUM_OF_FS_RULES) + return -EINVAL; + + if (cmd->fs.flow_type & FLOW_MAC_EXT) { + /* dest mac mask must be ff:ff:ff:ff:ff:ff */ + if (!is_broadcast_ether_addr(cmd->fs.m_ext.h_dest)) + return -EINVAL; + } + + switch (cmd->fs.flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + if (cmd->fs.m_u.tcp_ip4_spec.tos) + return -EINVAL; + l4_mask = &cmd->fs.m_u.tcp_ip4_spec; + /* don't allow mask which isn't all 0 or 1 */ + if (!all_zeros_or_all_ones(l4_mask->ip4src) || + !all_zeros_or_all_ones(l4_mask->ip4dst) || + !all_zeros_or_all_ones(l4_mask->psrc) || + !all_zeros_or_all_ones(l4_mask->pdst)) + return -EINVAL; + break; + case IP_USER_FLOW: + l3_mask = &cmd->fs.m_u.usr_ip4_spec; + if (l3_mask->l4_4_bytes || l3_mask->tos || l3_mask->proto || + cmd->fs.h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4 || + (!l3_mask->ip4src && !l3_mask->ip4dst) || + !all_zeros_or_all_ones(l3_mask->ip4src) || + !all_zeros_or_all_ones(l3_mask->ip4dst)) + return -EINVAL; + break; + case ETHER_FLOW: + eth_mask = &cmd->fs.m_u.ether_spec; + /* source mac mask must not be set */ + if (!is_zero_ether_addr(eth_mask->h_source)) + return -EINVAL; + + /* dest mac mask must be ff:ff:ff:ff:ff:ff */ + if (!is_broadcast_ether_addr(eth_mask->h_dest)) + return -EINVAL; + + if (!all_zeros_or_all_ones(eth_mask->h_proto)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + if ((cmd->fs.flow_type & FLOW_EXT)) { + if (cmd->fs.m_ext.vlan_etype || + !(cmd->fs.m_ext.vlan_tci == 0 || + cmd->fs.m_ext.vlan_tci == cpu_to_be16(0xfff))) + return -EINVAL; + if (cmd->fs.m_ext.vlan_tci) { + if (be16_to_cpu(cmd->fs.h_ext.vlan_tci) < + VLAN_MIN_VALUE || + be16_to_cpu(cmd->fs.h_ext.vlan_tci) > + VLAN_MAX_VALUE) + return -EINVAL; + } + } + + return 0; +} + +static int mlx4_en_ethtool_add_mac_rule(struct mlx4_ethtool_rxnfc *cmd, + struct list_head *rule_list_h, + struct mlx4_spec_list *spec_l2, + unsigned char *mac) +{ + int err = 0; + __be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16); + + spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN); + memcpy(spec_l2->eth.dst_mac, mac, ETH_ALEN); + + if ((cmd->fs.flow_type & FLOW_EXT) && cmd->fs.m_ext.vlan_tci) { + spec_l2->eth.vlan_id = cmd->fs.h_ext.vlan_tci; + spec_l2->eth.vlan_id_msk = cpu_to_be16(0xfff); + } + + list_add_tail(&spec_l2->list, rule_list_h); + + return err; +} + +static int mlx4_en_ethtool_add_mac_rule_by_ipv4(struct mlx4_en_priv *priv, + struct mlx4_ethtool_rxnfc *cmd, + struct list_head *rule_list_h, + struct mlx4_spec_list *spec_l2, + __be32 ipv4_dst) +{ + unsigned char mac[ETH_ALEN]; + + if (!ipv4_is_multicast(ipv4_dst)) { + if (cmd->fs.flow_type & FLOW_MAC_EXT) + memcpy(&mac, cmd->fs.h_ext.h_dest, ETH_ALEN); + else + memcpy(&mac, priv->dev->dev_addr, ETH_ALEN); + } else { + ip_eth_mc_map(ipv4_dst, mac); + } + + return mlx4_en_ethtool_add_mac_rule(cmd, rule_list_h, spec_l2, &mac[0]); +} + +static int add_ip_rule(struct mlx4_en_priv *priv, + struct mlx4_ethtool_rxnfc *cmd, + struct list_head *list_h) +{ + struct mlx4_spec_list *spec_l2 = NULL; + struct mlx4_spec_list *spec_l3 = NULL; + struct ethtool_usrip4_spec *l3_mask = &cmd->fs.m_u.usr_ip4_spec; + + spec_l3 = kzalloc(sizeof(*spec_l3), GFP_KERNEL); + spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL); + if (!spec_l2 || !spec_l3) { + en_err(priv, "Fail to alloc ethtool rule.\n"); + kfree(spec_l2); + kfree(spec_l3); + return -ENOMEM; + } + + mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h, spec_l2, + cmd->fs.h_u. + usr_ip4_spec.ip4dst); + spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4; + spec_l3->ipv4.src_ip = cmd->fs.h_u.usr_ip4_spec.ip4src; + if (l3_mask->ip4src) + spec_l3->ipv4.src_ip_msk = MLX4_BE_WORD_MASK; + spec_l3->ipv4.dst_ip = cmd->fs.h_u.usr_ip4_spec.ip4dst; + if (l3_mask->ip4dst) + spec_l3->ipv4.dst_ip_msk = MLX4_BE_WORD_MASK; + list_add_tail(&spec_l3->list, list_h); + + return 0; +} + +static int add_tcp_udp_rule(struct mlx4_en_priv *priv, + struct mlx4_ethtool_rxnfc *cmd, + struct list_head *list_h, int proto) +{ + struct mlx4_spec_list *spec_l2 = NULL; + struct mlx4_spec_list *spec_l3 = NULL; + struct mlx4_spec_list *spec_l4 = NULL; + struct ethtool_tcpip4_spec *l4_mask = &cmd->fs.m_u.tcp_ip4_spec; + + spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL); + spec_l3 = kzalloc(sizeof(*spec_l3), GFP_KERNEL); + spec_l4 = kzalloc(sizeof(*spec_l4), GFP_KERNEL); + if (!spec_l2 || !spec_l3 || !spec_l4) { + en_err(priv, "Fail to alloc ethtool rule.\n"); + kfree(spec_l2); + kfree(spec_l3); + kfree(spec_l4); + return -ENOMEM; + } + + spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4; + + if (proto == TCP_V4_FLOW) { + mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h, + spec_l2, + cmd->fs.h_u. + tcp_ip4_spec.ip4dst); + spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP; + spec_l3->ipv4.src_ip = cmd->fs.h_u.tcp_ip4_spec.ip4src; + spec_l3->ipv4.dst_ip = cmd->fs.h_u.tcp_ip4_spec.ip4dst; + spec_l4->tcp_udp.src_port = cmd->fs.h_u.tcp_ip4_spec.psrc; + spec_l4->tcp_udp.dst_port = cmd->fs.h_u.tcp_ip4_spec.pdst; + } else { + mlx4_en_ethtool_add_mac_rule_by_ipv4(priv, cmd, list_h, + spec_l2, + cmd->fs.h_u. + udp_ip4_spec.ip4dst); + spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP; + spec_l3->ipv4.src_ip = cmd->fs.h_u.udp_ip4_spec.ip4src; + spec_l3->ipv4.dst_ip = cmd->fs.h_u.udp_ip4_spec.ip4dst; + spec_l4->tcp_udp.src_port = cmd->fs.h_u.udp_ip4_spec.psrc; + spec_l4->tcp_udp.dst_port = cmd->fs.h_u.udp_ip4_spec.pdst; + } + + if (l4_mask->ip4src) + spec_l3->ipv4.src_ip_msk = MLX4_BE_WORD_MASK; + if (l4_mask->ip4dst) + spec_l3->ipv4.dst_ip_msk = MLX4_BE_WORD_MASK; + + if (l4_mask->psrc) + spec_l4->tcp_udp.src_port_msk = MLX4_BE_SHORT_MASK; + if (l4_mask->pdst) + spec_l4->tcp_udp.dst_port_msk = MLX4_BE_SHORT_MASK; + + list_add_tail(&spec_l3->list, list_h); + list_add_tail(&spec_l4->list, list_h); + + return 0; +} + +static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev, + struct mlx4_ethtool_rxnfc *cmd, + struct list_head *rule_list_h) +{ + int err; + struct ethhdr *eth_spec; + struct mlx4_spec_list *spec_l2; + struct mlx4_en_priv *priv = netdev_priv(dev); + + err = mlx4_en_validate_flow(dev, cmd); + if (err) + return err; + + switch (cmd->fs.flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + case ETHER_FLOW: + spec_l2 = kzalloc(sizeof(*spec_l2), GFP_KERNEL); + if (!spec_l2) + return -ENOMEM; + + eth_spec = &cmd->fs.h_u.ether_spec; + mlx4_en_ethtool_add_mac_rule(cmd, rule_list_h, spec_l2, ð_spec->h_dest[0]); + spec_l2->eth.ether_type = eth_spec->h_proto; + if (eth_spec->h_proto) + spec_l2->eth.ether_type_enable = 1; + break; + case IP_USER_FLOW: + err = add_ip_rule(priv, cmd, rule_list_h); + break; + case TCP_V4_FLOW: + err = add_tcp_udp_rule(priv, cmd, rule_list_h, TCP_V4_FLOW); + break; + case UDP_V4_FLOW: + err = add_tcp_udp_rule(priv, cmd, rule_list_h, UDP_V4_FLOW); + break; + } + + return err; +} + +static int mlx4_en_flow_replace(struct net_device *dev, + struct mlx4_ethtool_rxnfc *cmd) +{ + int err; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct ethtool_flow_id *loc_rule; + struct mlx4_spec_list *spec, *tmp_spec; + u32 qpn; + u64 reg_id; + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_REGULAR, + }; + + rule.port = priv->port; + rule.priority = MLX4_DOMAIN_ETHTOOL | cmd->fs.location; + INIT_LIST_HEAD(&rule.list); + + /* Allow direct QP attaches if the EN_ETHTOOL_QP_ATTACH flag is set */ + if (cmd->fs.ring_cookie == RX_CLS_FLOW_DISC) + qpn = priv->drop_qp.qpn; + else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) { + qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1); + } else { + if (cmd->fs.ring_cookie >= priv->rx_ring_num) { + en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist.\n", + cmd->fs.ring_cookie); + return -EINVAL; + } + qpn = priv->rss_map.qps[cmd->fs.ring_cookie].qpn; + if (!qpn) { + en_warn(priv, "rxnfc: RX ring (%llu) is inactive.\n", + cmd->fs.ring_cookie); + return -EINVAL; + } + } + rule.qpn = qpn; + err = mlx4_en_ethtool_to_net_trans_rule(dev, cmd, &rule.list); + if (err) + goto out_free_list; + + mutex_lock(&mdev->state_lock); + loc_rule = &priv->ethtool_rules[cmd->fs.location]; + if (loc_rule->id) { + err = mlx4_flow_detach(priv->mdev->dev, loc_rule->id); + if (err) { + en_err(priv, "Fail to detach network rule at location %d. registration id = %llx\n", + cmd->fs.location, loc_rule->id); + goto unlock; + } + loc_rule->id = 0; + memset(&loc_rule->flow_spec, 0, + sizeof(struct ethtool_rx_flow_spec)); + list_del(&loc_rule->list); + } + err = mlx4_flow_attach(priv->mdev->dev, &rule, ®_id); + if (err) { + en_err(priv, "Fail to attach network rule at location %d.\n", + cmd->fs.location); + goto unlock; + } + loc_rule->id = reg_id; + memcpy(&loc_rule->flow_spec, &cmd->fs, + sizeof(struct ethtool_rx_flow_spec)); + list_add_tail(&loc_rule->list, &priv->ethtool_list); + +unlock: + mutex_unlock(&mdev->state_lock); +out_free_list: + list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) { + list_del(&spec->list); + kfree(spec); + } + return err; +} + +static int mlx4_en_flow_detach(struct net_device *dev, + struct mlx4_ethtool_rxnfc *cmd) +{ + int err = 0; + struct ethtool_flow_id *rule; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + if (cmd->fs.location >= MAX_NUM_OF_FS_RULES) + return -EINVAL; + + mutex_lock(&mdev->state_lock); + rule = &priv->ethtool_rules[cmd->fs.location]; + if (!rule->id) { + err = -ENOENT; + goto out; + } + + err = mlx4_flow_detach(priv->mdev->dev, rule->id); + if (err) { + en_err(priv, "Fail to detach network rule at location %d. registration id = 0x%llx\n", + cmd->fs.location, rule->id); + goto out; + } + rule->id = 0; + memset(&rule->flow_spec, 0, sizeof(struct ethtool_rx_flow_spec)); + + list_del(&rule->list); +out: + mutex_unlock(&mdev->state_lock); + return err; + +} + +static int mlx4_en_get_flow(struct net_device *dev, struct mlx4_ethtool_rxnfc *cmd, + int loc) +{ + int err = 0; + struct ethtool_flow_id *rule; + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (loc < 0 || loc >= MAX_NUM_OF_FS_RULES) + return -EINVAL; + + rule = &priv->ethtool_rules[loc]; + if (rule->id) + memcpy(&cmd->fs, &rule->flow_spec, + sizeof(struct ethtool_rx_flow_spec)); + else + err = -ENOENT; + + return err; +} + +static int mlx4_en_get_num_flows(struct mlx4_en_priv *priv) +{ + + int i, res = 0; + for (i = 0; i < MAX_NUM_OF_FS_RULES; i++) { + if (priv->ethtool_rules[i].id) + res++; + } + return res; + +} + +static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *c, + u32 *rule_locs) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int err = 0; + int i = 0, priority = 0; + struct mlx4_ethtool_rxnfc *cmd = (struct mlx4_ethtool_rxnfc *)c; + + if ((cmd->cmd == ETHTOOL_GRXCLSRLCNT || + cmd->cmd == ETHTOOL_GRXCLSRULE || + cmd->cmd == ETHTOOL_GRXCLSRLALL) && + (mdev->dev->caps.steering_mode != + MLX4_STEERING_MODE_DEVICE_MANAGED || !priv->port_up)) + return -EINVAL; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = priv->rx_ring_num; + break; + case ETHTOOL_GRXCLSRLCNT: + cmd->rule_cnt = mlx4_en_get_num_flows(priv); + break; + case ETHTOOL_GRXCLSRULE: + err = mlx4_en_get_flow(dev, cmd, cmd->fs.location); + break; + case ETHTOOL_GRXCLSRLALL: + while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) { + err = mlx4_en_get_flow(dev, cmd, i); + if (!err) + rule_locs[priority++] = i; + i++; + } + err = 0; + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *c) +{ + int err = 0; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_ethtool_rxnfc *cmd = (struct mlx4_ethtool_rxnfc *)c; + + if (mdev->dev->caps.steering_mode != + MLX4_STEERING_MODE_DEVICE_MANAGED || !priv->port_up) + return -EINVAL; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + err = mlx4_en_flow_replace(dev, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + err = mlx4_en_flow_detach(dev, cmd); + break; + default: + en_warn(priv, "Unsupported ethtool command. (%d)\n", cmd->cmd); + return -EINVAL; + } + + return err; +} + +static void mlx4_en_get_channels(struct net_device *dev, + struct ethtool_channels *channel) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + memset(channel, 0, sizeof(*channel)); + + channel->max_rx = MAX_RX_RINGS; + channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP; + + channel->rx_count = priv->rx_ring_num; + channel->tx_count = priv->tx_ring_num / MLX4_EN_NUM_UP; +} + +static int mlx4_en_set_channels(struct net_device *dev, + struct ethtool_channels *channel) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int port_up = 0; + int i; + int err = 0; + + if (channel->other_count || channel->combined_count || + channel->tx_count > MLX4_EN_MAX_TX_RING_P_UP || + channel->rx_count > MAX_RX_RINGS || + !channel->tx_count || !channel->rx_count) + return -EINVAL; + + err = mlx4_en_pre_config(priv); + if (err) + return err; + + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev); + } + + mlx4_en_free_resources(priv); + + priv->num_tx_rings_p_up = channel->tx_count; + priv->tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP; + priv->rx_ring_num = channel->rx_count; + + err = mlx4_en_alloc_resources(priv); + if (err) { + en_err(priv, "Failed reallocating port resources\n"); + goto out; + } + + netif_set_real_num_tx_queues(dev, priv->tx_ring_num); + netif_set_real_num_rx_queues(dev, priv->rx_ring_num); + + mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP); + + en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num); + en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num); + + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) + en_err(priv, "Failed starting port\n"); + + for (i = 0; i < priv->rx_ring_num; i++) { + priv->rx_cq[i]->moder_cnt = priv->rx_frames; + priv->rx_cq[i]->moder_time = priv->rx_usecs; + priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; + err = mlx4_en_set_cq_moder(priv, priv->rx_cq[i]); + if (err) + goto out; + } + } + +out: + mutex_unlock(&mdev->state_lock); + return err; +} + +static int mlx4_en_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int ret; + + ret = ethtool_op_get_ts_info(dev, info); + if (ret) + return ret; + + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { + info->so_timestamping |= + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = + (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON); + + info->rx_filters = + (1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_ALL); + } + + return ret; } const struct ethtool_ops mlx4_en_ethtool_ops = { .get_drvinfo = mlx4_en_get_drvinfo, .get_settings = mlx4_en_get_settings, .set_settings = mlx4_en_set_settings, -#ifdef NETIF_F_TSO - .get_tso = mlx4_en_get_tso, - .set_tso = mlx4_en_set_tso, -#endif - .get_sg = ethtool_op_get_sg, - .set_sg = ethtool_op_set_sg, .get_link = ethtool_op_get_link, - .get_rx_csum = mlx4_en_get_rx_csum, - .set_rx_csum = mlx4_en_set_rx_csum, - .get_tx_csum = ethtool_op_get_tx_csum, - .set_tx_csum = ethtool_op_set_tx_ipv6_csum, .get_strings = mlx4_en_get_strings, .get_sset_count = mlx4_en_get_sset_count, .get_ethtool_stats = mlx4_en_get_ethtool_stats, @@ -513,8 +1601,14 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .set_pauseparam = mlx4_en_set_pauseparam, .get_ringparam = mlx4_en_get_ringparam, .set_ringparam = mlx4_en_set_ringparam, - .get_flags = ethtool_op_get_flags, - .set_flags = ethtool_op_set_flags, + .get_rxnfc = mlx4_en_get_rxnfc, + .set_rxnfc = mlx4_en_set_rxnfc, + .get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size, + .get_rxfh_indir = mlx4_en_get_rxfh_indir, + .set_rxfh_indir = mlx4_en_set_rxfh_indir, + .get_channels = mlx4_en_get_channels, + .set_channels = mlx4_en_set_channels, + .get_ts_info = mlx4_en_get_ts_info, }; diff --git a/sys/ofed/drivers/net/mlx4/en_main.c b/sys/ofed/drivers/net/mlx4/en_main.c index 302be52..cbbf37d 100644 --- a/sys/ofed/drivers/net/mlx4/en_main.c +++ b/sys/ofed/drivers/net/mlx4/en_main.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,6 +34,7 @@ #include <linux/module.h> #include <linux/delay.h> #include <linux/netdevice.h> +#include <linux/slab.h> #include <linux/mlx4/driver.h> #include <linux/mlx4/device.h> @@ -60,20 +61,9 @@ static const char mlx4_en_version[] = * Device scope module parameters */ - -/* Enable RSS TCP traffic */ -MLX4_EN_PARM_INT(tcp_rss, 1, - "Enable RSS for incomming TCP traffic or disabled (0)"); /* Enable RSS UDP traffic */ MLX4_EN_PARM_INT(udp_rss, 1, - "Enable RSS for incomming UDP traffic or disabled (0)"); - -/* Number of LRO sessions per Rx ring (rounded up to a power of two) */ -MLX4_EN_PARM_INT(num_lro, MLX4_EN_MAX_LRO_DESCRIPTORS, - "Number of LRO sessions per ring or disabled (0)"); - -/* Allow reassembly of fragmented IP packets */ -MLX4_EN_PARM_INT(ip_reasm, 1, "Allow reassembly of fragmented IP packets (!0)"); + "Enable RSS for incoming UDP traffic"); /* Priority pausing */ MLX4_EN_PARM_INT(pfctx, 0, "Priority based Flow Control policy on TX[7:0]." @@ -81,20 +71,23 @@ MLX4_EN_PARM_INT(pfctx, 0, "Priority based Flow Control policy on TX[7:0]." MLX4_EN_PARM_INT(pfcrx, 0, "Priority based Flow Control policy on RX[7:0]." " Per priority bit mask"); +#define MAX_PFC_TX 0xff +#define MAX_PFC_RX 0xff + + static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) { struct mlx4_en_profile *params = &mdev->profile; int i; - params->tcp_rss = tcp_rss; params->udp_rss = udp_rss; - if (params->udp_rss && !(mdev->dev->caps.flags - & MLX4_DEV_CAP_FLAG_UDP_RSS)) { + params->num_tx_rings_p_up = min_t(int, mp_ncpus, + MLX4_EN_MAX_TX_RING_P_UP); + if (params->udp_rss && !(mdev->dev->caps.flags + & MLX4_DEV_CAP_FLAG_UDP_RSS)) { mlx4_warn(mdev, "UDP RSS is not supported on this device.\n"); params->udp_rss = 0; } - params->num_lro = min_t(int, num_lro , MLX4_EN_MAX_LRO_DESCRIPTORS); - params->ip_reasm = ip_reasm; for (i = 1; i <= MLX4_MAX_PORTS; i++) { params->prof[i].rx_pause = 1; params->prof[i].rx_ppp = pfcrx; @@ -102,14 +95,15 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) params->prof[i].tx_ppp = pfctx; params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; - params->prof[i].tx_ring_num = MLX4_EN_NUM_HASH_RINGS + 1 + - (!!pfcrx) * MLX4_EN_NUM_PPP_RINGS; + params->prof[i].tx_ring_num = params->num_tx_rings_p_up * + MLX4_EN_NUM_UP; + params->prof[i].rss_rings = 0; } return 0; } -static void *get_netdev(struct mlx4_dev *dev, void *ctx, u8 port) +static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port) { struct mlx4_en_dev *endev = ctx; @@ -138,6 +132,9 @@ static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr, mlx4_err(mdev, "Internal error detected, restarting device\n"); break; + case MLX4_DEV_EVENT_SLAVE_INIT: + case MLX4_DEV_EVENT_SLAVE_SHUTDOWN: + break; default: if (port < 1 || port > dev->caps.num_ports || !mdev->pndev[port]) @@ -150,7 +147,7 @@ static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr, static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr) { struct mlx4_en_dev *mdev = endev_ptr; - int i; + int i, ret; mutex_lock(&mdev->state_lock); mdev->device_up = false; @@ -162,25 +159,22 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr) flush_workqueue(mdev->workqueue); destroy_workqueue(mdev->workqueue); - mlx4_mr_free(dev, &mdev->mr); + ret = mlx4_mr_free(dev, &mdev->mr); + if (ret) + mlx4_err(mdev, "Error deregistering MR. The system may have become unstable."); + iounmap(mdev->uar_map); mlx4_uar_free(dev, &mdev->priv_uar); mlx4_pd_free(dev, mdev->priv_pdn); - sx_destroy(&mdev->state_lock.sx); - mtx_destroy(&mdev->uar_lock.m); kfree(mdev); } static void *mlx4_en_add(struct mlx4_dev *dev) { - static int mlx4_en_version_printed; struct mlx4_en_dev *mdev; int i; int err; - if (!mlx4_en_version_printed) { - printk(KERN_INFO "%s", mlx4_en_version); - mlx4_en_version_printed++; - } + printk_once(KERN_INFO "%s", mlx4_en_version); mdev = kzalloc(sizeof *mdev, GFP_KERNEL); if (!mdev) { @@ -196,10 +190,11 @@ static void *mlx4_en_add(struct mlx4_dev *dev) if (mlx4_uar_alloc(dev, &mdev->priv_uar)) goto err_pd; - mtx_init(&mdev->uar_lock.m, "mlx4 uar", NULL, MTX_DEF); - mdev->uar_map = ioremap(mdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); + mdev->uar_map = ioremap((phys_addr_t) mdev->priv_uar.pfn << PAGE_SHIFT, + PAGE_SIZE); if (!mdev->uar_map) goto err_uar; + spin_lock_init(&mdev->uar_lock); mdev->dev = dev; mdev->dma_device = &(dev->pdev->dev); @@ -215,7 +210,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev) MLX4_PERM_LOCAL_WRITE | MLX4_PERM_LOCAL_READ, 0, 0, &mdev->mr)) { mlx4_err(mdev, "Failed allocating memory region\n"); - goto err_uar; + goto err_map; } if (mlx4_mr_enable(mdev->dev, &mdev->mr)) { mlx4_err(mdev, "Failed enabling memory region\n"); @@ -229,21 +224,24 @@ static void *mlx4_en_add(struct mlx4_dev *dev) goto err_mr; } - /* Configure wich ports to start according to module parameters */ + /* Configure which ports to start according to module parameters */ mdev->port_cnt = 0; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) mdev->port_cnt++; - /* If we did not receive an explicit number of Rx rings, default to - * the number of completion vectors populated by the mlx4_core */ - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { - mlx4_info(mdev, "Using %d tx rings for port:%d\n", - mdev->profile.prof[i].tx_ring_num, i); - mdev->profile.prof[i].rx_ring_num = rounddown_pow_of_two( - min_t(int, dev->caps.num_comp_vectors, MAX_RX_RINGS)); - mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n", - mdev->profile.prof[i].rx_ring_num, i); + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { + if (!dev->caps.comp_pool) { + mdev->profile.prof[i].rx_ring_num = + rounddown_pow_of_two(max_t(int, MIN_RX_RINGS, + min_t(int, + dev->caps.num_comp_vectors, + DEF_RX_RINGS))); + } else { + mdev->profile.prof[i].rx_ring_num = rounddown_pow_of_two( + min_t(int, dev->caps.comp_pool/ + dev->caps.num_ports - 1 , MAX_MSIX_P_PORT - 1)); + } } /* Create our own workqueue for reset/multicast tasks @@ -257,7 +255,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev) /* At this stage all non-port specific tasks are complete: * mark the card state as up */ - sx_init(&mdev->state_lock.sx, "mlxen state"); + mutex_init(&mdev->state_lock); mdev->device_up = true; /* Setup ports */ @@ -265,32 +263,20 @@ static void *mlx4_en_add(struct mlx4_dev *dev) /* Create a netdev for each port */ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { mlx4_info(mdev, "Activating port:%d\n", i); - if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i])) { + if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i])) mdev->pndev[i] = NULL; - goto err_free_netdev; - } - } - return mdev; - - -err_free_netdev: - mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { - if (mdev->pndev[i]) - mlx4_en_destroy_netdev(mdev->pndev[i]); } - mutex_lock(&mdev->state_lock); - mdev->device_up = false; - mutex_unlock(&mdev->state_lock); - flush_workqueue(mdev->workqueue); - - /* Stop event queue before we drop down to release shared SW state */ - destroy_workqueue(mdev->workqueue); + return mdev; err_mr: - mlx4_mr_free(dev, &mdev->mr); + err = mlx4_mr_free(dev, &mdev->mr); + if (err) + mlx4_err(mdev, "Error deregistering MR. The system may have become unstable."); +err_map: + if (mdev->uar_map) + iounmap(mdev->uar_map); err_uar: - mtx_destroy(&mdev->uar_lock.m); mlx4_uar_free(dev, &mdev->priv_uar); err_pd: mlx4_pd_free(dev, mdev->priv_pdn); @@ -300,73 +286,51 @@ err_free_res: return NULL; } -enum mlx4_query_reply mlx4_en_query(void *endev_ptr, void *int_dev) -{ - struct mlx4_en_dev *mdev = endev_ptr; - struct net_device *netdev = int_dev; - int p; - - for (p = 1; p <= MLX4_MAX_PORTS; ++p) - if (mdev->pndev[p] == netdev) - return p; - - return MLX4_QUERY_NOT_MINE; -} - -#if 0 -static struct pci_device_id mlx4_en_pci_table[] = { - { PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */ - { PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */ - { PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */ - { PCI_VDEVICE(MELLANOX, 0x6732) }, /* MT25408 "Hermon" DDR PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x673c) }, /* MT25408 "Hermon" QDR PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x6368) }, /* MT25408 "Hermon" EN 10GigE */ - { PCI_VDEVICE(MELLANOX, 0x6750) }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x6372) }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */ - { PCI_VDEVICE(MELLANOX, 0x675a) }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ - { PCI_VDEVICE(MELLANOX, 0x6764) }, /* MT26468 ConnectX EN 10GigE PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x6746) }, /* MT26438 ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE Virt+ */ - { PCI_VDEVICE(MELLANOX, 0x676e) }, /* MT26478 ConnectX EN 40GigE PCIe 2.0 5GT/s */ - { PCI_VDEVICE(MELLANOX, 0x6778) }, /* MT26488 ConnectX VPI PCIe 2.0 5GT/s - IB DDR / 10GigE Virt+ */ - { PCI_VDEVICE(MELLANOX, 0x1000) }, - { PCI_VDEVICE(MELLANOX, 0x1001) }, - { PCI_VDEVICE(MELLANOX, 0x1002) }, - { PCI_VDEVICE(MELLANOX, 0x1003) }, - { PCI_VDEVICE(MELLANOX, 0x1004) }, - { PCI_VDEVICE(MELLANOX, 0x1005) }, - { PCI_VDEVICE(MELLANOX, 0x1006) }, - { PCI_VDEVICE(MELLANOX, 0x1007) }, - { PCI_VDEVICE(MELLANOX, 0x1008) }, - { PCI_VDEVICE(MELLANOX, 0x1009) }, - { PCI_VDEVICE(MELLANOX, 0x100a) }, - { PCI_VDEVICE(MELLANOX, 0x100b) }, - { PCI_VDEVICE(MELLANOX, 0x100c) }, - { PCI_VDEVICE(MELLANOX, 0x100d) }, - { PCI_VDEVICE(MELLANOX, 0x100e) }, - { PCI_VDEVICE(MELLANOX, 0x100f) }, - { 0, } -}; - -MODULE_DEVICE_TABLE(pci, mlx4_en_pci_table); -#endif - static struct mlx4_interface mlx4_en_interface = { - .add = mlx4_en_add, - .remove = mlx4_en_remove, - .event = mlx4_en_event, - .query = mlx4_en_query, - .get_dev = get_netdev, + .add = mlx4_en_add, + .remove = mlx4_en_remove, + .event = mlx4_en_event, + .get_dev = mlx4_en_get_netdev, .protocol = MLX4_PROT_ETH, }; +static void mlx4_en_verify_params(void) +{ + if (pfctx > MAX_PFC_TX) { + pr_warn("mlx4_en: WARNING: illegal module parameter pfctx 0x%x - " + "should be in range 0-0x%x, will be changed to default (0)\n", + pfctx, MAX_PFC_TX); + pfctx = 0; + } + + if (pfcrx > MAX_PFC_RX) { + pr_warn("mlx4_en: WARNING: illegal module parameter pfcrx 0x%x - " + "should be in range 0-0x%x, will be changed to default (0)\n", + pfcrx, MAX_PFC_RX); + pfcrx = 0; + } +} + + static int __init mlx4_en_init(void) { + mlx4_en_verify_params(); + +#ifdef CONFIG_DEBUG_FS + int err = 0; + err = mlx4_en_register_debugfs(); + if (err) + pr_err(KERN_ERR "Failed to register debugfs\n"); +#endif return mlx4_register_interface(&mlx4_en_interface); } static void __exit mlx4_en_cleanup(void) { mlx4_unregister_interface(&mlx4_en_interface); +#ifdef CONFIG_DEBUG_FS + mlx4_en_unregister_debugfs(); +#endif } module_init(mlx4_en_init); diff --git a/sys/ofed/drivers/net/mlx4/en_netdev.c b/sys/ofed/drivers/net/mlx4/en_netdev.c index 15b576a..e76da65 100644 --- a/sys/ofed/drivers/net/mlx4/en_netdev.c +++ b/sys/ofed/drivers/net/mlx4/en_netdev.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -31,221 +31,809 @@ * */ -#include "mlx4_en.h" +#include <linux/etherdevice.h> +#include <linux/delay.h> +#include <linux/slab.h> +#ifdef CONFIG_NET_RX_BUSY_POLL +#include <net/busy_poll.h> +#endif + +#include <linux/list.h> +#include <linux/if_ether.h> #include <linux/mlx4/driver.h> #include <linux/mlx4/device.h> #include <linux/mlx4/cmd.h> #include <linux/mlx4/cq.h> -#include <linux/delay.h> -#include <net/ethernet.h> -#include <net/if_vlan_var.h> #include <sys/sockio.h> #include <sys/sysctl.h> -static void mlx4_en_init_locked(struct mlx4_en_priv *priv); +#include "mlx4_en.h" +#include "en_port.h" + static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv); +static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv); +static int mlx4_en_unit; + +#ifdef CONFIG_NET_RX_BUSY_POLL +/* must be called with local_bh_disable()d */ +static int mlx4_en_low_latency_recv(struct napi_struct *napi) +{ + struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); + struct net_device *dev = cq->dev; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; + int done; + + if (!priv->port_up) + return LL_FLUSH_FAILED; + + if (!mlx4_en_cq_lock_poll(cq)) + return LL_FLUSH_BUSY; + + done = mlx4_en_process_rx_cq(dev, cq, 4); +#ifdef LL_EXTENDED_STATS + if (done) + rx_ring->cleaned += done; + else + rx_ring->misses++; +#endif + + mlx4_en_cq_unlock_poll(cq); + + return done; +} +#endif /* CONFIG_NET_RX_BUSY_POLL */ + +#ifdef CONFIG_RFS_ACCEL + +struct mlx4_en_filter { + struct list_head next; + struct work_struct work; + + u8 ip_proto; + __be32 src_ip; + __be32 dst_ip; + __be16 src_port; + __be16 dst_port; + + int rxq_index; + struct mlx4_en_priv *priv; + u32 flow_id; /* RFS infrastructure id */ + int id; /* mlx4_en driver id */ + u64 reg_id; /* Flow steering API id */ + u8 activated; /* Used to prevent expiry before filter + * is attached + */ + struct hlist_node filter_chain; +}; + +static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv); + +static enum mlx4_net_trans_rule_id mlx4_ip_proto_to_trans_rule_id(u8 ip_proto) +{ + switch (ip_proto) { + case IPPROTO_UDP: + return MLX4_NET_TRANS_RULE_ID_UDP; + case IPPROTO_TCP: + return MLX4_NET_TRANS_RULE_ID_TCP; + default: + return -EPROTONOSUPPORT; + } +}; + +static void mlx4_en_filter_work(struct work_struct *work) +{ + struct mlx4_en_filter *filter = container_of(work, + struct mlx4_en_filter, + work); + struct mlx4_en_priv *priv = filter->priv; + struct mlx4_spec_list spec_tcp_udp = { + .id = mlx4_ip_proto_to_trans_rule_id(filter->ip_proto), + { + .tcp_udp = { + .dst_port = filter->dst_port, + .dst_port_msk = (__force __be16)-1, + .src_port = filter->src_port, + .src_port_msk = (__force __be16)-1, + }, + }, + }; + struct mlx4_spec_list spec_ip = { + .id = MLX4_NET_TRANS_RULE_ID_IPV4, + { + .ipv4 = { + .dst_ip = filter->dst_ip, + .dst_ip_msk = (__force __be32)-1, + .src_ip = filter->src_ip, + .src_ip_msk = (__force __be32)-1, + }, + }, + }; + struct mlx4_spec_list spec_eth = { + .id = MLX4_NET_TRANS_RULE_ID_ETH, + }; + struct mlx4_net_trans_rule rule = { + .list = LIST_HEAD_INIT(rule.list), + .queue_mode = MLX4_NET_TRANS_Q_LIFO, + .exclusive = 1, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_REGULAR, + .port = priv->port, + .priority = MLX4_DOMAIN_RFS, + }; + int rc; + __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); + + if (spec_tcp_udp.id < 0) { + en_warn(priv, "RFS: ignoring unsupported ip protocol (%d)\n", + filter->ip_proto); + goto ignore; + } + list_add_tail(&spec_eth.list, &rule.list); + list_add_tail(&spec_ip.list, &rule.list); + list_add_tail(&spec_tcp_udp.list, &rule.list); + + rule.qpn = priv->rss_map.qps[filter->rxq_index].qpn; + memcpy(spec_eth.eth.dst_mac, priv->dev->dev_addr, ETH_ALEN); + memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); + + filter->activated = 0; + + if (filter->reg_id) { + rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); + if (rc && rc != -ENOENT) + en_err(priv, "Error detaching flow. rc = %d\n", rc); + } + + rc = mlx4_flow_attach(priv->mdev->dev, &rule, &filter->reg_id); + if (rc) + en_err(priv, "Error attaching flow. err = %d\n", rc); + +ignore: + mlx4_en_filter_rfs_expire(priv); + + filter->activated = 1; +} + +static inline struct hlist_head * +filter_hash_bucket(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, + __be16 src_port, __be16 dst_port) +{ + unsigned long l; + int bucket_idx; + + l = (__force unsigned long)src_port | + ((__force unsigned long)dst_port << 2); + l ^= (__force unsigned long)(src_ip ^ dst_ip); + + bucket_idx = hash_long(l, MLX4_EN_FILTER_HASH_SHIFT); + + return &priv->filter_hash[bucket_idx]; +} + +static struct mlx4_en_filter * +mlx4_en_filter_alloc(struct mlx4_en_priv *priv, int rxq_index, __be32 src_ip, + __be32 dst_ip, u8 ip_proto, __be16 src_port, + __be16 dst_port, u32 flow_id) +{ + struct mlx4_en_filter *filter = NULL; + + filter = kzalloc(sizeof(struct mlx4_en_filter), GFP_ATOMIC); + if (!filter) + return NULL; + + filter->priv = priv; + filter->rxq_index = rxq_index; + INIT_WORK(&filter->work, mlx4_en_filter_work); + + filter->src_ip = src_ip; + filter->dst_ip = dst_ip; + filter->ip_proto = ip_proto; + filter->src_port = src_port; + filter->dst_port = dst_port; + + filter->flow_id = flow_id; + + filter->id = priv->last_filter_id++ % RPS_NO_FILTER; + + list_add_tail(&filter->next, &priv->filters); + hlist_add_head(&filter->filter_chain, + filter_hash_bucket(priv, src_ip, dst_ip, src_port, + dst_port)); + + return filter; +} + +static void mlx4_en_filter_free(struct mlx4_en_filter *filter) +{ + struct mlx4_en_priv *priv = filter->priv; + int rc; + + list_del(&filter->next); + + rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); + if (rc && rc != -ENOENT) + en_err(priv, "Error detaching flow. rc = %d\n", rc); + + kfree(filter); +} + +static inline struct mlx4_en_filter * +mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, + u8 ip_proto, __be16 src_port, __be16 dst_port) +{ + struct hlist_node *elem; + struct mlx4_en_filter *filter; + struct mlx4_en_filter *ret = NULL; + + hlist_for_each_entry(filter, elem, + filter_hash_bucket(priv, src_ip, dst_ip, + src_port, dst_port), + filter_chain) { + if (filter->src_ip == src_ip && + filter->dst_ip == dst_ip && + filter->ip_proto == ip_proto && + filter->src_port == src_port && + filter->dst_port == dst_port) { + ret = filter; + break; + } + } + + return ret; +} + +static int +mlx4_en_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id) +{ + struct mlx4_en_priv *priv = netdev_priv(net_dev); + struct mlx4_en_filter *filter; + const struct iphdr *ip; + const __be16 *ports; + u8 ip_proto; + __be32 src_ip; + __be32 dst_ip; + __be16 src_port; + __be16 dst_port; + int nhoff = skb_network_offset(skb); + int ret = 0; + + if (skb->protocol != htons(ETH_P_IP)) + return -EPROTONOSUPPORT; + + ip = (const struct iphdr *)(skb->data + nhoff); + if (ip_is_fragment(ip)) + return -EPROTONOSUPPORT; + + if ((ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP)) + return -EPROTONOSUPPORT; + ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl); + + ip_proto = ip->protocol; + src_ip = ip->saddr; + dst_ip = ip->daddr; + src_port = ports[0]; + dst_port = ports[1]; + + spin_lock_bh(&priv->filters_lock); + filter = mlx4_en_filter_find(priv, src_ip, dst_ip, ip_proto, + src_port, dst_port); + if (filter) { + if (filter->rxq_index == rxq_index) + goto out; + + filter->rxq_index = rxq_index; + } else { + filter = mlx4_en_filter_alloc(priv, rxq_index, + src_ip, dst_ip, ip_proto, + src_port, dst_port, flow_id); + if (!filter) { + ret = -ENOMEM; + goto err; + } + } + + queue_work(priv->mdev->workqueue, &filter->work); + +out: + ret = filter->id; +err: + spin_unlock_bh(&priv->filters_lock); + + return ret; +} + +void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *rx_ring) +{ + struct mlx4_en_filter *filter, *tmp; + LIST_HEAD(del_list); + + spin_lock_bh(&priv->filters_lock); + list_for_each_entry_safe(filter, tmp, &priv->filters, next) { + list_move(&filter->next, &del_list); + hlist_del(&filter->filter_chain); + } + spin_unlock_bh(&priv->filters_lock); + + list_for_each_entry_safe(filter, tmp, &del_list, next) { + cancel_work_sync(&filter->work); + mlx4_en_filter_free(filter); + } +} + +static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv) +{ + struct mlx4_en_filter *filter = NULL, *tmp, *last_filter = NULL; + LIST_HEAD(del_list); + int i = 0; + + spin_lock_bh(&priv->filters_lock); + list_for_each_entry_safe(filter, tmp, &priv->filters, next) { + if (i > MLX4_EN_FILTER_EXPIRY_QUOTA) + break; + + if (filter->activated && + !work_pending(&filter->work) && + rps_may_expire_flow(priv->dev, + filter->rxq_index, filter->flow_id, + filter->id)) { + list_move(&filter->next, &del_list); + hlist_del(&filter->filter_chain); + } else + last_filter = filter; + + i++; + } + + if (last_filter && (&last_filter->next != priv->filters.next)) + list_move(&priv->filters, &last_filter->next); + + spin_unlock_bh(&priv->filters_lock); + + list_for_each_entry_safe(filter, tmp, &del_list, next) + mlx4_en_filter_free(filter); +} +#endif static void mlx4_en_vlan_rx_add_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int err; int idx; - u8 field; if (arg != priv) return; - if ((vid == 0) || (vid > 4095)) /* Invalid */ - return; en_dbg(HW, priv, "adding VLAN:%d\n", vid); - idx = vid >> 5; - field = 1 << (vid & 0x1f); - spin_lock(&priv->vlan_lock); - priv->vlgrp_modified = true; - if (priv->vlan_unregister[idx] & field) - priv->vlan_unregister[idx] &= ~field; - else - priv->vlan_register[idx] |= field; - priv->vlans[idx] |= field; - spin_unlock(&priv->vlan_lock); + + set_bit(vid, priv->active_vlans); + + /* Add VID to port VLAN filter */ + mutex_lock(&mdev->state_lock); + if (mdev->device_up && priv->port_up) { + err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); + if (err) + en_err(priv, "Failed configuring VLAN filter\n"); + } + if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx)) + en_dbg(HW, priv, "failed adding vlan %d\n", vid); + mutex_unlock(&mdev->state_lock); + } static void mlx4_en_vlan_rx_kill_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); - int idx; - u8 field; + struct mlx4_en_dev *mdev = priv->mdev; + int err; if (arg != priv) return; - if ((vid == 0) || (vid > 4095)) /* Invalid */ - return; en_dbg(HW, priv, "Killing VID:%d\n", vid); - idx = vid >> 5; - field = 1 << (vid & 0x1f); - spin_lock(&priv->vlan_lock); - priv->vlgrp_modified = true; - if (priv->vlan_register[idx] & field) - priv->vlan_register[idx] &= ~field; - else - priv->vlan_unregister[idx] |= field; - priv->vlans[idx] &= ~field; - spin_unlock(&priv->vlan_lock); + + clear_bit(vid, priv->active_vlans); + + /* Remove VID from port VLAN filter */ + mutex_lock(&mdev->state_lock); + mlx4_unregister_vlan(mdev->dev, priv->port, vid); + + if (mdev->device_up && priv->port_up) { + err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); + if (err) + en_err(priv, "Failed configuring VLAN filter\n"); + } + mutex_unlock(&mdev->state_lock); + } -u64 mlx4_en_mac_to_u64(u8 *addr) +static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv, + unsigned char *mac, int *qpn, u64 *reg_id) { - u64 mac = 0; - int i; + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; + int err; - for (i = 0; i < ETHER_ADDR_LEN; i++) { - mac <<= 8; - mac |= addr[i]; + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: { + struct mlx4_qp qp; + u8 gid[16] = {0}; + + qp.qpn = *qpn; + memcpy(&gid[10], mac, ETH_ALEN); + gid[5] = priv->port; + + err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH); + break; + } + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + struct mlx4_spec_list spec_eth = { {NULL} }; + __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_REGULAR, + .priority = MLX4_DOMAIN_NIC, + }; + + rule.port = priv->port; + rule.qpn = *qpn; + INIT_LIST_HEAD(&rule.list); + + spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec_eth.eth.dst_mac, mac, ETH_ALEN); + memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); + list_add_tail(&spec_eth.list, &rule.list); + + err = mlx4_flow_attach(dev, &rule, reg_id); + break; + } + default: + return -EINVAL; } - return mac; + if (err) + en_warn(priv, "Failed Attaching Unicast\n"); + + return err; } -static int mlx4_en_cache_mclist(struct net_device *dev, u64 **mcaddrp) +static void mlx4_en_uc_steer_release(struct mlx4_en_priv *priv, + unsigned char *mac, int qpn, u64 reg_id) { - struct ifmultiaddr *ifma; - u64 *mcaddr; - int cnt; - int i; + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; - *mcaddrp = NULL; -restart: - cnt = 0; - if_maddr_rlock(dev); - TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_LINK) - continue; - if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen != - ETHER_ADDR_LEN) - continue; - cnt++; - } - if_maddr_runlock(dev); - if (cnt == 0) - return (0); - mcaddr = kmalloc(sizeof(u64) * cnt, GFP_KERNEL); - if (mcaddr == NULL) - return (0); - i = 0; - if_maddr_rlock(dev); - TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_LINK) - continue; - if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen != - ETHER_ADDR_LEN) - continue; - /* Make sure the list didn't grow. */ - if (i == cnt) { - if_maddr_runlock(dev); - kfree(mcaddr); - goto restart; - } - mcaddr[i++] = mlx4_en_mac_to_u64( - LLADDR((struct sockaddr_dl *)ifma->ifma_addr)); + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: { + struct mlx4_qp qp; + u8 gid[16] = {0}; + + qp.qpn = qpn; + memcpy(&gid[10], mac, ETH_ALEN); + gid[5] = priv->port; + + mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH); + break; + } + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + mlx4_flow_detach(dev, reg_id); + break; + } + default: + en_err(priv, "Invalid steering mode.\n"); } - if_maddr_runlock(dev); - *mcaddrp = mcaddr; - return (i); } -static void mlx4_en_stop_port(struct net_device *dev) +static int mlx4_en_get_qp(struct mlx4_en_priv *priv) { - struct mlx4_en_priv *priv = netdev_priv(dev); - - queue_work(priv->mdev->workqueue, &priv->stop_port_task); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; + struct mlx4_mac_entry *entry; + int index = 0; + int err = 0; + u64 reg_id; + int *qpn = &priv->base_qpn; + u64 mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev)); + + en_dbg(DRV, priv, "Registering MAC: %pM for adding\n", + IF_LLADDR(priv->dev)); + index = mlx4_register_mac(dev, priv->port, mac); + if (index < 0) { + err = index; + en_err(priv, "Failed adding MAC: %pM\n", + IF_LLADDR(priv->dev)); + return err; + } + + if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { + int base_qpn = mlx4_get_base_qpn(dev, priv->port); + *qpn = base_qpn + index; + return 0; + } + + err = mlx4_qp_reserve_range(dev, 1, 1, qpn, 0); + en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); + if (err) { + en_err(priv, "Failed to reserve qp for mac registration\n"); + goto qp_err; + } + + err = mlx4_en_uc_steer_add(priv, IF_LLADDR(priv->dev), qpn, ®_id); + if (err) + goto steer_err; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + err = -ENOMEM; + goto alloc_err; + } + memcpy(entry->mac, IF_LLADDR(priv->dev), sizeof(entry->mac)); + entry->reg_id = reg_id; + + hlist_add_head(&entry->hlist, + &priv->mac_hash[entry->mac[MLX4_EN_MAC_HASH_IDX]]); + + return 0; + +alloc_err: + mlx4_en_uc_steer_release(priv, IF_LLADDR(priv->dev), *qpn, reg_id); + +steer_err: + mlx4_qp_release_range(dev, *qpn, 1); + +qp_err: + mlx4_unregister_mac(dev, priv->port, mac); + return err; } -static void mlx4_en_start_port(struct net_device *dev) +static void mlx4_en_put_qp(struct mlx4_en_priv *priv) { - struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_dev *dev = mdev->dev; + int qpn = priv->base_qpn; + u64 mac; + + if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { + mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev)); + en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", + IF_LLADDR(priv->dev)); + mlx4_unregister_mac(dev, priv->port, mac); + } else { + struct mlx4_mac_entry *entry; + struct hlist_node *n, *tmp; + struct hlist_head *bucket; + unsigned int i; + + for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) { + bucket = &priv->mac_hash[i]; + hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) { + mac = mlx4_mac_to_u64(entry->mac); + en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", + entry->mac); + mlx4_en_uc_steer_release(priv, entry->mac, + qpn, entry->reg_id); + + mlx4_unregister_mac(dev, priv->port, mac); + hlist_del(&entry->hlist); + kfree(entry); + } + } - queue_work(priv->mdev->workqueue, &priv->start_port_task); + en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n", + priv->port, qpn); + mlx4_qp_release_range(dev, qpn, 1); + priv->flags &= ~MLX4_EN_FLAG_FORCE_PROMISC; + } } -static void mlx4_en_set_multicast(struct net_device *dev) +static void mlx4_en_clear_list(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_mc_list *tmp, *mc_to_del; - if (!priv->port_up) - return; + list_for_each_entry_safe(mc_to_del, tmp, &priv->mc_list, list) { + list_del(&mc_to_del->list); + kfree(mc_to_del); + } +} + +static void mlx4_en_cache_mclist(struct net_device *dev) +{ + struct ifmultiaddr *ifma; + struct mlx4_en_mc_list *tmp; + struct mlx4_en_priv *priv = netdev_priv(dev); - queue_work(priv->mdev->workqueue, &priv->mcast_task); + TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen != + ETHER_ADDR_LEN) + continue; + /* Make sure the list didn't grow. */ + tmp = kzalloc(sizeof(struct mlx4_en_mc_list), GFP_ATOMIC); + memcpy(tmp->addr, + LLADDR((struct sockaddr_dl *)ifma->ifma_addr), ETH_ALEN); + list_add_tail(&tmp->list, &priv->mc_list); + } } -static void mlx4_en_do_set_multicast(struct work_struct *work) +static void update_mclist_flags(struct mlx4_en_priv *priv, + struct list_head *dst, + struct list_head *src) { - struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, - mcast_task); - struct net_device *dev = priv->dev; - struct mlx4_en_dev *mdev = priv->mdev; - int err; + struct mlx4_en_mc_list *dst_tmp, *src_tmp, *new_mc; + bool found; - mutex_lock(&mdev->state_lock); - if (!mdev->device_up) { - en_dbg(HW, priv, "Card is not up, " - "ignoring multicast change.\n"); - goto out; - } - if (!priv->port_up) { - en_dbg(HW, priv, "Port is down, " - "ignoring multicast change.\n"); - goto out; + /* Find all the entries that should be removed from dst, + * These are the entries that are not found in src + */ + list_for_each_entry(dst_tmp, dst, list) { + found = false; + list_for_each_entry(src_tmp, src, list) { + if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { + found = true; + break; + } + } + if (!found) + dst_tmp->action = MCLIST_REM; } - /* - * Promsicuous mode: disable all filters + /* Add entries that exist in src but not in dst + * mark them as need to add */ + list_for_each_entry(src_tmp, src, list) { + found = false; + list_for_each_entry(dst_tmp, dst, list) { + if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { + dst_tmp->action = MCLIST_NONE; + found = true; + break; + } + } + if (!found) { + new_mc = kmalloc(sizeof(struct mlx4_en_mc_list), + GFP_KERNEL); + if (!new_mc) { + en_err(priv, "Failed to allocate current multicast list\n"); + return; + } + memcpy(new_mc, src_tmp, + sizeof(struct mlx4_en_mc_list)); + new_mc->action = MCLIST_ADD; + list_add_tail(&new_mc->list, dst); + } + } +} - if (dev->if_flags & IFF_PROMISC) { - if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) { - priv->flags |= MLX4_EN_FLAG_PROMISC; +static void mlx4_en_set_rx_mode(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (!priv->port_up) + return; - /* Enable promiscouos mode */ - err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, - priv->base_qpn, 1); + queue_work(priv->mdev->workqueue, &priv->rx_mode_task); +} + +static void mlx4_en_set_promisc_mode(struct mlx4_en_priv *priv, + struct mlx4_en_dev *mdev) +{ + int err = 0; + if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) { + priv->flags |= MLX4_EN_FLAG_PROMISC; + + /* Enable promiscouos mode */ + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_add(mdev->dev, + priv->port, + priv->base_qpn, + MLX4_FS_ALL_DEFAULT); if (err) - en_err(priv, "Failed enabling " - "promiscous mode\n"); + en_err(priv, "Failed enabling promiscuous mode\n"); + priv->flags |= MLX4_EN_FLAG_MC_PROMISC; + break; - /* Disable port multicast filter (unconditionally) */ - err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, - 0, MLX4_MCAST_DISABLE); + case MLX4_STEERING_MODE_B0: + err = mlx4_unicast_promisc_add(mdev->dev, + priv->base_qpn, + priv->port); if (err) - en_err(priv, "Failed disabling " - "multicast filter\n"); + en_err(priv, "Failed enabling unicast promiscuous mode\n"); + + /* Add the default qp number as multicast + * promisc + */ + if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { + err = mlx4_multicast_promisc_add(mdev->dev, + priv->base_qpn, + priv->port); + if (err) + en_err(priv, "Failed enabling multicast promiscuous mode\n"); + priv->flags |= MLX4_EN_FLAG_MC_PROMISC; + } + break; - /* Disable port VLAN filter */ - err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, NULL); + case MLX4_STEERING_MODE_A0: + err = mlx4_SET_PORT_qpn_calc(mdev->dev, + priv->port, + priv->base_qpn, + 1); if (err) - en_err(priv, "Failed disabling VLAN filter\n"); + en_err(priv, "Failed enabling promiscuous mode\n"); + break; } - goto out; + + /* Disable port multicast filter (unconditionally) */ + err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, + 0, MLX4_MCAST_DISABLE); + if (err) + en_err(priv, "Failed disabling multicast filter\n"); } +} - /* - * Not in promiscous mode - */ +static void mlx4_en_clear_promisc_mode(struct mlx4_en_priv *priv, + struct mlx4_en_dev *mdev) +{ + int err = 0; - if (priv->flags & MLX4_EN_FLAG_PROMISC) { - priv->flags &= ~MLX4_EN_FLAG_PROMISC; + priv->flags &= ~MLX4_EN_FLAG_PROMISC; - /* Disable promiscouos mode */ - err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, - priv->base_qpn, 0); + /* Disable promiscouos mode */ + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_ALL_DEFAULT); + if (err) + en_err(priv, "Failed disabling promiscuous mode\n"); + priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + break; + + case MLX4_STEERING_MODE_B0: + err = mlx4_unicast_promisc_remove(mdev->dev, + priv->base_qpn, + priv->port); if (err) - en_err(priv, "Failed disabling promiscous mode\n"); + en_err(priv, "Failed disabling unicast promiscuous mode\n"); + /* Disable Multicast promisc */ + if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { + err = mlx4_multicast_promisc_remove(mdev->dev, + priv->base_qpn, + priv->port); + if (err) + en_err(priv, "Failed disabling multicast promiscuous mode\n"); + priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + } + break; - /* Enable port VLAN filter */ - err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlans); + case MLX4_STEERING_MODE_A0: + err = mlx4_SET_PORT_qpn_calc(mdev->dev, + priv->port, + priv->base_qpn, 0); if (err) - en_err(priv, "Failed enabling VLAN filter\n"); + en_err(priv, "Failed disabling promiscuous mode\n"); + break; } +} + +static void mlx4_en_do_multicast(struct mlx4_en_priv *priv, + struct net_device *dev, + struct mlx4_en_dev *mdev) +{ + struct mlx4_en_mc_list *mclist, *tmp; + u8 mc_list[16] = {0}; + int err = 0; + u64 mcast_addr = 0; + /* Enable/disable the multicast filter according to IFF_ALLMULTI */ if (dev->if_flags & IFF_ALLMULTI) { @@ -253,10 +841,53 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); + + /* Add the default qp number as multicast promisc */ + if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_add(mdev->dev, + priv->port, + priv->base_qpn, + MLX4_FS_MC_DEFAULT); + break; + + case MLX4_STEERING_MODE_B0: + err = mlx4_multicast_promisc_add(mdev->dev, + priv->base_qpn, + priv->port); + break; + + case MLX4_STEERING_MODE_A0: + break; + } + if (err) + en_err(priv, "Failed entering multicast promisc mode\n"); + priv->flags |= MLX4_EN_FLAG_MC_PROMISC; + } } else { - u64 *mcaddr; - int mccount; - int i; + /* Disable Multicast promisc */ + if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_MC_DEFAULT); + break; + + case MLX4_STEERING_MODE_B0: + err = mlx4_multicast_promisc_remove(mdev->dev, + priv->base_qpn, + priv->port); + break; + + case MLX4_STEERING_MODE_A0: + break; + } + if (err) + en_err(priv, "Failed disabling multicast promiscuous mode\n"); + priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + } err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); @@ -269,17 +900,93 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) /* Update multicast list - we cache all addresses so they won't * change while HW is updated holding the command semaphor */ - mccount = mlx4_en_cache_mclist(dev, &mcaddr); - for (i = 0; i < mccount; i++) + mlx4_en_cache_mclist(dev); + list_for_each_entry(mclist, &priv->mc_list, list) { + mcast_addr = mlx4_mac_to_u64(mclist->addr); mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, - mcaddr[i], 0, MLX4_MCAST_CONFIG); + mcast_addr, 0, MLX4_MCAST_CONFIG); + } err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_ENABLE); if (err) en_err(priv, "Failed enabling multicast filter\n"); - kfree(mcaddr); + update_mclist_flags(priv, &priv->curr_list, &priv->mc_list); + list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { + if (mclist->action == MCLIST_REM) { + /* detach this address and delete from list */ + memcpy(&mc_list[10], mclist->addr, ETH_ALEN); + mc_list[5] = priv->port; + err = mlx4_multicast_detach(mdev->dev, + &priv->rss_map.indir_qp, + mc_list, + MLX4_PROT_ETH, + mclist->reg_id); + if (err) + en_err(priv, "Fail to detach multicast address\n"); + + /* remove from list */ + list_del(&mclist->list); + kfree(mclist); + } else if (mclist->action == MCLIST_ADD) { + /* attach the address */ + memcpy(&mc_list[10], mclist->addr, ETH_ALEN); + /* needed for B0 steering support */ + mc_list[5] = priv->port; + err = mlx4_multicast_attach(mdev->dev, + &priv->rss_map.indir_qp, + mc_list, + priv->port, 0, + MLX4_PROT_ETH, + &mclist->reg_id); + if (err) + en_err(priv, "Fail to attach multicast address\n"); + + } + } + } +} + +static void mlx4_en_do_set_rx_mode(struct work_struct *work) +{ + struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, + rx_mode_task); + struct mlx4_en_dev *mdev = priv->mdev; + struct net_device *dev = priv->dev; + + + mutex_lock(&mdev->state_lock); + if (!mdev->device_up) { + en_dbg(HW, priv, "Card is not up, ignoring rx mode change.\n"); + goto out; + } + if (!priv->port_up) { + en_dbg(HW, priv, "Port is down, ignoring rx mode change.\n"); + goto out; + } + if (!mlx4_en_QUERY_PORT(mdev, priv->port)) { + if (priv->port_state.link_state) { + priv->last_link_state = MLX4_DEV_EVENT_PORT_UP; + /* Important note: the following call for if_link_state_change + * is needed for interface up scenario (start port, link state + * change) */ + if_link_state_change(priv->dev, LINK_STATE_UP); + en_dbg(HW, priv, "Link Up\n"); + } + } + + /* Promsicuous mode: disable all filters */ + if ((dev->if_flags & IFF_PROMISC) || + (priv->flags & MLX4_EN_FLAG_FORCE_PROMISC)) { + mlx4_en_set_promisc_mode(priv, mdev); + goto out; } + + /* Not in promiscuous mode */ + if (priv->flags & MLX4_EN_FLAG_PROMISC) + mlx4_en_clear_promisc_mode(priv, mdev); + + mlx4_en_do_multicast(priv, dev, mdev); out: mutex_unlock(&mdev->state_lock); } @@ -293,7 +1000,7 @@ static void mlx4_en_netpoll(struct net_device *dev) int i; for (i = 0; i < priv->rx_ring_num; i++) { - cq = &priv->rx_cq[i]; + cq = priv->rx_cq[i]; spin_lock_irqsave(&cq->lock, flags); napi_synchronize(&cq->napi); mlx4_en_process_rx_cq(dev, cq, 0); @@ -304,38 +1011,40 @@ static void mlx4_en_netpoll(struct net_device *dev) static void mlx4_en_watchdog_timeout(void *arg) { - struct mlx4_en_priv *priv = arg; - struct mlx4_en_dev *mdev = priv->mdev; - - en_dbg(DRV, priv, "Scheduling watchdog\n"); - queue_work(mdev->workqueue, &priv->watchdog_task); - if (priv->port_up) - callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, - mlx4_en_watchdog_timeout, priv); + struct mlx4_en_priv *priv = arg; + struct mlx4_en_dev *mdev = priv->mdev; + + en_dbg(DRV, priv, "Scheduling watchdog\n"); + queue_work(mdev->workqueue, &priv->watchdog_task); + if (priv->port_up) + callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, + mlx4_en_watchdog_timeout, priv); } -/* XXX This clears user settings in too many cases. */ + static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) { struct mlx4_en_cq *cq; int i; /* If we haven't received a specific coalescing setting - * (module param), we set the moderation paramters as follows: + * (module param), we set the moderation parameters as follows: * - moder_cnt is set to the number of mtu sized packets to * satisfy our coelsing target. * - moder_time is set to a fixed value. */ priv->rx_frames = MLX4_EN_RX_COAL_TARGET / priv->dev->if_mtu + 1; priv->rx_usecs = MLX4_EN_RX_COAL_TIME; - en_dbg(INTR, priv, "Default coalesing params for mtu:%u - " - "rx_frames:%d rx_usecs:%d\n", - priv->dev->if_mtu, priv->rx_frames, priv->rx_usecs); + priv->tx_frames = MLX4_EN_TX_COAL_PKTS; + priv->tx_usecs = MLX4_EN_TX_COAL_TIME; + en_dbg(INTR, priv, "Default coalesing params for mtu: %u - " + "rx_frames:%d rx_usecs:%d\n", + (unsigned)priv->dev->if_mtu, priv->rx_frames, priv->rx_usecs); /* Setup cq moderation params */ for (i = 0; i < priv->rx_ring_num; i++) { - cq = &priv->rx_cq[i]; + cq = priv->rx_cq[i]; cq->moder_cnt = priv->rx_frames; cq->moder_time = priv->rx_usecs; priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; @@ -344,9 +1053,9 @@ static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) } for (i = 0; i < priv->tx_ring_num; i++) { - cq = &priv->tx_cq[i]; - cq->moder_cnt = MLX4_EN_TX_COAL_PKTS; - cq->moder_time = MLX4_EN_TX_COAL_TIME; + cq = priv->tx_cq[i]; + cq->moder_cnt = priv->tx_frames; + cq->moder_time = priv->tx_usecs; } /* Reset auto-moderation params */ @@ -375,10 +1084,11 @@ static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ) return; + for (ring = 0; ring < priv->rx_ring_num; ring++) { - spin_lock(&priv->stats_lock); - rx_packets = priv->rx_ring[ring].packets; - rx_bytes = priv->rx_ring[ring].bytes; + spin_lock(&priv->stats_lock); + rx_packets = priv->rx_ring[ring]->packets; + rx_bytes = priv->rx_ring[ring]->bytes; spin_unlock(&priv->stats_lock); rx_pkt_diff = ((unsigned long) (rx_packets - @@ -389,11 +1099,10 @@ static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) priv->last_moder_bytes[ring])) / packets : 0; /* Apply auto-moderation only when packet rate - * exceeds a rate that it matters */ + * exceeds a rate that it matters */ if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) && - avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) { - if (rate < priv->pkt_rate_low || - avg_pkt_size < MLX4_EN_AVG_PKT_SMALL) + avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) { + if (rate < priv->pkt_rate_low) moder_time = priv->rx_usecs_low; else if (rate > priv->pkt_rate_high) moder_time = priv->rx_usecs_high; @@ -408,12 +1117,12 @@ static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) if (moder_time != priv->last_moder_time[ring]) { priv->last_moder_time[ring] = moder_time; - cq = &priv->rx_cq[ring]; + cq = priv->rx_cq[ring]; cq->moder_time = moder_time; err = mlx4_en_set_cq_moder(priv, cq); if (err) - en_err(priv, "Failed modifying moderation " - "for cq:%d\n", ring); + en_err(priv, "Failed modifying moderation for cq:%d\n", + ring); } priv->last_moder_packets[ring] = rx_packets; priv->last_moder_bytes[ring] = rx_bytes; @@ -422,47 +1131,6 @@ static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) priv->last_moder_jiffies = jiffies; } -static void mlx4_en_handle_vlans(struct mlx4_en_priv *priv) -{ - u8 vlan_register[VLAN_FLTR_SIZE]; - u8 vlan_unregister[VLAN_FLTR_SIZE]; - int i, j, idx; - u16 vid; - - /* cache the vlan data for processing - * done under lock to avoid changes during work */ - spin_lock(&priv->vlan_lock); - for (i = 0; i < VLAN_FLTR_SIZE; i++) { - vlan_register[i] = priv->vlan_register[i]; - priv->vlan_register[i] = 0; - vlan_unregister[i] = priv->vlan_unregister[i]; - priv->vlan_unregister[i] = 0; - } - priv->vlgrp_modified = false; - spin_unlock(&priv->vlan_lock); - - /* Configure the vlan filter - * The vlgrp is updated with all the vids that need to be allowed */ - if (mlx4_SET_VLAN_FLTR(priv->mdev->dev, priv->port, priv->vlans)) - en_err(priv, "Failed configuring VLAN filter\n"); - - /* Configure the VLAN table */ - for (i = 0; i < VLAN_FLTR_SIZE; i++) { - for (j = 0; j < 32; j++) { - vid = (i << 5) + j; - if (vlan_register[i] & (1 << j)) - if (mlx4_register_vlan(priv->mdev->dev, priv->port, vid, &idx)) - en_dbg(HW, priv, "failed registering vlan %d\n", vid); - if (vlan_unregister[i] & (1 << j)) { - if (!mlx4_find_cached_vlan(priv->mdev->dev, priv->port, vid, &idx)) - mlx4_unregister_vlan(priv->mdev->dev, priv->port, idx); - else - en_dbg(HW, priv, "could not find vid %d in cache\n", vid); - } - } - } -} - static void mlx4_en_do_get_stats(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); @@ -471,23 +1139,36 @@ static void mlx4_en_do_get_stats(struct work_struct *work) struct mlx4_en_dev *mdev = priv->mdev; int err; - err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); - if (err) - en_dbg(HW, priv, "Could not update stats \n"); - - mutex_lock(&mdev->state_lock); if (mdev->device_up) { if (priv->port_up) { - if (priv->vlgrp_modified) - mlx4_en_handle_vlans(priv); + err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); + if (err) + en_dbg(HW, priv, "Could not update stats\n"); mlx4_en_auto_moderation(priv); } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); } - mlx4_en_QUERY_PORT(priv->mdev, priv->port); + mutex_unlock(&mdev->state_lock); +} + +/* mlx4_en_service_task - Run service task for tasks that needed to be done + * periodically + */ +static void mlx4_en_service_task(struct work_struct *work) +{ + struct delayed_work *delay = to_delayed_work(work); + struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, + service_task); + struct mlx4_en_dev *mdev = priv->mdev; + + mutex_lock(&mdev->state_lock); + if (mdev->device_up) { + queue_delayed_work(mdev->workqueue, &priv->service_task, + SERVICE_TASK_DELAY); + } mutex_unlock(&mdev->state_lock); } @@ -503,8 +1184,15 @@ static void mlx4_en_linkstate(struct work_struct *work) * report to system log */ if (priv->last_link_state != linkstate) { if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) { + en_info(priv, "Link Down\n"); if_link_state_change(priv->dev, LINK_STATE_DOWN); - } else { + /* make sure the port is up before notifying the OS. + * This is tricky since we get here on INIT_PORT and + * in such case we can't tell the OS the port is up. + * To solve this there is a call to if_link_state_change + * in set_rx_mode. + * */ + } else if (priv->port_up && (linkstate == MLX4_DEV_EVENT_PORT_UP)){ en_info(priv, "Link Up\n"); if_link_state_change(priv->dev, LINK_STATE_UP); } @@ -513,51 +1201,38 @@ static void mlx4_en_linkstate(struct work_struct *work) mutex_unlock(&mdev->state_lock); } -static void mlx4_en_lock_and_stop_port(struct work_struct *work) -{ - struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, - stop_port_task); - struct net_device *dev = priv->dev; - struct mlx4_en_dev *mdev = priv->mdev; - - mutex_lock(&mdev->state_lock); - mlx4_en_do_stop_port(dev); - mutex_unlock(&mdev->state_lock); -} -static void mlx4_en_lock_and_start_port(struct work_struct *work) -{ - struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, - start_port_task); - struct net_device *dev = priv->dev; - struct mlx4_en_dev *mdev = priv->mdev; - - mutex_lock(&mdev->state_lock); - mlx4_en_do_start_port(dev); - mutex_unlock(&mdev->state_lock); -} - -int mlx4_en_do_start_port(struct net_device *dev) +int mlx4_en_start_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cq *cq; struct mlx4_en_tx_ring *tx_ring; - u64 config; int rx_index = 0; int tx_index = 0; int err = 0; int i; int j; + u8 mc_list[16] = {0}; + if (priv->port_up) { en_dbg(DRV, priv, "start port called while port already up\n"); return 0; } + INIT_LIST_HEAD(&priv->mc_list); + INIT_LIST_HEAD(&priv->curr_list); + INIT_LIST_HEAD(&priv->ethtool_list); + /* Calculate Rx buf size */ dev->if_mtu = min(dev->if_mtu, priv->max_mtu); - mlx4_en_calc_rx_buf(dev); + mlx4_en_calc_rx_buf(dev); + priv->rx_alloc_size = max_t(int, 2 * roundup_pow_of_two(priv->rx_mb_size), + PAGE_SIZE); + priv->rx_alloc_order = get_order(priv->rx_alloc_size); + priv->rx_buf_size = roundup_pow_of_two(priv->rx_mb_size); + priv->log_rx_info = ROUNDUP_LOG2(sizeof(struct mlx4_en_rx_buf)); en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_mb_size); /* Configure rx cq's and rings */ @@ -566,11 +1241,11 @@ int mlx4_en_do_start_port(struct net_device *dev) en_err(priv, "Failed to activate RX rings\n"); return err; } - for (i = 0; i < priv->rx_ring_num; i++) { - cq = &priv->rx_cq[i]; + cq = priv->rx_cq[i]; - err = mlx4_en_activate_cq(priv, cq); + mlx4_en_cq_init_lock(cq); + err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Rx CQ\n"); goto cq_err; @@ -584,21 +1259,41 @@ int mlx4_en_do_start_port(struct net_device *dev) goto cq_err; } mlx4_en_arm_cq(priv, cq); - priv->rx_ring[i].cqn = cq->mcq.cqn; + priv->rx_ring[i]->cqn = cq->mcq.cqn; ++rx_index; } + /* Set qp number */ + en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port); + err = mlx4_en_get_qp(priv); + if (err) { + en_err(priv, "Failed getting eth qp\n"); + goto cq_err; + } + mdev->mac_removed[priv->port] = 0; + + /* gets default allocated counter index from func cap */ + /* or sink counter index if no resources */ + priv->counter_index = mdev->dev->caps.def_counter_index[priv->port - 1]; + + en_dbg(DRV, priv, "%s: default counter index %d for port %d\n", + __func__, priv->counter_index, priv->port); + err = mlx4_en_config_rss_steer(priv); if (err) { en_err(priv, "Failed configuring rss steering\n"); - goto cq_err; + goto mac_err; } + err = mlx4_en_create_drop_qp(priv); + if (err) + goto rss_err; + /* Configure tx cq's and rings */ for (i = 0; i < priv->tx_ring_num; i++) { /* Configure cq */ - cq = &priv->tx_cq[i]; - err = mlx4_en_activate_cq(priv, cq); + cq = priv->tx_cq[i]; + err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed allocating Tx CQ\n"); goto tx_err; @@ -613,13 +1308,19 @@ int mlx4_en_do_start_port(struct net_device *dev) cq->buf->wqe_index = cpu_to_be16(0xffff); /* Configure ring */ - tx_ring = &priv->tx_ring[i]; - err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn); + tx_ring = priv->tx_ring[i]; + + err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn, + i / priv->num_tx_rings_p_up); if (err) { en_err(priv, "Failed allocating Tx ring\n"); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } + + /* Arm CQ for TX completions */ + mlx4_en_arm_cq(priv, cq); + /* Set initial ownership of all Tx TXBBs to SW (1) */ for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) *((u32 *) (tx_ring->buf + j)) = 0xffffffff; @@ -628,14 +1329,14 @@ int mlx4_en_do_start_port(struct net_device *dev) /* Configure port */ err = mlx4_SET_PORT_general(mdev->dev, priv->port, - priv->rx_mb_size + ETHER_CRC_LEN, + priv->rx_mb_size, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); if (err) { - en_err(priv, "Failed setting port general configurations " - "for port %d, with error %d\n", priv->port, err); + en_err(priv, "Failed setting port general configurations for port %d, with error %d\n", + priv->port, err); goto tx_err; } /* Set default qp number */ @@ -644,139 +1345,166 @@ int mlx4_en_do_start_port(struct net_device *dev) en_err(priv, "Failed setting default qp numbers\n"); goto tx_err; } - /* Set port mac number */ - en_dbg(DRV, priv, "Setting mac for port %d\n", priv->port); - err = mlx4_register_mac(mdev->dev, priv->port, - mlx4_en_mac_to_u64(IF_LLADDR(dev))); - if (err < 0) { - en_err(priv, "Failed setting port mac err=%d\n", err); - goto tx_err; - } - mdev->mac_removed[priv->port] = 0; /* Init port */ en_dbg(HW, priv, "Initializing port\n"); err = mlx4_INIT_PORT(mdev->dev, priv->port); if (err) { en_err(priv, "Failed Initializing port\n"); - goto mac_err; + goto tx_err; } - /* Set the various hardware offload abilities */ - dev->if_hwassist = 0; - if (dev->if_capenable & IFCAP_TSO4) - dev->if_hwassist |= CSUM_TSO; - if (dev->if_capenable & IFCAP_TXCSUM) - dev->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); - if (dev->if_capenable & IFCAP_RXCSUM) - priv->rx_csum = 1; - else - priv->rx_csum = 0; + /* Attach rx QP to bradcast address */ + memset(&mc_list[10], 0xff, ETH_ALEN); + mc_list[5] = priv->port; /* needed for B0 steering support */ + if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list, + priv->port, 0, MLX4_PROT_ETH, + &priv->broadcast_id)) + mlx4_warn(mdev, "Failed Attaching Broadcast\n"); - /* set TSO limits so that we don't have to drop TX packets */ - dev->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); - dev->if_hw_tsomaxsegcount = 16; - dev->if_hw_tsomaxsegsize = 65536; /* XXX can do up to 4GByte */ + /* Must redo promiscuous mode setup. */ + priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); - err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); - if (err) { - en_err(priv, "Failed to get WoL info, unable to modify\n"); - goto wol_err; - } - if (dev->if_capenable & IFCAP_WOL_MAGIC) { - config |= MLX4_EN_WOL_DO_MODIFY | MLX4_EN_WOL_ENABLED | - MLX4_EN_WOL_MAGIC; - } else { - config &= ~(MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC); - config |= MLX4_EN_WOL_DO_MODIFY; - } + /* Schedule multicast task to populate multicast list */ + queue_work(mdev->workqueue, &priv->rx_mode_task); - err = mlx4_wol_write(priv->mdev->dev, config, priv->port); - if (err) { - en_err(priv, "Failed to set WoL information\n"); - goto wol_err; - } + mlx4_set_stats_bitmap(mdev->dev, priv->stats_bitmap); priv->port_up = true; - /* Populate multicast list */ - mlx4_en_set_multicast(dev); - - /* Enable the queues. */ - dev->if_drv_flags &= ~IFF_DRV_OACTIVE; - dev->if_drv_flags |= IFF_DRV_RUNNING; + /* Enable the queues. */ + dev->if_drv_flags &= ~IFF_DRV_OACTIVE; + dev->if_drv_flags |= IFF_DRV_RUNNING; +#ifdef CONFIG_DEBUG_FS + mlx4_en_create_debug_files(priv); +#endif + callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, + mlx4_en_watchdog_timeout, priv); - callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, - mlx4_en_watchdog_timeout, priv); return 0; -wol_err: - /* close port*/ - mlx4_CLOSE_PORT(mdev->dev, priv->port); - -mac_err: - mlx4_unregister_mac(mdev->dev, priv->port, priv->mac); tx_err: while (tx_index--) { - mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[tx_index]); - mlx4_en_deactivate_cq(priv, &priv->tx_cq[tx_index]); + mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[tx_index]); + mlx4_en_deactivate_cq(priv, priv->tx_cq[tx_index]); } - + mlx4_en_destroy_drop_qp(priv); +rss_err: mlx4_en_release_rss_steer(priv); +mac_err: + mlx4_en_put_qp(priv); cq_err: while (rx_index--) - mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]); + mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]); for (i = 0; i < priv->rx_ring_num; i++) - mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]); + mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); return err; /* need to close devices */ } -void mlx4_en_do_stop_port(struct net_device *dev) +void mlx4_en_stop_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_mc_list *mclist, *tmp; int i; + u8 mc_list[16] = {0}; if (!priv->port_up) { en_dbg(DRV, priv, "stop port called while port already down\n"); return; } +#ifdef CONFIG_DEBUG_FS + mlx4_en_delete_debug_files(priv); +#endif + + /* close port*/ + mlx4_CLOSE_PORT(mdev->dev, priv->port); + /* Set port as not active */ priv->port_up = false; + if (priv->counter_index != 0xff) { + mlx4_counter_free(mdev->dev, priv->port, priv->counter_index); + priv->counter_index = 0xff; + } - /* Unregister Mac address for the port */ - mlx4_unregister_mac(mdev->dev, priv->port, priv->mac); - mdev->mac_removed[priv->port] = 1; + /* Promsicuous mode */ + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + priv->flags &= ~(MLX4_EN_FLAG_PROMISC | + MLX4_EN_FLAG_MC_PROMISC); + mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_ALL_DEFAULT); + mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_MC_DEFAULT); + } else if (priv->flags & MLX4_EN_FLAG_PROMISC) { + priv->flags &= ~MLX4_EN_FLAG_PROMISC; + + /* Disable promiscouos mode */ + mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, + priv->port); + + /* Disable Multicast promisc */ + if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { + mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, + priv->port); + priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + } + } + + /* Detach All multicasts */ + memset(&mc_list[10], 0xff, ETH_ALEN); + mc_list[5] = priv->port; /* needed for B0 steering support */ + mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, + MLX4_PROT_ETH, priv->broadcast_id); + list_for_each_entry(mclist, &priv->curr_list, list) { + memcpy(&mc_list[10], mclist->addr, ETH_ALEN); + mc_list[5] = priv->port; + mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, + mc_list, MLX4_PROT_ETH, mclist->reg_id); + } + mlx4_en_clear_list(dev); + list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { + list_del(&mclist->list); + kfree(mclist); + } + + /* Flush multicast filter */ + mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG); + mlx4_en_destroy_drop_qp(priv); /* Free TX Rings */ for (i = 0; i < priv->tx_ring_num; i++) { - mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[i]); - mlx4_en_deactivate_cq(priv, &priv->tx_cq[i]); + mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[i]); + mlx4_en_deactivate_cq(priv, priv->tx_cq[i]); } msleep(10); for (i = 0; i < priv->tx_ring_num; i++) - mlx4_en_free_tx_buf(dev, &priv->tx_ring[i]); + mlx4_en_free_tx_buf(dev, priv->tx_ring[i]); /* Free RSS qps */ mlx4_en_release_rss_steer(priv); + /* Unregister Mac address for the port */ + mlx4_en_put_qp(priv); + mdev->mac_removed[priv->port] = 1; + /* Free RX Rings */ for (i = 0; i < priv->rx_ring_num; i++) { - mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]); - mlx4_en_deactivate_cq(priv, &priv->rx_cq[i]); + struct mlx4_en_cq *cq = priv->rx_cq[i]; + mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); + mlx4_en_deactivate_cq(priv, cq); } - /* close port*/ - mlx4_CLOSE_PORT(mdev->dev, priv->port); - - callout_stop(&priv->watchdog_timer); + callout_stop(&priv->watchdog_timer); - dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } static void mlx4_en_restart(struct work_struct *work) @@ -788,12 +1516,13 @@ static void mlx4_en_restart(struct work_struct *work) struct mlx4_en_tx_ring *ring; int i; + if (priv->blocked == 0 || priv->port_up == 0) return; for (i = 0; i < priv->tx_ring_num; i++) { - ring = &priv->tx_ring[i]; + ring = priv->tx_ring[i]; if (ring->blocked && - ring->watchdog_time + MLX4_EN_WATCHDOG_TIMEOUT < ticks) + ring->watchdog_time + MLX4_EN_WATCHDOG_TIMEOUT < ticks) goto reset; } return; @@ -804,129 +1533,175 @@ reset: mutex_lock(&mdev->state_lock); if (priv->port_up) { - mlx4_en_do_stop_port(dev); - if (mlx4_en_do_start_port(dev)) + mlx4_en_stop_port(dev); + //for (i = 0; i < priv->tx_ring_num; i++) + // netdev_tx_reset_queue(priv->tx_ring[i]->tx_queue); + if (mlx4_en_start_port(dev)) en_err(priv, "Failed restarting port %d\n", priv->port); } mutex_unlock(&mdev->state_lock); } - -static void -mlx4_en_init(void *arg) +static void mlx4_en_clear_stats(struct net_device *dev) { - struct mlx4_en_priv *priv; - struct mlx4_en_dev *mdev; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int i; - priv = arg; - mdev = priv->mdev; - mutex_lock(&mdev->state_lock); - mlx4_en_init_locked(priv); - mutex_unlock(&mdev->state_lock); + if (!mlx4_is_slave(mdev->dev)) + if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) + en_dbg(HW, priv, "Failed dumping statistics\n"); + + memset(&priv->pstats, 0, sizeof(priv->pstats)); + memset(&priv->pkstats, 0, sizeof(priv->pkstats)); + memset(&priv->port_stats, 0, sizeof(priv->port_stats)); + memset(&priv->vport_stats, 0, sizeof(priv->vport_stats)); + + for (i = 0; i < priv->tx_ring_num; i++) { + priv->tx_ring[i]->bytes = 0; + priv->tx_ring[i]->packets = 0; + priv->tx_ring[i]->tx_csum = 0; + } + for (i = 0; i < priv->rx_ring_num; i++) { + priv->rx_ring[i]->bytes = 0; + priv->rx_ring[i]->packets = 0; + priv->rx_ring[i]->csum_ok = 0; + priv->rx_ring[i]->csum_none = 0; + } } -static void -mlx4_en_init_locked(struct mlx4_en_priv *priv) +static void mlx4_en_open(void* arg) { - struct mlx4_en_dev *mdev; - struct ifnet *dev; - int i; + struct mlx4_en_priv *priv; + struct mlx4_en_dev *mdev; + struct net_device *dev; + int err = 0; - dev = priv->dev; - mdev = priv->mdev; - if (dev->if_drv_flags & IFF_DRV_RUNNING) - mlx4_en_do_stop_port(dev); + priv = arg; + mdev = priv->mdev; + dev = priv->dev; + + + mutex_lock(&mdev->state_lock); if (!mdev->device_up) { en_err(priv, "Cannot open - device down/disabled\n"); - return; + goto out; } - /* Reset HW statistics and performance counters */ - if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) - en_dbg(HW, priv, "Failed dumping statistics\n"); - - memset(&priv->pstats, 0, sizeof(priv->pstats)); - - for (i = 0; i < priv->tx_ring_num; i++) { - priv->tx_ring[i].bytes = 0; - priv->tx_ring[i].packets = 0; - } - for (i = 0; i < priv->rx_ring_num; i++) { - priv->rx_ring[i].bytes = 0; - priv->rx_ring[i].packets = 0; - } + /* Reset HW statistics and SW counters */ + mlx4_en_clear_stats(dev); - mlx4_en_set_default_moderation(priv); - if (mlx4_en_do_start_port(dev)) + err = mlx4_en_start_port(dev); + if (err) en_err(priv, "Failed starting port:%d\n", priv->port); + +out: + mutex_unlock(&mdev->state_lock); + return; } void mlx4_en_free_resources(struct mlx4_en_priv *priv) { int i; +#ifdef CONFIG_RFS_ACCEL + if (priv->dev->rx_cpu_rmap) { + free_irq_cpu_rmap(priv->dev->rx_cpu_rmap); + priv->dev->rx_cpu_rmap = NULL; + } +#endif + for (i = 0; i < priv->tx_ring_num; i++) { - if (priv->tx_ring[i].tx_info) + if (priv->tx_ring && priv->tx_ring[i]) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); - if (priv->tx_cq[i].buf) + if (priv->tx_cq && priv->tx_cq[i]) mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); } for (i = 0; i < priv->rx_ring_num; i++) { - if (priv->rx_ring[i].rx_info) - mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i]); - if (priv->rx_cq[i].buf) + if (priv->rx_ring[i]) + mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], + priv->prof->rx_ring_size, priv->stride); + if (priv->rx_cq[i]) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } - /* Free the stats tree when we resize the rings. */ + if (priv->sysctl) sysctl_ctx_free(&priv->stat_ctx); + } int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) { struct mlx4_en_port_profile *prof = priv->prof; int i; - - /* Create tx Rings */ - for (i = 0; i < priv->tx_ring_num; i++) { - if (mlx4_en_create_cq(priv, &priv->tx_cq[i], - prof->tx_ring_size, i, TX)) - goto err; - - if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], - prof->tx_ring_size, TXBB_SIZE)) - goto err; - } + int node = 0; /* Create rx Rings */ for (i = 0; i < priv->rx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->rx_cq[i], - prof->rx_ring_size, i, RX)) + prof->rx_ring_size, i, RX, node)) goto err; if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], - prof->rx_ring_size)) + prof->rx_ring_size, node)) goto err; } - /* Re-create stat sysctls in case the number of rings changed. */ - mlx4_en_sysctl_stat(priv); + /* Create tx Rings */ + for (i = 0; i < priv->tx_ring_num; i++) { + if (mlx4_en_create_cq(priv, &priv->tx_cq[i], + prof->tx_ring_size, i, TX, node)) + goto err; - /* Populate Tx priority mappings */ - mlx4_en_set_prio_map(priv, priv->tx_prio_map, - priv->tx_ring_num - MLX4_EN_NUM_HASH_RINGS); + if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], + prof->tx_ring_size, TXBB_SIZE, node, i)) + goto err; + } +#ifdef CONFIG_RFS_ACCEL + priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->rx_ring_num); + if (!priv->dev->rx_cpu_rmap) + goto err; +#endif + /* Re-create stat sysctls in case the number of rings changed. */ + mlx4_en_sysctl_stat(priv); return 0; err: en_err(priv, "Failed to allocate NIC resources\n"); + for (i = 0; i < priv->rx_ring_num; i++) { + if (priv->rx_ring[i]) + mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], + prof->rx_ring_size, + priv->stride); + if (priv->rx_cq[i]) + mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); + } + for (i = 0; i < priv->tx_ring_num; i++) { + if (priv->tx_ring[i]) + mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); + if (priv->tx_cq[i]) + mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); + } + priv->port_up = false; return -ENOMEM; } +struct en_port_attribute { + struct attribute attr; + ssize_t (*show)(struct en_port *, struct en_port_attribute *, char *buf); + ssize_t (*store)(struct en_port *, struct en_port_attribute *, char *buf, size_t count); +}; + +#define PORT_ATTR_RO(_name) \ +struct en_port_attribute en_port_attr_##_name = __ATTR_RO(_name) + +#define EN_PORT_ATTR(_name, _mode, _show, _store) \ +struct en_port_attribute en_port_attr_##_name = __ATTR(_name, _mode, _show, _store) void mlx4_en_destroy_netdev(struct net_device *dev) { @@ -935,10 +1710,10 @@ void mlx4_en_destroy_netdev(struct net_device *dev) en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); - if (priv->vlan_attach != NULL) - EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); - if (priv->vlan_detach != NULL) - EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); + if (priv->vlan_attach != NULL) + EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); + if (priv->vlan_detach != NULL) + EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); /* Unregister device - this will close the port if it was up */ if (priv->registered) @@ -948,28 +1723,34 @@ void mlx4_en_destroy_netdev(struct net_device *dev) mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE); mutex_lock(&mdev->state_lock); - mlx4_en_do_stop_port(dev); + mlx4_en_stop_port(dev); mutex_unlock(&mdev->state_lock); + cancel_delayed_work(&priv->stats_task); + cancel_delayed_work(&priv->service_task); /* flush any pending task for this netdev */ flush_workqueue(mdev->workqueue); - callout_drain(&priv->watchdog_timer); + callout_drain(&priv->watchdog_timer); /* Detach the netdev so tasks would not attempt to access it */ mutex_lock(&mdev->state_lock); mdev->pndev[priv->port] = NULL; mutex_unlock(&mdev->state_lock); + mlx4_en_free_resources(priv); + /* freeing the sysctl conf cannot be called from within mlx4_en_free_resources */ if (priv->sysctl) sysctl_ctx_free(&priv->conf_ctx); - mtx_destroy(&priv->stats_lock.m); - mtx_destroy(&priv->vlan_lock.m); - kfree(priv); - if_free(dev); + kfree(priv->tx_ring); + kfree(priv->tx_cq); + + kfree(priv); + if_free(dev); + } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) @@ -978,8 +1759,8 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) struct mlx4_en_dev *mdev = priv->mdev; int err = 0; - en_dbg(DRV, priv, "Change MTU called - current:%u new:%d\n", - dev->if_mtu, new_mtu); + en_dbg(DRV, priv, "Change MTU called - current:%u new:%u\n", + (unsigned)dev->if_mtu, (unsigned)new_mtu); if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) { en_err(priv, "Bad MTU size:%d.\n", new_mtu); @@ -990,15 +1771,14 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) if (dev->if_drv_flags & IFF_DRV_RUNNING) { if (!mdev->device_up) { /* NIC is probably restarting - let watchdog task reset - * the port */ + * * the port */ en_dbg(DRV, priv, "Change MTU called with card down!?\n"); } else { - mlx4_en_do_stop_port(dev); - mlx4_en_set_default_moderation(priv); - err = mlx4_en_do_start_port(dev); + mlx4_en_stop_port(dev); + err = mlx4_en_start_port(dev); if (err) { en_err(priv, "Failed restarting port:%d\n", - priv->port); + priv->port); queue_work(mdev->workqueue, &priv->watchdog_task); } } @@ -1047,7 +1827,6 @@ static int mlx4_en_calc_media(struct mlx4_en_priv *priv) return (active); } - static void mlx4_en_media_status(struct ifnet *dev, struct ifmediareq *ifmr) { struct mlx4_en_priv *priv; @@ -1082,9 +1861,10 @@ static int mlx4_en_media_change(struct ifnet *dev) case IFM_10G_SR: case IFM_10G_CX4: case IFM_1000_T: - if (IFM_SUBTYPE(ifm->ifm_media) == - IFM_SUBTYPE(mlx4_en_calc_media(priv)) && - (ifm->ifm_media & IFM_FDX)) + case IFM_40G_CR4: + if ((IFM_SUBTYPE(ifm->ifm_media) + == IFM_SUBTYPE(mlx4_en_calc_media(priv))) + && (ifm->ifm_media & IFM_FDX)) break; /* Fallthrough */ default: @@ -1121,36 +1901,28 @@ static int mlx4_en_ioctl(struct ifnet *dev, u_long command, caddr_t data) mdev = priv->mdev; ifr = (struct ifreq *) data; switch (command) { + case SIOCSIFMTU: error = -mlx4_en_change_mtu(dev, ifr->ifr_mtu); break; case SIOCSIFFLAGS: + mutex_lock(&mdev->state_lock); if (dev->if_flags & IFF_UP) { if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) mlx4_en_start_port(dev); else - mlx4_en_set_multicast(dev); + mlx4_en_set_rx_mode(dev); } else { if (dev->if_drv_flags & IFF_DRV_RUNNING) { mlx4_en_stop_port(dev); if_link_state_change(dev, LINK_STATE_DOWN); - /* - * Since mlx4_en_stop_port is defered we - * have to wait till it's finished. - */ - for (int count=0; count<10; count++) { - if (dev->if_drv_flags & IFF_DRV_RUNNING) { - DELAY(20000); - } else { - break; - } - } } } + mutex_unlock(&mdev->state_lock); break; case SIOCADDMULTI: case SIOCDELMULTI: - mlx4_en_set_multicast(dev); + mlx4_en_set_rx_mode(dev); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: @@ -1163,6 +1935,8 @@ static int mlx4_en_ioctl(struct ifnet *dev, u_long command, caddr_t data) dev->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) dev->if_capenable ^= IFCAP_TSO4; + if (mask & IFCAP_TSO6) + dev->if_capenable ^= IFCAP_TSO6; if (mask & IFCAP_LRO) dev->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) @@ -1172,7 +1946,7 @@ static int mlx4_en_ioctl(struct ifnet *dev, u_long command, caddr_t data) if (mask & IFCAP_WOL_MAGIC) dev->if_capenable ^= IFCAP_WOL_MAGIC; if (dev->if_drv_flags & IFF_DRV_RUNNING) - mlx4_en_init_locked(priv); + mlx4_en_start_port(dev); mutex_unlock(&mdev->state_lock); VLAN_CAPABILITIES(dev); break; @@ -1184,228 +1958,441 @@ static int mlx4_en_ioctl(struct ifnet *dev, u_long command, caddr_t data) return (error); } -static int mlx4_en_set_ring_size(struct net_device *dev, - int rx_size, int tx_size) + +int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, + struct mlx4_en_port_profile *prof) { - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_dev *mdev = priv->mdev; - int port_up = 0; - int err = 0; + struct net_device *dev; + struct mlx4_en_priv *priv; + uint8_t dev_addr[ETHER_ADDR_LEN]; + int err; + int i; - rx_size = roundup_pow_of_two(rx_size); - rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE); - rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE); - tx_size = roundup_pow_of_two(tx_size); - tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE); - tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + dev = priv->dev = if_alloc(IFT_ETHER); + if (dev == NULL) { + en_err(priv, "Net device allocation failed\n"); + kfree(priv); + return -ENOMEM; + } + dev->if_softc = priv; + if_initname(dev, "mlxen", atomic_fetchadd_int(&mlx4_en_unit, 1)); + dev->if_mtu = ETHERMTU; + dev->if_baudrate = 1000000000; + dev->if_init = mlx4_en_open; + dev->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + dev->if_ioctl = mlx4_en_ioctl; + dev->if_transmit = mlx4_en_transmit; + dev->if_qflush = mlx4_en_qflush; + dev->if_snd.ifq_maxlen = prof->tx_ring_size; - if (rx_size == (priv->port_up ? - priv->rx_ring[0].actual_size : priv->rx_ring[0].size) && - tx_size == priv->tx_ring[0].size) - return 0; + /* + * Initialize driver private data + */ + priv->counter_index = 0xff; + spin_lock_init(&priv->stats_lock); + INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); + INIT_WORK(&priv->watchdog_task, mlx4_en_restart); + INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); + INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); + INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); + callout_init(&priv->watchdog_timer, 1); +#ifdef CONFIG_RFS_ACCEL + INIT_LIST_HEAD(&priv->filters); + spin_lock_init(&priv->filters_lock); +#endif - mutex_lock(&mdev->state_lock); - if (priv->port_up) { - port_up = 1; - mlx4_en_do_stop_port(dev); + priv->msg_enable = MLX4_EN_MSG_LEVEL; + priv->dev = dev; + priv->mdev = mdev; + priv->ddev = &mdev->pdev->dev; + priv->prof = prof; + priv->port = port; + priv->port_up = false; + priv->flags = prof->flags; + priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE | + MLX4_WQE_CTRL_SOLICITED); + + priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up; + priv->tx_ring_num = prof->tx_ring_num; + priv->tx_ring = kcalloc(MAX_TX_RINGS, + sizeof(struct mlx4_en_tx_ring *), GFP_KERNEL); + if (!priv->tx_ring) { + err = -ENOMEM; + goto out; } - mlx4_en_free_resources(priv); - priv->prof->tx_ring_size = tx_size; - priv->prof->rx_ring_size = rx_size; + priv->tx_cq = kcalloc(sizeof(struct mlx4_en_cq *), MAX_TX_RINGS, + GFP_KERNEL); + if (!priv->tx_cq) { + err = -ENOMEM; + goto out; + } + + priv->rx_ring_num = prof->rx_ring_num; + priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0; + priv->mac_index = -1; + priv->last_ifq_jiffies = 0; + priv->if_counters_rx_errors = 0; + priv->if_counters_rx_no_buffer = 0; +#ifdef CONFIG_MLX4_EN_DCB + if (!mlx4_is_slave(priv->mdev->dev)) { + priv->dcbx_cap = DCB_CAP_DCBX_HOST; + priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { + dev->dcbnl_ops = &mlx4_en_dcbnl_ops; + } else { + en_info(priv, "QoS disabled - no HW support\n"); + dev->dcbnl_ops = &mlx4_en_dcbnl_pfc_ops; + } + } +#endif + + for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) + INIT_HLIST_HEAD(&priv->mac_hash[i]); + + + /* Query for default mac and max mtu */ + priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; + priv->mac = mdev->dev->caps.def_mac[priv->port]; + if (ILLEGAL_MAC(priv->mac)) { +#if BITS_PER_LONG == 64 + en_err(priv, "Port: %d, invalid mac burned: 0x%lx, quiting\n", + priv->port, priv->mac); +#elif BITS_PER_LONG == 32 + en_err(priv, "Port: %d, invalid mac burned: 0x%llx, quiting\n", + priv->port, priv->mac); +#endif + err = -EINVAL; + goto out; + } + + + + priv->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + + DS_SIZE); + + mlx4_en_sysctl_conf(priv); + err = mlx4_en_alloc_resources(priv); + if (err) + goto out; + + /* Allocate page for receive rings */ + err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, + MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); if (err) { - en_err(priv, "Failed reallocating port resources\n"); + en_err(priv, "Failed to allocate page for rx qps\n"); goto out; } - if (port_up) { - err = mlx4_en_do_start_port(dev); - if (err) - en_err(priv, "Failed starting port\n"); + priv->allocated = 1; + + /* + * Set driver features + */ + dev->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM; + dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; + dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; + dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; + dev->if_capabilities |= IFCAP_LRO; + + if (mdev->LSO_support) + dev->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTSO; + + /* set TSO limits so that we don't have to drop TX packets */ + dev->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); + dev->if_hw_tsomaxsegcount = 16; + dev->if_hw_tsomaxsegsize = 65536; /* XXX can do up to 4GByte */ + + dev->if_capenable = dev->if_capabilities; + + dev->if_hwassist = 0; + if (dev->if_capenable & (IFCAP_TSO4 | IFCAP_TSO6)) + dev->if_hwassist |= CSUM_TSO; + if (dev->if_capenable & IFCAP_TXCSUM) + dev->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); + + + /* Register for VLAN events */ + priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, + mlx4_en_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); + priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, + mlx4_en_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); + + mdev->pndev[priv->port] = dev; + + priv->last_link_state = MLX4_DEV_EVENT_PORT_DOWN; + mlx4_en_set_default_moderation(priv); + + /* Set default MAC */ + for (i = 0; i < ETHER_ADDR_LEN; i++) + dev_addr[ETHER_ADDR_LEN - 1 - i] = (u8) (priv->mac >> (8 * i)); + + + ether_ifattach(dev, dev_addr); + if_link_state_change(dev, LINK_STATE_DOWN); + ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK, + mlx4_en_media_change, mlx4_en_media_status); + ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_1000_T, 0, NULL); + ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_SR, 0, NULL); + ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_CX4, 0, NULL); + ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_40G_CR4, 0, NULL); + ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO); + + en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); + en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); + + priv->registered = 1; + + en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); + en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); + + + priv->rx_mb_size = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; + err = mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_mb_size, + prof->tx_pause, prof->tx_ppp, + prof->rx_pause, prof->rx_ppp); + if (err) { + en_err(priv, "Failed setting port general configurations " + "for port %d, with error %d\n", priv->port, err); + goto out; } + + /* Init port */ + en_warn(priv, "Initializing port\n"); + err = mlx4_INIT_PORT(mdev->dev, priv->port); + if (err) { + en_err(priv, "Failed Initializing port\n"); + goto out; + } + + queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); + + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) + queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); + + + + return 0; + out: - mutex_unlock(&mdev->state_lock); + mlx4_en_destroy_netdev(dev); return err; } - +static int mlx4_en_set_ring_size(struct net_device *dev, + int rx_size, int tx_size) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int port_up = 0; + int err = 0; + + rx_size = roundup_pow_of_two(rx_size); + rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE); + rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE); + tx_size = roundup_pow_of_two(tx_size); + tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE); + tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE); + + if (rx_size == (priv->port_up ? + priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size) && + tx_size == priv->tx_ring[0]->size) + return 0; + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev); + } + mlx4_en_free_resources(priv); + priv->prof->tx_ring_size = tx_size; + priv->prof->rx_ring_size = rx_size; + err = mlx4_en_alloc_resources(priv); + if (err) { + en_err(priv, "Failed reallocating port resources\n"); + goto out; + } + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) + en_err(priv, "Failed starting port\n"); + } +out: + mutex_unlock(&mdev->state_lock); + return err; +} static int mlx4_en_set_rx_ring_size(SYSCTL_HANDLER_ARGS) { - struct mlx4_en_priv *priv; - int size; - int error; - - priv = arg1; - size = priv->prof->rx_ring_size; - error = sysctl_handle_int(oidp, &size, 0, req); - if (error || !req->newptr) - return (error); - error = -mlx4_en_set_ring_size(priv->dev, size, - priv->prof->tx_ring_size); - - return (error); + struct mlx4_en_priv *priv; + int size; + int error; + + priv = arg1; + size = priv->prof->rx_ring_size; + error = sysctl_handle_int(oidp, &size, 0, req); + if (error || !req->newptr) + return (error); + error = -mlx4_en_set_ring_size(priv->dev, size, + priv->prof->tx_ring_size); + return (error); } static int mlx4_en_set_tx_ring_size(SYSCTL_HANDLER_ARGS) { - struct mlx4_en_priv *priv; - int size; - int error; - - priv = arg1; - size = priv->prof->tx_ring_size; - error = sysctl_handle_int(oidp, &size, 0, req); - if (error || !req->newptr) - return (error); - error = -mlx4_en_set_ring_size(priv->dev, priv->prof->rx_ring_size, - size); - - return (error); + struct mlx4_en_priv *priv; + int size; + int error; + + priv = arg1; + size = priv->prof->tx_ring_size; + error = sysctl_handle_int(oidp, &size, 0, req); + if (error || !req->newptr) + return (error); + error = -mlx4_en_set_ring_size(priv->dev, priv->prof->rx_ring_size, + size); + + return (error); } static int mlx4_en_set_tx_ppp(SYSCTL_HANDLER_ARGS) { - struct mlx4_en_priv *priv; - int ppp; - int error; - - priv = arg1; - ppp = priv->prof->tx_ppp; - error = sysctl_handle_int(oidp, &ppp, 0, req); - if (error || !req->newptr) - return (error); - if (ppp > 0xff || ppp < 0) - return (-EINVAL); - priv->prof->tx_ppp = ppp; - error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, - priv->rx_mb_size + ETHER_CRC_LEN, - priv->prof->tx_pause, - priv->prof->tx_ppp, - priv->prof->rx_pause, - priv->prof->rx_ppp); - - return (error); + struct mlx4_en_priv *priv; + int ppp; + int error; + + priv = arg1; + ppp = priv->prof->tx_ppp; + error = sysctl_handle_int(oidp, &ppp, 0, req); + if (error || !req->newptr) + return (error); + if (ppp > 0xff || ppp < 0) + return (-EINVAL); + priv->prof->tx_ppp = ppp; + error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, + priv->rx_mb_size + ETHER_CRC_LEN, + priv->prof->tx_pause, + priv->prof->tx_ppp, + priv->prof->rx_pause, + priv->prof->rx_ppp); + + return (error); } static int mlx4_en_set_rx_ppp(SYSCTL_HANDLER_ARGS) { - struct mlx4_en_priv *priv; - struct mlx4_en_dev *mdev; - int tx_ring_num; - int ppp; - int error; - int port_up; - - port_up = 0; - priv = arg1; - mdev = priv->mdev; - ppp = priv->prof->rx_ppp; - error = sysctl_handle_int(oidp, &ppp, 0, req); - if (error || !req->newptr) - return (error); - if (ppp > 0xff || ppp < 0) - return (-EINVAL); - /* See if we have to change the number of tx queues. */ - if (!ppp != !priv->prof->rx_ppp) { - tx_ring_num = MLX4_EN_NUM_HASH_RINGS + 1 + - (!!ppp) * MLX4_EN_NUM_PPP_RINGS; - mutex_lock(&mdev->state_lock); - if (priv->port_up) { - port_up = 1; - mlx4_en_do_stop_port(priv->dev); - } - mlx4_en_free_resources(priv); - priv->tx_ring_num = tx_ring_num; - priv->prof->rx_ppp = ppp; - error = -mlx4_en_alloc_resources(priv); - if (error) - en_err(priv, "Failed reallocating port resources\n"); - if (error == 0 && port_up) { - error = -mlx4_en_do_start_port(priv->dev); - if (error) - en_err(priv, "Failed starting port\n"); - } - mutex_unlock(&mdev->state_lock); - return (error); - - } - priv->prof->rx_ppp = ppp; - error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, - priv->rx_mb_size + ETHER_CRC_LEN, - priv->prof->tx_pause, - priv->prof->tx_ppp, - priv->prof->rx_pause, - priv->prof->rx_ppp); - - return (error); + struct mlx4_en_priv *priv; + struct mlx4_en_dev *mdev; + int ppp; + int error; + int port_up; + + port_up = 0; + priv = arg1; + mdev = priv->mdev; + ppp = priv->prof->rx_ppp; + error = sysctl_handle_int(oidp, &ppp, 0, req); + if (error || !req->newptr) + return (error); + if (ppp > 0xff || ppp < 0) + return (-EINVAL); + /* See if we have to change the number of tx queues. */ + if (!ppp != !priv->prof->rx_ppp) { + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(priv->dev); + } + mlx4_en_free_resources(priv); + priv->prof->rx_ppp = ppp; + error = -mlx4_en_alloc_resources(priv); + if (error) + en_err(priv, "Failed reallocating port resources\n"); + if (error == 0 && port_up) { + error = -mlx4_en_start_port(priv->dev); + if (error) + en_err(priv, "Failed starting port\n"); + } + mutex_unlock(&mdev->state_lock); + return (error); + + } + priv->prof->rx_ppp = ppp; + error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, + priv->rx_mb_size + ETHER_CRC_LEN, + priv->prof->tx_pause, + priv->prof->tx_ppp, + priv->prof->rx_pause, + priv->prof->rx_ppp); + + return (error); } static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv) { - struct net_device *dev; - struct sysctl_ctx_list *ctx; - struct sysctl_oid *node; - struct sysctl_oid_list *node_list; - struct sysctl_oid *coal; - struct sysctl_oid_list *coal_list; - - dev = priv->dev; - ctx = &priv->conf_ctx; - - sysctl_ctx_init(ctx); - priv->sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), - OID_AUTO, dev->if_xname, CTLFLAG_RD, 0, "mlx4 10gig ethernet"); - node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO, - "conf", CTLFLAG_RD, NULL, "Configuration"); - node_list = SYSCTL_CHILDREN(node); - - SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "msg_enable", - CTLFLAG_RW, &priv->msg_enable, 0, - "Driver message enable bitfield"); - SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_rings", - CTLTYPE_INT | CTLFLAG_RD, &priv->rx_ring_num, 0, - "Number of receive rings"); - SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_rings", - CTLTYPE_INT | CTLFLAG_RD, &priv->tx_ring_num, 0, - "Number of transmit rings"); - SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_size", - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, - mlx4_en_set_rx_ring_size, "I", "Receive ring size"); - SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_size", - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, - mlx4_en_set_tx_ring_size, "I", "Transmit ring size"); - SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "ip_reasm", - CTLFLAG_RW, &priv->ip_reasm, 0, - "Allow reassembly of IP fragments."); - SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_ppp", - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, - mlx4_en_set_tx_ppp, "I", "TX Per-priority pause"); - SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_ppp", - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, - mlx4_en_set_rx_ppp, "I", "RX Per-priority pause"); - - /* Add coalescer configuration. */ - coal = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, - "coalesce", CTLFLAG_RD, NULL, "Interrupt coalesce configuration"); - coal_list = SYSCTL_CHILDREN(node); - SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_low", - CTLFLAG_RW, &priv->pkt_rate_low, 0, - "Packets per-second for minimum delay"); - SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_low", - CTLFLAG_RW, &priv->rx_usecs_low, 0, - "Minimum RX delay in micro-seconds"); - SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_high", - CTLFLAG_RW, &priv->pkt_rate_high, 0, - "Packets per-second for maximum delay"); - SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_high", - CTLFLAG_RW, &priv->rx_usecs_high, 0, - "Maximum RX delay in micro-seconds"); - SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "sample_interval", - CTLFLAG_RW, &priv->sample_interval, 0, - "adaptive frequency in units of HZ ticks"); - SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "adaptive_rx_coal", - CTLFLAG_RW, &priv->adaptive_rx_coal, 0, - "Enable adaptive rx coalescing"); + struct net_device *dev; + struct sysctl_ctx_list *ctx; + struct sysctl_oid *node; + struct sysctl_oid_list *node_list; + struct sysctl_oid *coal; + struct sysctl_oid_list *coal_list; + + dev = priv->dev; + ctx = &priv->conf_ctx; + + sysctl_ctx_init(ctx); + priv->sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), + OID_AUTO, dev->if_xname, CTLFLAG_RD, 0, "mlx4 10gig ethernet"); + node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO, + "conf", CTLFLAG_RD, NULL, "Configuration"); + node_list = SYSCTL_CHILDREN(node); + + SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "msg_enable", + CTLFLAG_RW, &priv->msg_enable, 0, + "Driver message enable bitfield"); + SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_rings", + CTLTYPE_INT | CTLFLAG_RD, &priv->rx_ring_num, 0, + "Number of receive rings"); + SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_rings", + CTLTYPE_INT | CTLFLAG_RD, &priv->tx_ring_num, 0, + "Number of transmit rings"); + SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_size", + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, + mlx4_en_set_rx_ring_size, "I", "Receive ring size"); + SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_size", + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, + mlx4_en_set_tx_ring_size, "I", "Transmit ring size"); + SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_ppp", + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, + mlx4_en_set_tx_ppp, "I", "TX Per-priority pause"); + SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_ppp", + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, + mlx4_en_set_rx_ppp, "I", "RX Per-priority pause"); + + /* Add coalescer configuration. */ + coal = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, + "coalesce", CTLFLAG_RD, NULL, "Interrupt coalesce configuration"); + coal_list = SYSCTL_CHILDREN(node); + SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_low", + CTLFLAG_RW, &priv->pkt_rate_low, 0, + "Packets per-second for minimum delay"); + SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_low", + CTLFLAG_RW, &priv->rx_usecs_low, 0, + "Minimum RX delay in micro-seconds"); + SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_high", + CTLFLAG_RW, &priv->pkt_rate_high, 0, + "Packets per-second for maximum delay"); + SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_high", + CTLFLAG_RW, &priv->rx_usecs_high, 0, + "Maximum RX delay in micro-seconds"); + SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "sample_interval", + CTLFLAG_RW, &priv->sample_interval, 0, + "adaptive frequency in units of HZ ticks"); + SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "adaptive_rx_coal", + CTLFLAG_RW, &priv->adaptive_rx_coal, 0, + "Enable adaptive rx coalescing"); } + static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv) { struct net_device *dev; @@ -1459,43 +2446,111 @@ static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv) "TX checksum offloads"); /* Could strdup the names and add in a loop. This is simpler. */ - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "broadcast", CTLFLAG_RD, - &priv->pkstats.broadcast, "Broadcast packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio0", CTLFLAG_RD, - &priv->pkstats.tx_prio[0], "TX Priority 0 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio1", CTLFLAG_RD, - &priv->pkstats.tx_prio[1], "TX Priority 1 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio2", CTLFLAG_RD, - &priv->pkstats.tx_prio[2], "TX Priority 2 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio3", CTLFLAG_RD, - &priv->pkstats.tx_prio[3], "TX Priority 3 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio4", CTLFLAG_RD, - &priv->pkstats.tx_prio[4], "TX Priority 4 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio5", CTLFLAG_RD, - &priv->pkstats.tx_prio[5], "TX Priority 5 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio6", CTLFLAG_RD, - &priv->pkstats.tx_prio[6], "TX Priority 6 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio7", CTLFLAG_RD, - &priv->pkstats.tx_prio[7], "TX Priority 7 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio0", CTLFLAG_RD, - &priv->pkstats.rx_prio[0], "RX Priority 0 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio1", CTLFLAG_RD, - &priv->pkstats.rx_prio[1], "RX Priority 1 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio2", CTLFLAG_RD, - &priv->pkstats.rx_prio[2], "RX Priority 2 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio3", CTLFLAG_RD, - &priv->pkstats.rx_prio[3], "RX Priority 3 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio4", CTLFLAG_RD, - &priv->pkstats.rx_prio[4], "RX Priority 4 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio5", CTLFLAG_RD, - &priv->pkstats.rx_prio[5], "RX Priority 5 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio6", CTLFLAG_RD, - &priv->pkstats.rx_prio[6], "RX Priority 6 packets"); - SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio7", CTLFLAG_RD, - &priv->pkstats.rx_prio[7], "RX Priority 7 packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, + &priv->pkstats.rx_bytes, "RX Bytes"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_packets", CTLFLAG_RD, + &priv->pkstats.rx_packets, "RX packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_multicast_packets", CTLFLAG_RD, + &priv->pkstats.rx_multicast_packets, "RX Multicast Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_broadcast_packets", CTLFLAG_RD, + &priv->pkstats.rx_broadcast_packets, "RX Broadcast Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_errors", CTLFLAG_RD, + &priv->pkstats.rx_errors, "RX Errors"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_dropped", CTLFLAG_RD, + &priv->pkstats.rx_dropped, "RX Dropped"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_length_errors", CTLFLAG_RD, + &priv->pkstats.rx_length_errors, "RX Length Errors"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_over_errors", CTLFLAG_RD, + &priv->pkstats.rx_over_errors, "RX Over Errors"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_crc_errors", CTLFLAG_RD, + &priv->pkstats.rx_crc_errors, "RX CRC Errors"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_jabbers", CTLFLAG_RD, + &priv->pkstats.rx_jabbers, "RX Jabbers"); + + + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_in_range_length_error", CTLFLAG_RD, + &priv->pkstats.rx_in_range_length_error, "RX IN_Range Length Error"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_out_range_length_error", + CTLFLAG_RD, &priv->pkstats.rx_out_range_length_error, + "RX Out Range Length Error"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_lt_64_bytes_packets", CTLFLAG_RD, + &priv->pkstats.rx_lt_64_bytes_packets, "RX Lt 64 Bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_127_bytes_packets", CTLFLAG_RD, + &priv->pkstats.rx_127_bytes_packets, "RX 127 bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_255_bytes_packets", CTLFLAG_RD, + &priv->pkstats.rx_255_bytes_packets, "RX 255 bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_511_bytes_packets", CTLFLAG_RD, + &priv->pkstats.rx_511_bytes_packets, "RX 511 bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1023_bytes_packets", CTLFLAG_RD, + &priv->pkstats.rx_1023_bytes_packets, "RX 1023 bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1518_bytes_packets", CTLFLAG_RD, + &priv->pkstats.rx_1518_bytes_packets, "RX 1518 bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1522_bytes_packets", CTLFLAG_RD, + &priv->pkstats.rx_1522_bytes_packets, "RX 1522 bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1548_bytes_packets", CTLFLAG_RD, + &priv->pkstats.rx_1548_bytes_packets, "RX 1548 bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_gt_1548_bytes_packets", CTLFLAG_RD, + &priv->pkstats.rx_gt_1548_bytes_packets, + "RX Greater Then 1548 bytes Packets"); + +struct mlx4_en_pkt_stats { + unsigned long tx_packets; + unsigned long tx_bytes; + unsigned long tx_multicast_packets; + unsigned long tx_broadcast_packets; + unsigned long tx_errors; + unsigned long tx_dropped; + unsigned long tx_lt_64_bytes_packets; + unsigned long tx_127_bytes_packets; + unsigned long tx_255_bytes_packets; + unsigned long tx_511_bytes_packets; + unsigned long tx_1023_bytes_packets; + unsigned long tx_1518_bytes_packets; + unsigned long tx_1522_bytes_packets; + unsigned long tx_1548_bytes_packets; + unsigned long tx_gt_1548_bytes_packets; + unsigned long rx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS]; + unsigned long tx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS]; +#define NUM_PKT_STATS 72 +}; + + + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_packets", CTLFLAG_RD, + &priv->pkstats.tx_packets, "TX packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, + &priv->pkstats.tx_packets, "TX Bytes"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_multicast_packets", CTLFLAG_RD, + &priv->pkstats.tx_multicast_packets, "TX Multicast Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_broadcast_packets", CTLFLAG_RD, + &priv->pkstats.tx_broadcast_packets, "TX Broadcast Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_errors", CTLFLAG_RD, + &priv->pkstats.tx_errors, "TX Errors"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_dropped", CTLFLAG_RD, + &priv->pkstats.tx_dropped, "TX Dropped"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_lt_64_bytes_packets", CTLFLAG_RD, + &priv->pkstats.tx_lt_64_bytes_packets, "TX Less Then 64 Bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_127_bytes_packets", CTLFLAG_RD, + &priv->pkstats.tx_127_bytes_packets, "TX 127 Bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_255_bytes_packets", CTLFLAG_RD, + &priv->pkstats.tx_255_bytes_packets, "TX 255 Bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_511_bytes_packets", CTLFLAG_RD, + &priv->pkstats.tx_511_bytes_packets, "TX 511 Bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1023_bytes_packets", CTLFLAG_RD, + &priv->pkstats.tx_1023_bytes_packets, "TX 1023 Bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1518_bytes_packets", CTLFLAG_RD, + &priv->pkstats.tx_1518_bytes_packets, "TX 1518 Bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1522_bytes_packets", CTLFLAG_RD, + &priv->pkstats.tx_1522_bytes_packets, "TX 1522 Bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1548_bytes_packets", CTLFLAG_RD, + &priv->pkstats.tx_1548_bytes_packets, "TX 1548 Bytes Packets"); + SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_gt_1548_bytes_packets", CTLFLAG_RD, + &priv->pkstats.tx_gt_1548_bytes_packets, + "TX Greater Then 1548 Bytes Packets"); + + for (i = 0; i < priv->tx_ring_num; i++) { - tx_ring = &priv->tx_ring[i]; + tx_ring = priv->tx_ring[i]; snprintf(namebuf, sizeof(namebuf), "tx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Ring"); @@ -1504,12 +2559,10 @@ static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv) CTLFLAG_RD, &tx_ring->packets, "TX packets"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &tx_ring->bytes, "TX bytes"); - SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "error", - CTLFLAG_RD, &tx_ring->errors, "TX soft errors"); } for (i = 0; i < priv->rx_ring_num; i++) { - rx_ring = &priv->rx_ring[i]; + rx_ring = priv->rx_ring[i]; snprintf(namebuf, sizeof(namebuf), "rx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Ring"); @@ -1520,153 +2573,5 @@ static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv) CTLFLAG_RD, &rx_ring->bytes, "RX bytes"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "error", CTLFLAG_RD, &rx_ring->errors, "RX soft errors"); - SYSCTL_ADD_UINT(ctx, ring_list, OID_AUTO, "lro_queued", - CTLFLAG_RD, &rx_ring->lro.lro_queued, 0, "LRO Queued"); - SYSCTL_ADD_UINT(ctx, ring_list, OID_AUTO, "lro_flushed", - CTLFLAG_RD, &rx_ring->lro.lro_flushed, 0, "LRO Flushed"); - } -} - -int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, - struct mlx4_en_port_profile *prof) -{ - static volatile int mlx4_en_unit; - struct net_device *dev; - struct mlx4_en_priv *priv; - uint8_t dev_addr[ETHER_ADDR_LEN]; - int err; - int i; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - dev = priv->dev = if_alloc(IFT_ETHER); - if (dev == NULL) { - mlx4_err(mdev, "Net device allocation failed\n"); - kfree(priv); - return -ENOMEM; - } - dev->if_softc = priv; - if_initname(dev, "mlxen", atomic_fetchadd_int(&mlx4_en_unit, 1)); - dev->if_mtu = ETHERMTU; - dev->if_baudrate = 1000000000; - dev->if_init = mlx4_en_init; - dev->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - dev->if_ioctl = mlx4_en_ioctl; - dev->if_transmit = mlx4_en_transmit; - dev->if_qflush = mlx4_en_qflush; - dev->if_snd.ifq_maxlen = prof->tx_ring_size; - - /* - * Initialize driver private data - */ - priv->dev = dev; - priv->mdev = mdev; - priv->prof = prof; - priv->port = port; - priv->port_up = false; - priv->rx_csum = 1; - priv->flags = prof->flags; - priv->tx_ring_num = prof->tx_ring_num; - priv->rx_ring_num = prof->rx_ring_num; - priv->mac_index = -1; - priv->msg_enable = MLX4_EN_MSG_LEVEL; - priv->ip_reasm = priv->mdev->profile.ip_reasm; - mtx_init(&priv->stats_lock.m, "mlx4 stats", NULL, MTX_DEF); - mtx_init(&priv->vlan_lock.m, "mlx4 vlan", NULL, MTX_DEF); - INIT_WORK(&priv->start_port_task, mlx4_en_lock_and_start_port); - INIT_WORK(&priv->stop_port_task, mlx4_en_lock_and_stop_port); - INIT_WORK(&priv->mcast_task, mlx4_en_do_set_multicast); - INIT_WORK(&priv->watchdog_task, mlx4_en_restart); - INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); - INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); - callout_init(&priv->watchdog_timer, 1); - - /* Query for default mac and max mtu */ - priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; - priv->mac = mdev->dev->caps.def_mac[priv->port]; - - if (ILLEGAL_MAC(priv->mac)) { - en_err(priv, "Port: %d, invalid mac burned: 0x%llx, quiting\n", - priv->port, (long long)priv->mac); - err = -EINVAL; - goto out; - } - - mlx4_en_sysctl_conf(priv); - - err = mlx4_en_alloc_resources(priv); - if (err) - goto out; - - /* Allocate page for receive rings */ - err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, - MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); - if (err) { - en_err(priv, "Failed to allocate page for rx qps\n"); - goto out; - } - priv->allocated = 1; - - /* - * Set driver features - */ - dev->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM; - dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; - dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; - dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; - if (mdev->LSO_support) - dev->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO; - if (mdev->profile.num_lro) - dev->if_capabilities |= IFCAP_LRO; - dev->if_capenable = dev->if_capabilities; - /* - * Setup wake-on-lan. - */ -#if 0 - if (priv->mdev->dev->caps.wol) { - u64 config; - if (mlx4_wol_read(priv->mdev->dev, &config, priv->port) == 0) { - if (config & MLX4_EN_WOL_MAGIC) - dev->if_capabilities |= IFCAP_WOL_MAGIC; - if (config & MLX4_EN_WOL_ENABLED) - dev->if_capenable |= IFCAP_WOL_MAGIC; - } } -#endif - - /* Register for VLAN events */ - priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, - mlx4_en_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); - priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, - mlx4_en_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); - - mdev->pndev[priv->port] = dev; - - priv->last_link_state = MLX4_DEV_EVENT_PORT_DOWN; - if_link_state_change(dev, LINK_STATE_DOWN); - - /* Set default MAC */ - for (i = 0; i < ETHER_ADDR_LEN; i++) - dev_addr[ETHER_ADDR_LEN - 1 - i] = (u8) (priv->mac >> (8 * i)); - - ether_ifattach(dev, dev_addr); - ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK, - mlx4_en_media_change, mlx4_en_media_status); - ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_1000_T, 0, NULL); - ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_SR, 0, NULL); - ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_CX4, 0, NULL); - ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); - ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO); - - en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); - en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); - - priv->registered = 1; - queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); - - return 0; - -out: - mlx4_en_destroy_netdev(dev); - return err; } - diff --git a/sys/ofed/drivers/net/mlx4/en_port.c b/sys/ofed/drivers/net/mlx4/en_port.c index 303bb2b..645c9c4 100644 --- a/sys/ofed/drivers/net/mlx4/en_port.c +++ b/sys/ofed/drivers/net/mlx4/en_port.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -31,28 +31,25 @@ * */ - -#include "mlx4_en.h" - +#include <sys/types.h> #include <linux/if_vlan.h> #include <linux/mlx4/device.h> #include <linux/mlx4/cmd.h> -#if 0 // moved to port.c -int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, - u64 mac, u64 clear, u8 mode) -{ - return mlx4_cmd(dev, (mac | (clear << 63)), port, mode, - MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); -} -#endif +#include "en_port.h" +#include "mlx4_en.h" +#define EN_IFQ_MIN_INTERVAL 3000 -int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, u32 *vlans) + +int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_set_vlan_fltr_mbox *filter; - int i, j; + int i; + int j; + int index = 0; + u32 entry; int err = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -60,85 +57,20 @@ int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, u32 *vlans) return PTR_ERR(mailbox); filter = mailbox->buf; - memset(filter, 0, sizeof *filter); - if (vlans) - for (i = 0, j = VLAN_FLTR_SIZE - 1; i < VLAN_FLTR_SIZE; - i++, j--) - filter->entry[j] = cpu_to_be32(vlans[i]); - err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_VLAN_FLTR, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); - mlx4_free_cmd_mailbox(dev, mailbox); - return err; -} - - -#if 0 //moved to port.c - shahark -int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, - u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx) -{ - struct mlx4_cmd_mailbox *mailbox; - struct mlx4_set_port_general_context *context; - int err; - u32 in_mod; - - mailbox = mlx4_alloc_cmd_mailbox(dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - context = mailbox->buf; - memset(context, 0, sizeof *context); - - context->flags = SET_PORT_GEN_ALL_VALID; - context->mtu = cpu_to_be16(mtu); - context->pptx = (pptx * (!pfctx)) << 7; - context->pfctx = pfctx; - context->pprx = (pprx * (!pfcrx)) << 7; - context->pfcrx = pfcrx; - - in_mod = MLX4_SET_PORT_GENERAL << 8 | port; - err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); - - mlx4_free_cmd_mailbox(dev, mailbox); - return err; -} -int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, - u8 promisc) -{ - - printf("%s %s:%d\n", __func__, __FILE__, __LINE__); - - - - struct mlx4_cmd_mailbox *mailbox; - struct mlx4_set_port_rqp_calc_context *context; - int err; - u32 in_mod; - - mailbox = mlx4_alloc_cmd_mailbox(dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - context = mailbox->buf; - memset(context, 0, sizeof *context); - - context->base_qpn = cpu_to_be32(base_qpn); - context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_EN_SHIFT | base_qpn); -/* - context->mcast = cpu_to_be32((dev->caps.mc_promisc_mode << - SET_PORT_PROMISC_MODE_SHIFT) | base_qpn); -*/ - context->intra_no_vlan = 0; - context->no_vlan = MLX4_NO_VLAN_IDX; - context->intra_vlan_miss = 0; - context->vlan_miss = MLX4_VLAN_MISS_IDX; - - in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port; - err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); - + memset(filter, 0, sizeof(*filter)); + for (i = VLAN_FLTR_SIZE - 1; i >= 0; i--) { + entry = 0; + for (j = 0; j < 32; j++) + if (test_bit(index, priv->active_vlans)) + entry |= 1 << j; + index++; + filter->entry[i] = cpu_to_be32(entry); + } + err = mlx4_cmd(dev, mailbox->dma, priv->port, 0, MLX4_CMD_SET_VLAN_FLTR, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev, mailbox); return err; } -#endif int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port) { @@ -153,7 +85,8 @@ int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port) return PTR_ERR(mailbox); memset(mailbox->buf, 0, sizeof(*qport_context)); err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0, - MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); if (err) goto out; qport_context = mailbox->buf; @@ -169,95 +102,77 @@ int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port) case MLX4_EN_10G_SPEED_XFI: state->link_speed = 10000; break; + case MLX4_EN_20G_SPEED: + state->link_speed = 20000; + break; case MLX4_EN_40G_SPEED: state->link_speed = 40000; break; + case MLX4_EN_56G_SPEED: + state->link_speed = 56000; + break; default: state->link_speed = -1; break; } state->transciver = qport_context->transceiver; - if (be32_to_cpu(qport_context->transceiver_code_hi) & 0x400) - state->transciver = 0x80; + state->autoneg = !!(qport_context->autoneg & MLX4_EN_AUTONEG_MASK); out: mlx4_free_cmd_mailbox(mdev->dev, mailbox); return err; } -#if 0 -static int read_iboe_counters(struct mlx4_dev *dev, int index, u64 counters[]) +int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) { - struct mlx4_cmd_mailbox *mailbox; + struct mlx4_en_stat_out_mbox *mlx4_en_stats; + struct mlx4_en_stat_out_flow_control_mbox *flowstats; + struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]); + struct mlx4_en_vport_stats *vport_stats = &priv->vport_stats; + struct mlx4_cmd_mailbox *mailbox = NULL; + struct mlx4_cmd_mailbox *mailbox_flow = NULL; + u64 in_mod = reset << 8 | port; int err; - int mode; - struct mlx4_counters_ext *ext; - struct mlx4_counters *reg; + int i; + int do_if_stat = 1; + unsigned long period = (unsigned long) (jiffies - priv->last_ifq_jiffies); + struct mlx4_en_vport_stats tmp_vport_stats; + struct net_device *dev; - mailbox = mlx4_alloc_cmd_mailbox(dev); - if (IS_ERR(mailbox)) - return -ENOMEM; + if (jiffies_to_msecs(period) < EN_IFQ_MIN_INTERVAL || + priv->counter_index == 0xff) + do_if_stat = 0; - err = mlx4_cmd_box(dev, 0, mailbox->dma, index, 0, - MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_WRAPPED); - if (err) - goto out; + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto mailbox_out; + } - mode = be32_to_cpu(((struct mlx4_counters *)mailbox->buf)->counter_mode) & 0xf; - switch (mode) { - case 0: - reg = mailbox->buf; - counters[0] = be64_to_cpu(reg->rx_frames); - counters[1] = be64_to_cpu(reg->tx_frames); - counters[2] = be64_to_cpu(reg->rx_bytes); - counters[3] = be64_to_cpu(reg->tx_bytes); - break; - case 1: - ext = mailbox->buf; - counters[0] = be64_to_cpu(ext->rx_uni_frames); - counters[1] = be64_to_cpu(ext->tx_uni_frames); - counters[2] = be64_to_cpu(ext->rx_uni_bytes); - counters[3] = be64_to_cpu(ext->tx_uni_bytes); - break; - default: - err = -EINVAL; + mailbox_flow = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox_flow)) { + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + err = PTR_ERR(mailbox_flow); + goto mailbox_out; } -out: - mlx4_free_cmd_mailbox(dev, mailbox); - return err; -} -#endif + /* 0xffs indicates invalid value */ + memset(mailbox_flow->buf, 0xff, sizeof(*flowstats) * + MLX4_NUM_PRIORITIES); -int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) -{ - struct mlx4_en_stat_out_mbox *mlx4_en_stats; - struct net_device *dev; - struct mlx4_en_priv *priv; - struct mlx4_cmd_mailbox *mailbox; - u64 in_mod = reset << 8 | port; - unsigned long oerror; - unsigned long ierror; - int err; - int i; - //int counter; - u64 counters[4]; - - dev = mdev->pndev[port]; - priv = netdev_priv(dev); - memset(counters, 0, sizeof counters); - /* - counter = mlx4_get_iboe_counter(priv->mdev->dev, port); - if (counter >= 0) - err = read_iboe_counters(priv->mdev->dev, counter, counters); - */ + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) { + memset(mailbox_flow->buf, 0, sizeof(*flowstats)); + err = mlx4_cmd_box(mdev->dev, 0, mailbox_flow->dma, + in_mod | 1<<12, 0, MLX4_CMD_DUMP_ETH_STATS, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + if (err) + goto out; + } - mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - memset(mailbox->buf, 0, sizeof(*mlx4_en_stats)); err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0, - MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); if (err) goto out; @@ -265,74 +180,394 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) spin_lock(&priv->stats_lock); - oerror = ierror = 0; - dev->if_ipackets = counters[0]; - dev->if_ibytes = counters[2]; + priv->port_stats.rx_chksum_good = 0; + priv->port_stats.rx_chksum_none = 0; for (i = 0; i < priv->rx_ring_num; i++) { - dev->if_ipackets += priv->rx_ring[i].packets; - dev->if_ibytes += priv->rx_ring[i].bytes; - ierror += priv->rx_ring[i].errors; + priv->port_stats.rx_chksum_good += priv->rx_ring[i]->csum_ok; + priv->port_stats.rx_chksum_none += priv->rx_ring[i]->csum_none; } - dev->if_opackets = counters[1]; - dev->if_obytes = counters[3]; - for (i = 0; i <= priv->tx_ring_num; i++) { - dev->if_opackets += priv->tx_ring[i].packets; - dev->if_obytes += priv->tx_ring[i].bytes; - oerror += priv->tx_ring[i].errors; + + priv->port_stats.tx_chksum_offload = 0; + priv->port_stats.queue_stopped = 0; + priv->port_stats.wake_queue = 0; + for (i = 0; i < priv->tx_ring_num; i++) { + priv->port_stats.tx_chksum_offload += priv->tx_ring[i]->tx_csum; + priv->port_stats.queue_stopped += priv->tx_ring[i]->queue_stopped; + priv->port_stats.wake_queue += priv->tx_ring[i]->wake_queue; + } + /* RX Statistics */ + priv->pkstats.rx_packets = be64_to_cpu(mlx4_en_stats->RTOT_prio_0) + + be64_to_cpu(mlx4_en_stats->RTOT_prio_1) + + be64_to_cpu(mlx4_en_stats->RTOT_prio_2) + + be64_to_cpu(mlx4_en_stats->RTOT_prio_3) + + be64_to_cpu(mlx4_en_stats->RTOT_prio_4) + + be64_to_cpu(mlx4_en_stats->RTOT_prio_5) + + be64_to_cpu(mlx4_en_stats->RTOT_prio_6) + + be64_to_cpu(mlx4_en_stats->RTOT_prio_7) + + be64_to_cpu(mlx4_en_stats->RTOT_novlan); + priv->pkstats.rx_bytes = be64_to_cpu(mlx4_en_stats->ROCT_prio_0) + + be64_to_cpu(mlx4_en_stats->ROCT_prio_1) + + be64_to_cpu(mlx4_en_stats->ROCT_prio_2) + + be64_to_cpu(mlx4_en_stats->ROCT_prio_3) + + be64_to_cpu(mlx4_en_stats->ROCT_prio_4) + + be64_to_cpu(mlx4_en_stats->ROCT_prio_5) + + be64_to_cpu(mlx4_en_stats->ROCT_prio_6) + + be64_to_cpu(mlx4_en_stats->ROCT_prio_7) + + be64_to_cpu(mlx4_en_stats->ROCT_novlan); + priv->pkstats.rx_multicast_packets = be64_to_cpu(mlx4_en_stats->MCAST_prio_0) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_1) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_2) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_3) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_4) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_5) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_6) + + be64_to_cpu(mlx4_en_stats->MCAST_prio_7) + + be64_to_cpu(mlx4_en_stats->MCAST_novlan); + priv->pkstats.rx_broadcast_packets = be64_to_cpu(mlx4_en_stats->RBCAST_prio_0) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_1) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_2) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_3) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_4) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_5) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_6) + + be64_to_cpu(mlx4_en_stats->RBCAST_prio_7) + + be64_to_cpu(mlx4_en_stats->RBCAST_novlan); + priv->pkstats.rx_errors = be64_to_cpu(mlx4_en_stats->PCS) + + be32_to_cpu(mlx4_en_stats->RJBBR) + + be32_to_cpu(mlx4_en_stats->RCRC) + + be32_to_cpu(mlx4_en_stats->RRUNT) + + be64_to_cpu(mlx4_en_stats->RInRangeLengthErr) + + be64_to_cpu(mlx4_en_stats->ROutRangeLengthErr) + + be32_to_cpu(mlx4_en_stats->RSHORT) + + be64_to_cpu(mlx4_en_stats->RGIANT_prio_0) + + be64_to_cpu(mlx4_en_stats->RGIANT_prio_1) + + be64_to_cpu(mlx4_en_stats->RGIANT_prio_2) + + be64_to_cpu(mlx4_en_stats->RGIANT_prio_3) + + be64_to_cpu(mlx4_en_stats->RGIANT_prio_4) + + be64_to_cpu(mlx4_en_stats->RGIANT_prio_5) + + be64_to_cpu(mlx4_en_stats->RGIANT_prio_6) + + be64_to_cpu(mlx4_en_stats->RGIANT_prio_7) + + be64_to_cpu(mlx4_en_stats->RGIANT_novlan); + priv->pkstats.rx_dropped = be32_to_cpu(mlx4_en_stats->RdropOvflw); + priv->pkstats.rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength); + priv->pkstats.rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + priv->pkstats.rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC); + priv->pkstats.rx_jabbers = be32_to_cpu(mlx4_en_stats->RJBBR); + priv->pkstats.rx_in_range_length_error = be64_to_cpu(mlx4_en_stats->RInRangeLengthErr); + priv->pkstats.rx_out_range_length_error = be64_to_cpu(mlx4_en_stats->ROutRangeLengthErr); + priv->pkstats.rx_lt_64_bytes_packets = be64_to_cpu(mlx4_en_stats->R64_prio_0) + + be64_to_cpu(mlx4_en_stats->R64_prio_1) + + be64_to_cpu(mlx4_en_stats->R64_prio_2) + + be64_to_cpu(mlx4_en_stats->R64_prio_3) + + be64_to_cpu(mlx4_en_stats->R64_prio_4) + + be64_to_cpu(mlx4_en_stats->R64_prio_5) + + be64_to_cpu(mlx4_en_stats->R64_prio_6) + + be64_to_cpu(mlx4_en_stats->R64_prio_7) + + be64_to_cpu(mlx4_en_stats->R64_novlan); + priv->pkstats.rx_127_bytes_packets = be64_to_cpu(mlx4_en_stats->R127_prio_0) + + be64_to_cpu(mlx4_en_stats->R127_prio_1) + + be64_to_cpu(mlx4_en_stats->R127_prio_2) + + be64_to_cpu(mlx4_en_stats->R127_prio_3) + + be64_to_cpu(mlx4_en_stats->R127_prio_4) + + be64_to_cpu(mlx4_en_stats->R127_prio_5) + + be64_to_cpu(mlx4_en_stats->R127_prio_6) + + be64_to_cpu(mlx4_en_stats->R127_prio_7) + + be64_to_cpu(mlx4_en_stats->R127_novlan); + priv->pkstats.rx_255_bytes_packets = be64_to_cpu(mlx4_en_stats->R255_prio_0) + + be64_to_cpu(mlx4_en_stats->R255_prio_1) + + be64_to_cpu(mlx4_en_stats->R255_prio_2) + + be64_to_cpu(mlx4_en_stats->R255_prio_3) + + be64_to_cpu(mlx4_en_stats->R255_prio_4) + + be64_to_cpu(mlx4_en_stats->R255_prio_5) + + be64_to_cpu(mlx4_en_stats->R255_prio_6) + + be64_to_cpu(mlx4_en_stats->R255_prio_7) + + be64_to_cpu(mlx4_en_stats->R255_novlan); + priv->pkstats.rx_511_bytes_packets = be64_to_cpu(mlx4_en_stats->R511_prio_0) + + be64_to_cpu(mlx4_en_stats->R511_prio_1) + + be64_to_cpu(mlx4_en_stats->R511_prio_2) + + be64_to_cpu(mlx4_en_stats->R511_prio_3) + + be64_to_cpu(mlx4_en_stats->R511_prio_4) + + be64_to_cpu(mlx4_en_stats->R511_prio_5) + + be64_to_cpu(mlx4_en_stats->R511_prio_6) + + be64_to_cpu(mlx4_en_stats->R511_prio_7) + + be64_to_cpu(mlx4_en_stats->R511_novlan); + priv->pkstats.rx_1023_bytes_packets = be64_to_cpu(mlx4_en_stats->R1023_prio_0) + + be64_to_cpu(mlx4_en_stats->R1023_prio_1) + + be64_to_cpu(mlx4_en_stats->R1023_prio_2) + + be64_to_cpu(mlx4_en_stats->R1023_prio_3) + + be64_to_cpu(mlx4_en_stats->R1023_prio_4) + + be64_to_cpu(mlx4_en_stats->R1023_prio_5) + + be64_to_cpu(mlx4_en_stats->R1023_prio_6) + + be64_to_cpu(mlx4_en_stats->R1023_prio_7) + + be64_to_cpu(mlx4_en_stats->R1023_novlan); + priv->pkstats.rx_1518_bytes_packets = be64_to_cpu(mlx4_en_stats->R1518_prio_0) + + be64_to_cpu(mlx4_en_stats->R1518_prio_1) + + be64_to_cpu(mlx4_en_stats->R1518_prio_2) + + be64_to_cpu(mlx4_en_stats->R1518_prio_3) + + be64_to_cpu(mlx4_en_stats->R1518_prio_4) + + be64_to_cpu(mlx4_en_stats->R1518_prio_5) + + be64_to_cpu(mlx4_en_stats->R1518_prio_6) + + be64_to_cpu(mlx4_en_stats->R1518_prio_7) + + be64_to_cpu(mlx4_en_stats->R1518_novlan); + priv->pkstats.rx_1522_bytes_packets = be64_to_cpu(mlx4_en_stats->R1522_prio_0) + + be64_to_cpu(mlx4_en_stats->R1522_prio_1) + + be64_to_cpu(mlx4_en_stats->R1522_prio_2) + + be64_to_cpu(mlx4_en_stats->R1522_prio_3) + + be64_to_cpu(mlx4_en_stats->R1522_prio_4) + + be64_to_cpu(mlx4_en_stats->R1522_prio_5) + + be64_to_cpu(mlx4_en_stats->R1522_prio_6) + + be64_to_cpu(mlx4_en_stats->R1522_prio_7) + + be64_to_cpu(mlx4_en_stats->R1522_novlan); + priv->pkstats.rx_1548_bytes_packets = be64_to_cpu(mlx4_en_stats->R1548_prio_0) + + be64_to_cpu(mlx4_en_stats->R1548_prio_1) + + be64_to_cpu(mlx4_en_stats->R1548_prio_2) + + be64_to_cpu(mlx4_en_stats->R1548_prio_3) + + be64_to_cpu(mlx4_en_stats->R1548_prio_4) + + be64_to_cpu(mlx4_en_stats->R1548_prio_5) + + be64_to_cpu(mlx4_en_stats->R1548_prio_6) + + be64_to_cpu(mlx4_en_stats->R1548_prio_7) + + be64_to_cpu(mlx4_en_stats->R1548_novlan); + priv->pkstats.rx_gt_1548_bytes_packets = be64_to_cpu(mlx4_en_stats->R2MTU_prio_0) + + be64_to_cpu(mlx4_en_stats->R2MTU_prio_1) + + be64_to_cpu(mlx4_en_stats->R2MTU_prio_2) + + be64_to_cpu(mlx4_en_stats->R2MTU_prio_3) + + be64_to_cpu(mlx4_en_stats->R2MTU_prio_4) + + be64_to_cpu(mlx4_en_stats->R2MTU_prio_5) + + be64_to_cpu(mlx4_en_stats->R2MTU_prio_6) + + be64_to_cpu(mlx4_en_stats->R2MTU_prio_7) + + be64_to_cpu(mlx4_en_stats->R2MTU_novlan); + + /* Tx Stats */ + priv->pkstats.tx_packets = be64_to_cpu(mlx4_en_stats->TTOT_prio_0) + + be64_to_cpu(mlx4_en_stats->TTOT_prio_1) + + be64_to_cpu(mlx4_en_stats->TTOT_prio_2) + + be64_to_cpu(mlx4_en_stats->TTOT_prio_3) + + be64_to_cpu(mlx4_en_stats->TTOT_prio_4) + + be64_to_cpu(mlx4_en_stats->TTOT_prio_5) + + be64_to_cpu(mlx4_en_stats->TTOT_prio_6) + + be64_to_cpu(mlx4_en_stats->TTOT_prio_7) + + be64_to_cpu(mlx4_en_stats->TTOT_novlan); + priv->pkstats.tx_bytes = be64_to_cpu(mlx4_en_stats->TOCT_prio_0) + + be64_to_cpu(mlx4_en_stats->TOCT_prio_1) + + be64_to_cpu(mlx4_en_stats->TOCT_prio_2) + + be64_to_cpu(mlx4_en_stats->TOCT_prio_3) + + be64_to_cpu(mlx4_en_stats->TOCT_prio_4) + + be64_to_cpu(mlx4_en_stats->TOCT_prio_5) + + be64_to_cpu(mlx4_en_stats->TOCT_prio_6) + + be64_to_cpu(mlx4_en_stats->TOCT_prio_7) + + be64_to_cpu(mlx4_en_stats->TOCT_novlan); + priv->pkstats.tx_multicast_packets = be64_to_cpu(mlx4_en_stats->TMCAST_prio_0) + + be64_to_cpu(mlx4_en_stats->TMCAST_prio_1) + + be64_to_cpu(mlx4_en_stats->TMCAST_prio_2) + + be64_to_cpu(mlx4_en_stats->TMCAST_prio_3) + + be64_to_cpu(mlx4_en_stats->TMCAST_prio_4) + + be64_to_cpu(mlx4_en_stats->TMCAST_prio_5) + + be64_to_cpu(mlx4_en_stats->TMCAST_prio_6) + + be64_to_cpu(mlx4_en_stats->TMCAST_prio_7) + + be64_to_cpu(mlx4_en_stats->TMCAST_novlan); + priv->pkstats.tx_broadcast_packets = be64_to_cpu(mlx4_en_stats->TBCAST_prio_0) + + be64_to_cpu(mlx4_en_stats->TBCAST_prio_1) + + be64_to_cpu(mlx4_en_stats->TBCAST_prio_2) + + be64_to_cpu(mlx4_en_stats->TBCAST_prio_3) + + be64_to_cpu(mlx4_en_stats->TBCAST_prio_4) + + be64_to_cpu(mlx4_en_stats->TBCAST_prio_5) + + be64_to_cpu(mlx4_en_stats->TBCAST_prio_6) + + be64_to_cpu(mlx4_en_stats->TBCAST_prio_7) + + be64_to_cpu(mlx4_en_stats->TBCAST_novlan); + priv->pkstats.tx_errors = be64_to_cpu(mlx4_en_stats->TGIANT_prio_0) + + be64_to_cpu(mlx4_en_stats->TGIANT_prio_1) + + be64_to_cpu(mlx4_en_stats->TGIANT_prio_2) + + be64_to_cpu(mlx4_en_stats->TGIANT_prio_3) + + be64_to_cpu(mlx4_en_stats->TGIANT_prio_4) + + be64_to_cpu(mlx4_en_stats->TGIANT_prio_5) + + be64_to_cpu(mlx4_en_stats->TGIANT_prio_6) + + be64_to_cpu(mlx4_en_stats->TGIANT_prio_7) + + be64_to_cpu(mlx4_en_stats->TGIANT_novlan); + priv->pkstats.tx_dropped = be32_to_cpu(mlx4_en_stats->TDROP) - + priv->pkstats.tx_errors; + priv->pkstats.tx_lt_64_bytes_packets = be64_to_cpu(mlx4_en_stats->T64_prio_0) + + be64_to_cpu(mlx4_en_stats->T64_prio_1) + + be64_to_cpu(mlx4_en_stats->T64_prio_2) + + be64_to_cpu(mlx4_en_stats->T64_prio_3) + + be64_to_cpu(mlx4_en_stats->T64_prio_4) + + be64_to_cpu(mlx4_en_stats->T64_prio_5) + + be64_to_cpu(mlx4_en_stats->T64_prio_6) + + be64_to_cpu(mlx4_en_stats->T64_prio_7) + + be64_to_cpu(mlx4_en_stats->T64_novlan); + priv->pkstats.tx_127_bytes_packets = be64_to_cpu(mlx4_en_stats->T127_prio_0) + + be64_to_cpu(mlx4_en_stats->T127_prio_1) + + be64_to_cpu(mlx4_en_stats->T127_prio_2) + + be64_to_cpu(mlx4_en_stats->T127_prio_3) + + be64_to_cpu(mlx4_en_stats->T127_prio_4) + + be64_to_cpu(mlx4_en_stats->T127_prio_5) + + be64_to_cpu(mlx4_en_stats->T127_prio_6) + + be64_to_cpu(mlx4_en_stats->T127_prio_7) + + be64_to_cpu(mlx4_en_stats->T127_novlan); + priv->pkstats.tx_255_bytes_packets = be64_to_cpu(mlx4_en_stats->T255_prio_0) + + be64_to_cpu(mlx4_en_stats->T255_prio_1) + + be64_to_cpu(mlx4_en_stats->T255_prio_2) + + be64_to_cpu(mlx4_en_stats->T255_prio_3) + + be64_to_cpu(mlx4_en_stats->T255_prio_4) + + be64_to_cpu(mlx4_en_stats->T255_prio_5) + + be64_to_cpu(mlx4_en_stats->T255_prio_6) + + be64_to_cpu(mlx4_en_stats->T255_prio_7) + + be64_to_cpu(mlx4_en_stats->T255_novlan); + priv->pkstats.tx_511_bytes_packets = be64_to_cpu(mlx4_en_stats->T511_prio_0) + + be64_to_cpu(mlx4_en_stats->T511_prio_1) + + be64_to_cpu(mlx4_en_stats->T511_prio_2) + + be64_to_cpu(mlx4_en_stats->T511_prio_3) + + be64_to_cpu(mlx4_en_stats->T511_prio_4) + + be64_to_cpu(mlx4_en_stats->T511_prio_5) + + be64_to_cpu(mlx4_en_stats->T511_prio_6) + + be64_to_cpu(mlx4_en_stats->T511_prio_7) + + be64_to_cpu(mlx4_en_stats->T511_novlan); + priv->pkstats.tx_1023_bytes_packets = be64_to_cpu(mlx4_en_stats->T1023_prio_0) + + be64_to_cpu(mlx4_en_stats->T1023_prio_1) + + be64_to_cpu(mlx4_en_stats->T1023_prio_2) + + be64_to_cpu(mlx4_en_stats->T1023_prio_3) + + be64_to_cpu(mlx4_en_stats->T1023_prio_4) + + be64_to_cpu(mlx4_en_stats->T1023_prio_5) + + be64_to_cpu(mlx4_en_stats->T1023_prio_6) + + be64_to_cpu(mlx4_en_stats->T1023_prio_7) + + be64_to_cpu(mlx4_en_stats->T1023_novlan); + priv->pkstats.tx_1518_bytes_packets = be64_to_cpu(mlx4_en_stats->T1518_prio_0) + + be64_to_cpu(mlx4_en_stats->T1518_prio_1) + + be64_to_cpu(mlx4_en_stats->T1518_prio_2) + + be64_to_cpu(mlx4_en_stats->T1518_prio_3) + + be64_to_cpu(mlx4_en_stats->T1518_prio_4) + + be64_to_cpu(mlx4_en_stats->T1518_prio_5) + + be64_to_cpu(mlx4_en_stats->T1518_prio_6) + + be64_to_cpu(mlx4_en_stats->T1518_prio_7) + + be64_to_cpu(mlx4_en_stats->T1518_novlan); + priv->pkstats.tx_1522_bytes_packets = be64_to_cpu(mlx4_en_stats->T1522_prio_0) + + be64_to_cpu(mlx4_en_stats->T1522_prio_1) + + be64_to_cpu(mlx4_en_stats->T1522_prio_2) + + be64_to_cpu(mlx4_en_stats->T1522_prio_3) + + be64_to_cpu(mlx4_en_stats->T1522_prio_4) + + be64_to_cpu(mlx4_en_stats->T1522_prio_5) + + be64_to_cpu(mlx4_en_stats->T1522_prio_6) + + be64_to_cpu(mlx4_en_stats->T1522_prio_7) + + be64_to_cpu(mlx4_en_stats->T1522_novlan); + priv->pkstats.tx_1548_bytes_packets = be64_to_cpu(mlx4_en_stats->T1548_prio_0) + + be64_to_cpu(mlx4_en_stats->T1548_prio_1) + + be64_to_cpu(mlx4_en_stats->T1548_prio_2) + + be64_to_cpu(mlx4_en_stats->T1548_prio_3) + + be64_to_cpu(mlx4_en_stats->T1548_prio_4) + + be64_to_cpu(mlx4_en_stats->T1548_prio_5) + + be64_to_cpu(mlx4_en_stats->T1548_prio_6) + + be64_to_cpu(mlx4_en_stats->T1548_prio_7) + + be64_to_cpu(mlx4_en_stats->T1548_novlan); + priv->pkstats.tx_gt_1548_bytes_packets = be64_to_cpu(mlx4_en_stats->T2MTU_prio_0) + + be64_to_cpu(mlx4_en_stats->T2MTU_prio_1) + + be64_to_cpu(mlx4_en_stats->T2MTU_prio_2) + + be64_to_cpu(mlx4_en_stats->T2MTU_prio_3) + + be64_to_cpu(mlx4_en_stats->T2MTU_prio_4) + + be64_to_cpu(mlx4_en_stats->T2MTU_prio_5) + + be64_to_cpu(mlx4_en_stats->T2MTU_prio_6) + + be64_to_cpu(mlx4_en_stats->T2MTU_prio_7) + + be64_to_cpu(mlx4_en_stats->T2MTU_novlan); + + priv->pkstats.rx_prio[0][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_0); + priv->pkstats.rx_prio[0][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_0); + priv->pkstats.rx_prio[1][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_1); + priv->pkstats.rx_prio[1][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_1); + priv->pkstats.rx_prio[2][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_2); + priv->pkstats.rx_prio[2][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_2); + priv->pkstats.rx_prio[3][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_3); + priv->pkstats.rx_prio[3][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_3); + priv->pkstats.rx_prio[4][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_4); + priv->pkstats.rx_prio[4][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_4); + priv->pkstats.rx_prio[5][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_5); + priv->pkstats.rx_prio[5][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_5); + priv->pkstats.rx_prio[6][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_6); + priv->pkstats.rx_prio[6][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_6); + priv->pkstats.rx_prio[7][0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_7); + priv->pkstats.rx_prio[7][1] = be64_to_cpu(mlx4_en_stats->ROCT_prio_7); + priv->pkstats.rx_prio[8][0] = be64_to_cpu(mlx4_en_stats->RTOT_novlan); + priv->pkstats.rx_prio[8][1] = be64_to_cpu(mlx4_en_stats->ROCT_novlan); + priv->pkstats.tx_prio[0][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_0); + priv->pkstats.tx_prio[0][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_0); + priv->pkstats.tx_prio[1][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_1); + priv->pkstats.tx_prio[1][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_1); + priv->pkstats.tx_prio[2][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_2); + priv->pkstats.tx_prio[2][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_2); + priv->pkstats.tx_prio[3][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_3); + priv->pkstats.tx_prio[3][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_3); + priv->pkstats.tx_prio[4][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_4); + priv->pkstats.tx_prio[4][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_4); + priv->pkstats.tx_prio[5][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_5); + priv->pkstats.tx_prio[5][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_5); + priv->pkstats.tx_prio[6][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_6); + priv->pkstats.tx_prio[6][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_6); + priv->pkstats.tx_prio[7][0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_7); + priv->pkstats.tx_prio[7][1] = be64_to_cpu(mlx4_en_stats->TOCT_prio_7); + priv->pkstats.tx_prio[8][0] = be64_to_cpu(mlx4_en_stats->TTOT_novlan); + priv->pkstats.tx_prio[8][1] = be64_to_cpu(mlx4_en_stats->TOCT_novlan); + + flowstats = mailbox_flow->buf; + + for (i = 0; i < MLX4_NUM_PRIORITIES; i++) { + priv->flowstats[i].rx_pause = + be64_to_cpu(flowstats[i].rx_pause); + priv->flowstats[i].rx_pause_duration = + be64_to_cpu(flowstats[i].rx_pause_duration); + priv->flowstats[i].rx_pause_transition = + be64_to_cpu(flowstats[i].rx_pause_transition); + priv->flowstats[i].tx_pause = + be64_to_cpu(flowstats[i].tx_pause); + priv->flowstats[i].tx_pause_duration = + be64_to_cpu(flowstats[i].tx_pause_duration); + priv->flowstats[i].tx_pause_transition = + be64_to_cpu(flowstats[i].tx_pause_transition); + } + + memset(&tmp_vport_stats, 0, sizeof(tmp_vport_stats)); + spin_unlock(&priv->stats_lock); + err = mlx4_get_vport_ethtool_stats(mdev->dev, port, + &tmp_vport_stats, reset); + spin_lock(&priv->stats_lock); + if (!err) { + /* ethtool stats format */ + vport_stats->rx_unicast_packets = tmp_vport_stats.rx_unicast_packets; + vport_stats->rx_unicast_bytes = tmp_vport_stats.rx_unicast_bytes; + vport_stats->rx_multicast_packets = tmp_vport_stats.rx_multicast_packets; + vport_stats->rx_multicast_bytes = tmp_vport_stats.rx_multicast_bytes; + vport_stats->rx_broadcast_packets = tmp_vport_stats.rx_broadcast_packets; + vport_stats->rx_broadcast_bytes = tmp_vport_stats.rx_broadcast_bytes; + vport_stats->rx_dropped = tmp_vport_stats.rx_dropped; + vport_stats->rx_errors = tmp_vport_stats.rx_errors; + vport_stats->tx_unicast_packets = tmp_vport_stats.tx_unicast_packets; + vport_stats->tx_unicast_bytes = tmp_vport_stats.tx_unicast_bytes; + vport_stats->tx_multicast_packets = tmp_vport_stats.tx_multicast_packets; + vport_stats->tx_multicast_bytes = tmp_vport_stats.tx_multicast_bytes; + vport_stats->tx_broadcast_packets = tmp_vport_stats.tx_broadcast_packets; + vport_stats->tx_broadcast_bytes = tmp_vport_stats.tx_broadcast_bytes; + vport_stats->tx_errors = tmp_vport_stats.tx_errors; + } + + if (!mlx4_is_mfunc(mdev->dev)) { + /* netdevice stats format */ + dev = mdev->pndev[port]; + dev->if_ipackets = priv->pkstats.rx_packets; + dev->if_opackets = priv->pkstats.tx_packets; + dev->if_ibytes = priv->pkstats.rx_bytes; + dev->if_obytes = priv->pkstats.tx_bytes; + dev->if_ierrors = priv->pkstats.rx_errors; + dev->if_iqdrops = priv->pkstats.rx_dropped; + dev->if_imcasts = priv->pkstats.rx_multicast_packets; + dev->if_omcasts = priv->pkstats.tx_multicast_packets; + dev->if_collisions = 0; } - dev->if_ierrors = be32_to_cpu(mlx4_en_stats->RDROP) + ierror; - dev->if_oerrors = be32_to_cpu(mlx4_en_stats->TDROP) + oerror; - dev->if_imcasts = be64_to_cpu(mlx4_en_stats->MCAST_prio_0) + - be64_to_cpu(mlx4_en_stats->MCAST_prio_1) + - be64_to_cpu(mlx4_en_stats->MCAST_prio_2) + - be64_to_cpu(mlx4_en_stats->MCAST_prio_3) + - be64_to_cpu(mlx4_en_stats->MCAST_prio_4) + - be64_to_cpu(mlx4_en_stats->MCAST_prio_5) + - be64_to_cpu(mlx4_en_stats->MCAST_prio_6) + - be64_to_cpu(mlx4_en_stats->MCAST_prio_7) + - be64_to_cpu(mlx4_en_stats->MCAST_novlan); - dev->if_omcasts = be64_to_cpu(mlx4_en_stats->TMCAST_prio_0) + - be64_to_cpu(mlx4_en_stats->TMCAST_prio_1) + - be64_to_cpu(mlx4_en_stats->TMCAST_prio_2) + - be64_to_cpu(mlx4_en_stats->TMCAST_prio_3) + - be64_to_cpu(mlx4_en_stats->TMCAST_prio_4) + - be64_to_cpu(mlx4_en_stats->TMCAST_prio_5) + - be64_to_cpu(mlx4_en_stats->TMCAST_prio_6) + - be64_to_cpu(mlx4_en_stats->TMCAST_prio_7) + - be64_to_cpu(mlx4_en_stats->TMCAST_novlan); - dev->if_collisions = 0; - - priv->pkstats.broadcast = - be64_to_cpu(mlx4_en_stats->RBCAST_prio_0) + - be64_to_cpu(mlx4_en_stats->RBCAST_prio_1) + - be64_to_cpu(mlx4_en_stats->RBCAST_prio_2) + - be64_to_cpu(mlx4_en_stats->RBCAST_prio_3) + - be64_to_cpu(mlx4_en_stats->RBCAST_prio_4) + - be64_to_cpu(mlx4_en_stats->RBCAST_prio_5) + - be64_to_cpu(mlx4_en_stats->RBCAST_prio_6) + - be64_to_cpu(mlx4_en_stats->RBCAST_prio_7) + - be64_to_cpu(mlx4_en_stats->RBCAST_novlan); - priv->pkstats.rx_prio[0] = be64_to_cpu(mlx4_en_stats->RTOT_prio_0); - priv->pkstats.rx_prio[1] = be64_to_cpu(mlx4_en_stats->RTOT_prio_1); - priv->pkstats.rx_prio[2] = be64_to_cpu(mlx4_en_stats->RTOT_prio_2); - priv->pkstats.rx_prio[3] = be64_to_cpu(mlx4_en_stats->RTOT_prio_3); - priv->pkstats.rx_prio[4] = be64_to_cpu(mlx4_en_stats->RTOT_prio_4); - priv->pkstats.rx_prio[5] = be64_to_cpu(mlx4_en_stats->RTOT_prio_5); - priv->pkstats.rx_prio[6] = be64_to_cpu(mlx4_en_stats->RTOT_prio_6); - priv->pkstats.rx_prio[7] = be64_to_cpu(mlx4_en_stats->RTOT_prio_7); - priv->pkstats.tx_prio[0] = be64_to_cpu(mlx4_en_stats->TTOT_prio_0); - priv->pkstats.tx_prio[1] = be64_to_cpu(mlx4_en_stats->TTOT_prio_1); - priv->pkstats.tx_prio[2] = be64_to_cpu(mlx4_en_stats->TTOT_prio_2); - priv->pkstats.tx_prio[3] = be64_to_cpu(mlx4_en_stats->TTOT_prio_3); - priv->pkstats.tx_prio[4] = be64_to_cpu(mlx4_en_stats->TTOT_prio_4); - priv->pkstats.tx_prio[5] = be64_to_cpu(mlx4_en_stats->TTOT_prio_5); - priv->pkstats.tx_prio[6] = be64_to_cpu(mlx4_en_stats->TTOT_prio_6); - priv->pkstats.tx_prio[7] = be64_to_cpu(mlx4_en_stats->TTOT_prio_7); spin_unlock(&priv->stats_lock); out: + mlx4_free_cmd_mailbox(mdev->dev, mailbox_flow); mlx4_free_cmd_mailbox(mdev->dev, mailbox); + +mailbox_out: + if (do_if_stat) + priv->last_ifq_jiffies = jiffies; + return err; } - diff --git a/sys/ofed/drivers/net/mlx4/en_port.h b/sys/ofed/drivers/net/mlx4/en_port.h index 5319814..6301717 100644 --- a/sys/ofed/drivers/net/mlx4/en_port.h +++ b/sys/ofed/drivers/net/mlx4/en_port.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -36,39 +36,10 @@ #define SET_PORT_GEN_ALL_VALID 0x7 -#define SET_PORT_PROMISC_EN_SHIFT 31 -#define SET_PORT_PROMISC_MODE_SHIFT 30 +#define SET_PORT_PROMISC_SHIFT 31 +#define SET_PORT_MC_PROMISC_SHIFT 30 -#if 0 //moved to port.c - shahark - -struct mlx4_set_port_general_context { - u8 reserved[3]; - u8 flags; - u16 reserved2; - __be16 mtu; - u8 pptx; - u8 pfctx; - u16 reserved3; - u8 pprx; - u8 pfcrx; - u16 reserved4; -}; - -struct mlx4_set_port_rqp_calc_context { - __be32 base_qpn; - __be32 flags; - u8 reserved[3]; - u8 mac_miss; - u8 intra_no_vlan; - u8 no_vlan; - u8 intra_vlan_miss; - u8 vlan_miss; - u8 reserved2[3]; - u8 no_vlan_prio; - __be32 promisc; - __be32 mcast; -}; -#endif +#define MLX4_EN_NUM_TC 8 #define VLAN_FLTR_SIZE 128 struct mlx4_set_vlan_fltr_mbox { @@ -83,29 +54,27 @@ enum { }; enum { - MLX4_EN_1G_SPEED = 0x02, - MLX4_EN_10G_SPEED_XFI = 0x01, MLX4_EN_10G_SPEED_XAUI = 0x00, + MLX4_EN_10G_SPEED_XFI = 0x01, + MLX4_EN_1G_SPEED = 0x02, + MLX4_EN_20G_SPEED = 0x08, MLX4_EN_40G_SPEED = 0x40, + MLX4_EN_56G_SPEED = 0x20, MLX4_EN_OTHER_SPEED = 0x0f, }; struct mlx4_en_query_port_context { u8 link_up; #define MLX4_EN_LINK_UP_MASK 0x80 - u8 reserved; + u8 autoneg; +#define MLX4_EN_AUTONEG_MASK 0x80 __be16 mtu; u8 reserved2; u8 link_speed; -#define MLX4_EN_SPEED_MASK 0x43 +#define MLX4_EN_SPEED_MASK 0x6b u16 reserved3[5]; __be64 mac; u8 transceiver; - u8 reserved4[3]; - __be32 wavelenth; - u32 reserved5; - __be32 transceiver_code_hi; - __be32 transceiver_code_low; }; @@ -590,6 +559,5 @@ struct mlx4_en_stat_out_mbox { __be32 TDROP; }; -enum mlx4_query_reply mlx4_en_query(void *endev_ptr, void *int_dev); #endif diff --git a/sys/ofed/drivers/net/mlx4/en_resources.c b/sys/ofed/drivers/net/mlx4/en_resources.c index a147153..669ecbd 100644 --- a/sys/ofed/drivers/net/mlx4/en_resources.c +++ b/sys/ofed/drivers/net/mlx4/en_resources.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -31,24 +31,26 @@ * */ +#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mlx4/qp.h> #include "mlx4_en.h" + void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int is_tx, int rss, int qpn, int cqn, - struct mlx4_qp_context *context) + int user_prio, struct mlx4_qp_context *context) { struct mlx4_en_dev *mdev = priv->mdev; + struct net_device *dev = priv->dev; memset(context, 0, sizeof *context); - context->flags = cpu_to_be32(7 << 16 | rss << 13); + context->flags = cpu_to_be32(7 << 16 | rss << MLX4_RSS_QPC_FLAG_OFFSET); context->pd = cpu_to_be32(mdev->priv_pdn); context->mtu_msgmax = 0xff; - if (!is_tx && !rss) { + if (!is_tx && !rss) context->rq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); - } if (is_tx) context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); else @@ -57,10 +59,25 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->local_qpn = cpu_to_be32(qpn); context->pri_path.ackto = 1 & 0x07; context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6; - context->pri_path.counter_index = 0xff; + if (user_prio >= 0) { + context->pri_path.sched_queue |= user_prio << 3; + context->pri_path.feup = 1 << 6; + } + context->pri_path.counter_index = (u8)(priv->counter_index); + if (!rss && + (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK) && + context->pri_path.counter_index != 0xFF) { + /* disable multicast loopback to qp with same counter */ + context->pri_path.fl |= MLX4_FL_ETH_SRC_CHECK_MC_LB; + context->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_SRC_CHECK_IF_COUNTER; + } + context->cqn_send = cpu_to_be32(cqn); context->cqn_recv = cpu_to_be32(cqn); context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2); + if (!(dev->if_capabilities & IFCAP_VLAN_HWCSUM)) + context->param3 |= cpu_to_be32(1 << 30); } @@ -69,6 +86,8 @@ int mlx4_en_map_buffer(struct mlx4_buf *buf) struct page **pages; int i; + // if nbufs == 1 - there is no need to vmap + // if buf->direct.buf is not NULL it means that vmap was already done by mlx4_alloc_buff if (buf->direct.buf != NULL || buf->nbufs == 1) return 0; @@ -89,11 +108,10 @@ int mlx4_en_map_buffer(struct mlx4_buf *buf) void mlx4_en_unmap_buffer(struct mlx4_buf *buf) { - if (buf->direct.buf != NULL || buf->nbufs == 1) + if (BITS_PER_LONG == 64 || buf->nbufs == 1) return; vunmap(buf->direct.buf); - buf->direct.buf = NULL; } void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event) diff --git a/sys/ofed/drivers/net/mlx4/en_rx.c b/sys/ofed/drivers/net/mlx4/en_rx.c index ca46721..320462e 100644 --- a/sys/ofed/drivers/net/mlx4/en_rx.c +++ b/sys/ofed/drivers/net/mlx4/en_rx.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -30,20 +30,48 @@ * SOFTWARE. * */ - #include "opt_inet.h" -#include "mlx4_en.h" - #include <linux/mlx4/cq.h> +#include <linux/slab.h> #include <linux/mlx4/qp.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/vmalloc.h> +#include <linux/mlx4/driver.h> +#ifdef CONFIG_NET_RX_BUSY_POLL +#include <net/busy_poll.h> +#endif -#include <net/ethernet.h> -#include <net/if_vlan_var.h> -#include <sys/mbuf.h> +#include "mlx4_en.h" -enum { - MIN_RX_ARM = 1024, -}; + +static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, + int index) +{ + struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index; + int possible_frags; + int i; + + + /* Set size and memtype fields */ + for (i = 0; i < priv->num_frags; i++) { + rx_desc->data[i].byte_count = + cpu_to_be32(priv->frag_info[i].frag_size); + rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key); + } + + /* If the number of used fragments does not fill up the ring stride, + * * remaining (unused) fragments must be padded with null address/size + * * and a special memory key */ + possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; + for (i = priv->num_frags; i < possible_frags; i++) { + rx_desc->data[i].byte_count = 0; + rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); + rx_desc->data[i].addr = 0; + } + +} static int mlx4_en_alloc_buf(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc, @@ -70,48 +98,24 @@ static int mlx4_en_alloc_buf(struct mlx4_en_priv *priv, return 0; } -static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring, int index) -{ - struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index; - int possible_frags; - int i; - - /* Set size and memtype fields */ - for (i = 0; i < priv->num_frags; i++) { - rx_desc->data[i].byte_count = - cpu_to_be32(priv->frag_info[i].frag_size); - rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key); - } - - /* If the number of used fragments does not fill up the ring stride, - * remaining (unused) fragments must be padded with null address/size - * and a special memory key */ - possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; - for (i = priv->num_frags; i < possible_frags; i++) { - rx_desc->data[i].byte_count = 0; - rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); - rx_desc->data[i].addr = 0; - } -} static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring, int index) + struct mlx4_en_rx_ring *ring, int index) { - struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); - struct mbuf **mb_list = ring->rx_info + (index << priv->log_rx_info); - int i; + struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); + struct mbuf **mb_list = ring->rx_info + (index << priv->log_rx_info); + int i; - for (i = 0; i < priv->num_frags; i++) - if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, i)) - goto err; + for (i = 0; i < priv->num_frags; i++) + if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, i)) + goto err; - return 0; + return 0; err: - while (i--) - m_free(mb_list[i]); - return -ENOMEM; + while (i--) + m_free(mb_list[i]); + return -ENOMEM; } static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) @@ -136,7 +140,11 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, frag_info = &priv->frag_info[nr]; dma = be64_to_cpu(rx_desc->data[nr].addr); - en_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (long long) dma); +#if BITS_PER_LONG == 64 + en_dbg(DRV, priv, "Unmaping buffer at dma:0x%lx\n", (u64) dma); +#elif BITS_PER_LONG == 32 + en_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (u64) dma); +#endif pci_unmap_single(mdev->pdev, dma, frag_info->frag_size, PCI_DMA_FROMDEVICE); m_free(mb_list[nr]); @@ -153,7 +161,7 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) { for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { - ring = &priv->rx_ring[ring_ind]; + ring = priv->rx_ring[ring_ind]; err = mlx4_en_prepare_rx_desc(priv, ring, ring->actual_size); @@ -163,7 +171,8 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) "enough rx buffers\n"); return -ENOMEM; } else { - new_size = rounddown_pow_of_two(ring->actual_size); + new_size = + rounddown_pow_of_two(ring->actual_size); en_warn(priv, "Only %d buffers allocated " "reducing ring size to %d\n", ring->actual_size, new_size); @@ -178,7 +187,7 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) reduce_rings: for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { - ring = &priv->rx_ring[ring_ind]; + ring = priv->rx_ring[ring_ind]; while (ring->actual_size > new_size) { ring->actual_size--; ring->prod--; @@ -207,39 +216,95 @@ static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, } } +#if MLX4_EN_MAX_RX_FRAGS == 3 +static int frag_sizes[] = { + FRAG_SZ0, + FRAG_SZ1, + FRAG_SZ2, +}; +#elif MLX4_EN_MAX_RX_FRAGS == 2 +static int frag_sizes[] = { + FRAG_SZ0, + FRAG_SZ1, +}; +#else +#error "Unknown MAX_RX_FRAGS" +#endif + +void mlx4_en_calc_rx_buf(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int eff_mtu = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; + int buf_size = 0; + int i, frag; + + for (i = 0, frag = 0; buf_size < eff_mtu; frag++, i++) { + /* + * Allocate small to large but only as much as is needed for + * the tail. + */ + while (i > 0 && eff_mtu - buf_size <= frag_sizes[i - 1]) + i--; + priv->frag_info[frag].frag_size = frag_sizes[i]; + priv->frag_info[frag].frag_prefix_size = buf_size; + buf_size += priv->frag_info[frag].frag_size; + } + + priv->num_frags = frag; + priv->rx_mb_size = eff_mtu; + priv->log_rx_info = + ROUNDUP_LOG2(priv->num_frags * sizeof(struct mbuf *)); + + en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d " + "num_frags:%d):\n", eff_mtu, priv->num_frags); + for (i = 0; i < priv->num_frags; i++) { + en_dbg(DRV, priv, " frag:%d - size:%d prefix:%d\n", i, + priv->frag_info[i].frag_size, + priv->frag_info[i].frag_prefix_size); + } +} + int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring, u32 size) + struct mlx4_en_rx_ring **pring, + u32 size, int node) { struct mlx4_en_dev *mdev = priv->mdev; - int err; + struct mlx4_en_rx_ring *ring; + int err = -ENOMEM; int tmp; - + ring = kzalloc(sizeof(struct mlx4_en_rx_ring), GFP_KERNEL); + if (!ring) { + en_err(priv, "Failed to allocate RX ring structure\n"); + return -ENOMEM; + } + ring->prod = 0; ring->cons = 0; ring->size = size; ring->size_mask = size - 1; ring->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + - DS_SIZE * MLX4_EN_MAX_RX_FRAGS); + DS_SIZE * MLX4_EN_MAX_RX_FRAGS); ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride + TXBB_SIZE; tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * - sizeof(struct mbuf *)); + sizeof(struct mbuf *)); - ring->rx_info = kmalloc(tmp, GFP_KERNEL); - if (!ring->rx_info) { - en_err(priv, "Failed allocating rx_info ring\n"); - return -ENOMEM; - } - en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d stride:%d (%d)\n", - ring->rx_info, tmp, ring->stride, ring->log_stride); + ring->rx_info = kmalloc(tmp, GFP_KERNEL); + if (!ring->rx_info) { + err = -ENOMEM; + goto err_ring; + } + + en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n", + ring->rx_info, tmp); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); if (err) - goto err_ring; + goto err_info; err = mlx4_en_map_buffer(&ring->wqres.buf); if (err) { @@ -247,18 +312,20 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, goto err_hwq; } ring->buf = ring->wqres.buf.direct.buf; - + *pring = ring; return 0; - mlx4_en_unmap_buffer(&ring->wqres.buf); err_hwq: mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); +err_info: + vfree(ring->rx_info); err_ring: - kfree(ring->rx_info); - ring->rx_info = NULL; + kfree(ring); + return err; } + int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) { struct mlx4_en_rx_ring *ring; @@ -266,14 +333,20 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) int ring_ind; int err; int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + - DS_SIZE * priv->num_frags); + DS_SIZE * priv->num_frags); + for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { - ring = &priv->rx_ring[ring_ind]; + ring = priv->rx_ring[ring_ind]; ring->prod = 0; ring->cons = 0; ring->actual_size = 0; - ring->cqn = priv->rx_cq[ring_ind].mcq.cqn; + ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; + ring->rx_alloc_order = priv->rx_alloc_order; + ring->rx_alloc_size = priv->rx_alloc_size; + ring->rx_buf_size = priv->rx_buf_size; + ring->rx_mb_size = priv->rx_mb_size; + ring->stride = stride; if (ring->stride <= TXBB_SIZE) ring->buf += TXBB_SIZE; @@ -284,9 +357,10 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) memset(ring->buf, 0, ring->buf_size); mlx4_en_update_rx_prod_db(ring); - /* Initailize all descriptors */ + /* Initialize all descriptors */ for (i = 0; i < ring->size; i++) mlx4_en_init_rx_desc(priv, ring, i); + #ifdef INET /* Configure lro mngr */ if (priv->dev->if_capenable & IFCAP_LRO) { @@ -297,38 +371,56 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) } #endif } + + err = mlx4_en_fill_rx_buffers(priv); if (err) goto err_buffers; for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { - ring = &priv->rx_ring[ring_ind]; + ring = priv->rx_ring[ring_ind]; ring->size_mask = ring->actual_size - 1; mlx4_en_update_rx_prod_db(ring); } - return 0; err_buffers: for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) - mlx4_en_free_rx_buf(priv, &priv->rx_ring[ring_ind]); + mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]); + + ring_ind = priv->rx_ring_num - 1; + + while (ring_ind >= 0) { + ring = priv->rx_ring[ring_ind]; + if (ring->stride <= TXBB_SIZE) + ring->buf -= TXBB_SIZE; + ring_ind--; + } return err; } + void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring) + struct mlx4_en_rx_ring **pring, + u32 size, u16 stride) { struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_rx_ring *ring = *pring; mlx4_en_unmap_buffer(&ring->wqres.buf); - mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size + TXBB_SIZE); - kfree(ring->rx_info); - ring->rx_info = NULL; + mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); + vfree(ring->rx_info); + kfree(ring); + *pring = NULL; +#ifdef CONFIG_RFS_ACCEL + mlx4_en_cleanup_filters(priv, ring); +#endif } + void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { @@ -341,6 +433,43 @@ void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, } +static void validate_loopback(struct mlx4_en_priv *priv, struct mbuf *mb) +{ + int i; + int offset = ETHER_HDR_LEN; + + for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) { + if (*(mb->m_data + offset) != (unsigned char) (i & 0xff)) + goto out_loopback; + } + /* Loopback found */ + priv->loopback_ok = 1; + +out_loopback: + m_freem(mb); +} + + +static inline int invalid_cqe(struct mlx4_en_priv *priv, + struct mlx4_cqe *cqe) +{ + /* Drop packet on bad receive or bad checksum */ + if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == + MLX4_CQE_OPCODE_ERROR)) { + en_err(priv, "CQE completed in error - vendor syndrom:%d syndrom:%d\n", + ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome, + ((struct mlx4_err_cqe *)cqe)->syndrome); + return 1; + } + if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) { + en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n"); + return 1; + } + + return 0; +} + + /* Unmap a completed descriptor and free unused pages */ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc, @@ -360,15 +489,15 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, frag_info = &priv->frag_info[nr]; if (length <= frag_info->frag_prefix_size) break; - if (nr) + if (nr) mb->m_next = mb_list[nr]; mb = mb_list[nr]; mb->m_len = frag_info[nr].frag_size; dma = be64_to_cpu(rx_desc->data[nr].addr); - /* Allocate a replacement page */ - if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, nr)) - goto fail; + /* Allocate a replacement page */ + if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, nr)) + goto fail; /* Unmap buffer */ pci_unmap_single(mdev->pdev, dma, frag_info[nr].frag_size, @@ -380,50 +509,14 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, return 0; fail: - /* Drop all accumulated fragments (which have already been replaced in - * the descriptor) of this packet; remaining fragments are reused... */ - while (nr > 0) { - nr--; - m_free(mb_list[nr]); - } - return -ENOMEM; -} - - -static inline int invalid_cqe(struct mlx4_en_priv *priv, - struct mlx4_cqe *cqe) -{ - /* Drop packet on bad receive or bad checksum */ - if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == - MLX4_CQE_OPCODE_ERROR)) { - en_err(priv, "CQE completed in error - vendor " - "syndrom:%d syndrom:%d\n", - ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome, - ((struct mlx4_err_cqe *) cqe)->syndrome); - return 1; - } - if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) { - en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n"); - return 1; - } - - return 0; -} + /* Drop all accumulated fragments (which have already been replaced in + * the descriptor) of this packet; remaining fragments are reused... */ + while (nr > 0) { + nr--; + m_free(mb_list[nr]); + } + return -ENOMEM; -static void validate_loopback(struct mlx4_en_priv *priv, struct mbuf *mb) -{ - int i; - int offset = ETHER_HDR_LEN; - - for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) { - if (*(mb->m_data + offset) != (unsigned char) (i & 0xff)) - goto out_loopback; - } - /* Loopback found */ - priv->loopback_ok = 1; - -out_loopback: - m_freem(mb); } static struct mbuf *mlx4_en_rx_mb(struct mlx4_en_priv *priv, @@ -441,35 +534,45 @@ static struct mbuf *mlx4_en_rx_mb(struct mlx4_en_priv *priv, return mb; } - +/* For cpu arch with cache line of 64B the performance is better when cqe size==64B + * To enlarge cqe size from 32B to 64B --> 32B of garbage (i.e. 0xccccccc) + * was added in the beginning of each cqe (the real data is in the corresponding 32B). + * The following calc ensures that when factor==1, it means we are alligned to 64B + * and we get the real cqe data*/ +#define CQE_FACTOR_INDEX(index, factor) ((index << factor) + factor) int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cqe *cqe; - struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring]; + struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; struct mbuf **mb_list; struct mlx4_en_rx_desc *rx_desc; struct mbuf *mb; + struct mlx4_cq *mcq = &cq->mcq; + struct mlx4_cqe *buf = cq->buf; #ifdef INET struct lro_entry *queued; #endif int index; unsigned int length; int polled = 0; + u32 cons_index = mcq->cons_index; + u32 size_mask = ring->size_mask; + int size = cq->size; + int factor = priv->cqe_factor; if (!priv->port_up) return 0; /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx - * descriptor offset can be deduced from the CQE index instead of + * descriptor offset can be deducted from the CQE index instead of * reading 'cqe->index' */ - index = cq->mcq.cons_index & ring->size_mask; - cqe = &cq->buf[index]; + index = cons_index & size_mask; + cqe = &buf[CQE_FACTOR_INDEX(index, factor)]; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, - cq->mcq.cons_index & cq->size)) { - + cons_index & size)) { mb_list = ring->rx_info + (index << priv->log_rx_info); rx_desc = ring->buf + (index << ring->log_stride); @@ -478,13 +581,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud */ rmb(); - if (invalid_cqe(priv, cqe)) + if (invalid_cqe(priv, cqe)) { goto next; - + } /* * Packet is OK - process it. */ length = be32_to_cpu(cqe->byte_cnt); + length -= ring->fcs_del; mb = mlx4_en_rx_mb(priv, rx_desc, mb_list, length); if (!mb) { ring->errors++; @@ -494,7 +598,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud ring->bytes += length; ring->packets++; - if (unlikely(priv->validate_loopback)) { + if (unlikely(priv->validate_loopback)) { validate_loopback(priv, mb); goto next; } @@ -507,11 +611,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->sl_vid); mb->m_flags |= M_VLANTAG; } - if (likely(priv->rx_csum) && + if (likely(dev->if_capabilities & IFCAP_RXCSUM) && (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && (cqe->checksum == cpu_to_be16(0xffff))) { priv->port_stats.rx_chksum_good++; - mb->m_pkthdr.csum_flags = + mb->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; mb->m_pkthdr.csum_data = htons(0xffff); @@ -523,139 +627,89 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud */ #ifdef INET if (mlx4_en_can_lro(cqe->status) && - (dev->if_capenable & IFCAP_LRO)) { + (dev->if_capenable & IFCAP_LRO)) { if (ring->lro.lro_cnt != 0 && - tcp_lro_rx(&ring->lro, mb, 0) == 0) + tcp_lro_rx(&ring->lro, mb, 0) == 0) goto next; } -#endif +#endif /* LRO not possible, complete processing here */ INC_PERF_COUNTER(priv->pstats.lro_misses); } else { mb->m_pkthdr.csum_flags = 0; priv->port_stats.rx_chksum_none++; -#ifdef INET - if (priv->ip_reasm && - cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4) && - !mlx4_en_rx_frags(priv, ring, mb, cqe)) - goto next; -#endif } /* Push it up the stack */ dev->if_input(dev, mb); next: - ++cq->mcq.cons_index; - index = (cq->mcq.cons_index) & ring->size_mask; - cqe = &cq->buf[index]; + ++cons_index; + index = cons_index & size_mask; + cqe = &buf[CQE_FACTOR_INDEX(index, factor)]; if (++polled == budget) goto out; } /* Flush all pending IP reassembly sessions */ out: #ifdef INET - mlx4_en_flush_frags(priv, ring); while ((queued = SLIST_FIRST(&ring->lro.lro_active)) != NULL) { SLIST_REMOVE_HEAD(&ring->lro.lro_active, next); tcp_lro_flush(&ring->lro, queued); } #endif AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); - mlx4_cq_set_ci(&cq->mcq); + mcq->cons_index = cons_index; + mlx4_cq_set_ci(mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ - ring->cons = cq->mcq.cons_index; + ring->cons = mcq->cons_index; ring->prod += polled; /* Polled descriptors were realocated in place */ mlx4_en_update_rx_prod_db(ring); return polled; -} +} /* Rx CQ polling - called by NAPI */ static int mlx4_en_poll_rx_cq(struct mlx4_en_cq *cq, int budget) { - struct net_device *dev = cq->dev; - int done; - - done = mlx4_en_process_rx_cq(dev, cq, budget); - cq->tot_rx += done; + struct net_device *dev = cq->dev; + int done; - return done; -} + done = mlx4_en_process_rx_cq(dev, cq, budget); + cq->tot_rx += done; -void mlx4_en_rx_que(void *context, int pending) -{ - struct mlx4_en_cq *cq; + return done; - cq = context; - while (mlx4_en_poll_rx_cq(cq, MLX4_EN_MAX_RX_POLL) - == MLX4_EN_MAX_RX_POLL); - mlx4_en_arm_cq(cq->dev->if_softc, cq); } - void mlx4_en_rx_irq(struct mlx4_cq *mcq) { struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); - int done; + int done; - done = mlx4_en_poll_rx_cq(cq, MLX4_EN_MAX_RX_POLL); - if (done == MLX4_EN_MAX_RX_POLL) + // Shoot one within the irq context + // Because there is no NAPI in freeBSD + done = mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET); + if (priv->port_up && (done == MLX4_EN_RX_BUDGET) ) { taskqueue_enqueue(cq->tq, &cq->cq_task); - else + } + else { mlx4_en_arm_cq(priv, cq); + } } - -#if MLX4_EN_MAX_RX_FRAGS == 3 -static int frag_sizes[] = { - FRAG_SZ0, - FRAG_SZ1, - FRAG_SZ2, -}; -#elif MLX4_EN_MAX_RX_FRAGS == 2 -static int frag_sizes[] = { - FRAG_SZ0, - FRAG_SZ1, -}; -#else -#error "Unknown MAX_RX_FRAGS" -#endif - -void mlx4_en_calc_rx_buf(struct net_device *dev) +void mlx4_en_rx_que(void *context, int pending) { - struct mlx4_en_priv *priv = netdev_priv(dev); - int eff_mtu = dev->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETH_LLC_SNAP_SIZE; - int buf_size = 0; - int i, frag; - - for (i = 0, frag = 0; buf_size < eff_mtu; frag++, i++) { - /* - * Allocate small to large but only as much as is needed for - * the tail. - */ - while (i > 0 && eff_mtu - buf_size <= frag_sizes[i - 1]) - i--; - priv->frag_info[frag].frag_size = frag_sizes[i]; - priv->frag_info[frag].frag_prefix_size = buf_size; - buf_size += priv->frag_info[frag].frag_size; - } + struct mlx4_en_cq *cq; - priv->num_frags = frag; - priv->rx_mb_size = eff_mtu; - priv->log_rx_info = - ROUNDUP_LOG2(priv->num_frags * sizeof(struct mbuf *)); - - en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d " - "num_frags:%d):\n", eff_mtu, priv->num_frags); - for (i = 0; i < priv->num_frags; i++) { - en_dbg(DRV, priv, " frag:%d - size:%d prefix:%d\n", i, - priv->frag_info[i].frag_size, - priv->frag_info[i].frag_prefix_size) - } + cq = context; + while (mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET) + == MLX4_EN_RX_BUDGET); + mlx4_en_arm_cq(cq->dev->if_softc, cq); } + /* RSS related functions */ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, @@ -672,6 +726,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, en_err(priv, "Failed to allocate qp context\n"); return -ENOMEM; } + err = mlx4_qp_alloc(mdev->dev, qpn, qp); if (err) { en_err(priv, "Failed to allocate qp #%x\n", qpn); @@ -681,9 +736,16 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, memset(context, 0, sizeof *context); mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0, - qpn, ring->cqn, context); + qpn, ring->cqn, -1, context); context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma); + /* Cancel FCS removal if FW allows */ + if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) { + context->param3 |= cpu_to_be32(1 << 29); + ring->fcs_del = ETH_FCS_LEN; + } else + ring->fcs_del = 0; + err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state); if (err) { mlx4_qp_remove(mdev->dev, qp); @@ -695,26 +757,57 @@ out: return err; } +int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) +{ + int err; + u32 qpn; + + err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, 0); + if (err) { + en_err(priv, "Failed reserving drop qpn\n"); + return err; + } + err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp); + if (err) { + en_err(priv, "Failed allocating drop qp\n"); + mlx4_qp_release_range(priv->mdev->dev, qpn, 1); + return err; + } + + return 0; +} + +void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv) +{ + u32 qpn; + + qpn = priv->drop_qp.qpn; + mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp); + mlx4_qp_free(priv->mdev->dev, &priv->drop_qp); + mlx4_qp_release_range(priv->mdev->dev, qpn, 1); +} + /* Allocate rx qp's and configure them according to rss map */ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rss_map *rss_map = &priv->rss_map; struct mlx4_qp_context context; - struct mlx4_en_rss_context *rss_context; + struct mlx4_rss_context *rss_context; + int rss_rings; void *ptr; - u8 rss_mask; - int i, qpn; + u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 | + MLX4_RSS_TCP_IPV6); + int i; int err = 0; int good_qps = 0; + static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B, 0x1983A2FC, + 0x943E1ADB, 0xD9389E6B, 0xD1039C2C, 0xA74499AD, + 0x593D56D9, 0xF3253C06, 0x2ADC1FFC}; - if (mdev->profile.udp_rss) - rss_mask = 0x3f; - else - rss_mask = 0x14; en_dbg(DRV, priv, "Configuring rss steering\n"); err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num, - roundup_pow_of_two(priv->rx_ring_num), + priv->rx_ring_num, &rss_map->base_qpn, 0); if (err) { en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num); @@ -722,9 +815,9 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) } for (i = 0; i < priv->rx_ring_num; i++) { - qpn = rss_map->base_qpn + i; - err = mlx4_en_config_rss_qp(priv, qpn, - &priv->rx_ring[i], + priv->rx_ring[i]->qpn = rss_map->base_qpn + i; + err = mlx4_en_config_rss_qp(priv, priv->rx_ring[i]->qpn, + priv->rx_ring[i], &rss_map->state[i], &rss_map->qps[i]); if (err) @@ -734,28 +827,34 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) } /* Configure RSS indirection qp */ - err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn, 0); - if (err) { - en_err(priv, "Failed to reserve range for RSS " - "indirection qp\n"); - goto rss_err; - } err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); - goto reserve_err; + goto rss_err; } rss_map->indir_qp.event = mlx4_en_sqp_event; mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn, - priv->rx_ring[0].cqn, &context); + priv->rx_ring[0]->cqn, -1, &context); - ptr = ((void *) &context) + 0x3c; - rss_context = (struct mlx4_en_rss_context *) ptr; - rss_context->base_qpn = cpu_to_be32(ilog2(priv->rx_ring_num) << 24 | + if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num) + rss_rings = priv->rx_ring_num; + else + rss_rings = priv->prof->rss_rings; + + ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path) + + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH; + rss_context = ptr; + rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 | (rss_map->base_qpn)); rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn); + if (priv->mdev->profile.udp_rss) { + rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6; + rss_context->base_qpn_udp = rss_context->default_qpn; + } rss_context->flags = rss_mask; - rss_context->base_qpn_udp = rss_context->default_qpn; + rss_context->hash_fn = MLX4_RSS_HASH_TOP; + for (i = 0; i < 10; i++) + rss_context->rss_key[i] = cpu_to_be32(rsskey[i]); err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, &rss_map->indir_qp, &rss_map->indir_state); @@ -769,8 +868,6 @@ indir_err: MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); mlx4_qp_free(mdev->dev, &rss_map->indir_qp); -reserve_err: - mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1); rss_err: for (i = 0; i < good_qps; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], @@ -792,7 +889,6 @@ void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv) MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); mlx4_qp_free(mdev->dev, &rss_map->indir_qp); - mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1); for (i = 0; i < priv->rx_ring_num; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], @@ -802,3 +898,4 @@ void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv) } mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num); } + diff --git a/sys/ofed/drivers/net/mlx4/en_selftest.c b/sys/ofed/drivers/net/mlx4/en_selftest.c index 0e62027..2a28315 100644 --- a/sys/ofed/drivers/net/mlx4/en_selftest.c +++ b/sys/ofed/drivers/net/mlx4/en_selftest.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -31,24 +31,24 @@ * */ -#include "mlx4_en.h" - #include <linux/kernel.h> #include <linux/ethtool.h> #include <linux/netdevice.h> #include <linux/delay.h> #include <linux/mlx4/driver.h> +#include "mlx4_en.h" + static int mlx4_en_test_registers(struct mlx4_en_priv *priv) { return mlx4_cmd(priv->mdev->dev, 0, 0, 0, MLX4_CMD_HW_HEALTH_CHECK, - MLX4_CMD_TIME_CLASS_A); + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv) { - struct mbuf *mb; + struct sk_buff *skb; struct ethhdr *ethh; unsigned char *packet; unsigned int packet_size = MLX4_LOOPBACK_TEST_PAYLOAD; @@ -57,24 +57,24 @@ static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv) /* build the pkt before xmit */ - mb = netdev_alloc_mb(priv->dev, MLX4_LOOPBACK_TEST_PAYLOAD + ETH_HLEN + NET_IP_ALIGN); - if (!mb) { - en_err(priv, "-LOOPBACK_TEST_XMIT- failed to create mb for xmit\n"); + skb = netdev_alloc_skb(priv->dev, MLX4_LOOPBACK_TEST_PAYLOAD + ETH_HLEN + NET_IP_ALIGN); + if (!skb) { + en_err(priv, "-LOOPBACK_TEST_XMIT- failed to create skb for xmit\n"); return -ENOMEM; } - mb_reserve(mb, NET_IP_ALIGN); + skb_reserve(skb, NET_IP_ALIGN); - ethh = (struct ethhdr *)mb_put(mb, sizeof(struct ethhdr)); - packet = (unsigned char *)mb_put(mb, packet_size); + ethh = (struct ethhdr *)skb_put(skb, sizeof(struct ethhdr)); + packet = (unsigned char *)skb_put(skb, packet_size); memcpy(ethh->h_dest, priv->dev->dev_addr, ETH_ALEN); memset(ethh->h_source, 0, ETH_ALEN); ethh->h_proto = htons(ETH_P_ARP); - mb_set_mac_header(mb, 0); + skb_set_mac_header(skb, 0); for (i = 0; i < packet_size; ++i) /* fill our packet */ packet[i] = (unsigned char)(i & 0xff); /* xmit the pkt */ - err = mlx4_en_xmit(mb, priv->dev); + err = mlx4_en_xmit(skb, priv->dev); return err; } @@ -87,6 +87,8 @@ static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) priv->loopback_ok = 0; priv->validate_loopback = 1; + mlx4_en_update_loopback_state(priv->dev, priv->dev->features); + /* xmit */ if (mlx4_en_test_loopback_xmit(priv)) { en_err(priv, "Transmitting loopback packet failed\n"); @@ -107,7 +109,8 @@ static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) mlx4_en_test_loopback_exit: priv->validate_loopback = 0; - return (!loopback_ok); + mlx4_en_update_loopback_state(priv->dev, priv->dev->features); + return !loopback_ok; } @@ -127,8 +130,10 @@ static int mlx4_en_test_speed(struct mlx4_en_priv *priv) if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) return -ENOMEM; - /* The device currently only supports 10G speed */ - if (priv->port_state.link_speed != SPEED_10000) + /* The device supports 1G, 10G and 40G speed */ + if (priv->port_state.link_speed != MLX4_EN_LINK_SPEED_1G && + priv->port_state.link_speed != MLX4_EN_LINK_SPEED_10G && + priv->port_state.link_speed != MLX4_EN_LINK_SPEED_40G) return priv->port_state.link_speed; return 0; } @@ -138,7 +143,6 @@ void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - struct mlx4_en_tx_ring *tx_ring; int i, carrier_ok; memset(buf, 0, sizeof(u64) * MLX4_EN_NUM_SELF_TEST); @@ -148,20 +152,16 @@ void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) carrier_ok = netif_carrier_ok(dev); netif_carrier_off(dev); -retry_tx: - /* Wait untill all tx queues are empty. + /* Wait until all tx queues are empty. * there should not be any additional incoming traffic * since we turned the carrier off */ msleep(200); - for (i = 0; i < priv->tx_ring_num && carrier_ok; i++) { - tx_ring = &priv->tx_ring[i]; - if (tx_ring->prod != (tx_ring->cons + tx_ring->last_nr_txbb)) - goto retry_tx; - } - if (priv->mdev->dev->caps.loopback_support){ + if (priv->mdev->dev->caps.flags & + MLX4_DEV_CAP_FLAG_UC_LOOPBACK) { buf[3] = mlx4_en_test_registers(priv); - buf[4] = mlx4_en_test_loopback(priv); + if (priv->port_up) + buf[4] = mlx4_en_test_loopback(priv); } if (carrier_ok) diff --git a/sys/ofed/drivers/net/mlx4/en_tx.c b/sys/ofed/drivers/net/mlx4/en_tx.c index 4661024..94a35a1 100644 --- a/sys/ofed/drivers/net/mlx4/en_tx.c +++ b/sys/ofed/drivers/net/mlx4/en_tx.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -31,15 +31,13 @@ * */ -#include "mlx4_en.h" - +#include <asm/page.h> #include <linux/mlx4/cq.h> +#include <linux/slab.h> #include <linux/mlx4/qp.h> +#include <linux/if_vlan.h> #include <linux/vmalloc.h> - -#include <net/ethernet.h> -#include <net/if_vlan_var.h> -#include <sys/mbuf.h> +#include <linux/moduleparam.h> #include <netinet/in_systm.h> #include <netinet/in.h> @@ -50,59 +48,78 @@ #include <netinet/tcp_lro.h> #include <netinet/udp.h> +#include "mlx4_en.h" +#include "utils.h" + enum { MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */ MAX_BF = 256, + MIN_PKT_LEN = 17, }; -static int inline_thold = MAX_INLINE; +static int inline_thold __read_mostly = MAX_INLINE; -module_param_named(inline_thold, inline_thold, int, 0444); -MODULE_PARM_DESC(inline_thold, "treshold for using inline data"); +module_param_named(inline_thold, inline_thold, uint, 0444); +MODULE_PARM_DESC(inline_thold, "threshold for using inline data"); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring, u32 size, - u16 stride) + struct mlx4_en_tx_ring **pring, u32 size, + u16 stride, int node, int queue_idx) { struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_ring *ring; int tmp; int err; + ring = kzalloc_node(sizeof(struct mlx4_en_tx_ring), GFP_KERNEL, node); + if (!ring) { + ring = kzalloc(sizeof(struct mlx4_en_tx_ring), GFP_KERNEL); + if (!ring) { + en_err(priv, "Failed allocating TX ring\n"); + return -ENOMEM; + } + } + ring->size = size; ring->size_mask = size - 1; ring->stride = stride; - - inline_thold = min(inline_thold, MAX_INLINE); - + ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS; + ring->inline_thold = min(inline_thold, MAX_INLINE); mtx_init(&ring->tx_lock.m, "mlx4 tx", NULL, MTX_DEF); mtx_init(&ring->comp_lock.m, "mlx4 comp", NULL, MTX_DEF); /* Allocate the buf ring */ ring->br = buf_ring_alloc(MLX4_EN_DEF_TX_QUEUE_SIZE, M_DEVBUF, - M_WAITOK, &ring->tx_lock.m); + M_WAITOK, &ring->tx_lock.m); if (ring->br == NULL) { en_err(priv, "Failed allocating tx_info ring\n"); return -ENOMEM; } tmp = size * sizeof(struct mlx4_en_tx_info); - ring->tx_info = kmalloc(tmp, GFP_KERNEL); + ring->tx_info = vmalloc_node(tmp, node); if (!ring->tx_info) { - en_err(priv, "Failed allocating tx_info ring\n"); - err = -ENOMEM; - goto err_tx; + ring->tx_info = vmalloc(tmp); + if (!ring->tx_info) { + err = -ENOMEM; + goto err_ring; + } } + en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n", ring->tx_info, tmp); - ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL); + ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node); if (!ring->bounce_buf) { - en_err(priv, "Failed allocating bounce buffer\n"); - err = -ENOMEM; - goto err_tx; + ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL); + if (!ring->bounce_buf) { + err = -ENOMEM; + goto err_info; + } } ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE); + /* Allocate HW buffers on provided NUMA node */ err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); if (err) { @@ -122,9 +139,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, "buf_size:%d dma:%llx\n", ring, ring->buf, ring->size, ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map); - err = mlx4_qp_reserve_range(mdev->dev, 1, 256, &ring->qpn, MLX4_RESERVE_BF_QP); + err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn, + MLX4_RESERVE_BF_QP); if (err) { - en_err(priv, "Failed reserving qp for tx ring.\n"); + en_err(priv, "failed reserving qp for TX ring\n"); goto err_map; } @@ -135,14 +153,19 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, } ring->qp.event = mlx4_en_sqp_event; - err = mlx4_bf_alloc(mdev->dev, &ring->bf, 0); + err = mlx4_bf_alloc(mdev->dev, &ring->bf, node); if (err) { + en_dbg(DRV, priv, "working without blueflame (%d)", err); ring->bf.uar = &mdev->priv_uar; ring->bf.uar->map = mdev->uar_map; ring->bf_enabled = false; } else ring->bf_enabled = true; + ring->queue_index = queue_idx; + if (queue_idx < priv->num_tx_rings_p_up ) + CPU_SET(queue_idx, &ring->affinity_mask); + *pring = ring; return 0; err_reserve: @@ -153,18 +176,19 @@ err_hwq_res: mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); err_bounce: kfree(ring->bounce_buf); - ring->bounce_buf = NULL; -err_tx: +err_info: + vfree(ring->tx_info); +err_ring: buf_ring_free(ring->br, M_DEVBUF); - kfree(ring->tx_info); - ring->tx_info = NULL; + kfree(ring); return err; } void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring) + struct mlx4_en_tx_ring **pring) { struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_ring *ring = *pring; en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn); buf_ring_free(ring->br, M_DEVBUF); @@ -172,20 +196,20 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, mlx4_bf_free(mdev->dev, &ring->bf); mlx4_qp_remove(mdev->dev, &ring->qp); mlx4_qp_free(mdev->dev, &ring->qp); - mlx4_qp_release_range(mdev->dev, ring->qpn, 1); + mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1); mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); kfree(ring->bounce_buf); - ring->bounce_buf = NULL; - kfree(ring->tx_info); - ring->tx_info = NULL; + vfree(ring->tx_info); mtx_destroy(&ring->tx_lock.m); mtx_destroy(&ring->comp_lock.m); + kfree(ring); + *pring = NULL; } int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int cq) + int cq, int user_prio) { struct mlx4_en_dev *mdev = priv->mdev; int err; @@ -200,16 +224,15 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, memset(ring->buf, 0, ring->buf_size); ring->qp_state = MLX4_QP_STATE_RST; - ring->doorbell_qpn = swab32(ring->qp.qpn << 8); + ring->doorbell_qpn = ring->qp.qpn << 8; mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn, - ring->cqn, &ring->context); + ring->cqn, user_prio, &ring->context); if (ring->bf_enabled) ring->context.usr_page = cpu_to_be32(ring->bf.uar->index); err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context, &ring->qp, &ring->qp_state); - return err; } @@ -222,65 +245,98 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); } +static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner) +{ + struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; + struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; + void *end = ring->buf + ring->buf_size; + __be32 *ptr = (__be32 *)tx_desc; + __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); + int i; + + /* Optimize the common case when there are no wraparounds */ + if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) + /* Stamp the freed descriptor */ + for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { + *ptr = stamp; + ptr += STAMP_DWORDS; + } + else + /* Stamp the freed descriptor */ + for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { + *ptr = stamp; + ptr += STAMP_DWORDS; + if ((void *)ptr >= end) { + ptr = ring->buf; + stamp ^= cpu_to_be32(0x80000000); + } + } +} static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int index, u8 owner) + int index, u8 owner, u64 timestamp) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; - struct mbuf *mb = tx_info->mb; + struct mbuf *mb = tx_info->mb; void *end = ring->buf + ring->buf_size; - int frags = tx_info->nr_segs; + int frags = tx_info->nr_segs;; int i; - __be32 *ptr = (__be32 *)tx_desc; - __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); /* Optimize the common case when there are no wraparounds */ if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { if (!tx_info->inl) { + if (tx_info->linear) { + dma_unmap_single(priv->ddev, + (dma_addr_t) be64_to_cpu(data->addr), + be32_to_cpu(data->byte_count), + PCI_DMA_TODEVICE); + ++data; + } + for (i = 0; i < frags; i++) { - pci_unmap_single(mdev->pdev, - (dma_addr_t) be64_to_cpu(data[i].addr), - data[i].byte_count, PCI_DMA_TODEVICE); + pci_unmap_single(mdev->pdev, + (dma_addr_t) be64_to_cpu(data[i].addr), + data[i].byte_count, PCI_DMA_TODEVICE); } } - /* Stamp the freed descriptor */ - for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { - *ptr = stamp; - ptr += STAMP_DWORDS; - } - } else { if (!tx_info->inl) { + if ((void *) data >= end) { + data = ring->buf + ((void *)data - end); + } + + if (tx_info->linear) { + dma_unmap_single(priv->ddev, + (dma_addr_t) be64_to_cpu(data->addr), + be32_to_cpu(data->byte_count), + PCI_DMA_TODEVICE); + ++data; + } + for (i = 0; i < frags; i++) { /* Check for wraparound before unmapping */ if ((void *) data >= end) - data = (struct mlx4_wqe_data_seg *) ring->buf; - pci_unmap_single(mdev->pdev, - (dma_addr_t) be64_to_cpu(data->addr), - data->byte_count, PCI_DMA_TODEVICE); + data = ring->buf; + pci_unmap_single(mdev->pdev, + (dma_addr_t) be64_to_cpu(data->addr), + data->byte_count, PCI_DMA_TODEVICE); ++data; } } - /* Stamp the freed descriptor */ - for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { - *ptr = stamp; - ptr += STAMP_DWORDS; - if ((void *) ptr >= end) { - ptr = ring->buf; - stamp ^= cpu_to_be32(0x80000000); - } - } - } - m_freem(mb); + /* Send a copy of the frame to the BPF listener */ + if (priv->dev && priv->dev->if_bpf) + ETHER_BPF_MTAP(priv->dev, mb); + m_freem(mb); return tx_info->nr_txbb; } - int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -292,14 +348,14 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) ring->cons, ring->prod); if ((u32) (ring->prod - ring->cons) > ring->size) { - en_warn(priv, "Tx consumer passed producer!\n"); + en_warn(priv, "Tx consumer passed producer!\n"); return 0; } while (ring->cons != ring->prod) { ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, ring->cons & ring->size_mask, - !!(ring->cons & ring->size)); + !!(ring->cons & ring->size), 0); ring->cons += ring->last_nr_txbb; cnt++; } @@ -310,109 +366,105 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) return cnt; } -void mlx4_en_set_prio_map(struct mlx4_en_priv *priv, u16 *prio_map, u32 ring_num) -{ - int block = 8 / ring_num; - int extra = 8 - (block * ring_num); - int num = 0; - u16 ring = 1; - int prio; - - if (ring_num == 1) { - for (prio = 0; prio < 8; prio++) - prio_map[prio] = 0; - return; - } - - for (prio = 0; prio < 8; prio++) { - if (extra && (num == block + 1)) { - ring++; - num = 0; - extra--; - } else if (!extra && (num == block)) { - ring++; - num = 0; - } - prio_map[prio] = ring; - en_dbg(DRV, priv, " prio:%d --> ring:%d\n", prio, ring); - num++; - } -} - -static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) +static int mlx4_en_process_tx_cq(struct net_device *dev, + struct mlx4_en_cq *cq) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; - struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; - struct mlx4_cqe *cqe = cq->buf; + struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; + struct mlx4_cqe *cqe; u16 index; - u16 new_index; + u16 new_index, ring_index, stamp_index; u32 txbbs_skipped = 0; - u32 cq_last_sav; + u32 txbbs_stamp = 0; + u32 cons_index = mcq->cons_index; + int size = cq->size; + u32 size_mask = ring->size_mask; + struct mlx4_cqe *buf = cq->buf; + u32 packets = 0; + u32 bytes = 0; + int factor = priv->cqe_factor; + u64 timestamp = 0; + int done = 0; - /* index always points to the first TXBB of the last polled descriptor */ - index = ring->cons & ring->size_mask; - new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask; - if (index == new_index) - return; if (!priv->port_up) - return; + return 0; + + index = cons_index & size_mask; + cqe = &buf[(index << factor) + factor]; + ring_index = ring->cons & size_mask; + stamp_index = ring_index; + + /* Process all completed CQEs */ + while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, + cons_index & size)) { + /* + * make sure we read the CQE after we read the + * ownership bit + */ + rmb(); + + if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == + MLX4_CQE_OPCODE_ERROR)) { + en_err(priv, "CQE completed in error - vendor syndrom: 0x%x syndrom: 0x%x\n", + ((struct mlx4_err_cqe *)cqe)-> + vendor_err_syndrome, + ((struct mlx4_err_cqe *)cqe)->syndrome); + } + + /* Skip over last polled CQE */ + new_index = be16_to_cpu(cqe->wqe_index) & size_mask; - /* - * We use a two-stage loop: - * - the first samples the HW-updated CQE - * - the second frees TXBBs until the last sample - * This lets us amortize CQE cache misses, while still polling the CQ - * until is quiescent. - */ - cq_last_sav = mcq->cons_index; - do { do { - /* Skip over last polled CQE */ - index = (index + ring->last_nr_txbb) & ring->size_mask; txbbs_skipped += ring->last_nr_txbb; - - /* Poll next CQE */ + ring_index = (ring_index + ring->last_nr_txbb) & size_mask; + /* free next descriptor */ ring->last_nr_txbb = mlx4_en_free_tx_desc( - priv, ring, index, - !!((ring->cons + txbbs_skipped) & - ring->size)); - ++mcq->cons_index; - - } while (index != new_index); + priv, ring, ring_index, + !!((ring->cons + txbbs_skipped) & + ring->size), timestamp); + mlx4_en_stamp_wqe(priv, ring, stamp_index, + !!((ring->cons + txbbs_stamp) & + ring->size)); + stamp_index = ring_index; + txbbs_stamp = txbbs_skipped; + packets++; + bytes += ring->tx_info[ring_index].nr_bytes; + } while (ring_index != new_index); + + ++cons_index; + index = cons_index & size_mask; + cqe = &buf[(index << factor) + factor]; + } - new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask; - } while (index != new_index); - AVG_PERF_COUNTER(priv->pstats.tx_coal_avg, - (u32) (mcq->cons_index - cq_last_sav)); /* * To prevent CQ overflow we first update CQ consumer and only then * the ring consumer. */ + mcq->cons_index = cons_index; mlx4_cq_set_ci(mcq); wmb(); ring->cons += txbbs_skipped; - /* Wakeup Tx queue if this ring stopped it */ - if (unlikely(ring->blocked)) { - if ((u32) (ring->prod - ring->cons) <= - ring->size - HEADROOM - MAX_DESC_TXBBS) { - ring->blocked = 0; - if (atomic_fetchadd_int(&priv->blocked, -1) == 1) - atomic_clear_int(&dev->if_drv_flags, - IFF_DRV_OACTIVE); - priv->port_stats.wake_queue++; - } + /* Wakeup Tx queue if it was stopped and ring is not full */ + if (unlikely(ring->blocked) && + (ring->prod - ring->cons) <= ring->full_size) { + ring->blocked = 0; + if (atomic_fetchadd_int(&priv->blocked, -1) == 1) + atomic_clear_int(&dev->if_drv_flags ,IFF_DRV_OACTIVE); + ring->wake_queue++; + priv->port_stats.wake_queue++; } + return done; } void mlx4_en_tx_irq(struct mlx4_cq *mcq) { struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); - struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; + struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; if (!spin_trylock(&ring->comp_lock)) return; @@ -421,12 +473,11 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq) spin_unlock(&ring->comp_lock); } - void mlx4_en_poll_tx_cq(unsigned long data) { struct mlx4_en_cq *cq = (struct mlx4_en_cq *) data; struct mlx4_en_priv *priv = netdev_priv(cq->dev); - struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; + struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; u32 inflight; INC_PERF_COUNTER(priv->pstats.tx_poll); @@ -477,8 +528,8 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind) { - struct mlx4_en_cq *cq = &priv->tx_cq[tx_ind]; - struct mlx4_en_tx_ring *ring = &priv->tx_ring[tx_ind]; + struct mlx4_en_cq *cq = priv->tx_cq[tx_ind]; + struct mlx4_en_tx_ring *ring = priv->tx_ring[tx_ind]; /* If we don't have a pending timer, set one up to catch our recent post in case the interface becomes idle */ @@ -493,14 +544,13 @@ static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind) } } -static int is_inline(struct mbuf *mb) +static int is_inline(struct mbuf *mb, int thold) { - - if (inline_thold && mb->m_pkthdr.len <= inline_thold && - (mb->m_pkthdr.csum_flags & CSUM_TSO) == 0) + if (thold && mb->m_pkthdr.len <= thold && + (mb->m_pkthdr.csum_flags & CSUM_TSO) == 0) return 1; - return 0; + return 0; } static int inline_size(struct mbuf *mb) @@ -519,81 +569,108 @@ static int inline_size(struct mbuf *mb) static int get_head_size(struct mbuf *mb) { - struct tcphdr *th; - struct ip *ip; - int ip_hlen, tcp_hlen; - int len; - - len = ETHER_HDR_LEN; - if (mb->m_len < len + sizeof(struct ip)) + struct ether_vlan_header *eh; + struct tcphdr *th; + struct ip *ip; + int ip_hlen, tcp_hlen; + struct ip6_hdr *ip6; + uint16_t eth_type; + int eth_hdr_len; + + eh = mtod(mb, struct ether_vlan_header *); + if (mb->m_len < ETHER_HDR_LEN) + return (0); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + eth_type = ntohs(eh->evl_proto); + eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + } else { + eth_type = ntohs(eh->evl_encap_proto); + eth_hdr_len = ETHER_HDR_LEN; + } + if (mb->m_len < eth_hdr_len) return (0); - ip = (struct ip *)(mtod(mb, char *) + len); - if (ip->ip_p != IPPROTO_TCP) + switch (eth_type) { + case ETHERTYPE_IP: + ip = (struct ip *)(mb->m_data + eth_hdr_len); + if (mb->m_len < eth_hdr_len + sizeof(*ip)) + return (0); + if (ip->ip_p != IPPROTO_TCP) + return (0); + ip_hlen = ip->ip_hl << 2; + eth_hdr_len += ip_hlen; + break; + case ETHERTYPE_IPV6: + ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len); + if (mb->m_len < eth_hdr_len + sizeof(*ip6)) + return (0); + if (ip6->ip6_nxt != IPPROTO_TCP) + return (0); + eth_hdr_len += sizeof(*ip6); + break; + default: return (0); - ip_hlen = ip->ip_hl << 2; - len += ip_hlen; - if (mb->m_len < len + sizeof(struct tcphdr)) + } + if (mb->m_len < eth_hdr_len + sizeof(*th)) return (0); - th = (struct tcphdr *)(mtod(mb, char *) + len); + th = (struct tcphdr *)(mb->m_data + eth_hdr_len); tcp_hlen = th->th_off << 2; - len += tcp_hlen; - if (mb->m_len < len) + eth_hdr_len += tcp_hlen; + if (mb->m_len < eth_hdr_len) return (0); - return (len); + return (eth_hdr_len); } -static int get_real_size(struct mbuf *mb, struct net_device *dev, int *segsp, - int *lso_header_size) +static int get_real_size(struct mbuf *mb, struct net_device *dev, int *p_n_segs, + int *lso_header_size, int inl) { - struct mbuf *m; - int nr_segs; - - nr_segs = 0; - for (m = mb; m != NULL; m = m->m_next) - if (m->m_len) - nr_segs++; - - if (mb->m_pkthdr.csum_flags & CSUM_TSO) { - *lso_header_size = get_head_size(mb); - if (*lso_header_size) { - if (mb->m_len == *lso_header_size) - nr_segs--; - *segsp = nr_segs; - return CTRL_SIZE + nr_segs * DS_SIZE + - ALIGN(*lso_header_size + 4, DS_SIZE); - } - } else - *lso_header_size = 0; - *segsp = nr_segs; - if (is_inline(mb)) - return inline_size(mb); - return (CTRL_SIZE + nr_segs * DS_SIZE); + struct mbuf *m; + int nr_segs = 0; + + for (m = mb; m != NULL; m = m->m_next) + if (m->m_len) + nr_segs++; + + if (mb->m_pkthdr.csum_flags & CSUM_TSO) { + *lso_header_size = get_head_size(mb); + if (*lso_header_size) { + if (mb->m_len == *lso_header_size) + nr_segs--; + *p_n_segs = nr_segs; + return CTRL_SIZE + nr_segs * DS_SIZE + + ALIGN(*lso_header_size + 4, DS_SIZE); + } + } else + *lso_header_size = 0; + *p_n_segs = nr_segs; + if (inl) + return inline_size(mb); + return (CTRL_SIZE + nr_segs * DS_SIZE); } static struct mbuf *mb_copy(struct mbuf *mb, int *offp, char *data, int len) { - int bytes; - int off; - - off = *offp; - while (len) { - bytes = min(mb->m_len - off, len); - if (bytes) - memcpy(data, mb->m_data + off, bytes); - len -= bytes; - data += bytes; - off += bytes; - if (off == mb->m_len) { - off = 0; - mb = mb->m_next; - } - } - *offp = off; - return (mb); + int bytes; + int off; + + off = *offp; + while (len) { + bytes = min(mb->m_len - off, len); + if (bytes) + memcpy(data, mb->m_data + off, bytes); + len -= bytes; + data += bytes; + off += bytes; + if (off == mb->m_len) { + off = 0; + mb = mb->m_next; + } + } + *offp = off; + return (mb); } static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct mbuf *mb, - int real_size, u16 *vlan_tag, int tx_ind) + int real_size, u16 *vlan_tag, int tx_ind) { struct mlx4_wqe_inline_seg *inl = &tx_desc->inl; int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl; @@ -603,8 +680,12 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct mbuf *mb, off = 0; len = mb->m_pkthdr.len; if (len <= spc) { - inl->byte_count = cpu_to_be32(1 << 31 | len); + inl->byte_count = cpu_to_be32(1 << 31 | + (max_t(typeof(len), len, MIN_PKT_LEN))); mb_copy(mb, &off, (void *)(inl + 1), len); + if (len < MIN_PKT_LEN) + memset(((void *)(inl + 1)) + len, 0, + MIN_PKT_LEN - len); } else { inl->byte_count = cpu_to_be32(1 << 31 | spc); mb = mb_copy(mb, &off, (void *)(inl + 1), spc); @@ -618,74 +699,50 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct mbuf *mb, tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f; } +static unsigned long hashrandom; +static void hashrandom_init(void *arg) +{ + hashrandom = random(); +} +SYSINIT(hashrandom_init, SI_SUB_KLD, SI_ORDER_SECOND, &hashrandom_init, NULL); + u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb) { struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_tx_hash_entry *entry; - struct ether_header *eth; - struct tcphdr *th; - struct ip *iph; - u32 hash_index; - int tx_ind = 0; - u16 vlan_tag = 0; - int len; + u32 rings_p_up = priv->num_tx_rings_p_up; + u32 vlan_tag = 0; + u32 up = 0; + u32 queue_index; /* Obtain VLAN information if present */ if (mb->m_flags & M_VLANTAG) { vlan_tag = mb->m_pkthdr.ether_vtag; - /* Set the Tx ring to use according to vlan priority */ - tx_ind = priv->tx_prio_map[vlan_tag >> 13]; - if (tx_ind) - return tx_ind; - } - if (mb->m_len < - ETHER_HDR_LEN + sizeof(struct ip) + sizeof(struct tcphdr)) - return MLX4_EN_NUM_HASH_RINGS; - eth = mtod(mb, struct ether_header *); - /* Hashing is only done for TCP/IP or UDP/IP packets */ - if (be16_to_cpu(eth->ether_type) != ETHERTYPE_IP) - return MLX4_EN_NUM_HASH_RINGS; - len = ETHER_HDR_LEN; - iph = (struct ip *)(mtod(mb, char *) + len); - len += iph->ip_hl << 2; - th = (struct tcphdr *)(mtod(mb, char *) + len); - hash_index = be32_to_cpu(iph->ip_dst.s_addr) & MLX4_EN_TX_HASH_MASK; - switch(iph->ip_p) { - case IPPROTO_UDP: - break; - case IPPROTO_TCP: - if (mb->m_len < len + sizeof(struct tcphdr)) - return MLX4_EN_NUM_HASH_RINGS; - hash_index = - (hash_index ^ be16_to_cpu(th->th_dport ^ th->th_sport)) & - MLX4_EN_TX_HASH_MASK; - break; - default: - return MLX4_EN_NUM_HASH_RINGS; + up = (vlan_tag >> 13); } - entry = &priv->tx_hash[hash_index]; - if(unlikely(!entry->cnt)) { - tx_ind = hash_index & (MLX4_EN_NUM_HASH_RINGS / 2 - 1); - if (2 * entry->small_pkts > entry->big_pkts) - tx_ind += MLX4_EN_NUM_HASH_RINGS / 2; - entry->small_pkts = entry->big_pkts = 0; - entry->ring = tx_ind; - } + /* hash mbuf */ + queue_index = mlx4_en_hashmbuf(MLX4_F_HASHL3 | MLX4_F_HASHL4, mb, hashrandom); - entry->cnt++; - if (mb->m_pkthdr.len > MLX4_EN_SMALL_PKT_SIZE) - entry->big_pkts++; - else - entry->small_pkts++; - return entry->ring; + return ((queue_index % rings_p_up) + (up * rings_p_up)); } -static void mlx4_bf_copy(unsigned long *dst, unsigned long *src, unsigned bytecnt) +static void mlx4_bf_copy(void __iomem *dst, unsigned long *src, unsigned bytecnt) { __iowrite64_copy(dst, src, bytecnt / 8); } +static u64 mlx4_en_mac_to_u64(u8 *addr) +{ + u64 mac = 0; + int i; + + for (i = 0; i < ETHER_ADDR_LEN; i++) { + mac <<= 8; + mac |= addr[i]; + } + return mac; +} + static int mlx4_en_xmit(struct net_device *dev, int tx_ind, struct mbuf **mbp) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -701,57 +758,73 @@ static int mlx4_en_xmit(struct net_device *dev, int tx_ind, struct mbuf **mbp) int desc_size; int real_size; dma_addr_t dma; - u32 index, bf_index; + u32 index, bf_index, ring_size; __be32 op_own; u16 vlan_tag = 0; int i; int lso_header_size; bool bounce = false; + bool inl = false; struct mbuf *mb; + mb = *mbp; int defrag = 1; - ring = &priv->tx_ring[tx_ind]; - mb = *mbp; if (!priv->port_up) goto tx_drop; + ring = priv->tx_ring[tx_ind]; + ring_size = ring->size; + inl = is_inline(mb, ring->inline_thold); + retry: - real_size = get_real_size(mb, dev, &nr_segs, &lso_header_size); + real_size = get_real_size(mb, dev, &nr_segs, &lso_header_size, inl); if (unlikely(!real_size)) goto tx_drop; - /* Allign descriptor to TXBB size */ + /* Align descriptor to TXBB size */ desc_size = ALIGN(real_size, TXBB_SIZE); nr_txbb = desc_size / TXBB_SIZE; if (unlikely(nr_txbb > MAX_DESC_TXBBS)) { if (defrag) { - mb = m_defrag(*mbp, M_NOWAIT); - if (mb == NULL) { - mb = *mbp; - goto tx_drop; - } - *mbp = mb; - defrag = 0; - goto retry; - } + mb = m_defrag(*mbp, M_NOWAIT); + if (mb == NULL) { + mb = *mbp; + goto tx_drop; + } + *mbp = mb; + defrag = 0; + goto retry; + } + en_warn(priv, "Oversized header or SG list\n"); goto tx_drop; } - /* Check available TXBBs And 2K spare for prefetch */ - if (unlikely(((int)(ring->prod - ring->cons)) > - ring->size - HEADROOM - MAX_DESC_TXBBS)) { + /* Obtain VLAN information if present */ + if (mb->m_flags & M_VLANTAG) { + vlan_tag = mb->m_pkthdr.ether_vtag; + } + + /* Check available TXBBs and 2K spare for prefetch + * Even if netif_tx_stop_queue() will be called + * driver will send current packet to ensure + * that at least one completion will be issued after + * stopping the queue + */ + if (unlikely((int)(ring->prod - ring->cons) > ring->full_size)) { /* every full Tx ring stops queue */ if (ring->blocked == 0) - atomic_add_int(&priv->blocked, 1); + atomic_add_int(&priv->blocked, 1); + /* Set HW-queue-is-full flag */ atomic_set_int(&dev->if_drv_flags, IFF_DRV_OACTIVE); ring->blocked = 1; priv->port_stats.queue_stopped++; + ring->queue_stopped++; /* Use interrupts to find out when queue opened */ - cq = &priv->tx_cq[tx_ind]; + cq = priv->tx_cq[tx_ind]; mlx4_en_arm_cq(priv, cq); return EBUSY; - } + } /* Track current inflight packets for performance analysis */ AVG_PERF_COUNTER(priv->pstats.inflight_avg, @@ -763,128 +836,135 @@ retry: /* See if we have enough space for whole descriptor TXBB for setting * SW ownership on next descriptor; if not, use a bounce buffer. */ - if (likely(index + nr_txbb <= ring->size)) + if (likely(index + nr_txbb <= ring_size)) tx_desc = ring->buf + index * TXBB_SIZE; else { tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; } + /* Save mb in tx_info ring */ + tx_info = &ring->tx_info[index]; + tx_info->mb = mb; + tx_info->nr_txbb = nr_txbb; + tx_info->nr_segs = nr_segs; + + if (lso_header_size) { + memcpy(tx_desc->lso.header, mb->m_data, lso_header_size); + data = ((void *)&tx_desc->lso + ALIGN(lso_header_size + 4, + DS_SIZE)); + /* lso header is part of m_data. + * need to omit when mapping DMA */ + mb->m_data += lso_header_size; + mb->m_len -= lso_header_size; + } + else + data = &tx_desc->data; + + /* valid only for none inline segments */ + tx_info->data_offset = (void *)data - (void *)tx_desc; + + if (inl) { + tx_info->inl = 1; + } else { + for (i = 0, m = mb; i < nr_segs; i++, m = m->m_next) { + if (m->m_len == 0) { + i--; + continue; + } + dma = pci_map_single(mdev->dev->pdev, m->m_data, + m->m_len, PCI_DMA_TODEVICE); + data->addr = cpu_to_be64(dma); + data->lkey = cpu_to_be32(mdev->mr.key); + wmb(); + data->byte_count = cpu_to_be32(m->m_len); + data++; + } + if (lso_header_size) { + mb->m_data -= lso_header_size; + mb->m_len += lso_header_size; + } + tx_info->inl = 0; + } + + /* Prepare ctrl segement apart opcode+ownership, which depends on * whether LSO is used */ - if (mb->m_flags & M_VLANTAG) - vlan_tag = mb->m_pkthdr.ether_vtag; tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!vlan_tag; + tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * + !!vlan_tag; tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f; - tx_desc->ctrl.srcrb_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE | - MLX4_WQE_CTRL_SOLICITED); - if (mb->m_pkthdr.csum_flags & (CSUM_IP|CSUM_TCP|CSUM_UDP)) { - if (mb->m_pkthdr.csum_flags & CSUM_IP) - tx_desc->ctrl.srcrb_flags |= - cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM); - if (mb->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_UDP)) - tx_desc->ctrl.srcrb_flags |= - cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM); + tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; + if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO | + CSUM_TCP | CSUM_UDP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6)) { + if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) + tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM); + if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | + CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) + tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM); priv->port_stats.tx_chksum_offload++; - } + ring->tx_csum++; + } if (unlikely(priv->validate_loopback)) { /* Copy dst mac address to wqe */ - struct ether_header *ethh; - u64 mac; - u32 mac_l, mac_h; - - ethh = mtod(mb, struct ether_header *); - mac = mlx4_en_mac_to_u64(ethh->ether_dhost); - if (mac) { - mac_h = (u32) ((mac & 0xffff00000000ULL) >> 16); - mac_l = (u32) (mac & 0xffffffff); - tx_desc->ctrl.srcrb_flags |= cpu_to_be32(mac_h); - tx_desc->ctrl.imm = cpu_to_be32(mac_l); - } + struct ether_header *ethh; + u64 mac; + u32 mac_l, mac_h; + + ethh = mtod(mb, struct ether_header *); + mac = mlx4_en_mac_to_u64(ethh->ether_dhost); + if (mac) { + mac_h = (u32) ((mac & 0xffff00000000ULL) >> 16); + mac_l = (u32) (mac & 0xffffffff); + tx_desc->ctrl.srcrb_flags |= cpu_to_be32(mac_h); + tx_desc->ctrl.imm = cpu_to_be32(mac_l); + } } /* Handle LSO (TSO) packets */ if (lso_header_size) { int segsz; - /* Mark opcode as LSO */ op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) | - ((ring->prod & ring->size) ? + ((ring->prod & ring_size) ? cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); /* Fill in the LSO prefix */ tx_desc->lso.mss_hdr_size = cpu_to_be32( mb->m_pkthdr.tso_segsz << 16 | lso_header_size); - /* Copy headers; - * note that we already verified that it is linear */ - memcpy(tx_desc->lso.header, mb->m_data, lso_header_size); - data = ((void *) &tx_desc->lso + - ALIGN(lso_header_size + 4, DS_SIZE)); - - priv->port_stats.tso_packets++; - segsz = mb->m_pkthdr.tso_segsz; - i = ((mb->m_pkthdr.len - lso_header_size) / segsz) + - !!((mb->m_pkthdr.len - lso_header_size) % segsz); - ring->bytes += mb->m_pkthdr.len + (i - 1) * lso_header_size; - ring->packets += i; - mb->m_data += lso_header_size; - mb->m_len -= lso_header_size; + priv->port_stats.tso_packets++; + segsz = mb->m_pkthdr.tso_segsz; + i = ((mb->m_pkthdr.len - lso_header_size + segsz - 1) / segsz); + tx_info->nr_bytes= mb->m_pkthdr.len + (i - 1) * lso_header_size; + ring->packets += i; } else { /* Normal (Non LSO) packet */ op_own = cpu_to_be32(MLX4_OPCODE_SEND) | - ((ring->prod & ring->size) ? + ((ring->prod & ring_size) ? cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); - data = &tx_desc->data; - ring->bytes += max(mb->m_pkthdr.len, - (unsigned int)ETHER_MIN_LEN - ETHER_CRC_LEN); + tx_info->nr_bytes = max(mb->m_pkthdr.len, + (unsigned int)ETHER_MIN_LEN - ETHER_CRC_LEN); ring->packets++; } + ring->bytes += tx_info->nr_bytes; AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, mb->m_pkthdr.len); - /* Save mb in tx_info ring */ - tx_info = &ring->tx_info[index]; - tx_info->mb = mb; - tx_info->nr_txbb = nr_txbb; - tx_info->nr_segs = nr_segs; - /* valid only for non inline segments */ - tx_info->data_offset = (void *) data - (void *) tx_desc; - - if (!is_inline(mb)) { - for (i = 0, m = mb; i < nr_segs; i++, m = m->m_next) { - if (m->m_len == 0) { - i--; - continue; - } - dma = pci_map_single(mdev->dev->pdev, m->m_data, - m->m_len, PCI_DMA_TODEVICE); - data->addr = cpu_to_be64(dma); - data->lkey = cpu_to_be32(mdev->mr.key); - wmb(); - data->byte_count = cpu_to_be32(m->m_len); - data++; - } - if (lso_header_size) { - mb->m_data -= lso_header_size; - mb->m_len += lso_header_size; - } - tx_info->inl = 0; - } else { + if (tx_info->inl) { build_inline_wqe(tx_desc, mb, real_size, &vlan_tag, tx_ind); tx_info->inl = 1; } ring->prod += nr_txbb; + /* If we used a bounce buffer then copy descriptor back into place */ - if (bounce) + if (unlikely(bounce)) tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size); - if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tag) { - *(u32 *) (&tx_desc->ctrl.vlan_tag) |= ring->doorbell_qpn; + *(__be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn); op_own |= htonl((bf_index & 0xffff) << 8); /* Ensure new descirptor hits memory * before setting ownership of this descriptor to HW */ @@ -905,19 +985,16 @@ retry: wmb(); tx_desc->ctrl.owner_opcode = op_own; wmb(); - writel(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL); + writel(cpu_to_be32(ring->doorbell_qpn), ring->bf.uar->map + MLX4_SEND_DOORBELL); } return 0; - tx_drop: *mbp = NULL; m_freem(mb); - ring->errors++; return EINVAL; } - static int mlx4_en_transmit_locked(struct ifnet *dev, int tx_ind, struct mbuf *m) { @@ -926,12 +1003,12 @@ mlx4_en_transmit_locked(struct ifnet *dev, int tx_ind, struct mbuf *m) struct mbuf *next; int enqueued, err = 0; - ring = &priv->tx_ring[tx_ind]; + ring = priv->tx_ring[tx_ind]; if ((dev->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || priv->port_up == 0) { if (m != NULL) err = drbr_enqueue(dev, ring->br, m); - return (err); + return (err); } enqueued = 0; @@ -953,9 +1030,7 @@ mlx4_en_transmit_locked(struct ifnet *dev, int tx_ind, struct mbuf *m) enqueued++; dev->if_obytes += next->m_pkthdr.len; if (next->m_flags & M_MCAST) - dev->if_omcasts++; - /* Send a copy of the frame to the BPF listener */ - ETHER_BPF_MTAP(dev, next); + if_inc_counter(dev, IFCOUNTER_OMCASTS, 1); if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } @@ -974,12 +1049,11 @@ mlx4_en_tx_que(void *context, int pending) struct net_device *dev; struct mlx4_en_cq *cq; int tx_ind; - cq = context; dev = cq->dev; priv = dev->if_softc; tx_ind = cq->ring; - ring = &priv->tx_ring[tx_ind]; + ring = priv->tx_ring[tx_ind]; if (dev->if_drv_flags & IFF_DRV_RUNNING) { mlx4_en_xmit_poll(priv, tx_ind); spin_lock(&ring->tx_lock); @@ -998,12 +1072,13 @@ mlx4_en_transmit(struct ifnet *dev, struct mbuf *m) int i = 0, err = 0; /* Which queue to use */ - if ((m->m_flags & (M_FLOWID | M_VLANTAG)) == M_FLOWID) - i = m->m_pkthdr.flowid % (MLX4_EN_NUM_HASH_RINGS - 1); - else + if ((m->m_flags & (M_FLOWID | M_VLANTAG)) == M_FLOWID) { + i = m->m_pkthdr.flowid % (priv->tx_ring_num - 1); + } + else { i = mlx4_en_select_queue(dev, m); - - ring = &priv->tx_ring[i]; + } + ring = priv->tx_ring[i]; if (spin_trylock(&ring->tx_lock)) { err = mlx4_en_transmit_locked(dev, i, m); @@ -1012,7 +1087,7 @@ mlx4_en_transmit(struct ifnet *dev, struct mbuf *m) mlx4_en_xmit_poll(priv, i); } else { err = drbr_enqueue(dev, ring->br, m); - cq = &priv->tx_cq[i]; + cq = priv->tx_cq[i]; taskqueue_enqueue(cq->tq, &cq->cq_task); } @@ -1026,10 +1101,11 @@ void mlx4_en_qflush(struct ifnet *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_tx_ring *ring = priv->tx_ring; + struct mlx4_en_tx_ring *ring; struct mbuf *m; - for (int i = 0; i < priv->tx_ring_num; i++, ring++) { + for (int i = 0; i < priv->tx_ring_num; i++) { + ring = priv->tx_ring[i]; spin_lock(&ring->tx_lock); while ((m = buf_ring_dequeue_sc(ring->br)) != NULL) m_freem(m); diff --git a/sys/ofed/drivers/net/mlx4/eq.c b/sys/ofed/drivers/net/mlx4/eq.c index b585e8c..31fafbe 100644 --- a/sys/ofed/drivers/net/mlx4/eq.c +++ b/sys/ofed/drivers/net/mlx4/eq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -33,6 +33,7 @@ #include <linux/interrupt.h> #include <linux/slab.h> +#include <linux/module.h> #include <linux/mm.h> #include <linux/dma-mapping.h> @@ -86,6 +87,8 @@ static u64 get_async_ev_mask(struct mlx4_dev *dev) u64 async_ev_mask = MLX4_ASYNC_EVENT_MASK; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) async_ev_mask |= (1ull << MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT); + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT) + async_ev_mask |= (1ull << MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT); return async_ev_mask; } @@ -147,12 +150,9 @@ void mlx4_gen_slave_eqe(struct work_struct *work) /* All active slaves need to receive the event */ if (slave == ALL_SLAVES) { for (i = 0; i < dev->num_slaves; i++) { - if (i != dev->caps.function && - master->slave_state[i].active) - if (mlx4_GEN_EQE(dev, i, eqe)) - mlx4_warn(dev, "Failed to " - " generate event " - "for slave %d\n", i); + if (mlx4_GEN_EQE(dev, i, eqe)) + mlx4_warn(dev, "Failed to generate " + "event for slave %d\n", i); } } else { if (mlx4_GEN_EQE(dev, slave, eqe)) @@ -197,13 +197,13 @@ static void mlx4_slave_event(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe) { struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_slave_state *s_slave = - &priv->mfunc.master.slave_state[slave]; - if (!s_slave->active) { - /*mlx4_warn(dev, "Trying to pass event to inactive slave\n");*/ + if (slave < 0 || slave >= dev->num_slaves || + slave == dev->caps.function) + return; + + if (!priv->mfunc.master.slave_state[slave].active) return; - } slave_event(dev, slave, eqe); } @@ -375,7 +375,7 @@ out: EXPORT_SYMBOL(set_and_calc_slave_port_state); -int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr) +int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr, u16 sm_lid, u8 sm_sl) { struct mlx4_eqe eqe; @@ -386,6 +386,12 @@ int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr) eqe.event.port_mgmt_change.port = port; eqe.event.port_mgmt_change.params.port_info.changed_attr = cpu_to_be32((u32) attr); + if (attr & MSTR_SM_CHANGE_MASK) { + eqe.event.port_mgmt_change.params.port_info.mstr_sm_lid = + cpu_to_be16(sm_lid); + eqe.event.port_mgmt_change.params.port_info.mstr_sm_sl = + sm_sl; + } slave_event(dev, ALL_SLAVES, &eqe); return 0; @@ -446,6 +452,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) int i; enum slave_port_gen_event gen_event; unsigned long flags; + struct mlx4_vport_state *s_info; while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) { /* @@ -495,8 +502,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) break; case MLX4_EVENT_TYPE_SRQ_LIMIT: - mlx4_warn(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n", - __func__); + mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n", + __func__); + /* fall through */ case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: if (mlx4_is_master(dev)) { /* forward only to slave owning the SRQ */ @@ -513,17 +521,15 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eq->eqn, eq->cons_index, ret); break; } - mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x," - " event: %02x(%02x)\n", __func__, - slave, - be32_to_cpu(eqe->event.srq.srqn), - eqe->type, eqe->subtype); + mlx4_dbg(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n", + __func__, slave, + be32_to_cpu(eqe->event.srq.srqn), + eqe->type, eqe->subtype); if (!ret && slave != dev->caps.function) { - mlx4_warn(dev, "%s: sending event " - "%02x(%02x) to slave:%d\n", - __func__, eqe->type, - eqe->subtype, slave); + mlx4_dbg(dev, "%s: sending event %02x(%02x) to slave:%d\n", + __func__, eqe->type, + eqe->subtype, slave); mlx4_slave_event(dev, slave, eqe); break; } @@ -554,7 +560,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN" " to slave: %d, port:%d\n", __func__, i, port); - mlx4_slave_event(dev, i, eqe); + s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) + mlx4_slave_event(dev, i, eqe); } else { /* IB port */ set_and_calc_slave_port_state(dev, i, port, MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, @@ -578,7 +586,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) for (i = 0; i < dev->num_slaves; i++) { if (i == mlx4_master_func_num(dev)) continue; - mlx4_slave_event(dev, i, eqe); + s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) + mlx4_slave_event(dev, i, eqe); } else /* IB port */ /* port-up event will be sent to a slave when the @@ -635,11 +645,18 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) "for non master device\n"); break; } + memcpy(&priv->mfunc.master.comm_arm_bit_vector, eqe->event.comm_channel_arm.bit_vec, sizeof eqe->event.comm_channel_arm.bit_vec); - queue_work(priv->mfunc.master.comm_wq, - &priv->mfunc.master.comm_work); + + if (!queue_work(priv->mfunc.master.comm_wq, + &priv->mfunc.master.comm_work)) + mlx4_warn(dev, "Failed to queue comm channel work\n"); + + if (!queue_work(priv->mfunc.master.comm_wq, + &priv->mfunc.master.arm_comm_work)) + mlx4_warn(dev, "Failed to queue arm comm channel work\n"); break; case MLX4_EVENT_TYPE_FLR_EVENT: @@ -704,6 +721,27 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) (unsigned long) eqe); break; + case MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT: + switch (eqe->subtype) { + case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE: + mlx4_warn(dev, "Bad cable detected on port %u\n", + eqe->event.bad_cable.port); + break; + case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE: + mlx4_warn(dev, "Unsupported cable detected\n"); + break; + default: + mlx4_dbg(dev, "Unhandled recoverable error event " + "detected: %02x(%02x) on EQ %d at index %u. " + "owner=%x, nent=0x%x, ownership=%s\n", + eqe->type, eqe->subtype, eq->eqn, + eq->cons_index, eqe->owner, eq->nent, + !!(eqe->owner & 0x80) ^ + !!(eq->cons_index & eq->nent) ? "HW" : "SW"); + break; + } + break; + case MLX4_EVENT_TYPE_EEC_CATAS_ERROR: case MLX4_EVENT_TYPE_ECC_DETECT: default: @@ -747,7 +785,6 @@ static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr) int work = 0; int i; - writel(priv->eq_table.clr_mask, priv->eq_table.clr_int); for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) @@ -777,7 +814,7 @@ int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_slave_event_eq_info *event_eq = priv->mfunc.master.slave_state[slave].event_eq; u32 in_modifier = vhcr->in_modifier; - u32 eqn = in_modifier & 0x1FF; + u32 eqn = in_modifier & 0x3FF; u64 in_param = vhcr->in_param; int err = 0; int i; @@ -956,7 +993,7 @@ err_out_free_mtt: mlx4_mtt_cleanup(dev, &eq->mtt); err_out_free_eq: - mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn); + mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR); err_out_free_pages: for (i = 0; i < npages; ++i) @@ -1011,7 +1048,7 @@ static void mlx4_free_eq(struct mlx4_dev *dev, eq->page_list[i].map); kfree(eq->page_list); - mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn); + mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR); mlx4_free_cmd_mailbox(dev, mailbox); } @@ -1306,7 +1343,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev) } EXPORT_SYMBOL(mlx4_test_interrupts); -int mlx4_assign_eq(struct mlx4_dev *dev, char *name, int *vector) +int mlx4_assign_eq(struct mlx4_dev *dev, char* name, int * vector) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/sys/ofed/drivers/net/mlx4/fw.c b/sys/ofed/drivers/net/mlx4/fw.c index cf079ea..93f7f71 100644 --- a/sys/ofed/drivers/net/mlx4/fw.c +++ b/sys/ofed/drivers/net/mlx4/fw.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -32,8 +32,10 @@ * SOFTWARE. */ +#include <linux/etherdevice.h> #include <linux/mlx4/cmd.h> #include <linux/module.h> +#include <linux/cache.h> #include "fw.h" #include "icm.h" @@ -106,6 +108,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags) [40] = "UDP RSS support", [41] = "Unicast VEP steering support", [42] = "Multicast VEP steering support", + [44] = "Cross-channel (sync_qp) operations support", [48] = "Counters support", [59] = "Port management change event support", [60] = "eSwitch support", @@ -126,7 +129,18 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [0] = "RSS support", [1] = "RSS Toeplitz Hash Function support", [2] = "RSS XOR Hash Function support", - [3] = "Device manage flow steering support" + [3] = "Device manage flow steering support", + [4] = "FSM (MAC unti-spoofing) support", + [5] = "VST (control vlan insertion/stripping) support", + [6] = "Dynamic QP updates support", + [7] = "Loopback source checks support", + [8] = "Device managed flow steering IPoIB support", + [9] = "ETS configuration support", + [10] = "ETH backplane autoneg report", + [11] = "Ethernet Flow control statistics support", + [12] = "Recoverable error events support", + [13] = "Time stamping support", + [14] = "Report driver version to FW support" }; int i; @@ -170,7 +184,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { struct mlx4_priv *priv = mlx4_priv(dev); - u8 field; + u8 field, port; u32 size; int err = 0; @@ -178,23 +192,32 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1 #define QUERY_FUNC_CAP_PF_BHVR_OFFSET 0x4 #define QUERY_FUNC_CAP_FMR_OFFSET 0x8 -#define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x10 -#define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x14 -#define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET 0x18 -#define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET 0x20 -#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x24 -#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28 +#define QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP 0x10 +#define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP 0x14 +#define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP 0x18 +#define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP 0x20 +#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP 0x24 +#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP 0x28 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c #define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30 +#define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x50 +#define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x54 +#define QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET 0x58 +#define QUERY_FUNC_CAP_MPT_QUOTA_OFFSET 0x60 +#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x64 +#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x68 + #define QUERY_FUNC_CAP_FMR_FLAG 0x80 #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 #define QUERY_FUNC_CAP_FLAG_ETH 0x80 +#define QUERY_FUNC_CAP_FLAG_QUOTAS 0x10 /* when opcode modifier = 1 */ #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3 -#define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET 0x8 -#define QUERY_FUNC_CAP_ETH_PROPS_OFFSET 0xc +#define QUERY_FUNC_CAP_FLAGS0_OFFSET 0x8 +#define QUERY_FUNC_CAP_FLAGS1_OFFSET 0xc +#define QUERY_FUNC_CAP_COUNTER_INDEX_OFFSET 0xd #define QUERY_FUNC_CAP_QP0_TUNNEL 0x10 #define QUERY_FUNC_CAP_QP0_PROXY 0x14 @@ -203,35 +226,45 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40 #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80 +#define QUERY_FUNC_CAP_PROPS_DEF_COUNTER 0x20 #define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80 if (vhcr->op_modifier == 1) { + port = vhcr->in_modifier; /* phys-port = logical-port */ + MLX4_PUT(outbox->buf, port, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + field = 0; - /* ensure force vlan and force mac bits are not set */ - MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); /* ensure that phy_wqe_gid bit is not set */ - MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET); - field = vhcr->in_modifier; /* phys-port = logical-port */ - MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + /* ensure force vlan and force mac bits are not set + * and that default counter bit is set + */ + field = QUERY_FUNC_CAP_PROPS_DEF_COUNTER; /* def counter */ + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET); + + /* There is always default counter legal or sink counter */ + field = mlx4_get_default_counter_index(dev, slave, vhcr->in_modifier); + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_COUNTER_INDEX_OFFSET); /* size is now the QP number */ - size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1; + size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + port - 1; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); size += 2; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL); - size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1; + size = dev->phys_caps.base_proxy_sqpn + 8 * slave + port - 1; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY); size += 2; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY); } else if (vhcr->op_modifier == 0) { - /* enable rdma and ethernet interfaces */ - field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA); + /* enable rdma and ethernet interfaces, and new quota locations */ + field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA | + QUERY_FUNC_CAP_FLAG_QUOTAS); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET); field = dev->caps.num_ports; @@ -245,12 +278,18 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = priv->mfunc.master.res_tracker.res_alloc[RES_QP].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); + size = dev->caps.num_qps; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP); size = priv->mfunc.master.res_tracker.res_alloc[RES_SRQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); + size = dev->caps.num_srqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP); size = priv->mfunc.master.res_tracker.res_alloc[RES_CQ].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); + size = dev->caps.num_cqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP); size = dev->caps.num_eqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); @@ -260,12 +299,17 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); + size = dev->caps.num_mpts; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP); size = priv->mfunc.master.res_tracker.res_alloc[RES_MTT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); + size = dev->caps.num_mtts; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP); size = dev->caps.num_mgms + dev->caps.num_amgms; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP); } else err = -EINVAL; @@ -280,7 +324,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, u32 *outbox; u8 field, op_modifier; u32 size; - int err = 0; + int err = 0, quotas = 0; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ @@ -304,6 +348,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, goto out; } func_cap->flags = field; + quotas = !!(func_cap->flags & QUERY_FUNC_CAP_FLAG_QUOTAS); MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); func_cap->num_ports = field; @@ -311,29 +356,50 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); func_cap->pf_context_behaviour = size; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); - func_cap->qp_quota = size & 0xFFFFFF; + if (quotas) { + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); + func_cap->qp_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); + func_cap->srq_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); + func_cap->cq_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); + func_cap->mpt_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); + func_cap->mtt_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); + func_cap->mcg_quota = size & 0xFFFFFF; + + } else { + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET_DEP); + func_cap->qp_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); - func_cap->srq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET_DEP); + func_cap->srq_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); - func_cap->cq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP); + func_cap->cq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET_DEP); + func_cap->mpt_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET_DEP); + func_cap->mtt_quota = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP); + func_cap->mcg_quota = size & 0xFFFFFF; + } MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); func_cap->max_eq = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); func_cap->reserved_eq = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); - func_cap->mpt_quota = size & 0xFFFFFF; - - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); - func_cap->mtt_quota = size & 0xFFFFFF; - - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); - func_cap->mcg_quota = size & 0xFFFFFF; goto out; } @@ -344,7 +410,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, } if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) { - MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); + MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET); if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) { mlx4_err(dev, "VLAN is enforced on this port\n"); err = -EPROTONOSUPPORT; @@ -357,7 +423,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, goto out; } } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) { - MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); + MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) { mlx4_err(dev, "phy_wqe_gid is " "enforced on this ib port\n"); @@ -373,6 +439,14 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, goto out; } + MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET); + if (field & QUERY_FUNC_CAP_PROPS_DEF_COUNTER) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_COUNTER_INDEX_OFFSET); + func_cap->def_counter_index = field; + } else { + func_cap->def_counter_index = MLX4_SINK_COUNTER_INDEX; + } + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; @@ -466,7 +540,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67 #define QUERY_DEV_CAP_MAX_BASIC_COUNTERS_OFFSET 0x68 #define QUERY_DEV_CAP_MAX_EXTENDED_COUNTERS_OFFSET 0x6c +#define QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET 0x70 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 +#define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET 0x70 +#define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET 0x74 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82 @@ -480,6 +557,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92 #define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 +#define QUERY_DEV_CAP_ETS_CFG_OFFSET 0x9c #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 dev_cap->flags2 = 0; @@ -551,16 +629,23 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->num_ports = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET); dev_cap->max_msg_sz = 1 << (field & 0x1f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_PORT_FLOWSTATS_COUNTERS_OFFSET); + if (field & 0x10) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); if (field & 0x80) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; + MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); + if (field & 0x80) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB; dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET); dev_cap->fs_max_num_qp_per_entry = field; MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); - dev_cap->timestamp_support = field & 0x80; + if (field & 0x80) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_TS; MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); dev_cap->flags = flags | (u64)ext_flags << 32; @@ -644,6 +729,16 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) QUERY_DEV_CAP_BMME_FLAGS_OFFSET); MLX4_GET(dev_cap->reserved_lkey, outbox, QUERY_DEV_CAP_RSVD_LKEY_OFFSET); + MLX4_GET(field32, outbox, QUERY_DEV_CAP_ETS_CFG_OFFSET); + if (field32 & (1 << 0)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP; + if (field32 & (1 << 7)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT; + if (field32 & (1 << 8)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW; + if (field32 & (1 << 13)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG; + MLX4_GET(dev_cap->max_icm_sz, outbox, QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET); if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS) @@ -655,6 +750,16 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(dev_cap->max_extended_counters, outbox, QUERY_DEV_CAP_MAX_EXTENDED_COUNTERS_OFFSET); + MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); + if (field32 & (1 << 16)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; + if (field32 & (1 << 19)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_LB_SRC_CHK; + if (field32 & (1 << 20)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM; + if (field32 & (1 << 26)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL; + if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { for (i = 1; i <= dev_cap->num_ports; ++i) { MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); @@ -786,6 +891,14 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); + /* turn off device-managed steering capability if not enabled */ + if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) { + MLX4_GET(field, outbox->buf, + QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); + field &= 0x7f; + MLX4_PUT(outbox->buf, field, + QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); + } return 0; } @@ -800,8 +913,10 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, u8 port_type; u16 short_field; int err; + int admin_link_state; #define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0 +#define MLX4_PORT_LINK_UP_MASK 0x80 #define QUERY_PORT_CUR_MAX_PKEY_OFFSET 0x0c #define QUERY_PORT_CUR_MAX_GID_OFFSET 0x0e @@ -810,12 +925,8 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, MLX4_CMD_NATIVE); if (!err && dev->caps.function != slave) { - /* set slave default_mac address */ - MLX4_GET(def_mac, outbox->buf, QUERY_PORT_MAC_OFFSET); - def_mac += slave << 8; - /* if config MAC in DB use it */ - if (priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac) - def_mac = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac; + /* set slave default_mac address to be zero MAC */ + def_mac = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac; MLX4_PUT(outbox->buf, def_mac, QUERY_PORT_MAC_OFFSET); /* get port type - currently only eth is enabled */ @@ -827,6 +938,12 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, /* set port type to currently operating port type */ port_type |= (dev->caps.port_type[vhcr->in_modifier] & 0x3); + admin_link_state = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.link_state; + if (IFLA_VF_LINK_STATE_ENABLE == admin_link_state) + port_type |= MLX4_PORT_LINK_UP_MASK; + else if (IFLA_VF_LINK_STATE_DISABLE == admin_link_state) + port_type &= ~MLX4_PORT_LINK_UP_MASK; + MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); @@ -1078,14 +1195,14 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev) MLX4_GET(fw->comm_bar, outbox, QUERY_FW_COMM_BAR_OFFSET); fw->comm_bar = (fw->comm_bar >> 6) * 2; mlx4_dbg(dev, "Communication vector bar:%d offset:0x%llx\n", - fw->comm_bar, (long long)fw->comm_base); + fw->comm_bar, (unsigned long long)fw->comm_base); mlx4_dbg(dev, "FW size %d KB\n", fw->fw_pages >> 2); MLX4_GET(fw->clock_offset, outbox, QUERY_FW_CLOCK_OFFSET); MLX4_GET(fw->clock_bar, outbox, QUERY_FW_CLOCK_BAR); fw->clock_bar = (fw->clock_bar >> 6) * 2; mlx4_dbg(dev, "Internal clock bar:%d offset:0x%llx\n", - fw->comm_bar, (long long)fw->comm_base); + fw->comm_bar, (unsigned long long)fw->comm_base); /* * Round up number of system pages needed in case @@ -1127,7 +1244,7 @@ int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave, return 0; } -static void get_board_id(void *vsd, char *board_id) +static void get_board_id(void *vsd, char *board_id, char *vsdstr) { int i; @@ -1135,9 +1252,16 @@ static void get_board_id(void *vsd, char *board_id) #define VSD_OFFSET_SIG2 0xde #define VSD_OFFSET_MLX_BOARD_ID 0xd0 #define VSD_OFFSET_TS_BOARD_ID 0x20 +#define VSD_LEN 0xd0 #define VSD_SIGNATURE_TOPSPIN 0x5ad + memset(vsdstr, 0, MLX4_VSD_LEN); + + for (i = 0; i < VSD_LEN / 4; i++) + ((u32 *)vsdstr)[i] = + swab32(*(u32 *)(vsd + i * 4)); + memset(board_id, 0, MLX4_BOARD_ID_LEN); if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && @@ -1164,6 +1288,7 @@ int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter) #define QUERY_ADAPTER_OUT_SIZE 0x100 #define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10 #define QUERY_ADAPTER_VSD_OFFSET 0x20 +#define QUERY_ADAPTER_VSD_VENDOR_ID_OFFSET 0x1e mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) @@ -1177,8 +1302,11 @@ int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter) MLX4_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); + adapter->vsd_vendor_id = be16_to_cpup((u16 *)outbox + + QUERY_ADAPTER_VSD_VENDOR_ID_OFFSET / 2); + get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4, - adapter->board_id); + adapter->board_id, adapter->vsd); out: mlx4_free_cmd_mailbox(dev, mailbox); @@ -1189,13 +1317,16 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) { struct mlx4_cmd_mailbox *mailbox; __be32 *inbox; + u32 mw_enable; int err; #define INIT_HCA_IN_SIZE 0x200 +#define INIT_HCA_DRV_NAME_FOR_FW_MAX_SIZE 64 #define INIT_HCA_VERSION_OFFSET 0x000 #define INIT_HCA_VERSION 2 #define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e #define INIT_HCA_FLAGS_OFFSET 0x014 +#define INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET 0x018 #define INIT_HCA_QPC_OFFSET 0x020 #define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) #define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) @@ -1217,6 +1348,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18) #define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b) #define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6 +#define INIT_HCA_DRIVER_VERSION_OFFSET 0x140 #define INIT_HCA_FS_PARAM_OFFSET 0x1d0 #define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00) #define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x12) @@ -1227,6 +1359,8 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_FS_IB_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x26) #define INIT_HCA_TPT_OFFSET 0x0f0 #define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00) +#define INIT_HCA_TPT_MW_OFFSET (INIT_HCA_TPT_OFFSET + 0x08) +#define INIT_HCA_TPT_MW_ENABLE (1 << 31) #define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b) #define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10) #define INIT_HCA_CMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x18) @@ -1244,7 +1378,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION; *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) = - ((ilog2(CACHE_LINE_SIZE) - 4) << 5) | (1 << 4); + ((ilog2(cache_line_size()) - 4) << 5) | (1 << 4); #if defined(__LITTLE_ENDIAN) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); @@ -1290,6 +1424,17 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) dev->caps.cqe_size = 32; } + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT) + *(inbox + INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET / 4) |= cpu_to_be32(1 << 31); + + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW) { + strncpy((u8 *)mailbox->buf + INIT_HCA_DRIVER_VERSION_OFFSET, + DRV_NAME_FOR_FW, + INIT_HCA_DRV_NAME_FOR_FW_MAX_SIZE - 1); + mlx4_dbg(dev, "Reporting Driver Version to FW: %s\n", + (u8 *)mailbox->buf + INIT_HCA_DRIVER_VERSION_OFFSET); + } + /* QPC/EEC/CQC/EQC/RDMARC attributes */ MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); @@ -1339,15 +1484,16 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) INIT_HCA_LOG_MC_HASH_SZ_OFFSET); MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); - if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0) { + if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0) MLX4_PUT(inbox, (u8) (1 << 3), INIT_HCA_UC_STEERING_OFFSET); - } } /* TPT attributes */ MLX4_PUT(inbox, param->dmpt_base, INIT_HCA_DMPT_BASE_OFFSET); + mw_enable = param->mw_enable ? INIT_HCA_TPT_MW_ENABLE : 0; + MLX4_PUT(inbox, mw_enable, INIT_HCA_TPT_MW_OFFSET); MLX4_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET); MLX4_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET); MLX4_PUT(inbox, param->cmpt_base, INIT_HCA_CMPT_BASE_OFFSET); @@ -1373,6 +1519,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; u32 dword_field; + u32 mw_enable; int err; u8 byte_field; @@ -1414,13 +1561,12 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, param->steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; } else { MLX4_GET(byte_field, outbox, INIT_HCA_UC_STEERING_OFFSET); - if (byte_field & 0x8) { + if (byte_field & 0x8) param->steering_mode = MLX4_STEERING_MODE_B0; - } - else { + else param->steering_mode = MLX4_STEERING_MODE_A0; - } } + /* steering attributes */ if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); MLX4_GET(param->log_mc_entry_sz, outbox, @@ -1447,6 +1593,9 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* TPT attributes */ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); + MLX4_GET(mw_enable, outbox, INIT_HCA_TPT_MW_OFFSET); + param->mw_enable = (mw_enable & INIT_HCA_TPT_MW_ENABLE) == + INIT_HCA_TPT_MW_ENABLE; MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET); MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET); @@ -1682,6 +1831,15 @@ out: } EXPORT_SYMBOL_GPL(mlx4_query_diag_counters); +int mlx4_MOD_STAT_CFG_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + return -EPERM; +} + #define MLX4_WOL_SETUP_MODE (5 << 28) int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port) { diff --git a/sys/ofed/drivers/net/mlx4/fw.h b/sys/ofed/drivers/net/mlx4/fw.h index 5fe7782..bff7440 100644 --- a/sys/ofed/drivers/net/mlx4/fw.h +++ b/sys/ofed/drivers/net/mlx4/fw.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two @@ -143,10 +143,13 @@ struct mlx4_func_cap { u32 qp1_proxy_qpn; u8 physical_port; u8 port_flags; + u8 def_counter_index; }; struct mlx4_adapter { + u16 vsd_vendor_id; char board_id[MLX4_BOARD_ID_LEN]; + char vsd[MLX4_VSD_LEN]; u8 inta_pin; }; @@ -175,6 +178,8 @@ struct mlx4_init_hca_param { u8 log_mpt_sz; u8 log_uar_sz; u8 uar_page_sz; /* log pg sz in 4k chunks */ + u8 mw_enable; /* Enable memory windows */ + u8 fs_hash_enable_bits; u8 steering_mode; /* for QUERY_HCA */ u64 dev_cap_enabled; }; diff --git a/sys/ofed/drivers/net/mlx4/icm.c b/sys/ofed/drivers/net/mlx4/icm.c index d18fde1..25ae7b7 100644 --- a/sys/ofed/drivers/net/mlx4/icm.c +++ b/sys/ofed/drivers/net/mlx4/icm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -35,6 +35,7 @@ #include <linux/mm.h> #include <linux/scatterlist.h> #include <linux/slab.h> +#include <linux/math64.h> #include <linux/mlx4/cmd.h> @@ -288,10 +289,14 @@ void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj) if (--table->icm[i]->refcount == 0) { offset = (u64) i * MLX4_TABLE_CHUNK_SIZE; - mlx4_UNMAP_ICM(dev, table->virt + offset, - MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); - mlx4_free_icm(dev, table->icm[i], table->coherent); - table->icm[i] = NULL; + + if (!mlx4_UNMAP_ICM(dev, table->virt + offset, + MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE)) { + mlx4_free_icm(dev, table->icm[i], table->coherent); + table->icm[i] = NULL; + } else { + pr_warn("mlx4_core: mlx4_UNMAP_ICM failed.\n"); + } } mutex_unlock(&table->mutex); @@ -378,7 +383,7 @@ void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, } int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, - u64 virt, int obj_size, u32 nobj, int reserved, + u64 virt, int obj_size, u64 nobj, int reserved, int use_lowmem, int use_coherent) { int obj_per_chunk; @@ -388,7 +393,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, u64 size; obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size; - num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk; + num_icm = div_u64((nobj + obj_per_chunk - 1), obj_per_chunk); table->icm = kcalloc(num_icm, sizeof *table->icm, GFP_KERNEL); if (!table->icm) @@ -431,11 +436,15 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, err: for (i = 0; i < num_icm; ++i) if (table->icm[i]) { - mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE, - MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); - mlx4_free_icm(dev, table->icm[i], use_coherent); + if (!mlx4_UNMAP_ICM(dev, + virt + i * MLX4_TABLE_CHUNK_SIZE, + MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE)) { + mlx4_free_icm(dev, table->icm[i], use_coherent); + } else { + pr_warn("mlx4_core: mlx4_UNMAP_ICM failed.\n"); + return -ENOMEM; + } } - kfree(table->icm); return -ENOMEM; @@ -443,14 +452,22 @@ err: void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table) { - int i; + int i, err = 0; for (i = 0; i < table->num_icm; ++i) if (table->icm[i]) { - mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE, - MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); - mlx4_free_icm(dev, table->icm[i], table->coherent); + err = mlx4_UNMAP_ICM(dev, + table->virt + i * MLX4_TABLE_CHUNK_SIZE, + MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE); + if (!err) { + mlx4_free_icm(dev, table->icm[i], + table->coherent); + } else { + pr_warn("mlx4_core: mlx4_UNMAP_ICM failed.\n"); + break; + } } - kfree(table->icm); + if (!err) + kfree(table->icm); } diff --git a/sys/ofed/drivers/net/mlx4/icm.h b/sys/ofed/drivers/net/mlx4/icm.h index f83ad81..f7a2537 100644 --- a/sys/ofed/drivers/net/mlx4/icm.h +++ b/sys/ofed/drivers/net/mlx4/icm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -37,6 +37,7 @@ #include <linux/list.h> #include <linux/pci.h> #include <linux/mutex.h> +#include <linux/scatterlist.h> #define MLX4_ICM_CHUNK_LEN \ ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \ @@ -78,7 +79,7 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 start, u32 end); int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, - u64 virt, int obj_size, u32 nobj, int reserved, + u64 virt, int obj_size, u64 nobj, int reserved, int use_lowmem, int use_coherent); void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table); void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, dma_addr_t *dma_handle); @@ -122,5 +123,7 @@ static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter) return sg_dma_len(&iter->chunk->mem[iter->page_idx]); } +int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm); +int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev); #endif /* MLX4_ICM_H */ diff --git a/sys/ofed/drivers/net/mlx4/intf.c b/sys/ofed/drivers/net/mlx4/intf.c index 0f6754b..1416298 100644 --- a/sys/ofed/drivers/net/mlx4/intf.c +++ b/sys/ofed/drivers/net/mlx4/intf.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,6 +32,7 @@ */ #include <linux/slab.h> +#include <linux/module.h> #include "mlx4.h" @@ -160,7 +161,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev) list_for_each_entry(intf, &intf_list, list) mlx4_remove_device(intf, priv); - list_del(&priv->dev_list); + list_del_init(&priv->dev_list); mutex_unlock(&intf_mutex); } diff --git a/sys/ofed/drivers/net/mlx4/main.c b/sys/ofed/drivers/net/mlx4/main.c index 5fe77d6..12c5836 100644 --- a/sys/ofed/drivers/net/mlx4/main.c +++ b/sys/ofed/drivers/net/mlx4/main.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -33,6 +33,11 @@ * SOFTWARE. */ +#include <linux/kmod.h> +/* + * kmod.h must be included before module.h since it includes (indirectly) sys/module.h + * To use the FBSD macro sys/module.h should define MODULE_VERSION before linux/module does. +*/ #include <linux/module.h> #include <linux/errno.h> #include <linux/pci.h> @@ -41,6 +46,7 @@ #include <linux/io-mapping.h> #include <linux/delay.h> #include <linux/netdevice.h> +#include <linux/string.h> #include <linux/fs.h> #include <linux/mlx4/device.h> @@ -49,11 +55,11 @@ #include "mlx4.h" #include "fw.h" #include "icm.h" +#include "mlx4_stats.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); struct workqueue_struct *mlx4_wq; @@ -69,7 +75,7 @@ MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); static int msi_x = 1; module_param(msi_x, int, 0444); -MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); +MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x (non-SRIOV only)"); #else /* CONFIG_PCI_MSI */ @@ -85,14 +91,60 @@ int mlx4_blck_lb = 1; module_param_named(block_loopback, mlx4_blck_lb, int, 0644); MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 " "(default: 1)"); +enum { + DEFAULT_DOMAIN = 0, + BDF_STR_SIZE = 8, /* bb:dd.f- */ + DBDF_STR_SIZE = 13 /* mmmm:bb:dd.f- */ +}; -static int num_vfs; -module_param(num_vfs, int, 0444); -MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0"); +enum { + NUM_VFS, + PROBE_VF, + PORT_TYPE_ARRAY +}; -static int probe_vf; -module_param(probe_vf, int, 0644); -MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)"); +enum { + VALID_DATA, + INVALID_DATA, + INVALID_STR +}; + +struct param_data { + int id; + struct mlx4_dbdf2val_lst dbdf2val; +}; + +static struct param_data num_vfs = { + .id = NUM_VFS, + .dbdf2val = { + .name = "num_vfs param", + .num_vals = 1, + .def_val = {0}, + .range = {0, MLX4_MAX_NUM_VF} + } +}; +module_param_string(num_vfs, num_vfs.dbdf2val.str, + sizeof(num_vfs.dbdf2val.str), 0444); +MODULE_PARM_DESC(num_vfs, + "Either single value (e.g. '5') to define uniform num_vfs value for all devices functions\n" + "\t\tor a string to map device function numbers to their num_vfs values (e.g. '0000:04:00.0-5,002b:1c:0b.a-15').\n" + "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for num_vfs value (e.g. 15)."); + +static struct param_data probe_vf = { + .id = PROBE_VF, + .dbdf2val = { + .name = "probe_vf param", + .num_vals = 1, + .def_val = {0}, + .range = {0, MLX4_MAX_NUM_VF} + } +}; +module_param_string(probe_vf, probe_vf.dbdf2val.str, + sizeof(probe_vf.dbdf2val.str), 0444); +MODULE_PARM_DESC(probe_vf, + "Either single value (e.g. '3') to define uniform number of VFs to probe by the pf driver for all devices functions\n" + "\t\tor a string to map device function numbers to their probe_vf values (e.g. '0000:04:00.0-3,002b:1c:0b.a-13').\n" + "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for probe_vf value (e.g. 13)."); int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; @@ -115,10 +167,10 @@ module_param_named(fast_drop, fast_drop, int, 0444); MODULE_PARM_DESC(fast_drop, "Enable fast packet drop when no recieve WQEs are posted"); -int mlx4_enable_64b_cqe_eqe; +int mlx4_enable_64b_cqe_eqe = 1; module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644); MODULE_PARM_DESC(enable_64b_cqe_eqe, - "Enable 64 byte CQEs/EQEs when the the FW supports this, if nonzero"); + "Enable 64 byte CQEs/EQEs when the the FW supports this if non-zero (default: 1)"); #define HCA_GLOBAL_CAP_MASK 0 @@ -144,13 +196,23 @@ module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment " "(0-7) (default: 0)"); -static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE}; -#if 0 -static int arr_argc = 2; -module_param_array(port_type_array, int, &arr_argc, 0444); -MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default " - "1 for IB, 2 for Ethernet"); -#endif +static struct param_data port_type_array = { + .id = PORT_TYPE_ARRAY, + .dbdf2val = { + .name = "port_type_array param", + .num_vals = 2, + .def_val = {MLX4_PORT_TYPE_ETH, MLX4_PORT_TYPE_ETH}, + .range = {MLX4_PORT_TYPE_IB, MLX4_PORT_TYPE_NA} + } +}; +module_param_string(port_type_array, port_type_array.dbdf2val.str, + sizeof(port_type_array.dbdf2val.str), 0444); +MODULE_PARM_DESC(port_type_array, + "Either pair of values (e.g. '1,2') to define uniform port1/port2 types configuration for all devices functions\n" + "\t\tor a string to map device function numbers to their pair of port types values (e.g. '0000:04:00.0-1;2,002b:1c:0b.a-1;1').\n" + "\t\tValid port types: 1-ib, 2-eth, 3-auto, 4-N/A\n" + "\t\tIn case that only one port is available use the N/A port type for port2 (e.g '1,4')."); + struct mlx4_port_config { struct list_head list; @@ -170,7 +232,7 @@ static struct mlx4_profile mod_param_profile = { .num_cq = 16, .num_mcg = 13, .num_mpt = 19, - .num_mtt = 0, /* max(20, 2*MTTs for host memory)) */ + .num_mtt_segs = 0, /* max(20, 2*MTTs for host memory)) */ }; module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444); @@ -197,7 +259,7 @@ MODULE_PARM_DESC(log_num_mpt, "log maximum number of memory protection table entries per " "HCA (default: 19)"); -module_param_named(log_num_mtt, mod_param_profile.num_mtt, int, 0444); +module_param_named(log_num_mtt, mod_param_profile.num_mtt_segs, int, 0444); MODULE_PARM_DESC(log_num_mtt, "log maximum number of memory translation table segments per " "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))"); @@ -206,9 +268,263 @@ enum { MLX4_IF_STATE_BASIC, MLX4_IF_STATE_EXTENDED }; -static void process_mod_param_profile(struct mlx4_profile *profile) + +static inline u64 dbdf_to_u64(int domain, int bus, int dev, int fn) { + return (domain << 20) | (bus << 12) | (dev << 4) | fn; +} + +static inline void pr_bdf_err(const char *dbdf, const char *pname) +{ + pr_warn("mlx4_core: '%s' is not valid bdf in '%s'\n", dbdf, pname); +} + +static inline void pr_val_err(const char *dbdf, const char *pname, + const char *val) +{ + pr_warn("mlx4_core: value '%s' of bdf '%s' in '%s' is not valid\n" + , val, dbdf, pname); +} + +static inline void pr_out_of_range_bdf(const char *dbdf, int val, + struct mlx4_dbdf2val_lst *dbdf2val) +{ + pr_warn("mlx4_core: value %d in bdf '%s' of '%s' is out of its valid range (%d,%d)\n" + , val, dbdf, dbdf2val->name , dbdf2val->range.min, + dbdf2val->range.max); +} + +static inline void pr_out_of_range(struct mlx4_dbdf2val_lst *dbdf2val) +{ + pr_warn("mlx4_core: value of '%s' is out of its valid range (%d,%d)\n" + , dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max); +} + +static inline int is_in_range(int val, struct mlx4_range *r) +{ + return (val >= r->min && val <= r->max); +} + +static int update_defaults(struct param_data *pdata) +{ + long int val[MLX4_MAX_BDF_VALS]; + int ret; + char *t, *p = pdata->dbdf2val.str; + char sval[32]; + int val_len; + + if (!strlen(p) || strchr(p, ':') || strchr(p, '.') || strchr(p, ';')) + return INVALID_STR; + + switch (pdata->id) { + case PORT_TYPE_ARRAY: + t = strchr(p, ','); + if (!t || t == p || (t - p) > sizeof(sval)) + return INVALID_STR; + + val_len = t - p; + strncpy(sval, p, val_len); + sval[val_len] = 0; + + ret = kstrtol(sval, 0, &val[0]); + if (ret == -EINVAL) + return INVALID_STR; + if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { + pr_out_of_range(&pdata->dbdf2val); + return INVALID_DATA; + } + + ret = kstrtol(t + 1, 0, &val[1]); + if (ret == -EINVAL) + return INVALID_STR; + if (ret || !is_in_range(val[1], &pdata->dbdf2val.range)) { + pr_out_of_range(&pdata->dbdf2val); + return INVALID_DATA; + } + + pdata->dbdf2val.tbl[0].val[0] = val[0]; + pdata->dbdf2val.tbl[0].val[1] = val[1]; + break; + + case NUM_VFS: + case PROBE_VF: + ret = kstrtol(p, 0, &val[0]); + if (ret == -EINVAL) + return INVALID_STR; + if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { + pr_out_of_range(&pdata->dbdf2val); + return INVALID_DATA; + } + pdata->dbdf2val.tbl[0].val[0] = val[0]; + break; + } + pdata->dbdf2val.tbl[1].dbdf = MLX4_ENDOF_TBL; + + return VALID_DATA; +} + +int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst) +{ + int domain, bus, dev, fn; + u64 dbdf; + char *p, *t, *v; + char tmp[32]; + char sbdf[32]; + char sep = ','; + int j, k, str_size, i = 1; + int prfx_size; + + p = dbdf2val_lst->str; + + for (j = 0; j < dbdf2val_lst->num_vals; j++) + dbdf2val_lst->tbl[0].val[j] = dbdf2val_lst->def_val[j]; + dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; + + str_size = strlen(dbdf2val_lst->str); + + if (str_size == 0) + return 0; + + while (strlen(p)) { + prfx_size = BDF_STR_SIZE; + sbdf[prfx_size] = 0; + strncpy(sbdf, p, prfx_size); + domain = DEFAULT_DOMAIN; + if (sscanf(sbdf, "%02x:%02x.%x-", &bus, &dev, &fn) != 3) { + prfx_size = DBDF_STR_SIZE; + sbdf[prfx_size] = 0; + strncpy(sbdf, p, prfx_size); + if (sscanf(sbdf, "%04x:%02x:%02x.%x-", &domain, &bus, + &dev, &fn) != 4) { + pr_bdf_err(sbdf, dbdf2val_lst->name); + goto err; + } + sprintf(tmp, "%04x:%02x:%02x.%x-", domain, bus, dev, + fn); + } else { + sprintf(tmp, "%02x:%02x.%x-", bus, dev, fn); + } + + if (strnicmp(sbdf, tmp, sizeof(tmp))) { + pr_bdf_err(sbdf, dbdf2val_lst->name); + goto err; + } + + dbdf = dbdf_to_u64(domain, bus, dev, fn); + + for (j = 1; j < i; j++) + if (dbdf2val_lst->tbl[j].dbdf == dbdf) { + pr_warn("mlx4_core: in '%s', %s appears multiple times\n" + , dbdf2val_lst->name, sbdf); + goto err; + } + + if (i >= MLX4_DEVS_TBL_SIZE) { + pr_warn("mlx4_core: Too many devices in '%s'\n" + , dbdf2val_lst->name); + goto err; + } + + p += prfx_size; + t = strchr(p, sep); + t = t ? t : p + strlen(p); + if (p >= t) { + pr_val_err(sbdf, dbdf2val_lst->name, ""); + goto err; + } + + for (k = 0; k < dbdf2val_lst->num_vals; k++) { + char sval[32]; + long int val; + int ret, val_len; + char vsep = ';'; + + v = (k == dbdf2val_lst->num_vals - 1) ? t : strchr(p, vsep); + if (!v || v > t || v == p || (v - p) > sizeof(sval)) { + pr_val_err(sbdf, dbdf2val_lst->name, p); + goto err; + } + val_len = v - p; + strncpy(sval, p, val_len); + sval[val_len] = 0; + + ret = kstrtol(sval, 0, &val); + if (ret) { + if (strchr(p, vsep)) + pr_warn("mlx4_core: too many vals in bdf '%s' of '%s'\n" + , sbdf, dbdf2val_lst->name); + else + pr_val_err(sbdf, dbdf2val_lst->name, + sval); + goto err; + } + if (!is_in_range(val, &dbdf2val_lst->range)) { + pr_out_of_range_bdf(sbdf, val, dbdf2val_lst); + goto err; + } + + dbdf2val_lst->tbl[i].val[k] = val; + p = v; + if (p[0] == vsep) + p++; + } + + dbdf2val_lst->tbl[i].dbdf = dbdf; + if (strlen(p)) { + if (p[0] != sep) { + pr_warn("mlx4_core: expect separator '%c' before '%s' in '%s'\n" + , sep, p, dbdf2val_lst->name); + goto err; + } + p++; + } + i++; + if (i < MLX4_DEVS_TBL_SIZE) + dbdf2val_lst->tbl[i].dbdf = MLX4_ENDOF_TBL; + } + + return 0; + +err: + dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; + pr_warn("mlx4_core: The value of '%s' is incorrect. The value is discarded!\n" + , dbdf2val_lst->name); + + return -EINVAL; +} +EXPORT_SYMBOL(mlx4_fill_dbdf2val_tbl); + +int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx, + int *val) +{ + u64 dbdf; + int i = 1; + + *val = tbl[0].val[idx]; + if (!pdev) + return -EINVAL; + + if (!pdev->bus) { + return -EINVAL; + } + + dbdf = dbdf_to_u64(pci_get_domain(pdev->dev.bsddev), pci_get_bus(pdev->dev.bsddev), + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + + while ((i < MLX4_DEVS_TBL_SIZE) && (tbl[i].dbdf != MLX4_ENDOF_TBL)) { + if (tbl[i].dbdf == dbdf) { + *val = tbl[i].val[idx]; + return 0; + } + i++; + } + + return 0; +} +EXPORT_SYMBOL(mlx4_get_val); +static void process_mod_param_profile(struct mlx4_profile *profile) +{ vm_size_t hwphyssz; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz); @@ -232,10 +548,10 @@ static void process_mod_param_profile(struct mlx4_profile *profile) * That limits us to 4TB of memory registration per HCA with * 4KB pages, which is probably OK for the next few months. */ - if (mod_param_profile.num_mtt) - profile->num_mtt = 1 << mod_param_profile.num_mtt; + if (mod_param_profile.num_mtt_segs) + profile->num_mtt_segs = 1 << mod_param_profile.num_mtt_segs; else { - profile->num_mtt = + profile->num_mtt_segs = roundup_pow_of_two(max_t(unsigned, 1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg), min(1UL << @@ -245,7 +561,7 @@ static void process_mod_param_profile(struct mlx4_profile *profile) >> log_mtts_per_seg))); /* set the actual value, so it will be reflected to the user using the sysfs */ - mod_param_profile.num_mtt = ilog2(profile->num_mtt * (1 << log_mtts_per_seg)); + mod_param_profile.num_mtt_segs = ilog2(profile->num_mtt_segs); } } @@ -406,14 +722,26 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_PORT_TYPE_IB) dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; else { - /* if IB and ETH are supported, we set the port + /* + * if IB and ETH are supported, we set the port * type according to user selection of port type; - * if user selected none, take the FW hint */ - if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE) + * if there is no user selection, take the FW hint + */ + int pta; + mlx4_get_val(port_type_array.dbdf2val.tbl, + pci_physfn(dev->pdev), i - 1, + &pta); + if (pta == MLX4_PORT_TYPE_NONE) { dev->caps.port_type[i] = dev->caps.suggested_type[i] ? MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB; - else - dev->caps.port_type[i] = port_type_array[i - 1]; + } else if (pta == MLX4_PORT_TYPE_NA) { + mlx4_err(dev, "Port %d is valid port. " + "It is not allowed to configure its type to N/A(%d)\n", + i, MLX4_PORT_TYPE_NA); + return -EINVAL; + } else { + dev->caps.port_type[i] = pta; + } } } /* @@ -427,6 +755,9 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)); + /* Disablling auto sense for default Eth ports support */ + mlx4_priv(dev)->sense.sense_allowed[i] = 0; + /* * If "default_sense" bit is set, we move the port to "AUTO" mode * and perform sense_port FW command to try and set the correct @@ -478,9 +809,12 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; dev->caps.sync_qp = dev_cap->sync_qp; + if (dev->pdev->device == 0x1003) + dev->caps.cq_flags |= MLX4_DEV_CAP_CQ_FLAG_IO; + dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; - if (!mlx4_enable_64b_cqe_eqe) { + if (!mlx4_enable_64b_cqe_eqe && !mlx4_is_slave(dev)) { if (dev_cap->flags & (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); @@ -494,6 +828,11 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) mlx4_is_master(dev)) dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; + if (!mlx4_is_slave(dev)) { + for (i = 0; i < dev->caps.num_ports; ++i) + dev->caps.def_counter_index[i] = i << 1; + } + return 0; } /*The function checks if there are live vf, return the num of them*/ @@ -634,6 +973,11 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) if (err) mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n"); + if (!hca_param.mw_enable) { + dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; + dev->caps.bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; + } + page_size = ~dev->caps.page_size_cap + 1; mlx4_warn(dev, "HCA minimum page size:%d\n", page_size); if (page_size > PAGE_SIZE) { @@ -711,6 +1055,8 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; + dev->caps.def_counter_index[i - 1] = func_cap.def_counter_index; + dev->caps.port_mask[i] = dev->caps.port_type[i]; err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, &dev->caps.gid_table_len[i], @@ -745,6 +1091,9 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.cqe_size = 32; } + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; + mlx4_warn(dev, "Timestamping is not supported in slave mode.\n"); + slave_adjust_steering_mode(dev, &dev_cap, &hca_param); return 0; @@ -760,6 +1109,27 @@ err_mem: return err; } +static void mlx4_request_modules(struct mlx4_dev *dev) +{ + int port; + int has_ib_port = false; + int has_eth_port = false; +#define EN_DRV_NAME "mlx4_en" +#define IB_DRV_NAME "mlx4_ib" + + for (port = 1; port <= dev->caps.num_ports; port++) { + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) + has_ib_port = true; + else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) + has_eth_port = true; + } + + if (has_ib_port) + request_module_nowait(IB_DRV_NAME); + if (has_eth_port) + request_module_nowait(EN_DRV_NAME); +} + /* * Change the port configuration of the device. * Every user of this function must hold the port mutex. @@ -791,6 +1161,11 @@ int mlx4_change_port_types(struct mlx4_dev *dev, } mlx4_set_port_mask(dev); err = mlx4_register_device(dev); + if (err) { + mlx4_err(dev, "Failed to register device\n"); + goto out; + } + mlx4_request_modules(dev); } out: @@ -841,7 +1216,14 @@ static ssize_t set_port_type(struct device *dev, return -EINVAL; } - mlx4_stop_sense(mdev); + if ((info->tmp_type & mdev->caps.supported_type[info->port]) != + info->tmp_type) { + mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n", + info->port); + return -EINVAL; + } + + mlx4_stop_sense(mdev); mutex_lock(&priv->port_mutex); /* Possible type is always the one that was delivered */ mdev->caps.possible_type[info->port] = info->tmp_type; @@ -984,7 +1366,7 @@ err_set_port: static int mlx4_load_fw(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int err; + int err, unmap_flag = 0; priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages, GFP_HIGHUSER | __GFP_NOWARN, 0); @@ -1008,10 +1390,13 @@ static int mlx4_load_fw(struct mlx4_dev *dev) return 0; err_unmap_fa: - mlx4_UNMAP_FA(dev); + unmap_flag = mlx4_UNMAP_FA(dev); + if (unmap_flag) + pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); err_free: - mlx4_free_icm(dev, priv->fw.fw_icm, 0); + if (!unmap_flag) + mlx4_free_icm(dev, priv->fw.fw_icm, 0); return err; } @@ -1081,7 +1466,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_priv *priv = mlx4_priv(dev); u64 aux_pages; int num_eqs; - int err; + int err, unmap_flag = 0; err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages); if (err) { @@ -1272,10 +1657,13 @@ err_unmap_cmpt: mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); err_unmap_aux: - mlx4_UNMAP_ICM_AUX(dev); + unmap_flag = mlx4_UNMAP_ICM_AUX(dev); + if (unmap_flag) + pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); err_free_aux: - mlx4_free_icm(dev, priv->fw.aux_icm, 0); + if (!unmap_flag) + mlx4_free_icm(dev, priv->fw.aux_icm, 0); return err; } @@ -1299,8 +1687,10 @@ static void mlx4_free_icms(struct mlx4_dev *dev) mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); - mlx4_UNMAP_ICM_AUX(dev); - mlx4_free_icm(dev, priv->fw.aux_icm, 0); + if (!mlx4_UNMAP_ICM_AUX(dev)) + mlx4_free_icm(dev, priv->fw.aux_icm, 0); + else + pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); } static void mlx4_slave_exit(struct mlx4_dev *dev) @@ -1340,13 +1730,16 @@ static void unmap_bf_area(struct mlx4_dev *dev) io_mapping_free(mlx4_priv(dev)->bf_mapping); } -cycle_t mlx4_read_clock(struct mlx4_dev *dev) +int mlx4_read_clock(struct mlx4_dev *dev) { u32 clockhi, clocklo, clockhi1; cycle_t cycles; int i; struct mlx4_priv *priv = mlx4_priv(dev); + if (!priv->clock_mapping) + return -ENOTSUPP; + for (i = 0; i < 10; i++) { clockhi = swab32(readl(priv->clock_mapping)); clocklo = swab32(readl(priv->clock_mapping + 4)); @@ -1376,6 +1769,25 @@ static int map_internal_clock(struct mlx4_dev *dev) return 0; } + +int mlx4_get_internal_clock_params(struct mlx4_dev *dev, + struct mlx4_clock_params *params) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + if (mlx4_is_slave(dev)) + return -ENOTSUPP; + if (!params) + return -EINVAL; + + params->bar = priv->fw.clock_bar; + params->offset = priv->fw.clock_offset; + params->size = MLX4_CLOCK_SIZE; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params); + static void unmap_internal_clock(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1388,13 +1800,16 @@ static void mlx4_close_hca(struct mlx4_dev *dev) { unmap_internal_clock(dev); unmap_bf_area(dev); - if (mlx4_is_slave(dev)) + if (mlx4_is_slave(dev)) { mlx4_slave_exit(dev); - else { + } else { mlx4_CLOSE_HCA(dev, 0); mlx4_free_icms(dev); - mlx4_UNMAP_FA(dev); - mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); + + if (!mlx4_UNMAP_FA(dev)) + mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); + else + pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); } } @@ -1497,13 +1912,9 @@ static int choose_log_fs_mgm_entry_size(int qp_per_entry) static void choose_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { - // This is only valid to the integrated driver. - // The new ported mlx4_core driver is in B0 steering mode by default - // and the old mlx4_en driver is in A0 steering mode by default. - // If high_rate_steer == TRUE it means that A0 steering mode is on. - // The integration fix is to hard code high_rate_steer to TRUE. - high_rate_steer = 1; + int nvfs; + mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(dev->pdev), 0, &nvfs); if (high_rate_steer && !mlx4_is_mfunc(dev)) { dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER | MLX4_DEV_CAP_FLAG_VEP_UC_STEER); @@ -1512,9 +1923,8 @@ static void choose_steering_mode(struct mlx4_dev *dev, if (mlx4_log_num_mgm_entry_size == -1 && dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && - dev_cap->fs_log_max_ucast_qp_range_size == 0 && (!mlx4_is_mfunc(dev) || - (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) && + (dev_cap->fs_max_num_qp_per_entry >= (nvfs + 1))) && choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= MLX4_MIN_MGM_LOG_ENTRY_SIZE) { dev->oper_log_mgm_entry_size = @@ -1523,9 +1933,8 @@ static void choose_steering_mode(struct mlx4_dev *dev, dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; } else { if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && - dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) { + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) dev->caps.steering_mode = MLX4_STEERING_MODE_B0; - } else { dev->caps.steering_mode = MLX4_STEERING_MODE_A0; @@ -1618,11 +2027,42 @@ static int mlx4_init_hca(struct mlx4_dev *dev) if (err) goto err_stop_fw; + init_hca.mw_enable = 1; + err = mlx4_INIT_HCA(dev, &init_hca); if (err) { mlx4_err(dev, "INIT_HCA command failed, aborting.\n"); goto err_free_icm; } + + /* + * Read HCA frequency by QUERY_HCA command + */ + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { + memset(&init_hca, 0, sizeof(init_hca)); + err = mlx4_QUERY_HCA(dev, &init_hca); + if (err) { + mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n"); + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; + } else { + dev->caps.hca_core_clock = + init_hca.hca_core_clock; + } + + /* In case we got HCA frequency 0 - disable timestamping + * to avoid dividing by zero + */ + if (!dev->caps.hca_core_clock) { + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; + mlx4_err(dev, "HCA frequency is 0. Timestamping is not supported."); + } else if (map_internal_clock(dev)) { + /* Map internal clock, + * in case of failure disable timestamping + */ + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; + mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n"); + } + } } else { err = mlx4_init_slave(dev); if (err) { @@ -1640,39 +2080,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) if (map_bf_area(dev)) mlx4_dbg(dev, "Failed to map blue flame area\n"); - /* - * Read HCA frequency by QUERY_HCA command - */ - if (dev->caps.cq_timestamp) { - memset(&init_hca, 0, sizeof(init_hca)); - err = mlx4_QUERY_HCA(dev, &init_hca); - if (err) { - mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n"); - dev->caps.cq_timestamp = 0; - } else - dev->caps.hca_core_clock = init_hca.hca_core_clock; - - /* - * In case we got HCA frequency 0 - disable timestamping - * to avoid dividing by zero - */ - if (!dev->caps.hca_core_clock) { - dev->caps.cq_timestamp = 0; - mlx4_err(dev, "HCA frequency is 0. " - "Timestamping is not supported."); - } - - /* - * Map internal clock, in case of failure disable timestamping - */ - if (map_internal_clock(dev)) { - dev->caps.cq_timestamp = 0; - mlx4_err(dev, "Failed to map internal clock. " - "Timestamping is not supported.\n"); - } - } - - /*Only the master set the ports, all the rest got it from it.*/ + /* Only the master set the ports, all the rest got it from it.*/ if (!mlx4_is_slave(dev)) mlx4_set_port_mask(dev); @@ -1684,6 +2092,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev) priv->eq_table.inta_pin = adapter.inta_pin; memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); + memcpy(dev->vsd, adapter.vsd, sizeof(dev->vsd)); + dev->vsd_vendor_id = adapter.vsd_vendor_id; if (!mlx4_is_slave(dev)) kfree(dev_cap); @@ -1691,7 +2101,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev) return 0; unmap_bf: - unmap_internal_clock(dev); + if (!mlx4_is_slave(dev)) + unmap_internal_clock(dev); unmap_bf_area(dev); if (mlx4_is_slave(dev)) { @@ -1713,10 +2124,11 @@ err_free_icm: err_stop_fw: if (!mlx4_is_slave(dev)) { - mlx4_UNMAP_FA(dev); - mlx4_free_icm(dev, priv->fw.fw_icm, 0); - if (dev_cap) - kfree(dev_cap); + if (!mlx4_UNMAP_FA(dev)) + mlx4_free_icm(dev, priv->fw.fw_icm, 0); + else + pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); + kfree(dev_cap); } return err; } @@ -1724,92 +2136,539 @@ err_stop_fw: static int mlx4_init_counters_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int res; - int nent_pow2; + int nent_pow2, port_indx, vf_index, num_counters; + int res, index = 0; + struct counter_index *new_counter_index; + if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) return -ENOENT; - nent_pow2 = roundup_pow_of_two(dev->caps.max_counters); - res = mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2, - nent_pow2 - 1, 0, - nent_pow2 - dev->caps.max_counters); - if (res) - return res; + if (!mlx4_is_slave(dev) && + dev->caps.max_counters == dev->caps.max_extended_counters) { + res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0, + MLX4_CMD_SET_IF_STAT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (res) { + mlx4_err(dev, "Failed to set extended counters (err=%d)\n", res); + return res; + } + } + + mutex_init(&priv->counters_table.mutex); - if (dev->caps.max_counters == dev->caps.max_basic_counters) + if (mlx4_is_slave(dev)) { + for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { + INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]); + if (dev->caps.def_counter_index[port_indx] != 0xFF) { + new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); + if (!new_counter_index) + return -ENOMEM; + new_counter_index->index = dev->caps.def_counter_index[port_indx]; + list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]); + } + } + mlx4_dbg(dev, "%s: slave allocated %d counters for %d ports\n", + __func__, dev->caps.num_ports, dev->caps.num_ports); return 0; + } - res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0, - MLX4_CMD_SET_IF_STAT, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + nent_pow2 = roundup_pow_of_two(dev->caps.max_counters); + + for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { + INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]); + /* allocating 2 counters per port for PFs */ + /* For the PF, the ETH default counters are 0,2; */ + /* and the RoCE default counters are 1,3 */ + for (num_counters = 0; num_counters < 2; num_counters++, index++) { + new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); + if (!new_counter_index) + return -ENOMEM; + new_counter_index->index = index; + list_add_tail(&new_counter_index->list, + &priv->counters_table.global_port_list[port_indx]); + } + } + + if (mlx4_is_master(dev)) { + for (vf_index = 0; vf_index < dev->num_vfs; vf_index++) { + for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { + INIT_LIST_HEAD(&priv->counters_table.vf_list[vf_index][port_indx]); + new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); + if (!new_counter_index) + return -ENOMEM; + if (index < nent_pow2 - 2) { + new_counter_index->index = index; + index++; + } else { + new_counter_index->index = MLX4_SINK_COUNTER_INDEX; + } + + list_add_tail(&new_counter_index->list, + &priv->counters_table.vf_list[vf_index][port_indx]); + } + } + + res = mlx4_bitmap_init(&priv->counters_table.bitmap, + nent_pow2, nent_pow2 - 1, + index, 1); + mlx4_dbg(dev, "%s: master allocated %d counters for %d VFs\n", + __func__, index, dev->num_vfs); + } else { + res = mlx4_bitmap_init(&priv->counters_table.bitmap, + nent_pow2, nent_pow2 - 1, + index, 1); + mlx4_dbg(dev, "%s: native allocated %d counters for %d ports\n", + __func__, index, dev->caps.num_ports); + } - if (res) - mlx4_err(dev, "Failed to set extended counters (err=%d)\n", - res); - return res; + return 0; } static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) { - if (!mlx4_is_slave(dev) && - (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) - mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap); + struct mlx4_priv *priv = mlx4_priv(dev); + int i, j; + struct counter_index *port, *tmp_port; + struct counter_index *vf, *tmp_vf; + + mutex_lock(&priv->counters_table.mutex); + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) { + for (i = 0; i < dev->caps.num_ports; i++) { + list_for_each_entry_safe(port, tmp_port, + &priv->counters_table.global_port_list[i], + list) { + list_del(&port->list); + kfree(port); + } + } + if (!mlx4_is_slave(dev)) { + for (i = 0; i < dev->num_vfs; i++) { + for (j = 0; j < dev->caps.num_ports; j++) { + list_for_each_entry_safe(vf, tmp_vf, + &priv->counters_table.vf_list[i][j], + list) { + /* clear the counter statistic */ + if (__mlx4_clear_if_stat(dev, vf->index)) + mlx4_dbg(dev, "%s: reset counter %d failed\n", + __func__, vf->index); + list_del(&vf->list); + kfree(vf); + } + } + } + mlx4_bitmap_cleanup(&priv->counters_table.bitmap); + } + } + mutex_unlock(&priv->counters_table.mutex); } -int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) +int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); + int i, first; + struct counter_index *vf, *tmp_vf; + + /* clean VF's counters for the next useg */ + if (slave > 0 && slave <= dev->num_vfs) { + mlx4_dbg(dev, "%s: free counters of slave(%d)\n" + , __func__, slave); + + mutex_lock(&priv->counters_table.mutex); + for (i = 0; i < dev->caps.num_ports; i++) { + first = 0; + list_for_each_entry_safe(vf, tmp_vf, + &priv->counters_table.vf_list[slave - 1][i], + list) { + /* clear the counter statistic */ + if (__mlx4_clear_if_stat(dev, vf->index)) + mlx4_dbg(dev, "%s: reset counter %d failed\n", + __func__, vf->index); + if (first++ && vf->index != MLX4_SINK_COUNTER_INDEX) { + mlx4_dbg(dev, "%s: delete counter index %d for slave %d and port %d\n" + , __func__, vf->index, slave, i + 1); + mlx4_bitmap_free(&priv->counters_table.bitmap, vf->index, MLX4_USE_RR); + list_del(&vf->list); + kfree(vf); + } else { + mlx4_dbg(dev, "%s: can't delete default counter index %d for slave %d and port %d\n" + , __func__, vf->index, slave, i + 1); + } + } + } + mutex_unlock(&priv->counters_table.mutex); + } + + return 0; +} + +int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct counter_index *new_counter_index; if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) return -ENOENT; - *idx = mlx4_bitmap_alloc(&priv->counters_bitmap); - if (*idx == -1) - return -ENOMEM; + if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || + (port < 0) || (port > MLX4_MAX_PORTS)) { + mlx4_dbg(dev, "%s: invalid slave(%d) or port(%d) index\n", + __func__, slave, port); + return -EINVAL; + } + /* handle old guest request does not support request by port index */ + if (port == 0) { + *idx = MLX4_SINK_COUNTER_INDEX; + mlx4_dbg(dev, "%s: allocated default counter index %d for slave %d port %d\n" + , __func__, *idx, slave, port); + return 0; + } + + mutex_lock(&priv->counters_table.mutex); + + *idx = mlx4_bitmap_alloc(&priv->counters_table.bitmap); + /* if no resources return the default counter of the slave and port */ + if (*idx == -1) { + if (slave == 0) { /* its the ethernet counter ?????? */ + new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next, + struct counter_index, + list); + } else { + new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next, + struct counter_index, + list); + } + + *idx = new_counter_index->index; + mlx4_dbg(dev, "%s: allocated defualt counter index %d for slave %d port %d\n" + , __func__, *idx, slave, port); + goto out; + } + + if (slave == 0) { /* native or master */ + new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); + if (!new_counter_index) + goto no_mem; + new_counter_index->index = *idx; + list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); + } else { + new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); + if (!new_counter_index) + goto no_mem; + new_counter_index->index = *idx; + list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[slave - 1][port - 1]); + } + + mlx4_dbg(dev, "%s: allocated counter index %d for slave %d port %d\n" + , __func__, *idx, slave, port); +out: + mutex_unlock(&priv->counters_table.mutex); return 0; + +no_mem: + mlx4_bitmap_free(&priv->counters_table.bitmap, *idx, MLX4_USE_RR); + mutex_unlock(&priv->counters_table.mutex); + *idx = MLX4_SINK_COUNTER_INDEX; + mlx4_dbg(dev, "%s: failed err (%d)\n" + , __func__, -ENOMEM); + return -ENOMEM; } -int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) +int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx) { u64 out_param; int err; + struct mlx4_priv *priv = mlx4_priv(dev); + struct counter_index *new_counter_index, *c_index; if (mlx4_is_mfunc(dev)) { - err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER, + err = mlx4_cmd_imm(dev, 0, &out_param, + ((u32) port) << 8 | (u32) RES_COUNTER, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); - if (!err) + if (!err) { *idx = get_param_l(&out_param); - + if (*idx == MLX4_SINK_COUNTER_INDEX) + return -ENOSPC; + + mutex_lock(&priv->counters_table.mutex); + c_index = list_entry(priv->counters_table.global_port_list[port - 1].next, + struct counter_index, + list); + mutex_unlock(&priv->counters_table.mutex); + if (c_index->index == *idx) + return -EEXIST; + + if (mlx4_is_slave(dev)) { + new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); + if (!new_counter_index) { + mlx4_counter_free(dev, port, *idx); + return -ENOMEM; + } + new_counter_index->index = *idx; + mutex_lock(&priv->counters_table.mutex); + list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); + mutex_unlock(&priv->counters_table.mutex); + mlx4_dbg(dev, "%s: allocated counter index %d for port %d\n" + , __func__, *idx, port); + } + } return err; } - return __mlx4_counter_alloc(dev, idx); + return __mlx4_counter_alloc(dev, 0, port, idx); } EXPORT_SYMBOL_GPL(mlx4_counter_alloc); -void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx) +void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx) { - mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx); - return; + /* check if native or slave and deletes acordingly */ + struct mlx4_priv *priv = mlx4_priv(dev); + struct counter_index *pf, *tmp_pf; + struct counter_index *vf, *tmp_vf; + int first; + + + if (idx == MLX4_SINK_COUNTER_INDEX) { + mlx4_dbg(dev, "%s: try to delete default counter index %d for port %d\n" + , __func__, idx, port); + return; + } + + if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || + (port < 0) || (port > MLX4_MAX_PORTS)) { + mlx4_warn(dev, "%s: deletion failed due to invalid slave(%d) or port(%d) index\n" + , __func__, slave, idx); + return; + } + + mutex_lock(&priv->counters_table.mutex); + if (slave == 0) { + first = 0; + list_for_each_entry_safe(pf, tmp_pf, + &priv->counters_table.global_port_list[port - 1], + list) { + /* the first 2 counters are reserved */ + if (pf->index == idx) { + /* clear the counter statistic */ + if (__mlx4_clear_if_stat(dev, pf->index)) + mlx4_dbg(dev, "%s: reset counter %d failed\n", + __func__, pf->index); + if (1 < first && idx != MLX4_SINK_COUNTER_INDEX) { + list_del(&pf->list); + kfree(pf); + mlx4_dbg(dev, "%s: delete counter index %d for native device (%d) port %d\n" + , __func__, idx, slave, port); + mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); + goto out; + } else { + mlx4_dbg(dev, "%s: can't delete default counter index %d for native device (%d) port %d\n" + , __func__, idx, slave, port); + goto out; + } + } + first++; + } + mlx4_dbg(dev, "%s: can't delete counter index %d for native device (%d) port %d\n" + , __func__, idx, slave, port); + } else { + first = 0; + list_for_each_entry_safe(vf, tmp_vf, + &priv->counters_table.vf_list[slave - 1][port - 1], + list) { + /* the first element is reserved */ + if (vf->index == idx) { + /* clear the counter statistic */ + if (__mlx4_clear_if_stat(dev, vf->index)) + mlx4_dbg(dev, "%s: reset counter %d failed\n", + __func__, vf->index); + if (first) { + list_del(&vf->list); + kfree(vf); + mlx4_dbg(dev, "%s: delete counter index %d for slave %d port %d\n", + __func__, idx, slave, port); + mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); + goto out; + } else { + mlx4_dbg(dev, "%s: can't delete default slave (%d) counter index %d for port %d\n" + , __func__, slave, idx, port); + goto out; + } + } + first++; + } + mlx4_dbg(dev, "%s: can't delete slave (%d) counter index %d for port %d\n" + , __func__, slave, idx, port); + } + +out: + mutex_unlock(&priv->counters_table.mutex); } -void mlx4_counter_free(struct mlx4_dev *dev, u32 idx) +void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx) { u64 in_param = 0; + struct mlx4_priv *priv = mlx4_priv(dev); + struct counter_index *counter, *tmp_counter; + int first = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, idx); - mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE, + mlx4_cmd(dev, in_param, + ((u32) port) << 8 | (u32) RES_COUNTER, + RES_OP_RESERVE, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + + if (mlx4_is_slave(dev) && idx != MLX4_SINK_COUNTER_INDEX) { + mutex_lock(&priv->counters_table.mutex); + list_for_each_entry_safe(counter, tmp_counter, + &priv->counters_table.global_port_list[port - 1], + list) { + if (counter->index == idx && first++) { + list_del(&counter->list); + kfree(counter); + mlx4_dbg(dev, "%s: delete counter index %d for port %d\n" + , __func__, idx, port); + mutex_unlock(&priv->counters_table.mutex); + return; + } + } + mutex_unlock(&priv->counters_table.mutex); + } + return; } - __mlx4_counter_free(dev, idx); + __mlx4_counter_free(dev, 0, port, idx); } EXPORT_SYMBOL_GPL(mlx4_counter_free); +int __mlx4_clear_if_stat(struct mlx4_dev *dev, + u8 counter_index) +{ + struct mlx4_cmd_mailbox *if_stat_mailbox = NULL; + int err = 0; + u32 if_stat_in_mod = (counter_index & 0xff) | (1 << 31); + + if (counter_index == MLX4_SINK_COUNTER_INDEX) + return -EINVAL; + + if (mlx4_is_slave(dev)) + return 0; + + if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(if_stat_mailbox)) { + err = PTR_ERR(if_stat_mailbox); + return err; + } + + err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0, + MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, if_stat_mailbox); + return err; +} + +u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct counter_index *new_counter_index; + + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) { + mlx4_dbg(dev, "%s: return counter index %d for slave %d port (MLX4_PORT_TYPE_IB) %d\n", + __func__, MLX4_SINK_COUNTER_INDEX, slave, port); + return (u8)MLX4_SINK_COUNTER_INDEX; + } + + mutex_lock(&priv->counters_table.mutex); + if (slave == 0) { + new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next, + struct counter_index, + list); + } else { + new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next, + struct counter_index, + list); + } + mutex_unlock(&priv->counters_table.mutex); + + mlx4_dbg(dev, "%s: return counter index %d for slave %d port %d\n", + __func__, new_counter_index->index, slave, port); + + + return (u8)new_counter_index->index; +} + +int mlx4_get_vport_ethtool_stats(struct mlx4_dev *dev, int port, + struct mlx4_en_vport_stats *vport_stats, + int reset) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_mailbox *if_stat_mailbox = NULL; + union mlx4_counter *counter; + int err = 0; + u32 if_stat_in_mod; + struct counter_index *vport, *tmp_vport; + + if (!vport_stats) + return -EINVAL; + + if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(if_stat_mailbox)) { + err = PTR_ERR(if_stat_mailbox); + return err; + } + + mutex_lock(&priv->counters_table.mutex); + list_for_each_entry_safe(vport, tmp_vport, + &priv->counters_table.global_port_list[port - 1], + list) { + if (vport->index == MLX4_SINK_COUNTER_INDEX) + continue; + + memset(if_stat_mailbox->buf, 0, sizeof(union mlx4_counter)); + if_stat_in_mod = (vport->index & 0xff) | ((reset & 1) << 31); + err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, + if_stat_in_mod, 0, + MLX4_CMD_QUERY_IF_STAT, + MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (err) { + mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n", + __func__, vport->index); + goto if_stat_out; + } + counter = (union mlx4_counter *)if_stat_mailbox->buf; + if ((counter->control.cnt_mode & 0xf) == 1) { + vport_stats->rx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastFrames); + vport_stats->rx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxUnicastFrames); + vport_stats->rx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxMulticastFrames); + vport_stats->tx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastFrames); + vport_stats->tx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxUnicastFrames); + vport_stats->tx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxMulticastFrames); + vport_stats->rx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastOctets); + vport_stats->rx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxUnicastOctets); + vport_stats->rx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxMulticastOctets); + vport_stats->tx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastOctets); + vport_stats->tx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxUnicastOctets); + vport_stats->tx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxMulticastOctets); + vport_stats->rx_errors += be64_to_cpu(counter->ext.counters[0].IfRxErrorFrames); + vport_stats->rx_dropped += be64_to_cpu(counter->ext.counters[0].IfRxNoBufferFrames); + vport_stats->tx_errors += be64_to_cpu(counter->ext.counters[0].IfTxDroppedFrames); + } + } + +if_stat_out: + mutex_unlock(&priv->counters_table.mutex); + mlx4_free_cmd_mailbox(dev, if_stat_mailbox); + + return err; +} +EXPORT_SYMBOL_GPL(mlx4_get_vport_ethtool_stats); + static int mlx4_setup_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1862,11 +2721,21 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) goto err_xrcd_table_free; } + if (!mlx4_is_slave(dev)) { + err = mlx4_init_mcg_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize " + "multicast group table (err=%d), aborting.\n", + err); + goto err_mr_table_free; + } + } + err = mlx4_init_eq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "event queue table (err=%d), aborting.\n", err); - goto err_mr_table_free; + goto err_mcg_table_free; } err = mlx4_cmd_use_events(dev); @@ -1917,22 +2786,14 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) goto err_srq_table_free; } - if (!mlx4_is_slave(dev)) { - err = mlx4_init_mcg_table(dev); - if (err) { - mlx4_err(dev, "Failed to initialize " - "multicast group table (err=%d), aborting.\n", - err); - goto err_qp_table_free; - } - - err = mlx4_init_counters_table(dev); - if (err && err != -ENOENT) { - mlx4_err(dev, "Failed to initialize counters table (err=%d), " - "aborting.\n", err); - goto err_mcg_table_free; - } + err = mlx4_init_counters_table(dev); + if (err && err != -ENOENT) { + mlx4_err(dev, "Failed to initialize counters table (err=%d), " + "aborting.\n", err); + goto err_qp_table_free; + } + if (!mlx4_is_slave(dev)) { for (port = 1; port <= dev->caps.num_ports; port++) { ib_port_default_caps = 0; err = mlx4_get_port_ib_caps(dev, port, @@ -1954,10 +2815,7 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) } } - if (mlx4_is_mfunc(dev)) - dev->caps.port_ib_mtu[port] = IB_MTU_2048; - else - dev->caps.port_ib_mtu[port] = IB_MTU_4096; + dev->caps.port_ib_mtu[port] = IB_MTU_4096; err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ? dev->caps.pkey_table_len[port] : -1); @@ -1974,9 +2832,6 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) err_counters_table_free: mlx4_cleanup_counters_table(dev); -err_mcg_table_free: - mlx4_cleanup_mcg_table(dev); - err_qp_table_free: mlx4_cleanup_qp_table(dev); @@ -1992,6 +2847,10 @@ err_cmd_poll: err_eq_table_free: mlx4_cleanup_eq_table(dev); +err_mcg_table_free: + if (!mlx4_is_slave(dev)) + mlx4_cleanup_mcg_table(dev); + err_mr_table_free: mlx4_cleanup_mr_table(dev); @@ -2026,6 +2885,9 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, nreq); + if (msi_x > 1 && !mlx4_is_mfunc(dev)) + nreq = min_t(int, nreq, msi_x); + entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); if (!entries) goto no_msi; @@ -2237,6 +3099,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) struct mlx4_dev *dev; int err; int port; + int nvfs, prb_vf; pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev)); @@ -2246,13 +3109,16 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) "aborting.\n"); return err; } - if (num_vfs > MLX4_MAX_NUM_VF) { + + mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(pdev), 0, &nvfs); + mlx4_get_val(probe_vf.dbdf2val.tbl, pci_physfn(pdev), 0, &prb_vf); + if (nvfs > MLX4_MAX_NUM_VF) { dev_err(&pdev->dev, "There are more VF's (%d) than allowed(%d)\n", - num_vfs, MLX4_MAX_NUM_VF); + nvfs, MLX4_MAX_NUM_VF); return -EINVAL; } - if (num_vfs < 0) { + if (nvfs < 0) { dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n"); return -EINVAL; } @@ -2315,6 +3181,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) dev = &priv->dev; dev->pdev = pdev; + INIT_LIST_HEAD(&priv->dev_list); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); @@ -2332,7 +3199,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { /* When acting as pf, we normally skip vfs unless explicitly * requested to probe them. */ - if (num_vfs && extended_func_num(pdev) > probe_vf) { + if (nvfs && extended_func_num(pdev) > prb_vf) { mlx4_warn(dev, "Skipping virtual function:%d\n", extended_func_num(pdev)); err = -ENODEV; @@ -2356,9 +3223,9 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) } } - if (num_vfs) { - mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs); - err = pci_enable_sriov(pdev, num_vfs); + if (nvfs) { + mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", nvfs); + err = pci_enable_sriov(pdev, nvfs); if (err) { mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", err); @@ -2367,7 +3234,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) mlx4_warn(dev, "Running in master mode\n"); dev->flags |= MLX4_FLAG_SRIOV | MLX4_FLAG_MASTER; - dev->num_vfs = num_vfs; + dev->num_vfs = nvfs; } } @@ -2481,10 +3348,9 @@ slave_start: if (err) goto err_port; - err = mlx4_sense_init(dev); - if (err) - goto err_port; + mlx4_request_modules(dev); + mlx4_sense_init(dev); mlx4_start_sense(dev); priv->pci_dev_data = pci_dev_data; @@ -2497,12 +3363,12 @@ err_port: mlx4_cleanup_port_info(&priv->port[port]); mlx4_cleanup_counters_table(dev); - mlx4_cleanup_mcg_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); + mlx4_cleanup_mcg_table(dev); mlx4_cleanup_mr_table(dev); mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); @@ -2516,8 +3382,10 @@ err_free_eq: mlx4_free_eq_table(dev); err_master_mfunc: - if (mlx4_is_master(dev)) + if (mlx4_is_master(dev)) { + mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY); mlx4_multi_func_cleanup(dev); + } if (mlx4_is_slave(dev)) { kfree(dev->caps.qp0_tunnel); @@ -2579,7 +3447,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) if (mlx4_how_many_lives_vf(dev)) mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n"); } - mlx4_sense_cleanup(dev); + mlx4_stop_sense(dev); mlx4_unregister_device(dev); for (p = 1; p <= dev->caps.num_ports; p++) { @@ -2592,12 +3460,12 @@ static void mlx4_remove_one(struct pci_dev *pdev) RES_TR_FREE_SLAVES_ONLY); mlx4_cleanup_counters_table(dev); - mlx4_cleanup_mcg_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); + mlx4_cleanup_mcg_table(dev); mlx4_cleanup_mr_table(dev); mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); @@ -2641,15 +3509,48 @@ static void mlx4_remove_one(struct pci_dev *pdev) } } +static int restore_current_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *types, + enum mlx4_port_type *poss_types) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err, i; + + mlx4_stop_sense(dev); + mutex_lock(&priv->port_mutex); + for (i = 0; i < dev->caps.num_ports; i++) + dev->caps.possible_type[i + 1] = poss_types[i]; + err = mlx4_change_port_types(dev, types); + mlx4_start_sense(dev); + mutex_unlock(&priv->port_mutex); + return err; +} + int mlx4_restart_one(struct pci_dev *pdev) { struct mlx4_dev *dev = pci_get_drvdata(pdev); struct mlx4_priv *priv = mlx4_priv(dev); - int pci_dev_data; + enum mlx4_port_type curr_type[MLX4_MAX_PORTS]; + enum mlx4_port_type poss_type[MLX4_MAX_PORTS]; + int pci_dev_data, err, i; pci_dev_data = priv->pci_dev_data; + for (i = 0; i < dev->caps.num_ports; i++) { + curr_type[i] = dev->caps.port_type[i + 1]; + poss_type[i] = dev->caps.possible_type[i + 1]; + } + mlx4_remove_one(pdev); - return __mlx4_init_one(pdev, pci_dev_data); + err = __mlx4_init_one(pdev, pci_dev_data); + if (err) + return err; + + dev = pci_get_drvdata(pdev); + err = restore_current_port_types(dev, curr_type, poss_type); + if (err) + mlx4_err(dev, "mlx4_restart_one: could not restore original port types (%d)\n", + err); + return 0; } static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { @@ -2725,16 +3626,6 @@ static int suspend(struct pci_dev *pdev, pm_message_t state) { mlx4_remove_one(pdev); - if (mlx4_log_num_mgm_entry_size != -1 && - (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || - mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { - pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not " - "in legal range (-1 or %d..%d)\n", - mlx4_log_num_mgm_entry_size, - MLX4_MIN_MGM_LOG_ENTRY_SIZE, - MLX4_MAX_MGM_LOG_ENTRY_SIZE); - return -1; - } return 0; } @@ -2745,16 +3636,47 @@ static int resume(struct pci_dev *pdev) static struct pci_driver mlx4_driver = { .name = DRV_NAME, - .id_table = (struct pci_device_id*)mlx4_pci_table, + .id_table = mlx4_pci_table, .probe = mlx4_init_one, .remove = __devexit_p(mlx4_remove_one), .suspend = suspend, .resume = resume, - .err_handler = (struct pci_error_handlers*)&mlx4_err_handler, + .err_handler = &mlx4_err_handler, }; static int __init mlx4_verify_params(void) { + int status; + + status = update_defaults(&port_type_array); + if (status == INVALID_STR) { + if (mlx4_fill_dbdf2val_tbl(&port_type_array.dbdf2val)) + return -1; + } else if (status == INVALID_DATA) { + return -1; + } + + status = update_defaults(&num_vfs); + if (status == INVALID_STR) { + if (mlx4_fill_dbdf2val_tbl(&num_vfs.dbdf2val)) + return -1; + } else if (status == INVALID_DATA) { + return -1; + } + + status = update_defaults(&probe_vf); + if (status == INVALID_STR) { + if (mlx4_fill_dbdf2val_tbl(&probe_vf.dbdf2val)) + return -1; + } else if (status == INVALID_DATA) { + return -1; + } + + if (msi_x < 0) { + pr_warn("mlx4_core: bad msi_x: %d\n", msi_x); + return -1; + } + if ((log_num_mac < 0) || (log_num_mac > 7)) { pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac); return -1; @@ -2772,12 +3694,6 @@ static int __init mlx4_verify_params(void) return -1; } - /* Check if module param for ports type has legal combination */ - if (port_type_array[0] == false && port_type_array[1] == true) { - pr_warning("mlx4_core: module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n"); - port_type_array[0] = true; - } - if (mlx4_log_num_mgm_entry_size != -1 && (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { @@ -2813,15 +3729,16 @@ static int __init mlx4_verify_params(void) return -1; } - if (mod_param_profile.num_mtt && mod_param_profile.num_mtt < 15) { + if (mod_param_profile.num_mtt_segs && + mod_param_profile.num_mtt_segs < 15) { pr_warning("mlx4_core: too low log_num_mtt: %d\n", - mod_param_profile.num_mtt); + mod_param_profile.num_mtt_segs); return -1; } - if (mod_param_profile.num_mtt > MLX4_MAX_LOG_NUM_MTT) { + if (mod_param_profile.num_mtt_segs > MLX4_MAX_LOG_NUM_MTT) { pr_warning("mlx4_core: too high log_num_mtt: %d\n", - mod_param_profile.num_mtt); + mod_param_profile.num_mtt_segs); return -1; } return 0; @@ -2844,10 +3761,18 @@ static int __init mlx4_init(void) sys_tune_init(); ret = pci_register_driver(&mlx4_driver); - if (ret < 0 && enable_sys_tune) + if (ret < 0) + goto err; + + return 0; + +err: + if (enable_sys_tune) sys_tune_fini(); - return ret < 0 ? ret : 0; + destroy_workqueue(mlx4_wq); + + return ret; } static void __exit mlx4_cleanup(void) @@ -2862,17 +3787,16 @@ static void __exit mlx4_cleanup(void) module_init_order(mlx4_init, SI_ORDER_MIDDLE); module_exit(mlx4_cleanup); -#undef MODULE_VERSION #include <sys/module.h> static int mlx4_evhand(module_t mod, int event, void *arg) { - return (0); + return (0); } static moduledata_t mlx4_mod = { - .name = "mlx4", - .evhand = mlx4_evhand, + .name = "mlx4", + .evhand = mlx4_evhand, }; MODULE_VERSION(mlx4, 1); DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY); diff --git a/sys/ofed/drivers/net/mlx4/mcg.c b/sys/ofed/drivers/net/mlx4/mcg.c index 60ac951..52c52f8 100644 --- a/sys/ofed/drivers/net/mlx4/mcg.c +++ b/sys/ofed/drivers/net/mlx4/mcg.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,8 +32,10 @@ */ #include <linux/string.h> +#include <linux/etherdevice.h> #include <linux/mlx4/cmd.h> +#include <linux/module.h> #include "mlx4.h" @@ -124,9 +126,14 @@ static struct mlx4_promisc_qp *get_promisc_qp(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, u32 qpn) { - struct mlx4_steer *s_steer = &mlx4_priv(dev)->steer[port - 1]; + struct mlx4_steer *s_steer; struct mlx4_promisc_qp *pqp; + if (port < 1 || port > dev->caps.num_ports) + return NULL; + + s_steer = &mlx4_priv(dev)->steer[port - 1]; + list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) { if (pqp->qpn == qpn) return pqp; @@ -153,6 +160,9 @@ static int new_steering_entry(struct mlx4_dev *dev, u8 port, u32 prot; int err; + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + s_steer = &mlx4_priv(dev)->steer[port - 1]; new_entry = kzalloc(sizeof *new_entry, GFP_KERNEL); if (!new_entry) @@ -237,6 +247,9 @@ static int existing_steering_entry(struct mlx4_dev *dev, u8 port, struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp; + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + s_steer = &mlx4_priv(dev)->steer[port - 1]; pqp = get_promisc_qp(dev, port, steer, qpn); @@ -258,7 +271,7 @@ static int existing_steering_entry(struct mlx4_dev *dev, u8 port, * we need to add it as a duplicate to this entry * for future references */ list_for_each_entry(dqp, &entry->duplicates, list) { - if (qpn == pqp->qpn) + if (qpn == dqp->qpn) return 0; /* qp is already duplicated */ } @@ -282,6 +295,9 @@ static bool check_duplicate_entry(struct mlx4_dev *dev, u8 port, struct mlx4_steer_index *tmp_entry, *entry = NULL; struct mlx4_promisc_qp *dqp, *tmp_dqp; + if (port < 1 || port > dev->caps.num_ports) + return NULL; + s_steer = &mlx4_priv(dev)->steer[port - 1]; /* if qp is not promisc, it cannot be duplicated */ @@ -309,20 +325,24 @@ static bool check_duplicate_entry(struct mlx4_dev *dev, u8 port, return true; } -/* I a steering entry contains only promisc QPs, it can be removed. */ -static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port, +/* + * returns true if all the QPs != tqpn contained in this entry + * are Promisc QPs. return false otherwise. + */ +static bool promisc_steering_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, - unsigned int index, u32 tqpn) + unsigned int index, u32 tqpn, u32 *members_count) { struct mlx4_steer *s_steer; struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; - struct mlx4_steer_index *entry = NULL, *tmp_entry; - u32 qpn; - u32 members_count; + u32 m_count; bool ret = false; int i; + if (port < 1 || port > dev->caps.num_ports) + return false; + s_steer = &mlx4_priv(dev)->steer[port - 1]; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -332,15 +352,42 @@ static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port, if (mlx4_READ_ENTRY(dev, index, mailbox)) goto out; - members_count = be32_to_cpu(mgm->members_count) & 0xffffff; - for (i = 0; i < members_count; i++) { - qpn = be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK; + m_count = be32_to_cpu(mgm->members_count) & 0xffffff; + if (members_count) + *members_count = m_count; + + for (i = 0; i < m_count; i++) { + u32 qpn = be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK; if (!get_promisc_qp(dev, port, steer, qpn) && qpn != tqpn) { /* the qp is not promisc, the entry can't be removed */ goto out; } } - /* All the qps currently registered for this entry are promiscuous, + ret = true; +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return ret; +} + +/* IF a steering entry contains only promisc QPs, it can be removed. */ +static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port, + enum mlx4_steer_type steer, + unsigned int index, u32 tqpn) +{ + struct mlx4_steer *s_steer; + struct mlx4_steer_index *entry = NULL, *tmp_entry; + u32 members_count; + bool ret = false; + + if (port < 1 || port > dev->caps.num_ports) + return NULL; + + s_steer = &mlx4_priv(dev)->steer[port - 1]; + + if (!promisc_steering_entry(dev, port, steer, index, tqpn, &members_count)) + goto out; + + /* All the qps currently registered for this entry are promiscuous, * Checking for duplicates */ ret = true; list_for_each_entry_safe(entry, tmp_entry, &s_steer->steer_entries[steer], list) { @@ -369,7 +416,6 @@ static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port, } out: - mlx4_free_cmd_mailbox(dev, mailbox); return ret; } @@ -389,6 +435,9 @@ static int add_promisc_qp(struct mlx4_dev *dev, u8 port, int err; struct mlx4_priv *priv = mlx4_priv(dev); + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + s_steer = &mlx4_priv(dev)->steer[port - 1]; mutex_lock(&priv->mcg_table.mutex); @@ -412,43 +461,45 @@ static int add_promisc_qp(struct mlx4_dev *dev, u8 port, } mgm = mailbox->buf; - /* the promisc qp needs to be added for each one of the steering - * entries, if it already exists, needs to be added as a duplicate - * for this entry */ - list_for_each_entry(entry, &s_steer->steer_entries[steer], list) { - err = mlx4_READ_ENTRY(dev, entry->index, mailbox); - if (err) - goto out_mailbox; + if (!(mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)) { + /* the promisc qp needs to be added for each one of the steering + * entries, if it already exists, needs to be added as a duplicate + * for this entry */ + list_for_each_entry(entry, &s_steer->steer_entries[steer], list) { + err = mlx4_READ_ENTRY(dev, entry->index, mailbox); + if (err) + goto out_mailbox; - members_count = be32_to_cpu(mgm->members_count) & 0xffffff; - prot = be32_to_cpu(mgm->members_count) >> 30; - found = false; - for (i = 0; i < members_count; i++) { - if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) { - /* Entry already exists, add to duplicates */ - dqp = kmalloc(sizeof *dqp, GFP_KERNEL); - if (!dqp) { + members_count = be32_to_cpu(mgm->members_count) & 0xffffff; + prot = be32_to_cpu(mgm->members_count) >> 30; + found = false; + for (i = 0; i < members_count; i++) { + if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) { + /* Entry already exists, add to duplicates */ + dqp = kmalloc(sizeof *dqp, GFP_KERNEL); + if (!dqp) { + err = -ENOMEM; + goto out_mailbox; + } + dqp->qpn = qpn; + list_add_tail(&dqp->list, &entry->duplicates); + found = true; + } + } + if (!found) { + /* Need to add the qpn to mgm */ + if (members_count == dev->caps.num_qp_per_mgm) { + /* entry is full */ err = -ENOMEM; goto out_mailbox; } - dqp->qpn = qpn; - list_add_tail(&dqp->list, &entry->duplicates); - found = true; + mgm->qp[members_count++] = cpu_to_be32(qpn & MGM_QPN_MASK); + mgm->members_count = cpu_to_be32(members_count | (prot << 30)); + err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); + if (err) + goto out_mailbox; } } - if (!found) { - /* Need to add the qpn to mgm */ - if (members_count == dev->caps.num_qp_per_mgm) { - /* entry is full */ - err = -ENOMEM; - goto out_mailbox; - } - mgm->qp[members_count++] = cpu_to_be32(qpn & MGM_QPN_MASK); - mgm->members_count = cpu_to_be32(members_count | (prot << 30)); - err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); - if (err) - goto out_mailbox; - } } /* add the new qpn to list of promisc qps */ @@ -492,7 +543,7 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, struct mlx4_steer *s_steer; struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; - struct mlx4_steer_index *entry; + struct mlx4_steer_index *entry, *tmp_entry; struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp; u32 members_count; @@ -501,6 +552,9 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, int i, loc = -1; int err; + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + s_steer = &mlx4_priv(dev)->steer[port - 1]; mutex_lock(&priv->mcg_table.mutex); @@ -533,49 +587,58 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, if (err) goto out_mailbox; - /* remove the qp from all the steering entries*/ - list_for_each_entry(entry, &s_steer->steer_entries[steer], list) { - found = false; - list_for_each_entry(dqp, &entry->duplicates, list) { - if (dqp->qpn == qpn) { - found = true; - break; - } - } - if (found) { - /* a duplicate, no need to change the mgm, - * only update the duplicates list */ - list_del(&dqp->list); - kfree(dqp); - } else { - err = mlx4_READ_ENTRY(dev, entry->index, mailbox); - if (err) - goto out_mailbox; - members_count = be32_to_cpu(mgm->members_count) & 0xffffff; - for (i = 0; i < members_count; ++i) - if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) { - loc = i; + if (!(mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)) { + /* remove the qp from all the steering entries*/ + list_for_each_entry_safe(entry, tmp_entry, &s_steer->steer_entries[steer], list) { + found = false; + list_for_each_entry(dqp, &entry->duplicates, list) { + if (dqp->qpn == qpn) { + found = true; break; } - - if (loc < 0) { - mlx4_err(dev, "QP %06x wasn't found in entry %d\n", - qpn, entry->index); - err = -EINVAL; - goto out_mailbox; } + if (found) { + /* a duplicate, no need to change the mgm, + * only update the duplicates list */ + list_del(&dqp->list); + kfree(dqp); + } else { + err = mlx4_READ_ENTRY(dev, entry->index, mailbox); + if (err) + goto out_mailbox; + members_count = be32_to_cpu(mgm->members_count) & 0xffffff; + if (!members_count) { + mlx4_warn(dev, "QP %06x wasn't found in entry %x mcount=0." + " deleting entry...\n", qpn, entry->index); + list_del(&entry->list); + kfree(entry); + continue; + } - /* copy the last QP in this MGM over removed QP */ - mgm->qp[loc] = mgm->qp[members_count - 1]; - mgm->qp[members_count - 1] = 0; - mgm->members_count = cpu_to_be32(--members_count | - (MLX4_PROT_ETH << 30)); + for (i = 0; i < members_count; ++i) + if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) { + loc = i; + break; + } - err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); - if (err) + if (loc < 0) { + mlx4_err(dev, "QP %06x wasn't found in entry %d\n", + qpn, entry->index); + err = -EINVAL; goto out_mailbox; - } + } + /* copy the last QP in this MGM over removed QP */ + mgm->qp[loc] = mgm->qp[members_count - 1]; + mgm->qp[members_count - 1] = 0; + mgm->members_count = cpu_to_be32(--members_count | + (MLX4_PROT_ETH << 30)); + + err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); + if (err) + goto out_mailbox; + } + } } out_mailbox: @@ -661,26 +724,37 @@ static int find_entry(struct mlx4_dev *dev, u8 port, return err; } +static const u8 __promisc_mode[] = { + [MLX4_FS_REGULAR] = 0x0, + [MLX4_FS_ALL_DEFAULT] = 0x1, + [MLX4_FS_MC_DEFAULT] = 0x3, + [MLX4_FS_UC_SNIFFER] = 0x4, + [MLX4_FS_MC_SNIFFER] = 0x5, +}; + +int map_sw_to_hw_steering_mode(struct mlx4_dev *dev, + enum mlx4_net_trans_promisc_mode flow_type) +{ + if (flow_type >= MLX4_FS_MODE_NUM || flow_type < 0) { + mlx4_err(dev, "Invalid flow type. type = %d\n", flow_type); + return -EINVAL; + } + return __promisc_mode[flow_type]; +} +EXPORT_SYMBOL_GPL(map_sw_to_hw_steering_mode); + static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl, struct mlx4_net_trans_rule_hw_ctrl *hw) { - static const u8 __promisc_mode[] = { - [MLX4_FS_REGULAR] = 0x0, - [MLX4_FS_ALL_DEFAULT] = 0x1, - [MLX4_FS_MC_DEFAULT] = 0x3, - [MLX4_FS_UC_SNIFFER] = 0x4, - [MLX4_FS_MC_SNIFFER] = 0x5, - }; - - u32 dw = 0; - - dw = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0; - dw |= ctrl->exclusive ? (1 << 2) : 0; - dw |= ctrl->allow_loopback ? (1 << 3) : 0; - dw |= __promisc_mode[ctrl->promisc_mode] << 8; - dw |= ctrl->priority << 16; - - hw->ctrl = cpu_to_be32(dw); + u8 flags = 0; + + flags = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0; + flags |= ctrl->exclusive ? (1 << 2) : 0; + flags |= ctrl->allow_loopback ? (1 << 3) : 0; + + hw->flags = flags; + hw->type = __promisc_mode[ctrl->promisc_mode]; + hw->prio = cpu_to_be16(ctrl->priority); hw->port = ctrl->port; hw->qpn = cpu_to_be32(ctrl->qpn); } @@ -694,29 +768,51 @@ const u16 __sw_id_hw[] = { [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006 }; +int map_sw_to_hw_steering_id(struct mlx4_dev *dev, + enum mlx4_net_trans_rule_id id) +{ + if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) { + mlx4_err(dev, "Invalid network rule id. id = %d\n", id); + return -EINVAL; + } + return __sw_id_hw[id]; +} +EXPORT_SYMBOL_GPL(map_sw_to_hw_steering_id); + +static const int __rule_hw_sz[] = { + [MLX4_NET_TRANS_RULE_ID_ETH] = + sizeof(struct mlx4_net_trans_rule_hw_eth), + [MLX4_NET_TRANS_RULE_ID_IB] = + sizeof(struct mlx4_net_trans_rule_hw_ib), + [MLX4_NET_TRANS_RULE_ID_IPV6] = 0, + [MLX4_NET_TRANS_RULE_ID_IPV4] = + sizeof(struct mlx4_net_trans_rule_hw_ipv4), + [MLX4_NET_TRANS_RULE_ID_TCP] = + sizeof(struct mlx4_net_trans_rule_hw_tcp_udp), + [MLX4_NET_TRANS_RULE_ID_UDP] = + sizeof(struct mlx4_net_trans_rule_hw_tcp_udp) +}; + +int hw_rule_sz(struct mlx4_dev *dev, + enum mlx4_net_trans_rule_id id) +{ + if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) { + mlx4_err(dev, "Invalid network rule id. id = %d\n", id); + return -EINVAL; + } + + return __rule_hw_sz[id]; +} +EXPORT_SYMBOL_GPL(hw_rule_sz); + static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec, struct _rule_hw *rule_hw) { - static const size_t __rule_hw_sz[] = { - [MLX4_NET_TRANS_RULE_ID_ETH] = - sizeof(struct mlx4_net_trans_rule_hw_eth), - [MLX4_NET_TRANS_RULE_ID_IB] = - sizeof(struct mlx4_net_trans_rule_hw_ib), - [MLX4_NET_TRANS_RULE_ID_IPV6] = 0, - [MLX4_NET_TRANS_RULE_ID_IPV4] = - sizeof(struct mlx4_net_trans_rule_hw_ipv4), - [MLX4_NET_TRANS_RULE_ID_TCP] = - sizeof(struct mlx4_net_trans_rule_hw_tcp_udp), - [MLX4_NET_TRANS_RULE_ID_UDP] = - sizeof(struct mlx4_net_trans_rule_hw_tcp_udp) - }; - if (spec->id >= MLX4_NET_TRANS_RULE_NUM) { - mlx4_err(dev, "Invalid network rule id. id = %d\n", spec->id); + if (hw_rule_sz(dev, spec->id) < 0) return -EINVAL; - } - memset(rule_hw, 0, __rule_hw_sz[spec->id]); + memset(rule_hw, 0, hw_rule_sz(dev, spec->id)); rule_hw->id = cpu_to_be16(__sw_id_hw[spec->id]); - rule_hw->size = __rule_hw_sz[spec->id] >> 2; + rule_hw->size = hw_rule_sz(dev, spec->id) >> 2; switch (spec->id) { case MLX4_NET_TRANS_RULE_ID_ETH: @@ -730,12 +826,12 @@ static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec, rule_hw->eth.ether_type_enable = 1; rule_hw->eth.ether_type = spec->eth.ether_type; } - rule_hw->eth.vlan_id = spec->eth.vlan_id; - rule_hw->eth.vlan_id_msk = spec->eth.vlan_id_msk; + rule_hw->eth.vlan_tag = spec->eth.vlan_id; + rule_hw->eth.vlan_tag_msk = spec->eth.vlan_id_msk; break; case MLX4_NET_TRANS_RULE_ID_IB: - rule_hw->ib.r_u_qpn = spec->ib.r_u_qpn; + rule_hw->ib.l3_qpn = spec->ib.l3_qpn; rule_hw->ib.qpn_mask = spec->ib.qpn_msk; memcpy(&rule_hw->ib.dst_gid, &spec->ib.dst_gid, 16); memcpy(&rule_hw->ib.dst_gid_msk, &spec->ib.dst_gid_msk, 16); @@ -886,7 +982,7 @@ int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id) err = mlx4_QP_FLOW_STEERING_DETACH(dev, reg_id); if (err) mlx4_err(dev, "Fail to detach network rule. registration id = 0x%llx\n", - (long long)reg_id); + (unsigned long long)reg_id); return err; } EXPORT_SYMBOL_GPL(mlx4_flow_detach); @@ -977,8 +1073,9 @@ int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], if (err) goto out; + /* if !link, still add the new entry. */ if (!link) - goto out; + goto skip_link; err = mlx4_READ_ENTRY(dev, prev, mailbox); if (err) @@ -990,6 +1087,7 @@ int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], if (err) goto out; +skip_link: if (prot == MLX4_PROT_ETH) { /* manage the steering entry for promisc mode */ if (new_entry) @@ -1006,7 +1104,7 @@ out: index, dev->caps.num_mgms); else mlx4_bitmap_free(&priv->mcg_table.bitmap, - index - dev->caps.num_mgms); + index - dev->caps.num_mgms, MLX4_USE_RR); } mutex_unlock(&priv->mcg_table.mutex); @@ -1045,10 +1143,14 @@ int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], goto out; } - /* if this pq is also a promisc qp, it shouldn't be removed */ + /* + if this QP is also a promisc QP, it shouldn't be removed only if + at least one none promisc QP is also attached to this MCG + */ if (prot == MLX4_PROT_ETH && - check_duplicate_entry(dev, port, steer, index, qp->qpn)) - goto out; + check_duplicate_entry(dev, port, steer, index, qp->qpn) && + !promisc_steering_entry(dev, port, steer, index, qp->qpn, NULL)) + goto out; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; for (i = 0; i < members_count; ++i) @@ -1099,7 +1201,7 @@ int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], index, amgm_index, dev->caps.num_mgms); else mlx4_bitmap_free(&priv->mcg_table.bitmap, - amgm_index - dev->caps.num_mgms); + amgm_index - dev->caps.num_mgms, MLX4_USE_RR); } } else { /* Remove entry from AMGM */ @@ -1119,7 +1221,7 @@ int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], prev, index, dev->caps.num_mgms); else mlx4_bitmap_free(&priv->mcg_table.bitmap, - index - dev->caps.num_mgms); + index - dev->caps.num_mgms, MLX4_USE_RR); } out: @@ -1148,7 +1250,7 @@ static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp, qpn = qp->qpn; qpn |= (prot << 28); if (attach && block_loopback) - qpn |= (1U << 31); + qpn |= (1 << 31); err = mlx4_cmd(dev, mailbox->dma, qpn, attach, MLX4_CMD_QP_ATTACH, MLX4_CMD_TIME_CLASS_A, @@ -1158,28 +1260,11 @@ static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp, return err; } -int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - u8 port, int block_mcast_loopback, - enum mlx4_protocol prot, u64 *reg_id) +int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, + u8 gid[16], u8 port, + int block_mcast_loopback, + enum mlx4_protocol prot, u64 *reg_id) { - - switch (dev->caps.steering_mode) { - case MLX4_STEERING_MODE_A0: - if (prot == MLX4_PROT_ETH) - return 0; - - case MLX4_STEERING_MODE_B0: - if (prot == MLX4_PROT_ETH) - gid[7] |= (MLX4_MC_STEER << 1); - - if (mlx4_is_mfunc(dev)) - return mlx4_QP_ATTACH(dev, qp, gid, 1, - block_mcast_loopback, prot); - return mlx4_qp_attach_common(dev, qp, gid, - block_mcast_loopback, prot, - MLX4_MC_STEER); - - case MLX4_STEERING_MODE_DEVICE_MANAGED: { struct mlx4_spec_list spec = { {NULL} }; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); @@ -1213,8 +1298,35 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], list_add_tail(&spec.list, &rule.list); return mlx4_flow_attach(dev, &rule, reg_id); - } +} +int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + u8 port, int block_mcast_loopback, + enum mlx4_protocol prot, u64 *reg_id) +{ + enum mlx4_steer_type steer; + steer = (is_valid_ether_addr(&gid[10])) ? MLX4_UC_STEER : MLX4_MC_STEER; + + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_A0: + if (prot == MLX4_PROT_ETH) + return 0; + + case MLX4_STEERING_MODE_B0: + if (prot == MLX4_PROT_ETH) + gid[7] |= (steer << 1); + + if (mlx4_is_mfunc(dev)) + return mlx4_QP_ATTACH(dev, qp, gid, 1, + block_mcast_loopback, prot); + return mlx4_qp_attach_common(dev, qp, gid, + block_mcast_loopback, prot, + MLX4_MC_STEER); + + case MLX4_STEERING_MODE_DEVICE_MANAGED: + return mlx4_trans_to_dmfs_attach(dev, qp, gid, port, + block_mcast_loopback, + prot, reg_id); default: return -EINVAL; } @@ -1224,6 +1336,9 @@ EXPORT_SYMBOL_GPL(mlx4_multicast_attach); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot, u64 reg_id) { + enum mlx4_steer_type steer; + steer = (is_valid_ether_addr(&gid[10])) ? MLX4_UC_STEER : MLX4_MC_STEER; + switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_A0: if (prot == MLX4_PROT_ETH) @@ -1231,7 +1346,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], case MLX4_STEERING_MODE_B0: if (prot == MLX4_PROT_ETH) - gid[7] |= (MLX4_MC_STEER << 1); + gid[7] |= (steer << 1); if (mlx4_is_mfunc(dev)) return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot); @@ -1345,8 +1460,8 @@ int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave, u8 port = vhcr->in_param >> 62; enum mlx4_steer_type steer = vhcr->in_modifier; - /* Promiscuous unicast is not allowed in mfunc */ - if (mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER) + /* Promiscuous unicast is not allowed in mfunc for VFs */ + if ((slave != dev->caps.function) && (steer == MLX4_UC_STEER)) return 0; if (vhcr->op_modifier) diff --git a/sys/ofed/drivers/net/mlx4/mlx4.h b/sys/ofed/drivers/net/mlx4/mlx4.h index b342d9a..624a61c 100644 --- a/sys/ofed/drivers/net/mlx4/mlx4.h +++ b/sys/ofed/drivers/net/mlx4/mlx4.h @@ -2,7 +2,7 @@ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. - * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -43,7 +43,7 @@ #include <linux/timer.h> #include <linux/semaphore.h> #include <linux/workqueue.h> - +#include <linux/device.h> #include <linux/mlx4/device.h> #include <linux/mlx4/driver.h> #include <linux/mlx4/doorbell.h> @@ -51,8 +51,12 @@ #define DRV_NAME "mlx4_core" #define PFX DRV_NAME ": " -#define DRV_VERSION "1.1" -#define DRV_RELDATE "Dec, 2011" +#define DRV_VERSION "2.1" +#define DRV_RELDATE __DATE__ + +#define DRV_STACK_NAME "Linux-MLNX_OFED" +#define DRV_STACK_VERSION "2.1" +#define DRV_NAME_FOR_FW DRV_STACK_NAME","DRV_STACK_VERSION #define MLX4_FS_UDP_UC_EN (1 << 1) #define MLX4_FS_TCP_UC_EN (1 << 2) @@ -108,10 +112,10 @@ enum { MLX4_NUM_CMPTS = MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT }; -enum mlx4_mr_state { - MLX4_MR_DISABLED = 0, - MLX4_MR_EN_HW, - MLX4_MR_EN_SW +enum mlx4_mpt_state { + MLX4_MPT_DISABLED = 0, + MLX4_MPT_EN_HW, + MLX4_MPT_EN_SW }; #define MLX4_COMM_TIME 10000 @@ -139,9 +143,10 @@ enum mlx4_resource { RES_MTT, RES_MAC, RES_VLAN, - RES_EQ, + RES_NPORT_ID, RES_COUNTER, RES_FS_RULE, + RES_EQ, MLX4_NUM_OF_RESOURCE_TYPE }; @@ -180,13 +185,14 @@ struct mlx4_vhcr { struct mlx4_vhcr_cmd { __be64 in_param; __be32 in_modifier; + u32 reserved1; __be64 out_param; __be16 token; u16 reserved; u8 status; u8 flags; __be16 opcode; -}; +} __packed; struct mlx4_cmd_info { u16 opcode; @@ -194,6 +200,7 @@ struct mlx4_cmd_info { bool has_outbox; bool out_is_imm; bool encode_slave_id; + bool skip_err_print; int (*verify)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox); int (*wrapper)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, @@ -202,6 +209,10 @@ struct mlx4_cmd_info { struct mlx4_cmd_info *cmd); }; +enum { + MLX4_DEBUG_MASK_CMD_TIME = 0x100, +}; + #ifdef CONFIG_MLX4_DEBUG extern int mlx4_debug_level; #else /* CONFIG_MLX4_DEBUG */ @@ -260,6 +271,22 @@ struct mlx4_icm_table { struct mlx4_icm **icm; }; +#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28) +#define MLX4_MPT_FLAG_FREE (0x3UL << 28) +#define MLX4_MPT_FLAG_MIO (1 << 17) +#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15) +#define MLX4_MPT_FLAG_PHYSICAL (1 << 9) +#define MLX4_MPT_FLAG_REGION (1 << 8) + +#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) +#define MLX4_MPT_PD_FLAG_RAE (1 << 28) +#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) + +#define MLX4_MPT_QP_FLAG_BOUND_QP (1 << 7) + +#define MLX4_MPT_STATUS_SW 0xF0 +#define MLX4_MPT_STATUS_HW 0x00 + /* * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. */ @@ -353,7 +380,6 @@ struct mlx4_eq { u16 irq; u16 have_irq; int nent; - int load; struct mlx4_buf_list *page_list; struct mlx4_mtt mtt; }; @@ -376,7 +402,7 @@ struct mlx4_profile { int num_cq; int num_mcg; int num_mpt; - unsigned num_mtt; + unsigned num_mtt_segs; }; struct mlx4_fw { @@ -434,6 +460,7 @@ struct mlx4_slave_state { u8 last_cmd; u8 init_port_mask; bool active; + bool old_vlan_api; u8 function; dma_addr_t vhcr_dma; u16 mtu[MLX4_MAX_PORTS + 1]; @@ -455,12 +482,14 @@ struct mlx4_slave_state { #define MLX4_VGT 4095 #define NO_INDX (-1) + struct mlx4_vport_state { u64 mac; u16 default_vlan; u8 default_qos; u32 tx_rate; bool spoofchk; + u32 link_state; }; struct mlx4_vf_admin_state { @@ -531,6 +560,7 @@ struct mlx4_mfunc_master_ctx { struct mlx4_resource_tracker res_tracker; struct workqueue_struct *comm_wq; struct work_struct comm_work; + struct work_struct arm_comm_work; struct work_struct slave_event_work; struct work_struct slave_flr_event_work; spinlock_t slave_state_lock; @@ -576,6 +606,24 @@ struct mlx4_cmd { u8 comm_toggle; }; +enum { + MLX4_VF_IMMED_VLAN_FLAG_VLAN = 1 << 0, + MLX4_VF_IMMED_VLAN_FLAG_QOS = 1 << 1, +}; +struct mlx4_vf_immed_vlan_work { + struct work_struct work; + struct mlx4_priv *priv; + int flags; + int slave; + int vlan_ix; + int orig_vlan_ix; + u8 port; + u8 qos; + u16 vlan_id; + u16 orig_vlan_id; +}; + + struct mlx4_uar_table { struct mlx4_bitmap bitmap; }; @@ -592,6 +640,7 @@ struct mlx4_mr_table { struct mlx4_cq_table { struct mlx4_bitmap bitmap; spinlock_t lock; + rwlock_t cq_table_lock; struct radix_tree_root tree; struct mlx4_icm_table table; struct mlx4_icm_table cmpt_table; @@ -724,8 +773,6 @@ struct mlx4_sense { u8 do_sense_port[MLX4_MAX_PORTS + 1]; u8 sense_allowed[MLX4_MAX_PORTS + 1]; struct delayed_work sense_poll; - struct workqueue_struct *sense_wq; - u32 resched; }; struct mlx4_msix_ctl { @@ -738,85 +785,6 @@ struct mlx4_steer { struct list_head steer_entries[MLX4_NUM_STEERS]; }; -struct mlx4_net_trans_rule_hw_ctrl { - __be32 ctrl; - u8 rsvd1; - u8 funcid; - u8 vep; - u8 port; - __be32 qpn; - __be32 rsvd2; -}; - -struct mlx4_net_trans_rule_hw_ib { - u8 size; - u8 rsvd1; - __be16 id; - u32 rsvd2; - __be32 r_u_qpn; - __be32 qpn_mask; - u8 dst_gid[16]; - u8 dst_gid_msk[16]; -} __packed; - -struct mlx4_net_trans_rule_hw_eth { - u8 size; - u8 rsvd; - __be16 id; - u8 rsvd1[6]; - u8 dst_mac[6]; - u16 rsvd2; - u8 dst_mac_msk[6]; - u16 rsvd3; - u8 src_mac[6]; - u16 rsvd4; - u8 src_mac_msk[6]; - u8 rsvd5; - u8 ether_type_enable; - __be16 ether_type; - __be16 vlan_id_msk; - __be16 vlan_id; -} __packed; - -struct mlx4_net_trans_rule_hw_tcp_udp { - u8 size; - u8 rsvd; - __be16 id; - __be16 rsvd1[3]; - __be16 dst_port; - __be16 rsvd2; - __be16 dst_port_msk; - __be16 rsvd3; - __be16 src_port; - __be16 rsvd4; - __be16 src_port_msk; -} __packed; - -struct mlx4_net_trans_rule_hw_ipv4 { - u8 size; - u8 rsvd; - __be16 id; - __be32 rsvd1; - __be32 dst_ip; - __be32 dst_ip_msk; - __be32 src_ip; - __be32 src_ip_msk; -} __packed; - -struct _rule_hw { - union { - struct { - u8 size; - u8 rsvd; - __be16 id; - }; - struct mlx4_net_trans_rule_hw_eth eth; - struct mlx4_net_trans_rule_hw_ib ib; - struct mlx4_net_trans_rule_hw_ipv4 ipv4; - struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp; - }; -}; - enum { MLX4_PCI_DEV_IS_VF = 1 << 0, MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1, @@ -826,6 +794,23 @@ struct mlx4_roce_gid_entry { u8 raw[16]; }; +struct counter_index { + struct list_head list; + u32 index; +}; + +struct mlx4_counters { + struct mlx4_bitmap bitmap; + struct list_head global_port_list[MLX4_MAX_PORTS]; + struct list_head vf_list[MLX4_MAX_NUM_VF][MLX4_MAX_PORTS]; + struct mutex mutex; +}; + +enum { + MLX4_NO_RR = 0, + MLX4_USE_RR = 1, +}; + struct mlx4_priv { struct mlx4_dev dev; @@ -851,7 +836,7 @@ struct mlx4_priv { struct mlx4_srq_table srq_table; struct mlx4_qp_table qp_table; struct mlx4_mcg_table mcg_table; - struct mlx4_bitmap counters_bitmap; + struct mlx4_counters counters_table; struct mlx4_catas_err catas_err; @@ -887,10 +872,11 @@ static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) extern struct workqueue_struct *mlx4_wq; u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap); -void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj); +void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr); u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align, u32 skip_mask); -void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt); +void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt, + int use_rr); u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap); int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, u32 reserved_bot, u32 resetrved_top); @@ -926,10 +912,10 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn); void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn); int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn); void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn); -int __mlx4_mr_reserve(struct mlx4_dev *dev); -void __mlx4_mr_release(struct mlx4_dev *dev, u32 index); -int __mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index); -void __mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index); +int __mlx4_mpt_reserve(struct mlx4_dev *dev); +void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index); +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index); +void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index); u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order); void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order); @@ -969,14 +955,20 @@ int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, - int *base, u8 bf_qp); + int *base, u8 flags); void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac); void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac); int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); -int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); -void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx); +int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx); +void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx); + +int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave); +int __mlx4_clear_if_stat(struct mlx4_dev *dev, + u8 counter_index); +u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port); + int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn); void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); @@ -997,6 +989,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *init_hca); void mlx4_master_comm_channel(struct work_struct *work); +void mlx4_master_arm_comm_channel(struct work_struct *work); void mlx4_gen_slave_eqe(struct work_struct *work); void mlx4_master_handle_slave_flr(struct work_struct *work); @@ -1164,8 +1157,7 @@ void mlx4_do_sense_ports(struct mlx4_dev *dev, enum mlx4_port_type *defaults); void mlx4_start_sense(struct mlx4_dev *dev); void mlx4_stop_sense(struct mlx4_dev *dev); -void mlx4_sense_cleanup(struct mlx4_dev *dev); -int mlx4_sense_init(struct mlx4_dev *dev); +void mlx4_sense_init(struct mlx4_dev *dev); int mlx4_check_port_params(struct mlx4_dev *dev, enum mlx4_port_type *port_type); int mlx4_change_port_types(struct mlx4_dev *dev, @@ -1238,6 +1230,10 @@ int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot, enum mlx4_steer_type steer); +int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, + u8 gid[16], u8 port, + int block_mcast_loopback, + enum mlx4_protocol prot, u64 *reg_id); int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -1250,8 +1246,6 @@ int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd); int mlx4_common_set_vlan_fltr(struct mlx4_dev *dev, int function, int port, void *buf); -int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave, u32 in_mod, - struct mlx4_cmd_mailbox *outbox); int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -1277,6 +1271,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_MOD_STAT_CFG_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_get_mgm_entry_size(struct mlx4_dev *dev); int mlx4_get_qp_per_mgm(struct mlx4_dev *dev); @@ -1315,5 +1314,6 @@ void mlx4_init_quotas(struct mlx4_dev *dev); int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave); int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave); +void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); #endif /* MLX4_H */ diff --git a/sys/ofed/drivers/net/mlx4/mlx4_en.h b/sys/ofed/drivers/net/mlx4/mlx4_en.h index 0dc5fe6..9a87bf7 100644 --- a/sys/ofed/drivers/net/mlx4/mlx4_en.h +++ b/sys/ofed/drivers/net/mlx4/mlx4_en.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,13 +34,17 @@ #ifndef _MLX4_EN_H_ #define _MLX4_EN_H_ -#include <sys/cdefs.h> - -#include <linux/types.h> +#include <linux/bitops.h> #include <linux/compiler.h> #include <linux/list.h> #include <linux/mutex.h> +#include <linux/kobject.h> #include <linux/netdevice.h> +#include <linux/if_vlan.h> +#include <linux/if_ether.h> +#ifdef CONFIG_MLX4_EN_DCB +#include <linux/dcbnl.h> +#endif #include <linux/mlx4/device.h> #include <linux/mlx4/qp.h> @@ -49,56 +53,17 @@ #include <linux/mlx4/doorbell.h> #include <linux/mlx4/cmd.h> -#include <net/if_media.h> #include <netinet/tcp_lro.h> #include "en_port.h" +#include "mlx4_stats.h" #define DRV_NAME "mlx4_en" -#define DRV_VERSION "1.5.2" -#define DRV_RELDATE "July 2010" - -/* XXX */ -#define NETIF_MSG_LINK 0x1 -#define NETIF_MSG_IFDOWN 0x2 -#define NETIF_MSG_HW 0x4 -#define NETIF_MSG_DRV 0x8 -#define NETIF_MSG_INTR 0x10 -#define NETIF_MSG_RX_ERR 0x20 +#define DRV_VERSION "2.1" +#define DRV_RELDATE __DATE__ #define MLX4_EN_MSG_LEVEL (NETIF_MSG_LINK | NETIF_MSG_IFDOWN) -#define en_print(level, priv, format, arg...) \ - { \ - if ((priv)->registered) \ - printk(level "%s: %s: " format, DRV_NAME, \ - (priv->dev)->if_xname, ## arg); \ - else \ - printk(level "%s: %s: Port %d: " format, \ - DRV_NAME, dev_name(&priv->mdev->pdev->dev), \ - (priv)->port, ## arg); \ - } - -#define en_dbg(mlevel, priv, format, arg...) \ - if (NETIF_MSG_##mlevel & priv->msg_enable) \ - en_print(KERN_DEBUG, priv, format, ## arg) -#define en_warn(priv, format, arg...) \ - en_print(KERN_WARNING, priv, format, ## arg) -#define en_err(priv, format, arg...) \ - en_print(KERN_ERR, priv, format, ## arg) -#define en_info(priv, format, arg...) \ - en_print(KERN_INFO, priv, format, ## arg) - -#define mlx4_err(mdev, format, arg...) \ - printk(KERN_ERR "%s %s: " format , DRV_NAME ,\ - dev_name(&mdev->pdev->dev) , ## arg) -#define mlx4_info(mdev, format, arg...) \ - printk(KERN_INFO "%s %s: " format , DRV_NAME ,\ - dev_name(&mdev->pdev->dev) , ## arg) -#define mlx4_warn(mdev, format, arg...) \ - printk(KERN_WARNING "%s %s: " format , DRV_NAME ,\ - dev_name(&mdev->pdev->dev) , ## arg) - /* * Device constants */ @@ -106,8 +71,9 @@ #define MLX4_EN_PAGE_SHIFT 12 #define MLX4_EN_PAGE_SIZE (1 << MLX4_EN_PAGE_SHIFT) -#define MAX_TX_RINGS (MLX4_EN_NUM_HASH_RINGS + 1 + MLX4_EN_NUM_PPP_RINGS) -#define MAX_RX_RINGS 16 +#define DEF_RX_RINGS 16 +#define MAX_RX_RINGS 128 +#define MIN_RX_RINGS 4 #define TXBB_SIZE 64 #define HEADROOM (2048 / TXBB_SIZE + 1) #define STAMP_STRIDE 64 @@ -115,6 +81,19 @@ #define STAMP_SHIFT 31 #define STAMP_VAL 0x7fffffff #define STATS_DELAY (HZ / 4) +#define SERVICE_TASK_DELAY (HZ / 4) +#define MAX_NUM_OF_FS_RULES 256 + +#define MLX4_EN_FILTER_HASH_SHIFT 4 +#define MLX4_EN_FILTER_EXPIRY_QUOTA 60 + +#ifdef CONFIG_NET_RX_BUSY_POLL +#define LL_EXTENDED_STATS +#endif + +/* vlan valid range */ +#define VLAN_MIN_VALUE 1 +#define VLAN_MAX_VALUE 4094 /* Typical TSO descriptor with 16 gather entries is 352 bytes... */ #define MAX_DESC_SIZE 512 @@ -126,8 +105,13 @@ #define MLX4_EN_WATCHDOG_TIMEOUT (15 * HZ) -#define MLX4_EN_MAX_LRO_DESCRIPTORS 32 -#define MLX4_EN_NUM_IPFRAG_SESSIONS 16 +#define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(PAGE_SIZE) +#define MLX4_EN_ALLOC_ORDER get_order(MLX4_EN_ALLOC_SIZE) + +enum mlx4_en_alloc_type { + MLX4_EN_ALLOC_NEW = 0, + MLX4_EN_ALLOC_REPLACEMENT = 1, +}; /* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU * and 4K allocations) */ @@ -150,28 +134,37 @@ enum { #endif /* Maximum ring sizes */ +#define MLX4_EN_DEF_TX_QUEUE_SIZE 4096 + +/* Minimum packet number till arming the CQ */ +#define MLX4_EN_MIN_RX_ARM 2048 +#define MLX4_EN_MIN_TX_ARM 2048 + +/* Maximum ring sizes */ #define MLX4_EN_MAX_TX_SIZE 8192 #define MLX4_EN_MAX_RX_SIZE 8192 -#define MLX4_EN_MIN_RX_SIZE (128) +/* Minimum ring sizes */ +#define MLX4_EN_MIN_RX_SIZE (4096 / TXBB_SIZE) #define MLX4_EN_MIN_TX_SIZE (4096 / TXBB_SIZE) #define MLX4_EN_SMALL_PKT_SIZE 64 -#define MLX4_EN_TX_HASH_SIZE 256 -#define MLX4_EN_TX_HASH_MASK (MLX4_EN_TX_HASH_SIZE - 1) -#define MLX4_EN_NUM_HASH_RINGS 4 -#define MLX4_EN_NUM_PPP_RINGS 8 -#define MLX4_EN_DEF_TX_RING_SIZE 512 -#define MLX4_EN_DEF_TX_QUEUE_SIZE 4096 + +#define MLX4_EN_MAX_TX_RING_P_UP 32 +#define MLX4_EN_NUM_UP 1 + +#define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \ + (MLX4_EN_NUM_UP + 1)) + +#define MLX4_EN_DEF_TX_RING_SIZE 1024 #define MLX4_EN_DEF_RX_RING_SIZE 1024 -#define MLX4_EN_MAX_RX_POLL 1024 /* Target number of bytes to coalesce with interrupt moderation */ #define MLX4_EN_RX_COAL_TARGET 0x20000 #define MLX4_EN_RX_COAL_TIME 0x10 -#define MLX4_EN_TX_COAL_PKTS 5 -#define MLX4_EN_TX_COAL_TIME 0x80 +#define MLX4_EN_TX_COAL_PKTS 64 +#define MLX4_EN_TX_COAL_TIME 64 #define MLX4_EN_RX_RATE_LOW 400000 #define MLX4_EN_RX_COAL_TIME_LOW 0 @@ -187,14 +180,13 @@ enum { #define MLX4_EN_DEF_RX_PAUSE 1 #define MLX4_EN_DEF_TX_PAUSE 1 -/* Interval between sucessive polls in the Tx routine when polling is used +/* Interval between successive polls in the Tx routine when polling is used instead of interrupts (in per-core Tx rings) - should be power of 2 */ #define MLX4_EN_TX_POLL_MODER 16 #define MLX4_EN_TX_POLL_TIMEOUT (HZ / 4) -#define ETH_LLC_SNAP_SIZE 8 - -#define SMALL_PACKET_SIZE (MHLEN) +#define MLX4_EN_64_ALIGN (64 - NET_SKB_PAD) +#define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN) #define HEADER_COPY_SIZE (128) #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETHER_HDR_LEN) @@ -208,7 +200,6 @@ enum { /* Number of samples to 'average' */ #define AVG_SIZE 128 #define AVG_FACTOR 1024 -#define NUM_PERF_STATS NUM_PERF_COUNTERS #define INC_PERF_COUNTER(cnt) (++(cnt)) #define ADD_PERF_COUNTER(cnt, add) ((cnt) += (add)) @@ -219,7 +210,6 @@ enum { #else -#define NUM_PERF_STATS 0 #define INC_PERF_COUNTER(cnt) do {} while (0) #define ADD_PERF_COUNTER(cnt, add) do {} while (0) #define AVG_PERF_COUNTER(cnt, sample) do {} while (0) @@ -244,13 +234,17 @@ enum cq_type { #define XNOR(x, y) (!(x) == !(y)) #define ILLEGAL_MAC(addr) (addr == 0xffffffffffffULL || addr == 0x0) - struct mlx4_en_tx_info { - struct mbuf *mb; - u32 nr_txbb; - u8 nr_segs; - u8 data_offset; - u8 inl; + struct mbuf *mb; + u32 nr_txbb; + u32 nr_bytes; + u8 linear; + u8 nr_segs; + u8 data_offset; + u8 inl; +#if 0 + u8 ts_requested; +#endif }; @@ -271,8 +265,14 @@ struct mlx4_en_tx_desc { #define MLX4_EN_USE_SRQ 0x01000000 +#define MLX4_EN_TX_BUDGET 64*4 //Compensate for no NAPI in freeBSD - might need some fine tunning in the future. +#define MLX4_EN_RX_BUDGET 64 + +#define MLX4_EN_CX3_LOW_ID 0x1000 +#define MLX4_EN_CX3_HIGH_ID 0x1005 + struct mlx4_en_tx_ring { - spinlock_t tx_lock; + spinlock_t tx_lock; struct mlx4_hwq_resources wqres; u32 size ; /* number of TXBBs */ u32 size_mask; @@ -285,9 +285,11 @@ struct mlx4_en_tx_ring { void *buf; u16 poll_cnt; int blocked; - struct buf_ring *br; struct mlx4_en_tx_info *tx_info; u8 *bounce_buf; + u8 queue_index; + cpuset_t affinity_mask; + struct buf_ring *br; u32 last_nr_txbb; struct mlx4_qp qp; struct mlx4_qp_context context; @@ -296,29 +298,30 @@ struct mlx4_en_tx_ring { struct mlx4_srq dummy; unsigned long bytes; unsigned long packets; - unsigned long errors; - spinlock_t comp_lock; + unsigned long tx_csum; + unsigned long queue_stopped; + unsigned long wake_queue; struct mlx4_bf bf; bool bf_enabled; + struct netdev_queue *tx_queue; + int hwtstamp_tx_type; + spinlock_t comp_lock; + int full_size; + int inline_thold; u64 watchdog_time; }; -struct mlx4_en_ipfrag { - struct mbuf *fragments; - struct mbuf *last; - __be32 saddr; - __be32 daddr; - __be16 id; - u8 protocol; - int total_len; - u16 offset; -}; - struct mlx4_en_rx_desc { /* actual number of entries depends on rx ring stride */ struct mlx4_wqe_data_seg data[0]; }; +struct mlx4_en_rx_buf { + dma_addr_t dma; + struct page *page; + unsigned int page_offset; +}; + struct mlx4_en_rx_ring { struct mlx4_hwq_resources wqres; u32 size ; /* number of Rx descs*/ @@ -330,38 +333,66 @@ struct mlx4_en_rx_ring { u32 prod; u32 cons; u32 buf_size; + u8 fcs_del; + u16 rx_alloc_order; + u32 rx_alloc_size; + u32 rx_buf_size; + u32 rx_mb_size; + int qpn; void *buf; void *rx_info; + unsigned long errors; unsigned long bytes; unsigned long packets; - unsigned long errors; +#ifdef LL_EXTENDED_STATS + unsigned long yields; + unsigned long misses; + unsigned long cleaned; +#endif + unsigned long csum_ok; + unsigned long csum_none; + int hwtstamp_rx_filter; + int numa_node; struct lro_ctrl lro; - struct mlx4_en_ipfrag ipfrag[MLX4_EN_NUM_IPFRAG_SESSIONS]; }; - static inline int mlx4_en_can_lro(__be16 status) { - return (status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | - MLX4_CQE_STATUS_IPV4F | - MLX4_CQE_STATUS_IPV6 | - MLX4_CQE_STATUS_IPV4OPT | - MLX4_CQE_STATUS_TCP | - MLX4_CQE_STATUS_UDP | - MLX4_CQE_STATUS_IPOK)) == - cpu_to_be16(MLX4_CQE_STATUS_IPV4 | - MLX4_CQE_STATUS_IPOK | - MLX4_CQE_STATUS_TCP); + static __be16 status_all; + static __be16 status_ipv4_ipok_tcp; + static __be16 status_ipv6_ipok_tcp; + + status_all = cpu_to_be16( + MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPV4F | + MLX4_CQE_STATUS_IPV6 | + MLX4_CQE_STATUS_IPV4OPT | + MLX4_CQE_STATUS_TCP | + MLX4_CQE_STATUS_UDP | + MLX4_CQE_STATUS_IPOK); + status_ipv4_ipok_tcp = cpu_to_be16( + MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPOK | + MLX4_CQE_STATUS_TCP); + status_ipv6_ipok_tcp = cpu_to_be16( + MLX4_CQE_STATUS_IPV6 | + MLX4_CQE_STATUS_IPOK | + MLX4_CQE_STATUS_TCP); + + status &= status_all; + return (status == status_ipv4_ipok_tcp || + status == status_ipv6_ipok_tcp); } + struct mlx4_en_cq { struct mlx4_cq mcq; struct mlx4_hwq_resources wqres; int ring; spinlock_t lock; struct net_device *dev; - /* Per-core Tx cq processing support */ - struct timer_list timer; + /* Per-core Tx cq processing support */ + struct timer_list timer; int size; int buf_size; unsigned vector; @@ -373,6 +404,20 @@ struct mlx4_en_cq { struct taskqueue *tq; #define MLX4_EN_OPCODE_ERROR 0x1e u32 tot_rx; + u32 tot_tx; + +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned int state; +#define MLX4_EN_CQ_STATEIDLE 0 +#define MLX4_EN_CQ_STATENAPI 1 /* NAPI owns this CQ */ +#define MLX4_EN_CQ_STATEPOLL 2 /* poll owns this CQ */ +#define MLX4_CQ_LOCKED (MLX4_EN_CQ_STATENAPI | MLX4_EN_CQ_STATEPOLL) +#define MLX4_EN_CQ_STATENAPI_YIELD 4 /* NAPI yielded this CQ */ +#define MLX4_EN_CQ_STATEPOLL_YIELD 8 /* poll yielded this CQ */ +#define CQ_YIELD (MLX4_EN_CQ_STATENAPI_YIELD | MLX4_EN_CQ_STATEPOLL_YIELD) +#define CQ_USER_PEND (MLX4_EN_CQ_STATEPOLL | MLX4_EN_CQ_STATEPOLL_YIELD) + spinlock_t poll_lock; /* protects from LLS/napi conflicts */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ }; struct mlx4_en_port_profile { @@ -382,41 +427,42 @@ struct mlx4_en_port_profile { u32 tx_ring_size; u32 rx_ring_size; u8 rx_pause; + u8 rx_ppp; u8 tx_pause; - u32 rx_ppp; - u32 tx_ppp; + u8 tx_ppp; + int rss_rings; }; struct mlx4_en_profile { int rss_xor; - int num_lro; - int ip_reasm; - int tcp_rss; int udp_rss; u8 rss_mask; u32 active_ports; u32 small_pkt_int; u8 no_reset; + u8 num_tx_rings_p_up; struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1]; }; struct mlx4_en_dev { - struct mlx4_dev *dev; + struct mlx4_dev *dev; struct pci_dev *pdev; struct mutex state_lock; - struct net_device *pndev[MLX4_MAX_PORTS + 1]; - u32 port_cnt; + struct net_device *pndev[MLX4_MAX_PORTS + 1]; + u32 port_cnt; bool device_up; - struct mlx4_en_profile profile; + struct mlx4_en_profile profile; u32 LSO_support; struct workqueue_struct *workqueue; - struct device *dma_device; - void __iomem *uar_map; - struct mlx4_uar priv_uar; + struct device *dma_device; + void __iomem *uar_map; + struct mlx4_uar priv_uar; struct mlx4_mr mr; - u32 priv_pdn; - spinlock_t uar_lock; + u32 priv_pdn; + spinlock_t uar_lock; u8 mac_removed[MLX4_MAX_PORTS + 1]; + unsigned long last_overflow_check; + unsigned long overflow_period; }; @@ -428,71 +474,76 @@ struct mlx4_en_rss_map { enum mlx4_qp_state indir_state; }; -struct mlx4_en_rss_context { - __be32 base_qpn; - __be32 default_qpn; - u16 reserved; - u8 hash_fn; - u8 flags; - __be32 rss_key[10]; - __be32 base_qpn_udp; -}; - struct mlx4_en_port_state { int link_state; int link_speed; int transciver; + int autoneg; }; -struct mlx4_en_pkt_stats { - unsigned long broadcast; - unsigned long rx_prio[8]; - unsigned long tx_prio[8]; -#define NUM_PKT_STATS 17 +enum mlx4_en_mclist_act { + MCLIST_NONE, + MCLIST_REM, + MCLIST_ADD, }; -struct mlx4_en_port_stats { - unsigned long tso_packets; - unsigned long queue_stopped; - unsigned long wake_queue; - unsigned long tx_timeout; - unsigned long rx_alloc_failed; - unsigned long rx_chksum_good; - unsigned long rx_chksum_none; - unsigned long tx_chksum_offload; +struct mlx4_en_mc_list { + struct list_head list; + enum mlx4_en_mclist_act action; + u8 addr[ETH_ALEN]; + u64 reg_id; }; -struct mlx4_en_perf_stats { - u32 tx_poll; - u64 tx_pktsz_avg; - u32 inflight_avg; - u32 tx_coal_avg; - u32 rx_coal_avg; +#ifdef CONFIG_MLX4_EN_DCB +/* Minimal TC BW - setting to 0 will block traffic */ +#define MLX4_EN_BW_MIN 1 +#define MLX4_EN_BW_MAX 100 /* Utilize 100% of the line */ + +#define MLX4_EN_TC_ETS 7 + +#endif + + +enum { + MLX4_EN_FLAG_PROMISC = (1 << 0), + MLX4_EN_FLAG_MC_PROMISC = (1 << 1), + /* whether we need to enable hardware loopback by putting dmac + * in Tx WQE + */ + MLX4_EN_FLAG_ENABLE_HW_LOOPBACK = (1 << 2), + /* whether we need to drop packets that hardware loopback-ed */ + MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3), + MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4), +#ifdef CONFIG_MLX4_EN_DCB + MLX4_EN_FLAG_DCB_ENABLED = (1 << 5) +#endif }; -struct mlx4_en_frag_info { - u16 frag_size; - u16 frag_prefix_size; +#define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE) +#define MLX4_EN_MAC_HASH_IDX 5 + +struct en_port { + struct kobject kobj; + struct mlx4_dev *dev; + u8 port_num; + u8 vport_num; }; -struct mlx4_en_tx_hash_entry { - u8 cnt; - unsigned int small_pkts; - unsigned int big_pkts; - unsigned int ring; +struct mlx4_en_frag_info { + u16 frag_size; + u16 frag_prefix_size; }; + struct mlx4_en_priv { struct mlx4_en_dev *mdev; struct mlx4_en_port_profile *prof; struct net_device *dev; - bool vlgrp_modified; - u32 vlan_register[VLAN_FLTR_SIZE]; - u32 vlan_unregister[VLAN_FLTR_SIZE]; - u32 vlans[VLAN_FLTR_SIZE]; - spinlock_t vlan_lock; + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct mlx4_en_port_state port_state; spinlock_t stats_lock; + /* To allow rules removal while port is going down */ + struct list_head ethtool_list; unsigned long last_moder_packets[MAX_RX_RINGS]; unsigned long last_moder_tx_packets; @@ -520,139 +571,380 @@ struct mlx4_en_priv { int port; int registered; int allocated; - int rx_csum; - u64 mac; + int stride; + unsigned char current_mac[ETH_ALEN + 2]; + u64 mac; int mac_index; unsigned max_mtu; int base_qpn; + int cqe_factor; struct mlx4_en_rss_map rss_map; - u16 tx_prio_map[8]; + __be32 ctrl_flags; u32 flags; -#define MLX4_EN_FLAG_PROMISC 0x1 + u8 num_tx_rings_p_up; u32 tx_ring_num; u32 rx_ring_num; u32 rx_mb_size; - struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; - u16 num_frags; + struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; + u16 rx_alloc_order; + u32 rx_alloc_size; + u32 rx_buf_size; + u16 num_frags; u16 log_rx_info; - int ip_reasm; - bool wol; - struct mlx4_en_tx_ring tx_ring[MAX_TX_RINGS]; - struct mlx4_en_rx_ring rx_ring[MAX_RX_RINGS]; - struct mlx4_en_cq tx_cq[MAX_TX_RINGS]; - struct mlx4_en_cq rx_cq[MAX_RX_RINGS]; - struct mlx4_en_tx_hash_entry tx_hash[MLX4_EN_TX_HASH_SIZE]; - struct work_struct mcast_task; - struct work_struct start_port_task; - struct work_struct stop_port_task; + struct mlx4_en_tx_ring **tx_ring; + struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; + struct mlx4_en_cq **tx_cq; + struct mlx4_en_cq *rx_cq[MAX_RX_RINGS]; + struct mlx4_qp drop_qp; + struct work_struct rx_mode_task; struct work_struct watchdog_task; struct work_struct linkstate_task; struct delayed_work stats_task; + struct delayed_work service_task; struct mlx4_en_perf_stats pstats; struct mlx4_en_pkt_stats pkstats; + struct mlx4_en_flow_stats flowstats[MLX4_NUM_PRIORITIES]; struct mlx4_en_port_stats port_stats; + struct mlx4_en_vport_stats vport_stats; + struct mlx4_en_vf_stats vf_stats; + DECLARE_BITMAP(stats_bitmap, NUM_ALL_STATS); + struct list_head mc_list; + struct list_head curr_list; + u64 broadcast_id; struct mlx4_en_stat_out_mbox hw_stats; - struct ifmedia media; + int vids[128]; + bool wol; + struct device *ddev; + struct dentry *dev_root; + u32 counter_index; eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; struct callout watchdog_timer; + struct ifmedia media; volatile int blocked; struct sysctl_oid *sysctl; struct sysctl_ctx_list conf_ctx; struct sysctl_ctx_list stat_ctx; +#define MLX4_EN_MAC_HASH_IDX 5 + struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; + +#ifdef CONFIG_MLX4_EN_DCB + struct ieee_ets ets; + u16 maxrate[IEEE_8021QAZ_MAX_TCS]; + u8 dcbx_cap; +#endif +#ifdef CONFIG_RFS_ACCEL + spinlock_t filters_lock; + int last_filter_id; + struct list_head filters; + struct hlist_head filter_hash[1 << MLX4_EN_FILTER_HASH_SHIFT]; +#endif + struct en_port *vf_ports[MLX4_MAX_NUM_VF]; + unsigned long last_ifq_jiffies; + u64 if_counters_rx_errors; + u64 if_counters_rx_no_buffer; + }; enum mlx4_en_wol { MLX4_EN_WOL_MAGIC = (1ULL << 61), MLX4_EN_WOL_ENABLED = (1ULL << 62), - MLX4_EN_WOL_DO_MODIFY = (1ULL << 63), }; -int mlx4_en_transmit(struct net_device *dev, struct mbuf *mb); -void mlx4_en_qflush(struct net_device *dev); +struct mlx4_mac_entry { + struct hlist_node hlist; + unsigned char mac[ETH_ALEN + 2]; + u64 reg_id; +}; + +#ifdef CONFIG_NET_RX_BUSY_POLL +static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) +{ + spin_lock_init(&cq->poll_lock); + cq->state = MLX4_EN_CQ_STATEIDLE; +} + +/* called from the device poll rutine to get ownership of a cq */ +static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) +{ + int rc = true; + spin_lock(&cq->poll_lock); + if (cq->state & MLX4_CQ_LOCKED) { + WARN_ON(cq->state & MLX4_EN_CQ_STATENAPI); + cq->state |= MLX4_EN_CQ_STATENAPI_YIELD; + rc = false; + } else + /* we don't care if someone yielded */ + cq->state = MLX4_EN_CQ_STATENAPI; + spin_unlock(&cq->poll_lock); + return rc; +} + +/* returns true is someone tried to get the cq while napi had it */ +static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) +{ + int rc = false; + spin_lock(&cq->poll_lock); + WARN_ON(cq->state & (MLX4_EN_CQ_STATEPOLL | + MLX4_EN_CQ_STATENAPI_YIELD)); + + if (cq->state & MLX4_EN_CQ_STATEPOLL_YIELD) + rc = true; + cq->state = MLX4_EN_CQ_STATEIDLE; + spin_unlock(&cq->poll_lock); + return rc; +} + +/* called from mlx4_en_low_latency_poll() */ +static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) +{ + int rc = true; + spin_lock_bh(&cq->poll_lock); + if ((cq->state & MLX4_CQ_LOCKED)) { + struct net_device *dev = cq->dev; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; + + cq->state |= MLX4_EN_CQ_STATEPOLL_YIELD; + rc = false; +#ifdef LL_EXTENDED_STATS + rx_ring->yields++; +#endif + } else + /* preserve yield marks */ + cq->state |= MLX4_EN_CQ_STATEPOLL; + spin_unlock_bh(&cq->poll_lock); + return rc; +} + +/* returns true if someone tried to get the cq while it was locked */ +static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) +{ + int rc = false; + spin_lock_bh(&cq->poll_lock); + WARN_ON(cq->state & (MLX4_EN_CQ_STATENAPI)); + + if (cq->state & MLX4_EN_CQ_STATEPOLL_YIELD) + rc = true; + cq->state = MLX4_EN_CQ_STATEIDLE; + spin_unlock_bh(&cq->poll_lock); + return rc; +} + +/* true if a socket is polling, even if it did not get the lock */ +static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq) +{ + WARN_ON(!(cq->state & MLX4_CQ_LOCKED)); + return cq->state & CQ_USER_PEND; +} +#else +static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) +{ +} + +static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) +{ + return true; +} + +static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) +{ + return false; +} + +static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) +{ + return false; +} + +static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) +{ + return false; +} + +static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq) +{ + return false; +} +#endif /* CONFIG_NET_RX_BUSY_POLL */ + +#define MLX4_EN_WOL_DO_MODIFY (1ULL << 63) -int mlx4_en_rx_frags(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, - struct mbuf *mb, struct mlx4_cqe *cqe); -void mlx4_en_flush_frags(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring); void mlx4_en_destroy_netdev(struct net_device *dev); int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof); -int mlx4_en_do_start_port(struct net_device *dev); -void mlx4_en_do_stop_port(struct net_device *dev); +int mlx4_en_start_port(struct net_device *dev); +void mlx4_en_stop_port(struct net_device *dev); void mlx4_en_free_resources(struct mlx4_en_priv *priv); int mlx4_en_alloc_resources(struct mlx4_en_priv *priv); -int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, - int entries, int ring, enum cq_type mode); -void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); -int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); +int mlx4_en_pre_config(struct mlx4_en_priv *priv); +int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq, + int entries, int ring, enum cq_type mode, int node); +void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq); +int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, + int cq_idx); void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); -void mlx4_en_poll_tx_cq(unsigned long data); void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb); -int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - u32 size, u16 stride); -void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring); +int mlx4_en_transmit(struct ifnet *dev, struct mbuf *m); +int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring **pring, + u32 size, u16 stride, int node, int queue_idx); +void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring **pring); int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, - int cq); + int cq, int user_prio); void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring); +void mlx4_en_qflush(struct ifnet *dev); int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring, u32 size); + struct mlx4_en_rx_ring **pring, + u32 size, int node); void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring); + struct mlx4_en_rx_ring **pring, + u32 size, u16 stride); +void mlx4_en_tx_que(void *context, int pending); +void mlx4_en_rx_que(void *context, int pending); int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv); void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring); int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget); -int mlx4_en_process_rx_cq_mb(struct net_device *dev, - struct mlx4_en_cq *cq, - int budget); -void mlx4_en_tx_que(void *context, int pending); -void mlx4_en_rx_que(void *context, int pending); +void mlx4_en_poll_tx_cq(unsigned long data); void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, - int is_tx, int rss, int qpn, int cqn, - struct mlx4_qp_context *context); + int is_tx, int rss, int qpn, int cqn, int user_prio, + struct mlx4_qp_context *context); void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event); int mlx4_en_map_buffer(struct mlx4_buf *buf); void mlx4_en_unmap_buffer(struct mlx4_buf *buf); - void mlx4_en_calc_rx_buf(struct net_device *dev); -void mlx4_en_set_prio_map(struct mlx4_en_priv *priv, u16 *prio_map, u32 ring_num); + int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv); void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv); +int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv); +void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv); int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring); void mlx4_en_rx_irq(struct mlx4_cq *mcq); -//int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode); -int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, u32 *vlans); -//int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, -// u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); -//int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, -// u8 promisc); +int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode); +int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv); int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset); int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port); +int mlx4_en_get_vport_stats(struct mlx4_en_dev *mdev, u8 port); +void mlx4_en_create_debug_files(struct mlx4_en_priv *priv); +void mlx4_en_delete_debug_files(struct mlx4_en_priv *priv); +int mlx4_en_register_debugfs(void); +void mlx4_en_unregister_debugfs(void); + +#ifdef CONFIG_MLX4_EN_DCB +extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops; +extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops; +#endif + +int mlx4_en_setup_tc(struct net_device *dev, u8 up); + +#ifdef CONFIG_RFS_ACCEL +void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *rx_ring); +#endif #define MLX4_EN_NUM_SELF_TEST 5 void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf); -u64 mlx4_en_mac_to_u64(u8 *addr); +void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev); + +/* + * Functions for time stamping + */ +#define SKBTX_HW_TSTAMP (1 << 0) +#define SKBTX_IN_PROGRESS (1 << 2) + +u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe); + +/* Functions for caching and restoring statistics */ +int mlx4_en_get_sset_count(struct net_device *dev, int sset); +void mlx4_en_restore_ethtool_stats(struct mlx4_en_priv *priv, + u64 *data); /* * Globals */ extern const struct ethtool_ops mlx4_en_ethtool_ops; + +/* + * Defines for link speed - needed by selftest + */ +#define MLX4_EN_LINK_SPEED_1G 1000 +#define MLX4_EN_LINK_SPEED_10G 10000 +#define MLX4_EN_LINK_SPEED_40G 40000 + +enum { + NETIF_MSG_DRV = 0x0001, + NETIF_MSG_PROBE = 0x0002, + NETIF_MSG_LINK = 0x0004, + NETIF_MSG_TIMER = 0x0008, + NETIF_MSG_IFDOWN = 0x0010, + NETIF_MSG_IFUP = 0x0020, + NETIF_MSG_RX_ERR = 0x0040, + NETIF_MSG_TX_ERR = 0x0080, + NETIF_MSG_TX_QUEUED = 0x0100, + NETIF_MSG_INTR = 0x0200, + NETIF_MSG_TX_DONE = 0x0400, + NETIF_MSG_RX_STATUS = 0x0800, + NETIF_MSG_PKTDATA = 0x1000, + NETIF_MSG_HW = 0x2000, + NETIF_MSG_WOL = 0x4000, +}; + + +/* + * printk / logging functions + */ + +#define en_print(level, priv, format, arg...) \ + { \ + if ((priv)->registered) \ + printk(level "%s: %s: " format, DRV_NAME, \ + (priv->dev)->if_xname, ## arg); \ + else \ + printk(level "%s: %s: Port %d: " format, \ + DRV_NAME, dev_name(&priv->mdev->pdev->dev), \ + (priv)->port, ## arg); \ + } + + +#define en_dbg(mlevel, priv, format, arg...) \ +do { \ + if (NETIF_MSG_##mlevel & priv->msg_enable) \ + en_print(KERN_DEBUG, priv, format, ##arg); \ +} while (0) +#define en_warn(priv, format, arg...) \ + en_print(KERN_WARNING, priv, format, ##arg) +#define en_err(priv, format, arg...) \ + en_print(KERN_ERR, priv, format, ##arg) +#define en_info(priv, format, arg...) \ + en_print(KERN_INFO, priv, format, ## arg) + +#define mlx4_err(mdev, format, arg...) \ + pr_err("%s %s: " format, DRV_NAME, \ + dev_name(&mdev->pdev->dev), ##arg) +#define mlx4_info(mdev, format, arg...) \ + pr_info("%s %s: " format, DRV_NAME, \ + dev_name(&mdev->pdev->dev), ##arg) +#define mlx4_warn(mdev, format, arg...) \ + pr_warning("%s %s: " format, DRV_NAME, \ + dev_name(&mdev->pdev->dev), ##arg) + #endif diff --git a/sys/ofed/drivers/net/mlx4/mlx4_stats.h b/sys/ofed/drivers/net/mlx4/mlx4_stats.h new file mode 100644 index 0000000..0270cef --- /dev/null +++ b/sys/ofed/drivers/net/mlx4/mlx4_stats.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2014 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX4_STATS_ +#define _MLX4_STATS_ + + +#ifdef MLX4_EN_PERF_STAT +#define NUM_PERF_STATS NUM_PERF_COUNTERS +#else +#define NUM_PERF_STATS 0 +#endif + +#define NUM_PRIORITIES 9 +#define NUM_PRIORITY_STATS 2 + +struct mlx4_en_pkt_stats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long rx_multicast_packets; + unsigned long rx_broadcast_packets; + unsigned long rx_errors; + unsigned long rx_dropped; + unsigned long rx_length_errors; + unsigned long rx_over_errors; + unsigned long rx_crc_errors; + unsigned long rx_jabbers; + unsigned long rx_in_range_length_error; + unsigned long rx_out_range_length_error; + unsigned long rx_lt_64_bytes_packets; + unsigned long rx_127_bytes_packets; + unsigned long rx_255_bytes_packets; + unsigned long rx_511_bytes_packets; + unsigned long rx_1023_bytes_packets; + unsigned long rx_1518_bytes_packets; + unsigned long rx_1522_bytes_packets; + unsigned long rx_1548_bytes_packets; + unsigned long rx_gt_1548_bytes_packets; + unsigned long tx_packets; + unsigned long tx_bytes; + unsigned long tx_multicast_packets; + unsigned long tx_broadcast_packets; + unsigned long tx_errors; + unsigned long tx_dropped; + unsigned long tx_lt_64_bytes_packets; + unsigned long tx_127_bytes_packets; + unsigned long tx_255_bytes_packets; + unsigned long tx_511_bytes_packets; + unsigned long tx_1023_bytes_packets; + unsigned long tx_1518_bytes_packets; + unsigned long tx_1522_bytes_packets; + unsigned long tx_1548_bytes_packets; + unsigned long tx_gt_1548_bytes_packets; + unsigned long rx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS]; + unsigned long tx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS]; +#define NUM_PKT_STATS 72 +}; + +struct mlx4_en_vf_stats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long rx_multicast_packets; + unsigned long rx_broadcast_packets; + unsigned long rx_errors; + unsigned long rx_dropped; + unsigned long tx_packets; + unsigned long tx_bytes; + unsigned long tx_multicast_packets; + unsigned long tx_broadcast_packets; + unsigned long tx_errors; +#define NUM_VF_STATS 11 +}; + +struct mlx4_en_vport_stats { + unsigned long rx_unicast_packets; + unsigned long rx_unicast_bytes; + unsigned long rx_multicast_packets; + unsigned long rx_multicast_bytes; + unsigned long rx_broadcast_packets; + unsigned long rx_broadcast_bytes; + unsigned long rx_dropped; + unsigned long rx_errors; + unsigned long tx_unicast_packets; + unsigned long tx_unicast_bytes; + unsigned long tx_multicast_packets; + unsigned long tx_multicast_bytes; + unsigned long tx_broadcast_packets; + unsigned long tx_broadcast_bytes; + unsigned long tx_errors; +#define NUM_VPORT_STATS 15 +}; + +struct mlx4_en_port_stats { + unsigned long tso_packets; + unsigned long queue_stopped; + unsigned long wake_queue; + unsigned long tx_timeout; + unsigned long rx_alloc_failed; + unsigned long rx_chksum_good; + unsigned long rx_chksum_none; + unsigned long tx_chksum_offload; +#define NUM_PORT_STATS 8 +}; + +struct mlx4_en_perf_stats { + u32 tx_poll; + u64 tx_pktsz_avg; + u32 inflight_avg; + u16 tx_coal_avg; + u16 rx_coal_avg; + u32 napi_quota; +#define NUM_PERF_COUNTERS 6 +}; + +struct mlx4_en_flow_stats { + u64 rx_pause; + u64 rx_pause_duration; + u64 rx_pause_transition; + u64 tx_pause; + u64 tx_pause_duration; + u64 tx_pause_transition; +}; +#define MLX4_NUM_PRIORITIES 8 +#define NUM_FLOW_PRIORITY_STATS 6 +#define NUM_FLOW_STATS (NUM_FLOW_PRIORITY_STATS*MLX4_NUM_PRIORITIES) + + +struct mlx4_en_stat_out_flow_control_mbox { + /* Total number of PAUSE frames received from the far-end port */ + __be64 rx_pause; + /* Total number of microseconds that far-end port requested to pause + * transmission of packets + */ + __be64 rx_pause_duration; + /* Number of received transmission from XOFF state to XON state */ + __be64 rx_pause_transition; + /* Total number of PAUSE frames sent from the far-end port */ + __be64 tx_pause; + /* Total time in microseconds that transmission of packets has been + * paused + */ + __be64 tx_pause_duration; + /* Number of transmitter transitions from XOFF state to XON state */ + __be64 tx_pause_transition; + /* Reserverd */ + __be64 reserved[2]; +}; + +int mlx4_get_vport_ethtool_stats(struct mlx4_dev *dev, int port, + struct mlx4_en_vport_stats *vport_stats, + int reset); + +#define NUM_ALL_STATS (NUM_PKT_STATS + NUM_FLOW_STATS + NUM_VPORT_STATS + \ + NUM_VF_STATS + NUM_PORT_STATS + NUM_PERF_STATS) +#endif diff --git a/sys/ofed/drivers/net/mlx4/mr.c b/sys/ofed/drivers/net/mlx4/mr.c index 69a0abd..876d16d 100644 --- a/sys/ofed/drivers/net/mlx4/mr.c +++ b/sys/ofed/drivers/net/mlx4/mr.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -32,30 +32,20 @@ * SOFTWARE. */ +#include <linux/err.h> #include <linux/errno.h> +#include <linux/module.h> #include <linux/slab.h> #include <linux/kernel.h> #include <linux/vmalloc.h> #include <linux/mlx4/cmd.h> +#include <linux/math64.h> + #include "mlx4.h" #include "icm.h" -#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28) -#define MLX4_MPT_FLAG_FREE (0x3UL << 28) -#define MLX4_MPT_FLAG_MIO (1 << 17) -#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15) -#define MLX4_MPT_FLAG_PHYSICAL (1 << 9) -#define MLX4_MPT_FLAG_REGION (1 << 8) - -#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) -#define MLX4_MPT_PD_FLAG_RAE (1 << 28) -#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) - -#define MLX4_MPT_STATUS_SW 0xF0 -#define MLX4_MPT_STATUS_HW 0x00 - static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order) { int o; @@ -129,9 +119,8 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) for (i = 0; i <= buddy->max_order; ++i) { s = BITS_TO_LONGS(1 << (buddy->max_order - i)); buddy->bits[i] = kcalloc(s, sizeof (long), GFP_KERNEL | __GFP_NOWARN); - if (!buddy->bits[i]) { - goto err_out_free; - } + if (!buddy->bits[i]) + goto err_out_free; } set_bit(0, buddy->bits[buddy->max_order]); @@ -141,8 +130,7 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) err_out_free: for (i = 0; i <= buddy->max_order; ++i) - if ( buddy->bits[i] ) - kfree(buddy->bits[i]); + kfree(buddy->bits[i]); err_out: kfree(buddy->bits); @@ -156,7 +144,7 @@ static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy) int i; for (i = 0; i <= buddy->max_order; ++i) - kfree(buddy->bits[i]); + kfree(buddy->bits[i]); kfree(buddy->bits); kfree(buddy->num_free); @@ -315,7 +303,7 @@ static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd, mr->size = size; mr->pd = pd; mr->access = access; - mr->enabled = MLX4_MR_DISABLED; + mr->enabled = MLX4_MPT_DISABLED; mr->key = hw_index_to_key(mridx); return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); @@ -329,14 +317,14 @@ static int mlx4_WRITE_MTT(struct mlx4_dev *dev, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } -int __mlx4_mr_reserve(struct mlx4_dev *dev) +int __mlx4_mpt_reserve(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); return mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap); } -static int mlx4_mr_reserve(struct mlx4_dev *dev) +static int mlx4_mpt_reserve(struct mlx4_dev *dev) { u64 out_param; @@ -347,17 +335,17 @@ static int mlx4_mr_reserve(struct mlx4_dev *dev) return -1; return get_param_l(&out_param); } - return __mlx4_mr_reserve(dev); + return __mlx4_mpt_reserve(dev); } -void __mlx4_mr_release(struct mlx4_dev *dev, u32 index) +void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index) { struct mlx4_priv *priv = mlx4_priv(dev); - mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index); + mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index, MLX4_NO_RR); } -static void mlx4_mr_release(struct mlx4_dev *dev, u32 index) +static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index) { u64 in_param = 0; @@ -370,17 +358,17 @@ static void mlx4_mr_release(struct mlx4_dev *dev, u32 index) index); return; } - __mlx4_mr_release(dev, index); + __mlx4_mpt_release(dev, index); } -int __mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index) +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; return mlx4_table_get(dev, &mr_table->dmpt_table, index); } -static int mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index) +static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) { u64 param = 0; @@ -391,17 +379,17 @@ static int mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index) MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_mr_alloc_icm(dev, index); + return __mlx4_mpt_alloc_icm(dev, index); } -void __mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index) +void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; mlx4_table_put(dev, &mr_table->dmpt_table, index); } -static void mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index) +static void mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) { u64 in_param = 0; @@ -414,7 +402,7 @@ static void mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index) index); return; } - return __mlx4_mr_free_icm(dev, index); + return __mlx4_mpt_free_icm(dev, index); } int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, @@ -423,41 +411,52 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, u32 index; int err; - index = mlx4_mr_reserve(dev); + index = mlx4_mpt_reserve(dev); if (index == -1) return -ENOMEM; err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size, access, npages, page_shift, mr); if (err) - mlx4_mr_release(dev, index); + mlx4_mpt_release(dev, index); return err; } EXPORT_SYMBOL_GPL(mlx4_mr_alloc); -static void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) +static int mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) { int err; - if (mr->enabled == MLX4_MR_EN_HW) { + if (mr->enabled == MLX4_MPT_EN_HW) { err = mlx4_HW2SW_MPT(dev, NULL, key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1)); - if (err) - mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err); + if (err) { + mlx4_warn(dev, "HW2SW_MPT failed (%d).", err); + mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n"); + return err; + } - mr->enabled = MLX4_MR_EN_SW; + mr->enabled = MLX4_MPT_EN_SW; } mlx4_mtt_cleanup(dev, &mr->mtt); + + return 0; } -void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) +int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) { - mlx4_mr_free_reserved(dev, mr); + int ret; + + ret = mlx4_mr_free_reserved(dev, mr); + if (ret) + return ret; if (mr->enabled) - mlx4_mr_free_icm(dev, key_to_hw_index(mr->key)); - mlx4_mr_release(dev, key_to_hw_index(mr->key)); + mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key)); + mlx4_mpt_release(dev, key_to_hw_index(mr->key)); + + return 0; } EXPORT_SYMBOL_GPL(mlx4_mr_free); @@ -467,7 +466,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mr_alloc_icm(dev, key_to_hw_index(mr->key)); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key)); if (err) return err; @@ -514,7 +513,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err); goto err_cmd; } - mr->enabled = MLX4_MR_EN_HW; + mr->enabled = MLX4_MPT_EN_HW; mlx4_free_cmd_mailbox(dev, mailbox); @@ -524,7 +523,7 @@ err_cmd: mlx4_free_cmd_mailbox(dev, mailbox); err_table: - mlx4_mr_free_icm(dev, key_to_hw_index(mr->key)); + mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key)); return err; } EXPORT_SYMBOL_GPL(mlx4_mr_enable); @@ -651,6 +650,95 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, } EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt); +int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type, + struct mlx4_mw *mw) +{ + u32 index; + + index = mlx4_mpt_reserve(dev); + if (index == -1) + return -ENOMEM; + + mw->key = hw_index_to_key(index); + mw->pd = pd; + mw->type = type; + mw->enabled = MLX4_MPT_DISABLED; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mw_alloc); + +int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mpt_entry *mpt_entry; + int err; + + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key)); + if (err) + return err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_table; + } + mpt_entry = mailbox->buf; + + memset(mpt_entry, 0, sizeof(*mpt_entry)); + + /* Note that the MLX4_MPT_FLAG_REGION bit in mpt_entry->flags is turned + * off, thus creating a memory window and not a memory region. + */ + mpt_entry->key = cpu_to_be32(key_to_hw_index(mw->key)); + mpt_entry->pd_flags = cpu_to_be32(mw->pd); + if (mw->type == MLX4_MW_TYPE_2) { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE); + mpt_entry->qpn = cpu_to_be32(MLX4_MPT_QP_FLAG_BOUND_QP); + mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_EN_INV); + } + + err = mlx4_SW2HW_MPT(dev, mailbox, + key_to_hw_index(mw->key) & + (dev->caps.num_mpts - 1)); + if (err) { + mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err); + goto err_cmd; + } + mw->enabled = MLX4_MPT_EN_HW; + + mlx4_free_cmd_mailbox(dev, mailbox); + + return 0; + +err_cmd: + mlx4_free_cmd_mailbox(dev, mailbox); + +err_table: + mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key)); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mw_enable); + +void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw) +{ + int err; + + if (mw->enabled == MLX4_MPT_EN_HW) { + err = mlx4_HW2SW_MPT(dev, NULL, + key_to_hw_index(mw->key) & + (dev->caps.num_mpts - 1)); + if (err) + mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err); + + mw->enabled = MLX4_MPT_EN_SW; + } + if (mw->enabled) + mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key)); + mlx4_mpt_release(dev, key_to_hw_index(mw->key)); +} +EXPORT_SYMBOL_GPL(mlx4_mw_free); + int mlx4_init_mr_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -671,8 +759,8 @@ int mlx4_init_mr_table(struct mlx4_dev *dev) return err; err = mlx4_buddy_init(&mr_table->mtt_buddy, - ilog2((u32)dev->caps.num_mtts / - (1 << log_mtts_per_seg))); + ilog2(div_u64(dev->caps.num_mtts, + (1 << log_mtts_per_seg)))); if (err) goto err_buddy; @@ -791,7 +879,7 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, int max_maps, u8 page_shift, struct mlx4_fmr *fmr) { struct mlx4_priv *priv = mlx4_priv(dev); - int err = -ENOMEM; + int err = -ENOMEM, ret; if (max_maps > dev->caps.max_fmr_maps) return -EINVAL; @@ -825,7 +913,9 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, return 0; err_free: - mlx4_mr_free(dev, &fmr->mr); + ret = mlx4_mr_free(dev, &fmr->mr); + if (ret) + mlx4_err(dev, "Error deregistering MR. The system may have become unstable."); return err; } EXPORT_SYMBOL_GPL(mlx4_fmr_alloc); @@ -851,40 +941,48 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_enable); void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u32 *lkey, u32 *rkey) { - struct mlx4_cmd_mailbox *mailbox; - int err; + u32 key; if (!fmr->maps) return; - fmr->maps = 0; + key = key_to_hw_index(fmr->mr.key) & (dev->caps.num_mpts - 1); - mailbox = mlx4_alloc_cmd_mailbox(dev); - if (IS_ERR(mailbox)) { - err = PTR_ERR(mailbox); - mlx4_warn(dev, "mlx4_alloc_cmd_mailbox failed (%d)\n", err); - return; - } + *(u8 *)fmr->mpt = MLX4_MPT_STATUS_SW; - err = mlx4_HW2SW_MPT(dev, NULL, - key_to_hw_index(fmr->mr.key) & - (dev->caps.num_mpts - 1)); - mlx4_free_cmd_mailbox(dev, mailbox); - if (err) { - mlx4_warn(dev, "mlx4_HW2SW_MPT failed (%d)\n", err); - return; - } - fmr->mr.enabled = MLX4_MR_EN_SW; + /* Make sure MPT status is visible before changing MPT fields */ + wmb(); + + fmr->mr.key = hw_index_to_key(key); + + fmr->mpt->key = cpu_to_be32(key); + fmr->mpt->lkey = cpu_to_be32(key); + fmr->mpt->length = 0; + fmr->mpt->start = 0; + + /* Make sure MPT data is visible before changing MPT status */ + wmb(); + + *(u8 *)fmr->mpt = MLX4_MPT_STATUS_HW; + + /* Make sure MPT satus is visible */ + wmb(); + + fmr->maps = 0; } EXPORT_SYMBOL_GPL(mlx4_fmr_unmap); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr) { + int ret; + if (fmr->maps) return -EBUSY; - mlx4_mr_free(dev, &fmr->mr); - fmr->mr.enabled = MLX4_MR_DISABLED; + ret = mlx4_mr_free(dev, &fmr->mr); + if (ret) + return ret; + fmr->mr.enabled = MLX4_MPT_DISABLED; return 0; } diff --git a/sys/ofed/drivers/net/mlx4/pd.c b/sys/ofed/drivers/net/mlx4/pd.c index 2c525aa..5162a47 100644 --- a/sys/ofed/drivers/net/mlx4/pd.c +++ b/sys/ofed/drivers/net/mlx4/pd.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,6 +32,7 @@ */ #include <linux/errno.h> +#include <linux/module.h> #include <linux/io-mapping.h> #include <asm/page.h> @@ -57,7 +58,7 @@ EXPORT_SYMBOL_GPL(mlx4_pd_alloc); void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn) { - mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn); + mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn, MLX4_USE_RR); } EXPORT_SYMBOL_GPL(mlx4_pd_free); @@ -94,7 +95,7 @@ EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc); void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) { - mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn); + mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn, MLX4_USE_RR); } void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) @@ -162,7 +163,7 @@ EXPORT_SYMBOL_GPL(mlx4_uar_alloc); void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar) { - mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index); + mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index, MLX4_USE_RR); } EXPORT_SYMBOL_GPL(mlx4_uar_free); diff --git a/sys/ofed/drivers/net/mlx4/port.c b/sys/ofed/drivers/net/mlx4/port.c index 2a009ea..c653d4b 100644 --- a/sys/ofed/drivers/net/mlx4/port.c +++ b/sys/ofed/drivers/net/mlx4/port.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,10 +32,14 @@ #include <linux/errno.h> #include <linux/if_ether.h> +#include <linux/module.h> +#include <linux/err.h> #include <linux/mlx4/cmd.h> #include <linux/moduleparam.h> #include "mlx4.h" +#include "mlx4_stats.h" + int mlx4_set_4k_mtu = -1; module_param_named(set_4k_mtu, mlx4_set_4k_mtu, int, 0444); @@ -48,12 +52,6 @@ MODULE_PARM_DESC(set_4k_mtu, #define MLX4_VLAN_VALID (1u << 31) #define MLX4_VLAN_MASK 0xfff -#define MLX4_STATS_TRAFFIC_COUNTERS_MASK 0xfULL -#define MLX4_STATS_TRAFFIC_DROPS_MASK 0xc0ULL -#define MLX4_STATS_ERROR_COUNTERS_MASK 0x1ffc30ULL -#define MLX4_STATS_PORT_COUNTERS_MASK 0x1fe00000ULL -#define MLX4_STATS_IF_RX_ERRORS_COUNTERS_MASK 0x8010ULL - void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) { int i; @@ -85,7 +83,7 @@ static int validate_index(struct mlx4_dev *dev, { int err = 0; - if (index < 0 || index >= table->max || !table->entries[index]) { + if (index < 0 || index >= table->max || !table->refs[index]) { mlx4_warn(dev, "No valid Mac entry for the given index\n"); err = -EINVAL; } @@ -140,14 +138,15 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) mutex_lock(&table->mutex); for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { - if (free < 0 && !table->entries[i]) { + if (free < 0 && !table->refs[i]) { free = i; continue; } - if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) { + if ((mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) && + table->refs[i]) { /* MAC already registered, Must not have duplicates */ - err = i; + err = i; ++table->refs[i]; goto out; } @@ -184,13 +183,24 @@ EXPORT_SYMBOL_GPL(__mlx4_register_mac); int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) { u64 out_param = 0; - int err; + int err = -EINVAL; if (mlx4_is_mfunc(dev)) { - err = mlx4_cmd_imm(dev, mac, &out_param, - ((u32) port) << 8 | (u32) RES_MAC, - RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) { + err = mlx4_cmd_imm(dev, mac, &out_param, + ((u32) port) << 8 | (u32) RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } + if (err && err == -EINVAL && mlx4_is_slave(dev)) { + /* retry using old REG_MAC format */ + set_param_l(&out_param, port); + err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!err) + dev->flags |= MLX4_FLAG_OLD_REG_MAC; + } if (err) return err; @@ -245,10 +255,18 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac) u64 out_param = 0; if (mlx4_is_mfunc(dev)) { - (void) mlx4_cmd_imm(dev, mac, &out_param, - ((u32) port) << 8 | (u32) RES_MAC, - RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + if (!(dev->flags & MLX4_FLAG_OLD_REG_MAC)) { + (void) mlx4_cmd_imm(dev, mac, &out_param, + ((u32) port) << 8 | (u32) RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } else { + /* use old unregister mac format */ + set_param_l(&out_param, port); + (void) mlx4_cmd_imm(dev, mac, &out_param, RES_MAC, + RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); + } return; } __mlx4_unregister_mac(dev, port, mac); @@ -535,17 +553,21 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, __be32 new_cap_mask; port = in_mod & 0xff; - in_modifier = in_mod >> 8; + in_modifier = (in_mod >> 8) & 0xff; is_eth = op_mod; port_info = &priv->port[port]; + if (op_mod > 1) + return -EINVAL; + /* Slaves cannot perform SET_PORT operations except changing MTU */ if (is_eth) { if (slave != dev->caps.function && in_modifier != MLX4_SET_PORT_GENERAL && in_modifier != MLX4_SET_PORT_GID_TABLE) { - mlx4_warn(dev, "denying SET_PORT for slave:%d\n", - slave); + mlx4_warn(dev, "denying SET_PORT for slave:%d," + "port %d, config_select 0x%x\n", + slave, port, in_modifier); return -EINVAL; } switch (in_modifier) { @@ -570,7 +592,8 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, /* Mtu is configured as the max MTU among all the * the functions on the port. */ mtu = be16_to_cpu(gen_context->mtu); - mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port]); + mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port] + + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); prev_mtu = slave_st->mtu[port]; slave_st->mtu[port] = mtu; if (mtu > master->max_mtu[port]) @@ -650,7 +673,7 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, break; } - return mlx4_cmd(dev, inbox->dma, in_mod, op_mod, + return mlx4_cmd(dev, inbox->dma, in_mod & 0xffff, op_mod, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } @@ -727,19 +750,10 @@ enum { MLX4_CHANGE_PORT_MTU_CAP = 22, }; -#define CX3_PPF_DEV_ID 0x1003 -static int vl_cap_start(struct mlx4_dev *dev) -{ - /* for non CX3 devices, start with 4 VLs to avoid errors in syslog */ - if (dev->pdev->device != CX3_PPF_DEV_ID) - return 4; - return 8; -} - int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz) { struct mlx4_cmd_mailbox *mailbox; - int err, vl_cap, pkey_tbl_flag = 0; + int err = -EINVAL, vl_cap, pkey_tbl_flag = 0; u32 in_mod; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_NONE) @@ -765,7 +779,8 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz) } /* IB VL CAP enum isn't used by the firmware, just numerical values */ - for (vl_cap = vl_cap_start(dev); vl_cap >= 1; vl_cap >>= 1) { + for (vl_cap = dev->caps.vl_cap[port]; + vl_cap >= 1; vl_cap >>= 1) { ((__be32 *) mailbox->buf)[0] = cpu_to_be32( (1 << MLX4_CHANGE_PORT_MTU_CAP) | (1 << MLX4_CHANGE_PORT_VL_CAP) | @@ -822,10 +837,9 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u32 in_mod; u32 m_promisc = (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) ? MCAST_DIRECT : MCAST_DEFAULT; -/* + if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) return 0; -*/ mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) @@ -834,10 +848,7 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, memset(context, 0, sizeof *context); context->base_qpn = cpu_to_be32(base_qpn); - /* - * This assignment breaks vlan support - I don't know why. Probablya an A0 issue - shahar Klein - * context->n_mac = dev->caps.log_num_macs; - */ + context->n_mac = dev->caps.log_num_macs; context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT | base_qpn); context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT | @@ -960,40 +971,44 @@ int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave, return err; } -int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave, - u32 in_mod, struct mlx4_cmd_mailbox *outbox) -{ - return mlx4_cmd_box(dev, 0, outbox->dma, in_mod, 0, - MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, - MLX4_CMD_NATIVE); -} - int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { - if (slave != dev->caps.function) - return 0; - return mlx4_common_dump_eth_stats(dev, slave, - vhcr->in_modifier, outbox); + return 0; } -void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap) +void mlx4_set_stats_bitmap(struct mlx4_dev *dev, unsigned long *stats_bitmap) { - if (!mlx4_is_mfunc(dev)) { - *stats_bitmap = 0; - return; + int last_i = 0; + + bitmap_zero(stats_bitmap, NUM_ALL_STATS); + + if (mlx4_is_slave(dev)) { + last_i = dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN ? + NUM_PKT_STATS + NUM_FLOW_STATS : NUM_PKT_STATS; + } else { + bitmap_set(stats_bitmap, last_i, NUM_PKT_STATS); + last_i = NUM_PKT_STATS; + + if (dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) { + bitmap_set(stats_bitmap, last_i, NUM_FLOW_STATS); + last_i += NUM_FLOW_STATS; + } } - *stats_bitmap = (MLX4_STATS_TRAFFIC_COUNTERS_MASK | - MLX4_STATS_TRAFFIC_DROPS_MASK | - MLX4_STATS_PORT_COUNTERS_MASK | - MLX4_STATS_IF_RX_ERRORS_COUNTERS_MASK); + if (mlx4_is_slave(dev)) + bitmap_set(stats_bitmap, last_i, NUM_VF_STATS); + last_i += NUM_VF_STATS; if (mlx4_is_master(dev)) - *stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK; + bitmap_set(stats_bitmap, last_i, NUM_VPORT_STATS); + last_i += NUM_VPORT_STATS; + + bitmap_set(stats_bitmap, last_i, NUM_PORT_STATS); } EXPORT_SYMBOL(mlx4_set_stats_bitmap); diff --git a/sys/ofed/drivers/net/mlx4/profile.c b/sys/ofed/drivers/net/mlx4/profile.c index d3042f0..aa5f957 100644 --- a/sys/ofed/drivers/net/mlx4/profile.c +++ b/sys/ofed/drivers/net/mlx4/profile.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -76,7 +76,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, u64 size; u64 start; int type; - u32 num; + u64 num; int log_num; }; @@ -112,7 +112,8 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, min_t(unsigned, dev_cap->max_eqs, MAX_MSIX); profile[MLX4_RES_DMPT].num = request->num_mpt; profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS; - profile[MLX4_RES_MTT].num = request->num_mtt * (1 << log_mtts_per_seg); + profile[MLX4_RES_MTT].num = ((u64)request->num_mtt_segs) * + (1 << log_mtts_per_seg); profile[MLX4_RES_MCG].num = request->num_mcg; for (i = 0; i < MLX4_RES_NUM; ++i) { diff --git a/sys/ofed/drivers/net/mlx4/qp.c b/sys/ofed/drivers/net/mlx4/qp.c index 2e2033d..fe8d3c2 100644 --- a/sys/ofed/drivers/net/mlx4/qp.c +++ b/sys/ofed/drivers/net/mlx4/qp.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -33,6 +33,10 @@ * SOFTWARE. */ +#include <linux/types.h> +#include <linux/gfp.h> +#include <linux/module.h> + #include <linux/mlx4/cmd.h> #include <linux/mlx4/qp.h> @@ -210,13 +214,18 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, EXPORT_SYMBOL_GPL(mlx4_qp_modify); int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, - int *base, u8 bf_qp) + int *base, u8 flags) { + int bf_qp = !!(flags & (u8) MLX4_RESERVE_BF_QP); + struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; + /* Only IPoIB uses a large cnt. In this case, just allocate + * as usual, ignoring bf skipping, since IPoIB does not run over RoCE + */ if (cnt > MLX4_MAX_BF_QP_RANGE && bf_qp) - return -ENOMEM; + bf_qp = 0; *base = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align, bf_qp ? MLX4_BF_QP_SKIP_MASK : 0); @@ -227,14 +236,14 @@ int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, } int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, - int *base, u8 bf_qp) + int *base, u8 flags) { u64 in_param = 0; u64 out_param; int err; if (mlx4_is_mfunc(dev)) { - set_param_l(&in_param, (((!!bf_qp) << 31) | (u32)cnt)); + set_param_l(&in_param, (((u32) flags) << 24) | (u32) cnt); set_param_h(&in_param, align); err = mlx4_cmd_imm(dev, in_param, &out_param, RES_QP, RES_OP_RESERVE, @@ -246,7 +255,7 @@ int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, *base = get_param_l(&out_param); return 0; } - return __mlx4_qp_reserve_range(dev, cnt, align, base, bf_qp); + return __mlx4_qp_reserve_range(dev, cnt, align, base, flags); } EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range); @@ -257,7 +266,7 @@ void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) if (mlx4_is_qp_reserved(dev, (u32) base_qpn)) return; - mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt); + mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, MLX4_USE_RR); } void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) diff --git a/sys/ofed/drivers/net/mlx4/reset.c b/sys/ofed/drivers/net/mlx4/reset.c index 43b1541..44ec1e1 100644 --- a/sys/ofed/drivers/net/mlx4/reset.c +++ b/sys/ofed/drivers/net/mlx4/reset.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -77,7 +77,7 @@ int mlx4_reset(struct mlx4_dev *dev) goto out; } - pcie_cap = pci_find_capability(dev->pdev, PCI_CAP_ID_EXP); + pcie_cap = pci_pcie_cap(dev->pdev); for (i = 0; i < 64; ++i) { if (i == 22 || i == 23) @@ -119,8 +119,8 @@ int mlx4_reset(struct mlx4_dev *dev) writel(MLX4_RESET_VALUE, reset + MLX4_RESET_OFFSET); iounmap(reset); - /* Docs say to wait one second before accessing device */ - msleep(2000); + /* wait half a second before accessing device */ + msleep(500); end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES; do { @@ -138,11 +138,10 @@ int mlx4_reset(struct mlx4_dev *dev) goto out; } - /* Now restore the PCI headers */ if (pcie_cap) { devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4]; - if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_DEVCTL, + if (pcie_capability_write_word(dev->pdev, PCI_EXP_DEVCTL, devctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express " @@ -150,7 +149,7 @@ int mlx4_reset(struct mlx4_dev *dev) goto out; } linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4]; - if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_LNKCTL, + if (pcie_capability_write_word(dev->pdev, PCI_EXP_LNKCTL, linkctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express " diff --git a/sys/ofed/drivers/net/mlx4/resource_tracker.c b/sys/ofed/drivers/net/mlx4/resource_tracker.c index 65fc1dd..2d43871 100644 --- a/sys/ofed/drivers/net/mlx4/resource_tracker.c +++ b/sys/ofed/drivers/net/mlx4/resource_tracker.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. + * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. * All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * @@ -42,7 +42,7 @@ #include <linux/mlx4/cmd.h> #include <linux/mlx4/qp.h> #include <linux/if_ether.h> -#include <linux/compat.h> +#include <linux/etherdevice.h> #include "mlx4.h" #include "fw.h" @@ -85,6 +85,7 @@ struct res_gid { u8 gid[16]; enum mlx4_protocol prot; enum mlx4_steer_type steer; + u64 reg_id; }; enum res_qp_states { @@ -109,6 +110,16 @@ struct res_qp { struct list_head mcg_list; spinlock_t mcg_spl; int local_qpn; + atomic_t ref_count; + u32 qpc_flags; + /* saved qp params before VST enforcement in order to restore on VGT */ + u8 sched_queue; + __be32 param3; + u8 vlan_control; + u8 fvl_rx; + u8 pri_path_fl; + u8 vlan_index; + u8 feup; }; enum res_mtt_states { @@ -207,6 +218,7 @@ enum res_fs_rule_states { struct res_fs_rule { struct res_common com; + int qpn; }; static int mlx4_is_eth(struct mlx4_dev *dev, int port) @@ -622,9 +634,33 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, } } +static int check_counter_index_validity(struct mlx4_dev *dev, int slave, int port, int idx) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct counter_index *counter, *tmp_counter; + + if (slave == 0) { + list_for_each_entry_safe(counter, tmp_counter, + &priv->counters_table.global_port_list[port - 1], + list) { + if (counter->index == idx) + return 0; + } + return -EINVAL; + } else { + list_for_each_entry_safe(counter, tmp_counter, + &priv->counters_table.vf_list[slave - 1][port - 1], + list) { + if (counter->index == idx) + return 0; + } + return -EINVAL; + } +} + static int update_vport_qp_param(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, - u8 slave) + u8 slave, u32 qpn) { struct mlx4_qp_context *qpc = inbox->buf + 8; struct mlx4_vport_oper_state *vp_oper; @@ -635,31 +671,63 @@ static int update_vport_qp_param(struct mlx4_dev *dev, port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1; priv = mlx4_priv(dev); vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff; - if (MLX4_VGT != vp_oper->state.default_vlan) { - qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff; - if (MLX4_QP_ST_RC == qp_type) + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH && + qpc->pri_path.counter_index != MLX4_SINK_COUNTER_INDEX) { + if (check_counter_index_validity(dev, slave, port, + qpc->pri_path.counter_index)) return -EINVAL; + } + + mlx4_dbg(dev, "%s: QP counter_index %d for slave %d port %d\n", + __func__, qpc->pri_path.counter_index, slave, port); + + if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK) && + dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH && + !mlx4_is_qp_reserved(dev, qpn) && + qp_type == MLX4_QP_ST_MLX && + qpc->pri_path.counter_index != 0xFF) { + /* disable multicast loopback to qp with same counter */ + qpc->pri_path.fl |= MLX4_FL_ETH_SRC_CHECK_MC_LB; + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_SRC_CHECK_IF_COUNTER; + } + + if (MLX4_VGT != vp_oper->state.default_vlan) { + /* the reserved QPs (special, proxy, tunnel) + * do not operate over vlans + */ + if (mlx4_is_qp_reserved(dev, qpn)) + return 0; - qpc->srqn |= cpu_to_be32(1 << 25); /*set cqe vlan mask */ + /* force strip vlan by clear vsd */ + qpc->param3 &= ~cpu_to_be32(MLX4_STRIP_VLAN); + /* preserve IF_COUNTER flag */ + qpc->pri_path.vlan_control &= + MLX4_VLAN_CTRL_ETH_SRC_CHECK_IF_COUNTER; + if (MLX4_QP_ST_RC != qp_type) { + if (0 != vp_oper->state.default_vlan) { + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + } else { /* priority tagged */ + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; + } + } + qpc->pri_path.fvl_rx |= MLX4_FVL_RX_FORCE_ETH_VLAN; qpc->pri_path.vlan_index = vp_oper->vlan_idx; - qpc->pri_path.fl = 1 << 6; /* set cv bit*/ - qpc->pri_path.feup |= 1 << 3; /* set fvl bit */ + qpc->pri_path.fl |= MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN; + qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; qpc->pri_path.sched_queue &= 0xC7; qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3; - mlx4_dbg(dev, "qp %d port %d Q 0x%x set vlan to %d vidx %d feup %x fl %x\n", - be32_to_cpu(qpc->local_qpn) & 0xffffff, port, - (int)(qpc->pri_path.sched_queue), vp_oper->state.default_vlan, - vp_oper->vlan_idx, (int)(qpc->pri_path.feup), - (int)(qpc->pri_path.fl)); } if (vp_oper->state.spoofchk) { - qpc->pri_path.feup |= 1 << 5; /* set fsm bit */; + qpc->pri_path.feup |= MLX4_FSM_FORCE_ETH_SRC_MAC; qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx; - mlx4_dbg(dev, "spoof qp %d port %d feup 0x%x, myLmc 0x%x mindx %d\n", - be32_to_cpu(qpc->local_qpn) & 0xffffff, port, - (int)qpc->pri_path.feup, (int)qpc->pri_path.grh_mylmc, - vp_oper->mac_idx); } return 0; } @@ -669,7 +737,7 @@ static int mpt_mask(struct mlx4_dev *dev) return dev->caps.num_mpts - 1; } -static void *find_res(struct mlx4_dev *dev, int res_id, +static void *find_res(struct mlx4_dev *dev, u64 res_id, enum mlx4_resource type) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -688,7 +756,7 @@ static int get_res(struct mlx4_dev *dev, int slave, u64 res_id, spin_lock_irq(mlx4_tlock(dev)); r = find_res(dev, res_id, type); if (!r) { - err = -ENOENT; + err = -ENONET; goto exit; } @@ -761,6 +829,7 @@ static struct res_common *alloc_qp_tr(int id) ret->local_qpn = id; INIT_LIST_HEAD(&ret->mcg_list); spin_lock_init(&ret->mcg_spl); + atomic_set(&ret->ref_count, 0); return &ret->com; } @@ -868,7 +937,7 @@ static struct res_common *alloc_xrcdn_tr(int id) return &ret->com; } -static struct res_common *alloc_fs_rule_tr(u64 id) +static struct res_common *alloc_fs_rule_tr(u64 id, int qpn) { struct res_fs_rule *ret; @@ -878,7 +947,7 @@ static struct res_common *alloc_fs_rule_tr(u64 id) ret->com.res_id = id; ret->com.state = RES_FS_RULE_ALLOCATED; - + ret->qpn = qpn; return &ret->com; } @@ -916,7 +985,7 @@ static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, ret = alloc_xrcdn_tr(id); break; case RES_FS_RULE: - ret = alloc_fs_rule_tr(id); + ret = alloc_fs_rule_tr(id, extra); break; default: return NULL; @@ -970,8 +1039,10 @@ static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, return 0; undo: - for (--i; i >= base; --i) + for (--i; i >= 0; --i) { rb_erase(&res_arr[i]->node, root); + list_del_init(&res_arr[i]->list); + } spin_unlock_irq(mlx4_tlock(dev)); @@ -985,10 +1056,14 @@ undo: static int remove_qp_ok(struct res_qp *res) { - if (res->com.state == RES_QP_BUSY) + if (res->com.state == RES_QP_BUSY || atomic_read(&res->ref_count) || + !list_empty(&res->mcg_list)) { + pr_err("resource tracker: fail to remove qp, state %d, ref_count %d\n", + res->com.state, atomic_read(&res->ref_count)); return -EBUSY; - else if (res->com.state != RES_QP_RESERVED) + } else if (res->com.state != RES_QP_RESERVED) { return -EPERM; + } return 0; } @@ -1166,7 +1241,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, switch (state) { case RES_QP_BUSY: mlx4_dbg(dev, "%s: failed RES_QP, 0x%llx\n", - __func__, (long long)r->com.res_id); + __func__, (unsigned long long)r->com.res_id); err = -EBUSY; break; @@ -1174,7 +1249,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, if (r->com.state == RES_QP_MAPPED && !alloc) break; - mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", (long long)r->com.res_id); + mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", (unsigned long long)r->com.res_id); err = -EINVAL; break; @@ -1184,7 +1259,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, break; else { mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", - (long long)r->com.res_id); + (unsigned long long)r->com.res_id); err = -EINVAL; } @@ -1464,18 +1539,18 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, int align; int base; int qpn; - u8 bf_qp; + u8 flags; switch (op) { case RES_OP_RESERVE: count = get_param_l(&in_param) & 0xffffff; - bf_qp = get_param_l(&in_param) >> 31; + flags = get_param_l(&in_param) >> 24; align = get_param_h(&in_param); err = mlx4_grant_resource(dev, slave, RES_QP, count, 0); if (err) return err; - err = __mlx4_qp_reserve_range(dev, count, align, &base, bf_qp); + err = __mlx4_qp_reserve_range(dev, count, align, &base, flags); if (err) { mlx4_release_resource(dev, slave, RES_QP, count, 0); return err; @@ -1566,7 +1641,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) break; - index = __mlx4_mr_reserve(dev); + index = __mlx4_mpt_reserve(dev); if (index == -1) { mlx4_release_resource(dev, slave, RES_MPT, 1, 0); break; @@ -1576,7 +1651,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, err = add_res_range(dev, slave, id, 1, RES_MPT, index); if (err) { mlx4_release_resource(dev, slave, RES_MPT, 1, 0); - __mlx4_mr_release(dev, index); + __mlx4_mpt_release(dev, index); break; } set_param_l(out_param, index); @@ -1589,7 +1664,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - err = __mlx4_mr_alloc_icm(dev, mpt->key); + err = __mlx4_mpt_alloc_icm(dev, mpt->key); if (err) { res_abort_move(dev, slave, RES_MPT, id); return err; @@ -1768,7 +1843,7 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, int err = -EINVAL; int port; u64 mac; - u8 smac_index = 0; + u8 smac_index; if (op != RES_OP_RESERVE_AND_MAP) return err; @@ -1867,11 +1942,16 @@ static void rem_slave_vlans(struct mlx4_dev *dev, int slave) } static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param, int port) + u64 in_param, u64 *out_param, int in_port) { + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; int err = -EINVAL; u16 vlan; int vlan_index; + int port; + + port = !in_port ? get_param_l(out_param) : in_port; if (!port) return err; @@ -1879,6 +1959,12 @@ static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (op != RES_OP_RESERVE_AND_MAP) return err; + /* upstream kernels had NOP for reg/unreg vlan. Continue this. */ + if (!in_port && port > 0 && port <= dev->caps.num_ports) { + slave_state[slave].old_vlan_api = true; + return 0; + } + vlan = (u16) in_param; err = __mlx4_register_vlan(dev, port, vlan, &vlan_index); @@ -1892,7 +1978,7 @@ static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, } static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) + u64 in_param, u64 *out_param, int port) { u32 index; int err; @@ -1900,23 +1986,9 @@ static int counter_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (op != RES_OP_RESERVE) return -EINVAL; - err = mlx4_grant_resource(dev, slave, RES_COUNTER, 1, 0); - if (err) - return err; - - err = __mlx4_counter_alloc(dev, &index); - if (err) { - mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); - return err; - } - - err = add_res_range(dev, slave, index, 1, RES_COUNTER, 0); - if (err) { - __mlx4_counter_free(dev, index); - mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); - } else { + err = __mlx4_counter_alloc(dev, slave, port, &index); + if (!err) set_param_l(out_param, index); - } return err; } @@ -1992,7 +2064,8 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave, case RES_COUNTER: err = counter_alloc_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_XRCD: @@ -2090,7 +2163,7 @@ static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) break; mlx4_release_resource(dev, slave, RES_MPT, 1, 0); - __mlx4_mr_release(dev, index); + __mlx4_mpt_release(dev, index); break; case RES_OP_MAP_ICM: index = get_param_l(&in_param); @@ -2100,7 +2173,7 @@ static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - __mlx4_mr_free_icm(dev, mpt->key); + __mlx4_mpt_free_icm(dev, mpt->key); res_end_move(dev, slave, RES_MPT, id); return err; break; @@ -2185,10 +2258,14 @@ static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param, int port) { + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; int err = 0; switch (op) { case RES_OP_RESERVE_AND_MAP: + if (slave_state[slave].old_vlan_api == true) + return 0; if (!port) return -EINVAL; vlan_del_from_slave(dev, slave, in_param, port); @@ -2203,23 +2280,18 @@ static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, } static int counter_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, - u64 in_param, u64 *out_param) + u64 in_param, u64 *out_param, int port) { int index; - int err; if (op != RES_OP_RESERVE) return -EINVAL; index = get_param_l(&in_param); - err = rem_res_range(dev, slave, index, 1, RES_COUNTER, 0); - if (err) - return err; - __mlx4_counter_free(dev, index); - mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); + __mlx4_counter_free(dev, slave, port, index); - return err; + return 0; } static int xrcdn_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, @@ -2290,7 +2362,8 @@ int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave, case RES_COUNTER: err = counter_free_res(dev, slave, vhcr->op_modifier, alop, - vhcr->in_param, &vhcr->out_param); + vhcr->in_param, &vhcr->out_param, + (vhcr->in_modifier >> 8) & 0xFF); break; case RES_XRCD: @@ -2319,6 +2392,26 @@ static int mr_get_mtt_size(struct mlx4_mpt_entry *mpt) return be32_to_cpu(mpt->mtt_sz); } +static u32 mr_get_pd(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->pd_flags) & 0x00ffffff; +} + +static int mr_is_fmr(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->pd_flags) & MLX4_MPT_PD_FLAG_FAST_REG; +} + +static int mr_is_bind_enabled(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_BIND_ENABLE; +} + +static int mr_is_region(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_REGION; +} + static int qp_get_mtt_addr(struct mlx4_qp_context *qpc) { return be32_to_cpu(qpc->mtt_base_addr_l) & 0xfffffff8; @@ -2338,7 +2431,8 @@ static int qp_get_mtt_size(struct mlx4_qp_context *qpc) int log_rq_stride = qpc->rq_size_stride & 7; int srq = (be32_to_cpu(qpc->srqn) >> 24) & 1; int rss = (be32_to_cpu(qpc->flags) >> 13) & 1; - int xrc = (be32_to_cpu(qpc->local_qpn) >> 23) & 1; + u32 ts = (be32_to_cpu(qpc->flags) >> 16) & 0xff; + int xrc = (ts == MLX4_QP_ST_XRC) ? 1 : 0; int sq_size; int rq_size; int total_pages; @@ -2379,12 +2473,43 @@ int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave, int mtt_base = mr_get_mtt_addr(inbox->buf) / dev->caps.mtt_entry_sz; int phys; int id; + u32 pd; + int pd_slave; id = index & mpt_mask(dev); err = mr_res_start_move_to(dev, slave, id, RES_MPT_HW, &mpt); if (err) return err; + /* Currently disable memory windows since this feature isn't tested yet + * under virtualization. + */ + if (!mr_is_region(inbox->buf)) { + err = -ENOSYS; + goto ex_abort; + } + + /* Make sure that the PD bits related to the slave id are zeros. */ + pd = mr_get_pd(inbox->buf); + pd_slave = (pd >> 17) & 0x7f; + if (pd_slave != 0 && pd_slave != slave) { + err = -EPERM; + goto ex_abort; + } + + if (mr_is_fmr(inbox->buf)) { + /* FMR and Bind Enable are forbidden in slave devices. */ + if (mr_is_bind_enabled(inbox->buf)) { + err = -EPERM; + goto ex_abort; + } + /* FMR and Memory Windows are also forbidden. */ + if (!mr_is_region(inbox->buf)) { + err = -EPERM; + goto ex_abort; + } + } + phys = mr_phys_mpt(inbox->buf); if (!phys) { err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); @@ -2534,6 +2659,14 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, if (err) return err; qp->local_qpn = local_qpn; + qp->sched_queue = 0; + qp->param3 = 0; + qp->vlan_control = 0; + qp->fvl_rx = 0; + qp->pri_path_fl = 0; + qp->vlan_index = 0; + qp->feup = 0; + qp->qpc_flags = be32_to_cpu(qpc->flags); err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) @@ -2851,6 +2984,12 @@ int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe) if (!priv->mfunc.master.slave_state) return -EINVAL; + /* check for slave valid, slave not PF, and slave active */ + if (slave < 0 || slave >= dev->num_slaves || + slave == dev->caps.function || + !priv->mfunc.master.slave_state[slave].active) + return 0; + event_eq = &priv->mfunc.master.slave_state[slave].event_eq[eqe->type]; /* Create the event only if the slave is registered */ @@ -3288,6 +3427,15 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, { int err; struct mlx4_qp_context *qpc = inbox->buf + 8; + int qpn = vhcr->in_modifier & 0x7fffff; + struct res_qp *qp; + u8 orig_sched_queue; + __be32 orig_param3 = qpc->param3; + u8 orig_vlan_control = qpc->pri_path.vlan_control; + u8 orig_fvl_rx = qpc->pri_path.fvl_rx; + u8 orig_pri_path_fl = qpc->pri_path.fl; + u8 orig_vlan_index = qpc->pri_path.vlan_index; + u8 orig_feup = qpc->pri_path.feup; err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave); if (err) @@ -3299,11 +3447,40 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, update_pkey_index(dev, slave, inbox); update_gid(dev, inbox, (u8)slave); adjust_proxy_tun_qkey(dev, vhcr, qpc); - err = update_vport_qp_param(dev, inbox, slave); + orig_sched_queue = qpc->pri_path.sched_queue; + + err = get_res(dev, slave, qpn, RES_QP, &qp); if (err) return err; + if (qp->com.from_state != RES_QP_HW) { + err = -EBUSY; + goto out; + } - return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + /* do not modify vport QP params for RSS QPs */ + if (!(qp->qpc_flags & (1 << MLX4_RSS_QPC_FLAG_OFFSET))) { + err = update_vport_qp_param(dev, inbox, slave, qpn); + if (err) + goto out; + } + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +out: + /* if no error, save sched queue value passed in by VF. This is + * essentially the QOS value provided by the VF. This will be useful + * if we allow dynamic changes from VST back to VGT + */ + if (!err) { + qp->sched_queue = orig_sched_queue; + qp->param3 = orig_param3; + qp->vlan_control = orig_vlan_control; + qp->fvl_rx = orig_fvl_rx; + qp->pri_path_fl = orig_pri_path_fl; + qp->vlan_index = orig_vlan_index; + qp->feup = orig_feup; + } + put_res(dev, slave, qpn, RES_QP); + return err; } int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, @@ -3439,7 +3616,7 @@ static struct res_gid *find_gid(struct mlx4_dev *dev, int slave, static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, u8 *gid, enum mlx4_protocol prot, - enum mlx4_steer_type steer) + enum mlx4_steer_type steer, u64 reg_id) { struct res_gid *res; int err; @@ -3456,6 +3633,7 @@ static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, memcpy(res->gid, gid, 16); res->prot = prot; res->steer = steer; + res->reg_id = reg_id; list_add_tail(&res->list, &rqp->mcg_list); err = 0; } @@ -3466,7 +3644,7 @@ static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, u8 *gid, enum mlx4_protocol prot, - enum mlx4_steer_type steer) + enum mlx4_steer_type steer, u64 *reg_id) { struct res_gid *res; int err; @@ -3476,6 +3654,7 @@ static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, if (!res || res->prot != prot || res->steer != steer) err = -EINVAL; else { + *reg_id = res->reg_id; list_del(&res->list); kfree(res); err = 0; @@ -3485,6 +3664,37 @@ static int rem_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, return err; } +static int qp_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + int block_loopback, enum mlx4_protocol prot, + enum mlx4_steer_type type, u64 *reg_id) +{ + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + return mlx4_trans_to_dmfs_attach(dev, qp, gid, gid[5], + block_loopback, prot, + reg_id); + case MLX4_STEERING_MODE_B0: + return mlx4_qp_attach_common(dev, qp, gid, + block_loopback, prot, type); + default: + return -EINVAL; + } +} + +static int qp_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + enum mlx4_protocol prot, enum mlx4_steer_type type, + u64 reg_id) +{ + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + return mlx4_flow_detach(dev, reg_id); + case MLX4_STEERING_MODE_B0: + return mlx4_qp_detach_common(dev, qp, gid, prot, type); + default: + return -EINVAL; + } +} + int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -3497,6 +3707,7 @@ int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, int err; int qpn; struct res_qp *rqp; + u64 reg_id = 0; int attach = vhcr->op_modifier; int block_loopback = vhcr->in_modifier >> 31; u8 steer_type_mask = 2; @@ -3509,30 +3720,32 @@ int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, qp.qpn = qpn; if (attach) { - err = add_mcg_res(dev, slave, rqp, gid, prot, type); - if (err) + err = qp_attach(dev, &qp, gid, block_loopback, prot, + type, ®_id); + if (err) { + pr_err("Fail to attach rule to qp 0x%x\n", qpn); goto ex_put; - - err = mlx4_qp_attach_common(dev, &qp, gid, - block_loopback, prot, type); + } + err = add_mcg_res(dev, slave, rqp, gid, prot, type, reg_id); if (err) - goto ex_rem; + goto ex_detach; } else { - err = rem_mcg_res(dev, slave, rqp, gid, prot, type); + err = rem_mcg_res(dev, slave, rqp, gid, prot, type, ®_id); if (err) goto ex_put; - err = mlx4_qp_detach_common(dev, &qp, gid, prot, type); - } + err = qp_detach(dev, &qp, gid, prot, type, reg_id); + if (err) + pr_err("Fail to detach rule from qp 0x%x reg_id = 0x%llx\n", + qpn, (unsigned long long)reg_id); + } put_res(dev, slave, qpn, RES_QP); - return 0; + return err; -ex_rem: - /* ignore error return below, already in error */ - (void) rem_mcg_res(dev, slave, rqp, gid, prot, type); +ex_detach: + qp_detach(dev, &qp, gid, prot, type, reg_id); ex_put: put_res(dev, slave, qpn, RES_QP); - return err; } @@ -3540,7 +3753,6 @@ ex_put: * MAC validation for Flow Steering rules. * VF can attach rules only with a mac address which is assigned to it. */ - static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header, struct list_head *rlist) { @@ -3633,6 +3845,8 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC]; int err; + int qpn; + struct res_qp *rqp; struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct _rule_hw *rule_header; int header_id; @@ -3642,13 +3856,21 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, return -EOPNOTSUPP; ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; + qpn = be32_to_cpu(ctrl->qpn) & 0xffffff; + err = get_res(dev, slave, qpn, RES_QP, &rqp); + if (err) { + pr_err("Steering rule with qpn 0x%x rejected.\n", qpn); + return err; + } rule_header = (struct _rule_hw *)(ctrl + 1); header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id)); switch (header_id) { case MLX4_NET_TRANS_RULE_ID_ETH: - if (validate_eth_header_mac(slave, rule_header, rlist)) - return -EINVAL; + if (validate_eth_header_mac(slave, rule_header, rlist)) { + err = -EINVAL; + goto err_put; + } break; case MLX4_NET_TRANS_RULE_ID_IB: break; @@ -3656,14 +3878,17 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: pr_warn("Can't attach FS rule without L2 headers, adding L2 header.\n"); - if (add_eth_header(dev, slave, inbox, rlist, header_id)) - return -EINVAL; + if (add_eth_header(dev, slave, inbox, rlist, header_id)) { + err = -EINVAL; + goto err_put; + } vhcr->in_modifier += sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2; break; default: pr_err("Corrupted mailbox.\n"); - return -EINVAL; + err = -EINVAL; + goto err_put; } err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, @@ -3671,16 +3896,20 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) - return err; + goto err_put; - err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, 0); + err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, qpn); if (err) { mlx4_err(dev, "Fail to add flow steering resources.\n "); /* detach rule*/ mlx4_cmd(dev, vhcr->out_param, 0, 0, - MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + goto err_put; } + atomic_inc(&rqp->ref_count); +err_put: + put_res(dev, slave, qpn, RES_QP); return err; } @@ -3691,20 +3920,38 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { int err; + struct res_qp *rqp; + struct res_fs_rule *rrule; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) return -EOPNOTSUPP; - err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0); - if (err) { - mlx4_err(dev, "Fail to remove flow steering resources.\n "); + err = get_res(dev, slave, vhcr->in_param, RES_FS_RULE, &rrule); + if (err) + return err; + /* Release the rule form busy state before removal */ + put_res(dev, slave, vhcr->in_param, RES_FS_RULE); + err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp); + if (err) return err; - } err = mlx4_cmd(dev, vhcr->in_param, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (!err) { + err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, + 0); + atomic_dec(&rqp->ref_count); + + if (err) { + mlx4_err(dev, "Fail to remove flow steering resources.\n "); + goto out; + } + } + +out: + put_res(dev, slave, rrule->qpn, RES_QP); return err; } @@ -3719,14 +3966,9 @@ int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { int err; - int index = vhcr->in_modifier & 0xffff; - - err = get_res(dev, slave, index, RES_COUNTER, NULL); - if (err) - return err; err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); - put_res(dev, slave, index, RES_COUNTER); + return err; } @@ -3737,9 +3979,16 @@ static void detach_qp(struct mlx4_dev *dev, int slave, struct res_qp *rqp) struct mlx4_qp qp; /* dummy for calling attach/detach */ list_for_each_entry_safe(rgid, tmp, &rqp->mcg_list, list) { - qp.qpn = rqp->local_qpn; - (void) mlx4_qp_detach_common(dev, &qp, rgid->gid, rgid->prot, - rgid->steer); + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + mlx4_flow_detach(dev, rgid->reg_id); + break; + case MLX4_STEERING_MODE_B0: + qp.qpn = rqp->local_qpn; + (void) mlx4_qp_detach_common(dev, &qp, rgid->gid, + rgid->prot, rgid->steer); + break; + } list_del(&rgid->list); kfree(rgid); } @@ -3766,7 +4015,7 @@ static int _move_all_busy(struct mlx4_dev *dev, int slave, mlx4_dbg(dev, "%s id 0x%llx is busy\n", ResourceType(type), - (long long)r->res_id); + (unsigned long long)r->res_id); ++busy; } else { r->from_state = r->state; @@ -3834,6 +4083,11 @@ static void rem_slave_qps(struct mlx4_dev *dev, int slave) &tracker->res_tree[RES_QP]); list_del(&qp->com.list); spin_unlock_irq(mlx4_tlock(dev)); + if (!valid_reserved(dev, slave, qpn)) { + __mlx4_qp_release_range(dev, qpn, 1); + mlx4_release_resource(dev, slave, + RES_QP, 1, 0); + } kfree(qp); state = 0; break; @@ -3905,6 +4159,8 @@ static void rem_slave_srqs(struct mlx4_dev *dev, int slave) &tracker->res_tree[RES_SRQ]); list_del(&srq->com.list); spin_unlock_irq(mlx4_tlock(dev)); + mlx4_release_resource(dev, slave, + RES_SRQ, 1, 0); kfree(srq); state = 0; break; @@ -3971,6 +4227,8 @@ static void rem_slave_cqs(struct mlx4_dev *dev, int slave) &tracker->res_tree[RES_CQ]); list_del(&cq->com.list); spin_unlock_irq(mlx4_tlock(dev)); + mlx4_release_resource(dev, slave, + RES_CQ, 1, 0); kfree(cq); state = 0; break; @@ -4028,18 +4286,20 @@ static void rem_slave_mrs(struct mlx4_dev *dev, int slave) while (state != 0) { switch (state) { case RES_MPT_RESERVED: - __mlx4_mr_release(dev, mpt->key); + __mlx4_mpt_release(dev, mpt->key); spin_lock_irq(mlx4_tlock(dev)); rb_erase(&mpt->com.node, &tracker->res_tree[RES_MPT]); list_del(&mpt->com.list); spin_unlock_irq(mlx4_tlock(dev)); + mlx4_release_resource(dev, slave, + RES_MPT, 1, 0); kfree(mpt); state = 0; break; case RES_MPT_MAPPED: - __mlx4_mr_free_icm(dev, mpt->key); + __mlx4_mpt_free_icm(dev, mpt->key); state = RES_MPT_RESERVED; break; @@ -4103,6 +4363,8 @@ static void rem_slave_mtts(struct mlx4_dev *dev, int slave) &tracker->res_tree[RES_MTT]); list_del(&mtt->com.list); spin_unlock_irq(mlx4_tlock(dev)); + mlx4_release_resource(dev, slave, RES_MTT, + 1 << mtt->order, 0); kfree(mtt); state = 0; break; @@ -4238,32 +4500,7 @@ static void rem_slave_eqs(struct mlx4_dev *dev, int slave) static void rem_slave_counters(struct mlx4_dev *dev, int slave) { - struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; - struct list_head *counter_list = - &tracker->slave_list[slave].res_list[RES_COUNTER]; - struct res_counter *counter; - struct res_counter *tmp; - int err; - int index; - - err = move_all_busy(dev, slave, RES_COUNTER); - if (err) - mlx4_warn(dev, "rem_slave_counters: Could not move all counters to " - "busy for slave %d\n", slave); - - spin_lock_irq(mlx4_tlock(dev)); - list_for_each_entry_safe(counter, tmp, counter_list, com.list) { - if (counter->com.owner == slave) { - index = counter->com.res_id; - rb_erase(&counter->com.node, - &tracker->res_tree[RES_COUNTER]); - list_del(&counter->com.list); - kfree(counter); - __mlx4_counter_free(dev, index); - } - } - spin_unlock_irq(mlx4_tlock(dev)); + __mlx4_slave_counters_free(dev, slave); } static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave) @@ -4302,6 +4539,7 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) mutex_lock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); rem_slave_macs(dev, slave); rem_slave_vlans(dev, slave); + rem_slave_fs_rule(dev, slave); rem_slave_qps(dev, slave); rem_slave_srqs(dev, slave); rem_slave_cqs(dev, slave); @@ -4310,6 +4548,139 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) rem_slave_mtts(dev, slave); rem_slave_counters(dev, slave); rem_slave_xrcdns(dev, slave); - rem_slave_fs_rule(dev, slave); mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); } + +void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) +{ + struct mlx4_vf_immed_vlan_work *work = + container_of(_work, struct mlx4_vf_immed_vlan_work, work); + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_update_qp_context *upd_context; + struct mlx4_dev *dev = &work->priv->dev; + struct mlx4_resource_tracker *tracker = + &work->priv->mfunc.master.res_tracker; + struct list_head *qp_list = + &tracker->slave_list[work->slave].res_list[RES_QP]; + struct res_qp *qp; + struct res_qp *tmp; + u64 qp_path_mask_vlan_ctrl = + ((1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_UNTAGGED) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_1P) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_TAGGED) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_UNTAGGED) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_1P) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_TAGGED)); + + u64 qp_path_mask = ((1ULL << MLX4_UPD_QP_PATH_MASK_VLAN_INDEX) | + (1ULL << MLX4_UPD_QP_PATH_MASK_FVL) | + (1ULL << MLX4_UPD_QP_PATH_MASK_CV) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN) | + (1ULL << MLX4_UPD_QP_PATH_MASK_FEUP) | + (1ULL << MLX4_UPD_QP_PATH_MASK_FVL_RX) | + (1ULL << MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE)); + + int err; + int port, errors = 0; + u8 vlan_control; + + if (mlx4_is_slave(dev)) { + mlx4_warn(dev, "Trying to update-qp in slave %d\n", + work->slave); + goto out; + } + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + goto out; + + if (!work->vlan_id) + vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; + else + vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + + upd_context = mailbox->buf; + upd_context->qp_mask = cpu_to_be64(MLX4_UPD_QP_MASK_VSD); + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(qp, tmp, qp_list, com.list) { + spin_unlock_irq(mlx4_tlock(dev)); + if (qp->com.owner == work->slave) { + if (qp->com.from_state != RES_QP_HW || + !qp->sched_queue || /* no INIT2RTR trans yet */ + mlx4_is_qp_reserved(dev, qp->local_qpn) || + qp->qpc_flags & (1 << MLX4_RSS_QPC_FLAG_OFFSET)) { + spin_lock_irq(mlx4_tlock(dev)); + continue; + } + port = (qp->sched_queue >> 6 & 1) + 1; + if (port != work->port) { + spin_lock_irq(mlx4_tlock(dev)); + continue; + } + if (MLX4_QP_ST_RC == ((qp->qpc_flags >> 16) & 0xff)) + upd_context->primary_addr_path_mask = cpu_to_be64(qp_path_mask); + else + upd_context->primary_addr_path_mask = + cpu_to_be64(qp_path_mask | qp_path_mask_vlan_ctrl); + if (work->vlan_id == MLX4_VGT) { + upd_context->qp_context.param3 = qp->param3; + upd_context->qp_context.pri_path.vlan_control = qp->vlan_control; + upd_context->qp_context.pri_path.fvl_rx = qp->fvl_rx; + upd_context->qp_context.pri_path.vlan_index = qp->vlan_index; + upd_context->qp_context.pri_path.fl = qp->pri_path_fl; + upd_context->qp_context.pri_path.feup = qp->feup; + upd_context->qp_context.pri_path.sched_queue = + qp->sched_queue; + } else { + upd_context->qp_context.param3 = qp->param3 & ~cpu_to_be32(MLX4_STRIP_VLAN); + upd_context->qp_context.pri_path.vlan_control = vlan_control; + upd_context->qp_context.pri_path.vlan_index = work->vlan_ix; + upd_context->qp_context.pri_path.fvl_rx = + qp->fvl_rx | MLX4_FVL_RX_FORCE_ETH_VLAN; + upd_context->qp_context.pri_path.fl = + qp->pri_path_fl | MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN; + upd_context->qp_context.pri_path.feup = + qp->feup | MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; + upd_context->qp_context.pri_path.sched_queue = + qp->sched_queue & 0xC7; + upd_context->qp_context.pri_path.sched_queue |= + ((work->qos & 0x7) << 3); + } + + err = mlx4_cmd(dev, mailbox->dma, + qp->local_qpn & 0xffffff, + 0, MLX4_CMD_UPDATE_QP, + MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); + if (err) { + mlx4_info(dev, "UPDATE_QP failed for slave %d, " + "port %d, qpn %d (%d)\n", + work->slave, port, qp->local_qpn, + err); + errors++; + } + } + spin_lock_irq(mlx4_tlock(dev)); + } + spin_unlock_irq(mlx4_tlock(dev)); + mlx4_free_cmd_mailbox(dev, mailbox); + + if (errors) + mlx4_err(dev, "%d UPDATE_QP failures for slave %d, port %d\n", + errors, work->slave, work->port); + + /* unregister previous vlan_id if needed and we had no errors + * while updating the QPs + */ + if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN && !errors && + NO_INDX != work->orig_vlan_ix) + __mlx4_unregister_vlan(&work->priv->dev, work->port, + work->orig_vlan_id); +out: + kfree(work); + return; +} + diff --git a/sys/ofed/drivers/net/mlx4/sense.c b/sys/ofed/drivers/net/mlx4/sense.c index 5e1665e..3615e65 100644 --- a/sys/ofed/drivers/net/mlx4/sense.c +++ b/sys/ofed/drivers/net/mlx4/sense.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -53,7 +53,7 @@ int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, } if (out_param > 2) { - mlx4_err(dev, "Sense returned illegal value: 0x%llx\n", (long long)out_param); + mlx4_err(dev, "Sense returned illegal value: 0x%llx\n", (unsigned long long)out_param); return -EINVAL; } @@ -108,9 +108,8 @@ static void mlx4_sense_port(struct work_struct *work) sense_again: mutex_unlock(&priv->port_mutex); - if (sense->resched) - queue_delayed_work(sense->sense_wq , &sense->sense_poll, - round_jiffies(MLX4_SENSE_RANGE)); + queue_delayed_work(mlx4_wq , &sense->sense_poll, + round_jiffies_relative(MLX4_SENSE_RANGE)); } void mlx4_start_sense(struct mlx4_dev *dev) @@ -121,40 +120,24 @@ void mlx4_start_sense(struct mlx4_dev *dev) if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) return; - sense->resched = 1; - queue_delayed_work(sense->sense_wq , &sense->sense_poll, - round_jiffies(MLX4_SENSE_RANGE)); + queue_delayed_work(mlx4_wq , &sense->sense_poll, + round_jiffies_relative(MLX4_SENSE_RANGE)); } void mlx4_stop_sense(struct mlx4_dev *dev) { - mlx4_priv(dev)->sense.resched = 0; + cancel_delayed_work_sync(&mlx4_priv(dev)->sense.sense_poll); } -void mlx4_sense_cleanup(struct mlx4_dev *dev) -{ - mlx4_stop_sense(dev); - cancel_delayed_work(&mlx4_priv(dev)->sense.sense_poll); - destroy_workqueue(mlx4_priv(dev)->sense.sense_wq); -} - - -int mlx4_sense_init(struct mlx4_dev *dev) +void mlx4_sense_init(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_sense *sense = &priv->sense; int port; - sense->dev = dev; - sense->sense_wq = create_singlethread_workqueue("mlx4_sense"); - if (!sense->sense_wq) - return -ENOMEM; - for (port = 1; port <= dev->caps.num_ports; port++) sense->do_sense_port[port] = 1; INIT_DEFERRABLE_WORK(&sense->sense_poll, mlx4_sense_port); - - return 0; } diff --git a/sys/ofed/drivers/net/mlx4/srq.c b/sys/ofed/drivers/net/mlx4/srq.c index c37f682..fcb6255 100644 --- a/sys/ofed/drivers/net/mlx4/srq.c +++ b/sys/ofed/drivers/net/mlx4/srq.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,6 +32,8 @@ */ #include <linux/mlx4/cmd.h> +#include <linux/mlx4/srq.h> +#include <linux/module.h> #include <linux/gfp.h> #include "mlx4.h" @@ -113,7 +115,7 @@ err_put: mlx4_table_put(dev, &srq_table->table, *srqn); err_out: - mlx4_bitmap_free(&srq_table->bitmap, *srqn); + mlx4_bitmap_free(&srq_table->bitmap, *srqn, MLX4_NO_RR); return err; } @@ -141,7 +143,7 @@ void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn) mlx4_table_put(dev, &srq_table->cmpt_table, srqn); mlx4_table_put(dev, &srq_table->table, srqn); - mlx4_bitmap_free(&srq_table->bitmap, srqn); + mlx4_bitmap_free(&srq_table->bitmap, srqn, MLX4_NO_RR); } static void mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn) @@ -295,3 +297,18 @@ void mlx4_cleanup_srq_table(struct mlx4_dev *dev) return; mlx4_bitmap_cleanup(&mlx4_priv(dev)->srq_table.bitmap); } + +struct mlx4_srq *mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; + struct mlx4_srq *srq; + unsigned long flags; + + spin_lock_irqsave(&srq_table->lock, flags); + srq = radix_tree_lookup(&srq_table->tree, + srqn & (dev->caps.num_srqs - 1)); + spin_unlock_irqrestore(&srq_table->lock, flags); + + return srq; +} +EXPORT_SYMBOL_GPL(mlx4_srq_lookup); diff --git a/sys/ofed/drivers/net/mlx4/sys_tune.c b/sys/ofed/drivers/net/mlx4/sys_tune.c index 0675e90..87eb30e 100644 --- a/sys/ofed/drivers/net/mlx4/sys_tune.c +++ b/sys/ofed/drivers/net/mlx4/sys_tune.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Mellanox Technologies. All rights reserved. + * Copyright (c) 2010, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -39,8 +39,6 @@ #if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE) - - /* Each CPU is put into a group. In most cases, the group number is * equal to the CPU number of one of the CPUs in the group. The * exception is group NR_CPUS which is the default group. This is diff --git a/sys/ofed/drivers/net/mlx4/utils.c b/sys/ofed/drivers/net/mlx4/utils.c new file mode 100644 index 0000000..2444ec5 --- /dev/null +++ b/sys/ofed/drivers/net/mlx4/utils.c @@ -0,0 +1,189 @@ +/* $OpenBSD: if_trunk.c,v 1.30 2007/01/31 06:20:19 reyk Exp $ */ + +/* + * Copyright (c) 2005, 2006 Reyk Floeter <reyk@openbsd.org> + * Copyright (c) 2007 Andrew Thompson <thompsa@FreeBSD.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/queue.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/sysctl.h> +#include <sys/module.h> +#include <sys/priv.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/hash.h> +#include <sys/lock.h> +#include <sys/rmlock.h> +#include <sys/taskqueue.h> +#include <sys/eventhandler.h> + +#include <net/ethernet.h> +#include <net/if.h> +#include <net/if_clone.h> +#include <net/if_arp.h> +#include <net/if_dl.h> +#include <net/if_llc.h> +#include <net/if_media.h> +#include <net/if_types.h> +#include <net/if_var.h> +#include <net/bpf.h> + +#if defined(INET) || defined(INET6) +#include <netinet/in.h> +#endif +#ifdef INET +#include <netinet/in_systm.h> +#include <netinet/if_ether.h> +#include <netinet/ip.h> +#endif + +#ifdef INET6 +#include <netinet/ip6.h> +#include <netinet6/in6_var.h> +#include <netinet6/in6_ifattach.h> +#endif + +#include <net/if_vlan_var.h> + +#include "utils.h" + +/* XXX this code should be factored out */ +/* XXX copied from if_lagg.c */ + +static const void * +mlx4_en_gethdr(struct mbuf *m, u_int off, u_int len, void *buf) +{ + if (m->m_pkthdr.len < (off + len)) { + return (NULL); + } else if (m->m_len < (off + len)) { + m_copydata(m, off, len, buf); + return (buf); + } + return (mtod(m, char *) + off); +} + +uint32_t +mlx4_en_hashmbuf(uint32_t flags, struct mbuf *m, uint32_t key) +{ + uint16_t etype; + uint32_t p = key; + int off; + struct ether_header *eh; + const struct ether_vlan_header *vlan; +#ifdef INET + const struct ip *ip; + const uint32_t *ports; + int iphlen; +#endif +#ifdef INET6 + const struct ip6_hdr *ip6; + uint32_t flow; +#endif + union { +#ifdef INET + struct ip ip; +#endif +#ifdef INET6 + struct ip6_hdr ip6; +#endif + struct ether_vlan_header vlan; + uint32_t port; + } buf; + + + off = sizeof(*eh); + if (m->m_len < off) + goto out; + eh = mtod(m, struct ether_header *); + etype = ntohs(eh->ether_type); + if (flags & MLX4_F_HASHL2) { + p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p); + p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p); + } + + /* Special handling for encapsulating VLAN frames */ + if ((m->m_flags & M_VLANTAG) && (flags & MLX4_F_HASHL2)) { + p = hash32_buf(&m->m_pkthdr.ether_vtag, + sizeof(m->m_pkthdr.ether_vtag), p); + } else if (etype == ETHERTYPE_VLAN) { + vlan = mlx4_en_gethdr(m, off, sizeof(*vlan), &buf); + if (vlan == NULL) + goto out; + + if (flags & MLX4_F_HASHL2) + p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p); + etype = ntohs(vlan->evl_proto); + off += sizeof(*vlan) - sizeof(*eh); + } + + switch (etype) { +#ifdef INET + case ETHERTYPE_IP: + ip = mlx4_en_gethdr(m, off, sizeof(*ip), &buf); + if (ip == NULL) + goto out; + + if (flags & MLX4_F_HASHL3) { + p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p); + p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p); + } + if (!(flags & MLX4_F_HASHL4)) + break; + switch (ip->ip_p) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_SCTP: + iphlen = ip->ip_hl << 2; + if (iphlen < sizeof(*ip)) + break; + off += iphlen; + ports = mlx4_en_gethdr(m, off, sizeof(*ports), &buf); + if (ports == NULL) + break; + p = hash32_buf(ports, sizeof(*ports), p); + break; + } + break; +#endif +#ifdef INET6 + case ETHERTYPE_IPV6: + if (!(flags & MLX4_F_HASHL3)) + break; + ip6 = mlx4_en_gethdr(m, off, sizeof(*ip6), &buf); + if (ip6 == NULL) + goto out; + + p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p); + p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p); + flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK; + p = hash32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */ + break; +#endif + } +out: + return (p); +} diff --git a/sys/ofed/drivers/net/mlx4/utils.h b/sys/ofed/drivers/net/mlx4/utils.h new file mode 100644 index 0000000..51a654c --- /dev/null +++ b/sys/ofed/drivers/net/mlx4/utils.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2014 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MLX4_UTILS_H_ +#define _MLX4_UTILS_H_ + +/* Lagg flags */ +#define MLX4_F_HASHL2 0x00000001 /* hash layer 2 */ +#define MLX4_F_HASHL3 0x00000002 /* hash layer 3 */ +#define MLX4_F_HASHL4 0x00000004 /* hash layer 4 */ +#define MLX4_F_HASHMASK 0x00000007 + +uint32_t mlx4_en_hashmbuf(uint32_t flags, struct mbuf *m, uint32_t key); + +#endif /* _MLX4_UTILS_H_ */ |