path: root/sys/dev/mlx5/mlx5_en
author	hselasky <hselasky@FreeBSD.org>	2015-11-10 12:20:22 +0000
committer	hselasky <hselasky@FreeBSD.org>	2015-11-10 12:20:22 +0000
commit	5c8e2d7ca19ebbae70a9f0d49ee07477f4be5aeb (patch)
tree	7c4ab6c945e4031786f0bbf3a76cb0ea6f5d5167 /sys/dev/mlx5/mlx5_en
parent	60024cd7aad6a43a5dd84afd90b915b12449c421 (diff)
download	FreeBSD-src-5c8e2d7ca19ebbae70a9f0d49ee07477f4be5aeb.zip
	FreeBSD-src-5c8e2d7ca19ebbae70a9f0d49ee07477f4be5aeb.tar.gz
Add mlx5 and mlx5en driver(s) for ConnectX-4 and ConnectX-4LX cards
from Mellanox Technologies. The current driver supports ethernet speeds
up to and including 100 GBit/s. Infiniband support will be done later.
The code added is not compiled by default, which will be done by a
separate commit.

Sponsored by:	Mellanox Technologies
MFC after:	2 weeks
Diffstat (limited to 'sys/dev/mlx5/mlx5_en')
-rw-r--r--	sys/dev/mlx5/mlx5_en/en.h			 781
-rw-r--r--	sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c		 493
-rw-r--r--	sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c	 870
-rw-r--r--	sys/dev/mlx5/mlx5_en/mlx5_en_main.c		2902
-rw-r--r--	sys/dev/mlx5/mlx5_en/mlx5_en_rx.c		 340
-rw-r--r--	sys/dev/mlx5/mlx5_en/mlx5_en_tx.c		 485
-rw-r--r--	sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c		  58
-rw-r--r--	sys/dev/mlx5/mlx5_en/tcp_tlro.c			 697
-rw-r--r--	sys/dev/mlx5/mlx5_en/tcp_tlro.h			  83
9 files changed, 6709 insertions, 0 deletions
diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
new file mode 100644
index 0000000..2988db3
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -0,0 +1,781 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MLX5_EN_H_
+#define _MLX5_EN_H_
+
+#include <linux/kmod.h>
+#include <linux/page.h>
+#include <linux/slab.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+#include <linux/vmalloc.h>
+#include <linux/moduleparam.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_lro.h>
+#include <netinet/udp.h>
+#include <net/ethernet.h>
+#include <sys/buf_ring.h>
+
+#include <machine/bus.h>
+
+#ifdef HAVE_TURBO_LRO
+#include "tcp_tlro.h"
+#endif
+
+#include <dev/mlx5/driver.h>
+#include <dev/mlx5/qp.h>
+#include <dev/mlx5/cq.h>
+#include <dev/mlx5/vport.h>
+
+#include <dev/mlx5/mlx5_core/wq.h>
+#include <dev/mlx5/mlx5_core/transobj.h>
+#include <dev/mlx5/mlx5_core/mlx5_core.h>
+
+#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x7
+#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd
+
+#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x7
+#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa
+#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd
+
+/* FreeBSD HW LRO is limited to 16KB, the maximum mbuf size */
+#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ MJUM16BYTES
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3
+#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10
+#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20
+#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80
+#define MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ 0x7
+#define MLX5E_CACHELINE_SIZE CACHE_LINE_SIZE
+#define MLX5E_HW2SW_MTU(hwmtu) \
+ ((hwmtu) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN))
+#define MLX5E_SW2HW_MTU(swmtu) \
+ ((swmtu) + (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN))
+#define MLX5E_SW2MB_MTU(swmtu) \
+ (MLX5E_SW2HW_MTU(swmtu) + MLX5E_NET_IP_ALIGN)
+#define MLX5E_MTU_MIN 72 /* Min MTU allowed by the kernel */
+#define MLX5E_MTU_MAX MIN(ETHERMTU_JUMBO, MJUM16BYTES) /* Max MTU of Ethernet jumbo frames */
+
+#define MLX5E_BUDGET_MAX 8192 /* RX and TX */
+#define MLX5E_RX_BUDGET_MAX 256
+#define MLX5E_SQ_BF_BUDGET 16
+#define MLX5E_SQ_TX_QUEUE_SIZE 4096 /* SQ drbr queue size */
+
+#define MLX5E_MAX_TX_NUM_TC 8 /* units */
+#define MLX5E_MAX_TX_HEADER 128 /* bytes */
+#define MLX5E_MAX_TX_PAYLOAD_SIZE 65536 /* bytes */
+#define MLX5E_MAX_TX_MBUF_SIZE 65536 /* bytes */
+#define MLX5E_MAX_TX_MBUF_FRAGS \
+ ((MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) - \
+ (MLX5E_MAX_TX_HEADER / MLX5_SEND_WQE_DS)) /* units */
+#define MLX5E_MAX_TX_INLINE \
+ (MLX5E_MAX_TX_HEADER - sizeof(struct mlx5e_tx_wqe) + \
+ sizeof(((struct mlx5e_tx_wqe *)0)->eth.inline_hdr_start)) /* bytes */
+
+MALLOC_DECLARE(M_MLX5EN);
+
+struct mlx5_core_dev;
+struct mlx5e_cq;
+
+typedef void (mlx5e_cq_comp_t)(struct mlx5_core_cq *);
+
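+/*
+ * The statistics below are declared using "X-macro" lists: each m()
+ * entry is expanded with one of the helpers below to produce either a
+ * compile-time element count, a structure field declaration, or the
+ * name/description string pair used for sysctl registration.
+ */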
+#define MLX5E_STATS_COUNT(a,b,c,d) a
+#define MLX5E_STATS_VAR(a,b,c,d) b;
+#define MLX5E_STATS_DESC(a,b,c,d) c, d,
+
+#define MLX5E_VPORT_STATS(m) \
+ /* HW counters */ \
+ m(+1, u64 rx_packets, "rx_packets", "Received packets") \
+ m(+1, u64 rx_bytes, "rx_bytes", "Received bytes") \
+ m(+1, u64 tx_packets, "tx_packets", "Transmitted packets") \
+ m(+1, u64 tx_bytes, "tx_bytes", "Transmitted bytes") \
+ m(+1, u64 rx_error_packets, "rx_error_packets", "Received error packets") \
+ m(+1, u64 rx_error_bytes, "rx_error_bytes", "Received error bytes") \
+ m(+1, u64 tx_error_packets, "tx_error_packets", "Transmitted error packets") \
+ m(+1, u64 tx_error_bytes, "tx_error_bytes", "Transmitted error bytes") \
+ m(+1, u64 rx_unicast_packets, "rx_unicast_packets", "Received unicast packets") \
+ m(+1, u64 rx_unicast_bytes, "rx_unicast_bytes", "Received unicast bytes") \
+ m(+1, u64 tx_unicast_packets, "tx_unicast_packets", "Transmitted unicast packets") \
+ m(+1, u64 tx_unicast_bytes, "tx_unicast_bytes", "Transmitted unicast bytes") \
+ m(+1, u64 rx_multicast_packets, "rx_multicast_packets", "Received multicast packets") \
+ m(+1, u64 rx_multicast_bytes, "rx_multicast_bytes", "Received multicast bytes") \
+ m(+1, u64 tx_multicast_packets, "tx_multicast_packets", "Transmitted multicast packets") \
+ m(+1, u64 tx_multicast_bytes, "tx_multicast_bytes", "Transmitted multicast bytes") \
+ m(+1, u64 rx_broadcast_packets, "rx_broadcast_packets", "Received broadcast packets") \
+ m(+1, u64 rx_broadcast_bytes, "rx_broadcast_bytes", "Received broadcast bytes") \
+ m(+1, u64 tx_broadcast_packets, "tx_broadcast_packets", "Transmitted broadcast packets") \
+ m(+1, u64 tx_broadcast_bytes, "tx_broadcast_bytes", "Transmitted broadcast bytes") \
+ /* SW counters */ \
+ m(+1, u64 tso_packets, "tso_packets", "Transmitted TSO packets") \
+ m(+1, u64 tso_bytes, "tso_bytes", "Transmitted TSO bytes") \
+ m(+1, u64 lro_packets, "lro_packets", "Received LRO packets") \
+ m(+1, u64 lro_bytes, "lro_bytes", "Received LRO bytes") \
+ m(+1, u64 sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \
+ m(+1, u64 sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \
+ m(+1, u64 rx_csum_good, "rx_csum_good", "Received checksum valid packets") \
+ m(+1, u64 rx_csum_none, "rx_csum_none", "Received no checksum packets") \
+ m(+1, u64 tx_csum_offload, "tx_csum_offload", "Transmit checksum offload packets") \
+ m(+1, u64 tx_queue_dropped, "tx_queue_dropped", "Transmit queue dropped") \
+ m(+1, u64 tx_defragged, "tx_defragged", "Transmit queue defragged") \
+ m(+1, u64 rx_wqe_err, "rx_wqe_err", "Receive WQE errors")
+
+#define MLX5E_VPORT_STATS_NUM (0 MLX5E_VPORT_STATS(MLX5E_STATS_COUNT))
+
+struct mlx5e_vport_stats {
+ struct sysctl_ctx_list ctx;
+ u64 arg [0];
+ MLX5E_VPORT_STATS(MLX5E_STATS_VAR)
+};
+
+#define MLX5E_PPORT_IEEE802_3_STATS(m) \
+ m(+1, u64 frames_tx, "frames_tx", "Frames transmitted") \
+ m(+1, u64 frames_rx, "frames_rx", "Frames received") \
+ m(+1, u64 check_seq_err, "check_seq_err", "Sequence errors") \
+ m(+1, u64 alignment_err, "alignment_err", "Alignment errors") \
+ m(+1, u64 octets_tx, "octets_tx", "Bytes transmitted") \
+ m(+1, u64 octets_received, "octets_received", "Bytes received") \
+ m(+1, u64 multicast_xmitted, "multicast_xmitted", "Multicast transmitted") \
+ m(+1, u64 broadcast_xmitted, "broadcast_xmitted", "Broadcast transmitted") \
+ m(+1, u64 multicast_rx, "multicast_rx", "Multicast received") \
+ m(+1, u64 broadcast_rx, "broadcast_rx", "Broadcast received") \
+ m(+1, u64 in_range_len_errors, "in_range_len_errors", "In range length errors") \
+ m(+1, u64 out_of_range_len, "out_of_range_len", "Out of range length errors") \
+ m(+1, u64 too_long_errors, "too_long_errors", "Too long errors") \
+ m(+1, u64 symbol_err, "symbol_err", "Symbol errors") \
+ m(+1, u64 mac_control_tx, "mac_control_tx", "MAC control transmitted") \
+ m(+1, u64 mac_control_rx, "mac_control_rx", "MAC control received") \
+ m(+1, u64 unsupported_op_rx, "unsupported_op_rx", "Unsupported operation received") \
+ m(+1, u64 pause_ctrl_rx, "pause_ctrl_rx", "Pause control received") \
+ m(+1, u64 pause_ctrl_tx, "pause_ctrl_tx", "Pause control transmitted")
+
+#define MLX5E_PPORT_RFC2819_STATS(m) \
+ m(+1, u64 drop_events, "drop_events", "Dropped events") \
+ m(+1, u64 octets, "octets", "Octets") \
+ m(+1, u64 pkts, "pkts", "Packets") \
+ m(+1, u64 broadcast_pkts, "broadcast_pkts", "Broadcast packets") \
+ m(+1, u64 multicast_pkts, "multicast_pkts", "Multicast packets") \
+ m(+1, u64 crc_align_errors, "crc_align_errors", "CRC alignment errors") \
+ m(+1, u64 undersize_pkts, "undersize_pkts", "Undersized packets") \
+ m(+1, u64 oversize_pkts, "oversize_pkts", "Oversized packets") \
+ m(+1, u64 fragments, "fragments", "Fragments") \
+ m(+1, u64 jabbers, "jabbers", "Jabbers") \
+ m(+1, u64 collisions, "collisions", "Collisions")
+
+#define MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \
+ m(+1, u64 p64octets, "p64octets", "Frames of 64 bytes") \
+ m(+1, u64 p65to127octets, "p65to127octets", "Frames of 65 to 127 bytes") \
+ m(+1, u64 p128to255octets, "p128to255octets", "Frames of 128 to 255 bytes") \
+ m(+1, u64 p256to511octets, "p256to511octets", "Frames of 256 to 511 bytes") \
+ m(+1, u64 p512to1023octets, "p512to1023octets", "Frames of 512 to 1023 bytes") \
+ m(+1, u64 p1024to1518octets, "p1024to1518octets", "Frames of 1024 to 1518 bytes") \
+ m(+1, u64 p1519to2047octets, "p1519to2047octets", "Frames of 1519 to 2047 bytes") \
+ m(+1, u64 p2048to4095octets, "p2048to4095octets", "Frames of 2048 to 4095 bytes") \
+ m(+1, u64 p4096to8191octets, "p4096to8191octets", "Frames of 4096 to 8191 bytes") \
+ m(+1, u64 p8192to10239octets, "p8192to10239octets", "Frames of 8192 to 10239 bytes")
+
+#define MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \
+ m(+1, u64 in_octets, "in_octets", "In octets") \
+ m(+1, u64 in_ucast_pkts, "in_ucast_pkts", "In unicast packets") \
+ m(+1, u64 in_discards, "in_discards", "In discards") \
+ m(+1, u64 in_errors, "in_errors", "In errors") \
+ m(+1, u64 in_unknown_protos, "in_unknown_protos", "In unknown protocols") \
+ m(+1, u64 out_octets, "out_octets", "Out octets") \
+ m(+1, u64 out_ucast_pkts, "out_ucast_pkts", "Out unicast packets") \
+ m(+1, u64 out_discards, "out_discards", "Out discards") \
+ m(+1, u64 out_errors, "out_errors", "Out errors") \
+ m(+1, u64 in_multicast_pkts, "in_multicast_pkts", "In multicast packets") \
+ m(+1, u64 in_broadcast_pkts, "in_broadcast_pkts", "In broadcast packets") \
+ m(+1, u64 out_multicast_pkts, "out_multicast_pkts", "Out multicast packets") \
+ m(+1, u64 out_broadcast_pkts, "out_broadcast_pkts", "Out broadcast packets")
+
+#define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m) \
+ m(+1, u64 time_since_last_clear, "time_since_last_clear", \
+ "Time since the last counters clear event (msec)") \
+ m(+1, u64 symbol_errors, "symbol_errors", "Symbol errors") \
+ m(+1, u64 sync_headers_errors, "sync_headers_errors", "Sync header error counter") \
+ m(+1, u64 bip_errors_lane0, "edpl_bip_errors_lane0", \
+ "Indicates the number of PRBS errors on lane 0") \
+ m(+1, u64 bip_errors_lane1, "edpl_bip_errors_lane1", \
+ "Indicates the number of PRBS errors on lane 1") \
+ m(+1, u64 bip_errors_lane2, "edpl_bip_errors_lane2", \
+ "Indicates the number of PRBS errors on lane 2") \
+ m(+1, u64 bip_errors_lane3, "edpl_bip_errors_lane3", \
+ "Indicates the number of PRBS errors on lane 3") \
+ m(+1, u64 fc_corrected_blocks_lane0, "fc_corrected_blocks_lane0", \
+ "FEC correctable block counter lane 0") \
+ m(+1, u64 fc_corrected_blocks_lane1, "fc_corrected_blocks_lane1", \
+ "FEC correctable block counter lane 1") \
+ m(+1, u64 fc_corrected_blocks_lane2, "fc_corrected_blocks_lane2", \
+ "FEC correctable block counter lane 2") \
+ m(+1, u64 fc_corrected_blocks_lane3, "fc_corrected_blocks_lane3", \
+ "FEC correctable block counter lane 3") \
+ m(+1, u64 rs_corrected_blocks, "rs_corrected_blocks", \
+ "FEC correcable block counter") \
+ m(+1, u64 rs_uncorrectable_blocks, "rs_uncorrectable_blocks", \
+ "FEC uncorrecable block counter") \
+ m(+1, u64 rs_no_errors_blocks, "rs_no_errors_blocks", \
+ "The number of RS-FEC blocks received that had no errors") \
+ m(+1, u64 rs_single_error_blocks, "rs_single_error_blocks", \
+ "The number of corrected RS-FEC blocks received that had" \
+ "exactly 1 error symbol") \
+ m(+1, u64 rs_corrected_symbols_total, "rs_corrected_symbols_total", \
+ "Port FEC corrected symbol counter") \
+ m(+1, u64 rs_corrected_symbols_lane0, "rs_corrected_symbols_lane0", \
+ "FEC corrected symbol counter lane 0") \
+ m(+1, u64 rs_corrected_symbols_lane1, "rs_corrected_symbols_lane1", \
+ "FEC corrected symbol counter lane 1") \
+ m(+1, u64 rs_corrected_symbols_lane2, "rs_corrected_symbols_lane2", \
+ "FEC corrected symbol counter lane 2") \
+ m(+1, u64 rs_corrected_symbols_lane3, "rs_corrected_symbols_lane3", \
+ "FEC corrected symbol counter lane 3") \
+
+#define MLX5E_PPORT_Q_COUNTERS(m) \
+ m(+1, u64 out_of_rx_buffer, "out_of_rx_buffer", "Out of RX buffer events, i.e. no receive WQEs were available")
+
+/*
+ * Make sure to update mlx5e_update_pport_counters()
+ * when adding a new MLX5E_PPORT_STATS block
+ */
+#define MLX5E_PPORT_STATS(m) \
+ MLX5E_PPORT_IEEE802_3_STATS(m) \
+ MLX5E_PPORT_RFC2819_STATS(m) \
+ MLX5E_PPORT_Q_COUNTERS(m)
+
+#define MLX5E_PORT_STATS_DEBUG(m) \
+ MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \
+ MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \
+ MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m)
+
+#define MLX5E_PPORT_IEEE802_3_STATS_NUM \
+ (0 MLX5E_PPORT_IEEE802_3_STATS(MLX5E_STATS_COUNT))
+#define MLX5E_PPORT_RFC2819_STATS_NUM \
+ (0 MLX5E_PPORT_RFC2819_STATS(MLX5E_STATS_COUNT))
+#define MLX5E_PPORT_STATS_NUM \
+ (0 MLX5E_PPORT_STATS(MLX5E_STATS_COUNT))
+
+#define MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM \
+ (0 MLX5E_PPORT_RFC2819_STATS_DEBUG(MLX5E_STATS_COUNT))
+#define MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM \
+ (0 MLX5E_PPORT_RFC2863_STATS_DEBUG(MLX5E_STATS_COUNT))
+#define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM \
+ (0 MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(MLX5E_STATS_COUNT))
+#define MLX5E_PORT_STATS_DEBUG_NUM \
+ (0 MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_COUNT))
+
+struct mlx5e_pport_stats {
+ struct sysctl_ctx_list ctx;
+ u64 arg [0];
+ MLX5E_PPORT_STATS(MLX5E_STATS_VAR)
+};
+
+struct mlx5e_port_stats_debug {
+ struct sysctl_ctx_list ctx;
+ u64 arg [0];
+ MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_VAR)
+};
+
+#define MLX5E_RQ_STATS(m) \
+ m(+1, u64 packets, "packets", "Received packets") \
+ m(+1, u64 csum_none, "csum_none", "Received packets without hardware checksum") \
+ m(+1, u64 lro_packets, "lro_packets", "Received LRO packets") \
+ m(+1, u64 lro_bytes, "lro_bytes", "Received LRO bytes") \
+ m(+1, u64 sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \
+ m(+1, u64 sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \
+ m(+1, u64 wqe_err, "wqe_err", "Receive WQE errors")
+
+#define MLX5E_RQ_STATS_NUM (0 MLX5E_RQ_STATS(MLX5E_STATS_COUNT))
+
+struct mlx5e_rq_stats {
+ struct sysctl_ctx_list ctx;
+ u64 arg [0];
+ MLX5E_RQ_STATS(MLX5E_STATS_VAR)
+};
+
+#define MLX5E_SQ_STATS(m) \
+ m(+1, u64 packets, "packets", "Transmitted packets") \
+ m(+1, u64 tso_packets, "tso_packets", "Transmitted TSO packets") \
+ m(+1, u64 tso_bytes, "tso_bytes", "Transmitted TSO bytes") \
+ m(+1, u64 csum_offload_none, "csum_offload_none", "Transmitted packets without checksum offload") \
+ m(+1, u64 defragged, "defragged", "Transmitted packets requiring defragmentation") \
+ m(+1, u64 dropped, "dropped", "Dropped transmit packets") \
+ m(+1, u64 nop, "nop", "Posted NOP WQEs")
+
+#define MLX5E_SQ_STATS_NUM (0 MLX5E_SQ_STATS(MLX5E_STATS_COUNT))
+
+struct mlx5e_sq_stats {
+ struct sysctl_ctx_list ctx;
+ u64 arg [0];
+ MLX5E_SQ_STATS(MLX5E_STATS_VAR)
+};
+
+struct mlx5e_stats {
+ struct mlx5e_vport_stats vport;
+ struct mlx5e_pport_stats pport;
+ struct mlx5e_port_stats_debug port_stats_debug;
+};
+
+struct mlx5e_params {
+ u8 log_sq_size;
+ u8 log_rq_size;
+ u16 num_channels;
+ u8 default_vlan_prio;
+ u8 num_tc;
+ u8 rx_cq_moderation_mode;
+ u16 rx_cq_moderation_usec;
+ u16 rx_cq_moderation_pkts;
+ u16 tx_cq_moderation_usec;
+ u16 tx_cq_moderation_pkts;
+ u16 min_rx_wqes;
+ bool hw_lro_en;
+ u32 lro_wqe_sz;
+ u16 rx_hash_log_tbl_sz;
+};
+
+#define MLX5E_PARAMS(m) \
+ m(+1, u64 tx_pauseframe_control, "tx_pauseframe_control", "Set to enable TX pause frames. Clear to disable.") \
+ m(+1, u64 rx_pauseframe_control, "rx_pauseframe_control", "Set to enable RX pause frames. Clear to disable.") \
+ m(+1, u64 tx_queue_size_max, "tx_queue_size_max", "Max send queue size") \
+ m(+1, u64 rx_queue_size_max, "rx_queue_size_max", "Max receive queue size") \
+ m(+1, u64 tx_queue_size, "tx_queue_size", "Default send queue size") \
+ m(+1, u64 rx_queue_size, "rx_queue_size", "Default receive queue size") \
+ m(+1, u64 channels, "channels", "Default number of channels") \
+ m(+1, u64 coalesce_usecs_max, "coalesce_usecs_max", "Maximum usecs for joining packets") \
+ m(+1, u64 coalesce_pkts_max, "coalesce_pkts_max", "Maximum packets to join") \
+ m(+1, u64 rx_coalesce_usecs, "rx_coalesce_usecs", "Limit in usec for joining rx packets") \
+ m(+1, u64 rx_coalesce_pkts, "rx_coalesce_pkts", "Maximum number of rx packets to join") \
+ m(+1, u64 rx_coalesce_mode, "rx_coalesce_mode", "0: EQE mode 1: CQE mode") \
+ m(+1, u64 tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining tx packets") \
+ m(+1, u64 tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of tx packets to join") \
+ m(+1, u64 hw_lro, "hw_lro", "Set to enable HW LRO. Clear to disable.")
+
+#define MLX5E_PARAMS_NUM (0 MLX5E_PARAMS(MLX5E_STATS_COUNT))
+
+struct mlx5e_params_ethtool {
+ u64 arg [0];
+ MLX5E_PARAMS(MLX5E_STATS_VAR)
+};
+
+/* EEPROM standards for plug-in modules */
+#ifndef MLX5E_ETH_MODULE_SFF_8472
+#define MLX5E_ETH_MODULE_SFF_8472 0x1
+#define MLX5E_ETH_MODULE_SFF_8472_LEN 128
+#endif
+
+#ifndef MLX5E_ETH_MODULE_SFF_8636
+#define MLX5E_ETH_MODULE_SFF_8636 0x2
+#define MLX5E_ETH_MODULE_SFF_8636_LEN 256
+#endif
+
+#ifndef MLX5E_ETH_MODULE_SFF_8436
+#define MLX5E_ETH_MODULE_SFF_8436 0x3
+#define MLX5E_ETH_MODULE_SFF_8436_LEN 256
+#endif
+
+/* EEPROM I2C Addresses */
+#define MLX5E_I2C_ADDR_LOW 0x50
+#define MLX5E_I2C_ADDR_HIGH 0x51
+
+#define MLX5E_EEPROM_LOW_PAGE 0x0
+#define MLX5E_EEPROM_HIGH_PAGE 0x3
+
+#define MLX5E_EEPROM_HIGH_PAGE_OFFSET 128
+#define MLX5E_EEPROM_PAGE_LENGTH 256
+
+#define MLX5E_EEPROM_INFO_BYTES 0x3
+
+struct mlx5e_cq {
+ /* data path - accessed per cqe */
+ struct mlx5_cqwq wq;
+
+ /* data path - accessed per HW polling */
+ struct mlx5_core_cq mcq;
+ struct mlx5e_channel *channel;
+
+ /* control */
+ struct mlx5_wq_ctrl wq_ctrl;
+} __aligned(MLX5E_CACHELINE_SIZE);
+
+struct mlx5e_rq_mbuf {
+ bus_dmamap_t dma_map;
+ caddr_t data;
+ struct mbuf *mbuf;
+};
+
+struct mlx5e_rq {
+ /* data path */
+ struct mlx5_wq_ll wq;
+ struct mtx mtx;
+ bus_dma_tag_t dma_tag;
+ u32 wqe_sz;
+ struct mlx5e_rq_mbuf *mbuf;
+ struct device *pdev;
+ struct ifnet *ifp;
+ struct mlx5e_rq_stats stats;
+ struct mlx5e_cq cq;
+#ifdef HAVE_TURBO_LRO
+ struct tlro_ctrl lro;
+#else
+ struct lro_ctrl lro;
+#endif
+ volatile int enabled;
+ int ix;
+
+ /* control */
+ struct mlx5_wq_ctrl wq_ctrl;
+ u32 rqn;
+ struct mlx5e_channel *channel;
+} __aligned(MLX5E_CACHELINE_SIZE);
+
+struct mlx5e_sq_mbuf {
+ bus_dmamap_t dma_map;
+ struct mbuf *mbuf;
+ u32 num_bytes;
+ u32 num_wqebbs;
+};
+
+enum {
+ MLX5E_SQ_READY,
+ MLX5E_SQ_FULL
+};
+
+struct mlx5e_sq {
+ /* data path */
+ struct mtx lock;
+ bus_dma_tag_t dma_tag;
+ struct mtx comp_lock;
+
+ /* dirtied @completion */
+ u16 cc;
+
+ /* dirtied @xmit */
+ u16 pc __aligned(MLX5E_CACHELINE_SIZE);
+ u16 bf_offset;
+ struct mlx5e_sq_stats stats;
+
+ struct mlx5e_cq cq;
+ struct task sq_task;
+ struct taskqueue *sq_tq;
+
+ /* pointers to per packet info: write@xmit, read@completion */
+ struct mlx5e_sq_mbuf *mbuf;
+ struct buf_ring *br;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ void __iomem *uar_map;
+ void __iomem *uar_bf_map;
+ u32 sqn;
+ u32 bf_buf_size;
+ struct device *pdev;
+ u32 mkey_be;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+ struct mlx5_uar uar;
+ struct mlx5e_channel *channel;
+ int tc;
+ unsigned int queue_state;
+} __aligned(MLX5E_CACHELINE_SIZE);
+
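+/*
+ * Check if the send queue has room for "n" more WQE building blocks.
+ * The free space is derived from the distance between the producer
+ * counter (pc) and the consumer counter (cc), masked by the work
+ * queue size; an empty queue (cc == pc) always has room.
+ */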
+static inline bool
+mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n)
+{
+ return ((sq->wq.sz_m1 & (sq->cc - sq->pc)) >= n ||
+ sq->cc == sq->pc);
+}
+
+struct mlx5e_channel {
+ /* data path */
+ struct mlx5e_rq rq;
+ struct mlx5e_sq sq[MLX5E_MAX_TX_NUM_TC];
+ struct device *pdev;
+ struct ifnet *ifp;
+ u32 mkey_be;
+ u8 num_tc;
+
+ /* control */
+ struct mlx5e_priv *priv;
+ int ix;
+ int cpu;
+} __aligned(MLX5E_CACHELINE_SIZE);
+
+enum mlx5e_traffic_types {
+ MLX5E_TT_IPV4_TCP,
+ MLX5E_TT_IPV6_TCP,
+ MLX5E_TT_IPV4_UDP,
+ MLX5E_TT_IPV6_UDP,
+ MLX5E_TT_IPV4_IPSEC_AH,
+ MLX5E_TT_IPV6_IPSEC_AH,
+ MLX5E_TT_IPV4_IPSEC_ESP,
+ MLX5E_TT_IPV6_IPSEC_ESP,
+ MLX5E_TT_IPV4,
+ MLX5E_TT_IPV6,
+ MLX5E_TT_ANY,
+ MLX5E_NUM_TT,
+};
+
+enum {
+ MLX5E_RQT_SPREADING = 0,
+ MLX5E_RQT_DEFAULT_RQ = 1,
+ MLX5E_NUM_RQT = 2,
+};
+
+struct mlx5e_eth_addr_info {
+ u8 addr [ETH_ALEN + 2];
+ u32 tt_vec;
+ u32 ft_ix[MLX5E_NUM_TT]; /* flow table index per traffic type */
+};
+
+#define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE)
+
+struct mlx5e_eth_addr_hash_node;
+
+struct mlx5e_eth_addr_hash_head {
+ struct mlx5e_eth_addr_hash_node *lh_first;
+};
+
+struct mlx5e_eth_addr_db {
+ struct mlx5e_eth_addr_hash_head if_uc[MLX5E_ETH_ADDR_HASH_SIZE];
+ struct mlx5e_eth_addr_hash_head if_mc[MLX5E_ETH_ADDR_HASH_SIZE];
+ struct mlx5e_eth_addr_info broadcast;
+ struct mlx5e_eth_addr_info allmulti;
+ struct mlx5e_eth_addr_info promisc;
+ bool broadcast_enabled;
+ bool allmulti_enabled;
+ bool promisc_enabled;
+};
+
+enum {
+ MLX5E_STATE_ASYNC_EVENTS_ENABLE,
+ MLX5E_STATE_OPENED,
+};
+
+struct mlx5e_vlan_db {
+ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+ u32 active_vlans_ft_ix[VLAN_N_VID];
+ u32 untagged_rule_ft_ix;
+ u32 any_vlan_rule_ft_ix;
+ bool filter_disabled;
+};
+
+struct mlx5e_flow_table {
+ void *vlan;
+ void *main;
+};
+
+struct mlx5e_priv {
+ /* priv data path fields - start */
+ int order_base_2_num_channels;
+ int queue_mapping_channel_mask;
+ int num_tc;
+ int default_vlan_prio;
+ /* priv data path fields - end */
+
+ unsigned long state;
+ int gone;
+#define PRIV_LOCK(priv) sx_xlock(&(priv)->state_lock)
+#define PRIV_UNLOCK(priv) sx_xunlock(&(priv)->state_lock)
+#define PRIV_LOCKED(priv) sx_xlocked(&(priv)->state_lock)
+ struct sx state_lock; /* Protects Interface state */
+ struct mlx5_uar cq_uar;
+ u32 pdn;
+ u32 tdn;
+ struct mlx5_core_mr mr;
+
+ struct mlx5e_channel * volatile *channel;
+ u32 tisn[MLX5E_MAX_TX_NUM_TC];
+ u32 rqtn;
+ u32 tirn[MLX5E_NUM_TT];
+
+ struct mlx5e_flow_table ft;
+ struct mlx5e_eth_addr_db eth_addr;
+ struct mlx5e_vlan_db vlan;
+
+ struct mlx5e_params params;
+ struct mlx5e_params_ethtool params_ethtool;
+ struct mtx async_events_mtx; /* sync hw events */
+ struct work_struct update_stats_work;
+ struct work_struct update_carrier_work;
+ struct work_struct set_rx_mode_work;
+
+ struct mlx5_core_dev *mdev;
+ struct ifnet *ifp;
+ struct sysctl_ctx_list sysctl_ctx;
+ struct sysctl_oid *sysctl_ifnet;
+ struct sysctl_oid *sysctl_hw;
+ int sysctl_debug;
+ struct mlx5e_stats stats;
+ int counter_set_id;
+
+ eventhandler_tag vlan_detach;
+ eventhandler_tag vlan_attach;
+ struct ifmedia media;
+ int media_status_last;
+ int media_active_last;
+
+ struct callout watchdog;
+};
+
+#define MLX5E_NET_IP_ALIGN 2
+
+struct mlx5e_tx_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_eth_seg eth;
+};
+
+struct mlx5e_rx_wqe {
+ struct mlx5_wqe_srq_next_seg next;
+ struct mlx5_wqe_data_seg data;
+};
+
+struct mlx5e_eeprom {
+ int lock_bit;
+ int i2c_addr;
+ int page_num;
+ int device_addr;
+ int module_num;
+ int len;
+ int type;
+ int page_valid;
+ u32 *data;
+};
+
+enum mlx5e_link_mode {
+ MLX5E_1000BASE_CX_SGMII = 0,
+ MLX5E_1000BASE_KX = 1,
+ MLX5E_10GBASE_CX4 = 2,
+ MLX5E_10GBASE_KX4 = 3,
+ MLX5E_10GBASE_KR = 4,
+ MLX5E_20GBASE_KR2 = 5,
+ MLX5E_40GBASE_CR4 = 6,
+ MLX5E_40GBASE_KR4 = 7,
+ MLX5E_56GBASE_R4 = 8,
+ MLX5E_10GBASE_CR = 12,
+ MLX5E_10GBASE_SR = 13,
+ MLX5E_10GBASE_ER = 14,
+ MLX5E_40GBASE_SR4 = 15,
+ MLX5E_40GBASE_LR4 = 16,
+ MLX5E_100GBASE_CR4 = 20,
+ MLX5E_100GBASE_SR4 = 21,
+ MLX5E_100GBASE_KR4 = 22,
+ MLX5E_100GBASE_LR4 = 23,
+ MLX5E_100BASE_TX = 24,
+ MLX5E_100BASE_T = 25,
+ MLX5E_10GBASE_T = 26,
+ MLX5E_25GBASE_CR = 27,
+ MLX5E_25GBASE_KR = 28,
+ MLX5E_25GBASE_SR = 29,
+ MLX5E_50GBASE_CR2 = 30,
+ MLX5E_50GBASE_KR2 = 31,
+ MLX5E_LINK_MODES_NUMBER,
+};
+
+#define MLX5E_PROT_MASK(link_mode) (1 << (link_mode))
+#define MLX5E_FLD_MAX(typ, fld) ((1ULL << __mlx5_bit_sz(typ, fld)) - 1ULL)
+
+int mlx5e_xmit(struct ifnet *, struct mbuf *);
+
+int mlx5e_open_locked(struct ifnet *);
+int mlx5e_close_locked(struct ifnet *);
+
+void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event);
+void mlx5e_rx_cq_comp(struct mlx5_core_cq *);
+void mlx5e_tx_cq_comp(struct mlx5_core_cq *);
+struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
+void mlx5e_tx_que(void *context, int pending);
+
+int mlx5e_open_flow_table(struct mlx5e_priv *priv);
+void mlx5e_close_flow_table(struct mlx5e_priv *priv);
+void mlx5e_set_rx_mode_core(struct mlx5e_priv *priv);
+void mlx5e_set_rx_mode_work(struct work_struct *work);
+
+void mlx5e_vlan_rx_add_vid(void *, struct ifnet *, u16);
+void mlx5e_vlan_rx_kill_vid(void *, struct ifnet *, u16);
+void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv);
+void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv);
+int mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv);
+void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv);
+
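+/*
+ * Ring the doorbell for newly posted WQEs. When a BlueFlame size is
+ * given, the WQE control segment is additionally copied into the
+ * write-combining BlueFlame register area so the device does not need
+ * to fetch it from host memory.
+ */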
+static inline void
+mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
+ struct mlx5e_tx_wqe *wqe, int bf_sz)
+{
+ u16 ofst = MLX5_BF_OFFSET + sq->bf_offset;
+
+ /* ensure wqe is visible to device before updating doorbell record */
+ wmb();
+
+ *sq->wq.db = cpu_to_be32(sq->pc);
+
+ /*
+ * Ensure the doorbell record is visible to device before ringing
+ * the doorbell:
+ */
+ wmb();
+
+ if (bf_sz) {
+ __iowrite64_copy(sq->uar_bf_map + ofst, &wqe->ctrl, bf_sz);
+
+ /* flush the write-combining mapped buffer */
+ wmb();
+
+ } else {
+ mlx5_write64((__be32 *)&wqe->ctrl, sq->uar_map + ofst, NULL);
+ }
+
+ sq->bf_offset ^= sq->bf_buf_size;
+}
+
+static inline void
+mlx5e_cq_arm(struct mlx5e_cq *cq)
+{
+ struct mlx5_core_cq *mcq;
+
+ mcq = &cq->mcq;
+ mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc);
+}
+
+extern const struct ethtool_ops mlx5e_ethtool_ops;
+void mlx5e_create_ethtool(struct mlx5e_priv *);
+void mlx5e_create_stats(struct sysctl_ctx_list *,
+ struct sysctl_oid_list *, const char *,
+ const char **, unsigned, u64 *);
+void mlx5e_send_nop(struct mlx5e_sq *, u32, bool);
+
+#endif /* _MLX5_EN_H_ */
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c b/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c
new file mode 100644
index 0000000..2ad2255
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c
@@ -0,0 +1,493 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+#include <net/sff8472.h>
+
+void
+mlx5e_create_stats(struct sysctl_ctx_list *ctx,
+ struct sysctl_oid_list *parent, const char *buffer,
+ const char **desc, unsigned num, u64 * arg)
+{
+ struct sysctl_oid *node;
+ unsigned x;
+
+ sysctl_ctx_init(ctx);
+
+ node = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO,
+ buffer, CTLFLAG_RD, NULL, "Statistics");
+ if (node == NULL)
+ return;
+ for (x = 0; x != num; x++) {
+ SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ desc[2 * x], CTLFLAG_RD, arg + x, desc[2 * x + 1]);
+ }
+}
+
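+/*
+ * Common sysctl handler for all tunables below the "conf" node. The
+ * "arg2" index selects the entry in "params_ethtool.arg[]". Pause
+ * frame settings are applied directly to firmware; most other changes
+ * made while the interface is running close and re-open it so the new
+ * setting takes effect.
+ */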
+static int
+mlx5e_ethtool_handler(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_priv *priv = arg1;
+ uint64_t value;
+ int was_opened;
+ int error;
+
+ PRIV_LOCK(priv);
+ value = priv->params_ethtool.arg[arg2];
+ error = sysctl_handle_64(oidp, &value, 0, req);
+ if (error || req->newptr == NULL ||
+ value == priv->params_ethtool.arg[arg2])
+ goto done;
+
+ /* assign new value */
+ priv->params_ethtool.arg[arg2] = value;
+
+ /* check if device is gone */
+ if (priv->gone) {
+ error = ENXIO;
+ goto done;
+ }
+
+ if (&priv->params_ethtool.arg[arg2] == &priv->params_ethtool.rx_pauseframe_control ||
+ &priv->params_ethtool.arg[arg2] == &priv->params_ethtool.tx_pauseframe_control) {
+ /* range check parameters */
+ priv->params_ethtool.rx_pauseframe_control =
+ priv->params_ethtool.rx_pauseframe_control ? 1 : 0;
+ priv->params_ethtool.tx_pauseframe_control =
+ priv->params_ethtool.tx_pauseframe_control ? 1 : 0;
+
+ /* update firmware */
+ error = -mlx5_set_port_pause(priv->mdev, 1,
+ priv->params_ethtool.rx_pauseframe_control,
+ priv->params_ethtool.tx_pauseframe_control);
+ goto done;
+ }
+
+ was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (was_opened)
+ mlx5e_close_locked(priv->ifp);
+
+ /* import TX queue size */
+ if (priv->params_ethtool.tx_queue_size <
+ (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) {
+ priv->params_ethtool.tx_queue_size =
+ (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
+ } else if (priv->params_ethtool.tx_queue_size >
+ priv->params_ethtool.tx_queue_size_max) {
+ priv->params_ethtool.tx_queue_size =
+ priv->params_ethtool.tx_queue_size_max;
+ }
+ priv->params.log_sq_size =
+ order_base_2(priv->params_ethtool.tx_queue_size);
+
+ /* import RX queue size */
+ if (priv->params_ethtool.rx_queue_size <
+ (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) {
+ priv->params_ethtool.rx_queue_size =
+ (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE);
+ } else if (priv->params_ethtool.rx_queue_size >
+ priv->params_ethtool.rx_queue_size_max) {
+ priv->params_ethtool.rx_queue_size =
+ priv->params_ethtool.rx_queue_size_max;
+ }
+ priv->params.log_rq_size =
+ order_base_2(priv->params_ethtool.rx_queue_size);
+
+ priv->params.min_rx_wqes = min_t (u16,
+ priv->params_ethtool.rx_queue_size - 1,
+ MLX5E_PARAMS_DEFAULT_MIN_RX_WQES);
+
+ /* import number of channels */
+ if (priv->params_ethtool.channels < 1)
+ priv->params_ethtool.channels = 1;
+ else if (priv->params_ethtool.channels >
+ (u64) priv->mdev->priv.eq_table.num_comp_vectors) {
+ priv->params_ethtool.channels =
+ (u64) priv->mdev->priv.eq_table.num_comp_vectors;
+ }
+ priv->params.num_channels = priv->params_ethtool.channels;
+
+ /* import RX mode */
+ if (priv->params_ethtool.rx_coalesce_mode != 0)
+ priv->params_ethtool.rx_coalesce_mode = 1;
+ priv->params.rx_cq_moderation_mode = priv->params_ethtool.rx_coalesce_mode;
+
+ /* import RX coal time */
+ if (priv->params_ethtool.rx_coalesce_usecs < 1)
+ priv->params_ethtool.rx_coalesce_usecs = 0;
+ else if (priv->params_ethtool.rx_coalesce_usecs >
+ MLX5E_FLD_MAX(cqc, cq_period)) {
+ priv->params_ethtool.rx_coalesce_usecs =
+ MLX5E_FLD_MAX(cqc, cq_period);
+ }
+ priv->params.rx_cq_moderation_usec = priv->params_ethtool.rx_coalesce_usecs;
+
+ /* import RX coal pkts */
+ if (priv->params_ethtool.rx_coalesce_pkts < 1)
+ priv->params_ethtool.rx_coalesce_pkts = 0;
+ else if (priv->params_ethtool.rx_coalesce_pkts >
+ MLX5E_FLD_MAX(cqc, cq_max_count)) {
+ priv->params_ethtool.rx_coalesce_pkts =
+ MLX5E_FLD_MAX(cqc, cq_max_count);
+ }
+ priv->params.rx_cq_moderation_pkts = priv->params_ethtool.rx_coalesce_pkts;
+
+ /* import TX coal time */
+ if (priv->params_ethtool.tx_coalesce_usecs < 1)
+ priv->params_ethtool.tx_coalesce_usecs = 0;
+ else if (priv->params_ethtool.tx_coalesce_usecs >
+ MLX5E_FLD_MAX(cqc, cq_period)) {
+ priv->params_ethtool.tx_coalesce_usecs =
+ MLX5E_FLD_MAX(cqc, cq_period);
+ }
+ priv->params.tx_cq_moderation_usec = priv->params_ethtool.tx_coalesce_usecs;
+
+ /* import TX coal pkts */
+ if (priv->params_ethtool.tx_coalesce_pkts < 1)
+ priv->params_ethtool.tx_coalesce_pkts = 0;
+ else if (priv->params_ethtool.tx_coalesce_pkts >
+ MLX5E_FLD_MAX(cqc, cq_max_count)) {
+ priv->params_ethtool.tx_coalesce_pkts = MLX5E_FLD_MAX(cqc, cq_max_count);
+ }
+ priv->params.tx_cq_moderation_pkts = priv->params_ethtool.tx_coalesce_pkts;
+
+ /* Turning HW LRO off is always allowed; turning it on requires interface and hardware LRO support */
+ if (priv->params_ethtool.hw_lro) {
+ if (priv->params_ethtool.hw_lro != 1) {
+ priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
+ error = EINVAL;
+ goto done;
+ }
+ if (priv->ifp->if_capenable & IFCAP_LRO)
+ priv->params.hw_lro_en = !!MLX5_CAP_ETH(priv->mdev, lro_cap);
+ } else {
+ priv->params.hw_lro_en = false;
+ }
+
+ if (was_opened)
+ mlx5e_open_locked(priv->ifp);
+done:
+ PRIV_UNLOCK(priv);
+ return (error);
+}
+
+/*
+ * Read the first three bytes of the EEPROM to get the information
+ * needed for the full read:
+ * Byte 0 - Identifier byte
+ * Byte 1 - Revision byte
+ * Byte 2 - Status byte
+ */
+static int
+mlx5e_get_eeprom_info(struct mlx5e_priv *priv, struct mlx5e_eeprom *eeprom)
+{
+ struct mlx5_core_dev *dev = priv->mdev;
+ u32 data = 0;
+ int size_read = 0;
+ int ret;
+
+ ret = mlx5_query_module_num(dev, &eeprom->module_num);
+ if (ret) {
+ if_printf(priv->ifp, "%s:%d: Failed query module error=%d\n",
+ __func__, __LINE__, ret);
+ return (ret);
+ }
+
+ /* Read the first three bytes to get Identifier, Revision and Status */
+ ret = mlx5_query_eeprom(dev, eeprom->i2c_addr, eeprom->page_num,
+ eeprom->device_addr, MLX5E_EEPROM_INFO_BYTES, eeprom->module_num, &data,
+ &size_read);
+ if (ret) {
+ if_printf(priv->ifp, "%s:%d: Failed query eeprom module error=0x%x\n",
+ __func__, __LINE__, ret);
+ return (ret);
+ }
+
+ switch (data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK) {
+ case SFF_8024_ID_QSFP:
+ eeprom->type = MLX5E_ETH_MODULE_SFF_8436;
+ eeprom->len = MLX5E_ETH_MODULE_SFF_8436_LEN;
+ break;
+ case SFF_8024_ID_QSFPPLUS:
+ case SFF_8024_ID_QSFP28:
+ if ((data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK) == SFF_8024_ID_QSFP28 ||
+ ((data & MLX5_EEPROM_REVISION_ID_BYTE_MASK) >> 8) >= 0x3) {
+ eeprom->type = MLX5E_ETH_MODULE_SFF_8636;
+ eeprom->len = MLX5E_ETH_MODULE_SFF_8636_LEN;
+ } else {
+ eeprom->type = MLX5E_ETH_MODULE_SFF_8436;
+ eeprom->len = MLX5E_ETH_MODULE_SFF_8436_LEN;
+ }
+ if ((data & MLX5_EEPROM_PAGE_3_VALID_BIT_MASK) == 0)
+ eeprom->page_valid = 1;
+ break;
+ case SFF_8024_ID_SFP:
+ eeprom->type = MLX5E_ETH_MODULE_SFF_8472;
+ eeprom->len = MLX5E_ETH_MODULE_SFF_8472_LEN;
+ break;
+ default:
+ if_printf(priv->ifp, "%s:%d: Not recognized cable type = 0x%x\n",
+ __func__, __LINE__, data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK);
+ return (EINVAL);
+ }
+ return (0);
+}
+
+/* Read both low and high pages of the eeprom */
+static int
+mlx5e_get_eeprom(struct mlx5e_priv *priv, struct mlx5e_eeprom *ee)
+{
+ struct mlx5_core_dev *dev = priv->mdev;
+ int size_read = 0;
+ int ret;
+
+ if (ee->len == 0)
+ return (EINVAL);
+
+ /* Read low page of the eeprom */
+ while (ee->device_addr < ee->len) {
+ ret = mlx5_query_eeprom(dev, ee->i2c_addr, ee->page_num, ee->device_addr,
+ ee->len - ee->device_addr, ee->module_num,
+ ee->data + (ee->device_addr/4), &size_read);
+ if (ret) {
+ if_printf(priv->ifp, "%s:%d: Failed reading eeprom, "
+ "error = 0x%02x\n", __func__, __LINE__, ret);
+ return (ret);
+ }
+ ee->device_addr += size_read;
+ }
+
+ /* Read high page of the eeprom */
+ if (ee->page_valid) {
+ ee->device_addr = MLX5E_EEPROM_HIGH_PAGE_OFFSET;
+ ee->page_num = MLX5E_EEPROM_HIGH_PAGE;
+ size_read = 0;
+ while (ee->device_addr < MLX5E_EEPROM_PAGE_LENGTH) {
+ ret = mlx5_query_eeprom(dev, ee->i2c_addr, ee->page_num,
+ ee->device_addr, MLX5E_EEPROM_PAGE_LENGTH - ee->device_addr,
+ ee->module_num, ee->data + (ee->len/4) +
+ ((ee->device_addr - MLX5E_EEPROM_HIGH_PAGE_OFFSET)/4),
+ &size_read);
+ if (ret) {
+ if_printf(priv->ifp, "%s:%d: Failed reading eeprom, "
+ "error = 0x%02x\n", __func__, __LINE__, ret);
+ return (ret);
+ }
+ ee->device_addr += size_read;
+ }
+ }
+ return (0);
+}
+
+static void
+mlx5e_print_eeprom(struct mlx5e_eeprom *eeprom)
+{
+ int i, j = 0;
+ int row = 0;
+
+ printf("\nOffset\t\tValues\n");
+ printf("------\t\t------\n");
+ while (row < eeprom->len) {
+ printf("0x%04x\t\t",row);
+ for (i = 0; i < 16; i++) {
+ printf("%02x ", ((u8*)eeprom->data)[j]);
+ j++;
+ row++;
+ }
+ printf("\n");
+ }
+
+ if (eeprom->page_valid) {
+ row = MLX5E_EEPROM_HIGH_PAGE_OFFSET;
+ printf("\nUpper Page 0x03\n");
+ printf("\nOffset\t\tValues\n");
+ printf("------\t\t------\n");
+ while (row < MLX5E_EEPROM_PAGE_LENGTH) {
+ printf("0x%04x\t\t",row);
+ for (i = 0; i < 16; i++) {
+ printf("%02x ", ((u8*)eeprom->data)[j]);
+ j++;
+ row++;
+ }
+ printf("\n");
+ }
+ }
+}
+
+/*
+ * Read the cable module EEPROM. The first three bytes are inspected
+ * to determine the module type and the length of the full read.
+ * The information is printed to dmesg.
+ */
+static int
+mlx5e_read_eeprom(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_priv *priv = arg1;
+ struct mlx5e_eeprom eeprom;
+ int error;
+ int result = 0;
+
+ PRIV_LOCK(priv);
+ error = sysctl_handle_int(oidp, &result, 0, req);
+ if (error || !req->newptr)
+ goto done;
+
+ /* Check if device is gone */
+ if (priv->gone) {
+ error = ENXIO;
+ goto done;
+ }
+
+ if (result == 1) {
+ eeprom.i2c_addr = MLX5E_I2C_ADDR_LOW;
+ eeprom.device_addr = 0;
+ eeprom.page_num = MLX5E_EEPROM_LOW_PAGE;
+ eeprom.page_valid = 0;
+
+ /* Read the first three bytes to get the module type and length */
+ error = mlx5e_get_eeprom_info(priv, &eeprom);
+ if (error) {
+ if_printf(priv->ifp, "%s:%d: Failed reading eeprom's "
+ "initial information\n", __func__, __LINE__);
+ error = 0;
+ goto done;
+ }
+
+ /* Allocate a buffer of the needed length plus room for the upper page */
+ eeprom.data = malloc(eeprom.len + MLX5E_EEPROM_PAGE_LENGTH,
+ M_MLX5EN, M_WAITOK | M_ZERO);
+
+ /* Read the whole eeprom information */
+ error = mlx5e_get_eeprom(priv, &eeprom);
+ if (error) {
+ if_printf(priv->ifp, "%s:%d: Failed reading eeprom\n",
+ __func__, __LINE__);
+ error = 0;
+ /* Continue printing partial information in case of an error */
+ }
+
+ mlx5e_print_eeprom(&eeprom);
+ free(eeprom.data, M_MLX5EN);
+ }
+done:
+ PRIV_UNLOCK(priv);
+ return (error);
+}
+
+static const char *mlx5e_params_desc[] = {
+ MLX5E_PARAMS(MLX5E_STATS_DESC)
+};
+
+static const char *mlx5e_port_stats_debug_desc[] = {
+ MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_DESC)
+};
+
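+/*
+ * Sysctl handler toggling the extended "debug_stats" node: the node
+ * is created when the value is set to non-zero and destroyed when it
+ * is cleared.
+ */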
+static int
+mlx5e_ethtool_debug_stats(SYSCTL_HANDLER_ARGS)
+{
+ struct mlx5e_priv *priv = arg1;
+ int error;
+ int sys_debug;
+
+ sys_debug = priv->sysctl_debug;
+ error = sysctl_handle_int(oidp, &priv->sysctl_debug, 0, req);
+ if (error || !req->newptr)
+ return (error);
+ priv->sysctl_debug = !!priv->sysctl_debug;
+ if (sys_debug == priv->sysctl_debug)
+ return (error);
+ if (priv->sysctl_debug)
+ mlx5e_create_stats(&priv->stats.port_stats_debug.ctx,
+ SYSCTL_CHILDREN(priv->sysctl_ifnet), "debug_stats",
+ mlx5e_port_stats_debug_desc, MLX5E_PORT_STATS_DEBUG_NUM,
+ priv->stats.port_stats_debug.arg);
+ else
+ sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
+ return (error);
+}
+
+void
+mlx5e_create_ethtool(struct mlx5e_priv *priv)
+{
+ struct sysctl_oid *node;
+ const char *pnameunit;
+ unsigned x;
+
+ /* set some defaults */
+ priv->params_ethtool.tx_queue_size_max = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
+ priv->params_ethtool.rx_queue_size_max = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE;
+ priv->params_ethtool.tx_queue_size = 1 << priv->params.log_sq_size;
+ priv->params_ethtool.rx_queue_size = 1 << priv->params.log_rq_size;
+ priv->params_ethtool.channels = priv->params.num_channels;
+ priv->params_ethtool.coalesce_pkts_max = MLX5E_FLD_MAX(cqc, cq_max_count);
+ priv->params_ethtool.coalesce_usecs_max = MLX5E_FLD_MAX(cqc, cq_period);
+ priv->params_ethtool.rx_coalesce_mode = priv->params.rx_cq_moderation_mode;
+ priv->params_ethtool.rx_coalesce_usecs = priv->params.rx_cq_moderation_usec;
+ priv->params_ethtool.rx_coalesce_pkts = priv->params.rx_cq_moderation_pkts;
+ priv->params_ethtool.tx_coalesce_usecs = priv->params.tx_cq_moderation_usec;
+ priv->params_ethtool.tx_coalesce_pkts = priv->params.tx_cq_moderation_pkts;
+ priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
+
+ /* create root node */
+ node = SYSCTL_ADD_NODE(&priv->sysctl_ctx,
+ SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO,
+ "conf", CTLFLAG_RW, NULL, "Configuration");
+ if (node == NULL)
+ return;
+ for (x = 0; x != MLX5E_PARAMS_NUM; x++) {
+ /* check for read-only parameter */
+ if (strstr(mlx5e_params_desc[2 * x], "_max") != NULL) {
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ mlx5e_params_desc[2 * x], CTLTYPE_U64 | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, priv, x, &mlx5e_ethtool_handler, "QU",
+ mlx5e_params_desc[2 * x + 1]);
+ } else {
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ mlx5e_params_desc[2 * x], CTLTYPE_U64 | CTLFLAG_RWTUN |
+ CTLFLAG_MPSAFE, priv, x, &mlx5e_ethtool_handler, "QU",
+ mlx5e_params_desc[2 * x + 1]);
+ }
+ }
+
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO,
+ "debug_stats", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv,
+ 0, &mlx5e_ethtool_debug_stats, "I", "Extended debug statistics");
+
+ pnameunit = device_get_nameunit(priv->mdev->pdev->dev.bsddev);
+
+ SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(node),
+ OID_AUTO, "device_name", CTLFLAG_RD,
+ __DECONST(void *, pnameunit), 0,
+ "PCI device name");
+
+ /* EEPROM support */
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, "eeprom_info",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
+ mlx5e_read_eeprom, "I", "EEPROM information");
+}
+
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c
new file mode 100644
index 0000000..ab9ea73
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c
@@ -0,0 +1,870 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+
+#include <linux/list.h>
+#include <dev/mlx5/flow_table.h>
+
+enum {
+ MLX5E_FULLMATCH = 0,
+ MLX5E_ALLMULTI = 1,
+ MLX5E_PROMISC = 2,
+};
+
+enum {
+ MLX5E_UC = 0,
+ MLX5E_MC_IPV4 = 1,
+ MLX5E_MC_IPV6 = 2,
+ MLX5E_MC_OTHER = 3,
+};
+
+enum {
+ MLX5E_ACTION_NONE = 0,
+ MLX5E_ACTION_ADD = 1,
+ MLX5E_ACTION_DEL = 2,
+};
+
+struct mlx5e_eth_addr_hash_node {
+ LIST_ENTRY(mlx5e_eth_addr_hash_node) hlist;
+ u8 action;
+ struct mlx5e_eth_addr_info ai;
+};
+
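+/* Hash an ethernet address on its least significant byte */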
+static inline int
+mlx5e_hash_eth_addr(const u8 * addr)
+{
+ return (addr[5]);
+}
+
+static void
+mlx5e_add_eth_addr_to_hash(struct mlx5e_eth_addr_hash_head *hash,
+ const u8 * addr)
+{
+ struct mlx5e_eth_addr_hash_node *hn;
+ int ix = mlx5e_hash_eth_addr(addr);
+
+ LIST_FOREACH(hn, &hash[ix], hlist) {
+ if (bcmp(hn->ai.addr, addr, ETHER_ADDR_LEN) == 0) {
+ if (hn->action == MLX5E_ACTION_DEL)
+ hn->action = MLX5E_ACTION_NONE;
+ return;
+ }
+ }
+
+ hn = malloc(sizeof(*hn), M_MLX5EN, M_NOWAIT | M_ZERO);
+ if (hn == NULL)
+ return;
+
+ ether_addr_copy(hn->ai.addr, addr);
+ hn->action = MLX5E_ACTION_ADD;
+
+ LIST_INSERT_HEAD(&hash[ix], hn, hlist);
+}
+
+static void
+mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn)
+{
+ LIST_REMOVE(hn, hlist);
+ free(hn, M_MLX5EN);
+}
+
+static void
+mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv,
+ struct mlx5e_eth_addr_info *ai)
+{
+ void *ft = priv->ft.main;
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV6_TCP))
+ mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_TCP]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV4_TCP))
+ mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_TCP]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV6_UDP))
+ mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_UDP]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV4_UDP))
+ mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_UDP]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV6))
+ mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_IPV4))
+ mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4]);
+
+ if (ai->tt_vec & (1 << MLX5E_TT_ANY))
+ mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_ANY]);
+}
+
+static int
+mlx5e_get_eth_addr_type(const u8 * addr)
+{
+ if (ETHER_IS_MULTICAST(addr) == 0)
+ return (MLX5E_UC);
+
+ if ((addr[0] == 0x01) &&
+ (addr[1] == 0x00) &&
+ (addr[2] == 0x5e) &&
+ !(addr[3] & 0x80))
+ return (MLX5E_MC_IPV4);
+
+ if ((addr[0] == 0x33) &&
+ (addr[1] == 0x33))
+ return (MLX5E_MC_IPV6);
+
+ return (MLX5E_MC_OTHER);
+}
+
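+/*
+ * Compute the set of traffic types, as a bit vector, that a steering
+ * rule of the given type should cover for this ethernet address.
+ */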
+static u32
+mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type)
+{
+ int eth_addr_type;
+ u32 ret;
+
+ switch (type) {
+ case MLX5E_FULLMATCH:
+ eth_addr_type = mlx5e_get_eth_addr_type(ai->addr);
+ switch (eth_addr_type) {
+ case MLX5E_UC:
+ ret =
+ (1 << MLX5E_TT_IPV4_TCP) |
+ (1 << MLX5E_TT_IPV6_TCP) |
+ (1 << MLX5E_TT_IPV4_UDP) |
+ (1 << MLX5E_TT_IPV6_UDP) |
+ (1 << MLX5E_TT_IPV4) |
+ (1 << MLX5E_TT_IPV6) |
+ (1 << MLX5E_TT_ANY) |
+ 0;
+ break;
+
+ case MLX5E_MC_IPV4:
+ ret =
+ (1 << MLX5E_TT_IPV4_UDP) |
+ (1 << MLX5E_TT_IPV4) |
+ 0;
+ break;
+
+ case MLX5E_MC_IPV6:
+ ret =
+ (1 << MLX5E_TT_IPV6_UDP) |
+ (1 << MLX5E_TT_IPV6) |
+ 0;
+ break;
+
+ default:
+ ret =
+ (1 << MLX5E_TT_ANY) |
+ 0;
+ break;
+ }
+ break;
+
+ case MLX5E_ALLMULTI:
+ ret =
+ (1 << MLX5E_TT_IPV4_UDP) |
+ (1 << MLX5E_TT_IPV6_UDP) |
+ (1 << MLX5E_TT_IPV4) |
+ (1 << MLX5E_TT_IPV6) |
+ (1 << MLX5E_TT_ANY) |
+ 0;
+ break;
+
+ default: /* MLX5E_PROMISC */
+ ret =
+ (1 << MLX5E_TT_IPV4_TCP) |
+ (1 << MLX5E_TT_IPV6_TCP) |
+ (1 << MLX5E_TT_IPV4_UDP) |
+ (1 << MLX5E_TT_IPV6_UDP) |
+ (1 << MLX5E_TT_IPV4) |
+ (1 << MLX5E_TT_IPV6) |
+ (1 << MLX5E_TT_ANY) |
+ 0;
+ break;
+ }
+
+ return (ret);
+}
+
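+/*
+ * Install one flow table entry per traffic type selected by the
+ * rule's tt_vec, steering matching packets to the corresponding TIR.
+ * On failure, any entries already added for this address are removed.
+ */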
+static int
+mlx5e_add_eth_addr_rule_sub(struct mlx5e_priv *priv,
+ struct mlx5e_eth_addr_info *ai, int type,
+ void *flow_context, void *match_criteria)
+{
+ u8 match_criteria_enable = 0;
+ void *match_value;
+ void *dest;
+ u8 *dmac;
+ u8 *match_criteria_dmac;
+ void *ft = priv->ft.main;
+ u32 *tirn = priv->tirn;
+ u32 tt_vec;
+ int err;
+
+ match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value);
+ dmac = MLX5_ADDR_OF(fte_match_param, match_value,
+ outer_headers.dmac_47_16);
+ match_criteria_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers.dmac_47_16);
+ dest = MLX5_ADDR_OF(flow_context, flow_context, destination);
+
+ MLX5_SET(flow_context, flow_context, action,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+ MLX5_SET(flow_context, flow_context, destination_list_size, 1);
+ MLX5_SET(dest_format_struct, dest, destination_type,
+ MLX5_FLOW_CONTEXT_DEST_TYPE_TIR);
+
+ switch (type) {
+ case MLX5E_FULLMATCH:
+ match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ memset(match_criteria_dmac, 0xff, ETH_ALEN);
+ ether_addr_copy(dmac, ai->addr);
+ break;
+
+ case MLX5E_ALLMULTI:
+ match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ match_criteria_dmac[0] = 0x01;
+ dmac[0] = 0x01;
+ break;
+
+ case MLX5E_PROMISC:
+ break;
+ default:
+ break;
+ }
+
+ tt_vec = mlx5e_get_tt_vec(ai, type);
+
+ if (tt_vec & (1 << MLX5E_TT_ANY)) {
+ MLX5_SET(dest_format_struct, dest, destination_id,
+ tirn[MLX5E_TT_ANY]);
+ err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
+ match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_ANY]);
+ if (err) {
+ mlx5e_del_eth_addr_from_flow_table(priv, ai);
+ return (err);
+ }
+ ai->tt_vec |= (1 << MLX5E_TT_ANY);
+ }
+
+ match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ outer_headers.ethertype);
+
+ if (tt_vec & (1 << MLX5E_TT_IPV4)) {
+ MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
+ ETHERTYPE_IP);
+ MLX5_SET(dest_format_struct, dest, destination_id,
+ tirn[MLX5E_TT_IPV4]);
+ err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
+ match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV4]);
+ if (err) {
+ mlx5e_del_eth_addr_from_flow_table(priv, ai);
+ return (err);
+ }
+ ai->tt_vec |= (1 << MLX5E_TT_IPV4);
+ }
+
+ if (tt_vec & (1 << MLX5E_TT_IPV6)) {
+ MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
+ ETHERTYPE_IPV6);
+ MLX5_SET(dest_format_struct, dest, destination_id,
+ tirn[MLX5E_TT_IPV6]);
+ err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
+ match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV6]);
+ if (err) {
+ mlx5e_del_eth_addr_from_flow_table(priv, ai);
+ return (err);
+ }
+ ai->tt_vec |= (1 << MLX5E_TT_IPV6);
+ }
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol,
+ IPPROTO_UDP);
+
+ if (tt_vec & (1 << MLX5E_TT_IPV4_UDP)) {
+ MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
+ ETHERTYPE_IP);
+ MLX5_SET(dest_format_struct, dest, destination_id,
+ tirn[MLX5E_TT_IPV4_UDP]);
+ err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
+ match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV4_UDP]);
+ if (err) {
+ mlx5e_del_eth_addr_from_flow_table(priv, ai);
+ return (err);
+ }
+ ai->tt_vec |= (1 << MLX5E_TT_IPV4_UDP);
+ }
+ if (tt_vec & (1 << MLX5E_TT_IPV6_UDP)) {
+ MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
+ ETHERTYPE_IPV6);
+ MLX5_SET(dest_format_struct, dest, destination_id,
+ tirn[MLX5E_TT_IPV6_UDP]);
+ err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
+ match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV6_UDP]);
+ if (err) {
+ mlx5e_del_eth_addr_from_flow_table(priv, ai);
+ return (err);
+ }
+ ai->tt_vec |= (1 << MLX5E_TT_IPV6_UDP);
+ }
+ MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol,
+ IPPROTO_TCP);
+
+ if (tt_vec & (1 << MLX5E_TT_IPV4_TCP)) {
+ MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
+ ETHERTYPE_IP);
+ MLX5_SET(dest_format_struct, dest, destination_id,
+ tirn[MLX5E_TT_IPV4_TCP]);
+ err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
+ match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV4_TCP]);
+ if (err) {
+ mlx5e_del_eth_addr_from_flow_table(priv, ai);
+ return (err);
+ }
+ ai->tt_vec |= (1 << MLX5E_TT_IPV4_TCP);
+ }
+ if (tt_vec & (1 << MLX5E_TT_IPV6_TCP)) {
+ MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
+ ETHERTYPE_IPV6);
+ MLX5_SET(dest_format_struct, dest, destination_id,
+ tirn[MLX5E_TT_IPV6_TCP]);
+ err = mlx5_add_flow_table_entry(ft, match_criteria_enable,
+ match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV6_TCP]);
+ if (err) {
+ mlx5e_del_eth_addr_from_flow_table(priv, ai);
+ return (err);
+ }
+ ai->tt_vec |= (1 << MLX5E_TT_IPV6_TCP);
+ }
+ return (0);
+}
+
+static int
+mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv,
+ struct mlx5e_eth_addr_info *ai, int type)
+{
+ u32 *flow_context;
+ u32 *match_criteria;
+ int err;
+
+ flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) +
+ MLX5_ST_SZ_BYTES(dest_format_struct));
+ match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
+ if (!flow_context || !match_criteria) {
+ if_printf(priv->ifp, "%s: alloc failed\n", __func__);
+ err = -ENOMEM;
+ goto add_eth_addr_rule_out;
+ }
+
+ err = mlx5e_add_eth_addr_rule_sub(priv, ai, type, flow_context,
+ match_criteria);
+ if (err)
+ if_printf(priv->ifp, "%s: failed\n", __func__);
+
+add_eth_addr_rule_out:
+ kvfree(match_criteria);
+ kvfree(flow_context);
+ return (err);
+}
+
+enum mlx5e_vlan_rule_type {
+ MLX5E_VLAN_RULE_TYPE_UNTAGGED,
+ MLX5E_VLAN_RULE_TYPE_ANY_VID,
+ MLX5E_VLAN_RULE_TYPE_MATCH_VID,
+};
+
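+/*
+ * Add a VLAN steering rule which forwards matching traffic to the
+ * main flow table. Depending on the rule type, the match is on
+ * untagged frames, any VLAN tag, or one specific VLAN ID.
+ */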
+static int
+mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
+ enum mlx5e_vlan_rule_type rule_type, u16 vid)
+{
+ u8 match_criteria_enable = 0;
+ u32 *flow_context;
+ void *match_value;
+ void *dest;
+ u32 *match_criteria;
+ u32 *ft_ix;
+ int err;
+
+ flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) +
+ MLX5_ST_SZ_BYTES(dest_format_struct));
+ match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
+ if (!flow_context || !match_criteria) {
+ if_printf(priv->ifp, "%s: alloc failed\n", __func__);
+ err = -ENOMEM;
+ goto add_vlan_rule_out;
+ }
+ match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value);
+ dest = MLX5_ADDR_OF(flow_context, flow_context, destination);
+
+ MLX5_SET(flow_context, flow_context, action,
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+ MLX5_SET(flow_context, flow_context, destination_list_size, 1);
+ MLX5_SET(dest_format_struct, dest, destination_type,
+ MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE);
+ MLX5_SET(dest_format_struct, dest, destination_id,
+ mlx5_get_flow_table_id(priv->ft.main));
+
+ match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ outer_headers.vlan_tag);
+
+ switch (rule_type) {
+ case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
+ ft_ix = &priv->vlan.untagged_rule_ft_ix;
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_VID:
+ ft_ix = &priv->vlan.any_vlan_rule_ft_ix;
+ MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag,
+ 1);
+ break;
+ default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */
+ ft_ix = &priv->vlan.active_vlans_ft_ix[vid];
+ MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag,
+ 1);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ outer_headers.first_vid);
+ MLX5_SET(fte_match_param, match_value, outer_headers.first_vid,
+ vid);
+ break;
+ }
+
+ err = mlx5_add_flow_table_entry(priv->ft.vlan, match_criteria_enable,
+ match_criteria, flow_context, ft_ix);
+ if (err)
+ if_printf(priv->ifp, "%s: failed\n", __func__);
+
+add_vlan_rule_out:
+ kvfree(match_criteria);
+ kvfree(flow_context);
+ return (err);
+}
+
+static void
+mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
+ enum mlx5e_vlan_rule_type rule_type, u16 vid)
+{
+ switch (rule_type) {
+ case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
+ mlx5_del_flow_table_entry(priv->ft.vlan,
+ priv->vlan.untagged_rule_ft_ix);
+ break;
+ case MLX5E_VLAN_RULE_TYPE_ANY_VID:
+ mlx5_del_flow_table_entry(priv->ft.vlan,
+ priv->vlan.any_vlan_rule_ft_ix);
+ break;
+ case MLX5E_VLAN_RULE_TYPE_MATCH_VID:
+ mlx5_del_flow_table_entry(priv->ft.vlan,
+ priv->vlan.active_vlans_ft_ix[vid]);
+ break;
+ }
+}
+
+void
+mlx5e_enable_vlan_filter(struct mlx5e_priv *priv)
+{
+ if (priv->vlan.filter_disabled) {
+ priv->vlan.filter_disabled = false;
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID,
+ 0);
+ }
+}
+
+void
+mlx5e_disable_vlan_filter(struct mlx5e_priv *priv)
+{
+ if (!priv->vlan.filter_disabled) {
+ priv->vlan.filter_disabled = true;
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID,
+ 0);
+ }
+}
+
+void
+mlx5e_vlan_rx_add_vid(void *arg, struct ifnet *ifp, u16 vid)
+{
+ struct mlx5e_priv *priv = arg;
+
+ if (ifp != priv->ifp)
+ return;
+
+ PRIV_LOCK(priv);
+ set_bit(vid, priv->vlan.active_vlans);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid);
+ PRIV_UNLOCK(priv);
+}
+
+void
+mlx5e_vlan_rx_kill_vid(void *arg, struct ifnet *ifp, u16 vid)
+{
+ struct mlx5e_priv *priv = arg;
+
+ if (ifp != priv->ifp)
+ return;
+
+ PRIV_LOCK(priv);
+ clear_bit(vid, priv->vlan.active_vlans);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid);
+ PRIV_UNLOCK(priv);
+}
+
+int
+mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv)
+{
+ u16 vid;
+ int err;
+
+ for_each_set_bit(vid, priv->vlan.active_vlans, VLAN_N_VID) {
+ err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID,
+ vid);
+ if (err)
+ return (err);
+ }
+
+ err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+ if (err)
+ return (err);
+
+ if (priv->vlan.filter_disabled) {
+ err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID,
+ 0);
+ if (err)
+ return (err);
+ }
+ return (0);
+}
+
+void
+mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv)
+{
+ u16 vid;
+
+ if (priv->vlan.filter_disabled)
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
+
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+ for_each_set_bit(vid, priv->vlan.active_vlans, VLAN_N_VID)
+ mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid);
+}
+
+#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
+ for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \
+ LIST_FOREACH_SAFE(hn, &(hash)[i], hlist, tmp)
+
+static void
+mlx5e_execute_action(struct mlx5e_priv *priv,
+ struct mlx5e_eth_addr_hash_node *hn)
+{
+ switch (hn->action) {
+ case MLX5E_ACTION_ADD:
+ mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH);
+ hn->action = MLX5E_ACTION_NONE;
+ break;
+
+ case MLX5E_ACTION_DEL:
+ mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai);
+ mlx5e_del_eth_addr_from_hash(hn);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+mlx5e_sync_ifp_addr(struct mlx5e_priv *priv)
+{
+ struct ifnet *ifp = priv->ifp;
+ struct ifaddr *ifa;
+ struct ifmultiaddr *ifma;
+
+ /* XXX adding this entry might not be needed */
+ mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_uc,
+ LLADDR((struct sockaddr_dl *)(ifp->if_addr->ifa_addr)));
+
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_LINK)
+ continue;
+ mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_uc,
+ LLADDR((struct sockaddr_dl *)ifa->ifa_addr));
+ }
+ if_addr_runlock(ifp);
+
+ if_maddr_rlock(ifp);
+ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
+ if (ifma->ifma_addr->sa_family != AF_LINK)
+ continue;
+ mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_mc,
+ LLADDR((struct sockaddr_dl *)ifma->ifma_addr));
+ }
+ if_maddr_runlock(ifp);
+}
+
+static void
+mlx5e_apply_ifp_addr(struct mlx5e_priv *priv)
+{
+ struct mlx5e_eth_addr_hash_node *hn;
+ struct mlx5e_eth_addr_hash_node *tmp;
+ int i;
+
+ mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_uc, i)
+ mlx5e_execute_action(priv, hn);
+
+ mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_mc, i)
+ mlx5e_execute_action(priv, hn);
+}
+
+static void
+mlx5e_handle_ifp_addr(struct mlx5e_priv *priv)
+{
+ struct mlx5e_eth_addr_hash_node *hn;
+ struct mlx5e_eth_addr_hash_node *tmp;
+ int i;
+
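+	/* Mark all entries for deletion; mlx5e_sync_ifp_addr() re-adds current ones. */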
+ mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_uc, i)
+ hn->action = MLX5E_ACTION_DEL;
+ mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_mc, i)
+ hn->action = MLX5E_ACTION_DEL;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_sync_ifp_addr(priv);
+
+ mlx5e_apply_ifp_addr(priv);
+}
+
+void
+mlx5e_set_rx_mode_core(struct mlx5e_priv *priv)
+{
+ struct mlx5e_eth_addr_db *ea = &priv->eth_addr;
+ struct ifnet *ndev = priv->ifp;
+
+ bool rx_mode_enable = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ bool promisc_enabled = rx_mode_enable && (ndev->if_flags & IFF_PROMISC);
+ bool allmulti_enabled = rx_mode_enable && (ndev->if_flags & IFF_ALLMULTI);
+ bool broadcast_enabled = rx_mode_enable;
+
+ bool enable_promisc = !ea->promisc_enabled && promisc_enabled;
+ bool disable_promisc = ea->promisc_enabled && !promisc_enabled;
+ bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled;
+ bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled;
+ bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled;
+ bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled;
+
+ /* update broadcast address */
+ ether_addr_copy(priv->eth_addr.broadcast.addr,
+ priv->ifp->if_broadcastaddr);
+
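+	/* Apply only the changes relative to the current state. */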
+ if (enable_promisc)
+ mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC);
+ if (enable_allmulti)
+ mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI);
+ if (enable_broadcast)
+ mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH);
+
+ mlx5e_handle_ifp_addr(priv);
+
+ if (disable_broadcast)
+ mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast);
+ if (disable_allmulti)
+ mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti);
+ if (disable_promisc)
+ mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc);
+
+ ea->promisc_enabled = promisc_enabled;
+ ea->allmulti_enabled = allmulti_enabled;
+ ea->broadcast_enabled = broadcast_enabled;
+}
+
+void
+mlx5e_set_rx_mode_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv =
+ container_of(work, struct mlx5e_priv, set_rx_mode_work);
+
+ PRIV_LOCK(priv);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_set_rx_mode_core(priv);
+ PRIV_UNLOCK(priv);
+}
+
+static int
+mlx5e_create_main_flow_table(struct mlx5e_priv *priv)
+{
+ struct mlx5_flow_table_group *g;
+ u8 *dmac;
+
+ g = malloc(9 * sizeof(*g), M_MLX5EN, M_WAITOK | M_ZERO);
+ if (g == NULL)
+ return (-ENOMEM);
+
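+	/*
+	 * Groups 0-2 match on ethertype and IP protocol with decreasing
+	 * specificity, groups 3-5 additionally match the destination MAC
+	 * and groups 6-8 match the multicast destination MAC bit instead.
+	 */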
+ g[0].log_sz = 2;
+ g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria,
+ outer_headers.ethertype);
+ MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria,
+ outer_headers.ip_protocol);
+
+ g[1].log_sz = 1;
+ g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria,
+ outer_headers.ethertype);
+
+ g[2].log_sz = 0;
+
+ g[3].log_sz = 14;
+ g[3].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ dmac = MLX5_ADDR_OF(fte_match_param, g[3].match_criteria,
+ outer_headers.dmac_47_16);
+ memset(dmac, 0xff, ETH_ALEN);
+ MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria,
+ outer_headers.ethertype);
+ MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria,
+ outer_headers.ip_protocol);
+
+ g[4].log_sz = 13;
+ g[4].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ dmac = MLX5_ADDR_OF(fte_match_param, g[4].match_criteria,
+ outer_headers.dmac_47_16);
+ memset(dmac, 0xff, ETH_ALEN);
+ MLX5_SET_TO_ONES(fte_match_param, g[4].match_criteria,
+ outer_headers.ethertype);
+
+ g[5].log_sz = 11;
+ g[5].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ dmac = MLX5_ADDR_OF(fte_match_param, g[5].match_criteria,
+ outer_headers.dmac_47_16);
+ memset(dmac, 0xff, ETH_ALEN);
+
+ g[6].log_sz = 2;
+ g[6].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ dmac = MLX5_ADDR_OF(fte_match_param, g[6].match_criteria,
+ outer_headers.dmac_47_16);
+ dmac[0] = 0x01;
+ MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria,
+ outer_headers.ethertype);
+ MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria,
+ outer_headers.ip_protocol);
+
+ g[7].log_sz = 1;
+ g[7].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ dmac = MLX5_ADDR_OF(fte_match_param, g[7].match_criteria,
+ outer_headers.dmac_47_16);
+ dmac[0] = 0x01;
+ MLX5_SET_TO_ONES(fte_match_param, g[7].match_criteria,
+ outer_headers.ethertype);
+
+ g[8].log_sz = 0;
+ g[8].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ dmac = MLX5_ADDR_OF(fte_match_param, g[8].match_criteria,
+ outer_headers.dmac_47_16);
+ dmac[0] = 0x01;
+ priv->ft.main = mlx5_create_flow_table(priv->mdev, 1,
+ MLX5_FLOW_TABLE_TYPE_NIC_RCV,
+ 0, 9, g);
+ free(g, M_MLX5EN);
+
+ return (priv->ft.main ? 0 : -ENOMEM);
+}
+
+static void
+mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv)
+{
+ mlx5_destroy_flow_table(priv->ft.main);
+ priv->ft.main = NULL;
+}
+
+static int
+mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv)
+{
+ struct mlx5_flow_table_group *g;
+
+ g = malloc(2 * sizeof(*g), M_MLX5EN, M_WAITOK | M_ZERO);
+ if (g == NULL)
+ return (-ENOMEM);
+
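+	/* one flow table entry per VLAN ID */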
+ g[0].log_sz = 12;
+ g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria,
+ outer_headers.vlan_tag);
+ MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria,
+ outer_headers.first_vid);
+
+ /* untagged + any vlan id */
+ g[1].log_sz = 1;
+ g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria,
+ outer_headers.vlan_tag);
+
+ priv->ft.vlan = mlx5_create_flow_table(priv->mdev, 0,
+ MLX5_FLOW_TABLE_TYPE_NIC_RCV,
+ 0, 2, g);
+ free(g, M_MLX5EN);
+
+ return (priv->ft.vlan ? 0 : -ENOMEM);
+}
+
+static void
+mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv)
+{
+ mlx5_destroy_flow_table(priv->ft.vlan);
+ priv->ft.vlan = NULL;
+}
+
+int
+mlx5e_open_flow_table(struct mlx5e_priv *priv)
+{
+ int err;
+
+ err = mlx5e_create_main_flow_table(priv);
+ if (err)
+ return (err);
+
+ err = mlx5e_create_vlan_flow_table(priv);
+ if (err)
+ goto err_destroy_main_flow_table;
+
+ return (0);
+
+err_destroy_main_flow_table:
+ mlx5e_destroy_main_flow_table(priv);
+
+ return (err);
+}
+
+void
+mlx5e_close_flow_table(struct mlx5e_priv *priv)
+{
+ mlx5e_destroy_vlan_flow_table(priv);
+ mlx5e_destroy_main_flow_table(priv);
+}
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
new file mode 100644
index 0000000..e50252c
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -0,0 +1,2902 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+
+#include <sys/sockio.h>
+#include <machine/atomic.h>
+
+#define ETH_DRIVER_VERSION "3.1.0-dev"
+char mlx5e_version[] = "Mellanox Ethernet driver"
+ " (" ETH_DRIVER_VERSION ")";
+
+struct mlx5e_rq_param {
+ u32 rqc [MLX5_ST_SZ_DW(rqc)];
+ struct mlx5_wq_param wq;
+};
+
+struct mlx5e_sq_param {
+ u32 sqc [MLX5_ST_SZ_DW(sqc)];
+ struct mlx5_wq_param wq;
+};
+
+struct mlx5e_cq_param {
+ u32 cqc [MLX5_ST_SZ_DW(cqc)];
+ struct mlx5_wq_param wq;
+ u16 eq_ix;
+};
+
+struct mlx5e_channel_param {
+ struct mlx5e_rq_param rq;
+ struct mlx5e_sq_param sq;
+ struct mlx5e_cq_param rx_cq;
+ struct mlx5e_cq_param tx_cq;
+};
+
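+/* Table translating MLX5E link modes into ifmedia subtypes and baud rates. */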
+static const struct {
+ u32 subtype;
+ u64 baudrate;
+} mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
+
+ [MLX5E_1000BASE_CX_SGMII] = {
+ .subtype = IFM_1000_CX_SGMII,
+ .baudrate = IF_Mbps(1000ULL),
+ },
+ [MLX5E_1000BASE_KX] = {
+ .subtype = IFM_1000_KX,
+ .baudrate = IF_Mbps(1000ULL),
+ },
+ [MLX5E_10GBASE_CX4] = {
+ .subtype = IFM_10G_CX4,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_10GBASE_KX4] = {
+ .subtype = IFM_10G_KX4,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_10GBASE_KR] = {
+ .subtype = IFM_10G_KR,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_20GBASE_KR2] = {
+ .subtype = IFM_20G_KR2,
+ .baudrate = IF_Gbps(20ULL),
+ },
+ [MLX5E_40GBASE_CR4] = {
+ .subtype = IFM_40G_CR4,
+ .baudrate = IF_Gbps(40ULL),
+ },
+ [MLX5E_40GBASE_KR4] = {
+ .subtype = IFM_40G_KR4,
+ .baudrate = IF_Gbps(40ULL),
+ },
+ [MLX5E_56GBASE_R4] = {
+ .subtype = IFM_56G_R4,
+ .baudrate = IF_Gbps(56ULL),
+ },
+ [MLX5E_10GBASE_CR] = {
+ .subtype = IFM_10G_CR1,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_10GBASE_SR] = {
+ .subtype = IFM_10G_SR,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_10GBASE_ER] = {
+ .subtype = IFM_10G_ER,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_40GBASE_SR4] = {
+ .subtype = IFM_40G_SR4,
+ .baudrate = IF_Gbps(40ULL),
+ },
+ [MLX5E_40GBASE_LR4] = {
+ .subtype = IFM_40G_LR4,
+ .baudrate = IF_Gbps(40ULL),
+ },
+ [MLX5E_100GBASE_CR4] = {
+ .subtype = IFM_100G_CR4,
+ .baudrate = IF_Gbps(100ULL),
+ },
+ [MLX5E_100GBASE_SR4] = {
+ .subtype = IFM_100G_SR4,
+ .baudrate = IF_Gbps(100ULL),
+ },
+ [MLX5E_100GBASE_KR4] = {
+ .subtype = IFM_100G_KR4,
+ .baudrate = IF_Gbps(100ULL),
+ },
+ [MLX5E_100GBASE_LR4] = {
+ .subtype = IFM_100G_LR4,
+ .baudrate = IF_Gbps(100ULL),
+ },
+ [MLX5E_100BASE_TX] = {
+ .subtype = IFM_100_TX,
+ .baudrate = IF_Mbps(100ULL),
+ },
+ [MLX5E_100BASE_T] = {
+ .subtype = IFM_100_T,
+ .baudrate = IF_Mbps(100ULL),
+ },
+ [MLX5E_10GBASE_T] = {
+ .subtype = IFM_10G_T,
+ .baudrate = IF_Gbps(10ULL),
+ },
+ [MLX5E_25GBASE_CR] = {
+ .subtype = IFM_25G_CR,
+ .baudrate = IF_Gbps(25ULL),
+ },
+ [MLX5E_25GBASE_KR] = {
+ .subtype = IFM_25G_KR,
+ .baudrate = IF_Gbps(25ULL),
+ },
+ [MLX5E_25GBASE_SR] = {
+ .subtype = IFM_25G_SR,
+ .baudrate = IF_Gbps(25ULL),
+ },
+ [MLX5E_50GBASE_CR2] = {
+ .subtype = IFM_50G_CR2,
+ .baudrate = IF_Gbps(50ULL),
+ },
+ [MLX5E_50GBASE_KR2] = {
+ .subtype = IFM_50G_KR2,
+ .baudrate = IF_Gbps(50ULL),
+ },
+};
+
+MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
+
+static void
+mlx5e_update_carrier(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ u32 eth_proto_oper;
+ int error;
+ u8 port_state;
+ u8 i;
+
+ port_state = mlx5_query_vport_state(mdev,
+ MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT);
+
+ if (port_state == VPORT_STATE_UP) {
+ priv->media_status_last |= IFM_ACTIVE;
+ } else {
+ priv->media_status_last &= ~IFM_ACTIVE;
+ priv->media_active_last = IFM_ETHER;
+ if_link_state_change(priv->ifp, LINK_STATE_DOWN);
+ return;
+ }
+
+ error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN);
+ if (error) {
+ priv->media_active_last = IFM_ETHER;
+ priv->ifp->if_baudrate = 1;
+ if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
+ __func__, error);
+ return;
+ }
+ eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+
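+	/* Translate the active protocol bits into media subtype and baudrate. */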
+ for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
+ if (mlx5e_mode_table[i].baudrate == 0)
+ continue;
+ if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
+ priv->ifp->if_baudrate =
+ mlx5e_mode_table[i].baudrate;
+ priv->media_active_last =
+ mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
+ }
+ }
+ if_link_state_change(priv->ifp, LINK_STATE_UP);
+}
+
+static void
+mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
+{
+ struct mlx5e_priv *priv = dev->if_softc;
+
+ ifmr->ifm_status = priv->media_status_last;
+ ifmr->ifm_active = priv->media_active_last |
+ (priv->params_ethtool.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
+ (priv->params_ethtool.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
+}
+
+static u32
+mlx5e_find_link_mode(u32 subtype)
+{
+ u32 i;
+ u32 link_mode = 0;
+
+ for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
+ if (mlx5e_mode_table[i].baudrate == 0)
+ continue;
+ if (mlx5e_mode_table[i].subtype == subtype)
+ link_mode |= MLX5E_PROT_MASK(i);
+ }
+
+ return (link_mode);
+}
+
+static int
+mlx5e_media_change(struct ifnet *dev)
+{
+ struct mlx5e_priv *priv = dev->if_softc;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 eth_proto_cap;
+ u32 link_mode;
+ int locked;
+ int error;
+
+ locked = PRIV_LOCKED(priv);
+ if (!locked)
+ PRIV_LOCK(priv);
+
+ if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
+ error = EINVAL;
+ goto done;
+ }
+
+ link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));
+
+ error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
+ if (error) {
+ if_printf(dev, "Query port media capability failed\n");
+ goto done;
+ }
+ if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO)
+ link_mode = eth_proto_cap;
+ else
+ link_mode = link_mode & eth_proto_cap;
+
+ if (!link_mode) {
+		if_printf(dev, "Unsupported link mode requested\n");
+ error = EINVAL;
+ goto done;
+ }
+
+ mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
+ mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
+ mlx5_set_port_status(mdev, MLX5_PORT_UP);
+
+done:
+ if (!locked)
+ PRIV_UNLOCK(priv);
+ return (error);
+}
+
+static void
+mlx5e_update_carrier_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ update_carrier_work);
+
+ PRIV_LOCK(priv);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state))
+ mlx5e_update_carrier(priv);
+ PRIV_UNLOCK(priv);
+}
+
+static void
+mlx5e_update_pport_counters(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_pport_stats *s = &priv->stats.pport;
+ struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
+ u32 *in;
+ u32 *out;
+ u64 *ptr;
+ unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ unsigned x;
+ unsigned y;
+
+ in = mlx5_vzalloc(sz);
+ out = mlx5_vzalloc(sz);
+ if (in == NULL || out == NULL)
+ goto free_out;
+
+ ptr = (uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+
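+	/* Each counter group is read by a separate PPCNT register access. */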
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+ for (x = y = 0; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
+ s->arg[y] = be64toh(ptr[x]);
+
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+ for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
+ s->arg[y] = be64toh(ptr[x]);
+ for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
+ MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
+ s_debug->arg[y] = be64toh(ptr[x]);
+
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+ for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
+ s_debug->arg[y] = be64toh(ptr[x]);
+
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+ for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
+ s_debug->arg[y] = be64toh(ptr[x]);
+free_out:
+ kvfree(in);
+ kvfree(out);
+}
+
+static void
+mlx5e_update_stats_work(struct work_struct *work)
+{
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
+ update_stats_work);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_vport_stats *s = &priv->stats.vport;
+ struct mlx5e_rq_stats *rq_stats;
+ struct mlx5e_sq_stats *sq_stats;
+ struct buf_ring *sq_br;
+#if (__FreeBSD_version < 1100000)
+ struct ifnet *ifp = priv->ifp;
+#endif
+ u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+ u64 tso_packets = 0;
+ u64 tso_bytes = 0;
+ u64 tx_queue_dropped = 0;
+ u64 tx_defragged = 0;
+ u64 tx_offload_none = 0;
+ u64 lro_packets = 0;
+ u64 lro_bytes = 0;
+ u64 sw_lro_queued = 0;
+ u64 sw_lro_flushed = 0;
+ u64 rx_csum_none = 0;
+ u64 rx_wqe_err = 0;
+ u32 out_of_rx_buffer = 0;
+ int i;
+ int j;
+
+ PRIV_LOCK(priv);
+ out = mlx5_vzalloc(outlen);
+ if (out == NULL)
+ goto free_out;
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
+ goto free_out;
+
+	/* Collect the SW counters first and then the HW counters, for consistency */
+ for (i = 0; i < priv->params.num_channels; i++) {
+ struct mlx5e_rq *rq = &priv->channel[i]->rq;
+
+ rq_stats = &priv->channel[i]->rq.stats;
+
+ /* collect stats from LRO */
+ rq_stats->sw_lro_queued = rq->lro.lro_queued;
+ rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
+ sw_lro_queued += rq_stats->sw_lro_queued;
+ sw_lro_flushed += rq_stats->sw_lro_flushed;
+ lro_packets += rq_stats->lro_packets;
+ lro_bytes += rq_stats->lro_bytes;
+ rx_csum_none += rq_stats->csum_none;
+ rx_wqe_err += rq_stats->wqe_err;
+
+ for (j = 0; j < priv->num_tc; j++) {
+ sq_stats = &priv->channel[i]->sq[j].stats;
+ sq_br = priv->channel[i]->sq[j].br;
+
+ tso_packets += sq_stats->tso_packets;
+ tso_bytes += sq_stats->tso_bytes;
+ tx_queue_dropped += sq_stats->dropped;
+ tx_queue_dropped += sq_br->br_drops;
+ tx_defragged += sq_stats->defragged;
+ tx_offload_none += sq_stats->csum_offload_none;
+ }
+ }
+
+ /* update counters */
+ s->tso_packets = tso_packets;
+ s->tso_bytes = tso_bytes;
+ s->tx_queue_dropped = tx_queue_dropped;
+ s->tx_defragged = tx_defragged;
+ s->lro_packets = lro_packets;
+ s->lro_bytes = lro_bytes;
+ s->sw_lro_queued = sw_lro_queued;
+ s->sw_lro_flushed = sw_lro_flushed;
+ s->rx_csum_none = rx_csum_none;
+ s->rx_wqe_err = rx_wqe_err;
+
+ /* HW counters */
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(query_vport_counter_in, in, opcode,
+ MLX5_CMD_OP_QUERY_VPORT_COUNTER);
+ MLX5_SET(query_vport_counter_in, in, op_mod, 0);
+ MLX5_SET(query_vport_counter_in, in, other_vport, 0);
+
+ memset(out, 0, outlen);
+
+ if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
+ goto free_out;
+
+#define MLX5_GET_CTR(out, x) \
+ MLX5_GET64(query_vport_counter_out, out, x)
+
+ s->rx_error_packets =
+ MLX5_GET_CTR(out, received_errors.packets);
+ s->rx_error_bytes =
+ MLX5_GET_CTR(out, received_errors.octets);
+ s->tx_error_packets =
+ MLX5_GET_CTR(out, transmit_errors.packets);
+ s->tx_error_bytes =
+ MLX5_GET_CTR(out, transmit_errors.octets);
+
+ s->rx_unicast_packets =
+ MLX5_GET_CTR(out, received_eth_unicast.packets);
+ s->rx_unicast_bytes =
+ MLX5_GET_CTR(out, received_eth_unicast.octets);
+ s->tx_unicast_packets =
+ MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
+ s->tx_unicast_bytes =
+ MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
+
+ s->rx_multicast_packets =
+ MLX5_GET_CTR(out, received_eth_multicast.packets);
+ s->rx_multicast_bytes =
+ MLX5_GET_CTR(out, received_eth_multicast.octets);
+ s->tx_multicast_packets =
+ MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
+ s->tx_multicast_bytes =
+ MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
+
+ s->rx_broadcast_packets =
+ MLX5_GET_CTR(out, received_eth_broadcast.packets);
+ s->rx_broadcast_bytes =
+ MLX5_GET_CTR(out, received_eth_broadcast.octets);
+ s->tx_broadcast_packets =
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
+ s->tx_broadcast_bytes =
+ MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
+
+ s->rx_packets =
+ s->rx_unicast_packets +
+ s->rx_multicast_packets +
+ s->rx_broadcast_packets;
+ s->rx_bytes =
+ s->rx_unicast_bytes +
+ s->rx_multicast_bytes +
+ s->rx_broadcast_bytes;
+ s->tx_packets =
+ s->tx_unicast_packets +
+ s->tx_multicast_packets +
+ s->tx_broadcast_packets;
+ s->tx_bytes =
+ s->tx_unicast_bytes +
+ s->tx_multicast_bytes +
+ s->tx_broadcast_bytes;
+
+ /* Update calculated offload counters */
+ s->tx_csum_offload = s->tx_packets - tx_offload_none;
+ s->rx_csum_good = s->rx_packets - s->rx_csum_none;
+
+#if (__FreeBSD_version < 1100000)
+ /* no get_counters interface in fbsd 10 */
+ ifp->if_ipackets = s->rx_packets;
+ ifp->if_ierrors = s->rx_error_packets;
+ ifp->if_opackets = s->tx_packets;
+ ifp->if_oerrors = s->tx_error_packets;
+ ifp->if_snd.ifq_drops = s->tx_queue_dropped;
+ ifp->if_ibytes = s->rx_bytes;
+ ifp->if_obytes = s->tx_bytes;
+#endif
+
+ mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
+ &out_of_rx_buffer);
+
+ /* Update per port counters */
+ mlx5e_update_pport_counters(priv);
+ priv->stats.pport.out_of_rx_buffer = (u64)out_of_rx_buffer;
+free_out:
+ kvfree(out);
+ PRIV_UNLOCK(priv);
+}
+
+static void
+mlx5e_update_stats(void *arg)
+{
+ struct mlx5e_priv *priv = arg;
+
+ schedule_work(&priv->update_stats_work);
+
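+	/* Re-arm the watchdog so that statistics are updated once per second. */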
+ callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
+}
+
+static void
+mlx5e_async_event_sub(struct mlx5e_priv *priv,
+ enum mlx5_dev_event event)
+{
+ switch (event) {
+ case MLX5_DEV_EVENT_PORT_UP:
+ case MLX5_DEV_EVENT_PORT_DOWN:
+ schedule_work(&priv->update_carrier_work);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
+ enum mlx5_dev_event event, unsigned long param)
+{
+ struct mlx5e_priv *priv = vpriv;
+
+ mtx_lock(&priv->async_events_mtx);
+ if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
+ mlx5e_async_event_sub(priv, event);
+ mtx_unlock(&priv->async_events_mtx);
+}
+
+static void
+mlx5e_enable_async_events(struct mlx5e_priv *priv)
+{
+ set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
+}
+
+static void
+mlx5e_disable_async_events(struct mlx5e_priv *priv)
+{
+ mtx_lock(&priv->async_events_mtx);
+ clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
+ mtx_unlock(&priv->async_events_mtx);
+}
+
+static const char *mlx5e_rq_stats_desc[] = {
+ MLX5E_RQ_STATS(MLX5E_STATS_DESC)
+};
+
+static int
+mlx5e_create_rq(struct mlx5e_channel *c,
+ struct mlx5e_rq_param *param,
+ struct mlx5e_rq *rq)
+{
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ char buffer[16];
+ void *rqc = param->rqc;
+ void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ int wq_sz;
+ int err;
+ int i;
+
+ /* Create DMA descriptor TAG */
+ if ((err = -bus_dma_tag_create(
+ bus_get_dma_tag(mdev->pdev->dev.bsddev),
+ 1, /* any alignment */
+ 0, /* no boundary */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ MJUM16BYTES, /* maxsize */
+ 1, /* nsegments */
+ MJUM16BYTES, /* maxsegsize */
+ 0, /* flags */
+ NULL, NULL, /* lockfunc, lockfuncarg */
+ &rq->dma_tag)))
+ goto done;
+
+ err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
+ &rq->wq_ctrl);
+ if (err)
+ goto err_free_dma_tag;
+
+ rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
+
+	if (priv->params.hw_lro_en) {
+		rq->wqe_sz = priv->params.lro_wqe_sz;
+	} else {
+		rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
+	}
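+	/* Round the buffer size up to the next supported mbuf cluster size. */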
+ if (rq->wqe_sz > MJUM16BYTES) {
+ err = -ENOMEM;
+ goto err_rq_wq_destroy;
+ } else if (rq->wqe_sz > MJUM9BYTES) {
+ rq->wqe_sz = MJUM16BYTES;
+ } else if (rq->wqe_sz > MJUMPAGESIZE) {
+ rq->wqe_sz = MJUM9BYTES;
+ } else if (rq->wqe_sz > MCLBYTES) {
+ rq->wqe_sz = MJUMPAGESIZE;
+ } else {
+ rq->wqe_sz = MCLBYTES;
+ }
+
+ wq_sz = mlx5_wq_ll_get_size(&rq->wq);
+ rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
+ if (rq->mbuf == NULL) {
+ err = -ENOMEM;
+ goto err_rq_wq_destroy;
+ }
+
+ for (i = 0; i != wq_sz; i++) {
+ struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
+ uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
+
+ err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
+ if (err != 0) {
+ while (i--)
+ bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
+ goto err_rq_mbuf_free;
+ }
+ wqe->data.lkey = c->mkey_be;
+ wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
+ }
+
+ rq->pdev = c->pdev;
+ rq->ifp = c->ifp;
+ rq->channel = c;
+ rq->ix = c->ix;
+
+ snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
+ mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
+ rq->stats.arg);
+
+#ifdef HAVE_TURBO_LRO
+ if (tcp_tlro_init(&rq->lro, c->ifp, MLX5E_BUDGET_MAX) != 0)
+ rq->lro.mbuf = NULL;
+#else
+ if (tcp_lro_init(&rq->lro))
+ rq->lro.lro_cnt = 0;
+ else
+ rq->lro.ifp = c->ifp;
+#endif
+ return (0);
+
+err_rq_mbuf_free:
+ free(rq->mbuf, M_MLX5EN);
+err_rq_wq_destroy:
+ mlx5_wq_destroy(&rq->wq_ctrl);
+err_free_dma_tag:
+ bus_dma_tag_destroy(rq->dma_tag);
+done:
+ return (err);
+}
+
+static void
+mlx5e_destroy_rq(struct mlx5e_rq *rq)
+{
+ int wq_sz;
+ int i;
+
+ /* destroy all sysctl nodes */
+ sysctl_ctx_free(&rq->stats.ctx);
+
+ /* free leftover LRO packets, if any */
+#ifdef HAVE_TURBO_LRO
+ tcp_tlro_free(&rq->lro);
+#else
+ tcp_lro_free(&rq->lro);
+#endif
+ wq_sz = mlx5_wq_ll_get_size(&rq->wq);
+ for (i = 0; i != wq_sz; i++) {
+ if (rq->mbuf[i].mbuf != NULL) {
+ bus_dmamap_unload(rq->dma_tag,
+ rq->mbuf[i].dma_map);
+ m_freem(rq->mbuf[i].mbuf);
+ }
+ bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
+ }
+ free(rq->mbuf, M_MLX5EN);
+ mlx5_wq_destroy(&rq->wq_ctrl);
+}
+
+static int
+mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
+{
+ struct mlx5e_channel *c = rq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ void *in;
+ void *rqc;
+ void *wq;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
+ sizeof(u64) * rq->wq_ctrl.buf.npages;
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ memcpy(rqc, param->rqc, sizeof(param->rqc));
+
+ MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, flush_in_error_en, 1);
+ if (priv->counter_set_id >= 0)
+ MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
+ MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
+ PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
+
+ mlx5_fill_page_array(&rq->wq_ctrl.buf,
+ (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
+
+ kvfree(in);
+
+ return (err);
+}
+
+static int
+mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
+{
+ struct mlx5e_channel *c = rq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+
+ MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
+ MLX5_SET(modify_rq_in, in, rq_state, curr_state);
+ MLX5_SET(rqc, rqc, state, next_state);
+
+ err = mlx5_core_modify_rq(mdev, in, inlen);
+
+ kvfree(in);
+
+ return (err);
+}
+
+static void
+mlx5e_disable_rq(struct mlx5e_rq *rq)
+{
+ struct mlx5e_channel *c = rq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ mlx5_core_destroy_rq(mdev, rq->rqn);
+}
+
+static int
+mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
+{
+ struct mlx5e_channel *c = rq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_wq_ll *wq = &rq->wq;
+ int i;
+
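+	/* Poll until enough RX WQEs have been posted, or time out. */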
+ for (i = 0; i < 1000; i++) {
+ if (wq->cur_sz >= priv->params.min_rx_wqes)
+ return (0);
+
+ msleep(4);
+ }
+ return (-ETIMEDOUT);
+}
+
+static int
+mlx5e_open_rq(struct mlx5e_channel *c,
+ struct mlx5e_rq_param *param,
+ struct mlx5e_rq *rq)
+{
+ int err;
+ int i;
+
+ err = mlx5e_create_rq(c, param, rq);
+ if (err)
+ return (err);
+
+ err = mlx5e_enable_rq(rq, param);
+ if (err)
+ goto err_destroy_rq;
+
+ err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+ if (err)
+ goto err_disable_rq;
+
+ c->rq.enabled = 1;
+
+ /*
+ * Test send queues, which will trigger
+ * "mlx5e_post_rx_wqes()":
+ */
+ for (i = 0; i != c->num_tc; i++)
+ mlx5e_send_nop(&c->sq[i], 1, true);
+ return (0);
+
+err_disable_rq:
+ mlx5e_disable_rq(rq);
+err_destroy_rq:
+ mlx5e_destroy_rq(rq);
+
+ return (err);
+}
+
+static void
+mlx5e_close_rq(struct mlx5e_rq *rq)
+{
+ rq->enabled = 0;
+ mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
+}
+
+static void
+mlx5e_close_rq_wait(struct mlx5e_rq *rq)
+{
+ /* wait till RQ is empty */
+ while (!mlx5_wq_ll_is_empty(&rq->wq)) {
+ msleep(4);
+ rq->cq.mcq.comp(&rq->cq.mcq);
+ }
+
+ mlx5e_disable_rq(rq);
+ mlx5e_destroy_rq(rq);
+}
+
+static void
+mlx5e_free_sq_db(struct mlx5e_sq *sq)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int x;
+
+ for (x = 0; x != wq_sz; x++)
+ bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
+ free(sq->mbuf, M_MLX5EN);
+}
+
+static int
+mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
+{
+ int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+ int err;
+ int x;
+
+ sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
+ if (sq->mbuf == NULL)
+ return (-ENOMEM);
+
+ /* Create DMA descriptor MAPs */
+ for (x = 0; x != wq_sz; x++) {
+ err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
+ if (err != 0) {
+ while (x--)
+ bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
+ free(sq->mbuf, M_MLX5EN);
+ return (err);
+ }
+ }
+ return (0);
+}
+
+static const char *mlx5e_sq_stats_desc[] = {
+ MLX5E_SQ_STATS(MLX5E_STATS_DESC)
+};
+
+static int
+mlx5e_create_sq(struct mlx5e_channel *c,
+ int tc,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_sq *sq)
+{
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ char buffer[16];
+
+ void *sqc = param->sqc;
+ void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ int err;
+
+ /* Create DMA descriptor TAG */
+ if ((err = -bus_dma_tag_create(
+ bus_get_dma_tag(mdev->pdev->dev.bsddev),
+ 1, /* any alignment */
+ 0, /* no boundary */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */
+ MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */
+ MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */
+ 0, /* flags */
+ NULL, NULL, /* lockfunc, lockfuncarg */
+ &sq->dma_tag)))
+ goto done;
+
+ err = mlx5_alloc_map_uar(mdev, &sq->uar);
+ if (err)
+ goto err_free_dma_tag;
+
+ err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
+ &sq->wq_ctrl);
+ if (err)
+ goto err_unmap_free_uar;
+
+ sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
+ sq->uar_map = sq->uar.map;
+ sq->uar_bf_map = sq->uar.bf_map;
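+	/* The usable blue-flame buffer is half of the blue-flame register size. */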
+ sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
+
+ err = mlx5e_alloc_sq_db(sq);
+ if (err)
+ goto err_sq_wq_destroy;
+
+ sq->pdev = c->pdev;
+ sq->mkey_be = c->mkey_be;
+ sq->channel = c;
+ sq->tc = tc;
+
+ sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
+ M_WAITOK, &sq->lock);
+ if (sq->br == NULL) {
+ if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
+ __func__);
+ err = -ENOMEM;
+ goto err_free_sq_db;
+ }
+
+ sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
+ taskqueue_thread_enqueue, &sq->sq_tq);
+ if (sq->sq_tq == NULL) {
+ if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
+ __func__);
+ err = -ENOMEM;
+ goto err_free_drbr;
+ }
+ TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
+ taskqueue_start_threads(&sq->sq_tq, 1, PI_NET, "%s tx sq",
+ c->ifp->if_xname);
+
+ snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
+ mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
+ sq->stats.arg);
+
+ return (0);
+
+err_free_drbr:
+ buf_ring_free(sq->br, M_MLX5EN);
+err_free_sq_db:
+ mlx5e_free_sq_db(sq);
+err_sq_wq_destroy:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+
+err_unmap_free_uar:
+ mlx5_unmap_free_uar(mdev, &sq->uar);
+
+err_free_dma_tag:
+ bus_dma_tag_destroy(sq->dma_tag);
+done:
+ return (err);
+}
+
+static void
+mlx5e_destroy_sq(struct mlx5e_sq *sq)
+{
+ struct mlx5e_channel *c = sq->channel;
+ struct mlx5e_priv *priv = c->priv;
+
+ /* destroy all sysctl nodes */
+ sysctl_ctx_free(&sq->stats.ctx);
+
+ mlx5e_free_sq_db(sq);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+ mlx5_unmap_free_uar(priv->mdev, &sq->uar);
+ taskqueue_drain(sq->sq_tq, &sq->sq_task);
+ taskqueue_free(sq->sq_tq);
+ buf_ring_free(sq->br, M_MLX5EN);
+}
+
+static int
+mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
+{
+ struct mlx5e_channel *c = sq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ void *in;
+ void *sqc;
+ void *wq;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+ sizeof(u64) * sq->wq_ctrl.buf.npages;
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ memcpy(sqc, param->sqc, sizeof(param->sqc));
+
+ MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]);
+ MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, tis_lst_sz, 1);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, uar_page, sq->uar.index);
+ MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
+ PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
+
+ mlx5_fill_page_array(&sq->wq_ctrl.buf,
+ (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);
+
+ kvfree(in);
+
+ return (err);
+}
+
+static int
+mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
+{
+ struct mlx5e_channel *c = sq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ void *in;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+
+ MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
+ MLX5_SET(modify_sq_in, in, sq_state, curr_state);
+ MLX5_SET(sqc, sqc, state, next_state);
+
+ err = mlx5_core_modify_sq(mdev, in, inlen);
+
+ kvfree(in);
+
+ return (err);
+}
+
+static void
+mlx5e_disable_sq(struct mlx5e_sq *sq)
+{
+ struct mlx5e_channel *c = sq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ mlx5_core_destroy_sq(mdev, sq->sqn);
+}
+
+static int
+mlx5e_open_sq(struct mlx5e_channel *c,
+ int tc,
+ struct mlx5e_sq_param *param,
+ struct mlx5e_sq *sq)
+{
+ int err;
+
+ err = mlx5e_create_sq(c, tc, param, sq);
+ if (err)
+ return (err);
+
+ err = mlx5e_enable_sq(sq, param);
+ if (err)
+ goto err_destroy_sq;
+
+ err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
+ if (err)
+ goto err_disable_sq;
+
+ atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY);
+
+ return (0);
+
+err_disable_sq:
+ mlx5e_disable_sq(sq);
+err_destroy_sq:
+ mlx5e_destroy_sq(sq);
+
+ return (err);
+}
+
+static void
+mlx5e_close_sq(struct mlx5e_sq *sq)
+{
+
+ /* ensure hw is notified of all pending wqes */
+ if (mlx5e_sq_has_room_for(sq, 1))
+ mlx5e_send_nop(sq, 1, true);
+
+ mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
+}
+
+static void
+mlx5e_close_sq_wait(struct mlx5e_sq *sq)
+{
+ /* wait till SQ is empty */
+ while (sq->cc != sq->pc) {
+ msleep(4);
+ sq->cq.mcq.comp(&sq->cq.mcq);
+ }
+
+ mlx5e_disable_sq(sq);
+ mlx5e_destroy_sq(sq);
+}
+
+static int
+mlx5e_create_cq(struct mlx5e_channel *c,
+ struct mlx5e_cq_param *param,
+ struct mlx5e_cq *cq,
+ mlx5e_cq_comp_t *comp)
+{
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ int eqn_not_used;
+ int irqn;
+ int err;
+ u32 i;
+
+ param->wq.buf_numa_node = 0;
+ param->wq.db_numa_node = 0;
+ param->eq_ix = c->ix;
+
+ err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
+ &cq->wq_ctrl);
+ if (err)
+ return (err);
+
+ mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
+
+ mcq->cqe_sz = 64;
+ mcq->set_ci_db = cq->wq_ctrl.db.db;
+ mcq->arm_db = cq->wq_ctrl.db.db + 1;
+ *mcq->set_ci_db = 0;
+ *mcq->arm_db = 0;
+ mcq->vector = param->eq_ix;
+ mcq->comp = comp;
+ mcq->event = mlx5e_cq_error_event;
+ mcq->irqn = irqn;
+ mcq->uar = &priv->cq_uar;
+
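+	/* Initialize all CQEs so they are not mistaken for valid completions. */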
+ for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+
+ cqe->op_own = 0xf1;
+ }
+
+ cq->channel = c;
+
+ return (0);
+}
+
+static void
+mlx5e_destroy_cq(struct mlx5e_cq *cq)
+{
+ mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int
+mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param,
+ u8 moderation_mode)
+{
+ struct mlx5e_channel *c = cq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ void *in;
+ void *cqc;
+ int inlen;
+ int irqn_not_used;
+ int eqn;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * cq->wq_ctrl.buf.npages;
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+
+ memcpy(cqc, param->cqc, sizeof(param->cqc));
+
+ mlx5_fill_page_array(&cq->wq_ctrl.buf,
+ (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
+
+ mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
+
+ MLX5_SET(cqc, cqc, cq_period_mode, moderation_mode);
+ MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+ PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
+
+ err = mlx5_core_create_cq(mdev, mcq, in, inlen);
+
+ kvfree(in);
+
+ if (err)
+ return (err);
+
+ mlx5e_cq_arm(cq);
+
+ return (0);
+}
+
+static void
+mlx5e_disable_cq(struct mlx5e_cq *cq)
+{
+ struct mlx5e_channel *c = cq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ mlx5_core_destroy_cq(mdev, &cq->mcq);
+}
+
+static int
+mlx5e_open_cq(struct mlx5e_channel *c,
+ struct mlx5e_cq_param *param,
+ struct mlx5e_cq *cq,
+ mlx5e_cq_comp_t *comp,
+ u8 moderation_mode)
+{
+ int err;
+
+ err = mlx5e_create_cq(c, param, cq, comp);
+ if (err)
+ return (err);
+
+ err = mlx5e_enable_cq(cq, param, moderation_mode);
+ if (err)
+ goto err_destroy_cq;
+
+ return (0);
+
+err_destroy_cq:
+ mlx5e_destroy_cq(cq);
+
+ return (err);
+}
+
+static void
+mlx5e_close_cq(struct mlx5e_cq *cq)
+{
+ mlx5e_disable_cq(cq);
+ mlx5e_destroy_cq(cq);
+}
+
+static int
+mlx5e_open_tx_cqs(struct mlx5e_channel *c,
+ struct mlx5e_channel_param *cparam)
+{
+ int err;
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ /* open completion queue */
+ err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq,
+ &mlx5e_tx_cq_comp, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+ if (err)
+ goto err_close_tx_cqs;
+ }
+ return (0);
+
+err_close_tx_cqs:
+ for (tc--; tc >= 0; tc--)
+ mlx5e_close_cq(&c->sq[tc].cq);
+
+ return (err);
+}
+
+static void
+mlx5e_close_tx_cqs(struct mlx5e_channel *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_cq(&c->sq[tc].cq);
+}
+
+static int
+mlx5e_open_sqs(struct mlx5e_channel *c,
+ struct mlx5e_channel_param *cparam)
+{
+ int err;
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
+ if (err)
+ goto err_close_sqs;
+ }
+
+ return (0);
+
+err_close_sqs:
+ for (tc--; tc >= 0; tc--) {
+ mlx5e_close_sq(&c->sq[tc]);
+ mlx5e_close_sq_wait(&c->sq[tc]);
+ }
+
+ return (err);
+}
+
+static void
+mlx5e_close_sqs(struct mlx5e_channel *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_sq(&c->sq[tc]);
+}
+
+static void
+mlx5e_close_sqs_wait(struct mlx5e_channel *c)
+{
+ int tc;
+
+ for (tc = 0; tc < c->num_tc; tc++)
+ mlx5e_close_sq_wait(&c->sq[tc]);
+}
+
+static void
+mlx5e_chan_mtx_init(struct mlx5e_channel *c)
+{
+ int tc;
+
+ mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ mtx_init(&c->sq[tc].lock, "mlx5tx", MTX_NETWORK_LOCK, MTX_DEF);
+ mtx_init(&c->sq[tc].comp_lock, "mlx5comp", MTX_NETWORK_LOCK,
+ MTX_DEF);
+ }
+}
+
+static void
+mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
+{
+ int tc;
+
+ mtx_destroy(&c->rq.mtx);
+
+ for (tc = 0; tc < c->num_tc; tc++) {
+ mtx_destroy(&c->sq[tc].lock);
+ mtx_destroy(&c->sq[tc].comp_lock);
+ }
+}
+
+static int
+mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
+ struct mlx5e_channel_param *cparam,
+ struct mlx5e_channel * volatile *cp)
+{
+ struct mlx5e_channel *c;
+ u8 rx_moderation_mode;
+ int err;
+
+ c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
+ if (c == NULL)
+ return (-ENOMEM);
+
+ c->priv = priv;
+ c->ix = ix;
+ c->cpu = 0;
+ c->pdev = &priv->mdev->pdev->dev;
+ c->ifp = priv->ifp;
+ c->mkey_be = cpu_to_be32(priv->mr.key);
+ c->num_tc = priv->num_tc;
+
+ /* init mutexes */
+ mlx5e_chan_mtx_init(c);
+
+ /* open transmit completion queue */
+ err = mlx5e_open_tx_cqs(c, cparam);
+ if (err)
+ goto err_free;
+
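+	/* Select CQE based RX interrupt moderation only when configured and supported. */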
+ switch (priv->params.rx_cq_moderation_mode) {
+ case 0:
+ rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+ break;
+ default:
+ if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
+ rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
+ else
+ rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+ break;
+ }
+
+ /* open receive completion queue */
+ err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq,
+ &mlx5e_rx_cq_comp, rx_moderation_mode);
+ if (err)
+ goto err_close_tx_cqs;
+
+ err = mlx5e_open_sqs(c, cparam);
+ if (err)
+ goto err_close_rx_cq;
+
+ err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
+ if (err)
+ goto err_close_sqs;
+
+ /* store channel pointer */
+ *cp = c;
+
+ /* poll receive queue initially */
+ c->rq.cq.mcq.comp(&c->rq.cq.mcq);
+
+ return (0);
+
+err_close_sqs:
+ mlx5e_close_sqs(c);
+ mlx5e_close_sqs_wait(c);
+
+err_close_rx_cq:
+ mlx5e_close_cq(&c->rq.cq);
+
+err_close_tx_cqs:
+ mlx5e_close_tx_cqs(c);
+
+err_free:
+ /* destroy mutexes */
+ mlx5e_chan_mtx_destroy(c);
+ free(c, M_MLX5EN);
+ return (err);
+}
+
+static void
+mlx5e_close_channel(struct mlx5e_channel * volatile *pp)
+{
+ struct mlx5e_channel *c = *pp;
+
+ /* check if channel is already closed */
+ if (c == NULL)
+ return;
+ mlx5e_close_rq(&c->rq);
+ mlx5e_close_sqs(c);
+}
+
+static void
+mlx5e_close_channel_wait(struct mlx5e_channel * volatile *pp)
+{
+ struct mlx5e_channel *c = *pp;
+
+ /* check if channel is already closed */
+ if (c == NULL)
+ return;
+ /* ensure channel pointer is no longer used */
+ *pp = NULL;
+
+ mlx5e_close_rq_wait(&c->rq);
+ mlx5e_close_sqs_wait(c);
+ mlx5e_close_cq(&c->rq.cq);
+ mlx5e_close_tx_cqs(c);
+ /* destroy mutexes */
+ mlx5e_chan_mtx_destroy(c);
+ free(c, M_MLX5EN);
+}
+
+static void
+mlx5e_build_rq_param(struct mlx5e_priv *priv,
+ struct mlx5e_rq_param *param)
+{
+ void *rqc = param->rqc;
+ void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
+ MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
+ MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
+ MLX5_SET(wq, wq, pd, priv->pdn);
+
+ param->wq.buf_numa_node = 0;
+ param->wq.db_numa_node = 0;
+ param->wq.linear = 1;
+}
+
+static void
+mlx5e_build_sq_param(struct mlx5e_priv *priv,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, pd, priv->pdn);
+
+ param->wq.buf_numa_node = 0;
+ param->wq.db_numa_node = 0;
+ param->wq.linear = 1;
+}
+
+static void
+mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
+}
+
+static void
+mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
+ MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
+ MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
+
+ mlx5e_build_common_cq_param(priv, param);
+}
+
+static void
+mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_cq_param *param)
+{
+ void *cqc = param->cqc;
+
+ MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
+ MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
+ MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
+
+ mlx5e_build_common_cq_param(priv, param);
+}
+
+static void
+mlx5e_build_channel_param(struct mlx5e_priv *priv,
+ struct mlx5e_channel_param *cparam)
+{
+ memset(cparam, 0, sizeof(*cparam));
+
+ mlx5e_build_rq_param(priv, &cparam->rq);
+ mlx5e_build_sq_param(priv, &cparam->sq);
+ mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
+ mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
+}
+
+static int
+mlx5e_open_channels(struct mlx5e_priv *priv)
+{
+ struct mlx5e_channel_param cparam;
+ void *ptr;
+ int err;
+ int i;
+ int j;
+
+ priv->channel = malloc(priv->params.num_channels *
+ sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
+ if (priv->channel == NULL)
+ return (-ENOMEM);
+
+ mlx5e_build_channel_param(priv, &cparam);
+ for (i = 0; i < priv->params.num_channels; i++) {
+ err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
+ if (err)
+ goto err_close_channels;
+ }
+
+ for (j = 0; j < priv->params.num_channels; j++) {
+ err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
+ if (err)
+ goto err_close_channels;
+ }
+
+ return (0);
+
+err_close_channels:
+ for (i--; i >= 0; i--) {
+ mlx5e_close_channel(&priv->channel[i]);
+ mlx5e_close_channel_wait(&priv->channel[i]);
+ }
+
+ /* remove "volatile" attribute from "channel" pointer */
+ ptr = __DECONST(void *, priv->channel);
+ priv->channel = NULL;
+
+ free(ptr, M_MLX5EN);
+
+ return (err);
+}
+
+static void
+mlx5e_close_channels(struct mlx5e_priv *priv)
+{
+ void *ptr;
+ int i;
+
+ if (priv->channel == NULL)
+ return;
+
+ for (i = 0; i < priv->params.num_channels; i++)
+ mlx5e_close_channel(&priv->channel[i]);
+ for (i = 0; i < priv->params.num_channels; i++)
+ mlx5e_close_channel_wait(&priv->channel[i]);
+
+ /* remove "volatile" attribute from "channel" pointer */
+ ptr = __DECONST(void *, priv->channel);
+ priv->channel = NULL;
+
+ free(ptr, M_MLX5EN);
+}
+
+static int
+mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+ void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(tisc, tisc, prio, tc);
+ MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
+
+ return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
+}
+
+static void
+mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
+{
+ mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
+}
+
+static int
+mlx5e_open_tises(struct mlx5e_priv *priv)
+{
+ int num_tc = priv->num_tc;
+ int err;
+ int tc;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ err = mlx5e_open_tis(priv, tc);
+ if (err)
+ goto err_close_tises;
+ }
+
+ return (0);
+
+err_close_tises:
+ for (tc--; tc >= 0; tc--)
+ mlx5e_close_tis(priv, tc);
+
+ return (err);
+}
+
+static void
+mlx5e_close_tises(struct mlx5e_priv *priv)
+{
+ int num_tc = priv->num_tc;
+ int tc;
+
+ for (tc = 0; tc < num_tc; tc++)
+ mlx5e_close_tis(priv, tc);
+}
+
+static int
+mlx5e_open_rqt(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 *in;
+ u32 out[MLX5_ST_SZ_DW(create_rqt_out)];
+ void *rqtc;
+ int inlen;
+ int err;
+ int sz;
+ int i;
+
+ sz = 1 << priv->params.rx_hash_log_tbl_sz;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+ rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
+ MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
+
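+	/* Fill the indirection table round-robin over the open channels. */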
+ for (i = 0; i < sz; i++) {
+ int ix = i % priv->params.num_channels;
+
+ MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
+ }
+
+ MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
+
+ memset(out, 0, sizeof(out));
+ err = mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
+ if (!err)
+ priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
+
+ kvfree(in);
+
+ return (err);
+}
+
+static void
+mlx5e_close_rqt(struct mlx5e_priv *priv)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)];
+ u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)];
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
+
+ mlx5_cmd_exec_check_status(priv->mdev, in, sizeof(in), out,
+ sizeof(out));
+}
+
+static void
+mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
+{
+ void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+ __be32 *hkey;
+
+ MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
+
+#define ROUGH_MAX_L2_L3_HDR_SZ 256
+
+#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP)
+
+#define MLX5_HASH_ALL (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_L4_SPORT |\
+ MLX5_HASH_FIELD_SEL_L4_DPORT)
+
+#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
+ MLX5_HASH_FIELD_SEL_DST_IP |\
+ MLX5_HASH_FIELD_SEL_IPSEC_SPI)
+
+ if (priv->params.hw_lro_en) {
+ MLX5_SET(tirc, tirc, lro_enable_mask,
+ MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
+ MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
+ MLX5_SET(tirc, tirc, lro_max_msg_sz,
+ (priv->params.lro_wqe_sz -
+ ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
+ /* TODO: add the option to choose timer value dynamically */
+ MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
+ MLX5_CAP_ETH(priv->mdev,
+ lro_timer_supported_periods[2]));
+ }
+
+ switch (tt) {
+ case MLX5E_TT_ANY:
+ MLX5_SET(tirc, tirc, disp_type,
+ MLX5_TIRC_DISP_TYPE_DIRECT);
+ MLX5_SET(tirc, tirc, inline_rqn,
+ priv->channel[0]->rq.rqn);
+ break;
+ default:
+ MLX5_SET(tirc, tirc, disp_type,
+ MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table,
+ priv->rqtn);
+ MLX5_SET(tirc, tirc, rx_hash_fn,
+ MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
+ MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
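+		/* Toeplitz key used for the RSS hash. */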
+ hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
+ hkey[0] = cpu_to_be32(0xD181C62C);
+ hkey[1] = cpu_to_be32(0xF7F4DB5B);
+ hkey[2] = cpu_to_be32(0x1983A2FC);
+ hkey[3] = cpu_to_be32(0x943E1ADB);
+ hkey[4] = cpu_to_be32(0xD9389E6B);
+ hkey[5] = cpu_to_be32(0xD1039C2C);
+ hkey[6] = cpu_to_be32(0xA74499AD);
+ hkey[7] = cpu_to_be32(0x593D56D9);
+ hkey[8] = cpu_to_be32(0xF3253C06);
+ hkey[9] = cpu_to_be32(0x2ADC1FFC);
+ break;
+ }
+
+ switch (tt) {
+ case MLX5E_TT_IPV4_TCP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_TCP);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_ALL);
+ break;
+
+ case MLX5E_TT_IPV6_TCP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_TCP);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_ALL);
+ break;
+
+ case MLX5E_TT_IPV4_UDP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_UDP);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_ALL);
+ break;
+
+ case MLX5E_TT_IPV6_UDP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
+ MLX5_L4_PROT_TYPE_UDP);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_ALL);
+ break;
+
+ case MLX5E_TT_IPV4_IPSEC_AH:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV6_IPSEC_AH:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV4_IPSEC_ESP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV6_IPSEC_ESP:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP_IPSEC_SPI);
+ break;
+
+ case MLX5E_TT_IPV4:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV4);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP);
+ break;
+
+ case MLX5E_TT_IPV6:
+ MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
+ MLX5_L3_PROT_TYPE_IPV6);
+ MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+ MLX5_HASH_IP);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static int
+mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 *in;
+ void *tirc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = mlx5_vzalloc(inlen);
+ if (in == NULL)
+ return (-ENOMEM);
+ tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
+
+ mlx5e_build_tir_ctx(priv, tirc, tt);
+
+ err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
+
+ kvfree(in);
+
+ return (err);
+}
+
+static void
+mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
+{
+ mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
+}
+
+static int
+mlx5e_open_tirs(struct mlx5e_priv *priv)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < MLX5E_NUM_TT; i++) {
+ err = mlx5e_open_tir(priv, i);
+ if (err)
+ goto err_close_tirs;
+ }
+
+ return (0);
+
+err_close_tirs:
+ for (i--; i >= 0; i--)
+ mlx5e_close_tir(priv, i);
+
+ return (err);
+}
+
+static void
+mlx5e_close_tirs(struct mlx5e_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < MLX5E_NUM_TT; i++)
+ mlx5e_close_tir(priv, i);
+}
+
+/*
+ * SW MTU does not include headers,
+ * HW MTU includes all headers and checksums.
+ */
+static int
+mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
+{
+ struct mlx5e_priv *priv = ifp->if_softc;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int hw_mtu;
+ int min_mtu;
+ int err;
+
+	/*
+	 * Set the MTU to zero first in order to query
+	 * the firmware's minimal MTU.
+	 */
+ err = mlx5_set_port_mtu(mdev, 0);
+ if (err)
+ return (err);
+ err = mlx5_query_port_oper_mtu(mdev, &min_mtu);
+ if (err) {
+ if_printf(ifp, "Query port minimal MTU failed\n");
+ return (err);
+ }
+
+ if (sw_mtu < MLX5E_HW2SW_MTU(min_mtu)) {
+ ifp->if_mtu = sw_mtu;
+ return (0);
+ }
+
+ err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(sw_mtu));
+ if (err)
+ return (err);
+
+ err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
+ if (!err) {
+ ifp->if_mtu = MLX5E_HW2SW_MTU(hw_mtu);
+
+ if (ifp->if_mtu != sw_mtu) {
+ if_printf(ifp, "Port MTU %d is different than "
+ "ifp mtu %d\n", sw_mtu, (int)ifp->if_mtu);
+ }
+ } else {
+ if_printf(ifp, "Query port MTU, after setting new "
+ "MTU value, failed\n");
+ ifp->if_mtu = sw_mtu;
+ }
+ return (0);
+}
+
+int
+mlx5e_open_locked(struct ifnet *ifp)
+{
+ struct mlx5e_priv *priv = ifp->if_softc;
+ int err;
+
+ /* check if already opened */
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
+ return (0);
+
+ err = mlx5e_open_tises(priv);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
+ __func__, err);
+ return (err);
+ }
+ err = mlx5_vport_alloc_q_counter(priv->mdev, &priv->counter_set_id);
+ if (err) {
+ if_printf(priv->ifp,
+ "%s: mlx5_vport_alloc_q_counter failed: %d\n",
+ __func__, err);
+ goto err_close_tises;
+ }
+ err = mlx5e_open_channels(priv);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
+ __func__, err);
+ goto err_dalloc_q_counter;
+ }
+ err = mlx5e_open_rqt(priv);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
+ __func__, err);
+ goto err_close_channels;
+ }
+ err = mlx5e_open_tirs(priv);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n",
+ __func__, err);
+ goto err_close_rqls;
+ }
+ err = mlx5e_open_flow_table(priv);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
+ __func__, err);
+ goto err_close_tirs;
+ }
+ err = mlx5e_add_all_vlan_rules(priv);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
+ __func__, err);
+ goto err_close_flow_table;
+ }
+ set_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ mlx5e_update_carrier(priv);
+ mlx5e_set_rx_mode_core(priv);
+
+ return (0);
+
+err_close_flow_table:
+ mlx5e_close_flow_table(priv);
+
+err_close_tirs:
+ mlx5e_close_tirs(priv);
+
+err_close_rqls:
+ mlx5e_close_rqt(priv);
+
+err_close_channels:
+ mlx5e_close_channels(priv);
+
+err_dalloc_q_counter:
+ mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
+
+err_close_tises:
+ mlx5e_close_tises(priv);
+
+ return (err);
+}
+
+static void
+mlx5e_open(void *arg)
+{
+ struct mlx5e_priv *priv = arg;
+
+ PRIV_LOCK(priv);
+ if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
+ if_printf(priv->ifp,
+ "%s: Setting port status to up failed\n",
+ __func__);
+
+ mlx5e_open_locked(priv->ifp);
+ priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ PRIV_UNLOCK(priv);
+}
+
+int
+mlx5e_close_locked(struct ifnet *ifp)
+{
+ struct mlx5e_priv *priv = ifp->if_softc;
+
+ /* check if already closed */
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
+ return (0);
+
+ clear_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ mlx5e_set_rx_mode_core(priv);
+ mlx5e_del_all_vlan_rules(priv);
+ if_link_state_change(priv->ifp, LINK_STATE_DOWN);
+ mlx5e_close_flow_table(priv);
+ mlx5e_close_tirs(priv);
+ mlx5e_close_rqt(priv);
+ mlx5e_close_channels(priv);
+ mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
+ mlx5e_close_tises(priv);
+
+ return (0);
+}
+
+#if (__FreeBSD_version >= 1100000)
+static uint64_t
+mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
+{
+ struct mlx5e_priv *priv = ifp->if_softc;
+ u64 retval;
+
+ /* PRIV_LOCK(priv); XXX not allowed */
+ switch (cnt) {
+ case IFCOUNTER_IPACKETS:
+ retval = priv->stats.vport.rx_packets;
+ break;
+ case IFCOUNTER_IERRORS:
+ retval = priv->stats.vport.rx_error_packets;
+ break;
+ case IFCOUNTER_OPACKETS:
+ retval = priv->stats.vport.tx_packets;
+ break;
+ case IFCOUNTER_OERRORS:
+ retval = priv->stats.vport.tx_error_packets;
+ break;
+ case IFCOUNTER_IBYTES:
+ retval = priv->stats.vport.rx_bytes;
+ break;
+ case IFCOUNTER_OBYTES:
+ retval = priv->stats.vport.tx_bytes;
+ break;
+ case IFCOUNTER_IMCASTS:
+ retval = priv->stats.vport.rx_multicast_packets;
+ break;
+ case IFCOUNTER_OMCASTS:
+ retval = priv->stats.vport.tx_multicast_packets;
+ break;
+ case IFCOUNTER_OQDROPS:
+ retval = priv->stats.vport.tx_queue_dropped;
+ break;
+ default:
+ retval = if_get_counter_default(ifp, cnt);
+ break;
+ }
+ /* PRIV_UNLOCK(priv); XXX not allowed */
+ return (retval);
+}
+#endif
+
+static void
+mlx5e_set_rx_mode(struct ifnet *ifp)
+{
+ struct mlx5e_priv *priv = ifp->if_softc;
+
+ schedule_work(&priv->set_rx_mode_work);
+}
+
+static int
+mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
+{
+ struct mlx5e_priv *priv;
+ struct ifreq *ifr;
+ struct ifi2creq i2c;
+ int error = 0;
+ int mask = 0;
+ int size_read = 0;
+ int module_num;
+ int max_mtu;
+
+ priv = ifp->if_softc;
+
+ /* check if detaching */
+ if (priv == NULL || priv->gone != 0)
+ return (ENXIO);
+
+ switch (command) {
+ case SIOCSIFMTU:
+ ifr = (struct ifreq *)data;
+
+ PRIV_LOCK(priv);
+ mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
+
+ if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
+ ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
+ int was_opened;
+
+ was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (was_opened)
+ mlx5e_close_locked(ifp);
+
+ /* set new MTU */
+ mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
+
+ if (was_opened)
+ mlx5e_open_locked(ifp);
+ } else {
+ error = EINVAL;
+ if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
+ MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
+ }
+ PRIV_UNLOCK(priv);
+ break;
+ case SIOCSIFFLAGS:
+ if ((ifp->if_flags & IFF_UP) &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ mlx5e_set_rx_mode(ifp);
+ break;
+ }
+ PRIV_LOCK(priv);
+ if (ifp->if_flags & IFF_UP) {
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
+ mlx5e_open_locked(ifp);
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
+ }
+ } else {
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ mlx5_set_port_status(priv->mdev,
+ MLX5_PORT_DOWN);
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
+ mlx5e_close_locked(ifp);
+ mlx5e_update_carrier(priv);
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ }
+ }
+ PRIV_UNLOCK(priv);
+ break;
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ mlx5e_set_rx_mode(ifp);
+ break;
+ case SIOCSIFMEDIA:
+ case SIOCGIFMEDIA:
+ case SIOCGIFXMEDIA:
+ ifr = (struct ifreq *)data;
+ error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
+ break;
+ case SIOCSIFCAP:
+ ifr = (struct ifreq *)data;
+ PRIV_LOCK(priv);
+ mask = ifr->ifr_reqcap ^ ifp->if_capenable;
+
+ if (mask & IFCAP_TXCSUM) {
+ ifp->if_capenable ^= IFCAP_TXCSUM;
+ ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
+
+ if (IFCAP_TSO4 & ifp->if_capenable &&
+ !(IFCAP_TXCSUM & ifp->if_capenable)) {
+ ifp->if_capenable &= ~IFCAP_TSO4;
+ ifp->if_hwassist &= ~CSUM_IP_TSO;
+ if_printf(ifp,
+ "tso4 disabled due to -txcsum.\n");
+ }
+ }
+ if (mask & IFCAP_TXCSUM_IPV6) {
+ ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
+ ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
+
+ if (IFCAP_TSO6 & ifp->if_capenable &&
+ !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
+ ifp->if_capenable &= ~IFCAP_TSO6;
+ ifp->if_hwassist &= ~CSUM_IP6_TSO;
+ if_printf(ifp,
+ "tso6 disabled due to -txcsum6.\n");
+ }
+ }
+ if (mask & IFCAP_RXCSUM)
+ ifp->if_capenable ^= IFCAP_RXCSUM;
+ if (mask & IFCAP_RXCSUM_IPV6)
+ ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
+
+ if (mask & IFCAP_TSO4) {
+ if (!(IFCAP_TSO4 & ifp->if_capenable) &&
+ !(IFCAP_TXCSUM & ifp->if_capenable)) {
+ if_printf(ifp, "enable txcsum first.\n");
+ error = EAGAIN;
+ goto out;
+ }
+ ifp->if_capenable ^= IFCAP_TSO4;
+ ifp->if_hwassist ^= CSUM_IP_TSO;
+ }
+ if (mask & IFCAP_TSO6) {
+ if (!(IFCAP_TSO6 & ifp->if_capenable) &&
+ !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
+ if_printf(ifp, "enable txcsum6 first.\n");
+ error = EAGAIN;
+ goto out;
+ }
+ ifp->if_capenable ^= IFCAP_TSO6;
+ ifp->if_hwassist ^= CSUM_IP6_TSO;
+ }
+
+ if (mask & IFCAP_VLAN_HWFILTER) {
+ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
+ mlx5e_disable_vlan_filter(priv);
+ else
+ mlx5e_enable_vlan_filter(priv);
+
+ ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
+ }
+ if (mask & IFCAP_VLAN_HWTAGGING)
+ ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
+
+ if (mask & IFCAP_WOL_MAGIC)
+ ifp->if_capenable ^= IFCAP_WOL_MAGIC;
+
+ VLAN_CAPABILITIES(ifp);
+		/* Turning off LRO also turns off HW LRO, if it is enabled */
+ if (mask & IFCAP_LRO ) {
+ int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ bool need_restart = false;
+
+ ifp->if_capenable ^= IFCAP_LRO;
+ if (!(ifp->if_capenable & IFCAP_LRO)) {
+ if (priv->params.hw_lro_en) {
+ priv->params.hw_lro_en = false;
+ need_restart = true;
+ /* Not sure this is the correct way */
+ priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
+ }
+ }
+ if (was_opened && need_restart) {
+ mlx5e_close_locked(ifp);
+ mlx5e_open_locked(ifp);
+ }
+ }
+out:
+ PRIV_UNLOCK(priv);
+ break;
+
+ case SIOCGI2C:
+ ifr = (struct ifreq *)data;
+
+ /* Copy from the user-space address ifr_data to the kernel-space address i2c */
+ error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
+ if (error)
+ break;
+
+ if (i2c.len > sizeof(i2c.data)) {
+ error = EINVAL;
+ break;
+ }
+
+ PRIV_LOCK(priv);
+ /* Get module_num which is required for the query_eeprom */
+ error = mlx5_query_module_num(priv->mdev, &module_num);
+ if (error) {
+ if_printf(ifp, "Query module num failed, eeprom "
+ "reading is not supported\n");
+ goto err_i2c;
+ }
+
+ /*
+ * Note that we ignore i2c.addr here. The driver hardcodes
+ * the address to 0x50, while standard expects it to be 0xA0.
+ */
+ error = mlx5_query_eeprom(priv->mdev,
+ MLX5E_I2C_ADDR_LOW, MLX5E_EEPROM_LOW_PAGE,
+ (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
+ (uint32_t *)i2c.data, &size_read);
+ if (error) {
+ if_printf(ifp, "Query eeprom failed, eeprom "
+ "reading is not supported\n");
+ goto err_i2c;
+ }
+
+ if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
+ error = mlx5_query_eeprom(priv->mdev,
+ MLX5E_I2C_ADDR_LOW, MLX5E_EEPROM_LOW_PAGE,
+ (uint32_t)(i2c.offset + size_read),
+ (uint32_t)(i2c.len - size_read), module_num,
+ (uint32_t *)(i2c.data + size_read), &size_read);
+ }
+ if (error) {
+ if_printf(ifp, "Query eeprom failed, eeprom "
+ "reading is not supported\n");
+ goto err_i2c;
+ }
+
+ error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
+err_i2c:
+ PRIV_UNLOCK(priv);
+ break;
+
+ default:
+ error = ether_ioctl(ifp, command, data);
+ break;
+ }
+ return (error);
+}
+
+static int
+mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
+{
+ /*
+	 * TODO: uncomment once FW really sets all these bits: if
+ * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
+ * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
+ * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return
+ * -ENOTSUPP;
+ */
+
+	/* TODO: add more must-have features */
+
+ return (0);
+}
+
+static void
+mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
+ struct mlx5e_priv *priv,
+ int num_comp_vectors)
+{
+ /*
+ * TODO: Consider link speed for setting "log_sq_size",
+ * "log_rq_size" and "cq_moderation_xxx":
+ */
+ priv->params.log_sq_size =
+ MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+ priv->params.log_rq_size =
+ MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+ priv->params.rx_cq_moderation_usec =
+ MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
+ MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
+ MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
+ priv->params.rx_cq_moderation_mode =
+ MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
+ priv->params.rx_cq_moderation_pkts =
+ MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
+ priv->params.tx_cq_moderation_usec =
+ MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+ priv->params.tx_cq_moderation_pkts =
+ MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+ priv->params.min_rx_wqes =
+ MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
+ priv->params.rx_hash_log_tbl_sz =
+ (order_base_2(num_comp_vectors) >
+ MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
+ order_base_2(num_comp_vectors) :
+ MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
+ priv->params.num_tc = 1;
+ priv->params.default_vlan_prio = 0;
+ priv->counter_set_id = -1;
+
+ /*
+	 * HW LRO is currently off by default.
+	 * Once it is enabled by default, the HW capability
+	 * "!!MLX5_CAP_ETH(mdev, lro_cap)" should be taken into account.
+ */
+ priv->params.hw_lro_en = false;
+ priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+
+ priv->mdev = mdev;
+ priv->params.num_channels = num_comp_vectors;
+ priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
+ priv->queue_mapping_channel_mask =
+ roundup_pow_of_two(num_comp_vectors) - 1;
+ priv->num_tc = priv->params.num_tc;
+ priv->default_vlan_prio = priv->params.default_vlan_prio;
+
+ INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+ INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
+ INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
+}
+
+static int
+mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
+ struct mlx5_core_mr *mr)
+{
+ struct ifnet *ifp = priv->ifp;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_create_mkey_mbox_in *in;
+ int err;
+
+ in = mlx5_vzalloc(sizeof(*in));
+ if (in == NULL) {
+ if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
+ return (-ENOMEM);
+ }
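+	/*
+	 * Request a physical-address (PA) memory key with local read and
+	 * write access; MLX5_MKEY_LEN64 makes it span the whole address
+	 * space.
+	 */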
+ in->seg.flags = MLX5_PERM_LOCAL_WRITE |
+ MLX5_PERM_LOCAL_READ |
+ MLX5_ACCESS_MODE_PA;
+ in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
+ in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+
+ err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL,
+ NULL);
+ if (err)
+ if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
+ __func__, err);
+
+ kvfree(in);
+
+ return (err);
+}
+
+static const char *mlx5e_vport_stats_desc[] = {
+ MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
+};
+
+static const char *mlx5e_pport_stats_desc[] = {
+ MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
+};
+
+static void
+mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
+{
+ mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
+ sx_init(&priv->state_lock, "mlx5state");
+ callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
+}
+
+static void
+mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
+{
+ mtx_destroy(&priv->async_events_mtx);
+ sx_destroy(&priv->state_lock);
+}
+
+static int
+sysctl_firmware(SYSCTL_HANDLER_ARGS)
+{
+	/*
+	 * The string format is "%d.%d.%d".
+	 * fw_rev_{maj,min,sub} each return a u16 (at most 65535),
+	 * which needs at most 5 characters.
+	 * Add two "." separators and the terminating NUL,
+	 * giving at most 18 (5*3 + 3) characters.
+	 */
+ char fw[18];
+ struct mlx5e_priv *priv = arg1;
+ int error;
+
+ snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
+ fw_rev_sub(priv->mdev));
+ error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
+ return (error);
+}
+
+static void
+mlx5e_add_hw_stats(struct mlx5e_priv *priv)
+{
+ SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
+ OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
+ sysctl_firmware, "A", "HCA firmware version");
+
+ SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
+ OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
+ "Board ID");
+}
+
+static void *
+mlx5e_create_ifp(struct mlx5_core_dev *mdev)
+{
+ static volatile int mlx5_en_unit;
+ struct ifnet *ifp;
+ struct mlx5e_priv *priv;
+ u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
+ struct sysctl_oid_list *child;
+ int ncv = mdev->priv.eq_table.num_comp_vectors;
+ char unit[16];
+ int err;
+ int i;
+ u32 eth_proto_cap;
+
+ if (mlx5e_check_required_hca_cap(mdev)) {
+ mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
+ return (NULL);
+ }
+ priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
+ if (priv == NULL) {
+ mlx5_core_err(mdev, "malloc() failed\n");
+ return (NULL);
+ }
+ mlx5e_priv_mtx_init(priv);
+
+ ifp = priv->ifp = if_alloc(IFT_ETHER);
+ if (ifp == NULL) {
+ mlx5_core_err(mdev, "if_alloc() failed\n");
+ goto err_free_priv;
+ }
+ ifp->if_softc = priv;
+ if_initname(ifp, "mce", atomic_fetchadd_int(&mlx5_en_unit, 1));
+ ifp->if_mtu = ETHERMTU;
+ ifp->if_init = mlx5e_open;
+ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_ioctl = mlx5e_ioctl;
+ ifp->if_transmit = mlx5e_xmit;
+ ifp->if_qflush = if_qflush;
+#if (__FreeBSD_version >= 1100000)
+ ifp->if_get_counter = mlx5e_get_counter;
+#endif
+ ifp->if_snd.ifq_maxlen = ifqmaxlen;
+ /*
+ * Set driver features
+ */
+ ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
+ ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
+ ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
+ ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
+ ifp->if_capabilities |= IFCAP_LRO;
+ ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
+
+ /* set TSO limits so that we don't have to drop TX packets */
+ ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
+ ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
+ ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
+
+ ifp->if_capenable = ifp->if_capabilities;
+ ifp->if_hwassist = 0;
+ if (ifp->if_capenable & IFCAP_TSO)
+ ifp->if_hwassist |= CSUM_TSO;
+ if (ifp->if_capenable & IFCAP_TXCSUM)
+ ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
+ if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
+ ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
+
+ /* ifnet sysctl tree */
+ sysctl_ctx_init(&priv->sysctl_ctx);
+ priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
+ OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
+ if (priv->sysctl_ifnet == NULL) {
+ mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
+ goto err_free_sysctl;
+ }
+ snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
+ priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
+ if (priv->sysctl_ifnet == NULL) {
+ mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
+ goto err_free_sysctl;
+ }
+ /* HW sysctl tree */
+ child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
+ priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
+ OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
+ if (priv->sysctl_hw == NULL) {
+ mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
+ goto err_free_sysctl;
+ }
+
+ mlx5e_build_ifp_priv(mdev, priv, ncv);
+
+ err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
+ if (err) {
+ if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
+ __func__, err);
+ goto err_free_sysctl;
+ }
+ err = mlx5_core_alloc_pd(mdev, &priv->pdn);
+ if (err) {
+ if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
+ __func__, err);
+ goto err_unmap_free_uar;
+ }
+
+ err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
+
+ if (err) {
+ if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
+ __func__, err);
+ goto err_dealloc_pd;
+ }
+
+ err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
+ if (err) {
+ if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
+ __func__, err);
+ goto err_dealloc_transport_domain;
+ }
+ mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
+
+ /* set default MTU */
+ mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
+
+ /* Set desc */
+ device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);
+
+ /* Set default media status */
+ priv->media_status_last = IFM_AVALID;
+ priv->media_active_last = IFM_ETHER | IFM_AUTO;
+
+ /* Pauseframes are enabled by default */
+ priv->params_ethtool.tx_pauseframe_control = 1;
+ priv->params_ethtool.rx_pauseframe_control = 1;
+
+ err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
+ if (err) {
+ eth_proto_cap = 0;
+ if_printf(ifp, "%s: Query port media capability failed, %d\n",
+ __func__, err);
+ }
+
+ /* Setup supported medias */
+ ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
+ mlx5e_media_change, mlx5e_media_status);
+
+ for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
+ if (mlx5e_mode_table[i].baudrate == 0)
+ continue;
+ if (MLX5E_PROT_MASK(i) & eth_proto_cap)
+ ifmedia_add(&priv->media,
+ IFM_ETHER | mlx5e_mode_table[i].subtype |
+ IFM_FDX, 0, NULL);
+ }
+
+ ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
+ ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO);
+ ether_ifattach(ifp, dev_addr);
+
+ /* Register for VLAN events */
+ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
+ mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
+ priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
+ mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
+
+ /* Link is down by default */
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+
+ mlx5e_enable_async_events(priv);
+
+ mlx5e_add_hw_stats(priv);
+
+ mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
+ priv->stats.vport.arg);
+
+ mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+ "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
+ priv->stats.pport.arg);
+
+ mlx5e_create_ethtool(priv);
+
+ mtx_lock(&priv->async_events_mtx);
+ mlx5e_update_stats(priv);
+ mtx_unlock(&priv->async_events_mtx);
+
+ return (priv);
+
+err_dealloc_transport_domain:
+ mlx5_dealloc_transport_domain(mdev, priv->tdn);
+
+err_dealloc_pd:
+ mlx5_core_dealloc_pd(mdev, priv->pdn);
+
+err_unmap_free_uar:
+ mlx5_unmap_free_uar(mdev, &priv->cq_uar);
+
+err_free_sysctl:
+ sysctl_ctx_free(&priv->sysctl_ctx);
+
+ if_free(ifp);
+
+err_free_priv:
+ mlx5e_priv_mtx_destroy(priv);
+ free(priv, M_MLX5EN);
+ return (NULL);
+}
+
+static void
+mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
+{
+ struct mlx5e_priv *priv = vpriv;
+ struct ifnet *ifp = priv->ifp;
+
+ /* don't allow more IOCTLs */
+ priv->gone = 1;
+
+ /* XXX wait a bit to allow IOCTL handlers to complete */
+ pause("W", hz);
+
+ /* stop watchdog timer */
+ callout_drain(&priv->watchdog);
+
+ if (priv->vlan_attach != NULL)
+ EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
+ if (priv->vlan_detach != NULL)
+ EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
+
+ /* make sure device gets closed */
+ PRIV_LOCK(priv);
+ mlx5e_close_locked(ifp);
+ PRIV_UNLOCK(priv);
+
+ /* unregister device */
+ ifmedia_removeall(&priv->media);
+ ether_ifdetach(ifp);
+ if_free(ifp);
+
+ /* destroy all remaining sysctl nodes */
+ if (priv->sysctl_debug)
+ sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
+ sysctl_ctx_free(&priv->stats.vport.ctx);
+ sysctl_ctx_free(&priv->stats.pport.ctx);
+ sysctl_ctx_free(&priv->sysctl_ctx);
+
+ mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
+ mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
+ mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
+ mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
+ mlx5e_disable_async_events(priv);
+ flush_scheduled_work();
+ mlx5e_priv_mtx_destroy(priv);
+ free(priv, M_MLX5EN);
+}
+
+static void *
+mlx5e_get_ifp(void *vpriv)
+{
+ struct mlx5e_priv *priv = vpriv;
+
+ return (priv->ifp);
+}
+
+static struct mlx5_interface mlx5e_interface = {
+ .add = mlx5e_create_ifp,
+ .remove = mlx5e_destroy_ifp,
+ .event = mlx5e_async_event,
+ .protocol = MLX5_INTERFACE_PROTOCOL_ETH,
+ .get_dev = mlx5e_get_ifp,
+};
+
+void
+mlx5e_init(void)
+{
+ mlx5_register_interface(&mlx5e_interface);
+}
+
+void
+mlx5e_cleanup(void)
+{
+ mlx5_unregister_interface(&mlx5e_interface);
+}
+
+module_init_order(mlx5e_init, SI_ORDER_THIRD);
+module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
+
+#if (__FreeBSD_version >= 1100000)
+MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
+#endif
+MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
+MODULE_VERSION(mlx5en, 1);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
new file mode 100644
index 0000000..bce4915
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
@@ -0,0 +1,340 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+#include <machine/in_cksum.h>
+
+static inline int
+mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq,
+ struct mlx5e_rx_wqe *wqe, u16 ix)
+{
+ bus_dma_segment_t segs[1];
+ struct mbuf *mb;
+ int nsegs;
+ int err;
+
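+	/* reuse any mbuf still attached to this WQE slot */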
+ if (rq->mbuf[ix].mbuf != NULL)
+ return (0);
+
+ mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rq->wqe_sz);
+ if (unlikely(!mb))
+ return (-ENOMEM);
+
+ /* set initial mbuf length */
+ mb->m_pkthdr.len = mb->m_len = rq->wqe_sz;
+
+ /* get IP header aligned */
+ m_adj(mb, MLX5E_NET_IP_ALIGN);
+
+ err = -bus_dmamap_load_mbuf_sg(rq->dma_tag, rq->mbuf[ix].dma_map,
+ mb, segs, &nsegs, BUS_DMA_NOWAIT);
+ if (err != 0)
+ goto err_free_mbuf;
+ if (unlikely(nsegs != 1)) {
+ bus_dmamap_unload(rq->dma_tag, rq->mbuf[ix].dma_map);
+ err = -ENOMEM;
+ goto err_free_mbuf;
+ }
+ wqe->data.addr = cpu_to_be64(segs[0].ds_addr);
+
+ rq->mbuf[ix].mbuf = mb;
+ rq->mbuf[ix].data = mb->m_data;
+
+ bus_dmamap_sync(rq->dma_tag, rq->mbuf[ix].dma_map,
+ BUS_DMASYNC_PREREAD);
+ return (0);
+
+err_free_mbuf:
+ m_freem(mb);
+ return (err);
+}
+
+static void
+mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
+{
+ if (unlikely(rq->enabled == 0))
+ return;
+
+ while (!mlx5_wq_ll_is_full(&rq->wq)) {
+ struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, rq->wq.head);
+
+ if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, rq->wq.head)))
+ break;
+
+ mlx5_wq_ll_push(&rq->wq, be16_to_cpu(wqe->next.next_wqe_index));
+ }
+
+ /* ensure wqes are visible to device before updating doorbell record */
+ wmb();
+
+ mlx5_wq_ll_update_db_record(&rq->wq);
+}
+
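+/*
+ * Rewrite the IP and TCP headers of a HW LRO aggregated frame so that
+ * it appears to the network stack as one large TCP segment.
+ */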
+static void
+mlx5e_lro_update_hdr(struct mbuf *mb, struct mlx5_cqe64 *cqe)
+{
+ /* TODO: consider vlans, ip options, ... */
+ struct ether_header *eh;
+ uint16_t eh_type;
+ struct ip6_hdr *ip6 = NULL;
+ struct ip *ip4 = NULL;
+ struct tcphdr *th;
+ uint32_t *ts_ptr;
+
+ eh = mtod(mb, struct ether_header *);
+ eh_type = ntohs(eh->ether_type);
+
+ u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
+ int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) ||
+ (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));
+
+ /* TODO: consider vlan */
+ u16 tot_len = be32_to_cpu(cqe->byte_cnt) - ETHER_HDR_LEN;
+
+ switch (eh_type) {
+ case ETHERTYPE_IP:
+ ip4 = (struct ip *)(eh + 1);
+ th = (struct tcphdr *)(ip4 + 1);
+ break;
+ case ETHERTYPE_IPV6:
+ ip6 = (struct ip6_hdr *)(eh + 1);
+ th = (struct tcphdr *)(ip6 + 1);
+ break;
+ default:
+ return;
+ }
+
+ ts_ptr = (uint32_t *)(th + 1);
+
+ if (get_cqe_lro_tcppsh(cqe))
+ th->th_flags |= TH_PUSH;
+
+ if (tcp_ack) {
+ th->th_flags |= TH_ACK;
+ th->th_ack = cqe->lro_ack_seq_num;
+ th->th_win = cqe->lro_tcp_win;
+
+ /* FreeBSD handles only 32bit aligned timestamp
+ * right after the TCP hdr
+ * +--------+--------+--------+--------+
+ * | NOP | NOP | TSopt | 10 |
+ * +--------+--------+--------+--------+
+ * | TSval timestamp |
+ * +--------+--------+--------+--------+
+ * | TSecr timestamp |
+ * +--------+--------+--------+--------+
+ */
+ if (get_cqe_lro_timestamp_valid(cqe) &&
+		    __predict_true(*ts_ptr == ntohl(TCPOPT_NOP << 24 |
+ TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 |
+ TCPOLEN_TIMESTAMP))) {
+			/* cqe->timestamp is 64 bits long.
+			 * [0-31] - timestamp.
+			 * [32-63] - timestamp echo reply.
+ */
+ ts_ptr[1] = *(uint32_t *)&cqe->timestamp;
+ ts_ptr[2] = *((uint32_t *)&cqe->timestamp + 1);
+ }
+ }
+
+ if (ip4) {
+ ip4->ip_ttl = cqe->lro_min_ttl;
+ ip4->ip_len = cpu_to_be16(tot_len);
+ ip4->ip_sum = 0;
+ ip4->ip_sum = in_cksum(mb, ip4->ip_hl << 2);
+ } else {
+ ip6->ip6_hlim = cqe->lro_min_ttl;
+ ip6->ip6_plen = cpu_to_be16(tot_len -
+ sizeof(struct ip6_hdr));
+ }
+ /* TODO: handle tcp checksum */
+}
+
+static inline void
+mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe,
+ struct mlx5e_rq *rq, struct mbuf *mb,
+ u32 cqe_bcnt)
+{
+ struct ifnet *ifp = rq->ifp;
+ int lro_num_seg; /* HW LRO session aggregated packets counter */
+
+ lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
+ if (lro_num_seg > 1) {
+ mlx5e_lro_update_hdr(mb, cqe);
+ rq->stats.lro_packets++;
+ rq->stats.lro_bytes += cqe_bcnt;
+ }
+
+ mb->m_pkthdr.len = mb->m_len = cqe_bcnt;
+ /* check if a Toeplitz hash was computed */
+ if (cqe->rss_hash_type != 0)
+ mb->m_pkthdr.flowid = be32_to_cpu(cqe->rss_hash_result);
+ else
+ mb->m_pkthdr.flowid = rq->ix;
+ M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE);
+ mb->m_pkthdr.rcvif = ifp;
+
+ if (likely(ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) &&
+ ((cqe->hds_ip_ext & (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK)) ==
+ (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK))) {
+ mb->m_pkthdr.csum_flags =
+ CSUM_IP_CHECKED | CSUM_IP_VALID |
+ CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+ mb->m_pkthdr.csum_data = htons(0xffff);
+ } else {
+ rq->stats.csum_none++;
+ }
+
+ if (cqe_has_vlan(cqe)) {
+ mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->vlan_info);
+ mb->m_flags |= M_VLANTAG;
+ }
+}
+
+static int
+mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget)
+{
+#ifndef HAVE_TURBO_LRO
+ struct lro_entry *queued;
+#endif
+ int i;
+
+ for (i = 0; i < budget; i++) {
+ struct mlx5e_rx_wqe *wqe;
+ struct mlx5_cqe64 *cqe;
+ struct mbuf *mb;
+ __be16 wqe_counter_be;
+ u16 wqe_counter;
+ u32 byte_cnt;
+
+ cqe = mlx5e_get_cqe(&rq->cq);
+ if (!cqe)
+ break;
+
+ wqe_counter_be = cqe->wqe_counter;
+ wqe_counter = be16_to_cpu(wqe_counter_be);
+ wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
+ byte_cnt = be32_to_cpu(cqe->byte_cnt);
+
+ bus_dmamap_sync(rq->dma_tag,
+ rq->mbuf[wqe_counter].dma_map,
+ BUS_DMASYNC_POSTREAD);
+
+ if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+ rq->stats.wqe_err++;
+ goto wq_ll_pop;
+ }
+
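+		/*
+		 * Small frames are copied into a separate mbuf so that the
+		 * receive buffer can stay attached to the WQE for reuse;
+		 * larger frames are handed to the stack directly and their
+		 * DMA map is unloaded.
+		 */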
+ if (MHLEN >= byte_cnt &&
+ (mb = m_gethdr(M_NOWAIT, MT_DATA)) != NULL) {
+ bcopy(rq->mbuf[wqe_counter].data, mtod(mb, caddr_t),
+ byte_cnt);
+ } else {
+ mb = rq->mbuf[wqe_counter].mbuf;
+ rq->mbuf[wqe_counter].mbuf = NULL; /* safety clear */
+
+ bus_dmamap_unload(rq->dma_tag,
+ rq->mbuf[wqe_counter].dma_map);
+ }
+
+ mlx5e_build_rx_mbuf(cqe, rq, mb, byte_cnt);
+ rq->stats.packets++;
+#ifdef HAVE_TURBO_LRO
+ if (mb->m_pkthdr.csum_flags == 0 ||
+ (rq->ifp->if_capenable & IFCAP_LRO) == 0 ||
+ rq->lro.mbuf == NULL) {
+ /* normal input */
+ rq->ifp->if_input(rq->ifp, mb);
+ } else {
+ tcp_tlro_rx(&rq->lro, mb);
+ }
+#else
+ if (mb->m_pkthdr.csum_flags == 0 ||
+ (rq->ifp->if_capenable & IFCAP_LRO) == 0 ||
+ rq->lro.lro_cnt == 0 ||
+ tcp_lro_rx(&rq->lro, mb, 0) != 0) {
+ rq->ifp->if_input(rq->ifp, mb);
+ }
+#endif
+wq_ll_pop:
+ mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
+ &wqe->next.next_wqe_index);
+ }
+
+ mlx5_cqwq_update_db_record(&rq->cq.wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+#ifndef HAVE_TURBO_LRO
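+	/* flush all pending LRO sessions */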
+ while ((queued = SLIST_FIRST(&rq->lro.lro_active)) != NULL) {
+ SLIST_REMOVE_HEAD(&rq->lro.lro_active, next);
+ tcp_lro_flush(&rq->lro, queued);
+ }
+#endif
+ return (i);
+}
+
+void
+mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq)
+{
+ struct mlx5e_rq *rq = container_of(mcq, struct mlx5e_rq, cq.mcq);
+ int i = 0;
+
+#ifdef HAVE_PER_CQ_EVENT_PACKET
+ struct mbuf *mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rq->wqe_sz);
+ if (mb != NULL) {
+		/* this code is used for debugging purposes only */
+ mb->m_pkthdr.len = mb->m_len = 15;
+ memset(mb->m_data, 255, 14);
+ mb->m_data[14] = rq->ix;
+ mb->m_pkthdr.rcvif = rq->ifp;
+ rq->ifp->if_input(rq->ifp, mb);
+ }
+#endif
+
+ mtx_lock(&rq->mtx);
+
+ /*
+ * Polling the entire CQ without posting new WQEs results in
+	 * a lack of receive WQEs during heavy traffic scenarios.
+ */
+ while (1) {
+ if (mlx5e_poll_rx_cq(rq, MLX5E_RX_BUDGET_MAX) !=
+ MLX5E_RX_BUDGET_MAX)
+ break;
+ i += MLX5E_RX_BUDGET_MAX;
+ if (i >= MLX5E_BUDGET_MAX)
+ break;
+ mlx5e_post_rx_wqes(rq);
+ }
+ mlx5e_post_rx_wqes(rq);
+ mlx5e_cq_arm(&rq->cq);
+#ifdef HAVE_TURBO_LRO
+ tcp_tlro_flush(&rq->lro, 1);
+#endif
+ mtx_unlock(&rq->mtx);
+}
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
new file mode 100644
index 0000000..3b69f84
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
@@ -0,0 +1,485 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+#include <machine/atomic.h>
+
+void
+mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt, bool notify_hw)
+{
+ u16 pi = sq->pc & sq->wq.sz_m1;
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
+
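+	/* build a NOP WQE at the current producer index */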
+ memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
+
+ wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
+ wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+
+ sq->mbuf[pi].mbuf = NULL;
+ sq->mbuf[pi].num_bytes = 0;
+ sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+ sq->pc += sq->mbuf[pi].num_wqebbs;
+ if (notify_hw)
+ mlx5e_tx_notify_hw(sq, wqe, 0);
+}
+
+#if (__FreeBSD_version >= 1100000)
+static uint32_t mlx5e_hash_value;
+
+static void
+mlx5e_hash_init(void *arg)
+{
+ mlx5e_hash_value = m_ether_tcpip_hash_init();
+}
+
+/* Make the kernel call mlx5e_hash_init() after the random stack has finished initializing */
+SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
+#endif
+
+static struct mlx5e_sq *
+mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
+{
+ struct mlx5e_priv *priv = ifp->if_softc;
+ u32 ch;
+ u32 tc;
+
+ /* check if channels are successfully opened */
+ if (unlikely(priv->channel == NULL))
+ return (NULL);
+
+ /* obtain VLAN information if present */
+ if (mb->m_flags & M_VLANTAG) {
+ tc = (mb->m_pkthdr.ether_vtag >> 13);
+ if (tc >= priv->num_tc)
+ tc = priv->default_vlan_prio;
+ } else {
+ tc = priv->default_vlan_prio;
+ }
+
+ ch = priv->params.num_channels;
+
+ /* check if flowid is set */
+ if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
+ ch = (mb->m_pkthdr.flowid % 128) % ch;
+ } else {
+#if (__FreeBSD_version >= 1100000)
+ ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
+ MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
+#else
+ /*
+		 * m_ether_tcpip_hash() is not present in stable branches,
+		 * so just send unhashed mbufs to queue 0.
+ */
+ ch = 0;
+#endif
+ }
+
+ /* check if channel is allocated */
+ if (unlikely(priv->channel[ch] == NULL))
+ return (NULL);
+
+ return (&priv->channel[ch]->sq[tc]);
+}
+
+static inline u16
+mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, struct mbuf *mb)
+{
+ return (MIN(MLX5E_MAX_TX_INLINE, mb->m_len));
+}
+
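+/*
+ * Compute the combined Ethernet, IP and TCP header length of a TSO
+ * frame. Returns 0 if the headers cannot be parsed from the first
+ * mbuf or the payload is not TCP.
+ */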
+static int
+mlx5e_get_header_size(struct mbuf *mb)
+{
+ struct ether_vlan_header *eh;
+ struct tcphdr *th;
+ struct ip *ip;
+ int ip_hlen, tcp_hlen;
+ struct ip6_hdr *ip6;
+ uint16_t eth_type;
+ int eth_hdr_len;
+
+ eh = mtod(mb, struct ether_vlan_header *);
+ if (mb->m_len < ETHER_HDR_LEN)
+ return (0);
+ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+ eth_type = ntohs(eh->evl_proto);
+ eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
+ } else {
+ eth_type = ntohs(eh->evl_encap_proto);
+ eth_hdr_len = ETHER_HDR_LEN;
+ }
+ if (mb->m_len < eth_hdr_len)
+ return (0);
+ switch (eth_type) {
+ case ETHERTYPE_IP:
+ ip = (struct ip *)(mb->m_data + eth_hdr_len);
+ if (mb->m_len < eth_hdr_len + sizeof(*ip))
+ return (0);
+ if (ip->ip_p != IPPROTO_TCP)
+ return (0);
+ ip_hlen = ip->ip_hl << 2;
+ eth_hdr_len += ip_hlen;
+ break;
+ case ETHERTYPE_IPV6:
+ ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len);
+ if (mb->m_len < eth_hdr_len + sizeof(*ip6))
+ return (0);
+ if (ip6->ip6_nxt != IPPROTO_TCP)
+ return (0);
+ eth_hdr_len += sizeof(*ip6);
+ break;
+ default:
+ return (0);
+ }
+ if (mb->m_len < eth_hdr_len + sizeof(*th))
+ return (0);
+ th = (struct tcphdr *)(mb->m_data + eth_hdr_len);
+ tcp_hlen = th->th_off << 2;
+ eth_hdr_len += tcp_hlen;
+ if (mb->m_len < eth_hdr_len)
+ return (0);
+ return (eth_hdr_len);
+}
+
+/*
+ * The return value is not passed back to the stack
+ * because of the drbr.
+ */
+static int
+mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
+{
+ bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5e_tx_wqe *wqe;
+ struct ifnet *ifp;
+ int nsegs;
+ int err;
+ int x;
+ struct mbuf *mb = *mbp;
+ u16 ds_cnt;
+ u16 ihs;
+ u16 pi;
+ u8 opcode;
+
+	/*
+	 * Return ENOBUFS if the queue is full; this may trigger
+	 * reinsertion of the mbuf into the drbr (see mlx5e_xmit_locked).
+	 */
+ if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
+ return (ENOBUFS);
+ }
+
+ /* Align SQ edge with NOPs to avoid WQE wrap around */
+ pi = ((~sq->pc) & sq->wq.sz_m1);
+ if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
+ /* send one multi NOP message instead of many */
+ mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS, false);
+ pi = ((~sq->pc) & sq->wq.sz_m1);
+ if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
+ m_freem(mb);
+ return (ENOMEM);
+ }
+ }
+
+ /* Setup local variables */
+ pi = sq->pc & sq->wq.sz_m1;
+ wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
+ ifp = sq->channel->ifp;
+
+ memset(wqe, 0, sizeof(*wqe));
+
+ /* send a copy of the frame to the BPF listener, if any */
+ if (ifp != NULL && ifp->if_bpf != NULL)
+ ETHER_BPF_MTAP(ifp, mb);
+
+ if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
+ wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
+ }
+ if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
+ wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+ }
+	if (wqe->eth.cs_flags == 0) {
+ sq->stats.csum_offload_none++;
+ }
+
+ if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
+ u32 payload_len;
+ u32 mss = mb->m_pkthdr.tso_segsz;
+ u32 num_pkts;
+
+ wqe->eth.mss = cpu_to_be16(mss);
+ opcode = MLX5_OPCODE_LSO;
+ ihs = mlx5e_get_header_size(mb);
+ payload_len = mb->m_pkthdr.len - ihs;
+ if (payload_len == 0)
+ num_pkts = 1;
+ else
+ num_pkts = DIV_ROUND_UP(payload_len, mss);
+ sq->mbuf[pi].num_bytes = payload_len + (num_pkts * ihs);
+
+ sq->stats.tso_packets++;
+ sq->stats.tso_bytes += payload_len;
+ } else {
+ opcode = MLX5_OPCODE_SEND;
+ ihs = mlx5e_get_inline_hdr_size(sq, mb);
+		sq->mbuf[pi].num_bytes = max_t(unsigned int,
+ mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
+ }
+ if (mb->m_flags & M_VLANTAG) {
+ struct ether_vlan_header *eh =
+ (struct ether_vlan_header *)wqe->eth.inline_hdr_start;
+ /* range checks */
+ if (ihs > (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN))
+ ihs = (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN);
+ else if (ihs < ETHER_HDR_LEN) {
+ err = EINVAL;
+ goto tx_drop;
+ }
+ m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
+ m_adj(mb, ETHER_HDR_LEN);
+		/* insert a 4-byte VLAN tag into the data stream */
+ eh->evl_proto = eh->evl_encap_proto;
+ eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
+ eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
+ /* copy rest of header data, if any */
+ m_copydata(mb, 0, ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
+ m_adj(mb, ihs - ETHER_HDR_LEN);
+ /* extend header by 4 bytes */
+ ihs += ETHER_VLAN_ENCAP_LEN;
+ } else {
+ m_copydata(mb, 0, ihs, wqe->eth.inline_hdr_start);
+ m_adj(mb, ihs);
+ }
+
+ wqe->eth.inline_hdr_sz = cpu_to_be16(ihs);
+
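+	/*
+	 * Count the 16-byte data segments used by the control and Ethernet
+	 * segments plus any inlined header bytes beyond the built-in
+	 * inline area.
+	 */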
+ ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
+ if (likely(ihs > sizeof(wqe->eth.inline_hdr_start))) {
+ ds_cnt += DIV_ROUND_UP(ihs - sizeof(wqe->eth.inline_hdr_start),
+ MLX5_SEND_WQE_DS);
+ }
+ dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;
+
+ /* trim off empty mbufs */
+ while (mb->m_len == 0) {
+ mb = m_free(mb);
+ /* check if all data has been inlined */
+ if (mb == NULL)
+ goto skip_dma;
+ }
+
+ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
+ mb, segs, &nsegs, BUS_DMA_NOWAIT);
+ if (err == EFBIG) {
+ /* Update *mbp before defrag in case it was trimmed in the loop above */
+ *mbp = mb;
+ /* Update statistics */
+ sq->stats.defragged++;
+ /* Too many mbuf fragments */
+ mb = m_defrag(*mbp, M_NOWAIT);
+ if (mb == NULL) {
+ mb = *mbp;
+ goto tx_drop;
+ }
+ /* Try again */
+ err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
+ mb, segs, &nsegs, BUS_DMA_NOWAIT);
+ }
+ /* catch errors */
+ if (err != 0) {
+ goto tx_drop;
+ }
+ *mbp = mb;
+
+ for (x = 0; x != nsegs; x++) {
+ if (segs[x].ds_len == 0)
+ continue;
+ dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
+ dseg->lkey = sq->mkey_be;
+ dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
+ dseg++;
+ }
+skip_dma:
+ ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));
+
+ wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
+ wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+ wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+
+ /* store pointer to mbuf */
+ sq->mbuf[pi].mbuf = mb;
+ sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+ sq->pc += sq->mbuf[pi].num_wqebbs;
+
+ /* make sure all mbuf data is written to RAM */
+ if (mb != NULL)
+ bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, BUS_DMASYNC_PREWRITE);
+
+ mlx5e_tx_notify_hw(sq, wqe, 0);
+
+ sq->stats.packets++;
+ return (0);
+
+tx_drop:
+ sq->stats.dropped++;
+ *mbp = NULL;
+ m_freem(mb);
+	return (err);
+}
+
+static void
+mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
+{
+ u16 sqcc;
+
+ /*
+ * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ sqcc = sq->cc;
+
+ while (budget--) {
+ struct mlx5_cqe64 *cqe;
+ struct mbuf *mb;
+ u16 ci;
+
+ cqe = mlx5e_get_cqe(&sq->cq);
+ if (!cqe)
+ break;
+
+ ci = sqcc & sq->wq.sz_m1;
+ mb = sq->mbuf[ci].mbuf;
+ sq->mbuf[ci].mbuf = NULL; /* safety clear */
+
+ if (mb == NULL) {
+ if (sq->mbuf[ci].num_bytes == 0) {
+ /* NOP */
+ sq->stats.nop++;
+ }
+ } else {
+ bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
+ BUS_DMASYNC_POSTWRITE);
+ bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);
+
+ /* free transmitted mbuf */
+ m_freem(mb);
+ }
+ sqcc += sq->mbuf[ci].num_wqebbs;
+ }
+
+ mlx5_cqwq_update_db_record(&sq->cq.wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ sq->cc = sqcc;
+
+ if (atomic_cmpset_int(&sq->queue_state, MLX5E_SQ_FULL, MLX5E_SQ_READY))
+ taskqueue_enqueue(sq->sq_tq, &sq->sq_task);
+}
+
+static int
+mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
+{
+ struct mbuf *next;
+ int err = 0;
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ if (mb)
+ err = drbr_enqueue(ifp, sq->br, mb);
+ return (err);
+ }
+
+ if (mb != NULL)
+		/*
+		 * If we can't insert the mbuf into the drbr, try to xmit anyway.
+		 * Keep the error so it can be returned after the xmit attempt.
+		 */
+ err = drbr_enqueue(ifp, sq->br, mb);
+
+ /* Process the queue */
+ while ((next = drbr_peek(ifp, sq->br)) != NULL) {
+ if (mlx5e_sq_xmit(sq, &next) != 0) {
+ if (next == NULL) {
+ drbr_advance(ifp, sq->br);
+ } else {
+ drbr_putback(ifp, sq->br, next);
+ atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_FULL);
+ }
+ break;
+ }
+ drbr_advance(ifp, sq->br);
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+ break;
+ }
+ return (err);
+}
+
+int
+mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
+{
+ struct mlx5e_sq *sq;
+ int ret;
+
+ sq = mlx5e_select_queue(ifp, mb);
+ if (unlikely(sq == NULL)) {
+ /* invalid send queue */
+ m_freem(mb);
+ return (ENXIO);
+ }
+
+ if (mtx_trylock(&sq->lock)) {
+ ret = mlx5e_xmit_locked(ifp, sq, mb);
+ mtx_unlock(&sq->lock);
+ } else {
+ ret = drbr_enqueue(ifp, sq->br, mb);
+ taskqueue_enqueue(sq->sq_tq, &sq->sq_task);
+ }
+
+ return (ret);
+}
+
+void
+mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq)
+{
+ struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);
+
+ mtx_lock(&sq->comp_lock);
+ mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
+ mlx5e_cq_arm(&sq->cq);
+ mtx_unlock(&sq->comp_lock);
+}
+
+void
+mlx5e_tx_que(void *context, int pending)
+{
+ struct mlx5e_sq *sq = context;
+ struct ifnet *ifp = sq->channel->ifp;
+
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ mtx_lock(&sq->lock);
+ if (!drbr_empty(ifp, sq->br))
+ mlx5e_xmit_locked(ifp, sq, NULL);
+ mtx_unlock(&sq->lock);
+ }
+}
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c
new file mode 100644
index 0000000..7836bfe
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+
+struct mlx5_cqe64 *
+mlx5e_get_cqe(struct mlx5e_cq *cq)
+{
+ struct mlx5_cqe64 *cqe;
+
+ cqe = mlx5_cqwq_get_wqe(&cq->wq, mlx5_cqwq_get_ci(&cq->wq));
+
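+	/*
+	 * The CQE ownership bit must match the current wrap count;
+	 * otherwise there is no new completion to process.
+	 */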
+ if ((cqe->op_own ^ mlx5_cqwq_get_wrap_cnt(&cq->wq)) & MLX5_CQE_OWNER_MASK)
+ return (NULL);
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ /* ensure cqe content is read after cqe ownership bit */
+ rmb();
+
+ return (cqe);
+}
+
+void
+mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event)
+{
+ struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
+ struct mlx5e_channel *c = cq->channel;
+ struct mlx5e_priv *priv = c->priv;
+ struct ifnet *ifp = priv->ifp;
+
+ if_printf(ifp, "%s: cqn=0x%.6x event=0x%.2x\n",
+ __func__, mcq->cqn, event);
+}
diff --git a/sys/dev/mlx5/mlx5_en/tcp_tlro.c b/sys/dev/mlx5/mlx5_en/tcp_tlro.c
new file mode 100644
index 0000000..27e861e
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/tcp_tlro.c
@@ -0,0 +1,697 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/libkern.h>
+#include <sys/mbuf.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/endian.h>
+#include <sys/socket.h>
+#include <sys/sockopt.h>
+#include <sys/smp.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/ethernet.h>
+
+#if defined(INET) || defined(INET6)
+#include <netinet/in.h>
+#endif
+
+#ifdef INET
+#include <netinet/ip.h>
+#endif
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <netinet/tcp_var.h>
+
+#include "tcp_tlro.h"
+
+#ifndef M_HASHTYPE_LRO_TCP
+#ifndef KLD_MODULE
+#warning "M_HASHTYPE_LRO_TCP is not defined"
+#endif
+#define M_HASHTYPE_LRO_TCP 254
+#endif
+
+static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, tlro,
+ CTLFLAG_RW, 0, "TCP turbo LRO parameters");
+
+static MALLOC_DEFINE(M_TLRO, "TLRO", "Turbo LRO");
+
+static int tlro_min_rate = 20; /* Hz */
+
+SYSCTL_INT(_net_inet_tcp_tlro, OID_AUTO, min_rate, CTLFLAG_RWTUN,
+ &tlro_min_rate, 0, "Minimum serving rate in Hz");
+
+static int tlro_max_packet = IP_MAXPACKET;
+
+SYSCTL_INT(_net_inet_tcp_tlro, OID_AUTO, max_packet, CTLFLAG_RWTUN,
+ &tlro_max_packet, 0, "Maximum packet size in bytes");
+
+typedef struct {
+ uint32_t value;
+} __packed uint32_p_t;
+
+static uint16_t
+tcp_tlro_csum(const uint32_p_t *p, size_t l)
+{
+ const uint32_p_t *pend = p + (l / 4);
+ uint64_t cs;
+
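+	/* sum the buffer as 32-bit words and fold the result into 16 bits */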
+ for (cs = 0; p != pend; p++)
+ cs += le32toh(p->value);
+ while (cs > 0xffff)
+ cs = (cs >> 16) + (cs & 0xffff);
+ return (cs);
+}
+
+static void *
+tcp_tlro_get_header(const struct mbuf *m, const u_int off,
+ const u_int len)
+{
+ if (m->m_len < (off + len))
+ return (NULL);
+ return (mtod(m, char *) + off);
+}
+
+static uint8_t
+tcp_tlro_info_save_timestamp(struct tlro_mbuf_data *pinfo)
+{
+ struct tcphdr *tcp = pinfo->tcp;
+ uint32_t *ts_ptr;
+
+ if (tcp->th_off < ((TCPOLEN_TSTAMP_APPA + sizeof(*tcp)) >> 2))
+ return (0);
+
+ ts_ptr = (uint32_t *)(tcp + 1);
+ if (*ts_ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
+ return (0);
+
+ /* save timestamps */
+ pinfo->tcp_ts = ts_ptr[1];
+ pinfo->tcp_ts_reply = ts_ptr[2];
+ return (1);
+}
+
+static void
+tcp_tlro_info_restore_timestamp(struct tlro_mbuf_data *pinfoa,
+ struct tlro_mbuf_data *pinfob)
+{
+ struct tcphdr *tcp = pinfoa->tcp;
+ uint32_t *ts_ptr;
+
+ if (tcp->th_off < ((TCPOLEN_TSTAMP_APPA + sizeof(*tcp)) >> 2))
+ return;
+
+ ts_ptr = (uint32_t *)(tcp + 1);
+ if (*ts_ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
+ return;
+
+ /* restore timestamps */
+ ts_ptr[1] = pinfob->tcp_ts;
+ ts_ptr[2] = pinfob->tcp_ts_reply;
+}
+
+static void
+tcp_tlro_extract_header(struct tlro_mbuf_data *pinfo, struct mbuf *m, int seq)
+{
+ uint8_t *phdr = (uint8_t *)pinfo->buf;
+ struct ether_header *eh;
+ struct ether_vlan_header *vlan;
+#ifdef INET
+ struct ip *ip;
+#endif
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+ struct tcphdr *tcp;
+ uint16_t etype;
+ int diff;
+ int off;
+
+ /* fill in information */
+ pinfo->head = m;
+ pinfo->last_tick = ticks;
+ pinfo->sequence = seq;
+ pinfo->pprev = &m_last(m)->m_next;
+
+ off = sizeof(*eh);
+ if (m->m_len < off)
+ goto error;
+ eh = tcp_tlro_get_header(m, 0, sizeof(*eh));
+ if (eh == NULL)
+ goto error;
+ memcpy(phdr, &eh->ether_dhost, ETHER_ADDR_LEN);
+ phdr += ETHER_ADDR_LEN;
+ memcpy(phdr, &eh->ether_type, sizeof(eh->ether_type));
+ phdr += sizeof(eh->ether_type);
+ etype = ntohs(eh->ether_type);
+
+ if (etype == ETHERTYPE_VLAN) {
+ vlan = tcp_tlro_get_header(m, off, sizeof(*vlan));
+ if (vlan == NULL)
+ goto error;
+ memcpy(phdr, &vlan->evl_tag, sizeof(vlan->evl_tag) +
+ sizeof(vlan->evl_proto));
+ phdr += sizeof(vlan->evl_tag) + sizeof(vlan->evl_proto);
+ etype = ntohs(vlan->evl_proto);
+ off += sizeof(*vlan) - sizeof(*eh);
+ }
+ switch (etype) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ /*
+ * Cannot LRO:
+ * - Non-IP packets
+ * - Fragmented packets
+ * - Packets with IPv4 options
+ * - Non-TCP packets
+ */
+ ip = tcp_tlro_get_header(m, off, sizeof(*ip));
+ if (ip == NULL ||
+ (ip->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 ||
+ (ip->ip_p != IPPROTO_TCP) ||
+ (ip->ip_hl << 2) != sizeof(*ip))
+ goto error;
+
+ /* Legacy IP has a header checksum that needs to be correct */
+ if (!(m->m_pkthdr.csum_flags & CSUM_IP_CHECKED)) {
+ /* Verify IP header */
+ if (tcp_tlro_csum((uint32_p_t *)ip, sizeof(*ip)) != 0xFFFF)
+ m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
+ else
+ m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED |
+ CSUM_IP_VALID;
+ }
+ /* Only accept valid checksums */
+ if (!(m->m_pkthdr.csum_flags & CSUM_IP_VALID) ||
+ !(m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
+ goto error;
+ memcpy(phdr, &ip->ip_src, sizeof(ip->ip_src) +
+ sizeof(ip->ip_dst));
+ phdr += sizeof(ip->ip_src) + sizeof(ip->ip_dst);
+ if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
+ pinfo->ip_len = m->m_pkthdr.len - off;
+ else
+ pinfo->ip_len = ntohs(ip->ip_len);
+ pinfo->ip_hdrlen = sizeof(*ip);
+ pinfo->ip.v4 = ip;
+ pinfo->ip_version = 4;
+ off += sizeof(*ip);
+ break;
+#endif
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ /*
+ * Cannot LRO:
+ * - Non-IP packets
+ * - Packets with IPv6 options
+ * - Non-TCP packets
+ */
+ ip6 = tcp_tlro_get_header(m, off, sizeof(*ip6));
+ if (ip6 == NULL || ip6->ip6_nxt != IPPROTO_TCP)
+ goto error;
+ if (!(m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
+ goto error;
+ memcpy(phdr, &ip6->ip6_src, sizeof(struct in6_addr) +
+ sizeof(struct in6_addr));
+ phdr += sizeof(struct in6_addr) + sizeof(struct in6_addr);
+ if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
+ pinfo->ip_len = m->m_pkthdr.len - off;
+ else
+ pinfo->ip_len = ntohs(ip6->ip6_plen) + sizeof(*ip6);
+ pinfo->ip_hdrlen = sizeof(*ip6);
+ pinfo->ip.v6 = ip6;
+ pinfo->ip_version = 6;
+ off += sizeof(*ip6);
+ break;
+#endif
+ default:
+ goto error;
+ }
+ tcp = tcp_tlro_get_header(m, off, sizeof(*tcp));
+ if (tcp == NULL)
+ goto error;
+ memcpy(phdr, &tcp->th_sport, sizeof(tcp->th_sport) +
+ sizeof(tcp->th_dport));
+ phdr += sizeof(tcp->th_sport) +
+ sizeof(tcp->th_dport);
+ /* store TCP header length */
+ *phdr++ = tcp->th_off;
+ if (tcp->th_off < (sizeof(*tcp) >> 2))
+ goto error;
+
+ /* compute offset to data payload */
+ pinfo->tcp_len = (tcp->th_off << 2);
+ off += pinfo->tcp_len;
+
+ /* store more info */
+ pinfo->data_off = off;
+ pinfo->tcp = tcp;
+
+ /* try to save timestamp, if any */
+ *phdr++ = tcp_tlro_info_save_timestamp(pinfo);
+
+ /* verify offset and IP/TCP length */
+ if (off > m->m_pkthdr.len ||
+ pinfo->ip_len < pinfo->tcp_len)
+ goto error;
+
+ /* compute data payload length */
+ pinfo->data_len = (pinfo->ip_len - pinfo->tcp_len - pinfo->ip_hdrlen);
+
+ /* trim any padded data */
+ diff = (m->m_pkthdr.len - off) - pinfo->data_len;
+ if (diff != 0) {
+ if (diff < 0)
+ goto error;
+ else
+ m_adj(m, -diff);
+ }
+ /* compute header length */
+ pinfo->buf_length = phdr - (uint8_t *)pinfo->buf;
+ /* zero-pad rest of buffer */
+ memset(phdr, 0, TLRO_MAX_HEADER - pinfo->buf_length);
+ return;
+error:
+ pinfo->buf_length = 0;
+}
+
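+/*
+ * Compare two flattened header blocks eight bytes at a time.  A zero
+ * return value means the headers are identical; otherwise only the
+ * sign of the return value is used, to get a stable ordering.
+ */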
+static int
+tcp_tlro_cmp64(const uint64_t *pa, const uint64_t *pb)
+{
+ int64_t diff = 0;
+ unsigned x;
+
+ for (x = 0; x != TLRO_MAX_HEADER / 8; x++) {
+ /*
+ * NOTE: Endianness does not matter in this
+		 * comparison:
+ */
+ diff = pa[x] - pb[x];
+ if (diff != 0)
+ goto done;
+ }
+done:
+ if (diff < 0)
+ return (-1);
+ else if (diff > 0)
+ return (1);
+ return (0);
+}
+
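+/*
+ * qsort() callback: entries without an mbuf sort last, packets which
+ * cannot be aggregated sort first, and packets belonging to the same
+ * flow become adjacent, ordered by TCP sequence number and arrival
+ * order.
+ */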
+static int
+tcp_tlro_compare_header(const void *_ppa, const void *_ppb)
+{
+ const struct tlro_mbuf_ptr *ppa = _ppa;
+ const struct tlro_mbuf_ptr *ppb = _ppb;
+ struct tlro_mbuf_data *pinfoa = ppa->data;
+ struct tlro_mbuf_data *pinfob = ppb->data;
+ int ret;
+
+ ret = (pinfoa->head == NULL) - (pinfob->head == NULL);
+ if (ret != 0)
+ goto done;
+
+ ret = pinfoa->buf_length - pinfob->buf_length;
+ if (ret != 0)
+ goto done;
+ if (pinfoa->buf_length != 0) {
+ ret = tcp_tlro_cmp64(pinfoa->buf, pinfob->buf);
+ if (ret != 0)
+ goto done;
+ ret = ntohl(pinfoa->tcp->th_seq) - ntohl(pinfob->tcp->th_seq);
+ if (ret != 0)
+ goto done;
+ ret = ntohl(pinfoa->tcp->th_ack) - ntohl(pinfob->tcp->th_ack);
+ if (ret != 0)
+ goto done;
+ ret = pinfoa->sequence - pinfob->sequence;
+ if (ret != 0)
+ goto done;
+ }
+done:
+ return (ret);
+}
+
+static void
+tcp_tlro_sort(struct tlro_ctrl *tlro)
+{
+ if (tlro->curr == 0)
+ return;
+
+ qsort(tlro->mbuf, tlro->curr, sizeof(struct tlro_mbuf_ptr),
+ &tcp_tlro_compare_header);
+}
+
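+/*
+ * Convert the minimum serving rate in Hz into a hold time limit in
+ * ticks.  For example, hz = 1000 and tlro_min_rate = 20 gives a limit
+ * of 50 ticks.
+ */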
+static int
+tcp_tlro_get_ticks(void)
+{
+ int to = tlro_min_rate;
+
+ if (to < 1)
+ to = 1;
+ to = hz / to;
+ if (to < 1)
+ to = 1;
+ return (to);
+}
+
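+/*
+ * Merge and forward the queued packets.  The array has been sorted so
+ * that packets from the same flow are adjacent.  For each run of
+ * matching headers, the payloads of in-sequence ACK/PUSH segments are
+ * appended to the first mbuf chain while the TCP checksum is updated
+ * incrementally.  The merged packet is forwarded when "force" is set,
+ * when the run could not be merged completely, when it carries no
+ * data, or when its hold time exceeds the limit.
+ */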
+static void
+tcp_tlro_combine(struct tlro_ctrl *tlro, int force)
+{
+ struct tlro_mbuf_data *pinfoa;
+ struct tlro_mbuf_data *pinfob;
+ uint32_t cs;
+ int curr_ticks = ticks;
+ int ticks_limit = tcp_tlro_get_ticks();
+ unsigned x;
+ unsigned y;
+ unsigned z;
+ int temp;
+
+ if (tlro->curr == 0)
+ return;
+
+ for (y = 0; y != tlro->curr;) {
+ struct mbuf *m;
+
+ pinfoa = tlro->mbuf[y].data;
+ for (x = y + 1; x != tlro->curr; x++) {
+ pinfob = tlro->mbuf[x].data;
+ if (pinfoa->buf_length != pinfob->buf_length ||
+ tcp_tlro_cmp64(pinfoa->buf, pinfob->buf) != 0)
+ break;
+ }
+ if (pinfoa->buf_length == 0) {
+ /* forward traffic which cannot be combined */
+ for (z = y; z != x; z++) {
+ /* just forward packets */
+ pinfob = tlro->mbuf[z].data;
+
+ m = pinfob->head;
+
+ /* reset info structure */
+ pinfob->head = NULL;
+ pinfob->buf_length = 0;
+
+ /* do stats */
+ tlro->lro_flushed++;
+
+ /* input packet to network layer */
+ (*tlro->ifp->if_input) (tlro->ifp, m);
+ }
+ y = z;
+ continue;
+ }
+
+		/* seed the running checksum: pseudo-header length plus the first TCP header */
+ temp = (pinfoa->ip_len - pinfoa->ip_hdrlen);
+ cs = ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8) +
+ tcp_tlro_csum((uint32_p_t *)pinfoa->tcp, pinfoa->tcp_len);
+
+ /* append all fragments into one block */
+ for (z = y + 1; z != x; z++) {
+
+ pinfob = tlro->mbuf[z].data;
+
+			/* only merge segments limited to the ACK and PUSH flags */
+ if ((pinfoa->tcp->th_flags & ~(TH_ACK | TH_PUSH)) ||
+ (pinfob->tcp->th_flags & ~(TH_ACK | TH_PUSH)))
+ break;
+
+ /* check if there is enough space */
+ if ((pinfoa->ip_len + pinfob->data_len) > tlro_max_packet)
+ break;
+
+ /* try to append the new segment */
+ temp = ntohl(pinfoa->tcp->th_seq) + pinfoa->data_len;
+ if (temp != (int)ntohl(pinfob->tcp->th_seq))
+ break;
+
+ temp = pinfob->ip_len - pinfob->ip_hdrlen;
+ cs += ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8) +
+ tcp_tlro_csum((uint32_p_t *)pinfob->tcp, pinfob->tcp_len);
+ /* remove fields which appear twice */
+ cs += (IPPROTO_TCP << 8);
+ if (pinfob->ip_version == 4) {
+ cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v4->ip_src, 4);
+ cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v4->ip_dst, 4);
+ } else {
+ cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v6->ip6_src, 16);
+ cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v6->ip6_dst, 16);
+ }
+ /* remainder computation */
+ while (cs > 0xffff)
+ cs = (cs >> 16) + (cs & 0xffff);
+
+ /* update window and ack sequence number */
+ pinfoa->tcp->th_ack = pinfob->tcp->th_ack;
+ pinfoa->tcp->th_win = pinfob->tcp->th_win;
+
+ /* check if we should restore the timestamp */
+ tcp_tlro_info_restore_timestamp(pinfoa, pinfob);
+
+ /* accumulate TCP flags */
+ pinfoa->tcp->th_flags |= pinfob->tcp->th_flags;
+
+ /* update lengths */
+ pinfoa->ip_len += pinfob->data_len;
+ pinfoa->data_len += pinfob->data_len;
+
+ /* clear mbuf pointer - packet is accumulated */
+ m = pinfob->head;
+
+ /* reset info structure */
+ pinfob->head = NULL;
+ pinfob->buf_length = 0;
+
+ /* append data to mbuf [y] */
+ m_adj(m, pinfob->data_off);
+ /* delete mbuf tags, if any */
+ m_tag_delete_chain(m, NULL);
+ /* clear packet header flag */
+ m->m_flags &= ~M_PKTHDR;
+
+ /* concat mbuf(s) to end of list */
+ pinfoa->pprev[0] = m;
+ m = m_last(m);
+ pinfoa->pprev = &m->m_next;
+ pinfoa->head->m_pkthdr.len += pinfob->data_len;
+ }
+ /* compute new TCP header checksum */
+ pinfoa->tcp->th_sum = 0;
+
+ temp = pinfoa->ip_len - pinfoa->ip_hdrlen;
+ cs = (cs ^ 0xFFFF) +
+ tcp_tlro_csum((uint32_p_t *)pinfoa->tcp, pinfoa->tcp_len) +
+ ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8);
+
+ /* remainder computation */
+ while (cs > 0xffff)
+ cs = (cs >> 16) + (cs & 0xffff);
+
+ /* update new checksum */
+ pinfoa->tcp->th_sum = ~htole16(cs);
+
+ /* update IP length, if any */
+ if (pinfoa->ip_version == 4) {
+ if (pinfoa->ip_len > IP_MAXPACKET) {
+ M_HASHTYPE_SET(pinfoa->head, M_HASHTYPE_LRO_TCP);
+ pinfoa->ip.v4->ip_len = htons(IP_MAXPACKET);
+ } else {
+ pinfoa->ip.v4->ip_len = htons(pinfoa->ip_len);
+ }
+ } else {
+ if (pinfoa->ip_len > (IP_MAXPACKET + sizeof(*pinfoa->ip.v6))) {
+ M_HASHTYPE_SET(pinfoa->head, M_HASHTYPE_LRO_TCP);
+ pinfoa->ip.v6->ip6_plen = htons(IP_MAXPACKET);
+ } else {
+ temp = pinfoa->ip_len - sizeof(*pinfoa->ip.v6);
+ pinfoa->ip.v6->ip6_plen = htons(temp);
+ }
+ }
+
+ temp = curr_ticks - pinfoa->last_tick;
+ /* check if packet should be forwarded */
+ if (force != 0 || z != x || temp >= ticks_limit ||
+ pinfoa->data_len == 0) {
+
+ /* compute new IPv4 header checksum */
+ if (pinfoa->ip_version == 4) {
+ pinfoa->ip.v4->ip_sum = 0;
+ cs = tcp_tlro_csum((uint32_p_t *)pinfoa->ip.v4,
+ sizeof(*pinfoa->ip.v4));
+ pinfoa->ip.v4->ip_sum = ~htole16(cs);
+ }
+ /* forward packet */
+ m = pinfoa->head;
+
+ /* reset info structure */
+ pinfoa->head = NULL;
+ pinfoa->buf_length = 0;
+
+ /* do stats */
+ tlro->lro_flushed++;
+
+ /* input packet to network layer */
+ (*tlro->ifp->if_input) (tlro->ifp, m);
+ }
+ y = z;
+ }
+
+ /* cleanup all NULL heads */
+ for (y = 0; y != tlro->curr; y++) {
+ if (tlro->mbuf[y].data->head == NULL) {
+ for (z = y + 1; z != tlro->curr; z++) {
+ struct tlro_mbuf_ptr ptemp;
+ if (tlro->mbuf[z].data->head == NULL)
+ continue;
+ ptemp = tlro->mbuf[y];
+ tlro->mbuf[y] = tlro->mbuf[z];
+ tlro->mbuf[z] = ptemp;
+ y++;
+ }
+ break;
+ }
+ }
+ tlro->curr = y;
+}
+
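+/*
+ * Drop trailing array entries whose mbufs have already been forwarded.
+ */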
+static void
+tcp_tlro_cleanup(struct tlro_ctrl *tlro)
+{
+ while (tlro->curr != 0 &&
+ tlro->mbuf[tlro->curr - 1].data->head == NULL)
+ tlro->curr--;
+}
+
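+/*
+ * Sort the queued packets by flow, trim empty entries and try to
+ * combine and forward them.  A non-zero "force" pushes everything out
+ * to the network stack.
+ */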
+void
+tcp_tlro_flush(struct tlro_ctrl *tlro, int force)
+{
+ if (tlro->curr == 0)
+ return;
+
+ tcp_tlro_sort(tlro);
+ tcp_tlro_cleanup(tlro);
+ tcp_tlro_combine(tlro, force);
+}
+
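+/*
+ * Initialize the TLRO state.  One contiguous allocation holds both the
+ * pointer array and the per-packet data structures for "max_mbufs"
+ * entries.  Returns zero on success or EINVAL on bad arguments.
+ */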
+int
+tcp_tlro_init(struct tlro_ctrl *tlro, struct ifnet *ifp,
+ int max_mbufs)
+{
+ ssize_t size;
+ uint32_t x;
+
+ /* set zero defaults */
+ memset(tlro, 0, sizeof(*tlro));
+
+ /* compute size needed for data */
+ size = (sizeof(struct tlro_mbuf_ptr) * max_mbufs) +
+ (sizeof(struct tlro_mbuf_data) * max_mbufs);
+
+ /* range check */
+ if (max_mbufs <= 0 || size <= 0 || ifp == NULL)
+ return (EINVAL);
+
+ /* setup tlro control structure */
+ tlro->mbuf = malloc(size, M_TLRO, M_WAITOK | M_ZERO);
+ tlro->max = max_mbufs;
+ tlro->ifp = ifp;
+
+ /* setup pointer array */
+ for (x = 0; x != tlro->max; x++) {
+ tlro->mbuf[x].data = ((struct tlro_mbuf_data *)
+ &tlro->mbuf[max_mbufs]) + x;
+ }
+ return (0);
+}
+
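+/*
+ * Release the TLRO state, freeing any mbufs which are still queued.
+ */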
+void
+tcp_tlro_free(struct tlro_ctrl *tlro)
+{
+ struct tlro_mbuf_data *pinfo;
+ struct mbuf *m;
+ uint32_t y;
+
+ /* check if not setup */
+ if (tlro->mbuf == NULL)
+ return;
+ /* free MBUF array and any leftover MBUFs */
+ for (y = 0; y != tlro->max; y++) {
+
+ pinfo = tlro->mbuf[y].data;
+
+ m = pinfo->head;
+
+ /* reset info structure */
+ pinfo->head = NULL;
+ pinfo->buf_length = 0;
+
+ m_freem(m);
+ }
+ free(tlro->mbuf, M_TLRO);
+ /* reset buffer */
+ memset(tlro, 0, sizeof(*tlro));
+}
+
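+/*
+ * Input path: queue the received mbuf for aggregation if there is
+ * room, else hand it straight to the network stack, or drop it when
+ * no interface is attached.
+ */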
+void
+tcp_tlro_rx(struct tlro_ctrl *tlro, struct mbuf *m)
+{
+ if (m->m_len > 0 && tlro->curr < tlro->max) {
+ /* do stats */
+ tlro->lro_queued++;
+
+ /* extract header */
+ tcp_tlro_extract_header(tlro->mbuf[tlro->curr++].data,
+ m, tlro->sequence++);
+ } else if (tlro->ifp != NULL) {
+ /* do stats */
+ tlro->lro_flushed++;
+
+ /* input packet to network layer */
+ (*tlro->ifp->if_input) (tlro->ifp, m);
+ } else {
+ /* packet drop */
+ m_freem(m);
+ }
+}
diff --git a/sys/dev/mlx5/mlx5_en/tcp_tlro.h b/sys/dev/mlx5/mlx5_en/tcp_tlro.h
new file mode 100644
index 0000000..1e605d5
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/tcp_tlro.h
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _TCP_TLRO_H_
+#define _TCP_TLRO_H_
+
+#define TLRO_MAX_HEADER 64 /* bytes */
+
+struct ip;
+struct ip6_hdr;
+struct tcphdr;
+
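+/*
+ * Per-packet parse state.  "buf" holds the flattened headers which are
+ * used as the flow comparison key.
+ */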
+struct tlro_mbuf_data {
+ union {
+#ifdef INET
+ struct ip *v4;
+#endif
+#ifdef INET6
+ struct ip6_hdr *v6;
+#endif
+ } ip;
+ struct tcphdr *tcp;
+ struct mbuf *head;
+ struct mbuf **pprev;
+ int last_tick;
+ int sequence;
+ int data_len;
+ int data_off;
+ int ip_hdrlen;
+ int ip_len;
+ uint32_t tcp_ts;
+ uint32_t tcp_ts_reply;
+ uint16_t tcp_len;
+ uint8_t ip_version;
+	uint8_t buf_length;	/* in bytes */
+ uint64_t buf[TLRO_MAX_HEADER / 8];
+} __aligned(256);
+
+struct tlro_mbuf_ptr {
+ struct tlro_mbuf_data *data;
+};
+
+/* NB: This is part of driver structs */
+struct tlro_ctrl {
+ struct ifnet *ifp;
+ struct tlro_mbuf_ptr *mbuf;
+ uint64_t lro_queued;
+ uint64_t lro_flushed;
+ uint32_t max;
+ uint32_t curr;
+ int sequence;
+};
+
+int tcp_tlro_init(struct tlro_ctrl *, struct ifnet *, int);
+void tcp_tlro_free(struct tlro_ctrl *);
+void tcp_tlro_flush(struct tlro_ctrl *, int);
+void tcp_tlro_rx(struct tlro_ctrl *, struct mbuf *);
+
+#endif /* _TCP_TLRO_H_ */