diff options
Diffstat (limited to 'sys/dev/mlx5/mlx5_en')
-rw-r--r-- | sys/dev/mlx5/mlx5_en/en.h | 781 | ||||
-rw-r--r-- | sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c | 493 | ||||
-rw-r--r-- | sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c | 870 | ||||
-rw-r--r-- | sys/dev/mlx5/mlx5_en/mlx5_en_main.c | 2902 | ||||
-rw-r--r-- | sys/dev/mlx5/mlx5_en/mlx5_en_rx.c | 340 | ||||
-rw-r--r-- | sys/dev/mlx5/mlx5_en/mlx5_en_tx.c | 485 | ||||
-rw-r--r-- | sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c | 58 | ||||
-rw-r--r-- | sys/dev/mlx5/mlx5_en/tcp_tlro.c | 697 | ||||
-rw-r--r-- | sys/dev/mlx5/mlx5_en/tcp_tlro.h | 83 |
9 files changed, 6709 insertions, 0 deletions
diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h new file mode 100644 index 0000000..2988db3 --- /dev/null +++ b/sys/dev/mlx5/mlx5_en/en.h @@ -0,0 +1,781 @@ +/*- + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _MLX5_EN_H_ +#define _MLX5_EN_H_ + +#include <linux/kmod.h> +#include <linux/page.h> +#include <linux/slab.h> +#include <linux/if_vlan.h> +#include <linux/if_ether.h> +#include <linux/vmalloc.h> +#include <linux/moduleparam.h> +#include <linux/delay.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/if_ether.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/tcp_lro.h> +#include <netinet/udp.h> +#include <net/ethernet.h> +#include <sys/buf_ring.h> + +#include <machine/bus.h> + +#ifdef HAVE_TURBO_LRO +#include "tcp_tlro.h" +#endif + +#include <dev/mlx5/driver.h> +#include <dev/mlx5/qp.h> +#include <dev/mlx5/cq.h> +#include <dev/mlx5/vport.h> + +#include <dev/mlx5/mlx5_core/wq.h> +#include <dev/mlx5/mlx5_core/transobj.h> +#include <dev/mlx5/mlx5_core/mlx5_core.h> + +#define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x7 +#define MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE 0xd + +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE 0x7 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd + +/* freeBSD HW LRO is limited by 16KB - the size of max mbuf */ +#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ MJUM16BYTES +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 +#define MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ 0x7 +#define MLX5E_CACHELINE_SIZE CACHE_LINE_SIZE +#define MLX5E_HW2SW_MTU(hwmtu) \ + ((hwmtu) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN)) +#define MLX5E_SW2HW_MTU(swmtu) \ + ((swmtu) + (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN)) +#define 
MLX5E_SW2MB_MTU(swmtu) \ + (MLX5E_SW2HW_MTU(swmtu) + MLX5E_NET_IP_ALIGN) +#define MLX5E_MTU_MIN 72 /* Min MTU allowed by the kernel */ +#define MLX5E_MTU_MAX MIN(ETHERMTU_JUMBO, MJUM16BYTES) /* Max MTU of Ethernet jumbo frames */ + +#define MLX5E_BUDGET_MAX 8192 /* RX and TX */ +#define MLX5E_RX_BUDGET_MAX 256 +#define MLX5E_SQ_BF_BUDGET 16 +#define MLX5E_SQ_TX_QUEUE_SIZE 4096 /* SQ drbr queue size */ + +#define MLX5E_MAX_TX_NUM_TC 8 /* units */ +#define MLX5E_MAX_TX_HEADER 128 /* bytes */ +#define MLX5E_MAX_TX_PAYLOAD_SIZE 65536 /* bytes */ +#define MLX5E_MAX_TX_MBUF_SIZE 65536 /* bytes */ +#define MLX5E_MAX_TX_MBUF_FRAGS \ + ((MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) - \ + (MLX5E_MAX_TX_HEADER / MLX5_SEND_WQE_DS)) /* units */ +#define MLX5E_MAX_TX_INLINE \ + (MLX5E_MAX_TX_HEADER - sizeof(struct mlx5e_tx_wqe) + \ + sizeof(((struct mlx5e_tx_wqe *)0)->eth.inline_hdr_start)) /* bytes */ + +MALLOC_DECLARE(M_MLX5EN); + +struct mlx5_core_dev; +struct mlx5e_cq; + +typedef void (mlx5e_cq_comp_t)(struct mlx5_core_cq *); + +#define MLX5E_STATS_COUNT(a,b,c,d) a +#define MLX5E_STATS_VAR(a,b,c,d) b; +#define MLX5E_STATS_DESC(a,b,c,d) c, d, + +#define MLX5E_VPORT_STATS(m) \ + /* HW counters */ \ + m(+1, u64 rx_packets, "rx_packets", "Received packets") \ + m(+1, u64 rx_bytes, "rx_bytes", "Received bytes") \ + m(+1, u64 tx_packets, "tx_packets", "Transmitted packets") \ + m(+1, u64 tx_bytes, "tx_bytes", "Transmitted bytes") \ + m(+1, u64 rx_error_packets, "rx_error_packets", "Received error packets") \ + m(+1, u64 rx_error_bytes, "rx_error_bytes", "Received error bytes") \ + m(+1, u64 tx_error_packets, "tx_error_packets", "Transmitted error packets") \ + m(+1, u64 tx_error_bytes, "tx_error_bytes", "Transmitted error bytes") \ + m(+1, u64 rx_unicast_packets, "rx_unicast_packets", "Received unicast packets") \ + m(+1, u64 rx_unicast_bytes, "rx_unicast_bytes", "Received unicast bytes") \ + m(+1, u64 tx_unicast_packets, "tx_unicast_packets", "Transmitted unicast packets") 
\ + m(+1, u64 tx_unicast_bytes, "tx_unicast_bytes", "Transmitted unicast bytes") \ + m(+1, u64 rx_multicast_packets, "rx_multicast_packets", "Received multicast packets") \ + m(+1, u64 rx_multicast_bytes, "rx_multicast_bytes", "Received multicast bytes") \ + m(+1, u64 tx_multicast_packets, "tx_multicast_packets", "Transmitted multicast packets") \ + m(+1, u64 tx_multicast_bytes, "tx_multicast_bytes", "Transmitted multicast bytes") \ + m(+1, u64 rx_broadcast_packets, "rx_broadcast_packets", "Received broadcast packets") \ + m(+1, u64 rx_broadcast_bytes, "rx_broadcast_bytes", "Received broadcast bytes") \ + m(+1, u64 tx_broadcast_packets, "tx_broadcast_packets", "Transmitted broadcast packets") \ + m(+1, u64 tx_broadcast_bytes, "tx_broadcast_bytes", "Transmitted broadcast bytes") \ + /* SW counters */ \ + m(+1, u64 tso_packets, "tso_packets", "Transmitted TSO packets") \ + m(+1, u64 tso_bytes, "tso_bytes", "Transmitted TSO bytes") \ + m(+1, u64 lro_packets, "lro_packets", "Received LRO packets") \ + m(+1, u64 lro_bytes, "lro_bytes", "Received LRO bytes") \ + m(+1, u64 sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \ + m(+1, u64 sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \ + m(+1, u64 rx_csum_good, "rx_csum_good", "Received checksum valid packets") \ + m(+1, u64 rx_csum_none, "rx_csum_none", "Received no checksum packets") \ + m(+1, u64 tx_csum_offload, "tx_csum_offload", "Transmit checksum offload packets") \ + m(+1, u64 tx_queue_dropped, "tx_queue_dropped", "Transmit queue dropped") \ + m(+1, u64 tx_defragged, "tx_defragged", "Transmit queue defragged") \ + m(+1, u64 rx_wqe_err, "rx_wqe_err", "Receive WQE errors") + +#define MLX5E_VPORT_STATS_NUM (0 MLX5E_VPORT_STATS(MLX5E_STATS_COUNT)) + +struct mlx5e_vport_stats { + struct sysctl_ctx_list ctx; + u64 arg [0]; + MLX5E_VPORT_STATS(MLX5E_STATS_VAR) +}; + +#define MLX5E_PPORT_IEEE802_3_STATS(m) \ + m(+1, u64 frames_tx, "frames_tx", "Frames transmitted") \ + m(+1, u64 frames_rx, 
"frames_rx", "Frames received") \ + m(+1, u64 check_seq_err, "check_seq_err", "Sequence errors") \ + m(+1, u64 alignment_err, "alignment_err", "Alignment errors") \ + m(+1, u64 octets_tx, "octets_tx", "Bytes transmitted") \ + m(+1, u64 octets_received, "octets_received", "Bytes received") \ + m(+1, u64 multicast_xmitted, "multicast_xmitted", "Multicast transmitted") \ + m(+1, u64 broadcast_xmitted, "broadcast_xmitted", "Broadcast transmitted") \ + m(+1, u64 multicast_rx, "multicast_rx", "Multicast received") \ + m(+1, u64 broadcast_rx, "broadcast_rx", "Broadcast received") \ + m(+1, u64 in_range_len_errors, "in_range_len_errors", "In range length errors") \ + m(+1, u64 out_of_range_len, "out_of_range_len", "Out of range length errors") \ + m(+1, u64 too_long_errors, "too_long_errors", "Too long errors") \ + m(+1, u64 symbol_err, "symbol_err", "Symbol errors") \ + m(+1, u64 mac_control_tx, "mac_control_tx", "MAC control transmitted") \ + m(+1, u64 mac_control_rx, "mac_control_rx", "MAC control received") \ + m(+1, u64 unsupported_op_rx, "unsupported_op_rx", "Unsupported operation received") \ + m(+1, u64 pause_ctrl_rx, "pause_ctrl_rx", "Pause control received") \ + m(+1, u64 pause_ctrl_tx, "pause_ctrl_tx", "Pause control transmitted") + +#define MLX5E_PPORT_RFC2819_STATS(m) \ + m(+1, u64 drop_events, "drop_events", "Dropped events") \ + m(+1, u64 octets, "octets", "Octets") \ + m(+1, u64 pkts, "pkts", "Packets") \ + m(+1, u64 broadcast_pkts, "broadcast_pkts", "Broadcast packets") \ + m(+1, u64 multicast_pkts, "multicast_pkts", "Multicast packets") \ + m(+1, u64 crc_align_errors, "crc_align_errors", "CRC alignment errors") \ + m(+1, u64 undersize_pkts, "undersize_pkts", "Undersized packets") \ + m(+1, u64 oversize_pkts, "oversize_pkts", "Oversized packets") \ + m(+1, u64 fragments, "fragments", "Fragments") \ + m(+1, u64 jabbers, "jabbers", "Jabbers") \ + m(+1, u64 collisions, "collisions", "Collisions") + +#define MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \ + m(+1, u64 
p64octets, "p64octets", "Bytes") \ + m(+1, u64 p65to127octets, "p65to127octets", "Bytes") \ + m(+1, u64 p128to255octets, "p128to255octets", "Bytes") \ + m(+1, u64 p256to511octets, "p256to511octets", "Bytes") \ + m(+1, u64 p512to1023octets, "p512to1023octets", "Bytes") \ + m(+1, u64 p1024to1518octets, "p1024to1518octets", "Bytes") \ + m(+1, u64 p1519to2047octets, "p1519to2047octets", "Bytes") \ + m(+1, u64 p2048to4095octets, "p2048to4095octets", "Bytes") \ + m(+1, u64 p4096to8191octets, "p4096to8191octets", "Bytes") \ + m(+1, u64 p8192to10239octets, "p8192to10239octets", "Bytes") + +#define MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \ + m(+1, u64 in_octets, "in_octets", "In octets") \ + m(+1, u64 in_ucast_pkts, "in_ucast_pkts", "In unicast packets") \ + m(+1, u64 in_discards, "in_discards", "In discards") \ + m(+1, u64 in_errors, "in_errors", "In errors") \ + m(+1, u64 in_unknown_protos, "in_unknown_protos", "In unknown protocols") \ + m(+1, u64 out_octets, "out_octets", "Out octets") \ + m(+1, u64 out_ucast_pkts, "out_ucast_pkts", "Out unicast packets") \ + m(+1, u64 out_discards, "out_discards", "Out discards") \ + m(+1, u64 out_errors, "out_errors", "Out errors") \ + m(+1, u64 in_multicast_pkts, "in_multicast_pkts", "In multicast packets") \ + m(+1, u64 in_broadcast_pkts, "in_broadcast_pkts", "In broadcast packets") \ + m(+1, u64 out_multicast_pkts, "out_multicast_pkts", "Out multicast packets") \ + m(+1, u64 out_broadcast_pkts, "out_broadcast_pkts", "Out broadcast packets") + +#define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m) \ + m(+1, u64 time_since_last_clear, "time_since_last_clear", \ + "Time since the last counters clear event (msec)") \ + m(+1, u64 symbol_errors, "symbol_errors", "Symbol errors") \ + m(+1, u64 sync_headers_errors, "sync_headers_errors", "Sync header error counter") \ + m(+1, u64 bip_errors_lane0, "edpl_bip_errors_lane0", \ + "Indicates the number of PRBS errors on lane 0") \ + m(+1, u64 bip_errors_lane1, "edpl_bip_errors_lane1", \ + "Indicates the 
number of PRBS errors on lane 1") \ + m(+1, u64 bip_errors_lane2, "edpl_bip_errors_lane2", \ + "Indicates the number of PRBS errors on lane 2") \ + m(+1, u64 bip_errors_lane3, "edpl_bip_errors_lane3", \ + "Indicates the number of PRBS errors on lane 3") \ + m(+1, u64 fc_corrected_blocks_lane0, "fc_corrected_blocks_lane0", \ + "FEC correctable block counter lane 0") \ + m(+1, u64 fc_corrected_blocks_lane1, "fc_corrected_blocks_lane1", \ + "FEC correctable block counter lane 1") \ + m(+1, u64 fc_corrected_blocks_lane2, "fc_corrected_blocks_lane2", \ + "FEC correctable block counter lane 2") \ + m(+1, u64 fc_corrected_blocks_lane3, "fc_corrected_blocks_lane3", \ + "FEC correctable block counter lane 3") \ + m(+1, u64 rs_corrected_blocks, "rs_corrected_blocks", \ + "FEC correcable block counter") \ + m(+1, u64 rs_uncorrectable_blocks, "rs_uncorrectable_blocks", \ + "FEC uncorrecable block counter") \ + m(+1, u64 rs_no_errors_blocks, "rs_no_errors_blocks", \ + "The number of RS-FEC blocks received that had no errors") \ + m(+1, u64 rs_single_error_blocks, "rs_single_error_blocks", \ + "The number of corrected RS-FEC blocks received that had" \ + "exactly 1 error symbol") \ + m(+1, u64 rs_corrected_symbols_total, "rs_corrected_symbols_total", \ + "Port FEC corrected symbol counter") \ + m(+1, u64 rs_corrected_symbols_lane0, "rs_corrected_symbols_lane0", \ + "FEC corrected symbol counter lane 0") \ + m(+1, u64 rs_corrected_symbols_lane1, "rs_corrected_symbols_lane1", \ + "FEC corrected symbol counter lane 1") \ + m(+1, u64 rs_corrected_symbols_lane2, "rs_corrected_symbols_lane2", \ + "FEC corrected symbol counter lane 2") \ + m(+1, u64 rs_corrected_symbols_lane3, "rs_corrected_symbols_lane3", \ + "FEC corrected symbol counter lane 3") \ + +#define MLX5E_PPORT_Q_CONTERS(m) \ + m(+1, u64 out_of_rx_buffer, "out_of_rx_buffer", "out of rx buffers aka no recv wqes events") + +/* + * Make sure to update mlx5e_update_pport_counters() + * when adding a new MLX5E_PPORT_STATS block 
+ */ +#define MLX5E_PPORT_STATS(m) \ + MLX5E_PPORT_IEEE802_3_STATS(m) \ + MLX5E_PPORT_RFC2819_STATS(m) \ + MLX5E_PPORT_Q_CONTERS(m) + +#define MLX5E_PORT_STATS_DEBUG(m) \ + MLX5E_PPORT_RFC2819_STATS_DEBUG(m) \ + MLX5E_PPORT_RFC2863_STATS_DEBUG(m) \ + MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m) + +#define MLX5E_PPORT_IEEE802_3_STATS_NUM \ + (0 MLX5E_PPORT_IEEE802_3_STATS(MLX5E_STATS_COUNT)) +#define MLX5E_PPORT_RFC2819_STATS_NUM \ + (0 MLX5E_PPORT_RFC2819_STATS(MLX5E_STATS_COUNT)) +#define MLX5E_PPORT_STATS_NUM \ + (0 MLX5E_PPORT_STATS(MLX5E_STATS_COUNT)) + +#define MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM \ + (0 MLX5E_PPORT_RFC2819_STATS_DEBUG(MLX5E_STATS_COUNT)) +#define MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM \ + (0 MLX5E_PPORT_RFC2863_STATS_DEBUG(MLX5E_STATS_COUNT)) +#define MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM \ + (0 MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(MLX5E_STATS_COUNT)) +#define MLX5E_PORT_STATS_DEBUG_NUM \ + (0 MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_COUNT)) + +struct mlx5e_pport_stats { + struct sysctl_ctx_list ctx; + u64 arg [0]; + MLX5E_PPORT_STATS(MLX5E_STATS_VAR) +}; + +struct mlx5e_port_stats_debug { + struct sysctl_ctx_list ctx; + u64 arg [0]; + MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_VAR) +}; + +#define MLX5E_RQ_STATS(m) \ + m(+1, u64 packets, "packets", "Received packets") \ + m(+1, u64 csum_none, "csum_none", "Received packets") \ + m(+1, u64 lro_packets, "lro_packets", "Received packets") \ + m(+1, u64 lro_bytes, "lro_bytes", "Received packets") \ + m(+1, u64 sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO") \ + m(+1, u64 sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO") \ + m(+1, u64 wqe_err, "wqe_err", "Received packets") + +#define MLX5E_RQ_STATS_NUM (0 MLX5E_RQ_STATS(MLX5E_STATS_COUNT)) + +struct mlx5e_rq_stats { + struct sysctl_ctx_list ctx; + u64 arg [0]; + MLX5E_RQ_STATS(MLX5E_STATS_VAR) +}; + +#define MLX5E_SQ_STATS(m) \ + m(+1, u64 packets, "packets", "Transmitted packets") \ + m(+1, u64 tso_packets, "tso_packets", 
"Transmitted packets") \ + m(+1, u64 tso_bytes, "tso_bytes", "Transmitted bytes") \ + m(+1, u64 csum_offload_none, "csum_offload_none", "Transmitted packets") \ + m(+1, u64 defragged, "defragged", "Transmitted packets") \ + m(+1, u64 dropped, "dropped", "Transmitted packets") \ + m(+1, u64 nop, "nop", "Transmitted packets") + +#define MLX5E_SQ_STATS_NUM (0 MLX5E_SQ_STATS(MLX5E_STATS_COUNT)) + +struct mlx5e_sq_stats { + struct sysctl_ctx_list ctx; + u64 arg [0]; + MLX5E_SQ_STATS(MLX5E_STATS_VAR) +}; + +struct mlx5e_stats { + struct mlx5e_vport_stats vport; + struct mlx5e_pport_stats pport; + struct mlx5e_port_stats_debug port_stats_debug; +}; + +struct mlx5e_params { + u8 log_sq_size; + u8 log_rq_size; + u16 num_channels; + u8 default_vlan_prio; + u8 num_tc; + u8 rx_cq_moderation_mode; + u16 rx_cq_moderation_usec; + u16 rx_cq_moderation_pkts; + u16 tx_cq_moderation_usec; + u16 tx_cq_moderation_pkts; + u16 min_rx_wqes; + bool hw_lro_en; + u32 lro_wqe_sz; + u16 rx_hash_log_tbl_sz; +}; + +#define MLX5E_PARAMS(m) \ + m(+1, u64 tx_pauseframe_control, "tx_pauseframe_control", "Set to enable TX pause frames. Clear to disable.") \ + m(+1, u64 rx_pauseframe_control, "rx_pauseframe_control", "Set to enable RX pause frames. 
Clear to disable.") \ + m(+1, u64 tx_queue_size_max, "tx_queue_size_max", "Max send queue size") \ + m(+1, u64 rx_queue_size_max, "rx_queue_size_max", "Max receive queue size") \ + m(+1, u64 tx_queue_size, "tx_queue_size", "Default send queue size") \ + m(+1, u64 rx_queue_size, "rx_queue_size", "Default receive queue size") \ + m(+1, u64 channels, "channels", "Default number of channels") \ + m(+1, u64 coalesce_usecs_max, "coalesce_usecs_max", "Maximum usecs for joining packets") \ + m(+1, u64 coalesce_pkts_max, "coalesce_pkts_max", "Maximum packets to join") \ + m(+1, u64 rx_coalesce_usecs, "rx_coalesce_usecs", "Limit in usec for joining rx packets") \ + m(+1, u64 rx_coalesce_pkts, "rx_coalesce_pkts", "Maximum number of rx packets to join") \ + m(+1, u64 rx_coalesce_mode, "rx_coalesce_mode", "0: EQE mode 1: CQE mode") \ + m(+1, u64 tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining tx packets") \ + m(+1, u64 tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of tx packets to join") \ + m(+1, u64 hw_lro, "hw_lro", "set to enable hw_lro") + +#define MLX5E_PARAMS_NUM (0 MLX5E_PARAMS(MLX5E_STATS_COUNT)) + +struct mlx5e_params_ethtool { + u64 arg [0]; + MLX5E_PARAMS(MLX5E_STATS_VAR) +}; + +/* EEPROM Standards for plug in modules */ +#ifndef MLX5E_ETH_MODULE_SFF_8472 +#define MLX5E_ETH_MODULE_SFF_8472 0x1 +#define MLX5E_ETH_MODULE_SFF_8472_LEN 128 +#endif + +#ifndef MLX5E_ETH_MODULE_SFF_8636 +#define MLX5E_ETH_MODULE_SFF_8636 0x2 +#define MLX5E_ETH_MODULE_SFF_8636_LEN 256 +#endif + +#ifndef MLX5E_ETH_MODULE_SFF_8436 +#define MLX5E_ETH_MODULE_SFF_8436 0x3 +#define MLX5E_ETH_MODULE_SFF_8436_LEN 256 +#endif + +/* EEPROM I2C Addresses */ +#define MLX5E_I2C_ADDR_LOW 0x50 +#define MLX5E_I2C_ADDR_HIGH 0x51 + +#define MLX5E_EEPROM_LOW_PAGE 0x0 +#define MLX5E_EEPROM_HIGH_PAGE 0x3 + +#define MLX5E_EEPROM_HIGH_PAGE_OFFSET 128 +#define MLX5E_EEPROM_PAGE_LENGTH 256 + +#define MLX5E_EEPROM_INFO_BYTES 0x3 + +struct mlx5e_cq { + /* data path - accessed per cqe */ + 
struct mlx5_cqwq wq; + + /* data path - accessed per HW polling */ + struct mlx5_core_cq mcq; + struct mlx5e_channel *channel; + + /* control */ + struct mlx5_wq_ctrl wq_ctrl; +} __aligned(MLX5E_CACHELINE_SIZE); + +struct mlx5e_rq_mbuf { + bus_dmamap_t dma_map; + caddr_t data; + struct mbuf *mbuf; +}; + +struct mlx5e_rq { + /* data path */ + struct mlx5_wq_ll wq; + struct mtx mtx; + bus_dma_tag_t dma_tag; + u32 wqe_sz; + struct mlx5e_rq_mbuf *mbuf; + struct device *pdev; + struct ifnet *ifp; + struct mlx5e_rq_stats stats; + struct mlx5e_cq cq; +#ifdef HAVE_TURBO_LRO + struct tlro_ctrl lro; +#else + struct lro_ctrl lro; +#endif + volatile int enabled; + int ix; + + /* control */ + struct mlx5_wq_ctrl wq_ctrl; + u32 rqn; + struct mlx5e_channel *channel; +} __aligned(MLX5E_CACHELINE_SIZE); + +struct mlx5e_sq_mbuf { + bus_dmamap_t dma_map; + struct mbuf *mbuf; + u32 num_bytes; + u32 num_wqebbs; +}; + +enum { + MLX5E_SQ_READY, + MLX5E_SQ_FULL +}; + +struct mlx5e_sq { + /* data path */ + struct mtx lock; + bus_dma_tag_t dma_tag; + struct mtx comp_lock; + + /* dirtied @completion */ + u16 cc; + + /* dirtied @xmit */ + u16 pc __aligned(MLX5E_CACHELINE_SIZE); + u16 bf_offset; + struct mlx5e_sq_stats stats; + + struct mlx5e_cq cq; + struct task sq_task; + struct taskqueue *sq_tq; + + /* pointers to per packet info: write@xmit, read@completion */ + struct mlx5e_sq_mbuf *mbuf; + struct buf_ring *br; + + /* read only */ + struct mlx5_wq_cyc wq; + void __iomem *uar_map; + void __iomem *uar_bf_map; + u32 sqn; + u32 bf_buf_size; + struct device *pdev; + u32 mkey_be; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5_uar uar; + struct mlx5e_channel *channel; + int tc; + unsigned int queue_state; +} __aligned(MLX5E_CACHELINE_SIZE); + +static inline bool +mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) +{ + return ((sq->wq.sz_m1 & (sq->cc - sq->pc)) >= n || + sq->cc == sq->pc); +} + +struct mlx5e_channel { + /* data path */ + struct mlx5e_rq rq; + struct mlx5e_sq 
sq[MLX5E_MAX_TX_NUM_TC]; + struct device *pdev; + struct ifnet *ifp; + u32 mkey_be; + u8 num_tc; + + /* control */ + struct mlx5e_priv *priv; + int ix; + int cpu; +} __aligned(MLX5E_CACHELINE_SIZE); + +enum mlx5e_traffic_types { + MLX5E_TT_IPV4_TCP, + MLX5E_TT_IPV6_TCP, + MLX5E_TT_IPV4_UDP, + MLX5E_TT_IPV6_UDP, + MLX5E_TT_IPV4_IPSEC_AH, + MLX5E_TT_IPV6_IPSEC_AH, + MLX5E_TT_IPV4_IPSEC_ESP, + MLX5E_TT_IPV6_IPSEC_ESP, + MLX5E_TT_IPV4, + MLX5E_TT_IPV6, + MLX5E_TT_ANY, + MLX5E_NUM_TT, +}; + +enum { + MLX5E_RQT_SPREADING = 0, + MLX5E_RQT_DEFAULT_RQ = 1, + MLX5E_NUM_RQT = 2, +}; + +struct mlx5e_eth_addr_info { + u8 addr [ETH_ALEN + 2]; + u32 tt_vec; + u32 ft_ix[MLX5E_NUM_TT]; /* flow table index per traffic type */ +}; + +#define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE) + +struct mlx5e_eth_addr_hash_node; + +struct mlx5e_eth_addr_hash_head { + struct mlx5e_eth_addr_hash_node *lh_first; +}; + +struct mlx5e_eth_addr_db { + struct mlx5e_eth_addr_hash_head if_uc[MLX5E_ETH_ADDR_HASH_SIZE]; + struct mlx5e_eth_addr_hash_head if_mc[MLX5E_ETH_ADDR_HASH_SIZE]; + struct mlx5e_eth_addr_info broadcast; + struct mlx5e_eth_addr_info allmulti; + struct mlx5e_eth_addr_info promisc; + bool broadcast_enabled; + bool allmulti_enabled; + bool promisc_enabled; +}; + +enum { + MLX5E_STATE_ASYNC_EVENTS_ENABLE, + MLX5E_STATE_OPENED, +}; + +struct mlx5e_vlan_db { + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + u32 active_vlans_ft_ix[VLAN_N_VID]; + u32 untagged_rule_ft_ix; + u32 any_vlan_rule_ft_ix; + bool filter_disabled; +}; + +struct mlx5e_flow_table { + void *vlan; + void *main; +}; + +struct mlx5e_priv { + /* priv data path fields - start */ + int order_base_2_num_channels; + int queue_mapping_channel_mask; + int num_tc; + int default_vlan_prio; + /* priv data path fields - end */ + + unsigned long state; + int gone; +#define PRIV_LOCK(priv) sx_xlock(&(priv)->state_lock) +#define PRIV_UNLOCK(priv) sx_xunlock(&(priv)->state_lock) +#define PRIV_LOCKED(priv) 
sx_xlocked(&(priv)->state_lock) + struct sx state_lock; /* Protects Interface state */ + struct mlx5_uar cq_uar; + u32 pdn; + u32 tdn; + struct mlx5_core_mr mr; + + struct mlx5e_channel * volatile *channel; + u32 tisn[MLX5E_MAX_TX_NUM_TC]; + u32 rqtn; + u32 tirn[MLX5E_NUM_TT]; + + struct mlx5e_flow_table ft; + struct mlx5e_eth_addr_db eth_addr; + struct mlx5e_vlan_db vlan; + + struct mlx5e_params params; + struct mlx5e_params_ethtool params_ethtool; + struct mtx async_events_mtx; /* sync hw events */ + struct work_struct update_stats_work; + struct work_struct update_carrier_work; + struct work_struct set_rx_mode_work; + + struct mlx5_core_dev *mdev; + struct ifnet *ifp; + struct sysctl_ctx_list sysctl_ctx; + struct sysctl_oid *sysctl_ifnet; + struct sysctl_oid *sysctl_hw; + int sysctl_debug; + struct mlx5e_stats stats; + int counter_set_id; + + eventhandler_tag vlan_detach; + eventhandler_tag vlan_attach; + struct ifmedia media; + int media_status_last; + int media_active_last; + + struct callout watchdog; +}; + +#define MLX5E_NET_IP_ALIGN 2 + +struct mlx5e_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_eth_seg eth; +}; + +struct mlx5e_rx_wqe { + struct mlx5_wqe_srq_next_seg next; + struct mlx5_wqe_data_seg data; +}; + +struct mlx5e_eeprom { + int lock_bit; + int i2c_addr; + int page_num; + int device_addr; + int module_num; + int len; + int type; + int page_valid; + u32 *data; +}; + +enum mlx5e_link_mode { + MLX5E_1000BASE_CX_SGMII = 0, + MLX5E_1000BASE_KX = 1, + MLX5E_10GBASE_CX4 = 2, + MLX5E_10GBASE_KX4 = 3, + MLX5E_10GBASE_KR = 4, + MLX5E_20GBASE_KR2 = 5, + MLX5E_40GBASE_CR4 = 6, + MLX5E_40GBASE_KR4 = 7, + MLX5E_56GBASE_R4 = 8, + MLX5E_10GBASE_CR = 12, + MLX5E_10GBASE_SR = 13, + MLX5E_10GBASE_ER = 14, + MLX5E_40GBASE_SR4 = 15, + MLX5E_40GBASE_LR4 = 16, + MLX5E_100GBASE_CR4 = 20, + MLX5E_100GBASE_SR4 = 21, + MLX5E_100GBASE_KR4 = 22, + MLX5E_100GBASE_LR4 = 23, + MLX5E_100BASE_TX = 24, + MLX5E_100BASE_T = 25, + MLX5E_10GBASE_T = 26, + 
MLX5E_25GBASE_CR = 27, + MLX5E_25GBASE_KR = 28, + MLX5E_25GBASE_SR = 29, + MLX5E_50GBASE_CR2 = 30, + MLX5E_50GBASE_KR2 = 31, + MLX5E_LINK_MODES_NUMBER, +}; + +#define MLX5E_PROT_MASK(link_mode) (1 << (link_mode)) +#define MLX5E_FLD_MAX(typ, fld) ((1ULL << __mlx5_bit_sz(typ, fld)) - 1ULL) + +int mlx5e_xmit(struct ifnet *, struct mbuf *); + +int mlx5e_open_locked(struct ifnet *); +int mlx5e_close_locked(struct ifnet *); + +void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event); +void mlx5e_rx_cq_comp(struct mlx5_core_cq *); +void mlx5e_tx_cq_comp(struct mlx5_core_cq *); +struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); +void mlx5e_tx_que(void *context, int pending); + +int mlx5e_open_flow_table(struct mlx5e_priv *priv); +void mlx5e_close_flow_table(struct mlx5e_priv *priv); +void mlx5e_set_rx_mode_core(struct mlx5e_priv *priv); +void mlx5e_set_rx_mode_work(struct work_struct *work); + +void mlx5e_vlan_rx_add_vid(void *, struct ifnet *, u16); +void mlx5e_vlan_rx_kill_vid(void *, struct ifnet *, u16); +void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); +void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); +int mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv); +void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv); + +static inline void +mlx5e_tx_notify_hw(struct mlx5e_sq *sq, + struct mlx5e_tx_wqe *wqe, int bf_sz) +{ + u16 ofst = MLX5_BF_OFFSET + sq->bf_offset; + + /* ensure wqe is visible to device before updating doorbell record */ + wmb(); + + *sq->wq.db = cpu_to_be32(sq->pc); + + /* + * Ensure the doorbell record is visible to device before ringing + * the doorbell: + */ + wmb(); + + if (bf_sz) { + __iowrite64_copy(sq->uar_bf_map + ofst, &wqe->ctrl, bf_sz); + + /* flush the write-combining mapped buffer */ + wmb(); + + } else { + mlx5_write64((__be32 *)&wqe->ctrl, sq->uar_map + ofst, NULL); + } + + sq->bf_offset ^= sq->bf_buf_size; +} + +static inline void +mlx5e_cq_arm(struct mlx5e_cq *cq) +{ + struct mlx5_core_cq *mcq; + + mcq = &cq->mcq; 
+ mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc); +} + +extern const struct ethtool_ops mlx5e_ethtool_ops; +void mlx5e_create_ethtool(struct mlx5e_priv *); +void mlx5e_create_stats(struct sysctl_ctx_list *, + struct sysctl_oid_list *, const char *, + const char **, unsigned, u64 *); +void mlx5e_send_nop(struct mlx5e_sq *, u32, bool); + +#endif /* _MLX5_EN_H_ */ diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c b/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c new file mode 100644 index 0000000..2ad2255 --- /dev/null +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c @@ -0,0 +1,493 @@ +/*- + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 *
 * $FreeBSD$
 */

#include "en.h"
#include <net/sff8472.h>

/*
 * Create a sysctl node named "buffer" under "parent" and register
 * "num" read-only 64-bit statistics below it.  "desc" holds
 * interleaved name/description string pairs: desc[2 * x] is the sysctl
 * name and desc[2 * x + 1] its description.  "arg" points to the array
 * of u64 counters being exported.  Silently returns if the node cannot
 * be created.
 */
void
mlx5e_create_stats(struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *parent, const char *buffer,
    const char **desc, unsigned num, u64 * arg)
{
	struct sysctl_oid *node;
	unsigned x;

	sysctl_ctx_init(ctx);

	node = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO,
	    buffer, CTLFLAG_RD, NULL, "Statistics");
	if (node == NULL)
		return;
	for (x = 0; x != num; x++) {
		SYSCTL_ADD_UQUAD(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
		    desc[2 * x], CTLFLAG_RD, arg + x, desc[2 * x + 1]);
	}
}

/*
 * Common sysctl handler for all tunables exported through
 * priv->params_ethtool.  "arg1" is the mlx5e private softc and "arg2"
 * is the index of the tunable inside params_ethtool.arg[].  A new
 * value is range checked in place and mirrored into priv->params.
 * Pause-frame changes are pushed straight to firmware; every other
 * change closes and re-opens the interface (if it was open) so the new
 * parameters take effect.  Returns zero or an errno value; the whole
 * function runs under PRIV_LOCK.
 */
static int
mlx5e_ethtool_handler(SYSCTL_HANDLER_ARGS)
{
	struct mlx5e_priv *priv = arg1;
	uint64_t value;
	int was_opened;
	int error;

	PRIV_LOCK(priv);
	value = priv->params_ethtool.arg[arg2];
	error = sysctl_handle_64(oidp, &value, 0, req);
	/* nothing to do on read requests or when the value is unchanged */
	if (error || req->newptr == NULL ||
	    value == priv->params_ethtool.arg[arg2])
		goto done;

	/* assign new value */
	priv->params_ethtool.arg[arg2] = value;

	/* check if device is gone */
	if (priv->gone) {
		error = ENXIO;
		goto done;
	}

	/*
	 * Pause-frame settings do not require an interface restart;
	 * normalize them to 0/1 and program the firmware directly.
	 */
	if (&priv->params_ethtool.arg[arg2] == &priv->params_ethtool.rx_pauseframe_control ||
	    &priv->params_ethtool.arg[arg2] == &priv->params_ethtool.tx_pauseframe_control) {
		/* range check parameters */
		priv->params_ethtool.rx_pauseframe_control =
		    priv->params_ethtool.rx_pauseframe_control ? 1 : 0;
		priv->params_ethtool.tx_pauseframe_control =
		    priv->params_ethtool.tx_pauseframe_control ? 1 : 0;

		/* update firmware */
		/* NOTE(review): second argument presumably selects port 1 -- confirm */
		error = -mlx5_set_port_pause(priv->mdev, 1,
		    priv->params_ethtool.rx_pauseframe_control,
		    priv->params_ethtool.tx_pauseframe_control);
		goto done;
	}

	/* all remaining parameters require a restart to take effect */
	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
	if (was_opened)
		mlx5e_close_locked(priv->ifp);

	/* import TX queue size; clamp to [2^MIN_LOG_SQ_SIZE, max] */
	if (priv->params_ethtool.tx_queue_size <
	    (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) {
		priv->params_ethtool.tx_queue_size =
		    (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
	} else if (priv->params_ethtool.tx_queue_size >
	    priv->params_ethtool.tx_queue_size_max) {
		priv->params_ethtool.tx_queue_size =
		    priv->params_ethtool.tx_queue_size_max;
	}
	priv->params.log_sq_size =
	    order_base_2(priv->params_ethtool.tx_queue_size);

	/* import RX queue size; clamp to [2^MIN_LOG_RQ_SIZE, max] */
	if (priv->params_ethtool.rx_queue_size <
	    (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) {
		priv->params_ethtool.rx_queue_size =
		    (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE);
	} else if (priv->params_ethtool.rx_queue_size >
	    priv->params_ethtool.rx_queue_size_max) {
		priv->params_ethtool.rx_queue_size =
		    priv->params_ethtool.rx_queue_size_max;
	}
	priv->params.log_rq_size =
	    order_base_2(priv->params_ethtool.rx_queue_size);

	/* keep the minimum RX WQE watermark below the ring size */
	priv->params.min_rx_wqes = min_t (u16,
	    priv->params_ethtool.rx_queue_size - 1,
	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES);

	/* import number of channels; bounded by available EQ vectors */
	if (priv->params_ethtool.channels < 1)
		priv->params_ethtool.channels = 1;
	else if (priv->params_ethtool.channels >
	    (u64) priv->mdev->priv.eq_table.num_comp_vectors) {
		priv->params_ethtool.channels =
		    (u64) priv->mdev->priv.eq_table.num_comp_vectors;
	}
	priv->params.num_channels = priv->params_ethtool.channels;

	/* import RX mode; 0 = EQE based, anything else = CQE based */
	if (priv->params_ethtool.rx_coalesce_mode != 0)
		priv->params_ethtool.rx_coalesce_mode = 1;
	priv->params.rx_cq_moderation_mode = priv->params_ethtool.rx_coalesce_mode;

	/* import RX coal time; clamp to the firmware cq_period field width */
	if (priv->params_ethtool.rx_coalesce_usecs < 1)
		priv->params_ethtool.rx_coalesce_usecs = 0;
	else if (priv->params_ethtool.rx_coalesce_usecs >
	    MLX5E_FLD_MAX(cqc, cq_period)) {
		priv->params_ethtool.rx_coalesce_usecs =
		    MLX5E_FLD_MAX(cqc, cq_period);
	}
	priv->params.rx_cq_moderation_usec = priv->params_ethtool.rx_coalesce_usecs;

	/* import RX coal pkts; clamp to the firmware cq_max_count field width */
	if (priv->params_ethtool.rx_coalesce_pkts < 1)
		priv->params_ethtool.rx_coalesce_pkts = 0;
	else if (priv->params_ethtool.rx_coalesce_pkts >
	    MLX5E_FLD_MAX(cqc, cq_max_count)) {
		priv->params_ethtool.rx_coalesce_pkts =
		    MLX5E_FLD_MAX(cqc, cq_max_count);
	}
	priv->params.rx_cq_moderation_pkts = priv->params_ethtool.rx_coalesce_pkts;

	/* import TX coal time; clamp to the firmware cq_period field width */
	if (priv->params_ethtool.tx_coalesce_usecs < 1)
		priv->params_ethtool.tx_coalesce_usecs = 0;
	else if (priv->params_ethtool.tx_coalesce_usecs >
	    MLX5E_FLD_MAX(cqc, cq_period)) {
		priv->params_ethtool.tx_coalesce_usecs =
		    MLX5E_FLD_MAX(cqc, cq_period);
	}
	priv->params.tx_cq_moderation_usec = priv->params_ethtool.tx_coalesce_usecs;

	/* import TX coal pkts; clamp to the firmware cq_max_count field width */
	if (priv->params_ethtool.tx_coalesce_pkts < 1)
		priv->params_ethtool.tx_coalesce_pkts = 0;
	else if (priv->params_ethtool.tx_coalesce_pkts >
	    MLX5E_FLD_MAX(cqc, cq_max_count)) {
		priv->params_ethtool.tx_coalesce_pkts = MLX5E_FLD_MAX(cqc, cq_max_count);
	}
	priv->params.tx_cq_moderation_pkts = priv->params_ethtool.tx_coalesce_pkts;

	/* we always agree to turn off HW LRO - but not always to turn on */
	if (priv->params_ethtool.hw_lro) {
		/* only 0 and 1 are accepted; reject anything else */
		if (priv->params_ethtool.hw_lro != 1) {
			priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
			error = EINVAL;
			goto done;
		}
		/* HW LRO also requires IFCAP_LRO and device LRO capability */
		if (priv->ifp->if_capenable & IFCAP_LRO)
			priv->params.hw_lro_en = !!MLX5_CAP_ETH(priv->mdev, lro_cap);
	}
	else {
		priv->params.hw_lro_en = false;
	}

	if (was_opened)
		mlx5e_open_locked(priv->ifp);
done:
	PRIV_UNLOCK(priv);
	return (error);
}

/*
 * Read the first three bytes of the eeprom in order to get the needed info
 * for
the whole reading.
 * Byte 0 - Identifier byte
 * Byte 1 - Revision byte
 * Byte 2 - Status byte
 */
static int
mlx5e_get_eeprom_info(struct mlx5e_priv *priv, struct mlx5e_eeprom *eeprom)
{
	struct mlx5_core_dev *dev = priv->mdev;
	u32 data = 0;
	int size_read = 0;
	int ret;

	ret = mlx5_query_module_num(dev, &eeprom->module_num);
	if (ret) {
		if_printf(priv->ifp, "%s:%d: Failed query module error=%d\n",
		    __func__, __LINE__, ret);
		return (ret);
	}

	/* Read the first three bytes to get Identifier, Revision and Status */
	ret = mlx5_query_eeprom(dev, eeprom->i2c_addr, eeprom->page_num,
	    eeprom->device_addr, MLX5E_EEPROM_INFO_BYTES, eeprom->module_num, &data,
	    &size_read);
	if (ret) {
		if_printf(priv->ifp, "%s:%d: Failed query eeprom module error=0x%x\n",
		    __func__, __LINE__, ret);
		return (ret);
	}

	/* derive the EEPROM layout (type and length) from the identifier byte */
	switch (data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK) {
	case SFF_8024_ID_QSFP:
		eeprom->type = MLX5E_ETH_MODULE_SFF_8436;
		eeprom->len = MLX5E_ETH_MODULE_SFF_8436_LEN;
		break;
	case SFF_8024_ID_QSFPPLUS:
	case SFF_8024_ID_QSFP28:
		/* QSFP28, and QSFP+ with revision >= 0x3, use the SFF-8636 map */
		if ((data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK) == SFF_8024_ID_QSFP28 ||
		    ((data & MLX5_EEPROM_REVISION_ID_BYTE_MASK) >> 8) >= 0x3) {
			eeprom->type = MLX5E_ETH_MODULE_SFF_8636;
			eeprom->len = MLX5E_ETH_MODULE_SFF_8636_LEN;
		} else {
			eeprom->type = MLX5E_ETH_MODULE_SFF_8436;
			eeprom->len = MLX5E_ETH_MODULE_SFF_8436_LEN;
		}
		/*
		 * NOTE(review): a clear bit is taken to mean that upper
		 * page 0x03 is implemented - confirm the polarity of
		 * MLX5_EEPROM_PAGE_3_VALID_BIT_MASK against the PRM.
		 */
		if ((data & MLX5_EEPROM_PAGE_3_VALID_BIT_MASK) == 0)
			eeprom->page_valid = 1;
		break;
	case SFF_8024_ID_SFP:
		eeprom->type = MLX5E_ETH_MODULE_SFF_8472;
		eeprom->len = MLX5E_ETH_MODULE_SFF_8472_LEN;
		break;
	default:
		if_printf(priv->ifp, "%s:%d: Not recognized cable type = 0x%x\n",
		    __func__, __LINE__, data & MLX5_EEPROM_IDENTIFIER_BYTE_MASK);
		return (EINVAL);
	}
	return (0);
}

/*
 * Read both low and high pages of the eeprom.
 * The low page ("ee->len" bytes) is stored at the start of "ee->data";
 * the high page, when "ee->page_valid" is set, is stored directly after
 * it.  "ee->data" is addressed in 32-bit words, hence the "/4" offsets.
 */
static int
mlx5e_get_eeprom(struct mlx5e_priv *priv, struct mlx5e_eeprom *ee)
{
	struct mlx5_core_dev *dev = priv->mdev;
	int size_read = 0;
	int ret;

	if (ee->len == 0)
		return (EINVAL);

	/* Read low page of the eeprom; the query may return partial reads */
	while (ee->device_addr < ee->len) {
		ret = mlx5_query_eeprom(dev, ee->i2c_addr, ee->page_num, ee->device_addr,
		    ee->len - ee->device_addr, ee->module_num,
		    ee->data + (ee->device_addr/4), &size_read);
		if (ret) {
			if_printf(priv->ifp, "%s:%d: Failed reading eeprom, "
			    "error = 0x%02x\n", __func__, __LINE__, ret);
			return (ret);
		}
		ee->device_addr += size_read;
	}

	/* Read high page of the eeprom */
	if (ee->page_valid) {
		ee->device_addr = MLX5E_EEPROM_HIGH_PAGE_OFFSET;
		ee->page_num = MLX5E_EEPROM_HIGH_PAGE;
		size_read = 0;
		while (ee->device_addr < MLX5E_EEPROM_PAGE_LENGTH) {
			ret = mlx5_query_eeprom(dev, ee->i2c_addr, ee->page_num,
			    ee->device_addr, MLX5E_EEPROM_PAGE_LENGTH - ee->device_addr,
			    ee->module_num, ee->data + (ee->len/4) +
			    ((ee->device_addr - MLX5E_EEPROM_HIGH_PAGE_OFFSET)/4),
			    &size_read);
			if (ret) {
				if_printf(priv->ifp, "%s:%d: Failed reading eeprom, "
				    "error = 0x%02x\n", __func__, __LINE__, ret);
				return (ret);
			}
			ee->device_addr += size_read;
		}
	}
	return (0);
}

/*
 * Hex-dump the EEPROM contents gathered by mlx5e_get_eeprom() to the
 * console, 16 bytes per row.  "j" runs linearly through "data", which
 * holds the low page followed immediately by the optional high page.
 */
static void
mlx5e_print_eeprom(struct mlx5e_eeprom *eeprom)
{
	int i, j = 0;
	int row = 0;

	printf("\nOffset\t\tValues\n");
	printf("------\t\t------\n");
	/* NOTE(review): each pass prints 16 bytes, so "len" is expected
	 * to be a multiple of 16 */
	while (row < eeprom->len) {
		printf("0x%04x\t\t",row);
		for (i = 0; i < 16; i++) {
			printf("%02x ", ((u8*)eeprom->data)[j]);
			j++;
			row++;
		}
		printf("\n");
	}

	if (eeprom->page_valid) {
		row = MLX5E_EEPROM_HIGH_PAGE_OFFSET;
		printf("\nUpper Page 0x03\n");
		printf("\nOffset\t\tValues\n");
		printf("------\t\t------\n");
		while (row < MLX5E_EEPROM_PAGE_LENGTH) {
			printf("0x%04x\t\t",row);
			for (i = 0; i < 16; i++) {
				printf("%02x ", ((u8*)eeprom->data)[j]);
				j++;
				row++;
			}
			printf("\n");
		}
	}
}

/*
 * Read cable EEPROM module information by first inspecting the first
 * three bytes to get the initial information for a whole reading.
+ * Information will be printed to dmesg. + */ +static int +mlx5e_read_eeprom(SYSCTL_HANDLER_ARGS) +{ + struct mlx5e_priv *priv = arg1; + struct mlx5e_eeprom eeprom; + int error; + int result = 0; + + PRIV_LOCK(priv); + error = sysctl_handle_int(oidp, &result, 0, req); + if (error || !req->newptr) + goto done; + + /* Check if device is gone */ + if (priv->gone) { + error = ENXIO; + goto done; + } + + if (result == 1) { + eeprom.i2c_addr = MLX5E_I2C_ADDR_LOW; + eeprom.device_addr = 0; + eeprom.page_num = MLX5E_EEPROM_LOW_PAGE; + eeprom.page_valid = 0; + + /* Read three first bytes to get important info */ + error = mlx5e_get_eeprom_info(priv, &eeprom); + if (error) { + if_printf(priv->ifp, "%s:%d: Failed reading eeprom's " + "initial information\n", __func__, __LINE__); + error = 0; + goto done; + } + + /* Allocate needed length buffer and additional space for the 3rd */ + eeprom.data = malloc(eeprom.len + MLX5E_EEPROM_PAGE_LENGTH, + M_MLX5EN, M_WAITOK | M_ZERO); + + /* Read the whole eeprom information */ + error = mlx5e_get_eeprom(priv, &eeprom); + if (error) { + if_printf(priv->ifp, "%s:%d: Failed reading eeprom\n", + __func__, __LINE__); + error = 0; + /* Continue printing partial information in case of an error */ + } + + mlx5e_print_eeprom(&eeprom); + free(eeprom.data, M_MLX5EN); + } +done: + PRIV_UNLOCK(priv); + return (error); +} + +static const char *mlx5e_params_desc[] = { + MLX5E_PARAMS(MLX5E_STATS_DESC) +}; + +static const char *mlx5e_port_stats_debug_desc[] = { + MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_DESC) +}; + +static int +mlx5e_ethtool_debug_stats(SYSCTL_HANDLER_ARGS) +{ + struct mlx5e_priv *priv = arg1; + int error; + int sys_debug; + + sys_debug = priv->sysctl_debug; + error = sysctl_handle_int(oidp, &priv->sysctl_debug, 0, req); + if (error || !req->newptr) + return (error); + priv->sysctl_debug = !!priv->sysctl_debug; + if (sys_debug == priv->sysctl_debug) + return (error); + if (priv->sysctl_debug) + 
mlx5e_create_stats(&priv->stats.port_stats_debug.ctx, + SYSCTL_CHILDREN(priv->sysctl_ifnet), "debug_stats", + mlx5e_port_stats_debug_desc, MLX5E_PORT_STATS_DEBUG_NUM, + priv->stats.port_stats_debug.arg); + else + sysctl_ctx_free(&priv->stats.port_stats_debug.ctx); + return (error); +} + +void +mlx5e_create_ethtool(struct mlx5e_priv *priv) +{ + struct sysctl_oid *node; + const char *pnameunit; + unsigned x; + + /* set some defaults */ + priv->params_ethtool.tx_queue_size_max = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; + priv->params_ethtool.rx_queue_size_max = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + priv->params_ethtool.tx_queue_size = 1 << priv->params.log_sq_size; + priv->params_ethtool.rx_queue_size = 1 << priv->params.log_rq_size; + priv->params_ethtool.channels = priv->params.num_channels; + priv->params_ethtool.coalesce_pkts_max = MLX5E_FLD_MAX(cqc, cq_max_count); + priv->params_ethtool.coalesce_usecs_max = MLX5E_FLD_MAX(cqc, cq_period); + priv->params_ethtool.rx_coalesce_mode = priv->params.rx_cq_moderation_mode; + priv->params_ethtool.rx_coalesce_usecs = priv->params.rx_cq_moderation_usec; + priv->params_ethtool.rx_coalesce_pkts = priv->params.rx_cq_moderation_pkts; + priv->params_ethtool.tx_coalesce_usecs = priv->params.tx_cq_moderation_usec; + priv->params_ethtool.tx_coalesce_pkts = priv->params.tx_cq_moderation_pkts; + priv->params_ethtool.hw_lro = priv->params.hw_lro_en; + + /* create root node */ + node = SYSCTL_ADD_NODE(&priv->sysctl_ctx, + SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, + "conf", CTLFLAG_RW, NULL, "Configuration"); + if (node == NULL) + return; + for (x = 0; x != MLX5E_PARAMS_NUM; x++) { + /* check for read-only parameter */ + if (strstr(mlx5e_params_desc[2 * x], "_max") != NULL) { + SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, + mlx5e_params_desc[2 * x], CTLTYPE_U64 | CTLFLAG_RD | + CTLFLAG_MPSAFE, priv, x, &mlx5e_ethtool_handler, "QU", + mlx5e_params_desc[2 * x + 1]); + } else { + 
SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, + mlx5e_params_desc[2 * x], CTLTYPE_U64 | CTLFLAG_RWTUN | + CTLFLAG_MPSAFE, priv, x, &mlx5e_ethtool_handler, "QU", + mlx5e_params_desc[2 * x + 1]); + } + } + + SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, + "debug_stats", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, + 0, &mlx5e_ethtool_debug_stats, "I", "Extended debug statistics"); + + pnameunit = device_get_nameunit(priv->mdev->pdev->dev.bsddev); + + SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), + OID_AUTO, "device_name", CTLFLAG_RD, + __DECONST(void *, pnameunit), 0, + "PCI device name"); + + /* EEPROM support */ + SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, "eeprom_info", + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, + mlx5e_read_eeprom, "I", "EEPROM information"); +} + diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c new file mode 100644 index 0000000..ab9ea73 --- /dev/null +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c @@ -0,0 +1,870 @@ +/*- + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include "en.h" + +#include <linux/list.h> +#include <dev/mlx5/flow_table.h> + +enum { + MLX5E_FULLMATCH = 0, + MLX5E_ALLMULTI = 1, + MLX5E_PROMISC = 2, +}; + +enum { + MLX5E_UC = 0, + MLX5E_MC_IPV4 = 1, + MLX5E_MC_IPV6 = 2, + MLX5E_MC_OTHER = 3, +}; + +enum { + MLX5E_ACTION_NONE = 0, + MLX5E_ACTION_ADD = 1, + MLX5E_ACTION_DEL = 2, +}; + +struct mlx5e_eth_addr_hash_node { + LIST_ENTRY(mlx5e_eth_addr_hash_node) hlist; + u8 action; + struct mlx5e_eth_addr_info ai; +}; + +static inline int +mlx5e_hash_eth_addr(const u8 * addr) +{ + return (addr[5]); +} + +static void +mlx5e_add_eth_addr_to_hash(struct mlx5e_eth_addr_hash_head *hash, + const u8 * addr) +{ + struct mlx5e_eth_addr_hash_node *hn; + int ix = mlx5e_hash_eth_addr(addr); + + LIST_FOREACH(hn, &hash[ix], hlist) { + if (bcmp(hn->ai.addr, addr, ETHER_ADDR_LEN) == 0) { + if (hn->action == MLX5E_ACTION_DEL) + hn->action = MLX5E_ACTION_NONE; + return; + } + } + + hn = malloc(sizeof(*hn), M_MLX5EN, M_NOWAIT | M_ZERO); + if (hn == NULL) + return; + + ether_addr_copy(hn->ai.addr, addr); + hn->action = MLX5E_ACTION_ADD; + + LIST_INSERT_HEAD(&hash[ix], hn, hlist); +} + +static void +mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn) +{ + LIST_REMOVE(hn, hlist); + free(hn, M_MLX5EN); +} + +static void +mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai) +{ + void *ft = priv->ft.main; + + if (ai->tt_vec & (1 << 
MLX5E_TT_IPV6_TCP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_TCP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV4_TCP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_TCP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV6_UDP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_UDP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV4_UDP)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_UDP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV6)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV4)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4]); + + if (ai->tt_vec & (1 << MLX5E_TT_ANY)) + mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_ANY]); +} + +static int +mlx5e_get_eth_addr_type(const u8 * addr) +{ + if (ETHER_IS_MULTICAST(addr) == 0) + return (MLX5E_UC); + + if ((addr[0] == 0x01) && + (addr[1] == 0x00) && + (addr[2] == 0x5e) && + !(addr[3] & 0x80)) + return (MLX5E_MC_IPV4); + + if ((addr[0] == 0x33) && + (addr[1] == 0x33)) + return (MLX5E_MC_IPV6); + + return (MLX5E_MC_OTHER); +} + +static u32 +mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type) +{ + int eth_addr_type; + u32 ret; + + switch (type) { + case MLX5E_FULLMATCH: + eth_addr_type = mlx5e_get_eth_addr_type(ai->addr); + switch (eth_addr_type) { + case MLX5E_UC: + ret = + (1 << MLX5E_TT_IPV4_TCP) | + (1 << MLX5E_TT_IPV6_TCP) | + (1 << MLX5E_TT_IPV4_UDP) | + (1 << MLX5E_TT_IPV6_UDP) | + (1 << MLX5E_TT_IPV4) | + (1 << MLX5E_TT_IPV6) | + (1 << MLX5E_TT_ANY) | + 0; + break; + + case MLX5E_MC_IPV4: + ret = + (1 << MLX5E_TT_IPV4_UDP) | + (1 << MLX5E_TT_IPV4) | + 0; + break; + + case MLX5E_MC_IPV6: + ret = + (1 << MLX5E_TT_IPV6_UDP) | + (1 << MLX5E_TT_IPV6) | + 0; + break; + + default: + ret = + (1 << MLX5E_TT_ANY) | + 0; + break; + } + break; + + case MLX5E_ALLMULTI: + ret = + (1 << MLX5E_TT_IPV4_UDP) | + (1 << MLX5E_TT_IPV6_UDP) | + (1 << MLX5E_TT_IPV4) | + (1 << MLX5E_TT_IPV6) | + (1 << MLX5E_TT_ANY) | + 0; + break; + + default: /* 
MLX5E_PROMISC */ + ret = + (1 << MLX5E_TT_IPV4_TCP) | + (1 << MLX5E_TT_IPV6_TCP) | + (1 << MLX5E_TT_IPV4_UDP) | + (1 << MLX5E_TT_IPV6_UDP) | + (1 << MLX5E_TT_IPV4) | + (1 << MLX5E_TT_IPV6) | + (1 << MLX5E_TT_ANY) | + 0; + break; + } + + return (ret); +} + +static int +mlx5e_add_eth_addr_rule_sub(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai, int type, + void *flow_context, void *match_criteria) +{ + u8 match_criteria_enable = 0; + void *match_value; + void *dest; + u8 *dmac; + u8 *match_criteria_dmac; + void *ft = priv->ft.main; + u32 *tirn = priv->tirn; + u32 tt_vec; + int err; + + match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); + dmac = MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dmac_47_16); + match_criteria_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dmac_47_16); + dest = MLX5_ADDR_OF(flow_context, flow_context, destination); + + MLX5_SET(flow_context, flow_context, action, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + MLX5_SET(flow_context, flow_context, destination_list_size, 1); + MLX5_SET(dest_format_struct, dest, destination_type, + MLX5_FLOW_CONTEXT_DEST_TYPE_TIR); + + switch (type) { + case MLX5E_FULLMATCH: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + memset(match_criteria_dmac, 0xff, ETH_ALEN); + ether_addr_copy(dmac, ai->addr); + break; + + case MLX5E_ALLMULTI: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + match_criteria_dmac[0] = 0x01; + dmac[0] = 0x01; + break; + + case MLX5E_PROMISC: + break; + default: + break; + } + + tt_vec = mlx5e_get_tt_vec(ai, type); + + if (tt_vec & (1 << MLX5E_TT_ANY)) { + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_ANY]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_ANY]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return (err); + } + ai->tt_vec |= (1 << MLX5E_TT_ANY); + } + + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + 
MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.ethertype); + + if (tt_vec & (1 << MLX5E_TT_IPV4)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETHERTYPE_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV4]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return (err); + } + ai->tt_vec |= (1 << MLX5E_TT_IPV4); + } + + if (tt_vec & (1 << MLX5E_TT_IPV6)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETHERTYPE_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV6]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return (err); + } + ai->tt_vec |= (1 << MLX5E_TT_IPV6); + } + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.ip_protocol); + MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, + IPPROTO_UDP); + + if (tt_vec & (1 << MLX5E_TT_IPV4_UDP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETHERTYPE_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4_UDP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV4_UDP]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return (err); + } + ai->tt_vec |= (1 << MLX5E_TT_IPV4_UDP); + } + if (tt_vec & (1 << MLX5E_TT_IPV6_UDP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETHERTYPE_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6_UDP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV6_UDP]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + 
return (err); + } + ai->tt_vec |= (1 << MLX5E_TT_IPV6_UDP); + } + MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, + IPPROTO_TCP); + + if (tt_vec & (1 << MLX5E_TT_IPV4_TCP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETHERTYPE_IP); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV4_TCP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV4_TCP]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return (err); + } + ai->tt_vec |= (1 << MLX5E_TT_IPV4_TCP); + } + if (tt_vec & (1 << MLX5E_TT_IPV6_TCP)) { + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ETHERTYPE_IPV6); + MLX5_SET(dest_format_struct, dest, destination_id, + tirn[MLX5E_TT_IPV6_TCP]); + err = mlx5_add_flow_table_entry(ft, match_criteria_enable, + match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV6_TCP]); + if (err) { + mlx5e_del_eth_addr_from_flow_table(priv, ai); + return (err); + } + ai->tt_vec |= (1 << MLX5E_TT_IPV6_TCP); + } + return (0); +} + +static int +mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_info *ai, int type) +{ + u32 *flow_context; + u32 *match_criteria; + int err; + + flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + + MLX5_ST_SZ_BYTES(dest_format_struct)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!flow_context || !match_criteria) { + if_printf(priv->ifp, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_eth_addr_rule_out; + } + + err = mlx5e_add_eth_addr_rule_sub(priv, ai, type, flow_context, + match_criteria); + if (err) + if_printf(priv->ifp, "%s: failed\n", __func__); + +add_eth_addr_rule_out: + kvfree(match_criteria); + kvfree(flow_context); + return (err); +} + +enum mlx5e_vlan_rule_type { + MLX5E_VLAN_RULE_TYPE_UNTAGGED, + MLX5E_VLAN_RULE_TYPE_ANY_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_VID, +}; + +static int +mlx5e_add_vlan_rule(struct 
mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + u8 match_criteria_enable = 0; + u32 *flow_context; + void *match_value; + void *dest; + u32 *match_criteria; + u32 *ft_ix; + int err; + + flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + + MLX5_ST_SZ_BYTES(dest_format_struct)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!flow_context || !match_criteria) { + if_printf(priv->ifp, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_vlan_rule_out; + } + match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); + dest = MLX5_ADDR_OF(flow_context, flow_context, destination); + + MLX5_SET(flow_context, flow_context, action, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + MLX5_SET(flow_context, flow_context, destination_list_size, 1); + MLX5_SET(dest_format_struct, dest, destination_type, + MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE); + MLX5_SET(dest_format_struct, dest, destination_id, + mlx5_get_flow_table_id(priv->ft.main)); + + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.vlan_tag); + + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + ft_ix = &priv->vlan.untagged_rule_ft_ix; + break; + case MLX5E_VLAN_RULE_TYPE_ANY_VID: + ft_ix = &priv->vlan.any_vlan_rule_ft_ix; + MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, + 1); + break; + default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ + ft_ix = &priv->vlan.active_vlans_ft_ix[vid]; + MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, + 1); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, match_value, outer_headers.first_vid, + vid); + break; + } + + err = mlx5_add_flow_table_entry(priv->ft.vlan, match_criteria_enable, + match_criteria, flow_context, ft_ix); + if (err) + if_printf(priv->ifp, "%s: failed\n", __func__); + +add_vlan_rule_out: + kvfree(match_criteria); + kvfree(flow_context); + 
return (err); +} + +static void +mlx5e_del_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + switch (rule_type) { + case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + mlx5_del_flow_table_entry(priv->ft.vlan, + priv->vlan.untagged_rule_ft_ix); + break; + case MLX5E_VLAN_RULE_TYPE_ANY_VID: + mlx5_del_flow_table_entry(priv->ft.vlan, + priv->vlan.any_vlan_rule_ft_ix); + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_VID: + mlx5_del_flow_table_entry(priv->ft.vlan, + priv->vlan.active_vlans_ft_ix[vid]); + break; + } +} + +void +mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) +{ + if (priv->vlan.filter_disabled) { + priv->vlan.filter_disabled = false; + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + } +} + +void +mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) +{ + if (!priv->vlan.filter_disabled) { + priv->vlan.filter_disabled = true; + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + } +} + +void +mlx5e_vlan_rx_add_vid(void *arg, struct ifnet *ifp, u16 vid) +{ + struct mlx5e_priv *priv = arg; + + if (ifp != priv->ifp) + return; + + PRIV_LOCK(priv); + set_bit(vid, priv->vlan.active_vlans); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); + PRIV_UNLOCK(priv); +} + +void +mlx5e_vlan_rx_kill_vid(void *arg, struct ifnet *ifp, u16 vid) +{ + struct mlx5e_priv *priv = arg; + + if (ifp != priv->ifp) + return; + + PRIV_LOCK(priv); + clear_bit(vid, priv->vlan.active_vlans); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); + PRIV_UNLOCK(priv); +} + +int +mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv) +{ + u16 vid; + int err; + + for_each_set_bit(vid, priv->vlan.active_vlans, VLAN_N_VID) { + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, + vid); + if (err) + return (err); + } + 
+ err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + if (err) + return (err); + + if (priv->vlan.filter_disabled) { + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, + 0); + if (err) + return (err); + } + return (0); +} + +void +mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv) +{ + u16 vid; + + if (priv->vlan.filter_disabled) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); + + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(vid, priv->vlan.active_vlans, VLAN_N_VID) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); +} + +#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \ + LIST_FOREACH_SAFE(hn, &(hash)[i], hlist, tmp) + +static void +mlx5e_execute_action(struct mlx5e_priv *priv, + struct mlx5e_eth_addr_hash_node *hn) +{ + switch (hn->action) { + case MLX5E_ACTION_ADD: + mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH); + hn->action = MLX5E_ACTION_NONE; + break; + + case MLX5E_ACTION_DEL: + mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai); + mlx5e_del_eth_addr_from_hash(hn); + break; + + default: + break; + } +} + +static void +mlx5e_sync_ifp_addr(struct mlx5e_priv *priv) +{ + struct ifnet *ifp = priv->ifp; + struct ifaddr *ifa; + struct ifmultiaddr *ifma; + + /* XXX adding this entry might not be needed */ + mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_uc, + LLADDR((struct sockaddr_dl *)(ifp->if_addr->ifa_addr))); + + if_addr_rlock(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_LINK) + continue; + mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_uc, + LLADDR((struct sockaddr_dl *)ifa->ifa_addr)); + } + if_addr_runlock(ifp); + + if_maddr_rlock(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_mc, + LLADDR((struct sockaddr_dl *)ifma->ifma_addr)); + } + 
if_maddr_runlock(ifp); +} + +static void +mlx5e_apply_ifp_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_eth_addr_hash_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_uc, i) + mlx5e_execute_action(priv, hn); + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_mc, i) + mlx5e_execute_action(priv, hn); +} + +static void +mlx5e_handle_ifp_addr(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_eth_addr_hash_node *tmp; + int i; + + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_uc, i) + hn->action = MLX5E_ACTION_DEL; + mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.if_mc, i) + hn->action = MLX5E_ACTION_DEL; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_sync_ifp_addr(priv); + + mlx5e_apply_ifp_addr(priv); +} + +void +mlx5e_set_rx_mode_core(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + struct ifnet *ndev = priv->ifp; + + bool rx_mode_enable = test_bit(MLX5E_STATE_OPENED, &priv->state); + bool promisc_enabled = rx_mode_enable && (ndev->if_flags & IFF_PROMISC); + bool allmulti_enabled = rx_mode_enable && (ndev->if_flags & IFF_ALLMULTI); + bool broadcast_enabled = rx_mode_enable; + + bool enable_promisc = !ea->promisc_enabled && promisc_enabled; + bool disable_promisc = ea->promisc_enabled && !promisc_enabled; + bool enable_allmulti = !ea->allmulti_enabled && allmulti_enabled; + bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled; + bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; + bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; + + /* update broadcast address */ + ether_addr_copy(priv->eth_addr.broadcast.addr, + priv->ifp->if_broadcastaddr); + + if (enable_promisc) + mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); + if (enable_allmulti) + mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); + if (enable_broadcast) + 
mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); + + mlx5e_handle_ifp_addr(priv); + + if (disable_broadcast) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); + if (disable_allmulti) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); + if (disable_promisc) + mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); + + ea->promisc_enabled = promisc_enabled; + ea->allmulti_enabled = allmulti_enabled; + ea->broadcast_enabled = broadcast_enabled; +} + +void +mlx5e_set_rx_mode_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = + container_of(work, struct mlx5e_priv, set_rx_mode_work); + + PRIV_LOCK(priv); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_set_rx_mode_core(priv); + PRIV_UNLOCK(priv); +} + +static int +mlx5e_create_main_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5_flow_table_group *g; + u8 *dmac; + + g = malloc(9 * sizeof(*g), M_MLX5EN, M_WAITOK | M_ZERO); + if (g == NULL) + return (-ENOMEM); + + g[0].log_sz = 2; + g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.ip_protocol); + + g[1].log_sz = 1; + g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, + outer_headers.ethertype); + + g[2].log_sz = 0; + + g[3].log_sz = 14; + g[3].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[3].match_criteria, + outer_headers.dmac_47_16); + memset(dmac, 0xff, ETH_ALEN); + MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, + outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, + outer_headers.ip_protocol); + + g[4].log_sz = 13; + g[4].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[4].match_criteria, + outer_headers.dmac_47_16); + memset(dmac, 0xff, ETH_ALEN); + 
MLX5_SET_TO_ONES(fte_match_param, g[4].match_criteria, + outer_headers.ethertype); + + g[5].log_sz = 11; + g[5].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[5].match_criteria, + outer_headers.dmac_47_16); + memset(dmac, 0xff, ETH_ALEN); + + g[6].log_sz = 2; + g[6].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[6].match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, + outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, + outer_headers.ip_protocol); + + g[7].log_sz = 1; + g[7].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[7].match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + MLX5_SET_TO_ONES(fte_match_param, g[7].match_criteria, + outer_headers.ethertype); + + g[8].log_sz = 0; + g[8].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g[8].match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; + priv->ft.main = mlx5_create_flow_table(priv->mdev, 1, + MLX5_FLOW_TABLE_TYPE_NIC_RCV, + 0, 9, g); + free(g, M_MLX5EN); + + return (priv->ft.main ? 
0 : -ENOMEM); +} + +static void +mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) +{ + mlx5_destroy_flow_table(priv->ft.main); + priv->ft.main = NULL; +} + +static int +mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5_flow_table_group *g; + + g = malloc(2 * sizeof(*g), M_MLX5EN, M_WAITOK | M_ZERO); + if (g == NULL) + return (-ENOMEM); + + g[0].log_sz = 12; + g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, + outer_headers.first_vid); + + /* untagged + any vlan id */ + g[1].log_sz = 1; + g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, + outer_headers.vlan_tag); + + priv->ft.vlan = mlx5_create_flow_table(priv->mdev, 0, + MLX5_FLOW_TABLE_TYPE_NIC_RCV, + 0, 2, g); + free(g, M_MLX5EN); + + return (priv->ft.vlan ? 0 : -ENOMEM); +} + +static void +mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) +{ + mlx5_destroy_flow_table(priv->ft.vlan); + priv->ft.vlan = NULL; +} + +int +mlx5e_open_flow_table(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_main_flow_table(priv); + if (err) + return (err); + + err = mlx5e_create_vlan_flow_table(priv); + if (err) + goto err_destroy_main_flow_table; + + return (0); + +err_destroy_main_flow_table: + mlx5e_destroy_main_flow_table(priv); + + return (err); +} + +void +mlx5e_close_flow_table(struct mlx5e_priv *priv) +{ + mlx5e_destroy_vlan_flow_table(priv); + mlx5e_destroy_main_flow_table(priv); +} diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c new file mode 100644 index 0000000..e50252c --- /dev/null +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -0,0 +1,2902 @@ +/*- + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include "en.h" + +#include <sys/sockio.h> +#include <machine/atomic.h> + +#define ETH_DRIVER_VERSION "3.1.0-dev" +char mlx5e_version[] = "Mellanox Ethernet driver" + " (" ETH_DRIVER_VERSION ")"; + +struct mlx5e_rq_param { + u32 rqc [MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; +}; + +struct mlx5e_sq_param { + u32 sqc [MLX5_ST_SZ_DW(sqc)]; + struct mlx5_wq_param wq; +}; + +struct mlx5e_cq_param { + u32 cqc [MLX5_ST_SZ_DW(cqc)]; + struct mlx5_wq_param wq; + u16 eq_ix; +}; + +struct mlx5e_channel_param { + struct mlx5e_rq_param rq; + struct mlx5e_sq_param sq; + struct mlx5e_cq_param rx_cq; + struct mlx5e_cq_param tx_cq; +}; + +static const struct { + u32 subtype; + u64 baudrate; +} mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = { + + [MLX5E_1000BASE_CX_SGMII] = { + .subtype = IFM_1000_CX_SGMII, + .baudrate = IF_Mbps(1000ULL), + }, + [MLX5E_1000BASE_KX] = { + .subtype = IFM_1000_KX, + .baudrate = IF_Mbps(1000ULL), + }, + [MLX5E_10GBASE_CX4] = { + .subtype = IFM_10G_CX4, + .baudrate = IF_Gbps(10ULL), + }, + [MLX5E_10GBASE_KX4] = { + .subtype = IFM_10G_KX4, + .baudrate = IF_Gbps(10ULL), + }, + [MLX5E_10GBASE_KR] = { + .subtype = IFM_10G_KR, + .baudrate = IF_Gbps(10ULL), + }, + [MLX5E_20GBASE_KR2] = { + .subtype = IFM_20G_KR2, + .baudrate = IF_Gbps(20ULL), + }, + [MLX5E_40GBASE_CR4] = { + .subtype = IFM_40G_CR4, + .baudrate = IF_Gbps(40ULL), + }, + [MLX5E_40GBASE_KR4] = { + .subtype = IFM_40G_KR4, + .baudrate = IF_Gbps(40ULL), + }, + [MLX5E_56GBASE_R4] = { + .subtype = IFM_56G_R4, + .baudrate = IF_Gbps(56ULL), + }, + [MLX5E_10GBASE_CR] = { + .subtype = IFM_10G_CR1, + .baudrate = IF_Gbps(10ULL), + }, + [MLX5E_10GBASE_SR] = { + .subtype = IFM_10G_SR, + .baudrate = IF_Gbps(10ULL), + }, + [MLX5E_10GBASE_ER] = { + .subtype = IFM_10G_ER, + .baudrate = IF_Gbps(10ULL), + }, + [MLX5E_40GBASE_SR4] = { + .subtype = IFM_40G_SR4, + .baudrate = IF_Gbps(40ULL), + }, + [MLX5E_40GBASE_LR4] = { + .subtype = IFM_40G_LR4, + .baudrate = IF_Gbps(40ULL), + }, + 
[MLX5E_100GBASE_CR4] = { + .subtype = IFM_100G_CR4, + .baudrate = IF_Gbps(100ULL), + }, + [MLX5E_100GBASE_SR4] = { + .subtype = IFM_100G_SR4, + .baudrate = IF_Gbps(100ULL), + }, + [MLX5E_100GBASE_KR4] = { + .subtype = IFM_100G_KR4, + .baudrate = IF_Gbps(100ULL), + }, + [MLX5E_100GBASE_LR4] = { + .subtype = IFM_100G_LR4, + .baudrate = IF_Gbps(100ULL), + }, + [MLX5E_100BASE_TX] = { + .subtype = IFM_100_TX, + .baudrate = IF_Mbps(100ULL), + }, + [MLX5E_100BASE_T] = { + .subtype = IFM_100_T, + .baudrate = IF_Mbps(100ULL), + }, + [MLX5E_10GBASE_T] = { + .subtype = IFM_10G_T, + .baudrate = IF_Gbps(10ULL), + }, + [MLX5E_25GBASE_CR] = { + .subtype = IFM_25G_CR, + .baudrate = IF_Gbps(25ULL), + }, + [MLX5E_25GBASE_KR] = { + .subtype = IFM_25G_KR, + .baudrate = IF_Gbps(25ULL), + }, + [MLX5E_25GBASE_SR] = { + .subtype = IFM_25G_SR, + .baudrate = IF_Gbps(25ULL), + }, + [MLX5E_50GBASE_CR2] = { + .subtype = IFM_50G_CR2, + .baudrate = IF_Gbps(50ULL), + }, + [MLX5E_50GBASE_KR2] = { + .subtype = IFM_50G_KR2, + .baudrate = IF_Gbps(50ULL), + }, +}; + +MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet"); + +static void +mlx5e_update_carrier(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 eth_proto_oper; + int error; + u8 port_state; + u8 i; + + port_state = mlx5_query_vport_state(mdev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT); + + if (port_state == VPORT_STATE_UP) { + priv->media_status_last |= IFM_ACTIVE; + } else { + priv->media_status_last &= ~IFM_ACTIVE; + priv->media_active_last = IFM_ETHER; + if_link_state_change(priv->ifp, LINK_STATE_DOWN); + return; + } + + error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN); + if (error) { + priv->media_active_last = IFM_ETHER; + priv->ifp->if_baudrate = 1; + if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n", + __func__, error); + return; + } + eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + + for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) { 
+ if (mlx5e_mode_table[i].baudrate == 0) + continue; + if (MLX5E_PROT_MASK(i) & eth_proto_oper) { + priv->ifp->if_baudrate = + mlx5e_mode_table[i].baudrate; + priv->media_active_last = + mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX; + } + } + if_link_state_change(priv->ifp, LINK_STATE_UP); +} + +static void +mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr) +{ + struct mlx5e_priv *priv = dev->if_softc; + + ifmr->ifm_status = priv->media_status_last; + ifmr->ifm_active = priv->media_active_last | + (priv->params_ethtool.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) | + (priv->params_ethtool.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0); + +} + +static u32 +mlx5e_find_link_mode(u32 subtype) +{ + u32 i; + u32 link_mode = 0; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { + if (mlx5e_mode_table[i].baudrate == 0) + continue; + if (mlx5e_mode_table[i].subtype == subtype) + link_mode |= MLX5E_PROT_MASK(i); + } + + return (link_mode); +} + +static int +mlx5e_media_change(struct ifnet *dev) +{ + struct mlx5e_priv *priv = dev->if_softc; + struct mlx5_core_dev *mdev = priv->mdev; + u32 eth_proto_cap; + u32 link_mode; + int locked; + int error; + + locked = PRIV_LOCKED(priv); + if (!locked) + PRIV_LOCK(priv); + + if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) { + error = EINVAL; + goto done; + } + + link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media)); + + error = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); + if (error) { + if_printf(dev, "Query port media capability failed\n"); + goto done; + } + if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) + link_mode = eth_proto_cap; + else + link_mode = link_mode & eth_proto_cap; + + if (!link_mode) { + if_printf(dev, "Not supported link mode requested\n"); + error = EINVAL; + goto done; + } + + mlx5_set_port_status(mdev, MLX5_PORT_DOWN); + mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN); + mlx5_set_port_status(mdev, MLX5_PORT_UP); + +done: + if (!locked) + PRIV_UNLOCK(priv); 
+ return (error); +} + +static void +mlx5e_update_carrier_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + update_carrier_work); + + PRIV_LOCK(priv); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_update_carrier(priv); + PRIV_UNLOCK(priv); +} + +static void +mlx5e_update_pport_counters(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_pport_stats *s = &priv->stats.pport; + struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug; + u32 *in; + u32 *out; + u64 *ptr; + unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + unsigned x; + unsigned y; + + in = mlx5_vzalloc(sz); + out = mlx5_vzalloc(sz); + if (in == NULL || out == NULL) + goto free_out; + + ptr = (uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set); + + MLX5_SET(ppcnt_reg, in, local_port, 1); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + for (x = y = 0; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++) + s->arg[y] = be64toh(ptr[x]); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++) + s->arg[y] = be64toh(ptr[x]); + for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM + + MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++) + s_debug->arg[y] = be64toh(ptr[x]); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++) + s_debug->arg[y] = be64toh(ptr[x]); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++) + s_debug->arg[y] = be64toh(ptr[x]); +free_out: + kvfree(in); + kvfree(out); +} + +static 
void +mlx5e_update_stats_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + update_stats_work); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_vport_stats *s = &priv->stats.vport; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_sq_stats *sq_stats; + struct buf_ring *sq_br; +#if (__FreeBSD_version < 1100000) + struct ifnet *ifp = priv->ifp; +#endif + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u64 tso_packets = 0; + u64 tso_bytes = 0; + u64 tx_queue_dropped = 0; + u64 tx_defragged = 0; + u64 tx_offload_none = 0; + u64 lro_packets = 0; + u64 lro_bytes = 0; + u64 sw_lro_queued = 0; + u64 sw_lro_flushed = 0; + u64 rx_csum_none = 0; + u64 rx_wqe_err = 0; + u32 out_of_rx_buffer = 0; + int i; + int j; + + PRIV_LOCK(priv); + out = mlx5_vzalloc(outlen); + if (out == NULL) + goto free_out; + if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) + goto free_out; + + /* Collect firts the SW counters and then HW for consistency */ + for (i = 0; i < priv->params.num_channels; i++) { + struct mlx5e_rq *rq = &priv->channel[i]->rq; + + rq_stats = &priv->channel[i]->rq.stats; + + /* collect stats from LRO */ + rq_stats->sw_lro_queued = rq->lro.lro_queued; + rq_stats->sw_lro_flushed = rq->lro.lro_flushed; + sw_lro_queued += rq_stats->sw_lro_queued; + sw_lro_flushed += rq_stats->sw_lro_flushed; + lro_packets += rq_stats->lro_packets; + lro_bytes += rq_stats->lro_bytes; + rx_csum_none += rq_stats->csum_none; + rx_wqe_err += rq_stats->wqe_err; + + for (j = 0; j < priv->num_tc; j++) { + sq_stats = &priv->channel[i]->sq[j].stats; + sq_br = priv->channel[i]->sq[j].br; + + tso_packets += sq_stats->tso_packets; + tso_bytes += sq_stats->tso_bytes; + tx_queue_dropped += sq_stats->dropped; + tx_queue_dropped += sq_br->br_drops; + tx_defragged += sq_stats->defragged; + tx_offload_none += sq_stats->csum_offload_none; + } + } + + /* update counters */ + 
s->tso_packets = tso_packets; + s->tso_bytes = tso_bytes; + s->tx_queue_dropped = tx_queue_dropped; + s->tx_defragged = tx_defragged; + s->lro_packets = lro_packets; + s->lro_bytes = lro_bytes; + s->sw_lro_queued = sw_lro_queued; + s->sw_lro_flushed = sw_lro_flushed; + s->rx_csum_none = rx_csum_none; + s->rx_wqe_err = rx_wqe_err; + + /* HW counters */ + memset(in, 0, sizeof(in)); + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, other_vport, 0); + + memset(out, 0, outlen); + + if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen)) + goto free_out; + +#define MLX5_GET_CTR(out, x) \ + MLX5_GET64(query_vport_counter_out, out, x) + + s->rx_error_packets = + MLX5_GET_CTR(out, received_errors.packets); + s->rx_error_bytes = + MLX5_GET_CTR(out, received_errors.octets); + s->tx_error_packets = + MLX5_GET_CTR(out, transmit_errors.packets); + s->tx_error_bytes = + MLX5_GET_CTR(out, transmit_errors.octets); + + s->rx_unicast_packets = + MLX5_GET_CTR(out, received_eth_unicast.packets); + s->rx_unicast_bytes = + MLX5_GET_CTR(out, received_eth_unicast.octets); + s->tx_unicast_packets = + MLX5_GET_CTR(out, transmitted_eth_unicast.packets); + s->tx_unicast_bytes = + MLX5_GET_CTR(out, transmitted_eth_unicast.octets); + + s->rx_multicast_packets = + MLX5_GET_CTR(out, received_eth_multicast.packets); + s->rx_multicast_bytes = + MLX5_GET_CTR(out, received_eth_multicast.octets); + s->tx_multicast_packets = + MLX5_GET_CTR(out, transmitted_eth_multicast.packets); + s->tx_multicast_bytes = + MLX5_GET_CTR(out, transmitted_eth_multicast.octets); + + s->rx_broadcast_packets = + MLX5_GET_CTR(out, received_eth_broadcast.packets); + s->rx_broadcast_bytes = + MLX5_GET_CTR(out, received_eth_broadcast.octets); + s->tx_broadcast_packets = + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + s->tx_broadcast_bytes = + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + 
s->rx_packets = + s->rx_unicast_packets + + s->rx_multicast_packets + + s->rx_broadcast_packets; + s->rx_bytes = + s->rx_unicast_bytes + + s->rx_multicast_bytes + + s->rx_broadcast_bytes; + s->tx_packets = + s->tx_unicast_packets + + s->tx_multicast_packets + + s->tx_broadcast_packets; + s->tx_bytes = + s->tx_unicast_bytes + + s->tx_multicast_bytes + + s->tx_broadcast_bytes; + + /* Update calculated offload counters */ + s->tx_csum_offload = s->tx_packets - tx_offload_none; + s->rx_csum_good = s->rx_packets - s->rx_csum_none; + +#if (__FreeBSD_version < 1100000) + /* no get_counters interface in fbsd 10 */ + ifp->if_ipackets = s->rx_packets; + ifp->if_ierrors = s->rx_error_packets; + ifp->if_opackets = s->tx_packets; + ifp->if_oerrors = s->tx_error_packets; + ifp->if_snd.ifq_drops = s->tx_queue_dropped; + ifp->if_ibytes = s->rx_bytes; + ifp->if_obytes = s->tx_bytes; +#endif + + mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id, + &out_of_rx_buffer); + + /* Update per port counters */ + mlx5e_update_pport_counters(priv); + priv->stats.pport.out_of_rx_buffer = (u64)out_of_rx_buffer; +free_out: + kvfree(out); + PRIV_UNLOCK(priv); +} + +static void +mlx5e_update_stats(void *arg) +{ + struct mlx5e_priv *priv = arg; + + schedule_work(&priv->update_stats_work); + + callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv); +} + +static void +mlx5e_async_event_sub(struct mlx5e_priv *priv, + enum mlx5_dev_event event) +{ + switch (event) { + case MLX5_DEV_EVENT_PORT_UP: + case MLX5_DEV_EVENT_PORT_DOWN: + schedule_work(&priv->update_carrier_work); + break; + + default: + break; + } +} + +static void +mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, + enum mlx5_dev_event event, unsigned long param) +{ + struct mlx5e_priv *priv = vpriv; + + mtx_lock(&priv->async_events_mtx); + if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) + mlx5e_async_event_sub(priv, event); + mtx_unlock(&priv->async_events_mtx); +} + +static void 
+mlx5e_enable_async_events(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); +} + +static void +mlx5e_disable_async_events(struct mlx5e_priv *priv) +{ + mtx_lock(&priv->async_events_mtx); + clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); + mtx_unlock(&priv->async_events_mtx); +} + +static const char *mlx5e_rq_stats_desc[] = { + MLX5E_RQ_STATS(MLX5E_STATS_DESC) +}; + +static int +mlx5e_create_rq(struct mlx5e_channel *c, + struct mlx5e_rq_param *param, + struct mlx5e_rq *rq) +{ + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + char buffer[16]; + void *rqc = param->rqc; + void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + int wq_sz; + int err; + int i; + + /* Create DMA descriptor TAG */ + if ((err = -bus_dma_tag_create( + bus_get_dma_tag(mdev->pdev->dev.bsddev), + 1, /* any alignment */ + 0, /* no boundary */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + MJUM16BYTES, /* maxsize */ + 1, /* nsegments */ + MJUM16BYTES, /* maxsegsize */ + 0, /* flags */ + NULL, NULL, /* lockfunc, lockfuncarg */ + &rq->dma_tag))) + goto done; + + err = mlx5_wq_ll_create(mdev, ¶m->wq, rqc_wq, &rq->wq, + &rq->wq_ctrl); + if (err) + goto err_free_dma_tag; + + rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; + + if (priv->params.hw_lro_en) { + rq->wqe_sz = priv->params.lro_wqe_sz; + } + else { + rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu); + } + if (rq->wqe_sz > MJUM16BYTES) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } else if (rq->wqe_sz > MJUM9BYTES) { + rq->wqe_sz = MJUM16BYTES; + } else if (rq->wqe_sz > MJUMPAGESIZE) { + rq->wqe_sz = MJUM9BYTES; + } else if (rq->wqe_sz > MCLBYTES) { + rq->wqe_sz = MJUMPAGESIZE; + } else { + rq->wqe_sz = MCLBYTES; + } + + wq_sz = mlx5_wq_ll_get_size(&rq->wq); + rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO); + if (rq->mbuf == NULL) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + + for (i = 0; 
i != wq_sz; i++) { + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); + uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; + + err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map); + if (err != 0) { + while (i--) + bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map); + goto err_rq_mbuf_free; + } + wqe->data.lkey = c->mkey_be; + wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING); + } + + rq->pdev = c->pdev; + rq->ifp = c->ifp; + rq->channel = c; + rq->ix = c->ix; + + snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix); + mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), + buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM, + rq->stats.arg); + +#ifdef HAVE_TURBO_LRO + if (tcp_tlro_init(&rq->lro, c->ifp, MLX5E_BUDGET_MAX) != 0) + rq->lro.mbuf = NULL; +#else + if (tcp_lro_init(&rq->lro)) + rq->lro.lro_cnt = 0; + else + rq->lro.ifp = c->ifp; +#endif + return (0); + +err_rq_mbuf_free: + free(rq->mbuf, M_MLX5EN); +err_rq_wq_destroy: + mlx5_wq_destroy(&rq->wq_ctrl); +err_free_dma_tag: + bus_dma_tag_destroy(rq->dma_tag); +done: + return (err); +} + +static void +mlx5e_destroy_rq(struct mlx5e_rq *rq) +{ + int wq_sz; + int i; + + /* destroy all sysctl nodes */ + sysctl_ctx_free(&rq->stats.ctx); + + /* free leftover LRO packets, if any */ +#ifdef HAVE_TURBO_LRO + tcp_tlro_free(&rq->lro); +#else + tcp_lro_free(&rq->lro); +#endif + wq_sz = mlx5_wq_ll_get_size(&rq->wq); + for (i = 0; i != wq_sz; i++) { + if (rq->mbuf[i].mbuf != NULL) { + bus_dmamap_unload(rq->dma_tag, + rq->mbuf[i].dma_map); + m_freem(rq->mbuf[i].mbuf); + } + bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map); + } + free(rq->mbuf, M_MLX5EN); + mlx5_wq_destroy(&rq->wq_ctrl); +} + +static int +mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *rqc; + void *wq; + int inlen; + int err; + + 
inlen = MLX5_ST_SZ_BYTES(create_rq_in) + + sizeof(u64) * rq->wq_ctrl.buf.npages; + in = mlx5_vzalloc(inlen); + if (in == NULL) + return (-ENOMEM); + + rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); + wq = MLX5_ADDR_OF(rqc, rqc, wq); + + memcpy(rqc, param->rqc, sizeof(param->rqc)); + + MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, flush_in_error_en, 1); + if (priv->counter_set_id >= 0) + MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id); + MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - + PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); + + mlx5_fill_page_array(&rq->wq_ctrl.buf, + (__be64 *) MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn); + + kvfree(in); + + return (err); +} + +static int +mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (in == NULL) + return (-ENOMEM); + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rqn, rq->rqn); + MLX5_SET(modify_rq_in, in, rq_state, curr_state); + MLX5_SET(rqc, rqc, state, next_state); + + err = mlx5_core_modify_rq(mdev, in, inlen); + + kvfree(in); + + return (err); +} + +static void +mlx5e_disable_rq(struct mlx5e_rq *rq) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + mlx5_core_destroy_rq(mdev, rq->rqn); +} + +static int +mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_wq_ll *wq = &rq->wq; + int i; + + for (i = 0; i < 1000; i++) { + if (wq->cur_sz >= priv->params.min_rx_wqes) + return (0); + + msleep(4); + } + return 
(-ETIMEDOUT); +} + +static int +mlx5e_open_rq(struct mlx5e_channel *c, + struct mlx5e_rq_param *param, + struct mlx5e_rq *rq) +{ + int err; + int i; + + err = mlx5e_create_rq(c, param, rq); + if (err) + return (err); + + err = mlx5e_enable_rq(rq, param); + if (err) + goto err_destroy_rq; + + err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) + goto err_disable_rq; + + c->rq.enabled = 1; + + /* + * Test send queues, which will trigger + * "mlx5e_post_rx_wqes()": + */ + for (i = 0; i != c->num_tc; i++) + mlx5e_send_nop(&c->sq[i], 1, true); + return (0); + +err_disable_rq: + mlx5e_disable_rq(rq); +err_destroy_rq: + mlx5e_destroy_rq(rq); + + return (err); +} + +static void +mlx5e_close_rq(struct mlx5e_rq *rq) +{ + rq->enabled = 0; + mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); +} + +static void +mlx5e_close_rq_wait(struct mlx5e_rq *rq) +{ + /* wait till RQ is empty */ + while (!mlx5_wq_ll_is_empty(&rq->wq)) { + msleep(4); + rq->cq.mcq.comp(&rq->cq.mcq); + } + + mlx5e_disable_rq(rq); + mlx5e_destroy_rq(rq); +} + +static void +mlx5e_free_sq_db(struct mlx5e_sq *sq) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int x; + + for (x = 0; x != wq_sz; x++) + bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map); + free(sq->mbuf, M_MLX5EN); +} + +static int +mlx5e_alloc_sq_db(struct mlx5e_sq *sq) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int err; + int x; + + sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO); + if (sq->mbuf == NULL) + return (-ENOMEM); + + /* Create DMA descriptor MAPs */ + for (x = 0; x != wq_sz; x++) { + err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map); + if (err != 0) { + while (x--) + bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map); + free(sq->mbuf, M_MLX5EN); + return (err); + } + } + return (0); +} + +static const char *mlx5e_sq_stats_desc[] = { + MLX5E_SQ_STATS(MLX5E_STATS_DESC) +}; + +static int +mlx5e_create_sq(struct mlx5e_channel *c, + int tc, + struct 
mlx5e_sq_param *param, + struct mlx5e_sq *sq) +{ + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + char buffer[16]; + + void *sqc = param->sqc; + void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); + int err; + + /* Create DMA descriptor TAG */ + if ((err = -bus_dma_tag_create( + bus_get_dma_tag(mdev->pdev->dev.bsddev), + 1, /* any alignment */ + 0, /* no boundary */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */ + MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */ + MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */ + 0, /* flags */ + NULL, NULL, /* lockfunc, lockfuncarg */ + &sq->dma_tag))) + goto done; + + err = mlx5_alloc_map_uar(mdev, &sq->uar); + if (err) + goto err_free_dma_tag; + + err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, + &sq->wq_ctrl); + if (err) + goto err_unmap_free_uar; + + sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; + sq->uar_map = sq->uar.map; + sq->uar_bf_map = sq->uar.bf_map; + sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; + + err = mlx5e_alloc_sq_db(sq); + if (err) + goto err_sq_wq_destroy; + + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->tc = tc; + + sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN, + M_WAITOK, &sq->lock); + if (sq->br == NULL) { + if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n", + __func__); + err = -ENOMEM; + goto err_free_sq_db; + } + + sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK, + taskqueue_thread_enqueue, &sq->sq_tq); + if (sq->sq_tq == NULL) { + if_printf(c->ifp, "%s: Failed allocating taskqueue\n", + __func__); + err = -ENOMEM; + goto err_free_drbr; + } + TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq); + taskqueue_start_threads(&sq->sq_tq, 1, PI_NET, "%s tx sq", + c->ifp->if_xname); + + + snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc); + mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), + buffer, 
mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM, + sq->stats.arg); + + return (0); + +err_free_drbr: + buf_ring_free(sq->br, M_MLX5EN); +err_free_sq_db: + mlx5e_free_sq_db(sq); +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + +err_unmap_free_uar: + mlx5_unmap_free_uar(mdev, &sq->uar); + +err_free_dma_tag: + bus_dma_tag_destroy(sq->dma_tag); +done: + return (err); +} + +static void +mlx5e_destroy_sq(struct mlx5e_sq *sq) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + + /* destroy all sysctl nodes */ + sysctl_ctx_free(&sq->stats.ctx); + + mlx5e_free_sq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); + mlx5_unmap_free_uar(priv->mdev, &sq->uar); + taskqueue_drain(sq->sq_tq, &sq->sq_task); + taskqueue_free(sq->sq_tq); + buf_ring_free(sq->br, M_MLX5EN); +} + +static int +mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *sqc; + void *wq; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * sq->wq_ctrl.buf.npages; + in = mlx5_vzalloc(inlen); + if (in == NULL) + return (-ENOMEM); + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + memcpy(sqc, param->sqc, sizeof(param->sqc)); + + MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, tis_lst_sz, 1); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, uar_page, sq->uar.index); + MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - + PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); + + mlx5_fill_page_array(&sq->wq_ctrl.buf, + (__be64 *) MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn); + + kvfree(in); + + return (err); +} + +static int 
+mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *sqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_sq_in); + in = mlx5_vzalloc(inlen); + if (in == NULL) + return (-ENOMEM); + + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + + MLX5_SET(modify_sq_in, in, sqn, sq->sqn); + MLX5_SET(modify_sq_in, in, sq_state, curr_state); + MLX5_SET(sqc, sqc, state, next_state); + + err = mlx5_core_modify_sq(mdev, in, inlen); + + kvfree(in); + + return (err); +} + +static void +mlx5e_disable_sq(struct mlx5e_sq *sq) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + mlx5_core_destroy_sq(mdev, sq->sqn); +} + +static int +mlx5e_open_sq(struct mlx5e_channel *c, + int tc, + struct mlx5e_sq_param *param, + struct mlx5e_sq *sq) +{ + int err; + + err = mlx5e_create_sq(c, tc, param, sq); + if (err) + return (err); + + err = mlx5e_enable_sq(sq, param); + if (err) + goto err_destroy_sq; + + err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); + if (err) + goto err_disable_sq; + + atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY); + + return (0); + +err_disable_sq: + mlx5e_disable_sq(sq); +err_destroy_sq: + mlx5e_destroy_sq(sq); + + return (err); +} + +static void +mlx5e_close_sq(struct mlx5e_sq *sq) +{ + + /* ensure hw is notified of all pending wqes */ + if (mlx5e_sq_has_room_for(sq, 1)) + mlx5e_send_nop(sq, 1, true); + + mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); +} + +static void +mlx5e_close_sq_wait(struct mlx5e_sq *sq) +{ + /* wait till SQ is empty */ + while (sq->cc != sq->pc) { + msleep(4); + sq->cq.mcq.comp(&sq->cq.mcq); + } + + mlx5e_disable_sq(sq); + mlx5e_destroy_sq(sq); +} + +static int +mlx5e_create_cq(struct mlx5e_channel *c, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq, + 
    mlx5e_cq_comp_t *comp)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_core_cq *mcq = &cq->mcq;
	int eqn_not_used;
	int irqn;
	int err;
	u32 i;

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->eq_ix = c->ix;

	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
	    &cq->wq_ctrl);
	if (err)
		return (err);

	/* only the IRQ number is needed here; the EQN is fetched at enable */
	mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);

	mcq->cqe_sz = 64;
	mcq->set_ci_db = cq->wq_ctrl.db.db;
	mcq->arm_db = cq->wq_ctrl.db.db + 1;
	*mcq->set_ci_db = 0;
	*mcq->arm_db = 0;
	mcq->vector = param->eq_ix;
	mcq->comp = comp;
	mcq->event = mlx5e_cq_error_event;
	mcq->irqn = irqn;
	mcq->uar = &priv->cq_uar;

	/*
	 * Pre-set the ownership bit of every CQE to "invalid" so the
	 * polling code does not mistake stale memory for completions.
	 */
	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);

		cqe->op_own = 0xf1;
	}

	cq->channel = c;

	return (0);
}

/* Release the work-queue memory of a software CQ. */
static void
mlx5e_destroy_cq(struct mlx5e_cq *cq)
{
	mlx5_wq_destroy(&cq->wq_ctrl);
}

/*
 * Create the firmware CQ object: copy the prepared CQ context, fill in
 * the page array, bind the CQ to its EQ/UAR, set the interrupt
 * moderation mode and arm the CQ.  Returns 0 or an mlx5 error code.
 */
static int
mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param,
    u8 moderation_mode)
{
	struct mlx5e_channel *c = cq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_core_cq *mcq = &cq->mcq;
	void *in;
	void *cqc;
	int inlen;
	int irqn_not_used;
	int eqn;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
	    sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);

	memcpy(cqc, param->cqc, sizeof(param->cqc));

	mlx5_fill_page_array(&cq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));

	mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);

	MLX5_SET(cqc, cqc, cq_period_mode, moderation_mode);
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	err = mlx5_core_create_cq(mdev, mcq, in, inlen);

	kvfree(in);

	if (err)
		return (err);

	/* request the first completion interrupt */
	mlx5e_cq_arm(cq);

	return (0);
}

/* Destroy the firmware CQ object. */
static void
mlx5e_disable_cq(struct mlx5e_cq *cq)
{
	struct mlx5e_channel *c = cq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	mlx5_core_destroy_cq(mdev, &cq->mcq);
}

/*
 * Create + enable a CQ with the given completion handler and
 * moderation mode; on failure the software CQ is destroyed again.
 */
static int
mlx5e_open_cq(struct mlx5e_channel *c,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp,
    u8 moderation_mode)
{
	int err;

	err = mlx5e_create_cq(c, param, cq, comp);
	if (err)
		return (err);

	err = mlx5e_enable_cq(cq, param, moderation_mode);
	if (err)
		goto err_destroy_cq;

	return (0);

err_destroy_cq:
	mlx5e_destroy_cq(cq);

	return (err);
}

/* Teardown counterpart of mlx5e_open_cq(): fw object, then sw state. */
static void
mlx5e_close_cq(struct mlx5e_cq *cq)
{
	mlx5e_disable_cq(cq);
	mlx5e_destroy_cq(cq);
}

/*
 * Open one transmit CQ per traffic class on channel "c", unwinding the
 * already-opened ones on failure.
 */
static int
mlx5e_open_tx_cqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int err;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		/* open completion queue */
		err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq,
		    &mlx5e_tx_cq_comp, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		if (err)
			goto err_close_tx_cqs;
	}
	return (0);

err_close_tx_cqs:
	for (tc--; tc >= 0; tc--)
		mlx5e_close_cq(&c->sq[tc].cq);

	return (err);
}

/* Close every per-TC transmit CQ of the channel. */
static void
mlx5e_close_tx_cqs(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_cq(&c->sq[tc].cq);
}

/*
 * Open one SQ per traffic class; on failure, already-opened SQs are
 * closed and drained before returning the error.
 */
static int
mlx5e_open_sqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int err;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
		if (err)
			goto err_close_sqs;
	}

	return (0);

err_close_sqs:
	for (tc--; tc >= 0; tc--) {
		mlx5e_close_sq(&c->sq[tc]);
		mlx5e_close_sq_wait(&c->sq[tc]);
	}

	return (err);
}

/* Move every per-TC SQ into the ERR state (first half of teardown). */
static void
mlx5e_close_sqs(struct mlx5e_channel
    *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_sq(&c->sq[tc]);
}

/* Drain and destroy every per-TC SQ (second half of teardown). */
static void
mlx5e_close_sqs_wait(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_sq_wait(&c->sq[tc]);
}

/*
 * Initialize the RQ mutex plus the transmit and completion mutexes of
 * every per-TC SQ.  Must be paired with mlx5e_chan_mtx_destroy().
 */
static void
mlx5e_chan_mtx_init(struct mlx5e_channel *c)
{
	int tc;

	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);

	for (tc = 0; tc < c->num_tc; tc++) {
		mtx_init(&c->sq[tc].lock, "mlx5tx", MTX_NETWORK_LOCK, MTX_DEF);
		mtx_init(&c->sq[tc].comp_lock, "mlx5comp", MTX_NETWORK_LOCK,
		    MTX_DEF);
	}
}

/* Destroy all mutexes created by mlx5e_chan_mtx_init(). */
static void
mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
{
	int tc;

	mtx_destroy(&c->rq.mtx);

	for (tc = 0; tc < c->num_tc; tc++) {
		mtx_destroy(&c->sq[tc].lock);
		mtx_destroy(&c->sq[tc].comp_lock);
	}
}

/*
 * Allocate and fully open channel "ix": TX CQs, RX CQ (with the
 * configured interrupt moderation mode), SQs and the RQ, publishing the
 * channel through "*cp" only after everything is up.
 */
static int
mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
    struct mlx5e_channel_param *cparam,
    struct mlx5e_channel * volatile *cp)
{
	struct mlx5e_channel *c;
	u8 rx_moderation_mode;
	int err;

	c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
	/* NOTE(review): M_WAITOK malloc cannot fail; this check is dead code */
	if (c == NULL)
		return (-ENOMEM);

	c->priv = priv;
	c->ix = ix;
	c->cpu = 0;
	c->pdev = &priv->mdev->pdev->dev;
	c->ifp = priv->ifp;
	c->mkey_be = cpu_to_be32(priv->mr.key);
	c->num_tc = priv->num_tc;

	/* init mutexes */
	mlx5e_chan_mtx_init(c);

	/* open transmit completion queue */
	err = mlx5e_open_tx_cqs(c, cparam);
	if (err)
		goto err_free;

	/* mode 0 forces EQE-based moderation; otherwise prefer CQE-based */
	switch (priv->params.rx_cq_moderation_mode) {
	case 0:
		rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
		break;
	default:
		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
		else
			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
		break;
	}

	/* open receive completion queue */
	err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq,
	    &mlx5e_rx_cq_comp, rx_moderation_mode);
	if (err)
		goto err_close_tx_cqs;

	err = mlx5e_open_sqs(c, cparam);
	if (err)
		goto err_close_rx_cq;

	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
	if (err)
		goto err_close_sqs;

	/* store channel pointer */
	*cp = c;

	/* poll receive queue initially */
	c->rq.cq.mcq.comp(&c->rq.cq.mcq);

	return (0);

err_close_sqs:
	mlx5e_close_sqs(c);
	mlx5e_close_sqs_wait(c);

err_close_rx_cq:
	mlx5e_close_cq(&c->rq.cq);

err_close_tx_cqs:
	mlx5e_close_tx_cqs(c);

err_free:
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
	return (err);
}

/*
 * First half of channel teardown: stop the RQ and move all SQs to the
 * ERR state.  Completion draining happens in mlx5e_close_channel_wait().
 */
static void
mlx5e_close_channel(struct mlx5e_channel * volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	mlx5e_close_rq(&c->rq);
	mlx5e_close_sqs(c);
}

/*
 * Second half of channel teardown: wait for the RQ and SQs to drain,
 * close the CQs, destroy the mutexes and free the channel.  Clears the
 * published channel pointer first so no other path can use it.
 */
static void
mlx5e_close_channel_wait(struct mlx5e_channel * volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	/* ensure channel pointer is no longer used */
	*pp = NULL;

	mlx5e_close_rq_wait(&c->rq);
	mlx5e_close_sqs_wait(c);
	mlx5e_close_cq(&c->rq.cq);
	mlx5e_close_tx_cqs(c);
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
}

/* Fill the RQ creation parameters from the current private settings. */
static void
mlx5e_build_rq_param(struct mlx5e_priv *priv,
    struct mlx5e_rq_param *param)
{
	void *rqc = param->rqc;
	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
	MLX5_SET(wq, wq, pd, priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

/* Fill the SQ creation parameters (continues in the next chunk). */
static void
mlx5e_build_sq_param(struct mlx5e_priv *priv,
    struct mlx5e_sq_param *param)
{
	void *sqc = param->sqc;
	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);

	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
	MLX5_SET(wq, wq, pd, priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

/* Set the fields shared by RX and TX CQ contexts (the UAR page). */
static void
mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
}

/* Build the RX CQ context: size matches the RQ, plus RX moderation. */
static void
mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);

	mlx5e_build_common_cq_param(priv, param);
}

/* Build the TX CQ context: size matches the SQ, plus TX moderation. */
static void
mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);

	mlx5e_build_common_cq_param(priv, param);
}

/* Build all four parameter sets used when opening a channel. */
static void
mlx5e_build_channel_param(struct mlx5e_priv *priv,
    struct mlx5e_channel_param *cparam)
{
	memset(cparam, 0, sizeof(*cparam));

	mlx5e_build_rq_param(priv, &cparam->rq);
	mlx5e_build_sq_param(priv, &cparam->sq);
	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
}

/*
 * Allocate the channel pointer array and open every configured channel,
 * then wait until each RQ has posted its minimum number of receive WQEs.
 * On any failure all channels opened so far are torn down.
 */
static int
mlx5e_open_channels(struct mlx5e_priv *priv)
{
	struct mlx5e_channel_param cparam;
	void *ptr;
	int err;
	int i;
	int j;

	priv->channel = malloc(priv->params.num_channels *
	    sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
	/* NOTE(review): M_WAITOK malloc cannot fail; this check is dead code */
	if (priv->channel == NULL)
		return (-ENOMEM);

	mlx5e_build_channel_param(priv, &cparam);
	for (i = 0; i < priv->params.num_channels; i++) {
		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
		if (err)
			goto err_close_channels;
	}

	for (j = 0; j < priv->params.num_channels; j++) {
		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
		if (err)
			goto err_close_channels;
	}

	return (0);

err_close_channels:
	for (i--; i >= 0; i--) {
		mlx5e_close_channel(&priv->channel[i]);
		mlx5e_close_channel_wait(&priv->channel[i]);
	}

	/* remove "volatile" attribute from "channel" pointer */
	ptr = __DECONST(void *, priv->channel);
	priv->channel = NULL;

	free(ptr, M_MLX5EN);

	return (err);
}

/*
 * Close all channels in two passes (stop first, then drain) so the
 * per-channel waits overlap, and free the channel array.
 */
static void
mlx5e_close_channels(struct mlx5e_priv *priv)
{
	void *ptr;
	int i;

	if (priv->channel == NULL)
		return;

	for (i = 0; i < priv->params.num_channels; i++)
		mlx5e_close_channel(&priv->channel[i]);
	for (i = 0; i < priv->params.num_channels; i++)
		mlx5e_close_channel_wait(&priv->channel[i]);

	/* remove "volatile" attribute from "channel" pointer */
	ptr = __DECONST(void *, priv->channel);
	priv->channel = NULL;

	free(ptr, M_MLX5EN);
}

/* Create the transport interface send (TIS) object for one TC. */
static int
mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);

	memset(in, 0, sizeof(in));

	MLX5_SET(tisc, tisc, prio, tc);
	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);

	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
}

/* Destroy the TIS object for one TC. */
static void
mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
{
	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
}

/* Open one TIS per traffic class, unwinding on failure. */
static int
mlx5e_open_tises(struct mlx5e_priv *priv)
{
	int num_tc = priv->num_tc;
	int err;
	int tc;

	for (tc = 0; tc < num_tc; tc++) {
		err = mlx5e_open_tis(priv, tc);
		if (err)
			goto err_close_tises;
	}

	return (0);

err_close_tises:
	for (tc--; tc >= 0; tc--)
		mlx5e_close_tis(priv, tc);

	return (err);
}

/* Close every per-TC TIS object. */
static void
mlx5e_close_tises(struct mlx5e_priv *priv)
{
	int num_tc = priv->num_tc;
	int tc;

	for (tc = 0; tc < num_tc; tc++)
		mlx5e_close_tis(priv, tc);
}

/*
 * Create the RQ table (RQT) used for RSS indirection, mapping table
 * entries round-robin onto the open channels' RQ numbers.
 */
static int
mlx5e_open_rqt(struct mlx5e_priv *priv)
{
	struct
	    mlx5_core_dev *mdev = priv->mdev;
	u32 *in;
	u32 out[MLX5_ST_SZ_DW(create_rqt_out)];
	void *rqtc;
	int inlen;
	int err;
	int sz;
	int i;

	sz = 1 << priv->params.rx_hash_log_tbl_sz;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);
	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

	/* spread the RQT entries evenly over all channels */
	for (i = 0; i < sz; i++) {
		int ix = i % priv->params.num_channels;

		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
	}

	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);

	memset(out, 0, sizeof(out));
	err = mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
	if (!err)
		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);

	kvfree(in);

	return (err);
}

/* Destroy the RSS indirection table created by mlx5e_open_rqt(). */
static void
mlx5e_close_rqt(struct mlx5e_priv *priv)
{
	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)];
	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)];

	memset(in, 0, sizeof(in));

	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);

	mlx5_cmd_exec_check_status(priv->mdev, in, sizeof(in), out,
	    sizeof(out));
}

/*
 * Fill a TIR (transport interface receive) context for traffic type
 * "tt": optional hardware LRO settings, dispatch type (direct RQ for
 * MLX5E_TT_ANY, RSS indirection otherwise, with a fixed symmetric
 * Toeplitz key), and the per-type RX hash field selection.
 */
static void
mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
{
	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	__be32 *hkey;

	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);

#define	ROUGH_MAX_L2_L3_HDR_SZ 256

#define	MLX5_HASH_IP	(MLX5_HASH_FIELD_SEL_SRC_IP |\
			 MLX5_HASH_FIELD_SEL_DST_IP)

#define	MLX5_HASH_ALL	(MLX5_HASH_FIELD_SEL_SRC_IP |\
			 MLX5_HASH_FIELD_SEL_DST_IP |\
			 MLX5_HASH_FIELD_SEL_L4_SPORT |\
			 MLX5_HASH_FIELD_SEL_L4_DPORT)

#define	MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP |\
				 MLX5_HASH_FIELD_SEL_DST_IP |\
				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)

	if (priv->params.hw_lro_en) {
		MLX5_SET(tirc, tirc, lro_enable_mask,
		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
		/* maximum LRO message size, in units of 256 bytes */
		MLX5_SET(tirc, tirc, lro_max_msg_sz,
		    (priv->params.lro_wqe_sz -
		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
		/* TODO: add the option to choose timer value dynamically */
		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
		    MLX5_CAP_ETH(priv->mdev,
		    lro_timer_supported_periods[2]));
	}


	switch (tt) {
	case MLX5E_TT_ANY:
		/* catch-all type goes straight to channel 0's RQ */
		MLX5_SET(tirc, tirc, disp_type,
		    MLX5_TIRC_DISP_TYPE_DIRECT);
		MLX5_SET(tirc, tirc, inline_rqn,
		    priv->channel[0]->rq.rqn);
		break;
	default:
		MLX5_SET(tirc, tirc, disp_type,
		    MLX5_TIRC_DISP_TYPE_INDIRECT);
		MLX5_SET(tirc, tirc, indirect_table,
		    priv->rqtn);
		MLX5_SET(tirc, tirc, rx_hash_fn,
		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
		/* fixed (non-random) Toeplitz key shared by all TIRs */
		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
		hkey[0] = cpu_to_be32(0xD181C62C);
		hkey[1] = cpu_to_be32(0xF7F4DB5B);
		hkey[2] = cpu_to_be32(0x1983A2FC);
		hkey[3] = cpu_to_be32(0x943E1ADB);
		hkey[4] = cpu_to_be32(0xD9389E6B);
		hkey[5] = cpu_to_be32(0xD1039C2C);
		hkey[6] = cpu_to_be32(0xA74499AD);
		hkey[7] = cpu_to_be32(0x593D56D9);
		hkey[8] = cpu_to_be32(0xF3253C06);
		hkey[9] = cpu_to_be32(0x2ADC1FFC);
		break;
	}

	switch (tt) {
	case MLX5E_TT_IPV4_TCP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_TCP);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV6_TCP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_TCP);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV4_UDP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_UDP);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV6_UDP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_UDP);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV4_IPSEC_AH:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV6_IPSEC_AH:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV4_IPSEC_ESP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV6_IPSEC_ESP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV4:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP);
		break;

	case MLX5E_TT_IPV6:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP);
		break;

	default:
		break;
	}
}

/* Build and create a TIR for traffic type "tt", storing its number. */
static int
mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 *in;
	void *tirc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);
	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);

	mlx5e_build_tir_ctx(priv, tirc, tt);

	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);

	kvfree(in);

	return (err);
}

static
    void
mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
{
	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
}

/* Open one TIR per traffic type, unwinding on failure. */
static int
mlx5e_open_tirs(struct mlx5e_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < MLX5E_NUM_TT; i++) {
		err = mlx5e_open_tir(priv, i);
		if (err)
			goto err_close_tirs;
	}

	return (0);

err_close_tirs:
	for (i--; i >= 0; i--)
		mlx5e_close_tir(priv, i);

	return (err);
}

/* Close every TIR opened by mlx5e_open_tirs(). */
static void
mlx5e_close_tirs(struct mlx5e_priv *priv)
{
	int i;

	for (i = 0; i < MLX5E_NUM_TT; i++)
		mlx5e_close_tir(priv, i);
}

/*
 * SW MTU does not include headers,
 * HW MTU includes all headers and checksums.
 */
static int
mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	struct mlx5_core_dev *mdev = priv->mdev;
	int hw_mtu;
	int min_mtu;
	int err;

	/*
	 * Trying to set MTU to zero, in order
	 * to find out the FW's minimal MTU
	 */
	err = mlx5_set_port_mtu(mdev, 0);
	if (err)
		return (err);
	err = mlx5_query_port_oper_mtu(mdev, &min_mtu);
	if (err) {
		if_printf(ifp, "Query port minimal MTU failed\n");
		return (err);
	}

	/* below the firmware minimum: only record the SW MTU */
	if (sw_mtu < MLX5E_HW2SW_MTU(min_mtu)) {
		ifp->if_mtu = sw_mtu;
		return (0);
	}

	err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(sw_mtu));
	if (err)
		return (err);

	/* read back the MTU actually accepted by the hardware */
	err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
	if (!err) {
		ifp->if_mtu = MLX5E_HW2SW_MTU(hw_mtu);

		if (ifp->if_mtu != sw_mtu) {
			if_printf(ifp, "Port MTU %d is different than "
			    "ifp mtu %d\n", sw_mtu, (int)ifp->if_mtu);
		}
	} else {
		if_printf(ifp, "Query port MTU, after setting new "
		    "MTU value, failed\n");
		ifp->if_mtu = sw_mtu;
	}
	return (0);
}

/*
 * Bring the interface fully up: TISes, queue counter, channels, RQT,
 * TIRs, flow table and VLAN rules, in that order, unwinding in reverse
 * on failure.  Caller must hold the private state lock.
 */
int
mlx5e_open_locked(struct ifnet *ifp)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	int err;

	/* check if already opened */
	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
		return (0);

	err = mlx5e_open_tises(priv);
	if (err) {
		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
		    __func__, err);
		return (err);
	}
	err = mlx5_vport_alloc_q_counter(priv->mdev, &priv->counter_set_id);
	if (err) {
		if_printf(priv->ifp,
		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
		    __func__, err);
		goto err_close_tises;
	}
	err = mlx5e_open_channels(priv);
	if (err) {
		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
		    __func__, err);
		goto err_dalloc_q_counter;
	}
	err = mlx5e_open_rqt(priv);
	if (err) {
		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
		    __func__, err);
		goto err_close_channels;
	}
	err = mlx5e_open_tirs(priv);
	if (err) {
		if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n",
		    __func__, err);
		goto err_close_rqls;
	}
	err = mlx5e_open_flow_table(priv);
	if (err) {
		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
		    __func__, err);
		goto err_close_tirs;
	}
	err = mlx5e_add_all_vlan_rules(priv);
	if (err) {
		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
		    __func__, err);
		goto err_close_flow_table;
	}
	set_bit(MLX5E_STATE_OPENED, &priv->state);

	mlx5e_update_carrier(priv);
	mlx5e_set_rx_mode_core(priv);

	return (0);

err_close_flow_table:
	mlx5e_close_flow_table(priv);

err_close_tirs:
	mlx5e_close_tirs(priv);

err_close_rqls:
	mlx5e_close_rqt(priv);

err_close_channels:
	mlx5e_close_channels(priv);

err_dalloc_q_counter:
	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);

err_close_tises:
	mlx5e_close_tises(priv);

	return (err);
}

/*
 * ifnet if_init entry point: raise the port administratively and open
 * the datapath under the private lock.
 * NOTE(review): IFF_DRV_RUNNING is set even when mlx5e_open_locked()
 * fails — confirm whether that is intentional.
 */
static void
mlx5e_open(void *arg)
{
	struct mlx5e_priv *priv = arg;

	PRIV_LOCK(priv);
	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
		if_printf(priv->ifp,
		    "%s: Setting port status to up failed\n",
		    __func__);

	mlx5e_open_locked(priv->ifp);
	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	PRIV_UNLOCK(priv);
}

/*
 * Tear the datapath down in the reverse order of mlx5e_open_locked().
 * Caller must hold the private state lock.  (Body continues in the
 * next chunk.)
 */
int
mlx5e_close_locked(struct ifnet *ifp)
{
	struct mlx5e_priv *priv = ifp->if_softc;

	/* check if already closed */
	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
		return (0);

	clear_bit(MLX5E_STATE_OPENED, &priv->state);

	mlx5e_set_rx_mode_core(priv);
	mlx5e_del_all_vlan_rules(priv);
	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
	mlx5e_close_flow_table(priv);
	mlx5e_close_tirs(priv);
	mlx5e_close_rqt(priv);
	mlx5e_close_channels(priv);
	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
	mlx5e_close_tises(priv);

	return (0);
}

#if (__FreeBSD_version >= 1100000)
/*
 * ifnet if_get_counter callback: map generic interface counters onto
 * the cached vport statistics.  Runs lockless; the stats fields are
 * updated asynchronously by the update-stats work.
 */
static uint64_t
mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	u64 retval;

	/* PRIV_LOCK(priv); XXX not allowed */
	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		retval = priv->stats.vport.rx_packets;
		break;
	case IFCOUNTER_IERRORS:
		retval = priv->stats.vport.rx_error_packets;
		break;
	case IFCOUNTER_OPACKETS:
		retval = priv->stats.vport.tx_packets;
		break;
	case IFCOUNTER_OERRORS:
		retval = priv->stats.vport.tx_error_packets;
		break;
	case IFCOUNTER_IBYTES:
		retval = priv->stats.vport.rx_bytes;
		break;
	case IFCOUNTER_OBYTES:
		retval = priv->stats.vport.tx_bytes;
		break;
	case IFCOUNTER_IMCASTS:
		retval = priv->stats.vport.rx_multicast_packets;
		break;
	case IFCOUNTER_OMCASTS:
		retval = priv->stats.vport.tx_multicast_packets;
		break;
	case IFCOUNTER_OQDROPS:
		retval = priv->stats.vport.tx_queue_dropped;
		break;
	default:
		retval = if_get_counter_default(ifp, cnt);
		break;
	}
	/* PRIV_UNLOCK(priv); XXX not allowed */
	return (retval);
}
#endif

/* Defer RX filter reprogramming to the set_rx_mode work queue. */
static void
mlx5e_set_rx_mode(struct ifnet *ifp)
{
	struct mlx5e_priv *priv = ifp->if_softc;

	schedule_work(&priv->set_rx_mode_work);
}

/*
 * ifnet ioctl handler: MTU changes (with datapath restart), up/down
 * transitions, multicast updates, media, capability toggles and SFP
 * EEPROM reads over I2C.  Unknown requests fall through to
 * ether_ioctl().
 */
static int
mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct mlx5e_priv *priv;
	struct ifreq *ifr;
	struct ifi2creq i2c;
	int error = 0;
	int mask = 0;
	int size_read = 0;
	int module_num;
	int max_mtu;

	priv = ifp->if_softc;

	/* check if detaching */
	if (priv == NULL || priv->gone != 0)
		return (ENXIO);

	switch (command) {
	case SIOCSIFMTU:
		ifr = (struct ifreq *)data;

		PRIV_LOCK(priv);
		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);

		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
			int was_opened;

			/* the datapath must be restarted around an MTU change */
			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
			if (was_opened)
				mlx5e_close_locked(ifp);

			/* set new MTU */
			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);

			if (was_opened)
				mlx5e_open_locked(ifp);
		} else {
			error = EINVAL;
			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
		}
		PRIV_UNLOCK(priv);
		break;
	case SIOCSIFFLAGS:
		if ((ifp->if_flags & IFF_UP) &&
		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			/* already running: just refresh the RX filters */
			mlx5e_set_rx_mode(ifp);
			break;
		}
		PRIV_LOCK(priv);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
					mlx5e_open_locked(ifp);
				ifp->if_drv_flags |= IFF_DRV_RUNNING;
				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mlx5_set_port_status(priv->mdev,
				    MLX5_PORT_DOWN);
				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
					mlx5e_close_locked(ifp);
				mlx5e_update_carrier(priv);
				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			}
		}
		PRIV_UNLOCK(priv);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mlx5e_set_rx_mode(ifp);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
		ifr = (struct ifreq *)data;
		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
		break;
	case SIOCSIFCAP:
		ifr = (struct ifreq *)data;
		PRIV_LOCK(priv);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;

		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);

			/* TSO requires TX checksum offload */
			if (IFCAP_TSO4 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_IP_TSO;
				if_printf(ifp,
				    "tso4 disabled due to -txcsum.\n");
			}
		}
		if (mask & IFCAP_TXCSUM_IPV6) {
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);

			if (IFCAP_TSO6 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO6;
				ifp->if_hwassist &= ~CSUM_IP6_TSO;
				if_printf(ifp,
				    "tso6 disabled due to -txcsum6.\n");
			}
		}
		if (mask & IFCAP_RXCSUM)
			ifp->if_capenable ^= IFCAP_RXCSUM;
		if (mask & IFCAP_RXCSUM_IPV6)
			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;

		if (mask & IFCAP_TSO4) {
			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum first.\n");
				error = EAGAIN;
				goto out;
			}
			ifp->if_capenable ^= IFCAP_TSO4;
			ifp->if_hwassist ^= CSUM_IP_TSO;
		}
		if (mask & IFCAP_TSO6) {
			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum6 first.\n");
				error = EAGAIN;
				goto out;
			}
			ifp->if_capenable ^= IFCAP_TSO6;
			ifp->if_hwassist ^= CSUM_IP6_TSO;
		}

		if (mask & IFCAP_VLAN_HWFILTER) {
			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
				mlx5e_disable_vlan_filter(priv);
			else
				mlx5e_enable_vlan_filter(priv);

			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;

		if (mask & IFCAP_WOL_MAGIC)
			ifp->if_capenable ^= IFCAP_WOL_MAGIC;

		VLAN_CAPABILITIES(ifp);
		/* turning off LRO also means turning off HW LRO - if it's on */
		if (mask & IFCAP_LRO) {
			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
			bool need_restart = false;

			ifp->if_capenable ^= IFCAP_LRO;
			if (!(ifp->if_capenable & IFCAP_LRO)) {
				if (priv->params.hw_lro_en) {
					priv->params.hw_lro_en = false;
					need_restart = true;
					/* Not sure this is the correct way */
					priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
				}
			}
			/* HW LRO changes require a full datapath restart */
			if (was_opened && need_restart) {
				mlx5e_close_locked(ifp);
				mlx5e_open_locked(ifp);
			}
		}
out:
		PRIV_UNLOCK(priv);
		break;

	case SIOCGI2C:
		ifr = (struct ifreq *)data;

		/* Copy from the user-space address ifr_data to the kernel-space address i2c */
		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
		if (error)
			break;

		if (i2c.len > sizeof(i2c.data)) {
			error = EINVAL;
			break;
		}

		PRIV_LOCK(priv);
		/* Get module_num which is required for the query_eeprom */
		error = mlx5_query_module_num(priv->mdev, &module_num);
		if (error) {
			if_printf(ifp, "Query module num failed, eeprom "
			    "reading is not supported\n");
			goto err_i2c;
		}

		/*
		 * Note that we ignore i2c.addr here. The driver hardcodes
		 * the address to 0x50, while standard expects it to be 0xA0.
		 */
		error = mlx5_query_eeprom(priv->mdev,
		    MLX5E_I2C_ADDR_LOW, MLX5E_EEPROM_LOW_PAGE,
		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
		    (uint32_t *)i2c.data, &size_read);
		if (error) {
			if_printf(ifp, "Query eeprom failed, eeprom "
			    "reading is not supported\n");
			goto err_i2c;
		}

		/* a single query may return fewer bytes; fetch the remainder */
		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
			error = mlx5_query_eeprom(priv->mdev,
			    MLX5E_I2C_ADDR_LOW, MLX5E_EEPROM_LOW_PAGE,
			    (uint32_t)(i2c.offset + size_read),
			    (uint32_t)(i2c.len - size_read), module_num,
			    (uint32_t *)(i2c.data + size_read), &size_read);
		}
		if (error) {
			if_printf(ifp, "Query eeprom failed, eeprom "
			    "reading is not supported\n");
			goto err_i2c;
		}

		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
err_i2c:
		PRIV_UNLOCK(priv);
		break;

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}
	return (error);
}

/*
 * Placeholder for verifying mandatory device capabilities before
 * attaching; currently always succeeds.
 */
static int
mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
{
	/*
	 * TODO: uncomment once FW really sets all these bits if
	 * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
	 * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
	 * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return
	 * -ENOTSUPP;
	 */

	/* TODO: add more must-to-have features */

	return (0);
}

/*
 * Initialize the default tunable parameters of a new interface from the
 * device capabilities and the number of available completion vectors.
 */
static void
mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
    struct mlx5e_priv *priv,
    int num_comp_vectors)
{
	/*
	 * TODO: Consider link speed for setting "log_sq_size",
	 * "log_rq_size" and "cq_moderation_xxx":
	 */
	priv->params.log_sq_size =
	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
	priv->params.log_rq_size =
	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
	priv->params.rx_cq_moderation_usec =
	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
	priv->params.rx_cq_moderation_mode =
	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
	priv->params.rx_cq_moderation_pkts =
	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
	priv->params.tx_cq_moderation_usec =
	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
	priv->params.tx_cq_moderation_pkts =
	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
	priv->params.min_rx_wqes =
	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
	/* the RSS table must cover at least all completion vectors */
	priv->params.rx_hash_log_tbl_sz =
	    (order_base_2(num_comp_vectors) >
	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
	    order_base_2(num_comp_vectors) :
	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
	priv->params.num_tc = 1;
	priv->params.default_vlan_prio = 0;
	priv->counter_set_id = -1;

	/*
	 * hw lro is currently defaulted to off.
	 * when it won't anymore we will consider the
	 * HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)"
	 */
	priv->params.hw_lro_en = false;
	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;

	priv->mdev = mdev;
	priv->params.num_channels = num_comp_vectors;
	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
	priv->queue_mapping_channel_mask =
	    roundup_pow_of_two(num_comp_vectors) - 1;
	priv->num_tc = priv->params.num_tc;
	priv->default_vlan_prio = priv->params.default_vlan_prio;

	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
}

/*
 * Create a physical-address memory key covering the whole address
 * space, used by the data path for DMA.  Returns 0 or an mlx5 error.
 */
static int
mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
    struct mlx5_core_mr *mr)
{
	struct ifnet *ifp = priv->ifp;
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	int err;

	in = mlx5_vzalloc(sizeof(*in));
	if (in == NULL) {
		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
		return (-ENOMEM);
	}
	in->seg.flags = MLX5_PERM_LOCAL_WRITE |
	    MLX5_PERM_LOCAL_READ |
	    MLX5_ACCESS_MODE_PA;
	in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);

	err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL,
	    NULL);
	if (err)
		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
		    __func__, err);

	kvfree(in);

	return (err);
}

/* sysctl description strings for the vport statistics */
static const char *mlx5e_vport_stats_desc[] = {
	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
};

/* sysctl description strings for the physical-port statistics */
static const char *mlx5e_pport_stats_desc[] = {
	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
};

/*
 * Initialize the async-event mutex, the state sx lock and the watchdog
 * callout of a new private structure.
 */
static void
mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
{
	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
	sx_init(&priv->state_lock, "mlx5state");
	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
}

/* Destroy the locks created by mlx5e_priv_mtx_init(). */
static void
mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
{
	mtx_destroy(&priv->async_events_mtx);
	sx_destroy(&priv->state_lock);
}

/* sysctl handler exposing the firmware version as "maj.min.sub". */
static int
sysctl_firmware(SYSCTL_HANDLER_ARGS)
{
	/* "%d.%d.%d" is the string format.
	 * fw_rev_{maj,min,sub} return u16, 2^16 = 65536.
	 * We need at most 5 chars to store that.
	 * it also has: two "." and NULL at the end.
	 * Which means we need 18 (5*3 + 3) chars at most.
	 */
	char fw[18];
	struct mlx5e_priv *priv = arg1;
	int error;

	snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
	    fw_rev_sub(priv->mdev));
	error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
	return (error);
}

/* Register the read-only hardware information sysctls (fw, board id). */
static void
mlx5e_add_hw_stats(struct mlx5e_priv *priv)
{
	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
	    sysctl_firmware, "A", "HCA firmware version");

	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
	    "Board ID");
}

/*
 * mlx5 interface "add" callback: allocate the private state and the
 * ifnet, set up capabilities, sysctl trees, UAR, PD, transport domain,
 * memory key, MTU and supported media.  (The function continues past
 * the end of this chunk.)
 */
static void *
mlx5e_create_ifp(struct mlx5_core_dev *mdev)
{
	static volatile int mlx5_en_unit;
	struct ifnet *ifp;
	struct mlx5e_priv *priv;
	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
	struct sysctl_oid_list *child;
	int ncv = mdev->priv.eq_table.num_comp_vectors;
	char unit[16];
	int err;
	int i;
	u32 eth_proto_cap;

	if (mlx5e_check_required_hca_cap(mdev)) {
		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
		return (NULL);
	}
	priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
	/* NOTE(review): M_WAITOK malloc cannot fail; this check is dead code */
	if (priv == NULL) {
		mlx5_core_err(mdev, "malloc() failed\n");
		return (NULL);
	}
	mlx5e_priv_mtx_init(priv);

	ifp = priv->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		mlx5_core_err(mdev, "if_alloc() failed\n");
		goto err_free_priv;
	}
	ifp->if_softc = priv;
	if_initname(ifp, "mce", atomic_fetchadd_int(&mlx5_en_unit, 1));
	ifp->if_mtu = ETHERMTU;
	ifp->if_init = mlx5e_open;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mlx5e_ioctl;
	ifp->if_transmit = mlx5e_xmit;
	ifp->if_qflush = if_qflush;
#if (__FreeBSD_version >= 1100000)
	ifp->if_get_counter = mlx5e_get_counter;
#endif
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	/*
	 * Set driver features
	 */
	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
	ifp->if_capabilities |= IFCAP_LRO;
	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;

	/* set TSO limits so that we don't have to drop TX packets */
	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;

	ifp->if_capenable = ifp->if_capabilities;
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TSO)
		ifp->if_hwassist |= CSUM_TSO;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);

	/* ifnet sysctl tree */
	sysctl_ctx_init(&priv->sysctl_ctx);
	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
	if (priv->sysctl_ifnet == NULL) {
		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
		goto err_free_sysctl;
	}
	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
	if (priv->sysctl_ifnet == NULL) {
		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
		goto err_free_sysctl;
	}
	/* HW sysctl tree */
	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
	if (priv->sysctl_hw == NULL) {
		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
		goto err_free_sysctl;
	}

	mlx5e_build_ifp_priv(mdev, priv, ncv);

	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
	if (err) {
		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
		    __func__, err);
		goto err_free_sysctl;
	}
	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
	if (err) {
		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
		    __func__, err);
		goto err_unmap_free_uar;
	}

	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);

	if (err) {
		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
		    __func__, err);
		goto err_dealloc_pd;
	}

	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
	if (err) {
		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
		    __func__, err);
		goto err_dealloc_transport_domain;
	}
	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);

	/* set default MTU */
	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);

	/* Set desc */
	device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);

	/* Set default media status */
	priv->media_status_last = IFM_AVALID;
	priv->media_active_last = IFM_ETHER | IFM_AUTO;

	/* Pauseframes are enabled by default */
	priv->params_ethtool.tx_pauseframe_control = 1;
	priv->params_ethtool.rx_pauseframe_control = 1;

	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
	if (err) {
		eth_proto_cap = 0;
		if_printf(ifp, "%s: Query port media capability failed, %d\n",
		    __func__, err);
	}

	/* Setup supported medias */
	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
	    mlx5e_media_change, mlx5e_media_status);

	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
		if (mlx5e_mode_table[i].baudrate == 0)
			continue;
		if (MLX5E_PROT_MASK(i) & eth_proto_cap)
ifmedia_add(&priv->media, + IFM_ETHER | mlx5e_mode_table[i].subtype | + IFM_FDX, 0, NULL); + } + + ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO); + ether_ifattach(ifp, dev_addr); + + /* Register for VLAN events */ + priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, + mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); + priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, + mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); + + /* Link is down by default */ + if_link_state_change(ifp, LINK_STATE_DOWN); + + mlx5e_enable_async_events(priv); + + mlx5e_add_hw_stats(priv); + + mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), + "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM, + priv->stats.vport.arg); + + mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), + "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM, + priv->stats.pport.arg); + + mlx5e_create_ethtool(priv); + + mtx_lock(&priv->async_events_mtx); + mlx5e_update_stats(priv); + mtx_unlock(&priv->async_events_mtx); + + return (priv); + +err_dealloc_transport_domain: + mlx5_dealloc_transport_domain(mdev, priv->tdn); + +err_dealloc_pd: + mlx5_core_dealloc_pd(mdev, priv->pdn); + +err_unmap_free_uar: + mlx5_unmap_free_uar(mdev, &priv->cq_uar); + +err_free_sysctl: + sysctl_ctx_free(&priv->sysctl_ctx); + + if_free(ifp); + +err_free_priv: + mlx5e_priv_mtx_destroy(priv); + free(priv, M_MLX5EN); + return (NULL); +} + +static void +mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct ifnet *ifp = priv->ifp; + + /* don't allow more IOCTLs */ + priv->gone = 1; + + /* XXX wait a bit to allow IOCTL handlers to complete */ + pause("W", hz); + + /* stop watchdog timer */ + callout_drain(&priv->watchdog); + + if (priv->vlan_attach != NULL) + EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); + if (priv->vlan_detach != NULL) + 
EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); + + /* make sure device gets closed */ + PRIV_LOCK(priv); + mlx5e_close_locked(ifp); + PRIV_UNLOCK(priv); + + /* unregister device */ + ifmedia_removeall(&priv->media); + ether_ifdetach(ifp); + if_free(ifp); + + /* destroy all remaining sysctl nodes */ + if (priv->sysctl_debug) + sysctl_ctx_free(&priv->stats.port_stats_debug.ctx); + sysctl_ctx_free(&priv->stats.vport.ctx); + sysctl_ctx_free(&priv->stats.pport.ctx); + sysctl_ctx_free(&priv->sysctl_ctx); + + mlx5_core_destroy_mkey(priv->mdev, &priv->mr); + mlx5_dealloc_transport_domain(priv->mdev, priv->tdn); + mlx5_core_dealloc_pd(priv->mdev, priv->pdn); + mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); + mlx5e_disable_async_events(priv); + flush_scheduled_work(); + mlx5e_priv_mtx_destroy(priv); + free(priv, M_MLX5EN); +} + +static void * +mlx5e_get_ifp(void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + + return (priv->ifp); +} + +static struct mlx5_interface mlx5e_interface = { + .add = mlx5e_create_ifp, + .remove = mlx5e_destroy_ifp, + .event = mlx5e_async_event, + .protocol = MLX5_INTERFACE_PROTOCOL_ETH, + .get_dev = mlx5e_get_ifp, +}; + +void +mlx5e_init(void) +{ + mlx5_register_interface(&mlx5e_interface); +} + +void +mlx5e_cleanup(void) +{ + mlx5_unregister_interface(&mlx5e_interface); +} + +module_init_order(mlx5e_init, SI_ORDER_THIRD); +module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD); + +#if (__FreeBSD_version >= 1100000) +MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1); +#endif +MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1); +MODULE_VERSION(mlx5en, 1); diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c new file mode 100644 index 0000000..bce4915 --- /dev/null +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c @@ -0,0 +1,340 @@ +/*- + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include "en.h" +#include <machine/in_cksum.h> + +static inline int +mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, u16 ix) +{ + bus_dma_segment_t segs[1]; + struct mbuf *mb; + int nsegs; + int err; + + if (rq->mbuf[ix].mbuf != NULL) + return (0); + + mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rq->wqe_sz); + if (unlikely(!mb)) + return (-ENOMEM); + + /* set initial mbuf length */ + mb->m_pkthdr.len = mb->m_len = rq->wqe_sz; + + /* get IP header aligned */ + m_adj(mb, MLX5E_NET_IP_ALIGN); + + err = -bus_dmamap_load_mbuf_sg(rq->dma_tag, rq->mbuf[ix].dma_map, + mb, segs, &nsegs, BUS_DMA_NOWAIT); + if (err != 0) + goto err_free_mbuf; + if (unlikely(nsegs != 1)) { + bus_dmamap_unload(rq->dma_tag, rq->mbuf[ix].dma_map); + err = -ENOMEM; + goto err_free_mbuf; + } + wqe->data.addr = cpu_to_be64(segs[0].ds_addr); + + rq->mbuf[ix].mbuf = mb; + rq->mbuf[ix].data = mb->m_data; + + bus_dmamap_sync(rq->dma_tag, rq->mbuf[ix].dma_map, + BUS_DMASYNC_PREREAD); + return (0); + +err_free_mbuf: + m_freem(mb); + return (err); +} + +static void +mlx5e_post_rx_wqes(struct mlx5e_rq *rq) +{ + if (unlikely(rq->enabled == 0)) + return; + + while (!mlx5_wq_ll_is_full(&rq->wq)) { + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, rq->wq.head); + + if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, rq->wq.head))) + break; + + mlx5_wq_ll_push(&rq->wq, be16_to_cpu(wqe->next.next_wqe_index)); + } + + /* ensure wqes are visible to device before updating doorbell record */ + wmb(); + + mlx5_wq_ll_update_db_record(&rq->wq); +} + +static void +mlx5e_lro_update_hdr(struct mbuf* mb, struct mlx5_cqe64 *cqe) +{ + /* TODO: consider vlans, ip options, ... 
*/ + struct ether_header *eh; + uint16_t eh_type; + struct ip6_hdr *ip6 = NULL; + struct ip *ip4 = NULL; + struct tcphdr *th; + uint32_t *ts_ptr; + + eh = mtod(mb, struct ether_header *); + eh_type = ntohs(eh->ether_type); + + u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); + int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || + (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); + + /* TODO: consider vlan */ + u16 tot_len = be32_to_cpu(cqe->byte_cnt) - ETHER_HDR_LEN; + + switch (eh_type) { + case ETHERTYPE_IP: + ip4 = (struct ip *)(eh + 1); + th = (struct tcphdr *)(ip4 + 1); + break; + case ETHERTYPE_IPV6: + ip6 = (struct ip6_hdr *)(eh + 1); + th = (struct tcphdr *)(ip6 + 1); + break; + default: + return; + } + + ts_ptr = (uint32_t *)(th + 1); + + if (get_cqe_lro_tcppsh(cqe)) + th->th_flags |= TH_PUSH; + + if (tcp_ack) { + th->th_flags |= TH_ACK; + th->th_ack = cqe->lro_ack_seq_num; + th->th_win = cqe->lro_tcp_win; + + /* FreeBSD handles only 32bit aligned timestamp + * right after the TCP hdr + * +--------+--------+--------+--------+ + * | NOP | NOP | TSopt | 10 | + * +--------+--------+--------+--------+ + * | TSval timestamp | + * +--------+--------+--------+--------+ + * | TSecr timestamp | + * +--------+--------+--------+--------+ + */ + if (get_cqe_lro_timestamp_valid(cqe) && + (__predict_true(*ts_ptr) == ntohl(TCPOPT_NOP << 24 | + TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | + TCPOLEN_TIMESTAMP))) { + /* cqe->timestamp is 64bit long. + * [0-31] - timestamp. + * [32-64] - timestamp echo replay. 
+ */ + ts_ptr[1] = *(uint32_t *)&cqe->timestamp; + ts_ptr[2] = *((uint32_t *)&cqe->timestamp + 1); + } + } + + if (ip4) { + ip4->ip_ttl = cqe->lro_min_ttl; + ip4->ip_len = cpu_to_be16(tot_len); + ip4->ip_sum = 0; + ip4->ip_sum = in_cksum(mb, ip4->ip_hl << 2); + } else { + ip6->ip6_hlim = cqe->lro_min_ttl; + ip6->ip6_plen = cpu_to_be16(tot_len - + sizeof(struct ip6_hdr)); + } + /* TODO: handle tcp checksum */ +} + +static inline void +mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe, + struct mlx5e_rq *rq, struct mbuf *mb, + u32 cqe_bcnt) +{ + struct ifnet *ifp = rq->ifp; + int lro_num_seg; /* HW LRO session aggregated packets counter */ + + lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; + if (lro_num_seg > 1) { + mlx5e_lro_update_hdr(mb, cqe); + rq->stats.lro_packets++; + rq->stats.lro_bytes += cqe_bcnt; + } + + mb->m_pkthdr.len = mb->m_len = cqe_bcnt; + /* check if a Toeplitz hash was computed */ + if (cqe->rss_hash_type != 0) + mb->m_pkthdr.flowid = be32_to_cpu(cqe->rss_hash_result); + else + mb->m_pkthdr.flowid = rq->ix; + M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE); + mb->m_pkthdr.rcvif = ifp; + + if (likely(ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) && + ((cqe->hds_ip_ext & (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK)) == + (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK))) { + mb->m_pkthdr.csum_flags = + CSUM_IP_CHECKED | CSUM_IP_VALID | + CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + mb->m_pkthdr.csum_data = htons(0xffff); + } else { + rq->stats.csum_none++; + } + + if (cqe_has_vlan(cqe)) { + mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->vlan_info); + mb->m_flags |= M_VLANTAG; + } +} + +static int +mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget) +{ +#ifndef HAVE_TURBO_LRO + struct lro_entry *queued; +#endif + int i; + + for (i = 0; i < budget; i++) { + struct mlx5e_rx_wqe *wqe; + struct mlx5_cqe64 *cqe; + struct mbuf *mb; + __be16 wqe_counter_be; + u16 wqe_counter; + u32 byte_cnt; + + cqe = mlx5e_get_cqe(&rq->cq); + if (!cqe) + break; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = 
be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + byte_cnt = be32_to_cpu(cqe->byte_cnt); + + bus_dmamap_sync(rq->dma_tag, + rq->mbuf[wqe_counter].dma_map, + BUS_DMASYNC_POSTREAD); + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats.wqe_err++; + goto wq_ll_pop; + } + + if (MHLEN >= byte_cnt && + (mb = m_gethdr(M_NOWAIT, MT_DATA)) != NULL) { + bcopy(rq->mbuf[wqe_counter].data, mtod(mb, caddr_t), + byte_cnt); + } else { + mb = rq->mbuf[wqe_counter].mbuf; + rq->mbuf[wqe_counter].mbuf = NULL; /* safety clear */ + + bus_dmamap_unload(rq->dma_tag, + rq->mbuf[wqe_counter].dma_map); + } + + mlx5e_build_rx_mbuf(cqe, rq, mb, byte_cnt); + rq->stats.packets++; +#ifdef HAVE_TURBO_LRO + if (mb->m_pkthdr.csum_flags == 0 || + (rq->ifp->if_capenable & IFCAP_LRO) == 0 || + rq->lro.mbuf == NULL) { + /* normal input */ + rq->ifp->if_input(rq->ifp, mb); + } else { + tcp_tlro_rx(&rq->lro, mb); + } +#else + if (mb->m_pkthdr.csum_flags == 0 || + (rq->ifp->if_capenable & IFCAP_LRO) == 0 || + rq->lro.lro_cnt == 0 || + tcp_lro_rx(&rq->lro, mb, 0) != 0) { + rq->ifp->if_input(rq->ifp, mb); + } +#endif +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); + } + + mlx5_cqwq_update_db_record(&rq->cq.wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); +#ifndef HAVE_TURBO_LRO + while ((queued = SLIST_FIRST(&rq->lro.lro_active)) != NULL) { + SLIST_REMOVE_HEAD(&rq->lro.lro_active, next); + tcp_lro_flush(&rq->lro, queued); + } +#endif + return (i); +} + +void +mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq) +{ + struct mlx5e_rq *rq = container_of(mcq, struct mlx5e_rq, cq.mcq); + int i = 0; + +#ifdef HAVE_PER_CQ_EVENT_PACKET + struct mbuf *mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rq->wqe_sz); + if (mb != NULL) { + /* this code is used for debugging purpose only */ + mb->m_pkthdr.len = mb->m_len = 15; + memset(mb->m_data, 255, 14); + mb->m_data[14] = rq->ix; + mb->m_pkthdr.rcvif = rq->ifp; + 
rq->ifp->if_input(rq->ifp, mb); + } +#endif + + mtx_lock(&rq->mtx); + + /* + * Polling the entire CQ without posting new WQEs results in + * lack of receive WQEs during heavy traffic scenarios. + */ + while (1) { + if (mlx5e_poll_rx_cq(rq, MLX5E_RX_BUDGET_MAX) != + MLX5E_RX_BUDGET_MAX) + break; + i += MLX5E_RX_BUDGET_MAX; + if (i >= MLX5E_BUDGET_MAX) + break; + mlx5e_post_rx_wqes(rq); + } + mlx5e_post_rx_wqes(rq); + mlx5e_cq_arm(&rq->cq); +#ifdef HAVE_TURBO_LRO + tcp_tlro_flush(&rq->lro, 1); +#endif + mtx_unlock(&rq->mtx); +} diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c new file mode 100644 index 0000000..3b69f84 --- /dev/null +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c @@ -0,0 +1,485 @@ +/*- + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include "en.h" +#include <machine/atomic.h> + +void +mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt, bool notify_hw) +{ + u16 pi = sq->pc & sq->wq.sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); + + memset(&wqe->ctrl, 0, sizeof(wqe->ctrl)); + + wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); + wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + + sq->mbuf[pi].mbuf = NULL; + sq->mbuf[pi].num_bytes = 0; + sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + sq->pc += sq->mbuf[pi].num_wqebbs; + if (notify_hw) + mlx5e_tx_notify_hw(sq, wqe, 0); +} + +#if (__FreeBSD_version >= 1100000) +static uint32_t mlx5e_hash_value; + +static void +mlx5e_hash_init(void *arg) +{ + mlx5e_hash_value = m_ether_tcpip_hash_init(); +} + +/* Make kernel call mlx5e_hash_init after the random stack finished initializing */ +SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL); +#endif + +static struct mlx5e_sq * +mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb) +{ + struct mlx5e_priv *priv = ifp->if_softc; + u32 ch; + u32 tc; + + /* check if channels are successfully opened */ + if (unlikely(priv->channel == NULL)) + return (NULL); + + /* obtain VLAN information if present */ + if (mb->m_flags & M_VLANTAG) { + tc = (mb->m_pkthdr.ether_vtag >> 13); + if (tc >= priv->num_tc) + tc = 
priv->default_vlan_prio; + } else { + tc = priv->default_vlan_prio; + } + + ch = priv->params.num_channels; + + /* check if flowid is set */ + if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) { + ch = (mb->m_pkthdr.flowid % 128) % ch; + } else { +#if (__FreeBSD_version >= 1100000) + ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 | + MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch; +#else + /* + * m_ether_tcpip_hash not present in stable, so just + * throw unhashed mbufs on queue 0 + */ + ch = 0; +#endif + } + + /* check if channel is allocated */ + if (unlikely(priv->channel[ch] == NULL)) + return (NULL); + + return (&priv->channel[ch]->sq[tc]); +} + +static inline u16 +mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, struct mbuf *mb) +{ + return (MIN(MLX5E_MAX_TX_INLINE, mb->m_len)); +} + +static int +mlx5e_get_header_size(struct mbuf *mb) +{ + struct ether_vlan_header *eh; + struct tcphdr *th; + struct ip *ip; + int ip_hlen, tcp_hlen; + struct ip6_hdr *ip6; + uint16_t eth_type; + int eth_hdr_len; + + eh = mtod(mb, struct ether_vlan_header *); + if (mb->m_len < ETHER_HDR_LEN) + return (0); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + eth_type = ntohs(eh->evl_proto); + eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + } else { + eth_type = ntohs(eh->evl_encap_proto); + eth_hdr_len = ETHER_HDR_LEN; + } + if (mb->m_len < eth_hdr_len) + return (0); + switch (eth_type) { + case ETHERTYPE_IP: + ip = (struct ip *)(mb->m_data + eth_hdr_len); + if (mb->m_len < eth_hdr_len + sizeof(*ip)) + return (0); + if (ip->ip_p != IPPROTO_TCP) + return (0); + ip_hlen = ip->ip_hl << 2; + eth_hdr_len += ip_hlen; + break; + case ETHERTYPE_IPV6: + ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len); + if (mb->m_len < eth_hdr_len + sizeof(*ip6)) + return (0); + if (ip6->ip6_nxt != IPPROTO_TCP) + return (0); + eth_hdr_len += sizeof(*ip6); + break; + default: + return (0); + } + if (mb->m_len < eth_hdr_len + sizeof(*th)) + return (0); + th = (struct tcphdr *)(mb->m_data + eth_hdr_len); + 
tcp_hlen = th->th_off << 2; + eth_hdr_len += tcp_hlen; + if (mb->m_len < eth_hdr_len) + return (0); + return (eth_hdr_len); +} + +/* The return value is not going back to the stack because of + * the drbr */ +static int +mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp) +{ + bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS]; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_tx_wqe *wqe; + struct ifnet *ifp; + int nsegs; + int err; + int x; + struct mbuf *mb = *mbp; + u16 ds_cnt; + u16 ihs; + u16 pi; + u8 opcode; + + /* Return ENOBUFS if the queue is full, this may trigger reinsertion + * of the mbuf into the drbr (see mlx5e_xmit_locked) */ + if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) { + return (ENOBUFS); + } + + /* Align SQ edge with NOPs to avoid WQE wrap around */ + pi = ((~sq->pc) & sq->wq.sz_m1); + if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) { + /* send one multi NOP message instead of many */ + mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS, false); + pi = ((~sq->pc) & sq->wq.sz_m1); + if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) { + m_freem(mb); + return (ENOMEM); + } + } + + /* Setup local variables */ + pi = sq->pc & sq->wq.sz_m1; + wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); + ifp = sq->channel->ifp; + + memset(wqe, 0, sizeof(*wqe)); + + /* send a copy of the frame to the BPF listener, if any */ + if (ifp != NULL && ifp->if_bpf != NULL) + ETHER_BPF_MTAP(ifp, mb); + + if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) { + wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM; + } + if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) { + wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM; + } + if ( wqe->eth.cs_flags == 0 ) { + sq->stats.csum_offload_none++; + } + + if (mb->m_pkthdr.csum_flags & CSUM_TSO) { + u32 payload_len; + u32 mss = mb->m_pkthdr.tso_segsz; + u32 num_pkts; + + wqe->eth.mss = cpu_to_be16(mss); + opcode = MLX5_OPCODE_LSO; + ihs = mlx5e_get_header_size(mb); + payload_len = mb->m_pkthdr.len - 
ihs; + if (payload_len == 0) + num_pkts = 1; + else + num_pkts = DIV_ROUND_UP(payload_len, mss); + sq->mbuf[pi].num_bytes = payload_len + (num_pkts * ihs); + + sq->stats.tso_packets++; + sq->stats.tso_bytes += payload_len; + } else { + opcode = MLX5_OPCODE_SEND; + ihs = mlx5e_get_inline_hdr_size(sq, mb); + sq->mbuf[pi].num_bytes = max_t (unsigned int, + mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN); + } + if (mb->m_flags & M_VLANTAG) { + struct ether_vlan_header *eh = + (struct ether_vlan_header *)wqe->eth.inline_hdr_start; + /* range checks */ + if (ihs > (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN)) + ihs = (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN); + else if (ihs < ETHER_HDR_LEN) { + err = EINVAL; + goto tx_drop; + } + m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh); + m_adj(mb, ETHER_HDR_LEN); + /* insert 4 bytes VLAN tag into data stream */ + eh->evl_proto = eh->evl_encap_proto; + eh->evl_encap_proto = htons(ETHERTYPE_VLAN); + eh->evl_tag = htons(mb->m_pkthdr.ether_vtag); + /* copy rest of header data, if any */ + m_copydata(mb, 0, ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1)); + m_adj(mb, ihs - ETHER_HDR_LEN); + /* extend header by 4 bytes */ + ihs += ETHER_VLAN_ENCAP_LEN; + } else { + m_copydata(mb, 0, ihs, wqe->eth.inline_hdr_start); + m_adj(mb, ihs); + } + + wqe->eth.inline_hdr_sz = cpu_to_be16(ihs); + + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + if (likely(ihs > sizeof(wqe->eth.inline_hdr_start))) { + ds_cnt += DIV_ROUND_UP(ihs - sizeof(wqe->eth.inline_hdr_start), + MLX5_SEND_WQE_DS); + } + dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt; + + /* trim off empty mbufs */ + while (mb->m_len == 0) { + mb = m_free(mb); + /* check if all data has been inlined */ + if (mb == NULL) + goto skip_dma; + } + + err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, + mb, segs, &nsegs, BUS_DMA_NOWAIT); + if (err == EFBIG) { + /* Update *mbp before defrag in case it was trimmed in the loop above */ + *mbp = mb; + /* Update statistics */ + 
sq->stats.defragged++; + /* Too many mbuf fragments */ + mb = m_defrag(*mbp, M_NOWAIT); + if (mb == NULL) { + mb = *mbp; + goto tx_drop; + } + /* Try again */ + err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map, + mb, segs, &nsegs, BUS_DMA_NOWAIT); + } + /* catch errors */ + if (err != 0) { + goto tx_drop; + } + *mbp = mb; + + for (x = 0; x != nsegs; x++) { + if (segs[x].ds_len == 0) + continue; + dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len); + dseg++; + } +skip_dma: + ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl)); + + wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); + wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + + /* store pointer to mbuf */ + sq->mbuf[pi].mbuf = mb; + sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + sq->pc += sq->mbuf[pi].num_wqebbs; + + /* make sure all mbuf data is written to RAM */ + if (mb != NULL) + bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, BUS_DMASYNC_PREWRITE); + + mlx5e_tx_notify_hw(sq, wqe, 0); + + sq->stats.packets++; + return (0); + +tx_drop: + sq->stats.dropped++; + *mbp = NULL; + m_freem(mb); + return err; +} + +static void +mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget) +{ + u16 sqcc; + + /* + * sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + while (budget--) { + struct mlx5_cqe64 *cqe; + struct mbuf *mb; + u16 ci; + + cqe = mlx5e_get_cqe(&sq->cq); + if (!cqe) + break; + + ci = sqcc & sq->wq.sz_m1; + mb = sq->mbuf[ci].mbuf; + sq->mbuf[ci].mbuf = NULL; /* safety clear */ + + if (mb == NULL) { + if (sq->mbuf[ci].num_bytes == 0) { + /* NOP */ + sq->stats.nop++; + } + } else { + bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map); + + /* free 
transmitted mbuf */ + m_freem(mb); + } + sqcc += sq->mbuf[ci].num_wqebbs; + } + + mlx5_cqwq_update_db_record(&sq->cq.wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; + + if (atomic_cmpset_int(&sq->queue_state, MLX5E_SQ_FULL, MLX5E_SQ_READY)) + taskqueue_enqueue(sq->sq_tq, &sq->sq_task); +} + +static int +mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb) +{ + struct mbuf *next; + int err = 0; + + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { + if (mb) + err = drbr_enqueue(ifp, sq->br, mb); + return (err); + } + + if (mb != NULL) + /* If we can't insert mbuf into drbr, try to xmit anyway. + * We keep the error we got so we could return that after xmit. + */ + err = drbr_enqueue(ifp, sq->br, mb); + + /* Process the queue */ + while ((next = drbr_peek(ifp, sq->br)) != NULL) { + if (mlx5e_sq_xmit(sq, &next) != 0) { + if (next == NULL) { + drbr_advance(ifp, sq->br); + } else { + drbr_putback(ifp, sq->br, next); + atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_FULL); + } + break; + } + drbr_advance(ifp, sq->br); + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + break; + } + return (err); +} + +int +mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb) +{ + struct mlx5e_sq *sq; + int ret; + + sq = mlx5e_select_queue(ifp, mb); + if (unlikely(sq == NULL)) { + /* invalid send queue */ + m_freem(mb); + return (ENXIO); + } + + if (mtx_trylock(&sq->lock)) { + ret = mlx5e_xmit_locked(ifp, sq, mb); + mtx_unlock(&sq->lock); + } else { + ret = drbr_enqueue(ifp, sq->br, mb); + taskqueue_enqueue(sq->sq_tq, &sq->sq_task); + } + + return (ret); +} + +void +mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq) +{ + struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq); + + mtx_lock(&sq->comp_lock); + mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX); + mlx5e_cq_arm(&sq->cq); + mtx_unlock(&sq->comp_lock); +} + +void +mlx5e_tx_que(void *context, int pending) +{ + struct mlx5e_sq *sq = context; + struct ifnet *ifp = 
sq->channel->ifp; + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + mtx_lock(&sq->lock); + if (!drbr_empty(ifp, sq->br)) + mlx5e_xmit_locked(ifp, sq, NULL); + mtx_unlock(&sq->lock); + } +} diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c new file mode 100644 index 0000000..7836bfe --- /dev/null +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_txrx.c @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+
+/*
+ * Return the next completion queue entry owned by software, or NULL
+ * when the CQ is empty.  On success the CQ consumer index is advanced
+ * past the returned entry.
+ */
+struct mlx5_cqe64 *
+mlx5e_get_cqe(struct mlx5e_cq *cq)
+{
+	struct mlx5_cqe64 *cqe;
+
+	cqe = mlx5_cqwq_get_wqe(&cq->wq, mlx5_cqwq_get_ci(&cq->wq));
+
+	/*
+	 * The ownership bit toggles on every pass through the CQ ring;
+	 * comparing it against the current wrap count parity tells
+	 * whether hardware has written this entry yet.
+	 */
+	if ((cqe->op_own ^ mlx5_cqwq_get_wrap_cnt(&cq->wq)) & MLX5_CQE_OWNER_MASK)
+		return (NULL);
+
+	mlx5_cqwq_pop(&cq->wq);
+
+	/* ensure cqe content is read after cqe ownership bit */
+	rmb();
+
+	return (cqe);
+}
+
+/*
+ * Completion queue error callback: log the CQ number and the raised
+ * event code on the owning interface for debugging.
+ */
+void
+mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event)
+{
+	struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
+	struct mlx5e_channel *c = cq->channel;
+	struct mlx5e_priv *priv = c->priv;
+	struct ifnet *ifp = priv->ifp;
+
+	if_printf(ifp, "%s: cqn=0x%.6x event=0x%.2x\n",
+	    __func__, mcq->cqn, event);
+}
diff --git a/sys/dev/mlx5/mlx5_en/tcp_tlro.c b/sys/dev/mlx5/mlx5_en/tcp_tlro.c
new file mode 100644
index 0000000..27e861e
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/tcp_tlro.c
@@ -0,0 +1,697 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/libkern.h>
+#include <sys/mbuf.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/endian.h>
+#include <sys/socket.h>
+#include <sys/sockopt.h>
+#include <sys/smp.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/ethernet.h>
+
+#if defined(INET) || defined(INET6)
+#include <netinet/in.h>
+#endif
+
+#ifdef INET
+#include <netinet/ip.h>
+#endif
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <netinet/tcp_var.h>
+
+#include "tcp_tlro.h"
+
+#ifndef M_HASHTYPE_LRO_TCP
+#ifndef KLD_MODULE
+#warning "M_HASHTYPE_LRO_TCP is not defined"
+#endif
+#define M_HASHTYPE_LRO_TCP 254
+#endif
+
+static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, tlro,
+ CTLFLAG_RW, 0, "TCP turbo LRO parameters");
+
+static MALLOC_DEFINE(M_TLRO, "TLRO", "Turbo LRO");
+
+static int tlro_min_rate = 20; /* Hz */
+
+SYSCTL_INT(_net_inet_tcp_tlro, OID_AUTO, min_rate, CTLFLAG_RWTUN,
+ &tlro_min_rate, 0, "Minimum serving rate in Hz");
+
+static int tlro_max_packet = IP_MAXPACKET;
+
+SYSCTL_INT(_net_inet_tcp_tlro, OID_AUTO, max_packet, CTLFLAG_RWTUN,
+ &tlro_max_packet, 0, "Maximum packet size in bytes");
+
+typedef struct {
+ uint32_t value;
+} __packed uint32_p_t;
+
+/*
+ * Checksum-style sum over "l" bytes taken as little-endian 32-bit
+ * words, folded down to 16 bits.  Any trailing bytes beyond a
+ * multiple of 4 are ignored (callers pass 4-byte multiples).
+ */
+static uint16_t
+tcp_tlro_csum(const uint32_p_t *p, size_t l)
+{
+ const uint32_p_t *pend = p + (l / 4);
+ uint64_t cs;
+
+ for (cs = 0; p != pend; p++)
+ cs += le32toh(p->value);
+ while (cs > 0xffff)
+ cs = (cs >> 16) + (cs & 0xffff);
+ return (cs);
+}
+
+/*
+ * Return a pointer to the header at offset "off", provided the first
+ * "off + len" bytes are contiguous in the first mbuf; else NULL.
+ */
+static void *
+tcp_tlro_get_header(const struct mbuf *m, const u_int off,
+ const u_int len)
+{
+ if (m->m_len < (off + len))
+ return (NULL);
+ return (mtod(m, char *) + off);
+}
+
+/*
+ * Save the TCP timestamp option values into "pinfo" when the segment
+ * carries the canonical NOP-NOP-TIMESTAMP option layout.  Returns 1
+ * if timestamps were saved, 0 otherwise.
+ */
+static uint8_t
+tcp_tlro_info_save_timestamp(struct tlro_mbuf_data *pinfo)
+{
+ struct tcphdr *tcp = pinfo->tcp;
+ uint32_t *ts_ptr;
+
+ if (tcp->th_off < ((TCPOLEN_TSTAMP_APPA + sizeof(*tcp)) >> 2))
+ return (0);
+
+ ts_ptr = (uint32_t *)(tcp + 1);
+ if (*ts_ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
+ return (0);
+
+ /* save timestamps */
+ pinfo->tcp_ts = ts_ptr[1];
+ pinfo->tcp_ts_reply = ts_ptr[2];
+ return (1);
+}
+
+/*
+ * Write the timestamps previously saved from "pinfob" into the TCP
+ * header described by "pinfoa", using the same option-layout check as
+ * tcp_tlro_info_save_timestamp().
+ */
+static void
+tcp_tlro_info_restore_timestamp(struct tlro_mbuf_data *pinfoa,
+ struct tlro_mbuf_data *pinfob)
+{
+ struct tcphdr *tcp = pinfoa->tcp;
+ uint32_t *ts_ptr;
+
+ if (tcp->th_off < ((TCPOLEN_TSTAMP_APPA + sizeof(*tcp)) >> 2))
+ return;
+
+ ts_ptr = (uint32_t *)(tcp + 1);
+ if (*ts_ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
+ return;
+
+ /* restore timestamps */
+ ts_ptr[1] = pinfob->tcp_ts;
+ ts_ptr[2] = pinfob->tcp_ts_reply;
+}
+
+/*
+ * Parse the Ethernet/VLAN/IP/TCP headers of "m" and flatten the
+ * flow-identifying fields into pinfo->buf, so that packets of the
+ * same flow compare equal via tcp_tlro_cmp64().  Any packet that
+ * cannot be aggregated gets pinfo->buf_length = 0, which marks it
+ * "forward as-is" for tcp_tlro_combine().
+ */
+static void
+tcp_tlro_extract_header(struct tlro_mbuf_data *pinfo, struct mbuf *m, int seq)
+{
+ uint8_t *phdr = (uint8_t *)pinfo->buf;
+ struct ether_header *eh;
+ struct ether_vlan_header *vlan;
+#ifdef INET
+ struct ip *ip;
+#endif
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+ struct tcphdr *tcp;
+ uint16_t etype;
+ int diff;
+ int off;
+
+ /* fill in information */
+ pinfo->head = m;
+ pinfo->last_tick = ticks;
+ pinfo->sequence = seq;
+ pinfo->pprev = &m_last(m)->m_next;
+
+ off = sizeof(*eh);
+ if (m->m_len < off)
+ goto error;
+ eh = tcp_tlro_get_header(m, 0, sizeof(*eh));
+ if (eh == NULL)
+ goto error;
+ memcpy(phdr, &eh->ether_dhost, ETHER_ADDR_LEN);
+ phdr += ETHER_ADDR_LEN;
+ memcpy(phdr, &eh->ether_type, sizeof(eh->ether_type));
+ phdr += sizeof(eh->ether_type);
+ etype = ntohs(eh->ether_type);
+
+ if (etype == ETHERTYPE_VLAN) {
+ vlan = tcp_tlro_get_header(m, off, sizeof(*vlan));
+ if (vlan == NULL)
+ goto error;
+ memcpy(phdr, &vlan->evl_tag, sizeof(vlan->evl_tag) +
+ sizeof(vlan->evl_proto));
+ phdr += sizeof(vlan->evl_tag) + sizeof(vlan->evl_proto);
+ etype = ntohs(vlan->evl_proto);
+ off += sizeof(*vlan) - sizeof(*eh);
+ }
+ switch (etype) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ /*
+ * Cannot LRO:
+ * - Non-IP packets
+ * - Fragmented packets
+ * - Packets with IPv4 options
+ * - Non-TCP packets
+ */
+ ip = tcp_tlro_get_header(m, off, sizeof(*ip));
+ if (ip == NULL ||
+ (ip->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 ||
+ (ip->ip_p != IPPROTO_TCP) ||
+ (ip->ip_hl << 2) != sizeof(*ip))
+ goto error;
+
+ /* Legacy IP has a header checksum that needs to be correct */
+ if (!(m->m_pkthdr.csum_flags & CSUM_IP_CHECKED)) {
+ /* Verify IP header */
+ if (tcp_tlro_csum((uint32_p_t *)ip, sizeof(*ip)) != 0xFFFF)
+ m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
+ else
+ m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED |
+ CSUM_IP_VALID;
+ }
+ /* Only accept valid checksums */
+ if (!(m->m_pkthdr.csum_flags & CSUM_IP_VALID) ||
+ !(m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
+ goto error;
+ /* copy src+dst in one go; the fields are adjacent in struct ip */
+ memcpy(phdr, &ip->ip_src, sizeof(ip->ip_src) +
+ sizeof(ip->ip_dst));
+ phdr += sizeof(ip->ip_src) + sizeof(ip->ip_dst);
+ if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
+ pinfo->ip_len = m->m_pkthdr.len - off;
+ else
+ pinfo->ip_len = ntohs(ip->ip_len);
+ pinfo->ip_hdrlen = sizeof(*ip);
+ pinfo->ip.v4 = ip;
+ pinfo->ip_version = 4;
+ off += sizeof(*ip);
+ break;
+#endif
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ /*
+ * Cannot LRO:
+ * - Non-IP packets
+ * - Packets with IPv6 options
+ * - Non-TCP packets
+ */
+ ip6 = tcp_tlro_get_header(m, off, sizeof(*ip6));
+ if (ip6 == NULL || ip6->ip6_nxt != IPPROTO_TCP)
+ goto error;
+ if (!(m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
+ goto error;
+ memcpy(phdr, &ip6->ip6_src, sizeof(struct in6_addr) +
+ sizeof(struct in6_addr));
+ phdr += sizeof(struct in6_addr) + sizeof(struct in6_addr);
+ if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
+ pinfo->ip_len = m->m_pkthdr.len - off;
+ else
+ pinfo->ip_len = ntohs(ip6->ip6_plen) + sizeof(*ip6);
+ pinfo->ip_hdrlen = sizeof(*ip6);
+ pinfo->ip.v6 = ip6;
+ pinfo->ip_version = 6;
+ off += sizeof(*ip6);
+ break;
+#endif
+ default:
+ goto error;
+ }
+ tcp = tcp_tlro_get_header(m, off, sizeof(*tcp));
+ if (tcp == NULL)
+ goto error;
+ memcpy(phdr, &tcp->th_sport, sizeof(tcp->th_sport) +
+ sizeof(tcp->th_dport));
+ phdr += sizeof(tcp->th_sport) +
+ sizeof(tcp->th_dport);
+ /* store TCP header length */
+ *phdr++ = tcp->th_off;
+ if (tcp->th_off < (sizeof(*tcp) >> 2))
+ goto error;
+
+ /* compute offset to data payload */
+ pinfo->tcp_len = (tcp->th_off << 2);
+ off += pinfo->tcp_len;
+
+ /* store more info */
+ pinfo->data_off = off;
+ pinfo->tcp = tcp;
+
+ /* try to save timestamp, if any */
+ *phdr++ = tcp_tlro_info_save_timestamp(pinfo);
+
+ /* verify offset and IP/TCP length */
+ if (off > m->m_pkthdr.len ||
+ pinfo->ip_len < pinfo->tcp_len)
+ goto error;
+
+ /* compute data payload length */
+ pinfo->data_len = (pinfo->ip_len - pinfo->tcp_len - pinfo->ip_hdrlen);
+
+ /* trim any padded data */
+ diff = (m->m_pkthdr.len - off) - pinfo->data_len;
+ if (diff != 0) {
+ if (diff < 0)
+ goto error;
+ else
+ m_adj(m, -diff);
+ }
+ /* compute header length */
+ pinfo->buf_length = phdr - (uint8_t *)pinfo->buf;
+ /* zero-pad rest of buffer */
+ memset(phdr, 0, TLRO_MAX_HEADER - pinfo->buf_length);
+ return;
+error:
+ pinfo->buf_length = 0;
+}
+
+/*
+ * Three-way compare of two TLRO_MAX_HEADER-byte flattened header
+ * blocks; only equality is significant for flow grouping.
+ */
+static int
+tcp_tlro_cmp64(const uint64_t *pa, const uint64_t *pb)
+{
+ int64_t diff = 0;
+ unsigned x;
+
+ for (x = 0; x != TLRO_MAX_HEADER / 8; x++) {
+ /*
+ * NOTE: Endianness does not matter in this
+ * comparison:
+ */
+ diff = pa[x] - pb[x];
+ if (diff != 0)
+ goto done;
+ }
+done:
+ if (diff < 0)
+ return (-1);
+ else if (diff > 0)
+ return (1);
+ return (0);
+}
+
+/*
+ * qsort() callback: entries without an mbuf sort last; the rest are
+ * grouped by flattened header and ordered by TCP sequence number,
+ * ACK number, and finally arrival order.
+ */
+static int
+tcp_tlro_compare_header(const void *_ppa, const void *_ppb)
+{
+ const struct tlro_mbuf_ptr *ppa = _ppa;
+ const struct tlro_mbuf_ptr *ppb = _ppb;
+ struct tlro_mbuf_data *pinfoa = ppa->data;
+ struct tlro_mbuf_data *pinfob = ppb->data;
+ int ret;
+
+ ret = (pinfoa->head == NULL) - (pinfob->head == NULL);
+ if (ret != 0)
+ goto done;
+
+ ret = pinfoa->buf_length - pinfob->buf_length;
+ if (ret != 0)
+ goto done;
+ if (pinfoa->buf_length != 0) {
+ ret = tcp_tlro_cmp64(pinfoa->buf, pinfob->buf);
+ if (ret != 0)
+ goto done;
+ ret = ntohl(pinfoa->tcp->th_seq) - ntohl(pinfob->tcp->th_seq);
+ if (ret != 0)
+ goto done;
+ ret = ntohl(pinfoa->tcp->th_ack) - ntohl(pinfob->tcp->th_ack);
+ if (ret != 0)
+ goto done;
+ ret = pinfoa->sequence - pinfob->sequence;
+ if (ret != 0)
+ goto done;
+ }
+done:
+ return (ret);
+}
+
+/* Sort the queued mbuf array so same-flow segments become adjacent. */
+static void
+tcp_tlro_sort(struct tlro_ctrl *tlro)
+{
+ if (tlro->curr == 0)
+ return;
+
+ qsort(tlro->mbuf, tlro->curr, sizeof(struct tlro_mbuf_ptr),
+ &tcp_tlro_compare_header);
+}
+
+/*
+ * Convert the tlro_min_rate sysctl (Hz) into a maximum hold time in
+ * ticks, clamped to at least one tick.
+ */
+static int
+tcp_tlro_get_ticks(void)
+{
+ int to = tlro_min_rate;
+
+ if (to < 1)
+ to = 1;
+ to = hz / to;
+ if (to < 1)
+ to = 1;
+ return (to);
+}
+
+/*
+ * Walk the sorted array and merge runs of consecutive same-flow
+ * segments: payload mbufs are chained onto the first segment, and the
+ * TCP checksum is updated incrementally (duplicated pseudo-header
+ * fields of the absorbed segments are summed back in).  Merged and
+ * unmergeable packets are handed to if_input; with "force" set,
+ * everything held is flushed regardless of age.
+ */
+static void
+tcp_tlro_combine(struct tlro_ctrl *tlro, int force)
+{
+ struct tlro_mbuf_data *pinfoa;
+ struct tlro_mbuf_data *pinfob;
+ uint32_t cs;
+ int curr_ticks = ticks;
+ int ticks_limit = tcp_tlro_get_ticks();
+ unsigned x;
+ unsigned y;
+ unsigned z;
+ int temp;
+
+ if (tlro->curr == 0)
+ return;
+
+ for (y = 0; y != tlro->curr;) {
+ struct mbuf *m;
+
+ pinfoa = tlro->mbuf[y].data;
+ /* find end of the run [y, x) sharing this flattened header */
+ for (x = y + 1; x != tlro->curr; x++) {
+ pinfob = tlro->mbuf[x].data;
+ if (pinfoa->buf_length != pinfob->buf_length ||
+ tcp_tlro_cmp64(pinfoa->buf, pinfob->buf) != 0)
+ break;
+ }
+ if (pinfoa->buf_length == 0) {
+ /* forward traffic which cannot be combined */
+ for (z = y; z != x; z++) {
+ /* just forward packets */
+ pinfob = tlro->mbuf[z].data;
+
+ m = pinfob->head;
+
+ /* reset info structure */
+ pinfob->head = NULL;
+ pinfob->buf_length = 0;
+
+ /* do stats */
+ tlro->lro_flushed++;
+
+ /* input packet to network layer */
+ (*tlro->ifp->if_input) (tlro->ifp, m);
+ }
+ y = z;
+ continue;
+ }
+
+ /* compute current checksum subtracted some header parts */
+ temp = (pinfoa->ip_len - pinfoa->ip_hdrlen);
+ cs = ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8) +
+ tcp_tlro_csum((uint32_p_t *)pinfoa->tcp, pinfoa->tcp_len);
+
+ /* append all fragments into one block */
+ for (z = y + 1; z != x; z++) {
+
+ pinfob = tlro->mbuf[z].data;
+
+ /* check for command packets */
+ if ((pinfoa->tcp->th_flags & ~(TH_ACK | TH_PUSH)) ||
+ (pinfob->tcp->th_flags & ~(TH_ACK | TH_PUSH)))
+ break;
+
+ /* check if there is enough space */
+ if ((pinfoa->ip_len + pinfob->data_len) > tlro_max_packet)
+ break;
+
+ /* try to append the new segment */
+ temp = ntohl(pinfoa->tcp->th_seq) + pinfoa->data_len;
+ if (temp != (int)ntohl(pinfob->tcp->th_seq))
+ break;
+
+ temp = pinfob->ip_len - pinfob->ip_hdrlen;
+ cs += ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8) +
+ tcp_tlro_csum((uint32_p_t *)pinfob->tcp, pinfob->tcp_len);
+ /* remove fields which appear twice */
+ cs += (IPPROTO_TCP << 8);
+ if (pinfob->ip_version == 4) {
+ cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v4->ip_src, 4);
+ cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v4->ip_dst, 4);
+ } else {
+ cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v6->ip6_src, 16);
+ cs += tcp_tlro_csum((uint32_p_t *)&pinfob->ip.v6->ip6_dst, 16);
+ }
+ /* remainder computation */
+ while (cs > 0xffff)
+ cs = (cs >> 16) + (cs & 0xffff);
+
+ /* update window and ack sequence number */
+ pinfoa->tcp->th_ack = pinfob->tcp->th_ack;
+ pinfoa->tcp->th_win = pinfob->tcp->th_win;
+
+ /* check if we should restore the timestamp */
+ tcp_tlro_info_restore_timestamp(pinfoa, pinfob);
+
+ /* accumulate TCP flags */
+ pinfoa->tcp->th_flags |= pinfob->tcp->th_flags;
+
+ /* update lengths */
+ pinfoa->ip_len += pinfob->data_len;
+ pinfoa->data_len += pinfob->data_len;
+
+ /* clear mbuf pointer - packet is accumulated */
+ m = pinfob->head;
+
+ /* reset info structure */
+ pinfob->head = NULL;
+ pinfob->buf_length = 0;
+
+ /* append data to mbuf [y] */
+ m_adj(m, pinfob->data_off);
+ /* delete mbuf tags, if any */
+ m_tag_delete_chain(m, NULL);
+ /* clear packet header flag */
+ m->m_flags &= ~M_PKTHDR;
+
+ /* concat mbuf(s) to end of list */
+ pinfoa->pprev[0] = m;
+ m = m_last(m);
+ pinfoa->pprev = &m->m_next;
+ pinfoa->head->m_pkthdr.len += pinfob->data_len;
+ }
+ /* compute new TCP header checksum */
+ pinfoa->tcp->th_sum = 0;
+
+ temp = pinfoa->ip_len - pinfoa->ip_hdrlen;
+ cs = (cs ^ 0xFFFF) +
+ tcp_tlro_csum((uint32_p_t *)pinfoa->tcp, pinfoa->tcp_len) +
+ ((temp & 0xFF) << 8) + ((temp & 0xFF00) >> 8);
+
+ /* remainder computation */
+ while (cs > 0xffff)
+ cs = (cs >> 16) + (cs & 0xffff);
+
+ /* update new checksum */
+ pinfoa->tcp->th_sum = ~htole16(cs);
+
+ /* update IP length, if any */
+ if (pinfoa->ip_version == 4) {
+ if (pinfoa->ip_len > IP_MAXPACKET) {
+ M_HASHTYPE_SET(pinfoa->head, M_HASHTYPE_LRO_TCP);
+ pinfoa->ip.v4->ip_len = htons(IP_MAXPACKET);
+ } else {
+ pinfoa->ip.v4->ip_len = htons(pinfoa->ip_len);
+ }
+ } else {
+ if (pinfoa->ip_len > (IP_MAXPACKET + sizeof(*pinfoa->ip.v6))) {
+ M_HASHTYPE_SET(pinfoa->head, M_HASHTYPE_LRO_TCP);
+ pinfoa->ip.v6->ip6_plen = htons(IP_MAXPACKET);
+ } else {
+ temp = pinfoa->ip_len - sizeof(*pinfoa->ip.v6);
+ pinfoa->ip.v6->ip6_plen = htons(temp);
+ }
+ }
+
+ temp = curr_ticks - pinfoa->last_tick;
+ /* check if packet should be forwarded */
+ if (force != 0 || z != x || temp >= ticks_limit ||
+ pinfoa->data_len == 0) {
+
+ /* compute new IPv4 header checksum */
+ if (pinfoa->ip_version == 4) {
+ pinfoa->ip.v4->ip_sum = 0;
+ cs = tcp_tlro_csum((uint32_p_t *)pinfoa->ip.v4,
+ sizeof(*pinfoa->ip.v4));
+ pinfoa->ip.v4->ip_sum = ~htole16(cs);
+ }
+ /* forward packet */
+ m = pinfoa->head;
+
+ /* reset info structure */
+ pinfoa->head = NULL;
+ pinfoa->buf_length = 0;
+
+ /* do stats */
+ tlro->lro_flushed++;
+
+ /* input packet to network layer */
+ (*tlro->ifp->if_input) (tlro->ifp, m);
+ }
+ y = z;
+ }
+
+ /* cleanup all NULL heads */
+ for (y = 0; y != tlro->curr; y++) {
+ if (tlro->mbuf[y].data->head == NULL) {
+ /* compact: swap remaining live entries down over holes */
+ for (z = y + 1; z != tlro->curr; z++) {
+ struct tlro_mbuf_ptr ptemp;
+ if (tlro->mbuf[z].data->head == NULL)
+ continue;
+ ptemp = tlro->mbuf[y];
+ tlro->mbuf[y] = tlro->mbuf[z];
+ tlro->mbuf[z] = ptemp;
+ y++;
+ }
+ break;
+ }
+ }
+ tlro->curr = y;
+}
+
+/* Drop trailing array slots whose mbuf has already been forwarded. */
+static void
+tcp_tlro_cleanup(struct tlro_ctrl *tlro)
+{
+ while (tlro->curr != 0 &&
+ tlro->mbuf[tlro->curr - 1].data->head == NULL)
+ tlro->curr--;
+}
+
+/*
+ * Sort, compact and merge/forward everything currently queued.
+ * "force" flushes held packets regardless of age.
+ */
+void
+tcp_tlro_flush(struct tlro_ctrl *tlro, int force)
+{
+ if (tlro->curr == 0)
+ return;
+
+ tcp_tlro_sort(tlro);
+ tcp_tlro_cleanup(tlro);
+ tcp_tlro_combine(tlro, force);
+}
+
+/*
+ * Initialize "tlro" for up to "max_mbufs" queued packets on "ifp".
+ * A single allocation holds the pointer array followed by the data
+ * array.  Returns 0 on success or EINVAL on bad arguments.
+ */
+int
+tcp_tlro_init(struct tlro_ctrl *tlro, struct ifnet *ifp,
+ int max_mbufs)
+{
+ ssize_t size;
+ uint32_t x;
+
+ /* set zero defaults */
+ memset(tlro, 0, sizeof(*tlro));
+
+ /* compute size needed for data */
+ size = (sizeof(struct tlro_mbuf_ptr) * max_mbufs) +
+ (sizeof(struct tlro_mbuf_data) * max_mbufs);
+
+ /* range check */
+ if (max_mbufs <= 0 || size <= 0 || ifp == NULL)
+ return (EINVAL);
+
+ /* setup tlro control structure */
+ tlro->mbuf = malloc(size, M_TLRO, M_WAITOK | M_ZERO);
+ tlro->max = max_mbufs;
+ tlro->ifp = ifp;
+
+ /* setup pointer array */
+ for (x = 0; x != tlro->max; x++) {
+ tlro->mbuf[x].data = ((struct tlro_mbuf_data *)
+ &tlro->mbuf[max_mbufs]) + x;
+ }
+ return (0);
+}
+
+/*
+ * Release all resources held by "tlro", freeing any mbufs that were
+ * still queued, and zero the control structure.
+ */
+void
+tcp_tlro_free(struct tlro_ctrl *tlro)
+{
+ struct tlro_mbuf_data *pinfo;
+ struct mbuf *m;
+ uint32_t y;
+
+ /* check if not setup */
+ if (tlro->mbuf == NULL)
+ return;
+ /* free MBUF array and any leftover MBUFs */
+ for (y = 0; y != tlro->max; y++) {
+
+ pinfo = tlro->mbuf[y].data;
+
+ m = pinfo->head;
+
+ /* reset info structure */
+ pinfo->head = NULL;
+ pinfo->buf_length = 0;
+
+ m_freem(m);
+ }
+ free(tlro->mbuf, M_TLRO);
+ /* reset buffer */
+ memset(tlro, 0, sizeof(*tlro));
+}
+
+/*
+ * Receive entry point: queue "m" for aggregation when there is room,
+ * otherwise pass it straight to the network layer (or drop it when no
+ * interface is configured).
+ */
+void
+tcp_tlro_rx(struct tlro_ctrl *tlro, struct mbuf *m)
+{
+ if (m->m_len > 0 && tlro->curr < tlro->max) {
+ /* do stats */
+ tlro->lro_queued++;
+
+ /* extract header */
+ tcp_tlro_extract_header(tlro->mbuf[tlro->curr++].data,
+ m, tlro->sequence++);
+ } else if (tlro->ifp != NULL) {
+ /* do stats */
+ tlro->lro_flushed++;
+
+ /* input packet to network layer */
+ (*tlro->ifp->if_input) (tlro->ifp, m);
+ } else {
+ /* packet drop */
+ m_freem(m);
+ }
+}
diff --git a/sys/dev/mlx5/mlx5_en/tcp_tlro.h b/sys/dev/mlx5/mlx5_en/tcp_tlro.h
new file mode 100644
index 0000000..1e605d5
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_en/tcp_tlro.h
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _TCP_TLRO_H_
+#define _TCP_TLRO_H_
+
+#define TLRO_MAX_HEADER 64 /* bytes */
+
+struct ip;
+struct ip6_hdr;
+struct tcphdr;
+
+/*
+ * Per-packet parse state; "buf" holds the flattened L2/L3/L4 header
+ * fields used to match packets belonging to the same flow.
+ */
+struct tlro_mbuf_data {
+	union {
+#ifdef INET
+		struct ip *v4;
+#endif
+#ifdef INET6
+		struct ip6_hdr *v6;
+#endif
+	} ip;			/* pointer into the packet's IP header */
+	struct tcphdr *tcp;	/* pointer into the packet's TCP header */
+	struct mbuf *head;	/* head of mbuf chain; NULL = slot empty */
+	struct mbuf **pprev;	/* append point: last mbuf's m_next */
+	int last_tick;		/* "ticks" value when header was extracted */
+	int sequence;		/* arrival order number */
+	int data_len;		/* TCP payload length */
+	int data_off;		/* offset of payload within the packet */
+	int ip_hdrlen;		/* IP header length */
+	int ip_len;		/* IP header plus TCP header plus payload */
+	uint32_t tcp_ts;	/* saved TCP timestamp option value */
+	uint32_t tcp_ts_reply;	/* saved TCP timestamp echo reply */
+	uint16_t tcp_len;	/* TCP header length */
+	uint8_t ip_version;	/* 4 or 6 */
+	uint8_t buf_length;	/* valid bytes in buf[]; 0 = not LRO-able */
+	uint64_t buf[TLRO_MAX_HEADER / 8];
+} __aligned(256);
+
+struct tlro_mbuf_ptr {
+	struct tlro_mbuf_data *data;
+};
+
+/* NB: This is part of driver structs */
+struct tlro_ctrl {
+	struct ifnet *ifp;		/* destination for if_input */
+	struct tlro_mbuf_ptr *mbuf;	/* pointer array + data array */
+	uint64_t lro_queued;		/* statistics: packets queued */
+	uint64_t lro_flushed;		/* statistics: packets forwarded */
+	uint32_t max;			/* array capacity */
+	uint32_t curr;			/* number of occupied slots */
+	int sequence;			/* next arrival order number */
+};
+
+int tcp_tlro_init(struct tlro_ctrl *, struct ifnet *, int);
+void tcp_tlro_free(struct tlro_ctrl *);
+void tcp_tlro_flush(struct tlro_ctrl *, int);
+void tcp_tlro_rx(struct tlro_ctrl *, struct mbuf *);
+
+#endif /* _TCP_TLRO_H_ */ |