summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-01-25 11:17:34 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2014-01-25 11:17:34 -0800
commit4ba9920e5e9c0e16b5ed24292d45322907bb9035 (patch)
tree7d023baea59ed0886ded1f0b6d1c6385690b88f7 /drivers/net/ethernet/mellanox
parent82c477669a4665eb4e52030792051e0559ee2a36 (diff)
parent8b662fe70c68282f78482dc272df0c4f355e49f5 (diff)
downloadop-kernel-dev-4ba9920e5e9c0e16b5ed24292d45322907bb9035.zip
op-kernel-dev-4ba9920e5e9c0e16b5ed24292d45322907bb9035.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) BPF debugger and asm tool by Daniel Borkmann. 2) Speed up create/bind in AF_PACKET, also from Daniel Borkmann. 3) Correct reciprocal_divide and update users, from Hannes Frederic Sowa and Daniel Borkmann. 4) Currently we only have a "set" operation for the hw timestamp socket ioctl, add a "get" operation to match. From Ben Hutchings. 5) Add better trace events for debugging driver datapath problems, also from Ben Hutchings. 6) Implement auto corking in TCP, from Eric Dumazet. Basically, if we have a small send and a previous packet is already in the qdisc or device queue, defer until TX completion or we get more data. 7) Allow userspace to manage ipv6 temporary addresses, from Jiri Pirko. 8) Add a qdisc bypass option for AF_PACKET sockets, from Daniel Borkmann. 9) Share IP header compression code between Bluetooth and IEEE802154 layers, from Jukka Rissanen. 10) Fix ipv6 router reachability probing, from Jiri Benc. 11) Allow packets to be captured on macvtap devices, from Vlad Yasevich. 12) Support tunneling in GRO layer, from Jerry Chu. 13) Allow bonding to be configured fully using netlink, from Scott Feldman. 14) Allow AF_PACKET users to obtain the VLAN TPID, just like they can already get the TCI. From Atzm Watanabe. 15) New "Heavy Hitter" qdisc, from Terry Lam. 16) Significantly improve the IPSEC support in pktgen, from Fan Du. 17) Allow ipv4 tunnels to cache routes, just like sockets. From Tom Herbert. 18) Add Proportional Integral Enhanced packet scheduler, from Vijay Subramanian. 19) Allow openvswitch to mmap'd netlink, from Thomas Graf. 20) Key TCP metrics blobs also by source address, not just destination address. From Christoph Paasch. 21) Support 10G in generic phylib. From Andy Fleming. 22) Try to short-circuit GRO flow compares using device provided RX hash, if provided. From Tom Herbert. The wireless and netfilter folks have been busy little bees too. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2064 commits) net/cxgb4: Fix referencing freed adapter ipv6: reallocate addrconf router for ipv6 address when lo device up fib_frontend: fix possible NULL pointer dereference rtnetlink: remove IFLA_BOND_SLAVE definition rtnetlink: remove check for fill_slave_info in rtnl_have_link_slave_info qlcnic: update version to 5.3.55 qlcnic: Enhance logic to calculate msix vectors. qlcnic: Refactor interrupt coalescing code for all adapters. qlcnic: Update poll controller code path qlcnic: Interrupt code cleanup qlcnic: Enhance Tx timeout debugging. qlcnic: Use bool for rx_mac_learn. bonding: fix u64 division rtnetlink: add missing IFLA_BOND_AD_INFO_UNSPEC sfc: Use the correct maximum TX DMA ring size for SFC9100 Add Shradha Shah as the sfc driver maintainer. net/vxlan: Share RX skb de-marking and checksum checks with ovs tulip: cleanup by using ARRAY_SIZE() ip_tunnel: clear IPCB in ip_tunnel_xmit() in case dst_link_failure() is called net/cxgb4: Don't retrieve stats during recovery ...
Diffstat (limited to 'drivers/net/ethernet/mellanox')
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/Kconfig1
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/alloc.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cq.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_clock.c198
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_cq.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_main.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c142
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_resources.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c62
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/eq.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c93
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c109
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mcg.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h19
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mr.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/pd.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/port.c41
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/qp.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c100
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/srq.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c1
25 files changed, 772 insertions, 148 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
index eb520ab..563495d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -6,6 +6,7 @@ config MLX4_EN
tristate "Mellanox Technologies 10Gbit Ethernet support"
depends on PCI
select MLX4_CORE
+ select PTP_1588_CLOCK
---help---
This driver supports Mellanox Technologies ConnectX Ethernet
devices.
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index 06fef5b..c3ad464 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -71,9 +71,9 @@ u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap)
return obj;
}
-void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj)
+void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr)
{
- mlx4_bitmap_free_range(bitmap, obj, 1);
+ mlx4_bitmap_free_range(bitmap, obj, 1, use_rr);
}
u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align)
@@ -118,11 +118,17 @@ u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap)
return bitmap->avail;
}
-void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt)
+void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt,
+ int use_rr)
{
obj &= bitmap->max + bitmap->reserved_top - 1;
spin_lock(&bitmap->lock);
+ if (!use_rr) {
+ bitmap->last = min(bitmap->last, obj);
+ bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
+ & bitmap->mask;
+ }
bitmap_clear(bitmap->table, obj, cnt);
bitmap->avail += cnt;
spin_unlock(&bitmap->lock);
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 22fcbe7..0487121 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -34,7 +34,6 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/export.h>
@@ -187,7 +186,7 @@ err_put:
mlx4_table_put(dev, &cq_table->table, *cqn);
err_out:
- mlx4_bitmap_free(&cq_table->bitmap, *cqn);
+ mlx4_bitmap_free(&cq_table->bitmap, *cqn, MLX4_NO_RR);
return err;
}
@@ -217,7 +216,7 @@ void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
mlx4_table_put(dev, &cq_table->cmpt_table, cqn);
mlx4_table_put(dev, &cq_table->table, cqn);
- mlx4_bitmap_free(&cq_table->bitmap, cqn);
+ mlx4_bitmap_free(&cq_table->bitmap, cqn, MLX4_NO_RR);
}
static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index fd64410..abaf6bb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -42,6 +42,10 @@ int mlx4_en_timestamp_config(struct net_device *dev, int tx_type, int rx_filter)
int port_up = 0;
int err = 0;
+ if (priv->hwtstamp_config.tx_type == tx_type &&
+ priv->hwtstamp_config.rx_filter == rx_filter)
+ return 0;
+
mutex_lock(&mdev->state_lock);
if (priv->port_up) {
port_up = 1;
@@ -103,19 +107,191 @@ void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev,
struct skb_shared_hwtstamps *hwts,
u64 timestamp)
{
+ unsigned long flags;
u64 nsec;
+ read_lock_irqsave(&mdev->clock_lock, flags);
nsec = timecounter_cyc2time(&mdev->clock, timestamp);
+ read_unlock_irqrestore(&mdev->clock_lock, flags);
memset(hwts, 0, sizeof(struct skb_shared_hwtstamps));
hwts->hwtstamp = ns_to_ktime(nsec);
}
+/**
+ * mlx4_en_remove_timestamp - disable PTP device
+ * @mdev: board private structure
+ *
+ * Stop the PTP support.
+ **/
+void mlx4_en_remove_timestamp(struct mlx4_en_dev *mdev)
+{
+ if (mdev->ptp_clock) {
+ ptp_clock_unregister(mdev->ptp_clock);
+ mdev->ptp_clock = NULL;
+ mlx4_info(mdev, "removed PHC\n");
+ }
+}
+
+void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev)
+{
+ bool timeout = time_is_before_jiffies(mdev->last_overflow_check +
+ mdev->overflow_period);
+ unsigned long flags;
+
+ if (timeout) {
+ write_lock_irqsave(&mdev->clock_lock, flags);
+ timecounter_read(&mdev->clock);
+ write_unlock_irqrestore(&mdev->clock_lock, flags);
+ mdev->last_overflow_check = jiffies;
+ }
+}
+
+/**
+ * mlx4_en_phc_adjfreq - adjust the frequency of the hardware clock
+ * @ptp: ptp clock structure
+ * @delta: Desired frequency change in parts per billion
+ *
+ * Adjust the frequency of the PHC cycle counter by the indicated delta from
+ * the base frequency.
+ **/
+static int mlx4_en_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta)
+{
+ u64 adj;
+ u32 diff, mult;
+ int neg_adj = 0;
+ unsigned long flags;
+ struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+ ptp_clock_info);
+
+ if (delta < 0) {
+ neg_adj = 1;
+ delta = -delta;
+ }
+ mult = mdev->nominal_c_mult;
+ adj = mult;
+ adj *= delta;
+ diff = div_u64(adj, 1000000000ULL);
+
+ write_lock_irqsave(&mdev->clock_lock, flags);
+ timecounter_read(&mdev->clock);
+ mdev->cycles.mult = neg_adj ? mult - diff : mult + diff;
+ write_unlock_irqrestore(&mdev->clock_lock, flags);
+
+ return 0;
+}
+
+/**
+ * mlx4_en_phc_adjtime - Shift the time of the hardware clock
+ * @ptp: ptp clock structure
+ * @delta: Desired change in nanoseconds
+ *
+ * Adjust the timer by resetting the timecounter structure.
+ **/
+static int mlx4_en_phc_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+ ptp_clock_info);
+ unsigned long flags;
+ s64 now;
+
+ write_lock_irqsave(&mdev->clock_lock, flags);
+ now = timecounter_read(&mdev->clock);
+ now += delta;
+ timecounter_init(&mdev->clock, &mdev->cycles, now);
+ write_unlock_irqrestore(&mdev->clock_lock, flags);
+
+ return 0;
+}
+
+/**
+ * mlx4_en_phc_gettime - Reads the current time from the hardware clock
+ * @ptp: ptp clock structure
+ * @ts: timespec structure to hold the current time value
+ *
+ * Read the timecounter and return the correct value in ns after converting
+ * it into a struct timespec.
+ **/
+static int mlx4_en_phc_gettime(struct ptp_clock_info *ptp, struct timespec *ts)
+{
+ struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+ ptp_clock_info);
+ unsigned long flags;
+ u32 remainder;
+ u64 ns;
+
+ write_lock_irqsave(&mdev->clock_lock, flags);
+ ns = timecounter_read(&mdev->clock);
+ write_unlock_irqrestore(&mdev->clock_lock, flags);
+
+ ts->tv_sec = div_u64_rem(ns, NSEC_PER_SEC, &remainder);
+ ts->tv_nsec = remainder;
+
+ return 0;
+}
+
+/**
+ * mlx4_en_phc_settime - Set the current time on the hardware clock
+ * @ptp: ptp clock structure
+ * @ts: timespec containing the new time for the cycle counter
+ *
+ * Reset the timecounter to use a new base value instead of the kernel
+ * wall timer value.
+ **/
+static int mlx4_en_phc_settime(struct ptp_clock_info *ptp,
+ const struct timespec *ts)
+{
+ struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+ ptp_clock_info);
+ u64 ns = timespec_to_ns(ts);
+ unsigned long flags;
+
+ /* reset the timecounter */
+ write_lock_irqsave(&mdev->clock_lock, flags);
+ timecounter_init(&mdev->clock, &mdev->cycles, ns);
+ write_unlock_irqrestore(&mdev->clock_lock, flags);
+
+ return 0;
+}
+
+/**
+ * mlx4_en_phc_enable - enable or disable an ancillary feature
+ * @ptp: ptp clock structure
+ * @request: Desired resource to enable or disable
+ * @on: Caller passes one to enable or zero to disable
+ *
+ * Enable (or disable) ancillary features of the PHC subsystem.
+ * Currently, no ancillary features are supported.
+ **/
+static int mlx4_en_phc_enable(struct ptp_clock_info __always_unused *ptp,
+ struct ptp_clock_request __always_unused *request,
+ int __always_unused on)
+{
+ return -EOPNOTSUPP;
+}
+
+static const struct ptp_clock_info mlx4_en_ptp_clock_info = {
+ .owner = THIS_MODULE,
+ .max_adj = 100000000,
+ .n_alarm = 0,
+ .n_ext_ts = 0,
+ .n_per_out = 0,
+ .pps = 0,
+ .adjfreq = mlx4_en_phc_adjfreq,
+ .adjtime = mlx4_en_phc_adjtime,
+ .gettime = mlx4_en_phc_gettime,
+ .settime = mlx4_en_phc_settime,
+ .enable = mlx4_en_phc_enable,
+};
+
void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
{
struct mlx4_dev *dev = mdev->dev;
+ unsigned long flags;
u64 ns;
+ rwlock_init(&mdev->clock_lock);
+
memset(&mdev->cycles, 0, sizeof(mdev->cycles));
mdev->cycles.read = mlx4_en_read_clock;
mdev->cycles.mask = CLOCKSOURCE_MASK(48);
@@ -127,9 +303,12 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
mdev->cycles.shift = 14;
mdev->cycles.mult =
clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift);
+ mdev->nominal_c_mult = mdev->cycles.mult;
+ write_lock_irqsave(&mdev->clock_lock, flags);
timecounter_init(&mdev->clock, &mdev->cycles,
ktime_to_ns(ktime_get_real()));
+ write_unlock_irqrestore(&mdev->clock_lock, flags);
/* Calculate period in seconds to call the overflow watchdog - to make
* sure counter is checked at least once every wrap around.
@@ -137,15 +316,18 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
ns = cyclecounter_cyc2ns(&mdev->cycles, mdev->cycles.mask);
do_div(ns, NSEC_PER_SEC / 2 / HZ);
mdev->overflow_period = ns;
-}
-void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev)
-{
- bool timeout = time_is_before_jiffies(mdev->last_overflow_check +
- mdev->overflow_period);
+ /* Configure the PHC */
+ mdev->ptp_clock_info = mlx4_en_ptp_clock_info;
+ snprintf(mdev->ptp_clock_info.name, 16, "mlx4 ptp");
- if (timeout) {
- timecounter_read(&mdev->clock);
- mdev->last_overflow_check = jiffies;
+ mdev->ptp_clock = ptp_clock_register(&mdev->ptp_clock_info,
+ &mdev->pdev->dev);
+ if (IS_ERR(mdev->ptp_clock)) {
+ mdev->ptp_clock = NULL;
+ mlx4_err(mdev, "ptp_clock_register failed\n");
+ } else {
+ mlx4_info(mdev, "registered PHC clock\n");
}
+
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 3a098cc..70e9532 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -161,12 +161,16 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq;
cq->mcq.event = mlx4_en_cq_event;
- if (!cq->is_tx) {
+ if (cq->is_tx) {
+ netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq,
+ NAPI_POLL_WEIGHT);
+ } else {
netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64);
napi_hash_add(&cq->napi);
- napi_enable(&cq->napi);
}
+ napi_enable(&cq->napi);
+
return 0;
}
@@ -188,12 +192,12 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
{
+ napi_disable(&cq->napi);
if (!cq->is_tx) {
- napi_disable(&cq->napi);
napi_hash_del(&cq->napi);
synchronize_rcu();
- netif_napi_del(&cq->napi);
}
+ netif_napi_del(&cq->napi);
mlx4_cq_free(priv->mdev->dev, &cq->mcq);
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 0596f9f..3e8d336 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1193,6 +1193,9 @@ static int mlx4_en_get_ts_info(struct net_device *dev,
info->rx_filters =
(1 << HWTSTAMP_FILTER_NONE) |
(1 << HWTSTAMP_FILTER_ALL);
+
+ if (mdev->ptp_clock)
+ info->phc_index = ptp_clock_index(mdev->ptp_clock);
}
return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 0d087b0..d357bf5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -174,6 +174,9 @@ static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
mlx4_err(mdev, "Internal error detected, restarting device\n");
break;
+ case MLX4_DEV_EVENT_SLAVE_INIT:
+ case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+ break;
default:
if (port < 1 || port > dev->caps.num_ports ||
!mdev->pndev[port])
@@ -196,6 +199,9 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
if (mdev->pndev[i])
mlx4_en_destroy_netdev(mdev->pndev[i]);
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)
+ mlx4_en_remove_timestamp(mdev);
+
flush_workqueue(mdev->workqueue);
destroy_workqueue(mdev->workqueue);
(void) mlx4_mr_free(dev, &mdev->mr);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index e72d8a1..fad4531 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -468,6 +468,53 @@ static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac)
memset(&dst_mac[ETH_ALEN], 0, 2);
}
+
+static int mlx4_en_tunnel_steer_add(struct mlx4_en_priv *priv, unsigned char *addr,
+ int qpn, u64 *reg_id)
+{
+ int err;
+ struct mlx4_spec_list spec_eth_outer = { {NULL} };
+ struct mlx4_spec_list spec_vxlan = { {NULL} };
+ struct mlx4_spec_list spec_eth_inner = { {NULL} };
+
+ struct mlx4_net_trans_rule rule = {
+ .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ .allow_loopback = 1,
+ .promisc_mode = MLX4_FS_REGULAR,
+ .priority = MLX4_DOMAIN_NIC,
+ };
+
+ __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
+ return 0; /* do nothing */
+
+ rule.port = priv->port;
+ rule.qpn = qpn;
+ INIT_LIST_HEAD(&rule.list);
+
+ spec_eth_outer.id = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(spec_eth_outer.eth.dst_mac, addr, ETH_ALEN);
+ memcpy(spec_eth_outer.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+
+ spec_vxlan.id = MLX4_NET_TRANS_RULE_ID_VXLAN; /* any vxlan header */
+ spec_eth_inner.id = MLX4_NET_TRANS_RULE_ID_ETH; /* any inner eth header */
+
+ list_add_tail(&spec_eth_outer.list, &rule.list);
+ list_add_tail(&spec_vxlan.list, &rule.list);
+ list_add_tail(&spec_eth_inner.list, &rule.list);
+
+ err = mlx4_flow_attach(priv->mdev->dev, &rule, reg_id);
+ if (err) {
+ en_err(priv, "failed to add vxlan steering rule, err %d\n", err);
+ return err;
+ }
+ en_dbg(DRV, priv, "added vxlan steering rule, mac %pM reg_id %llx\n", addr, *reg_id);
+ return 0;
+}
+
+
static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv,
unsigned char *mac, int *qpn, u64 *reg_id)
{
@@ -585,6 +632,11 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv)
if (err)
goto steer_err;
+ err = mlx4_en_tunnel_steer_add(priv, priv->dev->dev_addr, *qpn,
+ &priv->tunnel_reg_id);
+ if (err)
+ goto tunnel_err;
+
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry) {
err = -ENOMEM;
@@ -599,6 +651,9 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv)
return 0;
alloc_err:
+ if (priv->tunnel_reg_id)
+ mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id);
+tunnel_err:
mlx4_en_uc_steer_release(priv, priv->dev->dev_addr, *qpn, reg_id);
steer_err:
@@ -642,6 +697,11 @@ static void mlx4_en_put_qp(struct mlx4_en_priv *priv)
}
}
+ if (priv->tunnel_reg_id) {
+ mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id);
+ priv->tunnel_reg_id = 0;
+ }
+
en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n",
priv->port, qpn);
mlx4_qp_release_range(dev, qpn, 1);
@@ -782,7 +842,7 @@ static void update_mclist_flags(struct mlx4_en_priv *priv,
list_for_each_entry(dst_tmp, dst, list) {
found = false;
list_for_each_entry(src_tmp, src, list) {
- if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) {
+ if (ether_addr_equal(dst_tmp->addr, src_tmp->addr)) {
found = true;
break;
}
@@ -797,7 +857,7 @@ static void update_mclist_flags(struct mlx4_en_priv *priv,
list_for_each_entry(src_tmp, src, list) {
found = false;
list_for_each_entry(dst_tmp, dst, list) {
- if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) {
+ if (ether_addr_equal(dst_tmp->addr, src_tmp->addr)) {
dst_tmp->action = MCLIST_NONE;
found = true;
break;
@@ -1044,6 +1104,12 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv,
if (err)
en_err(priv, "Fail to detach multicast address\n");
+ if (mclist->tunnel_reg_id) {
+ err = mlx4_flow_detach(priv->mdev->dev, mclist->tunnel_reg_id);
+ if (err)
+ en_err(priv, "Failed to detach multicast address\n");
+ }
+
/* remove from list */
list_del(&mclist->list);
kfree(mclist);
@@ -1061,6 +1127,10 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv,
if (err)
en_err(priv, "Fail to attach multicast address\n");
+ err = mlx4_en_tunnel_steer_add(priv, &mc_list[10], priv->base_qpn,
+ &mclist->tunnel_reg_id);
+ if (err)
+ en_err(priv, "Failed to attach multicast address\n");
}
}
}
@@ -1598,6 +1668,15 @@ int mlx4_en_start_port(struct net_device *dev)
goto tx_err;
}
+ if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+ err = mlx4_SET_PORT_VXLAN(mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC);
+ if (err) {
+ en_err(priv, "Failed setting port L2 tunnel configuration, err %d\n",
+ err);
+ goto tx_err;
+ }
+ }
+
/* Init port */
en_dbg(HW, priv, "Initializing port\n");
err = mlx4_INIT_PORT(mdev->dev, priv->port);
@@ -1910,8 +1989,10 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
prof->tx_ring_size, i, TX, node))
goto err;
- if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], priv->base_tx_qpn + i,
- prof->tx_ring_size, TXBB_SIZE, node))
+ if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i],
+ priv->base_tx_qpn + i,
+ prof->tx_ring_size, TXBB_SIZE,
+ node, i))
goto err;
}
@@ -2025,7 +2106,7 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
-static int mlx4_en_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int mlx4_en_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_en_dev *mdev = priv->mdev;
@@ -2084,11 +2165,21 @@ static int mlx4_en_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
sizeof(config)) ? -EFAULT : 0;
}
+static int mlx4_en_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ return copy_to_user(ifr->ifr_data, &priv->hwtstamp_config,
+ sizeof(priv->hwtstamp_config)) ? -EFAULT : 0;
+}
+
static int mlx4_en_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
switch (cmd) {
case SIOCSHWTSTAMP:
- return mlx4_en_hwtstamp_ioctl(dev, ifr);
+ return mlx4_en_hwtstamp_set(dev, ifr);
+ case SIOCGHWTSTAMP:
+ return mlx4_en_hwtstamp_get(dev, ifr);
default:
return -EOPNOTSUPP;
}
@@ -2154,6 +2245,27 @@ static int mlx4_en_set_vf_link_state(struct net_device *dev, int vf, int link_st
return mlx4_set_vf_link_state(mdev->dev, en_priv->port, vf, link_state);
}
+
+#define PORT_ID_BYTE_LEN 8
+static int mlx4_en_get_phys_port_id(struct net_device *dev,
+ struct netdev_phys_port_id *ppid)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_dev *mdev = priv->mdev->dev;
+ int i;
+ u64 phys_port_id = mdev->caps.phys_port_id[priv->port];
+
+ if (!phys_port_id)
+ return -EOPNOTSUPP;
+
+ ppid->id_len = sizeof(phys_port_id);
+ for (i = PORT_ID_BYTE_LEN - 1; i >= 0; --i) {
+ ppid->id[i] = phys_port_id & 0xff;
+ phys_port_id >>= 8;
+ }
+ return 0;
+}
+
static const struct net_device_ops mlx4_netdev_ops = {
.ndo_open = mlx4_en_open,
.ndo_stop = mlx4_en_close,
@@ -2179,6 +2291,7 @@ static const struct net_device_ops mlx4_netdev_ops = {
#ifdef CONFIG_NET_RX_BUSY_POLL
.ndo_busy_poll = mlx4_en_low_latency_recv,
#endif
+ .ndo_get_phys_port_id = mlx4_en_get_phys_port_id,
};
static const struct net_device_ops mlx4_netdev_ops_master = {
@@ -2207,6 +2320,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = {
#ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = mlx4_en_filter_rfs,
#endif
+ .ndo_get_phys_port_id = mlx4_en_get_phys_port_id,
};
int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
@@ -2365,6 +2479,13 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0)
dev->priv_flags |= IFF_UNICAST_FLT;
+ if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+ dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
+ NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL;
+ dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+ dev->features |= NETIF_F_GSO_UDP_TUNNEL;
+ }
+
mdev->pndev[port] = dev;
netif_carrier_off(dev);
@@ -2394,6 +2515,15 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
goto out;
}
+ if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+ err = mlx4_SET_PORT_VXLAN(mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC);
+ if (err) {
+ en_err(priv, "Failed setting port L2 tunnel configuration, err %d\n",
+ err);
+ goto out;
+ }
+ }
+
/* Init port */
en_warn(priv, "Initializing port\n");
err = mlx4_INIT_PORT(mdev->dev, priv->port);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index d3f5086..f1a5500 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -68,6 +68,12 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2);
if (!(dev->features & NETIF_F_HW_VLAN_CTAG_RX))
context->param3 |= cpu_to_be32(1 << 30);
+
+ if (!is_tx && !rss &&
+ (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)) {
+ en_dbg(HW, priv, "Setting RX qp %x tunnel mode to RX tunneled & non-tunneled\n", qpn);
+ context->srqn = cpu_to_be32(7 << 28); /* this fills bits 30:28 */
+ }
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 07a1d0f..890922c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -631,6 +631,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
int ip_summed;
int factor = priv->cqe_factor;
u64 timestamp;
+ bool l2_tunnel;
if (!priv->port_up)
return 0;
@@ -709,6 +710,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
length -= ring->fcs_del;
ring->bytes += length;
ring->packets++;
+ l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
+ (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
if (likely(dev->features & NETIF_F_RXCSUM)) {
if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
@@ -721,7 +724,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
* - not an IP fragment
* - no LLS polling in progress
*/
- if (!mlx4_en_cq_ll_polling(cq) &&
+ if (!mlx4_en_cq_busy_polling(cq) &&
(dev->features & NETIF_F_GRO)) {
struct sk_buff *gro_skb = napi_get_frags(&cq->napi);
if (!gro_skb)
@@ -738,6 +741,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
gro_skb->data_len = length;
gro_skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (l2_tunnel)
+ gro_skb->encapsulation = 1;
if ((cqe->vlan_my_qpn &
cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) &&
(dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
@@ -747,7 +752,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
}
if (dev->features & NETIF_F_RXHASH)
- gro_skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid);
+ skb_set_hash(gro_skb,
+ be32_to_cpu(cqe->immed_rss_invalid),
+ PKT_HASH_TYPE_L3);
skb_record_rx_queue(gro_skb, cq->ring);
@@ -788,8 +795,13 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
skb->protocol = eth_type_trans(skb, dev);
skb_record_rx_queue(skb, cq->ring);
+ if (l2_tunnel)
+ skb->encapsulation = 1;
+
if (dev->features & NETIF_F_RXHASH)
- skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid);
+ skb_set_hash(skb,
+ be32_to_cpu(cqe->immed_rss_invalid),
+ PKT_HASH_TYPE_L3);
if ((be32_to_cpu(cqe->vlan_my_qpn) &
MLX4_CQE_VLAN_PRESENT_MASK) &&
@@ -804,8 +816,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
skb_mark_napi_id(skb, &cq->napi);
- /* Push it up the stack */
- netif_receive_skb(skb);
+ if (!mlx4_en_cq_busy_polling(cq))
+ napi_gro_receive(&cq->napi, skb);
+ else
+ netif_receive_skb(skb);
next:
for (nr = 0; nr < priv->num_frags; nr++)
@@ -1053,6 +1067,12 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6;
rss_context->base_qpn_udp = rss_context->default_qpn;
}
+
+ if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+ en_info(priv, "Setting RSS context tunnel type to RSS on inner headers\n");
+ rss_mask |= MLX4_RSS_BY_INNER_HEADERS;
+ }
+
rss_context->flags = rss_mask;
rss_context->hash_fn = MLX4_RSS_HASH_TOP;
for (i = 0; i < 10; i++)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index a7fcd59..8e8a7eb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -39,6 +39,7 @@
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
#include <linux/tcp.h>
+#include <linux/ip.h>
#include <linux/moduleparam.h>
#include "mlx4_en.h"
@@ -55,7 +56,7 @@ MODULE_PARM_DESC(inline_thold, "threshold for using inline data");
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring **pring, int qpn, u32 size,
- u16 stride, int node)
+ u16 stride, int node, int queue_index)
{
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_tx_ring *ring;
@@ -140,6 +141,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
ring->bf_enabled = true;
ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type;
+ ring->queue_index = queue_index;
+
+ if (queue_index < priv->num_tx_rings_p_up && cpu_online(queue_index))
+ cpumask_set_cpu(queue_index, &ring->affinity_mask);
*pring = ring;
return 0;
@@ -206,6 +211,9 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
&ring->qp, &ring->qp_state);
+ if (!user_prio && cpu_online(ring->queue_index))
+ netif_set_xps_queue(priv->dev, &ring->affinity_mask,
+ ring->queue_index);
return err;
}
@@ -317,7 +325,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
}
}
}
- dev_kfree_skb_any(skb);
+ dev_kfree_skb(skb);
return tx_info->nr_txbb;
}
@@ -354,7 +362,9 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
return cnt;
}
-static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
+static int mlx4_en_process_tx_cq(struct net_device *dev,
+ struct mlx4_en_cq *cq,
+ int budget)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_cq *mcq = &cq->mcq;
@@ -372,9 +382,10 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
u32 bytes = 0;
int factor = priv->cqe_factor;
u64 timestamp = 0;
+ int done = 0;
if (!priv->port_up)
- return;
+ return 0;
index = cons_index & size_mask;
cqe = &buf[(index << factor) + factor];
@@ -383,7 +394,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
/* Process all completed CQEs */
while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
- cons_index & size)) {
+ cons_index & size) && (done < budget)) {
/*
* make sure we read the CQE after we read the
* ownership bit
@@ -421,7 +432,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
txbbs_stamp = txbbs_skipped;
packets++;
bytes += ring->tx_info[ring_index].nr_bytes;
- } while (ring_index != new_index);
+ } while ((++done < budget) && (ring_index != new_index));
++cons_index;
index = cons_index & size_mask;
@@ -447,6 +458,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
netif_tx_wake_queue(ring->tx_queue);
priv->port_stats.wake_queue++;
}
+ return done;
}
void mlx4_en_tx_irq(struct mlx4_cq *mcq)
@@ -454,10 +466,31 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq)
struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
struct mlx4_en_priv *priv = netdev_priv(cq->dev);
- mlx4_en_process_tx_cq(cq->dev, cq);
- mlx4_en_arm_cq(priv, cq);
+ if (priv->port_up)
+ napi_schedule(&cq->napi);
+ else
+ mlx4_en_arm_cq(priv, cq);
}
+/* TX CQ polling - called by NAPI */
+int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
+{
+ struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
+ struct net_device *dev = cq->dev;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int done;
+
+ done = mlx4_en_process_tx_cq(dev, cq, budget);
+
+ /* If we used up all the quota - we're probably not done yet... */
+ if (done < budget) {
+ /* Done for now */
+ napi_complete(napi);
+ mlx4_en_arm_cq(priv, cq);
+ return done;
+ }
+ return budget;
+}
static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring,
@@ -528,7 +561,10 @@ static int get_real_size(struct sk_buff *skb, struct net_device *dev,
int real_size;
if (skb_is_gso(skb)) {
- *lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ if (skb->encapsulation)
+ *lso_header_size = (skb_inner_transport_header(skb) - skb->data) + inner_tcp_hdrlen(skb);
+ else
+ *lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
real_size = CTRL_SIZE + skb_shinfo(skb)->nr_frags * DS_SIZE +
ALIGN(*lso_header_size + 4, DS_SIZE);
if (unlikely(*lso_header_size != skb_headlen(skb))) {
@@ -828,6 +864,14 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
tx_info->inl = 1;
}
+ if (skb->encapsulation) {
+ struct iphdr *ipv4 = (struct iphdr *)skb_inner_network_header(skb);
+ if (ipv4->protocol == IPPROTO_TCP || ipv4->protocol == IPPROTO_UDP)
+ op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP);
+ else
+ op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP);
+ }
+
ring->prod += nr_txbb;
/* If we used a bounce buffer then copy descriptor back into place */
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index c9cdb2a..8992b38 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -31,7 +31,6 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/export.h>
@@ -963,7 +962,7 @@ err_out_free_mtt:
mlx4_mtt_cleanup(dev, &eq->mtt);
err_out_free_eq:
- mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn);
+ mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);
err_out_free_pages:
for (i = 0; i < npages; ++i)
@@ -1018,7 +1017,7 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
eq->page_list[i].map);
kfree(eq->page_list);
- mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn);
+ mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);
mlx4_free_cmd_mailbox(dev, mailbox);
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 4bd2d80..91b69ff 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -134,7 +134,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
[5] = "Time stamping support",
[6] = "VST (control vlan insertion/stripping) support",
[7] = "FSM (MAC anti-spoofing) support",
- [8] = "Dynamic QP updates support"
+ [8] = "Dynamic QP updates support",
+ [9] = "TCP/IP offloads/flow-steering for VXLAN support"
};
int i;
@@ -207,25 +208,25 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
/* when opcode modifier = 1 */
#define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3
-#define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET 0x8
-#define QUERY_FUNC_CAP_ETH_PROPS_OFFSET 0xc
+#define QUERY_FUNC_CAP_FLAGS0_OFFSET 0x8
+#define QUERY_FUNC_CAP_FLAGS1_OFFSET 0xc
#define QUERY_FUNC_CAP_QP0_TUNNEL 0x10
#define QUERY_FUNC_CAP_QP0_PROXY 0x14
#define QUERY_FUNC_CAP_QP1_TUNNEL 0x18
#define QUERY_FUNC_CAP_QP1_PROXY 0x1c
+#define QUERY_FUNC_CAP_PHYS_PORT_ID 0x28
-#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40
-#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80
+#define QUERY_FUNC_CAP_FLAGS1_FORCE_MAC 0x40
+#define QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN 0x80
+#define QUERY_FUNC_CAP_FLAGS1_NIC_INFO 0x10
-#define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80
+#define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80
if (vhcr->op_modifier == 1) {
- field = 0;
- /* ensure force vlan and force mac bits are not set */
- MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
- /* ensure that phy_wqe_gid bit is not set */
- MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
+ /* Set nic_info bit to mark new fields support */
+ field = QUERY_FUNC_CAP_FLAGS1_NIC_INFO;
+ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET);
field = vhcr->in_modifier; /* phys-port = logical-port */
MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
@@ -243,6 +244,9 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
size += 2;
MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY);
+ MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier],
+ QUERY_FUNC_CAP_PHYS_PORT_ID);
+
} else if (vhcr->op_modifier == 0) {
/* enable rdma and ethernet interfaces, and new quota locations */
field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA |
@@ -391,22 +395,22 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
goto out;
}
+ MLX4_GET(func_cap->flags1, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET);
if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) {
- MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
- if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) {
+ if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_OFFSET) {
mlx4_err(dev, "VLAN is enforced on this port\n");
err = -EPROTONOSUPPORT;
goto out;
}
- if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) {
+ if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_FORCE_MAC) {
mlx4_err(dev, "Force mac is enabled on this port\n");
err = -EPROTONOSUPPORT;
goto out;
}
} else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) {
- MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
- if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) {
+ MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET);
+ if (field & QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID) {
mlx4_err(dev, "phy_wqe_gid is "
"enforced on this ib port\n");
err = -EPROTONOSUPPORT;
@@ -433,6 +437,10 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY);
func_cap->qp1_proxy_qpn = size & 0xFFFFFF;
+ if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_NIC_INFO)
+ MLX4_GET(func_cap->phys_port_id, outbox,
+ QUERY_FUNC_CAP_PHYS_PORT_ID);
+
/* All other resources are allocated by the master, but we still report
* 'num' and 'reserved' capabilities as follows:
* - num remains the maximum resource index
@@ -530,6 +538,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98
#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
#define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d
+#define QUERY_DEV_CAP_VXLAN 0x9e
dev_cap->flags2 = 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -698,6 +707,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC);
if (field & 1<<6)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_VXLAN);
+ if (field & 1<<3)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS;
MLX4_GET(dev_cap->max_icm_sz, outbox,
QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS)
@@ -846,6 +858,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
field &= 0x7f;
MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET);
+ /* For guests, disable vxlan tunneling */
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_VXLAN);
+ field &= 0xf7;
+ MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VXLAN);
+
/* For guests, report Blueflame disabled */
MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET);
field &= 0x7f;
@@ -1277,6 +1294,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
#define INIT_HCA_IN_SIZE 0x200
#define INIT_HCA_VERSION_OFFSET 0x000
#define INIT_HCA_VERSION 2
+#define INIT_HCA_VXLAN_OFFSET 0x0c
#define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e
#define INIT_HCA_FLAGS_OFFSET 0x014
#define INIT_HCA_QPC_OFFSET 0x020
@@ -1435,6 +1453,12 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
MLX4_PUT(inbox, param->uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET);
MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET);
+ /* set parser VXLAN attributes */
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS) {
+ u8 parser_params = 0;
+ MLX4_PUT(inbox, parser_params, INIT_HCA_VXLAN_OFFSET);
+ }
+
err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000,
MLX4_CMD_NATIVE);
@@ -1723,6 +1747,43 @@ int mlx4_NOP(struct mlx4_dev *dev)
return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100, MLX4_CMD_NATIVE);
}
+int mlx4_get_phys_port_id(struct mlx4_dev *dev)
+{
+ u8 port;
+ u32 *outbox;
+ struct mlx4_cmd_mailbox *mailbox;
+ u32 in_mod;
+ u32 guid_hi, guid_lo;
+ int err, ret = 0;
+#define MOD_STAT_CFG_PORT_OFFSET 8
+#define MOD_STAT_CFG_GUID_H 0X14
+#define MOD_STAT_CFG_GUID_L 0X1c
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ outbox = mailbox->buf;
+
+ for (port = 1; port <= dev->caps.num_ports; port++) {
+ in_mod = port << MOD_STAT_CFG_PORT_OFFSET;
+ err = mlx4_cmd_box(dev, 0, mailbox->dma, in_mod, 0x2,
+ MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err) {
+ mlx4_err(dev, "Fail to get port %d uplink guid\n",
+ port);
+ ret = err;
+ } else {
+ MLX4_GET(guid_hi, outbox, MOD_STAT_CFG_GUID_H);
+ MLX4_GET(guid_lo, outbox, MOD_STAT_CFG_GUID_L);
+ dev->caps.phys_port_id[port] = (u64)guid_lo |
+ (u64)guid_hi << 32;
+ }
+ }
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return ret;
+}
+
#define MLX4_WOL_SETUP_MODE (5 << 28)
int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port)
{
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index a0a368b..6811ee0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -140,6 +140,8 @@ struct mlx4_func_cap {
u32 qp1_proxy_qpn;
u8 physical_port;
u8 port_flags;
+ u8 flags1;
+ u64 phys_port_id;
};
struct mlx4_adapter {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 01fc651..d711158 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -96,10 +96,10 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
" To activate device managed"
" flow steering when available, set to -1");
-static bool enable_64b_cqe_eqe;
+static bool enable_64b_cqe_eqe = true;
module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
- "Enable 64 byte CQEs/EQEs when the FW supports this");
+ "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");
#define HCA_GLOBAL_CAP_MASK 0
@@ -388,6 +388,84 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
return 0;
}
+
+static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev,
+ enum pci_bus_speed *speed,
+ enum pcie_link_width *width)
+{
+ u32 lnkcap1, lnkcap2;
+ int err1, err2;
+
+#define PCIE_MLW_CAP_SHIFT 4 /* start of MLW mask in link capabilities */
+
+ *speed = PCI_SPEED_UNKNOWN;
+ *width = PCIE_LNK_WIDTH_UNKNOWN;
+
+ err1 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP, &lnkcap1);
+ err2 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP2, &lnkcap2);
+ if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */
+ if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
+ *speed = PCIE_SPEED_8_0GT;
+ else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
+ *speed = PCIE_SPEED_5_0GT;
+ else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
+ *speed = PCIE_SPEED_2_5GT;
+ }
+ if (!err1) {
+ *width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT;
+ if (!lnkcap2) { /* pre-r3.0 */
+ if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB)
+ *speed = PCIE_SPEED_5_0GT;
+ else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB)
+ *speed = PCIE_SPEED_2_5GT;
+ }
+ }
+
+ if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) {
+ return err1 ? err1 :
+ err2 ? err2 : -EINVAL;
+ }
+ return 0;
+}
+
+static void mlx4_check_pcie_caps(struct mlx4_dev *dev)
+{
+ enum pcie_link_width width, width_cap;
+ enum pci_bus_speed speed, speed_cap;
+ int err;
+
+#define PCIE_SPEED_STR(speed) \
+ (speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : \
+ speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \
+ speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : \
+ "Unknown")
+
+ err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap);
+ if (err) {
+ mlx4_warn(dev,
+ "Unable to determine PCIe device BW capabilities\n");
+ return;
+ }
+
+ err = pcie_get_minimum_link(dev->pdev, &speed, &width);
+ if (err || speed == PCI_SPEED_UNKNOWN ||
+ width == PCIE_LNK_WIDTH_UNKNOWN) {
+ mlx4_warn(dev,
+ "Unable to determine PCI device chain minimum BW\n");
+ return;
+ }
+
+ if (width != width_cap || speed != speed_cap)
+ mlx4_warn(dev,
+ "PCIe BW is different than device's capability\n");
+
+ mlx4_info(dev, "PCIe link speed is %s, device supports %s\n",
+ PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap));
+ mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n",
+ width, width_cap);
+ return;
+}
+
/*The function checks if there are live vf, return the num of them*/
static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
@@ -606,6 +684,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
dev->caps.port_mask[i] = dev->caps.port_type[i];
+ dev->caps.phys_port_id[i] = func_cap.phys_port_id;
if (mlx4_get_slave_pkey_gid_tbl_len(dev, i,
&dev->caps.gid_table_len[i],
&dev->caps.pkey_table_len[i]))
@@ -1443,6 +1522,19 @@ static void choose_steering_mode(struct mlx4_dev *dev,
mlx4_log_num_mgm_entry_size);
}
+static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
+ struct mlx4_dev_cap *dev_cap)
+{
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
+ dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)
+ dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
+ else
+ dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
+
+ mlx4_dbg(dev, "Tunneling offload mode is: %s\n", (dev->caps.tunnel_offload_mode
+ == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
+}
+
static int mlx4_init_hca(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1483,6 +1575,11 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
}
choose_steering_mode(dev, &dev_cap);
+ choose_tunnel_offload_mode(dev, &dev_cap);
+
+ err = mlx4_get_phys_port_id(dev);
+ if (err)
+ mlx4_err(dev, "Fail to get physical port id\n");
if (mlx4_is_master(dev))
mlx4_parav_master_pf_caps(dev);
@@ -1654,7 +1751,7 @@ EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
{
- mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
+ mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
return;
}
@@ -2287,6 +2384,12 @@ slave_start:
goto err_mfunc;
}
+ /* check if the device is functioning at its maximum possible speed.
+ * No return code for this call, just warn the user in case of PCI
+ * express device capabilities are under-satisfied by the bus.
+ */
+ mlx4_check_pcie_caps(dev);
+
/* In master functions, the communication channel must be initialized
* after obtaining its address from fw */
if (mlx4_is_master(dev)) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 34dffcf..db7dc0b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -125,9 +125,14 @@ static struct mlx4_promisc_qp *get_promisc_qp(struct mlx4_dev *dev, u8 port,
enum mlx4_steer_type steer,
u32 qpn)
{
- struct mlx4_steer *s_steer = &mlx4_priv(dev)->steer[port - 1];
+ struct mlx4_steer *s_steer;
struct mlx4_promisc_qp *pqp;
+ if (port < 1 || port > dev->caps.num_ports)
+ return NULL;
+
+ s_steer = &mlx4_priv(dev)->steer[port - 1];
+
list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) {
if (pqp->qpn == qpn)
return pqp;
@@ -154,6 +159,9 @@ static int new_steering_entry(struct mlx4_dev *dev, u8 port,
u32 prot;
int err;
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+
s_steer = &mlx4_priv(dev)->steer[port - 1];
new_entry = kzalloc(sizeof *new_entry, GFP_KERNEL);
if (!new_entry)
@@ -238,6 +246,9 @@ static int existing_steering_entry(struct mlx4_dev *dev, u8 port,
struct mlx4_promisc_qp *pqp;
struct mlx4_promisc_qp *dqp;
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+
s_steer = &mlx4_priv(dev)->steer[port - 1];
pqp = get_promisc_qp(dev, port, steer, qpn);
@@ -283,6 +294,9 @@ static bool check_duplicate_entry(struct mlx4_dev *dev, u8 port,
struct mlx4_steer_index *tmp_entry, *entry = NULL;
struct mlx4_promisc_qp *dqp, *tmp_dqp;
+ if (port < 1 || port > dev->caps.num_ports)
+ return NULL;
+
s_steer = &mlx4_priv(dev)->steer[port - 1];
/* if qp is not promisc, it cannot be duplicated */
@@ -324,6 +338,9 @@ static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port,
bool ret = false;
int i;
+ if (port < 1 || port > dev->caps.num_ports)
+ return NULL;
+
s_steer = &mlx4_priv(dev)->steer[port - 1];
mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -378,6 +395,9 @@ static int add_promisc_qp(struct mlx4_dev *dev, u8 port,
int err;
struct mlx4_priv *priv = mlx4_priv(dev);
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+
s_steer = &mlx4_priv(dev)->steer[port - 1];
mutex_lock(&priv->mcg_table.mutex);
@@ -484,6 +504,9 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port,
int loc, i;
int err;
+ if (port < 1 || port > dev->caps.num_ports)
+ return -EINVAL;
+
s_steer = &mlx4_priv(dev)->steer[port - 1];
mutex_lock(&priv->mcg_table.mutex);
@@ -674,7 +697,8 @@ const u16 __sw_id_hw[] = {
[MLX4_NET_TRANS_RULE_ID_IPV6] = 0xE003,
[MLX4_NET_TRANS_RULE_ID_IPV4] = 0xE002,
[MLX4_NET_TRANS_RULE_ID_TCP] = 0xE004,
- [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006
+ [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006,
+ [MLX4_NET_TRANS_RULE_ID_VXLAN] = 0xE008
};
int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev,
@@ -699,7 +723,9 @@ static const int __rule_hw_sz[] = {
[MLX4_NET_TRANS_RULE_ID_TCP] =
sizeof(struct mlx4_net_trans_rule_hw_tcp_udp),
[MLX4_NET_TRANS_RULE_ID_UDP] =
- sizeof(struct mlx4_net_trans_rule_hw_tcp_udp)
+ sizeof(struct mlx4_net_trans_rule_hw_tcp_udp),
+ [MLX4_NET_TRANS_RULE_ID_VXLAN] =
+ sizeof(struct mlx4_net_trans_rule_hw_vxlan)
};
int mlx4_hw_rule_sz(struct mlx4_dev *dev,
@@ -764,6 +790,13 @@ static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec,
rule_hw->tcp_udp.src_port_msk = spec->tcp_udp.src_port_msk;
break;
+ case MLX4_NET_TRANS_RULE_ID_VXLAN:
+ rule_hw->vxlan.vni =
+ cpu_to_be32(be32_to_cpu(spec->vxlan.vni) << 8);
+ rule_hw->vxlan.vni_mask =
+ cpu_to_be32(be32_to_cpu(spec->vxlan.vni_mask) << 8);
+ break;
+
default:
return -EINVAL;
}
@@ -1013,7 +1046,7 @@ out:
index, dev->caps.num_mgms);
else
mlx4_bitmap_free(&priv->mcg_table.bitmap,
- index - dev->caps.num_mgms);
+ index - dev->caps.num_mgms, MLX4_USE_RR);
}
mutex_unlock(&priv->mcg_table.mutex);
@@ -1104,7 +1137,7 @@ int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
index, amgm_index, dev->caps.num_mgms);
else
mlx4_bitmap_free(&priv->mcg_table.bitmap,
- amgm_index - dev->caps.num_mgms);
+ amgm_index - dev->caps.num_mgms, MLX4_USE_RR);
}
} else {
/* Remove entry from AMGM */
@@ -1124,7 +1157,7 @@ int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
prev, index, dev->caps.num_mgms);
else
mlx4_bitmap_free(&priv->mcg_table.bitmap,
- index - dev->caps.num_mgms);
+ index - dev->caps.num_mgms, MLX4_USE_RR);
}
out:
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 59f67f9..6b65f77 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -783,6 +783,11 @@ enum {
MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1,
};
+enum {
+ MLX4_NO_RR = 0,
+ MLX4_USE_RR = 1,
+};
+
struct mlx4_priv {
struct mlx4_dev dev;
@@ -844,9 +849,10 @@ static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
extern struct workqueue_struct *mlx4_wq;
u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap);
-void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj);
+void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr);
u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align);
-void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt);
+void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt,
+ int use_rr);
u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap);
int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
u32 reserved_bot, u32 resetrved_top);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index d5758ad..3af04c3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -45,6 +45,7 @@
#include <linux/dcbnl.h>
#endif
#include <linux/cpu_rmap.h>
+#include <linux/ptp_clock_kernel.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/qp.h>
@@ -255,6 +256,8 @@ struct mlx4_en_tx_ring {
u16 poll_cnt;
struct mlx4_en_tx_info *tx_info;
u8 *bounce_buf;
+ u8 queue_index;
+ cpumask_t affinity_mask;
u32 last_nr_txbb;
struct mlx4_qp qp;
struct mlx4_qp_context context;
@@ -373,10 +376,14 @@ struct mlx4_en_dev {
u32 priv_pdn;
spinlock_t uar_lock;
u8 mac_removed[MLX4_MAX_PORTS + 1];
+ rwlock_t clock_lock;
+ u32 nominal_c_mult;
struct cyclecounter cycles;
struct timecounter clock;
unsigned long last_overflow_check;
unsigned long overflow_period;
+ struct ptp_clock *ptp_clock;
+ struct ptp_clock_info ptp_clock_info;
};
@@ -434,6 +441,7 @@ struct mlx4_en_mc_list {
enum mlx4_en_mclist_act action;
u8 addr[ETH_ALEN];
u64 reg_id;
+ u64 tunnel_reg_id;
};
struct mlx4_en_frag_info {
@@ -565,7 +573,7 @@ struct mlx4_en_priv {
struct list_head filters;
struct hlist_head filter_hash[1 << MLX4_EN_FILTER_HASH_SHIFT];
#endif
-
+ u64 tunnel_reg_id;
};
enum mlx4_en_wol {
@@ -653,7 +661,7 @@ static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq)
}
/* true if a socket is polling, even if it did not get the lock */
-static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq)
+static inline bool mlx4_en_cq_busy_polling(struct mlx4_en_cq *cq)
{
WARN_ON(!(cq->state & MLX4_CQ_LOCKED));
return cq->state & CQ_USER_PEND;
@@ -683,7 +691,7 @@ static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq)
return false;
}
-static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq)
+static inline bool mlx4_en_cq_busy_polling(struct mlx4_en_cq *cq)
{
return false;
}
@@ -720,7 +728,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring **pring,
- int qpn, u32 size, u16 stride, int node);
+ int qpn, u32 size, u16 stride,
+ int node, int queue_index);
void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring **pring);
int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
@@ -742,6 +751,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev,
struct mlx4_en_cq *cq,
int budget);
int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget);
+int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget);
void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
int is_tx, int rss, int qpn, int cqn, int user_prio,
struct mlx4_qp_context *context);
@@ -787,6 +797,7 @@ void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev,
struct skb_shared_hwtstamps *hwts,
u64 timestamp);
void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev);
+void mlx4_en_remove_timestamp(struct mlx4_en_dev *mdev);
int mlx4_en_timestamp_config(struct net_device *dev,
int tx_type,
int rx_filter);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index b3ee9ba..2483585 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -32,7 +32,6 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/slab.h>
@@ -346,7 +345,7 @@ void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
{
struct mlx4_priv *priv = mlx4_priv(dev);
- mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index);
+ mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index, MLX4_NO_RR);
}
static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c
index 84cfb40..74216071 100644
--- a/drivers/net/ethernet/mellanox/mlx4/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/pd.c
@@ -31,7 +31,6 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/io-mapping.h>
@@ -59,7 +58,7 @@ EXPORT_SYMBOL_GPL(mlx4_pd_alloc);
void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn)
{
- mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn);
+ mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn, MLX4_USE_RR);
}
EXPORT_SYMBOL_GPL(mlx4_pd_free);
@@ -96,7 +95,7 @@ EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc);
void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
{
- mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn);
+ mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn, MLX4_USE_RR);
}
void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
@@ -164,7 +163,7 @@ EXPORT_SYMBOL_GPL(mlx4_uar_alloc);
void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar)
{
- mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index);
+ mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index, MLX4_USE_RR);
}
EXPORT_SYMBOL_GPL(mlx4_uar_free);
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index f50ef6a..a58bcbf 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -820,6 +820,47 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
}
EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER);
+enum {
+ VXLAN_ENABLE_MODIFY = 1 << 7,
+ VXLAN_STEERING_MODIFY = 1 << 6,
+
+ VXLAN_ENABLE = 1 << 7,
+};
+
+struct mlx4_set_port_vxlan_context {
+ u32 reserved1;
+ u8 modify_flags;
+ u8 reserved2;
+ u8 enable_flags;
+ u8 steering;
+};
+
+int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering)
+{
+ int err;
+ u32 in_mod;
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_vxlan_context *context;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ memset(context, 0, sizeof(*context));
+
+ context->modify_flags = VXLAN_ENABLE_MODIFY | VXLAN_STEERING_MODIFY;
+ context->enable_flags = VXLAN_ENABLE;
+ context->steering = steering;
+
+ in_mod = MLX4_SET_PORT_VXLAN << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_VXLAN);
+
int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 2715e61..61d64eb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -35,7 +35,6 @@
#include <linux/gfp.h>
#include <linux/export.h>
-#include <linux/init.h>
#include <linux/mlx4/cmd.h>
#include <linux/mlx4/qp.h>
@@ -250,7 +249,7 @@ void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
if (mlx4_is_qp_reserved(dev, (u32) base_qpn))
return;
- mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt);
+ mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, MLX4_USE_RR);
}
void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 6635103..57428a0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1340,43 +1340,29 @@ static int cq_res_start_move_to(struct mlx4_dev *dev, int slave, int cqn,
spin_lock_irq(mlx4_tlock(dev));
r = res_tracker_lookup(&tracker->res_tree[RES_CQ], cqn);
- if (!r)
+ if (!r) {
err = -ENOENT;
- else if (r->com.owner != slave)
+ } else if (r->com.owner != slave) {
err = -EPERM;
- else {
- switch (state) {
- case RES_CQ_BUSY:
- err = -EBUSY;
- break;
-
- case RES_CQ_ALLOCATED:
- if (r->com.state != RES_CQ_HW)
- err = -EINVAL;
- else if (atomic_read(&r->ref_count))
- err = -EBUSY;
- else
- err = 0;
- break;
-
- case RES_CQ_HW:
- if (r->com.state != RES_CQ_ALLOCATED)
- err = -EINVAL;
- else
- err = 0;
- break;
-
- default:
+ } else if (state == RES_CQ_ALLOCATED) {
+ if (r->com.state != RES_CQ_HW)
err = -EINVAL;
- }
+ else if (atomic_read(&r->ref_count))
+ err = -EBUSY;
+ else
+ err = 0;
+ } else if (state != RES_CQ_HW || r->com.state != RES_CQ_ALLOCATED) {
+ err = -EINVAL;
+ } else {
+ err = 0;
+ }
- if (!err) {
- r->com.from_state = r->com.state;
- r->com.to_state = state;
- r->com.state = RES_CQ_BUSY;
- if (cq)
- *cq = r;
- }
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_CQ_BUSY;
+ if (cq)
+ *cq = r;
}
spin_unlock_irq(mlx4_tlock(dev));
@@ -1385,7 +1371,7 @@ static int cq_res_start_move_to(struct mlx4_dev *dev, int slave, int cqn,
}
static int srq_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
- enum res_cq_states state, struct res_srq **srq)
+ enum res_srq_states state, struct res_srq **srq)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
@@ -1394,39 +1380,25 @@ static int srq_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
spin_lock_irq(mlx4_tlock(dev));
r = res_tracker_lookup(&tracker->res_tree[RES_SRQ], index);
- if (!r)
+ if (!r) {
err = -ENOENT;
- else if (r->com.owner != slave)
+ } else if (r->com.owner != slave) {
err = -EPERM;
- else {
- switch (state) {
- case RES_SRQ_BUSY:
+ } else if (state == RES_SRQ_ALLOCATED) {
+ if (r->com.state != RES_SRQ_HW)
err = -EINVAL;
- break;
-
- case RES_SRQ_ALLOCATED:
- if (r->com.state != RES_SRQ_HW)
- err = -EINVAL;
- else if (atomic_read(&r->ref_count))
- err = -EBUSY;
- break;
-
- case RES_SRQ_HW:
- if (r->com.state != RES_SRQ_ALLOCATED)
- err = -EINVAL;
- break;
-
- default:
- err = -EINVAL;
- }
+ else if (atomic_read(&r->ref_count))
+ err = -EBUSY;
+ } else if (state != RES_SRQ_HW || r->com.state != RES_SRQ_ALLOCATED) {
+ err = -EINVAL;
+ }
- if (!err) {
- r->com.from_state = r->com.state;
- r->com.to_state = state;
- r->com.state = RES_SRQ_BUSY;
- if (srq)
- *srq = r;
- }
+ if (!err) {
+ r->com.from_state = r->com.state;
+ r->com.to_state = state;
+ r->com.state = RES_SRQ_BUSY;
+ if (srq)
+ *srq = r;
}
spin_unlock_irq(mlx4_tlock(dev));
@@ -3634,7 +3606,7 @@ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header,
!is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
list_for_each_entry_safe(res, tmp, rlist, list) {
be_mac = cpu_to_be64(res->mac << 16);
- if (!memcmp(&be_mac, eth_header->eth.dst_mac, ETH_ALEN))
+ if (ether_addr_equal((u8 *)&be_mac, eth_header->eth.dst_mac))
return 0;
}
pr_err("MAC %pM doesn't belong to VF %d, Steering rule rejected\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index 8fdf237..98faf87 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -31,7 +31,6 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/mlx4/cmd.h>
#include <linux/mlx4/srq.h>
@@ -117,7 +116,7 @@ err_put:
mlx4_table_put(dev, &srq_table->table, *srqn);
err_out:
- mlx4_bitmap_free(&srq_table->bitmap, *srqn);
+ mlx4_bitmap_free(&srq_table->bitmap, *srqn, MLX4_NO_RR);
return err;
}
@@ -145,7 +144,7 @@ void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
mlx4_table_put(dev, &srq_table->cmpt_table, srqn);
mlx4_table_put(dev, &srq_table->table, srqn);
- mlx4_bitmap_free(&srq_table->bitmap, srqn);
+ mlx4_bitmap_free(&srq_table->bitmap, srqn, MLX4_NO_RR);
}
static void mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 8675d26..405c4fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -32,7 +32,6 @@
#include <asm-generic/kmap_types.h>
#include <linux/module.h>
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
OpenPOWER on IntegriCloud