From 6e8066999800d90d52af5c84ac49ebf683d14cdc Mon Sep 17 00:00:00 2001
From: Saeed Mahameed
Date: Sun, 2 Nov 2014 16:26:13 +0200
Subject: net/mlx4_core: Prevent VF from changing port configuration

Add a wrapper to the ACCESS_REG command that handles guest access to HW
registers, allowing read operations but preventing writes. This keeps
SRIOV guests from changing port PTYS configuration, such as speed and
advertised link modes.

Fixes: adbc7ac5c15e ('net/mlx4_core: Introduce ACCESS_REG CMD [...]')
Signed-off-by: Saeed Mahameed
Signed-off-by: Amir Vadai
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers/net/ethernet/mellanox/mlx4/mlx4.h')

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index de10dbb..254ec7b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1273,6 +1273,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
                                          struct mlx4_cmd_mailbox *inbox,
                                          struct mlx4_cmd_mailbox *outbox,
                                          struct mlx4_cmd_info *cmd);
+int mlx4_ACCESS_REG_wrapper(struct mlx4_dev *dev, int slave,
+                            struct mlx4_vhcr *vhcr,
+                            struct mlx4_cmd_mailbox *inbox,
+                            struct mlx4_cmd_mailbox *outbox,
+                            struct mlx4_cmd_info *cmd);
 
 int mlx4_get_mgm_entry_size(struct mlx4_dev *dev);
 int mlx4_get_qp_per_mgm(struct mlx4_dev *dev);
--
cgit v1.1

From d475c95b4bcff983ac76e8522bfd2d29bcc567d0 Mon Sep 17 00:00:00 2001
From: Matan Barak
Date: Sun, 2 Nov 2014 16:26:17 +0200
Subject: net/mlx4_core: Add retrieval of CONFIG_DEV parameters

Add code to issue the CONFIG_DEV "get" firmware command. This command
is used to obtain parameters needed to support the various RX
checksumming options and the vxlan UDP port. The GET operation is
allowed for VFs too.

Signed-off-by: Matan Barak
Signed-off-by: Shani Michaeli
Signed-off-by: Or Gerlitz
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers/net/ethernet/mellanox/mlx4/mlx4.h')

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 254ec7b..f8fc7bd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -947,6 +947,11 @@ int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
                           struct mlx4_cmd_mailbox *inbox,
                           struct mlx4_cmd_mailbox *outbox,
                           struct mlx4_cmd_info *cmd);
+int mlx4_CONFIG_DEV_wrapper(struct mlx4_dev *dev, int slave,
+                            struct mlx4_vhcr *vhcr,
+                            struct mlx4_cmd_mailbox *inbox,
+                            struct mlx4_cmd_mailbox *outbox,
+                            struct mlx4_cmd_info *cmd);
 int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
                      struct mlx4_vhcr *vhcr,
                      struct mlx4_cmd_mailbox *inbox,
--
cgit v1.1
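
The two wrappers above are only declared in this header; the enforcement
and query logic itself lives in .c files that are filtered out of this
diff. As a rough, self-contained C sketch of the read-only gating the
first commit message describes (every name and type below is invented
for illustration and is not the mlx4 implementation):

#include <errno.h>
#include <stdio.h>

/* Illustrative stand-ins, not the real mlx4 structures. */
enum reg_method { REG_QUERY, REG_WRITE };

struct reg_request {
        int slave;              /* 0 = PF, >0 = VF */
        enum reg_method method; /* requested register access */
};

/* Allow register reads from any function, reject writes from VFs. */
static int access_reg_check(const struct reg_request *req)
{
        if (req->slave != 0 && req->method == REG_WRITE)
                return -EPERM;
        return 0;
}

int main(void)
{
        struct reg_request vf_write = { .slave = 1, .method = REG_WRITE };
        struct reg_request vf_read  = { .slave = 1, .method = REG_QUERY };

        printf("VF write: %d\n", access_reg_check(&vf_write)); /* -EPERM */
        printf("VF read:  %d\n", access_reg_check(&vf_read));  /* 0 */
        return 0;
}
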
From ffc39f6d6fff2878c55ffa5ffb1828d7618c0a29 Mon Sep 17 00:00:00 2001
From: Matan Barak
Date: Thu, 13 Nov 2014 14:45:29 +0200
Subject: net/mlx4_core: Refactor mlx4_cmd_init and mlx4_cmd_cleanup

Refactor mlx4_cmd_init and mlx4_cmd_cleanup so that partial init and
cleanup are possible. After this refactoring, calling mlx4_cmd_init
several times is safe. This is necessary in the VF init flow: when
mlx4_init_hca returns -EACCES, we need to clean up and retry the call
with the slave flag set.

Signed-off-by: Matan Barak
Signed-off-by: Or Gerlitz
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'drivers/net/ethernet/mellanox/mlx4/mlx4.h')

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index f8fc7bd..f48e7c3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -606,6 +606,7 @@ struct mlx4_cmd {
        u8              use_events;
        u8              toggle;
        u8              comm_toggle;
+       u8              initialized;
 };
 
 enum {
@@ -1126,8 +1127,16 @@ int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave,
 
 int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe);
 
+enum {
+       MLX4_CMD_CLEANUP_STRUCT = 1UL << 0,
+       MLX4_CMD_CLEANUP_POOL   = 1UL << 1,
+       MLX4_CMD_CLEANUP_HCR    = 1UL << 2,
+       MLX4_CMD_CLEANUP_VHCR   = 1UL << 3,
+       MLX4_CMD_CLEANUP_ALL    = (MLX4_CMD_CLEANUP_VHCR << 1) - 1
+};
+
 int mlx4_cmd_init(struct mlx4_dev *dev);
-void mlx4_cmd_cleanup(struct mlx4_dev *dev);
+void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask);
 int mlx4_multi_func_init(struct mlx4_dev *dev);
 void mlx4_multi_func_cleanup(struct mlx4_dev *dev);
 void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
--
cgit v1.1
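
Since the diff only shows the new cleanup mask and the changed prototype,
here is a small, self-contained sketch of the partial init/cleanup
pattern the mask enables. The flag names mirror the enum above, but the
stages and function bodies are invented for illustration and are not
the mlx4 code:

#include <stdio.h>

enum {
        CMD_CLEANUP_STRUCT = 1u << 0,
        CMD_CLEANUP_POOL   = 1u << 1,
        CMD_CLEANUP_HCR    = 1u << 2,
        CMD_CLEANUP_VHCR   = 1u << 3,
        CMD_CLEANUP_ALL    = (CMD_CLEANUP_VHCR << 1) - 1
};

struct cmd_state {
        unsigned int initialized; /* bitmask of stages already set up */
};

/* Set up only the stages that are not initialized yet, so calling this
 * again after a partial failure (or a cleanup) is harmless. */
static int cmd_init(struct cmd_state *s)
{
        if (!(s->initialized & CMD_CLEANUP_STRUCT))
                s->initialized |= CMD_CLEANUP_STRUCT; /* e.g. locks, counters */
        if (!(s->initialized & CMD_CLEANUP_HCR))
                s->initialized |= CMD_CLEANUP_HCR;    /* e.g. map HCR registers */
        return 0;
}

/* Tear down only the stages requested in cleanup_mask and mark them as
 * uninitialized again. */
static void cmd_cleanup(struct cmd_state *s, unsigned int cleanup_mask)
{
        s->initialized &= ~(s->initialized & cleanup_mask);
}

int main(void)
{
        struct cmd_state s = { 0 };

        cmd_init(&s);
        cmd_cleanup(&s, CMD_CLEANUP_HCR);  /* partial cleanup */
        cmd_init(&s);                      /* safe to re-init */
        cmd_cleanup(&s, CMD_CLEANUP_ALL);
        printf("initialized = %#x\n", s.initialized); /* 0 */
        return 0;
}
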
From 3dca0f42c7baaa4e01699629da13d6556f001ebe Mon Sep 17 00:00:00 2001
From: Matan Barak
Date: Thu, 11 Dec 2014 10:57:53 +0200
Subject: net/mlx4_core: Use tasklet for user-space CQ completion events

Previously, we fired all our completion callbacks straight from the
ISR. Some of those callbacks were lightweight (for example, mlx4_en's
and IPoIB's napi callbacks), but some of them did more work (for
example, the user-space RDMA stack uverbs' completion handler). Beyond
that, doing more than the minimal work in an ISR is generally
considered wrong, and it can even lead to a hard lockup of the system:
when the hardware generates a lot of completion events, the loop over
those events can run long enough that the system watchdog detects a
hard lockup.

In order to avoid that, add a new way of invoking completion event
callbacks. In the interrupt itself, we add the CQs that received a
completion event to a per-EQ list and schedule a tasklet. In the
tasklet context we loop over all the CQs in the list and invoke the
user callback.

Signed-off-by: Matan Barak
Signed-off-by: Or Gerlitz
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'drivers/net/ethernet/mellanox/mlx4/mlx4.h')

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index f48e7c3..b67ef48 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -43,6 +43,8 @@
 #include <linux/timer.h>
 #include <linux/semaphore.h>
 #include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/driver.h>
@@ -373,6 +375,14 @@ struct mlx4_srq_context {
        __be64          db_rec_addr;
 };
 
+struct mlx4_eq_tasklet {
+       struct list_head list;
+       struct list_head process_list;
+       struct tasklet_struct task;
+       /* lock on completion tasklet list */
+       spinlock_t lock;
+};
+
 struct mlx4_eq {
        struct mlx4_dev        *dev;
        void __iomem           *doorbell;
@@ -383,6 +393,7 @@ struct mlx4_eq {
        int                     nent;
        struct mlx4_buf_list   *page_list;
        struct mlx4_mtt         mtt;
+       struct mlx4_eq_tasklet  tasklet_ctx;
 };
 
 struct mlx4_slave_eqe {
@@ -1146,6 +1157,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev);
 int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
                   unsigned long timeout);
 
+void mlx4_cq_tasklet_cb(unsigned long data);
 void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
 void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);
 
--
cgit v1.1
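
The mechanism described above is a classic top-half/bottom-half split:
the interrupt handler only queues CQs on a per-EQ list and schedules
deferred work, and the deferred context runs the potentially slow
callbacks. A self-contained sketch of that split follows; it uses a
plain singly linked list and an ordinary function in place of struct
tasklet_struct and mlx4_cq_tasklet_cb, and omits the spinlock the real
driver takes around the list:

#include <stdio.h>
#include <stddef.h>

/* Illustrative stand-ins, not the real mlx4 structures. */
struct cq {
        unsigned int cqn;
        void (*comp)(struct cq *cq); /* user completion callback */
        struct cq *next;             /* link on the per-EQ process list */
};

struct eq_tasklet_ctx {
        struct cq *process_list;     /* CQs queued by the "ISR" */
};

/* "Interrupt" path: only link the CQ onto the list and (conceptually)
 * schedule the tasklet. No user callbacks run here. */
static void isr_queue_completion(struct eq_tasklet_ctx *ctx, struct cq *cq)
{
        cq->next = ctx->process_list;
        ctx->process_list = cq;
        /* tasklet_schedule(&eq->tasklet_ctx.task) would go here */
}

/* "Tasklet" path: detach the list and invoke the callbacks outside the
 * interrupt handler. */
static void tasklet_cb(struct eq_tasklet_ctx *ctx)
{
        struct cq *cq = ctx->process_list;

        ctx->process_list = NULL;
        while (cq) {
                struct cq *next = cq->next;

                cq->comp(cq);
                cq = next;
        }
}

static void print_comp(struct cq *cq)
{
        printf("completion on CQ %u\n", cq->cqn);
}

int main(void)
{
        struct eq_tasklet_ctx ctx = { NULL };
        struct cq a = { 1, print_comp, NULL }, b = { 2, print_comp, NULL };

        isr_queue_completion(&ctx, &a);
        isr_queue_completion(&ctx, &b);
        tasklet_cb(&ctx);
        return 0;
}
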
From ddae0349fdb78bcc5e7219061847012aa1a29069 Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev
Date: Thu, 11 Dec 2014 10:57:54 +0200
Subject: net/mlx4: Change QP allocation scheme

When using BF (Blue-Flame), the QPN overrides the VLAN, CV, and SV
fields in the WQE. Thus, BF may only be used for QPNs with bits 6,7
unset.

The current Ethernet driver code reserves a Tx QP range with 256b
alignment. This is wrong because if there are more than 64 Tx QPs in
use, QPNs >= base + 65 will have bits 6/7 set.

This problem is not specific to the Ethernet driver: any entity that
tries to reserve more than 64 BF-enabled QPs should fail. Also, using
ranges is not necessary here and is wasteful.

The new mechanism introduced here supports reservation of "Eth QPs
eligible for BF" for all drivers: bare-metal, multi-PF, and VFs (when
hypervisors support WC in VMs). The flow we use is:

1. In mlx4_en, allocate Tx QPs one by one instead of using a range
   allocation, and request "BF enabled QPs" if BF is supported for
   the function.

2. In the ALLOC_RES FW command, change param1 to:
   a. param1[23:0]  - number of QPs
   b. param1[31:24] - flags controlling QP reservation

   Bit 31 refers to Eth blueflame supported QPs. Those QPs must have
   bits 6 and 7 unset in order to be used in Ethernet. Bits 24-30 of
   the flags are currently reserved.

When a function tries to allocate a QP, it states the required
attributes for this QP. Those attributes are considered "best-effort".
If an attribute, such as Ethernet BF enabled QP, is a must-have
attribute, the function has to check that the attribute is supported
before trying to do the allocation.

In a lower layer of the code, mlx4_qp_reserve_range masks out the bits
which are unsupported. If SRIOV is used, the PF validates those
attributes and masks out unsupported attributes as well. In order to
notify VFs which attributes are supported, the VF uses the
QUERY_FUNC_CAP command. This command's mailbox is filled by the PF,
which notifies which QP allocation attributes it supports.

Signed-off-by: Eugenia Emantayev
Signed-off-by: Matan Barak
Signed-off-by: Or Gerlitz
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers/net/ethernet/mellanox/mlx4/mlx4.h')

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index b67ef48..6834da6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -884,7 +884,8 @@ extern struct workqueue_struct *mlx4_wq;
 
 u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap);
 void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr);
-u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align);
+u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt,
+                            int align, u32 skip_mask);
 void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt,
                             int use_rr);
 u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap);
@@ -970,7 +971,7 @@ int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
                      struct mlx4_cmd_mailbox *outbox,
                      struct mlx4_cmd_info *cmd);
 int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
-                            int *base);
+                            int *base, u8 flags);
 void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
 int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
 void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
--
cgit v1.1
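
The new skip_mask parameter of mlx4_bitmap_alloc_range is what lets the
allocator hand out QP numbers one at a time while keeping the BlueFlame
bits (6 and 7) clear. A self-contained sketch of a bitmap search that
honours such a mask (deliberately simplified: no wrap-around,
round-robin, or locking, and not the mlx4 implementation):

#include <stdio.h>
#include <stdbool.h>

#define BITMAP_SIZE     512
#define QP_BF_SKIP_MASK 0xc0u /* bits 6 and 7 must be clear for BlueFlame */

static bool taken[BITMAP_SIZE];

/* Find the first free object whose number has no bit from skip_mask
 * set. Returns the object number, or -1 if nothing suitable is free. */
static int bitmap_alloc_skip(unsigned int skip_mask)
{
        for (unsigned int obj = 0; obj < BITMAP_SIZE; obj++) {
                if (taken[obj] || (obj & skip_mask))
                        continue;
                taken[obj] = true;
                return (int)obj;
        }
        return -1;
}

int main(void)
{
        /* Pretend objects 0..63 are already in use; the next BF-eligible
         * number is then 256, since every value in 64..255 has bit 6 or
         * bit 7 set. */
        for (int i = 0; i < 64; i++)
                taken[i] = true;

        printf("BF-eligible: %d\n", bitmap_alloc_skip(QP_BF_SKIP_MASK)); /* 256 */
        printf("any QPN:     %d\n", bitmap_alloc_skip(0));               /* 64 */
        return 0;
}
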
From 7a89399ffad7b7c47b43afda010309b3b88538c0 Mon Sep 17 00:00:00 2001
From: Matan Barak
Date: Thu, 11 Dec 2014 10:57:56 +0200
Subject: net/mlx4: Add mlx4_bitmap zone allocator

The zone allocator is a mechanism which manages a few mlx4_bitmaps.

When allocating a resource, the user indicates the desired zone from
which this resource should be allocated. If possible, the resource is
allocated from this zone. Otherwise, the resource is allocated from a
less-than, equal-to, or higher-than priority zone, according to the
desired zone's properties, in that respective allocation order.

Signed-off-by: Matan Barak
Signed-off-by: Or Gerlitz
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h | 69 +++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

(limited to 'drivers/net/ethernet/mellanox/mlx4/mlx4.h')

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 6834da6..bc1505e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -245,6 +245,7 @@ struct mlx4_bitmap {
        u32                     reserved_top;
        u32                     mask;
        u32                     avail;
+       u32                     effective_len;
        spinlock_t              lock;
        unsigned long          *table;
 };
@@ -1345,4 +1346,72 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
 int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
 int mlx4_config_mad_demux(struct mlx4_dev *dev);
 
+enum mlx4_zone_flags {
+       MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO   = 1UL << 0,
+       MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO      = 1UL << 1,
+       MLX4_ZONE_FALLBACK_TO_HIGHER_PRIO       = 1UL << 2,
+       MLX4_ZONE_USE_RR                        = 1UL << 3,
+};
+
+enum mlx4_zone_alloc_flags {
+       /* No two objects could overlap between zones. UID
+        * could be left unused. If this flag is given and
+        * two overlapped zones are used, an object will be free'd
+        * from the smallest possible matching zone.
+        */
+       MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP = 1UL << 0,
+};
+
+struct mlx4_zone_allocator;
+
+/* Create a new zone allocator */
+struct mlx4_zone_allocator *mlx4_zone_allocator_create(enum mlx4_zone_alloc_flags flags);
+
+/* Attach a mlx4_bitmap <bitmap> of priority <priority> to the zone allocator
+ * <zone_alloc>. Allocating an object from this zone adds an offset <offset>.
+ * Similarly, when searching for an object to free, this offset is taken into
+ * account. The use_rr mlx4_ib parameter for allocating objects from this
+ * <bitmap> is given through the MLX4_ZONE_USE_RR flag in <flags>.
+ * When an allocation fails, <zone_alloc> tries to allocate from other zones
+ * according to the policy set by <flags>. <puid> is the unique identifier
+ * received to this zone.
+ */
+int mlx4_zone_add_one(struct mlx4_zone_allocator *zone_alloc,
+                      struct mlx4_bitmap *bitmap,
+                      u32 flags,
+                      int priority,
+                      int offset,
+                      u32 *puid);
+
+/* Remove bitmap indicated by <uid> from <zone_alloc> */
+int mlx4_zone_remove_one(struct mlx4_zone_allocator *zone_alloc, u32 uid);
+
+/* Delete the zone allocator <zones> */
+void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc);
+
+/* Allocate <count> objects with align <align> and skip_mask <skip_mask>
+ * from the mlx4_bitmap whose uid is <uid>. The bitmap which we actually
+ * allocated from is returned in <puid>. If the allocation fails, a negative
+ * number is returned. Otherwise, the offset of the first object is returned.
+ */
+u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count,
+                            int align, u32 skip_mask, u32 *puid);
+
+/* Free <count> objects, start from <obj> of the uid <uid> from zone_allocator
+ * <zones>.
+ */
+u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones,
+                           u32 uid, u32 obj, u32 count);
+
+/* If <zones> was allocated with MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP, instead of
+ * specifying the uid when freeing an object, zone allocator could figure it by
+ * itself. Other parameters are similar to mlx4_zone_free.
+ */
+u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count);
+
+/* Returns a pointer to mlx4_bitmap that was attached to <zones> with <uid> */
+struct mlx4_bitmap *mlx4_zone_get_bitmap(struct mlx4_zone_allocator *zones, u32 uid);
+
 #endif /* MLX4_H */
--
cgit v1.1
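
The prototypes above are enough to show how a consumer might wire the
zone allocator together. The following is only an illustrative,
uncompiled usage sketch against those declarations; it assumes two
mlx4_bitmaps that were already initialized elsewhere, runs in mlx4_core
context, and follows the comment's "negative number on failure"
convention by testing the u32 result against (u32)-1:

/* Illustrative only: <bmp_hi> and <bmp_lo> are assumed to be already
 * set up; error handling is abbreviated. */
static int example_setup_zones(struct mlx4_bitmap *bmp_hi,
                               struct mlx4_bitmap *bmp_lo)
{
        struct mlx4_zone_allocator *zones;
        u32 uid_hi, uid_lo, alloc_uid;
        u32 obj;
        int err;

        zones = mlx4_zone_allocator_create(MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP);
        if (!zones)
                return -ENOMEM;

        /* Preferred zone: priority 0, may fall back to lower-priority zones. */
        err = mlx4_zone_add_one(zones, bmp_hi,
                                MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO,
                                0 /* priority */, 0 /* offset */, &uid_hi);
        if (err)
                goto out;

        /* Fallback zone: lower priority, offset so objects don't overlap. */
        err = mlx4_zone_add_one(zones, bmp_lo, 0, 1 /* priority */,
                                1024 /* offset */, &uid_lo);
        if (err)
                goto out;

        /* Ask for 8 objects, 8-aligned, with bits 6/7 clear (BF eligible). */
        obj = mlx4_zone_alloc_entries(zones, uid_hi, 8, 8, 0xc0, &alloc_uid);
        if (obj == (u32)-1) {
                err = -ENOMEM;
                goto out;
        }

        /* Free via the uid of the bitmap the objects actually came from. */
        mlx4_zone_free_entries(zones, alloc_uid, obj, 8);
out:
        mlx4_zone_allocator_destroy(zones);
        return err;
}
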
From d57febe1a47801ef8a55dbf10672850523dfaa60 Mon Sep 17 00:00:00 2001
From: Matan Barak
Date: Thu, 11 Dec 2014 10:57:57 +0200
Subject: net/mlx4: Add A0 hybrid steering

A0 hybrid steering is a form of high performance flow steering. By
using this mode, mlx4 cards use a fast, limited, table-based steering
in order to enable fast steering of unicast packets to a QP.

In order to implement A0 hybrid steering we allocate resources from
different zones:
(1) General range
(2) Special MAC-assigned QPs [RSS, Raw-Ethernet] - each has its own
    region.

When we create an RSS QP or a raw Ethernet (A0 steerable and BF ready)
QP, we try hard to allocate the QP from range (2). Otherwise, we try
hard not to allocate from this range. However, when the system is
pushed to its limits and one needs every resource, the allocator uses
every region it can.

Meaning, when we run out of raw-eth QPs, the allocator allocates from
the general range (and the special-A0 area is no longer active). If we
run out of RSS QPs, the mechanism tries to allocate from the raw-eth
QP zone. If that is also exhausted, the allocator will allocate from
the general range (and the A0 region is no longer active).

Note that if a raw-eth QP is allocated from the general range, it
attempts to allocate the range such that bits 6 and 7 (blueflame bits)
in the QP number are not set.

When the feature is used in SRIOV, the VF has to notify the PF what
kind of QP attributes it needs. In order to do that, along with the
"Eth QP blueflame" bit, we reserve a new "A0 steerable QP" bit.
According to the combination of these bits, the PF tries to allocate
a suitable QP.

In order to maintain backward compatibility (with older PFs), the PF
notifies which QP attributes it supports via the QUERY_FUNC_CAP
command.

Signed-off-by: Matan Barak
Signed-off-by: Or Gerlitz
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'drivers/net/ethernet/mellanox/mlx4/mlx4.h')

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index bc1505e..cebd118 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -682,8 +682,19 @@ struct mlx4_srq_table {
        struct mlx4_icm_table   cmpt_table;
 };
 
+enum mlx4_qp_table_zones {
+       MLX4_QP_TABLE_ZONE_GENERAL,
+       MLX4_QP_TABLE_ZONE_RSS,
+       MLX4_QP_TABLE_ZONE_RAW_ETH,
+       MLX4_QP_TABLE_ZONE_NUM
+};
+
+#define MLX4_A0_STEERING_TABLE_SIZE    256
+
 struct mlx4_qp_table {
-       struct mlx4_bitmap      bitmap;
+       struct mlx4_bitmap     *bitmap_gen;
+       struct mlx4_zone_allocator *zones;
+       u32                     zones_uids[MLX4_QP_TABLE_ZONE_NUM];
        u32                     rdmarc_base;
        int                     rdmarc_shift;
        spinlock_t              lock;
--
cgit v1.1

From 7d077cd34eabb2ffd05abe0f2cad01da1ef11712 Mon Sep 17 00:00:00 2001
From: Matan Barak
Date: Thu, 11 Dec 2014 10:58:00 +0200
Subject: net/mlx4: Add support for A0 steering

Add the required firmware commands for A0 steering and a way to enable
it. The firmware support focuses on the INIT_HCA, QUERY_HCA,
QUERY_PORT, QUERY_DEV_CAP and QUERY_FUNC_CAP commands. Those commands
are used to configure and query the device.

The different A0 DMFS (steering) modes are:

Static  - optimized performance, but flow steering rules are limited.
          This mode should be chosen explicitly by the user in order to
          be used.

Dynamic - this mode should be explicitly chosen by the user. In this
          mode, the FW works in optimized steering mode as long as it
          can and afterwards automatically drops to classic (full)
          DMFS.

Disable - this mode should be explicitly chosen by the user. The user
          instructs the system not to use optimized steering, even if
          the FW supports Dynamic A0 DMFS (and thus would be able to
          use optimized steering in Default A0 DMFS mode).

Default - this mode is implicitly chosen. In this mode, if the FW
          supports Dynamic A0 DMFS, it'll work in this mode.
          Otherwise, it'll work in Disable A0 DMFS mode.

Under SRIOV configuration, when the A0 steering mode is enabled, older
guest VF drivers who aren't using the RX QP allocation flag
(MLX4_RESERVE_A0_QP) will get a QP from the general range and fail when
attempting to register a steering rule. To avoid that, the PF context
behaviour is changed once in A0 static mode, to require support for the
allocation flag in VF drivers too.

In order to enable A0 steering, we use the log_num_mgm_entry_size
module parameter. If the value of the parameter is not positive, we
treat the absolute value of log_num_mgm_entry_size as a bit field.
Setting bit 2 of this bit field enables static A0 steering.

Signed-off-by: Matan Barak
Signed-off-by: Or Gerlitz
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'drivers/net/ethernet/mellanox/mlx4/mlx4.h')

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index cebd118..bdd4eea 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -689,8 +689,6 @@ enum mlx4_qp_table_zones {
        MLX4_QP_TABLE_ZONE_NUM
 };
 
-#define MLX4_A0_STEERING_TABLE_SIZE    256
-
 struct mlx4_qp_table {
        struct mlx4_bitmap     *bitmap_gen;
        struct mlx4_zone_allocator *zones;
--
cgit v1.1
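
The last commit message describes reinterpreting a non-positive
log_num_mgm_entry_size as a bit field. A tiny self-contained sketch of
that decoding; only bit 2 (static A0 steering) is named by the commit,
so the other bits are left opaque here, and this is not the mlx4 parsing
code itself:

#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>

#define A0_STATIC_STEERING_BIT (1 << 2) /* bit 2 of the abs() bit field */

/* If the module parameter is positive it is a plain log2 entry size;
 * otherwise its absolute value is treated as a bit field of options. */
static bool a0_static_steering_enabled(int log_num_mgm_entry_size)
{
        if (log_num_mgm_entry_size > 0)
                return false;
        return (abs(log_num_mgm_entry_size) & A0_STATIC_STEERING_BIT) != 0;
}

int main(void)
{
        /* e.g. log_num_mgm_entry_size=-4 gives |-4| = 0b100, so bit 2 is set */
        printf("%d\n", a0_static_steering_enabled(-4)); /* 1 */
        printf("%d\n", a0_static_steering_enabled(10)); /* 0 */
        return 0;
}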