-rw-r--r--  .mailmap | 2
-rw-r--r--  MAINTAINERS | 1
-rw-r--r--  drivers/infiniband/Kconfig | 11
-rw-r--r--  drivers/infiniband/core/Makefile | 4
-rw-r--r--  drivers/infiniband/core/addr.c | 13
-rw-r--r--  drivers/infiniband/core/cache.c | 533
-rw-r--r--  drivers/infiniband/core/cm.c | 67
-rw-r--r--  drivers/infiniband/core/cma.c | 160
-rw-r--r--  drivers/infiniband/core/cma_priv.h | 97
-rw-r--r--  drivers/infiniband/core/core_priv.h | 11
-rw-r--r--  drivers/infiniband/core/device.c | 18
-rw-r--r--  drivers/infiniband/core/iwpm_util.c | 5
-rw-r--r--  drivers/infiniband/core/multicast.c | 26
-rw-r--r--  drivers/infiniband/core/nldev.c | 364
-rw-r--r--  drivers/infiniband/core/rdma_core.c | 13
-rw-r--r--  drivers/infiniband/core/restrack.c | 121
-rw-r--r--  drivers/infiniband/core/sa_query.c | 202
-rw-r--r--  drivers/infiniband/core/sysfs.c | 53
-rw-r--r--  drivers/infiniband/core/ucm.c | 18
-rw-r--r--  drivers/infiniband/core/ucma.c | 40
-rw-r--r--  drivers/infiniband/core/uverbs.h | 41
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 279
-rw-r--r--  drivers/infiniband/core/uverbs_ioctl.c | 60
-rw-r--r--  drivers/infiniband/core/uverbs_ioctl_merge.c | 2
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 218
-rw-r--r--  drivers/infiniband/core/uverbs_std_types.c | 326
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_cq.c | 210
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_dm.c | 108
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_flow_action.c | 435
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_mr.c | 147
-rw-r--r--  drivers/infiniband/core/verbs.c | 87
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 20
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h | 6
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c | 2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.c | 2
-rw-r--r--  drivers/infiniband/hw/cxgb3/Kconfig | 9
-rw-r--r--  drivers/infiniband/hw/cxgb3/Makefile | 2
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_dbg.c | 206
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.h | 9
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cq.c | 6
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c | 5
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c | 24
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c | 3
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c | 24
-rw-r--r--  drivers/infiniband/hw/hfi1/driver.c | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/qp.c | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/user_exp_rcv.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/Makefile | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_ah.c | 8
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c | 54
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_db.c | 180
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h | 59
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 8
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 80
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 41
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_pd.c | 5
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 70
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw.h | 11
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_cm.c | 129
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_cm.h | 5
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 56
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_d.h | 5
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_hw.c | 35
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_main.c | 2
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_puda.c | 8
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_type.h | 11
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_utils.c | 10
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_verbs.c | 57
-rw-r--r--  drivers/infiniband/hw/mlx4/ah.c | 10
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 102
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 19
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 27
-rw-r--r--  drivers/infiniband/hw/mlx5/ah.c | 12
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c | 104
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.h | 4
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 572
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 71
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 257
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 156
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c | 1
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c | 1
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 8
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 22
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c | 4
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 34
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 12
-rw-r--r--  drivers/infiniband/hw/qedr/main.c | 9
-rw-r--r--  drivers/infiniband/hw/qedr/qedr_roce_cm.c | 16
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c | 155
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.h | 6
-rw-r--r--  drivers/infiniband/hw/qib/qib.h | 3
-rw-r--r--  drivers/infiniband/hw/qib/qib_diag.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_file_ops.c | 8
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c | 10
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c | 4
-rw-r--r--  drivers/infiniband/hw/qib/qib_sdma.c | 24
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_main.c | 1
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_transport.c | 4
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 32
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 2
-rw-r--r--  drivers/infiniband/sw/rdmavt/vt.c | 5
-rw-r--r--  drivers/infiniband/sw/rdmavt/vt.h | 1
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c | 4
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.h | 6
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_av.c | 5
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_cq.c | 15
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h | 20
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c | 56
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c | 35
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_queue.c | 24
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_queue.h | 5
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_recv.c | 16
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c | 15
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_srq.c | 44
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 101
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h | 2
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h | 5
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 181
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c | 396
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.h | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/fw.c | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/main.c | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 21
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fw.c | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/rl.c | 63
-rw-r--r--  include/linux/mlx4/device.h | 4
-rw-r--r--  include/linux/mlx5/device.h | 12
-rw-r--r--  include/linux/mlx5/driver.h | 15
-rw-r--r--  include/linux/mlx5/fs_helpers.h | 8
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h | 102
-rw-r--r--  include/rdma/ib_addr.h | 9
-rw-r--r--  include/rdma/ib_cache.h | 29
-rw-r--r--  include/rdma/ib_sa.h | 13
-rw-r--r--  include/rdma/ib_verbs.h | 212
-rw-r--r--  include/rdma/rdma_cm.h | 44
-rw-r--r--  include/rdma/rdma_vt.h | 2
-rw-r--r--  include/rdma/restrack.h | 26
-rw-r--r--  include/rdma/uverbs_ioctl.h | 153
-rw-r--r--  include/rdma/uverbs_named_ioctl.h | 90
-rw-r--r--  include/rdma/uverbs_std_types.h | 34
-rw-r--r--  include/uapi/rdma/bnxt_re-abi.h | 21
-rw-r--r--  include/uapi/rdma/cxgb3-abi.h | 17
-rw-r--r--  include/uapi/rdma/cxgb4-abi.h | 29
-rw-r--r--  include/uapi/rdma/hfi/hfi1_ioctl.h | 32
-rw-r--r--  include/uapi/rdma/hfi/hfi1_user.h | 10
-rw-r--r--  include/uapi/rdma/hns-abi.h | 22
-rw-r--r--  include/uapi/rdma/i40iw-abi.h (renamed from drivers/infiniband/hw/i40iw/i40iw_ucontext.h) | 16
-rw-r--r--  include/uapi/rdma/ib_user_cm.h | 48
-rw-r--r--  include/uapi/rdma/ib_user_ioctl_cmds.h | 134
-rw-r--r--  include/uapi/rdma/ib_user_ioctl_verbs.h | 96
-rw-r--r--  include/uapi/rdma/ib_user_mad.h | 4
-rw-r--r--  include/uapi/rdma/ib_user_verbs.h | 195
-rw-r--r--  include/uapi/rdma/mlx4-abi.h | 52
-rw-r--r--  include/uapi/rdma/mlx5-abi.h | 72
-rw-r--r--  include/uapi/rdma/mlx5_user_ioctl_cmds.h | 48
-rw-r--r--  include/uapi/rdma/mlx5_user_ioctl_verbs.h | 43
-rw-r--r--  include/uapi/rdma/mthca-abi.h | 10
-rw-r--r--  include/uapi/rdma/nes-abi.h | 6
-rw-r--r--  include/uapi/rdma/ocrdma-abi.h | 36
-rw-r--r--  include/uapi/rdma/qedr-abi.h | 20
-rw-r--r--  include/uapi/rdma/rdma_netlink.h | 51
-rw-r--r--  include/uapi/rdma/rdma_user_cm.h | 49
-rw-r--r--  include/uapi/rdma/rdma_user_ioctl.h | 38
-rw-r--r--  include/uapi/rdma/rdma_user_ioctl_cmds.h | 99
-rw-r--r--  include/uapi/rdma/rdma_user_rxe.h | 58
-rw-r--r--  include/uapi/rdma/vmw_pvrdma-abi.h | 49
175 files changed, 6672 insertions, 2915 deletions
diff --git a/.mailmap b/.mailmap
index 9795e6b..7fa9d41 100644
--- a/.mailmap
+++ b/.mailmap
@@ -102,6 +102,8 @@ Koushik <raghavendra.koushik@neterion.com>
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+Leon Romanovsky <leon@kernel.org> <leon@leon.nu>
+Leon Romanovsky <leon@kernel.org> <leonro@mellanox.com>
Leonid I Ananiev <leonid.i.ananiev@intel.com>
Linas Vepstas <linas@austin.ibm.com>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
diff --git a/MAINTAINERS b/MAINTAINERS
index f1be26e..0407846 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7214,6 +7214,7 @@ M: Shiraz Saleem <shiraz.saleem@intel.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/i40iw/
+F: include/uapi/rdma/i40iw-abi.h
INTEL SHA MULTIBUFFER DRIVER
M: Megha Dey <megha.dey@linux.intel.com>
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 8517d6e..ee270e0 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -35,14 +35,13 @@ config INFINIBAND_USER_ACCESS
libibverbs, libibcm and a hardware driver library from
rdma-core <https://github.com/linux-rdma/rdma-core>.
-config INFINIBAND_EXP_USER_ACCESS
- bool "Enable the full uverbs ioctl interface (EXPERIMENTAL)"
+config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI
+ bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)"
depends on INFINIBAND_USER_ACCESS
---help---
- IOCTL based ABI support for Infiniband. This allows userspace
- to invoke the experimental IOCTL based ABI.
- These commands are parsed via per-device parsing tree and
- enables per-device features.
+ IOCTL based uAPI support for InfiniBand is enabled by default only
+ for new verbs. Selecting this option additionally allows userspace
+ to invoke the IOCTL based uAPI for the existing legacy verbs.
config INFINIBAND_USER_MEM
bool
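The help text above means the new ioctl uAPI covers only newly added verbs unless this option is set. As a rough sketch of how code might honor the option — the Kconfig symbol and IS_ENABLED() are real, but the handler below is hypothetical and only illustrates the gating idiom:

#include <linux/kconfig.h>

/* Hedged sketch: gate a legacy-verb ioctl handler on the new Kconfig
 * symbol. Only CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI comes from
 * this patch; handle_legacy_verb() is a made-up illustration.
 */
static int handle_legacy_verb(void)
{
        if (!IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI))
                return -EOPNOTSUPP;     /* legacy verbs stay on the old path */
        /* ... parse the ioctl attributes and invoke the legacy verb ... */
        return 0;
}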
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index f69833d..dda9e85 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -34,4 +34,6 @@ ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
- uverbs_ioctl_merge.o
+ uverbs_ioctl_merge.o uverbs_std_types_cq.o \
+ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
+ uverbs_std_types_mr.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index cb1d2ab..88a7542 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -329,7 +329,8 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
-static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int ib_nl_fetch_ha(const struct dst_entry *dst,
+ struct rdma_dev_addr *dev_addr,
const void *daddr, u32 seq, u16 family)
{
if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
@@ -340,7 +341,8 @@ static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
}
-static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int dst_fetch_ha(const struct dst_entry *dst,
+ struct rdma_dev_addr *dev_addr,
const void *daddr)
{
struct neighbour *n;
@@ -364,7 +366,7 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
return ret;
}
-static bool has_gateway(struct dst_entry *dst, sa_family_t family)
+static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
{
struct rtable *rt;
struct rt6_info *rt6;
@@ -378,7 +380,7 @@ static bool has_gateway(struct dst_entry *dst, sa_family_t family)
return rt6->rt6i_flags & RTF_GATEWAY;
}
-static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
const struct sockaddr *dst_in, u32 seq)
{
const struct sockaddr_in *dst_in4 =
@@ -482,7 +484,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
}
#endif
-static int addr_resolve_neigh(struct dst_entry *dst,
+static int addr_resolve_neigh(const struct dst_entry *dst,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
u32 seq)
@@ -736,7 +738,6 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
return addr_resolve(src_in, dst_addr, addr, false, 0);
}
-EXPORT_SYMBOL(rdma_resolve_ip_route);
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
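The addr.c hunks above const-qualify every helper that only inspects the dst_entry. A one-function sketch of the same idiom; dst_is_usable() is hypothetical and only demonstrates that read-only access compiles under the const-qualified signature:

#include <net/dst.h>

/* Sketch of the const-correctness idiom applied above: helpers that
 * only read a dst_entry take it as a const pointer, so the compiler
 * rejects accidental writes. dst_is_usable() is not from this patch.
 */
static bool dst_is_usable(const struct dst_entry *dst)
{
        return dst && dst->dev; /* read-only access; a write would not compile */
}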
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index e9a409d..e337b08 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -59,8 +59,6 @@ struct ib_update_work {
union ib_gid zgid;
EXPORT_SYMBOL(zgid);
-static const struct ib_gid_attr zattr;
-
enum gid_attr_find_mask {
GID_ATTR_FIND_MASK_GID = 1UL << 0,
GID_ATTR_FIND_MASK_NETDEV = 1UL << 1,
@@ -73,15 +71,6 @@ enum gid_table_entry_props {
GID_TABLE_ENTRY_DEFAULT = 1UL << 1,
};
-enum gid_table_write_action {
- GID_TABLE_WRITE_ACTION_ADD,
- GID_TABLE_WRITE_ACTION_DEL,
- /* MODIFY only updates the GID table. Currently only used by
- * ib_cache_update.
- */
- GID_TABLE_WRITE_ACTION_MODIFY
-};
-
struct ib_gid_table_entry {
unsigned long props;
union ib_gid gid;
@@ -100,31 +89,26 @@ struct ib_gid_table {
* (a) Find the GID
* (b) Delete it.
*
- * Add/delete should be carried out atomically.
- * This is done by locking this mutex from multiple
- * writers. We don't need this lock for IB, as the MAD
- * layer replaces all entries. All data_vec entries
- * are locked by this lock.
**/
- struct mutex lock;
- /* This lock protects the table entries from being
- * read and written simultaneously.
+ /* Any writer to data_vec must hold this lock and the write side of
+ * rwlock. readers must hold only rwlock. All writers must be in a
+ * sleepable context.
*/
+ struct mutex lock;
+ /* rwlock protects data_vec[ix]->props. */
rwlock_t rwlock;
struct ib_gid_table_entry *data_vec;
};
static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
{
- if (rdma_cap_roce_gid_table(ib_dev, port)) {
- struct ib_event event;
+ struct ib_event event;
- event.device = ib_dev;
- event.element.port_num = port;
- event.event = IB_EVENT_GID_CHANGE;
+ event.device = ib_dev;
+ event.element.port_num = port;
+ event.event = IB_EVENT_GID_CHANGE;
- ib_dispatch_event(&event);
- }
+ ib_dispatch_event(&event);
}
static const char * const gid_type_str[] = {
@@ -165,94 +149,127 @@ int ib_cache_gid_parse_type_str(const char *buf)
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
-/* This function expects that rwlock will be write locked in all
- * scenarios and that lock will be locked in sleep-able (RoCE)
- * scenarios.
- */
-static int write_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- enum gid_table_write_action action,
- bool default_gid)
- __releases(&table->rwlock) __acquires(&table->rwlock)
+static void del_roce_gid(struct ib_device *device, u8 port_num,
+ struct ib_gid_table *table, int ix)
{
- int ret = 0;
- struct net_device *old_net_dev;
- enum ib_gid_type old_gid_type;
+ pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+ device->name, port_num, ix,
+ table->data_vec[ix].gid.raw);
+
+ if (rdma_cap_roce_gid_table(device, port_num))
+ device->del_gid(&table->data_vec[ix].attr,
+ &table->data_vec[ix].context);
+ dev_put(table->data_vec[ix].attr.ndev);
+}
- /* in rdma_cap_roce_gid_table, this funciton should be protected by a
- * sleep-able lock.
- */
+static int add_roce_gid(struct ib_gid_table *table,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ struct ib_gid_table_entry *entry;
+ int ix = attr->index;
+ int ret = 0;
- if (rdma_cap_roce_gid_table(ib_dev, port)) {
- table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
- write_unlock_irq(&table->rwlock);
- /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
- * RoCE providers and thus only updates the cache.
- */
- if (action == GID_TABLE_WRITE_ACTION_ADD)
- ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
- &table->data_vec[ix].context);
- else if (action == GID_TABLE_WRITE_ACTION_DEL)
- ret = ib_dev->del_gid(ib_dev, port, ix,
- &table->data_vec[ix].context);
- write_lock_irq(&table->rwlock);
+ if (!attr->ndev) {
+ pr_err("%s NULL netdev device=%s port=%d index=%d\n",
+ __func__, attr->device->name, attr->port_num,
+ attr->index);
+ return -EINVAL;
}
- old_net_dev = table->data_vec[ix].attr.ndev;
- old_gid_type = table->data_vec[ix].attr.gid_type;
- if (old_net_dev && old_net_dev != attr->ndev)
- dev_put(old_net_dev);
- /* if modify_gid failed, just delete the old gid */
- if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
- gid = &zgid;
- attr = &zattr;
- table->data_vec[ix].context = NULL;
+ entry = &table->data_vec[ix];
+ if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
+ WARN(1, "GID table corruption device=%s port=%d index=%d\n",
+ attr->device->name, attr->port_num,
+ attr->index);
+ return -EINVAL;
}
- memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
- memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
- if (default_gid) {
- table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
- if (action == GID_TABLE_WRITE_ACTION_DEL)
- table->data_vec[ix].attr.gid_type = old_gid_type;
+ if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
+ ret = attr->device->add_gid(gid, attr, &entry->context);
+ if (ret) {
+ pr_err("%s GID add failed device=%s port=%d index=%d\n",
+ __func__, attr->device->name, attr->port_num,
+ attr->index);
+ goto add_err;
+ }
}
- if (table->data_vec[ix].attr.ndev &&
- table->data_vec[ix].attr.ndev != old_net_dev)
- dev_hold(table->data_vec[ix].attr.ndev);
-
- table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
+ dev_hold(attr->ndev);
+add_err:
+ if (!ret)
+ pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+ attr->device->name, attr->port_num, ix, gid->raw);
return ret;
}
-static int add_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, gid, attr,
- GID_TABLE_WRITE_ACTION_ADD, default_gid);
-}
+/**
+ * add_modify_gid - Add or modify GID table entry
+ *
+ * @table: GID table in which GID to be added or modified
+ * @gid: GID content
+ * @attr: Attributes of the GID
+ *
+ * Returns 0 on success or an appropriate error code. Zero GID
+ * addition is accepted for non-RoCE ports of HCAs that report zero
+ * entries as valid GIDs; such zero GIDs are not added to the cache.
+ */
+static int add_modify_gid(struct ib_gid_table *table,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ int ret;
+
+ if (rdma_protocol_roce(attr->device, attr->port_num)) {
+ ret = add_roce_gid(table, gid, attr);
+ if (ret)
+ return ret;
+ } else {
+ /*
+ * Some HCAs report multiple GID entries with only one
+ * valid GID and the remaining entries as zero GIDs.
+ * Tolerate this behavior for the IB link layer: don't
+ * fail the call, but don't add such entries to the GID cache.
+ */
+ if (!memcmp(gid, &zgid, sizeof(*gid)))
+ return 0;
+ }
+
+ lockdep_assert_held(&table->lock);
+ memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
+ memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
-static int modify_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, gid, attr,
- GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
+ write_lock_irq(&table->rwlock);
+ table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
+ write_unlock_irq(&table->rwlock);
+ return 0;
}
-static int del_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
- GID_TABLE_WRITE_ACTION_DEL, default_gid);
+/**
+ * del_gid - Delete GID table entry
+ *
+ * @ib_dev: IB device whose GID entry is to be deleted
+ * @port: Port number of the IB device
+ * @table: GID table of the IB device for a port
+ * @ix: GID entry index to delete
+ *
+ */
+static void del_gid(struct ib_device *ib_dev, u8 port,
+ struct ib_gid_table *table, int ix)
+{
+ lockdep_assert_held(&table->lock);
+ write_lock_irq(&table->rwlock);
+ table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
+ write_unlock_irq(&table->rwlock);
+
+ if (rdma_protocol_roce(ib_dev, port))
+ del_roce_gid(ib_dev, port, table, ix);
+ memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid));
+ memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
+ table->data_vec[ix].context = NULL;
}
-/* rwlock should be read locked */
+/* rwlock should be read locked, or lock should be held */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
const struct ib_gid_attr *val, bool default_gid,
unsigned long mask, int *pempty)
@@ -268,15 +285,32 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
i++;
+ /* find_gid() is used during GID addition, where it is expected
+ * to return a free (non-duplicate) entry slot. A free slot is
+ * requested and returned only when pempty is set, so look for
+ * a free slot only in that case.
+ */
+ if (pempty && empty < 0) {
+ if (data->props & GID_TABLE_ENTRY_INVALID) {
+ /* Found an invalid (free) entry; allocate it */
+ if (data->props & GID_TABLE_ENTRY_DEFAULT) {
+ if (default_gid)
+ empty = curr_index;
+ } else {
+ empty = curr_index;
+ }
+ }
+ }
+
+ /*
+ * find_gid() is also used to find a valid entry during a
+ * lookup operation, where validity must be checked. The free
+ * slot search above already examined INVALID entries, so any
+ * entry still flagged INVALID is simply skipped here.
+ */
if (data->props & GID_TABLE_ENTRY_INVALID)
continue;
- if (empty < 0)
- if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
- !memcmp(attr, &zattr, sizeof(*attr)) &&
- !data->props)
- empty = curr_index;
-
if (found >= 0)
continue;
@@ -312,20 +346,56 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid)
addrconf_ifid_eui48(&gid->raw[8], dev);
}
-int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
- union ib_gid *gid, struct ib_gid_attr *attr)
+static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr,
+ unsigned long mask, bool default_gid)
{
struct ib_gid_table *table;
- int ix;
int ret = 0;
- struct net_device *idev;
int empty;
+ int ix;
- table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
-
+ /* Do not allow adding a zero GID, per
+ * IB spec version 1.3 section 4.1.1 point (6) and
+ * section 12.7.10 and section 12.7.20
+ */
if (!memcmp(gid, &zgid, sizeof(*gid)))
return -EINVAL;
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+
+ mutex_lock(&table->lock);
+
+ ix = find_gid(table, gid, attr, default_gid, mask, &empty);
+ if (ix >= 0)
+ goto out_unlock;
+
+ if (empty < 0) {
+ ret = -ENOSPC;
+ goto out_unlock;
+ }
+ attr->device = ib_dev;
+ attr->index = empty;
+ attr->port_num = port;
+ ret = add_modify_gid(table, gid, attr);
+ if (!ret)
+ dispatch_gid_change_event(ib_dev, port);
+
+out_unlock:
+ mutex_unlock(&table->lock);
+ if (ret)
+ pr_warn("%s: unable to add gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
+ return ret;
+}
+
+int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr)
+{
+ struct net_device *idev;
+ unsigned long mask;
+ int ret;
+
if (ib_dev->get_netdev) {
idev = ib_dev->get_netdev(ib_dev, port);
if (idev && attr->ndev != idev) {
@@ -342,27 +412,11 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
dev_put(idev);
}
- mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
-
- ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_NETDEV, &empty);
- if (ix >= 0)
- goto out_unlock;
-
- if (empty < 0) {
- ret = -ENOSPC;
- goto out_unlock;
- }
-
- ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
- if (!ret)
- dispatch_gid_change_event(ib_dev, port);
+ mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_NETDEV;
-out_unlock:
- write_unlock_irq(&table->rwlock);
- mutex_unlock(&table->lock);
+ ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
return ret;
}
@@ -370,29 +424,32 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
struct ib_gid_table *table;
+ int ret = 0;
int ix;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
ix = find_gid(table, gid, attr, false,
GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_NETDEV |
- GID_ATTR_FIND_MASK_DEFAULT,
+ GID_ATTR_FIND_MASK_NETDEV,
NULL);
- if (ix < 0)
+ if (ix < 0) {
+ ret = -EINVAL;
goto out_unlock;
+ }
- if (!del_gid(ib_dev, port, table, ix, false))
- dispatch_gid_change_event(ib_dev, port);
+ del_gid(ib_dev, port, table, ix);
+ dispatch_gid_change_event(ib_dev, port);
out_unlock:
- write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
- return 0;
+ if (ret)
+ pr_debug("%s: can't delete gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
+ return ret;
}
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
@@ -405,16 +462,14 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
- for (ix = 0; ix < table->sz; ix++)
- if (table->data_vec[ix].attr.ndev == ndev)
- if (!del_gid(ib_dev, port, table, ix,
- !!(table->data_vec[ix].props &
- GID_TABLE_ENTRY_DEFAULT)))
- deleted = true;
+ for (ix = 0; ix < table->sz; ix++) {
+ if (table->data_vec[ix].attr.ndev == ndev) {
+ del_gid(ib_dev, port, table, ix);
+ deleted = true;
+ }
+ }
- write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
if (deleted)
@@ -492,6 +547,19 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
mask, port, index);
}
+/**
+ * ib_find_cached_gid_by_port - Returns the GID table index where a specified
+ * GID value occurs. It searches for the specified GID value in the local
+ * software cache.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
+ * @port_num: The port number of the device where the GID value should be
+ * searched.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
+ * @index: The index into the cached GID table where the GID was found. This
+ * parameter may be NULL.
+ */
int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
const union ib_gid *gid,
enum ib_gid_type gid_type,
@@ -528,7 +596,7 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
EXPORT_SYMBOL(ib_find_cached_gid_by_port);
/**
- * ib_find_gid_by_filter - Returns the GID table index where a specified
+ * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
* GID value occurs
* @device: The device to query.
* @gid: The GID value to search for.
@@ -539,7 +607,7 @@ EXPORT_SYMBOL(ib_find_cached_gid_by_port);
* otherwise, we continue searching the GID table. It's guaranteed that
* while filter is executed, ndev field is valid and the structure won't
* change. filter is executed in an atomic context. filter must not be NULL.
- * @index: The index into the cached GID table where the GID was found. This
+ * @index: The index into the cached GID table where the GID was found. This
* parameter may be NULL.
*
* ib_cache_gid_find_by_filter() searches for the specified GID value
@@ -598,6 +666,7 @@ static struct ib_gid_table *alloc_gid_table(int sz)
{
struct ib_gid_table *table =
kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
+ int i;
if (!table)
return NULL;
@@ -611,6 +680,11 @@ static struct ib_gid_table *alloc_gid_table(int sz)
table->sz = sz;
rwlock_init(&table->rwlock);
+ /* Mark all entries as invalid so that the allocator can hand out
+ * one of the invalid (free) entries.
+ */
+ for (i = 0; i < sz; i++)
+ table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
return table;
err_free_table:
@@ -635,16 +709,15 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
if (!table)
return;
- write_lock_irq(&table->rwlock);
+ mutex_lock(&table->lock);
for (i = 0; i < table->sz; ++i) {
if (memcmp(&table->data_vec[i].gid, &zgid,
- sizeof(table->data_vec[i].gid)))
- if (!del_gid(ib_dev, port, table, i,
- table->data_vec[i].props &
- GID_ATTR_FIND_MASK_DEFAULT))
- deleted = true;
+ sizeof(table->data_vec[i].gid))) {
+ del_gid(ib_dev, port, table, i);
+ deleted = true;
+ }
}
- write_unlock_irq(&table->rwlock);
+ mutex_unlock(&table->lock);
if (deleted)
dispatch_gid_change_event(ib_dev, port);
@@ -657,9 +730,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
{
union ib_gid gid;
struct ib_gid_attr gid_attr;
- struct ib_gid_attr zattr_type = zattr;
struct ib_gid_table *table;
unsigned int gid_type;
+ unsigned long mask;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
@@ -668,60 +741,19 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
gid_attr.ndev = ndev;
for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
- int ix;
- union ib_gid current_gid;
- struct ib_gid_attr current_gid_attr = {};
-
if (1UL << gid_type & ~gid_type_mask)
continue;
gid_attr.gid_type = gid_type;
- mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
- ix = find_gid(table, NULL, &gid_attr, true,
- GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_DEFAULT,
- NULL);
-
- /* Coudn't find default GID location */
- if (WARN_ON(ix < 0))
- goto release;
-
- zattr_type.gid_type = gid_type;
-
- if (!__ib_cache_gid_get(ib_dev, port, ix,
- &current_gid, &current_gid_attr) &&
- mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
- !memcmp(&gid, &current_gid, sizeof(gid)) &&
- !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
- goto release;
-
- if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
- memcmp(&current_gid_attr, &zattr_type,
- sizeof(current_gid_attr))) {
- if (del_gid(ib_dev, port, table, ix, true)) {
- pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
- ix, gid.raw);
- goto release;
- } else {
- dispatch_gid_change_event(ib_dev, port);
- }
- }
-
if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
- if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
- pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
- gid.raw);
- else
- dispatch_gid_change_event(ib_dev, port);
+ mask = GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_DEFAULT;
+ __ib_cache_gid_add(ib_dev, port, &gid,
+ &gid_attr, mask, true);
+ } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
+ ib_cache_gid_del(ib_dev, port, &gid, &gid_attr);
}
-
-release:
- if (current_gid_attr.ndev)
- dev_put(current_gid_attr.ndev);
- write_unlock_irq(&table->rwlock);
- mutex_unlock(&table->lock);
}
}
@@ -848,6 +880,20 @@ int ib_get_cached_gid(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_gid);
+/**
+ * ib_find_cached_gid - Returns the port number and GID table index where
+ * a specified GID value occurs.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
+ * @port_num: The port number of the device where the GID value was found.
+ * @index: The index into the cached GID table where the GID was found. This
+ * parameter may be NULL.
+ *
+ * ib_find_cached_gid() searches for the specified GID value in
+ * the local software cache.
+ */
int ib_find_cached_gid(struct ib_device *device,
const union ib_gid *gid,
enum ib_gid_type gid_type,
@@ -868,7 +914,7 @@ int ib_find_gid_by_filter(struct ib_device *device,
void *context, u16 *index)
{
/* Only RoCE GID table supports filter function */
- if (!rdma_cap_roce_gid_table(device, port_num) && filter)
+ if (!rdma_protocol_roce(device, port_num) && filter)
return -EPROTONOSUPPORT;
return ib_cache_gid_find_by_filter(device, gid,
@@ -910,8 +956,7 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
unsigned long flags;
int p;
- if (port_num < rdma_start_port(device) ||
- port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
p = port_num - rdma_start_port(device);
@@ -1021,7 +1066,7 @@ int ib_get_cached_port_state(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
@@ -1033,21 +1078,46 @@ int ib_get_cached_port_state(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_port_state);
+static int config_non_roce_gid_cache(struct ib_device *device,
+ u8 port, int gid_tbl_len)
+{
+ struct ib_gid_attr gid_attr = {};
+ struct ib_gid_table *table;
+ union ib_gid gid;
+ int ret = 0;
+ int i;
+
+ gid_attr.device = device;
+ gid_attr.port_num = port;
+ table = device->cache.ports[port - rdma_start_port(device)].gid;
+
+ mutex_lock(&table->lock);
+ for (i = 0; i < gid_tbl_len; ++i) {
+ if (!device->query_gid)
+ continue;
+ ret = device->query_gid(device, port, i, &gid);
+ if (ret) {
+ pr_warn("query_gid failed (%d) for %s (index %d)\n",
+ ret, device->name, i);
+ goto err;
+ }
+ gid_attr.index = i;
+ add_modify_gid(table, &gid, &gid_attr);
+ }
+err:
+ mutex_unlock(&table->lock);
+ return ret;
+}
+
static void ib_cache_update(struct ib_device *device,
u8 port,
bool enforce_security)
{
struct ib_port_attr *tprops = NULL;
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
- struct ib_gid_cache {
- int table_len;
- union ib_gid table[0];
- } *gid_cache = NULL;
int i;
int ret;
struct ib_gid_table *table;
- bool use_roce_gid_table =
- rdma_cap_roce_gid_table(device, port);
if (!rdma_is_port_valid(device, port))
return;
@@ -1065,6 +1135,13 @@ static void ib_cache_update(struct ib_device *device,
goto err;
}
+ if (!rdma_protocol_roce(device, port)) {
+ ret = config_non_roce_gid_cache(device, port,
+ tprops->gid_tbl_len);
+ if (ret)
+ goto err;
+ }
+
pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
sizeof *pkey_cache->table, GFP_KERNEL);
if (!pkey_cache)
@@ -1072,15 +1149,6 @@ static void ib_cache_update(struct ib_device *device,
pkey_cache->table_len = tprops->pkey_tbl_len;
- if (!use_roce_gid_table) {
- gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
- sizeof(*gid_cache->table), GFP_KERNEL);
- if (!gid_cache)
- goto err;
-
- gid_cache->table_len = tprops->gid_tbl_len;
- }
-
for (i = 0; i < pkey_cache->table_len; ++i) {
ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
if (ret) {
@@ -1090,33 +1158,12 @@ static void ib_cache_update(struct ib_device *device,
}
}
- if (!use_roce_gid_table) {
- for (i = 0; i < gid_cache->table_len; ++i) {
- ret = ib_query_gid(device, port, i,
- gid_cache->table + i, NULL);
- if (ret) {
- pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
- ret, device->name, i);
- goto err;
- }
- }
- }
-
write_lock_irq(&device->cache.lock);
old_pkey_cache = device->cache.ports[port -
rdma_start_port(device)].pkey;
device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
- if (!use_roce_gid_table) {
- write_lock(&table->rwlock);
- for (i = 0; i < gid_cache->table_len; i++) {
- modify_gid(device, port, table, i, gid_cache->table + i,
- &zattr, false);
- }
- write_unlock(&table->rwlock);
- }
-
device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
device->cache.ports[port - rdma_start_port(device)].port_state =
tprops->state;
@@ -1130,14 +1177,12 @@ static void ib_cache_update(struct ib_device *device,
port,
tprops->subnet_prefix);
- kfree(gid_cache);
kfree(old_pkey_cache);
kfree(tprops);
return;
err:
kfree(pkey_cache);
- kfree(gid_cache);
kfree(tprops);
}
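The reworked cache.c locking — writers take the table mutex in a sleepable context and briefly take the write side of rwlock to publish, while readers take only rwlock — can be condensed into a minimal, self-contained sketch. All names below (toy_table, toy_write, toy_read) are hypothetical; only the mutex-plus-rwlock discipline mirrors the patch:

#include <linux/mutex.h>
#include <linux/spinlock.h>

struct toy_table {
        struct mutex lock;      /* serializes writers; sleepable context */
        rwlock_t rwlock;        /* protects entry state seen by readers */
        int entry;
};

static void toy_write(struct toy_table *t, int v)
{
        mutex_lock(&t->lock);           /* writer side, may sleep here */
        /* ... sleepable work, e.g. calling into the driver ... */
        write_lock_irq(&t->rwlock);     /* publish the update atomically */
        t->entry = v;
        write_unlock_irq(&t->rwlock);
        mutex_unlock(&t->lock);
}

static int toy_read(struct toy_table *t)
{
        int v;

        read_lock(&t->rwlock);          /* readers never take the mutex */
        v = t->entry;
        read_unlock(&t->rwlock);
        return v;
}

This split lets add_roce_gid() sleep in the driver's add_gid callback while lookups stay lockless with respect to the mutex.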
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index e674915..a92e1a5 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -462,13 +462,31 @@ static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
-static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
- struct cm_id_private *cm_id_priv)
+static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
+ struct cm_av *av,
+ struct cm_port *port)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&cm.lock, flags);
+
+ if (&cm_id_priv->av == av)
+ list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
+ else if (&cm_id_priv->alt_av == av)
+ list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
+ else
+ ret = -EINVAL;
+
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return ret;
+}
+
+static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
{
struct cm_device *cm_dev;
struct cm_port *port = NULL;
unsigned long flags;
- int ret;
u8 p;
struct net_device *ndev = ib_get_ndev_from_path(path);
@@ -477,7 +495,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
sa_conv_pathrec_to_gid_type(path),
ndev, &p, NULL)) {
- port = cm_dev->port[p-1];
+ port = cm_dev->port[p - 1];
break;
}
}
@@ -485,9 +503,20 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
if (ndev)
dev_put(ndev);
+ return port;
+}
+static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
+ struct cm_id_private *cm_id_priv)
+{
+ struct cm_device *cm_dev;
+ struct cm_port *port;
+ int ret;
+
+ port = get_cm_port_from_path(path);
if (!port)
return -EINVAL;
+ cm_dev = port->cm_dev;
ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
be16_to_cpu(path->pkey), &av->pkey_index);
@@ -502,16 +531,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
av->timeout = path->packet_life_time + 1;
- spin_lock_irqsave(&cm.lock, flags);
- if (&cm_id_priv->av == av)
- list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
- else if (&cm_id_priv->alt_av == av)
- list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
- else
- ret = -EINVAL;
-
- spin_unlock_irqrestore(&cm.lock, flags);
-
+ ret = add_cm_id_to_port_list(cm_id_priv, av, port);
return ret;
}
@@ -1523,6 +1543,8 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
cm_req_get_primary_local_ack_timeout(req_msg);
primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
primary_path->service_id = req_msg->service_id;
+ if (sa_path_is_roce(primary_path))
+ primary_path->roce.route_resolved = false;
if (cm_req_has_alt_path(req_msg)) {
alt_path->dgid = req_msg->alt_local_gid;
@@ -1542,6 +1564,9 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
cm_req_get_alt_local_ack_timeout(req_msg);
alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
alt_path->service_id = req_msg->service_id;
+
+ if (sa_path_is_roce(alt_path))
+ alt_path->roce.route_resolved = false;
}
cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
}
@@ -3150,6 +3175,13 @@ static int cm_lap_handler(struct cm_work *work)
struct ib_mad_send_buf *msg = NULL;
int ret;
+ /* Alternate path messages are currently not supported for the
+ * RoCE link layer.
+ */
+ if (rdma_protocol_roce(work->port->cm_dev->ib_device,
+ work->port->port_num))
+ return -EINVAL;
+
/* todo: verify LAP request and send reject APR if invalid. */
lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
@@ -3299,6 +3331,13 @@ static int cm_apr_handler(struct cm_work *work)
struct cm_apr_msg *apr_msg;
int ret;
+ /* Alternate path messages are currently not supported for the
+ * RoCE link layer.
+ */
+ if (rdma_protocol_roce(work->port->cm_dev->ib_device,
+ work->port->port_num))
+ return -EINVAL;
+
apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
apr_msg->local_comm_id);
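cm_lap_handler() and cm_apr_handler() now share the same early-reject guard. A condensed sketch of the pattern with the handler body elided; only the rdma_protocol_roce() check and the -EINVAL return come from the patch, and toy_lap_handler() is hypothetical:

static int toy_lap_handler(struct cm_work *work)
{
        /* Alternate path (LAP/APR) handling is IB-only for now */
        if (rdma_protocol_roce(work->port->cm_dev->ib_device,
                               work->port->port_num))
                return -EINVAL;

        /* ... normal IB alternate-path processing ... */
        return 0;
}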
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6ab1059..51a6410 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -62,6 +62,7 @@
#include <rdma/iw_cm.h>
#include "core_priv.h"
+#include "cma_priv.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -174,7 +175,7 @@ static struct cma_pernet *cma_pernet(struct net *net)
return net_generic(net, cma_pernet_id);
}
-static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
+static struct idr *cma_pernet_idr(struct net *net, enum rdma_ucm_port_space ps)
{
struct cma_pernet *pernet = cma_pernet(net);
@@ -203,7 +204,7 @@ struct cma_device {
};
struct rdma_bind_list {
- enum rdma_port_space ps;
+ enum rdma_ucm_port_space ps;
struct hlist_head owners;
unsigned short port;
};
@@ -216,7 +217,7 @@ struct class_port_info_context {
u8 port_num;
};
-static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
+static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
@@ -225,14 +226,15 @@ static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
}
static struct rdma_bind_list *cma_ps_find(struct net *net,
- enum rdma_port_space ps, int snum)
+ enum rdma_ucm_port_space ps, int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
return idr_find(idr, snum);
}
-static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum)
+static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps,
+ int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
@@ -327,46 +329,6 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
* We do this by disabling removal notification while a callback is in process,
* and reporting it after the callback completes.
*/
-struct rdma_id_private {
- struct rdma_cm_id id;
-
- struct rdma_bind_list *bind_list;
- struct hlist_node node;
- struct list_head list; /* listen_any_list or cma_device.list */
- struct list_head listen_list; /* per device listens */
- struct cma_device *cma_dev;
- struct list_head mc_list;
-
- int internal_id;
- enum rdma_cm_state state;
- spinlock_t lock;
- struct mutex qp_mutex;
-
- struct completion comp;
- atomic_t refcount;
- struct mutex handler_mutex;
-
- int backlog;
- int timeout_ms;
- struct ib_sa_query *query;
- int query_id;
- union {
- struct ib_cm_id *ib;
- struct iw_cm_id *iw;
- } cm_id;
-
- u32 seq_num;
- u32 qkey;
- u32 qp_num;
- pid_t owner;
- u32 options;
- u8 srq;
- u8 tos;
- bool tos_set;
- u8 reuseaddr;
- u8 afonly;
- enum ib_gid_type gid_type;
-};
struct cma_multicast {
struct rdma_id_private *id_priv;
@@ -505,6 +467,8 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
+ id_priv->res.type = RDMA_RESTRACK_CM_ID;
+ rdma_restrack_add(&id_priv->res);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
@@ -777,10 +741,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
complete(&id_priv->comp);
}
-struct rdma_cm_id *rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_port_space ps,
- enum ib_qp_type qp_type)
+struct rdma_cm_id *__rdma_create_id(struct net *net,
+ rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const char *caller)
{
struct rdma_id_private *id_priv;
@@ -788,7 +752,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
if (!id_priv)
return ERR_PTR(-ENOMEM);
- id_priv->owner = task_pid_nr(current);
+ if (caller)
+ id_priv->res.kern_name = caller;
+ else
+ rdma_restrack_set_task(&id_priv->res, current);
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
@@ -808,7 +775,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
return &id_priv->id;
}
-EXPORT_SYMBOL(rdma_create_id);
+EXPORT_SYMBOL(__rdma_create_id);
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
@@ -1400,7 +1367,7 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
return net_dev;
}
-static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
+static enum rdma_ucm_port_space rdma_ps_from_service_id(__be64 service_id)
{
return (be64_to_cpu(service_id) >> 16) & 0xffff;
}
@@ -1441,21 +1408,12 @@ static bool cma_match_private_data(struct rdma_id_private *id_priv,
return true;
}
-static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num)
-{
- enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num);
- enum rdma_transport_type transport =
- rdma_node_get_transport(device->node_type);
-
- return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB;
-}
-
static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
struct ib_device *device = id->device;
const int port_num = id->port_num ?: rdma_start_port(device);
- return cma_protocol_roce_dev_port(device, port_num);
+ return rdma_protocol_roce(device, port_num);
}
static bool cma_match_net_dev(const struct rdma_cm_id *id,
@@ -1468,7 +1426,7 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id,
/* This request is an AF_IB request or a RoCE request */
return (!id->port_num || id->port_num == port_num) &&
(addr->src_addr.ss_family == AF_IB ||
- cma_protocol_roce_dev_port(id->device, port_num));
+ rdma_protocol_roce(id->device, port_num));
return !addr->dev_addr.bound_dev_if ||
(net_eq(dev_net(net_dev), addr->dev_addr.net) &&
@@ -1523,7 +1481,7 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
/* Assuming the protocol is AF_IB */
*net_dev = NULL;
- } else if (cma_protocol_roce_dev_port(req.device, req.port)) {
+ } else if (rdma_protocol_roce(req.device, req.port)) {
/* TODO find the net dev matching the request parameters
* through the RoCE GID table */
*net_dev = NULL;
@@ -1668,6 +1626,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
mutex_unlock(&id_priv->handler_mutex);
if (id_priv->cma_dev) {
+ rdma_restrack_del(&id_priv->res);
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1817,6 +1776,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event,
struct net_device *net_dev)
{
+ struct rdma_id_private *listen_id_priv;
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
struct rdma_route *rt;
@@ -1826,9 +1786,11 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
ib_event->param.req_rcvd.primary_path->service_id;
int ret;
- id = rdma_create_id(listen_id->route.addr.dev_addr.net,
+ listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
+ id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
listen_id->event_handler, listen_id->context,
- listen_id->ps, ib_event->param.req_rcvd.qp_type);
+ listen_id->ps, ib_event->param.req_rcvd.qp_type,
+ listen_id_priv->res.kern_name);
if (IS_ERR(id))
return NULL;
@@ -1877,14 +1839,17 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
struct ib_cm_event *ib_event,
struct net_device *net_dev)
{
+ struct rdma_id_private *listen_id_priv;
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
struct net *net = listen_id->route.addr.dev_addr.net;
int ret;
- id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
- listen_id->ps, IB_QPT_UD);
+ listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
+ id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
+ listen_id->ps, IB_QPT_UD,
+ listen_id_priv->res.kern_name);
if (IS_ERR(id))
return NULL;
@@ -2150,10 +2115,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
goto out;
/* Create a new RDMA id for the new IW CM ID */
- new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
- listen_id->id.event_handler,
- listen_id->id.context,
- RDMA_PS_TCP, IB_QPT_RC);
+ new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+ listen_id->id.event_handler,
+ listen_id->id.context,
+ RDMA_PS_TCP, IB_QPT_RC,
+ listen_id->res.kern_name);
if (IS_ERR(new_cm_id)) {
ret = -ENOMEM;
goto out;
@@ -2278,8 +2244,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
return;
- id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
- id_priv->id.qp_type);
+ id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
+ id_priv->id.qp_type, id_priv->res.kern_name);
if (IS_ERR(id))
return;
@@ -2541,6 +2507,7 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
gid_type = ib_network_to_gid_type(addr->dev_addr.network);
route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
+ route->path_rec->roce.route_resolved = true;
sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
sa_path_set_ifindex(route->path_rec, ndev->ifindex);
sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
@@ -3028,7 +2995,7 @@ static void cma_bind_port(struct rdma_bind_list *bind_list,
hlist_add_head(&id_priv->node, &bind_list->owners);
}
-static int cma_alloc_port(enum rdma_port_space ps,
+static int cma_alloc_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv, unsigned short snum)
{
struct rdma_bind_list *bind_list;
@@ -3091,7 +3058,7 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list,
return 0;
}
-static int cma_alloc_any_port(enum rdma_port_space ps,
+static int cma_alloc_any_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv)
{
static unsigned int last_used_port;
@@ -3169,7 +3136,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
return 0;
}
-static int cma_use_port(enum rdma_port_space ps,
+static int cma_use_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv)
{
struct rdma_bind_list *bind_list;
@@ -3203,8 +3170,8 @@ static int cma_bind_listen(struct rdma_id_private *id_priv)
return ret;
}
-static enum rdma_port_space cma_select_inet_ps(
- struct rdma_id_private *id_priv)
+static enum rdma_ucm_port_space
+cma_select_inet_ps(struct rdma_id_private *id_priv)
{
switch (id_priv->id.ps) {
case RDMA_PS_TCP:
@@ -3218,9 +3185,10 @@ static enum rdma_port_space cma_select_inet_ps(
}
}
-static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
+static enum rdma_ucm_port_space
+cma_select_ib_ps(struct rdma_id_private *id_priv)
{
- enum rdma_port_space ps = 0;
+ enum rdma_ucm_port_space ps = 0;
struct sockaddr_ib *sib;
u64 sid_ps, mask, sid;
@@ -3251,7 +3219,7 @@ static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
static int cma_get_port(struct rdma_id_private *id_priv)
{
- enum rdma_port_space ps;
+ enum rdma_ucm_port_space ps;
int ret;
if (cma_family(id_priv) != AF_IB)
@@ -3389,8 +3357,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
- if (id_priv->cma_dev)
+ if (id_priv->cma_dev) {
+ rdma_restrack_del(&id_priv->res);
cma_release_dev(id_priv);
+ }
err1:
cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
@@ -3773,14 +3743,18 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
-int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ const char *caller)
{
struct rdma_id_private *id_priv;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- id_priv->owner = task_pid_nr(current);
+ if (caller)
+ id_priv->res.kern_name = caller;
+ else
+ rdma_restrack_set_task(&id_priv->res, current);
if (!cma_comp(id_priv, RDMA_CM_CONNECT))
return -EINVAL;
@@ -3820,7 +3794,7 @@ reject:
rdma_reject(id, NULL, 0);
return ret;
}
-EXPORT_SYMBOL(rdma_accept);
+EXPORT_SYMBOL(__rdma_accept);
int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
{
@@ -3938,10 +3912,14 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
rdma_start_port(id_priv->cma_dev->device)];
event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
- ib_init_ah_from_mcmember(id_priv->id.device,
- id_priv->id.port_num, &multicast->rec,
- ndev, gid_type,
- &event.param.ud.ah_attr);
+ ret = ib_init_ah_from_mcmember(id_priv->id.device,
+ id_priv->id.port_num,
+ &multicast->rec,
+ ndev, gid_type,
+ &event.param.ud.ah_attr);
+ if (ret)
+ event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+
event.param.ud.qp_num = 0xFFFFFF;
event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
if (ndev)
@@ -4501,7 +4479,7 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
goto out;
- id_stats->pid = id_priv->owner;
+ id_stats->pid = task_pid_vnr(id_priv->res.task);
id_stats->port_space = id->ps;
id_stats->cm_state = id_priv->state;
id_stats->qp_num = id_priv->qp_num;
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
new file mode 100644
index 0000000..194cfe7
--- /dev/null
+++ b/drivers/infiniband/core/cma_priv.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _CMA_PRIV_H
+#define _CMA_PRIV_H
+
+enum rdma_cm_state {
+ RDMA_CM_IDLE,
+ RDMA_CM_ADDR_QUERY,
+ RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ROUTE_QUERY,
+ RDMA_CM_ROUTE_RESOLVED,
+ RDMA_CM_CONNECT,
+ RDMA_CM_DISCONNECT,
+ RDMA_CM_ADDR_BOUND,
+ RDMA_CM_LISTEN,
+ RDMA_CM_DEVICE_REMOVAL,
+ RDMA_CM_DESTROYING
+};
+
+struct rdma_id_private {
+ struct rdma_cm_id id;
+
+ struct rdma_bind_list *bind_list;
+ struct hlist_node node;
+ struct list_head list; /* listen_any_list or cma_device.list */
+ struct list_head listen_list; /* per device listens */
+ struct cma_device *cma_dev;
+ struct list_head mc_list;
+
+ int internal_id;
+ enum rdma_cm_state state;
+ spinlock_t lock;
+ struct mutex qp_mutex;
+
+ struct completion comp;
+ atomic_t refcount;
+ struct mutex handler_mutex;
+
+ int backlog;
+ int timeout_ms;
+ struct ib_sa_query *query;
+ int query_id;
+ union {
+ struct ib_cm_id *ib;
+ struct iw_cm_id *iw;
+ } cm_id;
+
+ u32 seq_num;
+ u32 qkey;
+ u32 qp_num;
+ u32 options;
+ u8 srq;
+ u8 tos;
+ bool tos_set;
+ u8 reuseaddr;
+ u8 afonly;
+ enum ib_gid_type gid_type;
+
+ /*
+ * Internal to RDMA/core, don't use in the drivers
+ */
+ struct rdma_restrack_entry res;
+};
+#endif /* _CMA_PRIV_H */
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 25bb178..54163a6 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -333,4 +333,15 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
return qp;
}
+
+struct rdma_dev_addr;
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr,
+ struct rdma_dev_addr *addr);
+
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+ const union ib_gid *dgid,
+ u8 *dmac, const struct net_device *ndev,
+ int *hoplimit);
+
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index b7459cf..ea9fbcf 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -103,7 +103,6 @@ static int ib_device_check_mandatory(struct ib_device *device)
IB_MANDATORY_FUNC(query_device),
IB_MANDATORY_FUNC(query_port),
IB_MANDATORY_FUNC(query_pkey),
- IB_MANDATORY_FUNC(query_gid),
IB_MANDATORY_FUNC(alloc_pd),
IB_MANDATORY_FUNC(dealloc_pd),
IB_MANDATORY_FUNC(create_ah),
@@ -853,7 +852,7 @@ int ib_query_port(struct ib_device *device,
if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
return 0;
- err = ib_query_gid(device, port_num, 0, &gid, NULL);
+ err = device->query_gid(device, port_num, 0, &gid);
if (err)
return err;
@@ -871,19 +870,13 @@ EXPORT_SYMBOL(ib_query_port);
* @attr: Returned GID attributes related to this GID index (only in RoCE).
* NULL means ignore.
*
- * ib_query_gid() fetches the specified GID table entry.
+ * ib_query_gid() fetches the specified GID table entry from the cache.
*/
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid,
struct ib_gid_attr *attr)
{
- if (rdma_cap_roce_gid_table(device, port_num))
- return ib_get_cached_gid(device, port_num, index, gid, attr);
-
- if (attr)
- return -EINVAL;
-
- return device->query_gid(device, port_num, index, gid);
+ return ib_get_cached_gid(device, port_num, index, gid, attr);
}
EXPORT_SYMBOL(ib_query_gid);
@@ -1049,19 +1042,18 @@ EXPORT_SYMBOL(ib_modify_port);
* a specified GID value occurs. It searches only the IB link layer.
* @device: The device to query.
* @gid: The GID value to search for.
- * @ndev: The ndev related to the GID to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- struct net_device *ndev, u8 *port_num, u16 *index)
+ u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
- if (rdma_cap_roce_gid_table(device, port))
+ if (!rdma_protocol_ib(device, port))
continue;
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 81528f6..9821ae9 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -439,10 +439,9 @@ struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
struct sk_buff *skb = NULL;
skb = dev_alloc_skb(IWPM_MSG_SIZE);
- if (!skb) {
- pr_err("%s Unable to allocate skb\n", __func__);
+ if (!skb)
goto create_nlmsg_exit;
- }
+
if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op,
NLM_F_REQUEST))) {
pr_warn("%s: Unable to put the nlmsg header\n", __func__);
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 45f2f09..4eb72ff 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -724,21 +724,19 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
{
int ret;
u16 gid_index;
- u8 p;
-
- if (rdma_protocol_roce(device, port_num)) {
- ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
- gid_type, port_num,
- ndev,
- &gid_index);
- } else if (rdma_protocol_ib(device, port_num)) {
- ret = ib_find_cached_gid(device, &rec->port_gid,
- IB_GID_TYPE_IB, NULL, &p,
- &gid_index);
- } else {
- ret = -EINVAL;
- }
+ /* The GID table is not based on the netdevice for the IB link layer,
+ * so ignore ndev during the search.
+ */
+ if (rdma_protocol_ib(device, port_num))
+ ndev = NULL;
+ else if (!rdma_protocol_roce(device, port_num))
+ return -EINVAL;
+
+ ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
+ gid_type, port_num,
+ ndev,
+ &gid_index);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 5326a68..eb56776 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -34,9 +34,11 @@
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <net/netlink.h>
+#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"
+#include "cma_priv.h"
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
@@ -71,6 +73,31 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
.len = TASK_COMM_LEN },
+ [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
+ .len = sizeof(struct __kernel_sockaddr_storage) },
+ [RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
+ .len = sizeof(struct __kernel_sockaddr_storage) },
+ [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = IFNAMSIZ },
};
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
@@ -99,7 +126,7 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
return -EMSGSIZE;
ib_get_device_fw_str(device, fw);
- /* Device without FW has strlen(fw) */
+ /* Device without FW has strlen(fw) = 0 */
if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
return -EMSGSIZE;
@@ -115,8 +142,10 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
}
static int fill_port_info(struct sk_buff *msg,
- struct ib_device *device, u32 port)
+ struct ib_device *device, u32 port,
+ const struct net *net)
{
+ struct net_device *netdev = NULL;
struct ib_port_attr attr;
int ret;
@@ -150,7 +179,23 @@ static int fill_port_info(struct sk_buff *msg,
return -EMSGSIZE;
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
return -EMSGSIZE;
- return 0;
+
+ if (device->get_netdev)
+ netdev = device->get_netdev(device, port);
+
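+ /* Report the attached netdev only when it belongs to the caller's
+ * network namespace.
+ */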
+ if (netdev && net_eq(dev_net(netdev), net)) {
+ ret = nla_put_u32(msg,
+ RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
+ if (ret)
+ goto out;
+ ret = nla_put_string(msg,
+ RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
+ }
+
+out:
+ if (netdev)
+ dev_put(netdev);
+ return ret;
}
static int fill_res_info_entry(struct sk_buff *msg,
@@ -182,6 +227,8 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
[RDMA_RESTRACK_PD] = "pd",
[RDMA_RESTRACK_CQ] = "cq",
[RDMA_RESTRACK_QP] = "qp",
+ [RDMA_RESTRACK_CM_ID] = "cm_id",
+ [RDMA_RESTRACK_MR] = "mr",
};
struct rdma_restrack_root *res = &device->res;
@@ -212,10 +259,29 @@ err:
return ret;
}
-static int fill_res_qp_entry(struct sk_buff *msg,
- struct ib_qp *qp, uint32_t port)
+static int fill_res_name_pid(struct sk_buff *msg,
+ struct rdma_restrack_entry *res)
+{
+ /*
+ * For user resources, the netlink user should read /proc/PID/comm
+ * to get the name of the task.
+ */
+ if (rdma_is_kernel_res(res)) {
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+ res->kern_name))
+ return -EMSGSIZE;
+ } else {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
+ task_pid_vnr(res->task)))
+ return -EMSGSIZE;
+ }
+ return 0;
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
{
- struct rdma_restrack_entry *res = &qp->res;
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
struct ib_qp_init_attr qp_init_attr;
struct nlattr *entry_attr;
struct ib_qp_attr qp_attr;
@@ -262,19 +328,172 @@ static int fill_res_qp_entry(struct sk_buff *msg,
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
goto err;
- /*
- * Existence of task means that it is user QP and netlink
- * user is invited to go and read /proc/PID/comm to get name
- * of the task file and res->task_com should be NULL.
- */
- if (rdma_is_kernel_res(res)) {
- if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, res->kern_name))
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
+static int fill_res_cm_id_entry(struct sk_buff *msg,
+ struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct rdma_id_private *id_priv =
+ container_of(res, struct rdma_id_private, res);
+ struct rdma_cm_id *cm_id = &id_priv->id;
+ struct nlattr *entry_attr;
+
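+ /* A port index of 0 means dump cm_ids bound to any port */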
+ if (port && port != cm_id->port_num)
+ return 0;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ if (cm_id->port_num &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
+ goto err;
+
+ if (id_priv->qp_num) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
goto err;
- } else {
- if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, task_pid_vnr(res->task)))
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
goto err;
}
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
+ goto err;
+
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
+ goto err;
+
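+ /* Dump only addresses whose family was actually set */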
+ if (cm_id->route.addr.src_addr.ss_family &&
+ nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
+ sizeof(cm_id->route.addr.src_addr),
+ &cm_id->route.addr.src_addr))
+ goto err;
+ if (cm_id->route.addr.dst_addr.ss_family &&
+ nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
+ sizeof(cm_id->route.addr.dst_addr),
+ &cm_id->route.addr.dst_addr))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
+static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_cq *cq = container_of(res, struct ib_cq, res);
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
+ if (!entry_attr)
+ goto out;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
+ goto err;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
+ atomic_read(&cq->usecnt), 0))
+ goto err;
+
+ /* Poll context is only valid for kernel CQs */
+ if (rdma_is_kernel_res(res) &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
+static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_mr *mr = container_of(res, struct ib_mr, res);
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
+ if (!entry_attr)
+ goto out;
+
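+ /* Expose the memory keys and IOVA only to CAP_NET_ADMIN capable users */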
+ if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
+ goto err;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
+ mr->iova, 0))
+ goto err;
+ }
+
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+out:
+ return -EMSGSIZE;
+}
+
+static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_pd *pd = container_of(res, struct ib_pd, res);
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
+ if (!entry_attr)
+ goto out;
+
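+ /* The local DMA lkey and the unsafe global rkey are privileged information */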
+ if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
+ pd->local_dma_lkey))
+ goto err;
+ if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
+ pd->unsafe_global_rkey))
+ goto err;
+ }
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
+ atomic_read(&pd->usecnt), 0))
+ goto err;
+ if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
+ pd->unsafe_global_rkey))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
nla_nest_end(msg, entry_attr);
return 0;
@@ -405,7 +624,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
- err = fill_port_info(msg, device, port);
+ err = fill_port_info(msg, device, port, sock_net(skb->sk));
if (err)
goto err_free;
@@ -465,7 +684,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
RDMA_NLDEV_CMD_PORT_GET),
0, NLM_F_MULTI);
- if (fill_port_info(skb, device, p)) {
+ if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
nlmsg_cancel(skb, nlh);
goto out;
}
@@ -558,23 +777,60 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}
-static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
- struct netlink_callback *cb)
+struct nldev_fill_res_entry {
+ int (*fill_res_func)(struct sk_buff *msg, struct netlink_callback *cb,
+ struct rdma_restrack_entry *res, u32 port);
+ enum rdma_nldev_attr nldev_attr;
+ enum rdma_nldev_command nldev_cmd;
+};
+
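+/* Per resource-type dispatch table used by the common dumpit handler */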
+static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
+ [RDMA_RESTRACK_QP] = {
+ .fill_res_func = fill_res_qp_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
+ },
+ [RDMA_RESTRACK_CM_ID] = {
+ .fill_res_func = fill_res_cm_id_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
+ },
+ [RDMA_RESTRACK_CQ] = {
+ .fill_res_func = fill_res_cq_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
+ },
+ [RDMA_RESTRACK_MR] = {
+ .fill_res_func = fill_res_mr_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
+ },
+ [RDMA_RESTRACK_PD] = {
+ .fill_res_func = fill_res_pd_entry,
+ .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
+ },
+};
+
+static int res_get_common_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ enum rdma_restrack_type res_type)
{
+ const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct rdma_restrack_entry *res;
int err, ret = 0, idx = 0;
struct nlattr *table_attr;
struct ib_device *device;
int start = cb->args[0];
- struct ib_qp *qp = NULL;
struct nlmsghdr *nlh;
u32 index, port = 0;
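+ /* Set once at least one entry has been added to the dump */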
+ bool filled = false;
err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
nldev_policy, NULL);
/*
- * Right now, we are expecting the device index to get QP information,
+ * Right now, we are expecting the device index to get res information,
* but it is possible to extend this code to return all devices in
* one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
* If it doesn't exist, we will iterate over all devices.
@@ -601,7 +857,7 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
}
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
0, NLM_F_MULTI);
if (fill_nldev_handle(skb, device)) {
@@ -609,24 +865,26 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
goto err;
}
- table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_QP);
+ table_attr = nla_nest_start(skb, fe->nldev_attr);
if (!table_attr) {
ret = -EMSGSIZE;
goto err;
}
down_read(&device->res.rwsem);
- hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_QP) {
+ hash_for_each_possible(device->res.hash, res, node, res_type) {
if (idx < start)
goto next;
if ((rdma_is_kernel_res(res) &&
task_active_pid_ns(current) != &init_pid_ns) ||
- (!rdma_is_kernel_res(res) &&
- task_active_pid_ns(current) != task_active_pid_ns(res->task)))
+ (!rdma_is_kernel_res(res) && task_active_pid_ns(current) !=
+ task_active_pid_ns(res->task)))
/*
- * 1. Kernel QPs should be visible in init namspace only
- * 2. Present only QPs visible in the current namespace
+ * 1. Kern resources should be visible in init
+ *    namespace only
+ * 2. Present only resources visible in the current
+ * namespace
*/
goto next;
@@ -638,10 +896,10 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
*/
goto next;
- qp = container_of(res, struct ib_qp, res);
+ filled = true;
up_read(&device->res.rwsem);
- ret = fill_res_qp_entry(skb, qp, port);
+ ret = fe->fill_res_func(skb, cb, res, port);
down_read(&device->res.rwsem);
/*
* Return resource back, but it won't be released till
@@ -667,10 +925,10 @@ next: idx++;
cb->args[0] = idx;
/*
- * No more QPs to fill, cancel the message and
+ * No more entries to fill, cancel the message and
* return 0 to mark end of dumpit.
*/
- if (!qp)
+ if (!filled)
goto err;
put_device(&device->dev);
@@ -688,6 +946,36 @@ err_index:
return ret;
}
+static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP);
+}
+
+static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CM_ID);
+}
+
+static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ);
+}
+
+static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR);
+}
+
+static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
+}
+
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
@@ -714,6 +1002,18 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
* too.
*/
},
+ [RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
+ .dump = nldev_res_get_cm_id_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_CQ_GET] = {
+ .dump = nldev_res_get_cq_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_MR_GET] = {
+ .dump = nldev_res_get_mr_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_PD_GET] = {
+ .dump = nldev_res_get_pd_dumpit,
+ },
};
void __init nldev_init(void)
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index d8eead5..a6e9049 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -350,13 +350,6 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
return type->type_class->alloc_begin(type, ucontext);
}
-static void uverbs_uobject_add(struct ib_uobject *uobject)
-{
- mutex_lock(&uobject->context->uobjects_lock);
- list_add(&uobject->list, &uobject->context->uobjects);
- mutex_unlock(&uobject->context->uobjects_lock);
-}
-
static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
@@ -502,7 +495,6 @@ out:
static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
- uverbs_uobject_add(uobj);
spin_lock(&uobj->context->ufile->idr_lock);
/*
* We already allocated this IDR with a NULL object, so
@@ -518,7 +510,6 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
struct ib_uobject_file *uobj_file =
container_of(uobj, struct ib_uobject_file, uobj);
- uverbs_uobject_add(&uobj_file->uobj);
fd_install(uobj_file->uobj.id, uobj->object);
/* This shouldn't be used anymore. Use the file object instead */
uobj_file->uobj.id = 0;
@@ -545,6 +536,10 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
assert_uverbs_usecnt(uobj, true);
atomic_set(&uobj->usecnt, 0);
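+ /* Add the uobject to the context's list before alloc_commit()
+ * publishes it to userspace (e.g. via fd_install), so that a
+ * visible object is always reachable for cleanup.
+ */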
+ mutex_lock(&uobj->context->uobjects_lock);
+ list_add(&uobj->list, &uobj->context->uobjects);
+ mutex_unlock(&uobj->context->uobjects_lock);
+
uobj->type->type_class->alloc_commit(uobj);
up_read(&uobj->context->cleanup_rwsem);
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 3dbc4e4..efddd13 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -3,20 +3,66 @@
* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
*/
+#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/restrack.h>
#include <linux/mutex.h>
#include <linux/sched/task.h>
#include <linux/pid_namespace.h>
+#include "cma_priv.h"
+
void rdma_restrack_init(struct rdma_restrack_root *res)
{
init_rwsem(&res->rwsem);
}
+static const char *type2str(enum rdma_restrack_type type)
+{
+ static const char * const names[RDMA_RESTRACK_MAX] = {
+ [RDMA_RESTRACK_PD] = "PD",
+ [RDMA_RESTRACK_CQ] = "CQ",
+ [RDMA_RESTRACK_QP] = "QP",
+ [RDMA_RESTRACK_CM_ID] = "CM_ID",
+ [RDMA_RESTRACK_MR] = "MR",
+ };
+
+ return names[type];
+};
+
void rdma_restrack_clean(struct rdma_restrack_root *res)
{
- WARN_ON_ONCE(!hash_empty(res->hash));
+ struct rdma_restrack_entry *e;
+ char buf[TASK_COMM_LEN];
+ struct ib_device *dev;
+ const char *owner;
+ int bkt;
+
+ if (hash_empty(res->hash))
+ return;
+
+ dev = container_of(res, struct ib_device, res);
+ pr_err("restrack: %s", CUT_HERE);
+ pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n",
+ dev->name);
+ hash_for_each(res->hash, bkt, e, node) {
+ if (rdma_is_kernel_res(e)) {
+ owner = e->kern_name;
+ } else {
+ /*
+ * There is no need to call get_task_struct here,
+ * because we can be here only if there are more
+ * get_task_struct() calls than put_task_struct() calls.
+ */
+ get_task_comm(buf, e->task);
+ owner = buf;
+ }
+
+ pr_err("restrack: %s %s object allocated by %s is not freed\n",
+ rdma_is_kernel_res(e) ? "Kernel" : "User",
+ type2str(e->type), owner);
+ }
+ pr_err("restrack: %s", CUT_HERE);
}
int rdma_restrack_count(struct rdma_restrack_root *res,
@@ -40,51 +86,48 @@ EXPORT_SYMBOL(rdma_restrack_count);
static void set_kern_name(struct rdma_restrack_entry *res)
{
- enum rdma_restrack_type type = res->type;
- struct ib_qp *qp;
-
- if (type != RDMA_RESTRACK_QP)
- /* PD and CQ types already have this name embedded in */
- return;
+ struct ib_pd *pd;
- qp = container_of(res, struct ib_qp, res);
- if (!qp->pd) {
- WARN_ONCE(true, "XRC QPs are not supported\n");
- /* Survive, despite the programmer's error */
- res->kern_name = " ";
- return;
+ switch (res->type) {
+ case RDMA_RESTRACK_QP:
+ pd = container_of(res, struct ib_qp, res)->pd;
+ if (!pd) {
+ WARN_ONCE(true, "XRC QPs are not supported\n");
+ /* Survive, despite the programmer's error */
+ res->kern_name = " ";
+ }
+ break;
+ case RDMA_RESTRACK_MR:
+ pd = container_of(res, struct ib_mr, res)->pd;
+ break;
+ default:
+ /* Other types set kern_name directly */
+ pd = NULL;
+ break;
}
- res->kern_name = qp->pd->res.kern_name;
+ if (pd)
+ res->kern_name = pd->res.kern_name;
}
static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
{
- enum rdma_restrack_type type = res->type;
- struct ib_device *dev;
- struct ib_pd *pd;
- struct ib_cq *cq;
- struct ib_qp *qp;
-
- switch (type) {
+ switch (res->type) {
case RDMA_RESTRACK_PD:
- pd = container_of(res, struct ib_pd, res);
- dev = pd->device;
- break;
+ return container_of(res, struct ib_pd, res)->device;
case RDMA_RESTRACK_CQ:
- cq = container_of(res, struct ib_cq, res);
- dev = cq->device;
- break;
+ return container_of(res, struct ib_cq, res)->device;
case RDMA_RESTRACK_QP:
- qp = container_of(res, struct ib_qp, res);
- dev = qp->device;
- break;
+ return container_of(res, struct ib_qp, res)->device;
+ case RDMA_RESTRACK_CM_ID:
+ return container_of(res, struct rdma_id_private,
+ res)->id.device;
+ case RDMA_RESTRACK_MR:
+ return container_of(res, struct ib_mr, res)->device;
default:
- WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
+ WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return NULL;
}
-
- return dev;
}
static bool res_is_user(struct rdma_restrack_entry *res)
@@ -96,6 +139,10 @@ static bool res_is_user(struct rdma_restrack_entry *res)
return container_of(res, struct ib_cq, res)->uobject;
case RDMA_RESTRACK_QP:
return container_of(res, struct ib_qp, res)->uobject;
+ case RDMA_RESTRACK_CM_ID:
+ return !res->kern_name;
+ case RDMA_RESTRACK_MR:
+ return container_of(res, struct ib_mr, res)->pd->uobject;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return false;
@@ -109,13 +156,15 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
if (!dev)
return;
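+ /* Only user space CM_IDs may arrive with res->task already set;
+ * reset the task for every other resource.
+ */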
+ if (res->type != RDMA_RESTRACK_CM_ID || !res_is_user(res))
+ res->task = NULL;
+
if (res_is_user(res)) {
- get_task_struct(current);
- res->task = current;
+ if (!res->task)
+ rdma_restrack_set_task(res, current);
res->kern_name = NULL;
} else {
set_kern_name(res);
- res->task = NULL;
}
kref_init(&res->kref);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 9f029a1..a61ec7e 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1227,118 +1227,130 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask;
}
-int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
- struct sa_path_rec *rec,
- struct rdma_ah_attr *ah_attr)
+static int
+roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
+ struct sa_path_rec *rec)
{
+ struct net_device *resolved_dev;
+ struct net_device *ndev;
+ struct net_device *idev;
+ struct rdma_dev_addr dev_addr = {
+ .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
+ sa_path_get_ifindex(rec) : 0),
+ .net = sa_path_get_ndev(rec) ?
+ sa_path_get_ndev(rec) :
+ &init_net
+ };
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid_addr, dgid_addr;
int ret;
- u16 gid_index;
- int use_roce;
- struct net_device *ndev = NULL;
- memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->type = rdma_ah_find_type(device, port_num);
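+ /* The route needs to be resolved only once per path record */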
+ if (rec->roce.route_resolved)
+ return 0;
- rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+ if (!device->get_netdev)
+ return -EOPNOTSUPP;
- if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) &&
- (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)))
- rdma_ah_set_make_grd(ah_attr, true);
+ rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
+ rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
- rdma_ah_set_sl(ah_attr, rec->sl);
- rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) &
- get_src_path_mask(device, port_num));
- rdma_ah_set_port_num(ah_attr, port_num);
- rdma_ah_set_static_rate(ah_attr, rec->rate);
- use_roce = rdma_cap_eth_ah(device, port_num);
-
- if (use_roce) {
- struct net_device *idev;
- struct net_device *resolved_dev;
- struct rdma_dev_addr dev_addr = {
- .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
- sa_path_get_ifindex(rec) : 0),
- .net = sa_path_get_ndev(rec) ?
- sa_path_get_ndev(rec) :
- &init_net
- };
- union {
- struct sockaddr _sockaddr;
- struct sockaddr_in _sockaddr_in;
- struct sockaddr_in6 _sockaddr_in6;
- } sgid_addr, dgid_addr;
-
- if (!device->get_netdev)
- return -EOPNOTSUPP;
-
- rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
- rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
-
- /* validate the route */
- ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
- &dgid_addr._sockaddr, &dev_addr);
- if (ret)
- return ret;
+ /* validate the route */
+ ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
+ &dgid_addr._sockaddr, &dev_addr);
+ if (ret)
+ return ret;
- if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
- dev_addr.network == RDMA_NETWORK_IPV6) &&
- rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
- return -EINVAL;
+ if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+ dev_addr.network == RDMA_NETWORK_IPV6) &&
+ rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
+ return -EINVAL;
- idev = device->get_netdev(device, port_num);
- if (!idev)
- return -ENODEV;
+ idev = device->get_netdev(device, port_num);
+ if (!idev)
+ return -ENODEV;
- resolved_dev = dev_get_by_index(dev_addr.net,
- dev_addr.bound_dev_if);
- if (!resolved_dev) {
- dev_put(idev);
- return -ENODEV;
- }
- ndev = ib_get_ndev_from_path(rec);
- rcu_read_lock();
- if ((ndev && ndev != resolved_dev) ||
- (resolved_dev != idev &&
- !rdma_is_upper_dev_rcu(idev, resolved_dev)))
- ret = -EHOSTUNREACH;
- rcu_read_unlock();
- dev_put(idev);
- dev_put(resolved_dev);
- if (ret) {
- if (ndev)
- dev_put(ndev);
- return ret;
- }
+ resolved_dev = dev_get_by_index(dev_addr.net,
+ dev_addr.bound_dev_if);
+ if (!resolved_dev) {
+ ret = -ENODEV;
+ goto done;
}
+ ndev = ib_get_ndev_from_path(rec);
+ rcu_read_lock();
+ if ((ndev && ndev != resolved_dev) ||
+ (resolved_dev != idev &&
+ !rdma_is_upper_dev_rcu(idev, resolved_dev)))
+ ret = -EHOSTUNREACH;
+ rcu_read_unlock();
+ dev_put(resolved_dev);
+ if (ndev)
+ dev_put(ndev);
+done:
+ dev_put(idev);
+ if (!ret)
+ rec->roce.route_resolved = true;
+ return ret;
+}
- if (rec->hop_limit > 0 || use_roce) {
- enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
+static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr)
+{
+ enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
+ struct net_device *ndev;
+ u16 gid_index;
+ int ret;
- ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
- port_num, ndev, &gid_index);
- if (ret) {
- if (ndev)
- dev_put(ndev);
- return ret;
- }
+ ndev = ib_get_ndev_from_path(rec);
+ ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
+ port_num, ndev, &gid_index);
+ if (ndev)
+ dev_put(ndev);
+ if (ret)
+ return ret;
- rdma_ah_set_grh(ah_attr, &rec->dgid,
- be32_to_cpu(rec->flow_label),
- gid_index, rec->hop_limit,
- rec->traffic_class);
- if (ndev)
- dev_put(ndev);
- }
+ rdma_ah_set_grh(ah_attr, &rec->dgid,
+ be32_to_cpu(rec->flow_label),
+ gid_index, rec->hop_limit,
+ rec->traffic_class);
+ return 0;
+}
- if (use_roce) {
- u8 *dmac = sa_path_get_dmac(rec);
+int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr)
+{
+ int ret = 0;
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ ah_attr->type = rdma_ah_find_type(device, port_num);
+ rdma_ah_set_sl(ah_attr, rec->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+ rdma_ah_set_static_rate(ah_attr, rec->rate);
- if (!dmac)
- return -EINVAL;
- memcpy(ah_attr->roce.dmac, dmac, ETH_ALEN);
+ if (sa_path_is_roce(rec)) {
+ ret = roce_resolve_route_from_path(device, port_num, rec);
+ if (ret)
+ return ret;
+
+ memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
+ } else {
+ rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+ if (sa_path_is_opa(rec) &&
+ rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
+ rdma_ah_set_make_grd(ah_attr, true);
+
+ rdma_ah_set_path_bits(ah_attr,
+ be32_to_cpu(sa_path_get_slid(rec)) &
+ get_src_path_mask(device, port_num));
}
- return 0;
+ if (rec->hop_limit > 0 || sa_path_is_roce(rec))
+ ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr);
+ return ret;
}
EXPORT_SYMBOL(ib_init_ah_attr_from_path);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 8ae1308e..31c7efa 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -273,6 +273,7 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
+ speed = " SDR";
rate = 25;
break;
}
@@ -388,14 +389,26 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
+ union ib_gid *pgid;
union ib_gid gid;
ssize_t ret;
ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
- if (ret)
- return ret;
- return sprintf(buf, "%pI6\n", gid.raw);
+ /* If reading the GID fails, it is likely because the GID entry is
+ * empty (invalid) or a reserved GID in the table.
+ * User space expects to read GID table entries as long as the given
+ * index is within the GID table size.
+ * Administrative/debugging tools would otherwise fail to query the
+ * rest of the GID entries once they hit an error at a given index.
+ * To avoid user space seeing such an error on a failed GID read,
+ * return the zero GID as before. This maintains backward
+ * compatibility.
+ */
+ if (ret)
+ pgid = &zgid;
+ else
+ pgid = &gid;
+ return sprintf(buf, "%pI6\n", pgid->raw);
}
static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
@@ -810,10 +823,15 @@ static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
dev = port->ibdev;
stats = port->hw_stats;
}
+ mutex_lock(&stats->lock);
ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index);
if (ret)
- return ret;
- return print_hw_stat(stats, hsa->index, buf);
+ goto unlock;
+ ret = print_hw_stat(stats, hsa->index, buf);
+unlock:
+ mutex_unlock(&stats->lock);
+
+ return ret;
}
static ssize_t show_stats_lifespan(struct kobject *kobj,
@@ -821,17 +839,25 @@ static ssize_t show_stats_lifespan(struct kobject *kobj,
char *buf)
{
struct hw_stats_attribute *hsa;
+ struct rdma_hw_stats *stats;
int msecs;
hsa = container_of(attr, struct hw_stats_attribute, attr);
if (!hsa->port_num) {
struct ib_device *dev = container_of((struct device *)kobj,
struct ib_device, dev);
- msecs = jiffies_to_msecs(dev->hw_stats->lifespan);
+
+ stats = dev->hw_stats;
} else {
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
- msecs = jiffies_to_msecs(p->hw_stats->lifespan);
+
+ stats = p->hw_stats;
}
+
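+ /* lifespan is updated under the same lock in set_stats_lifespan() */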
+ mutex_lock(&stats->lock);
+ msecs = jiffies_to_msecs(stats->lifespan);
+ mutex_unlock(&stats->lock);
+
return sprintf(buf, "%d\n", msecs);
}
@@ -840,6 +866,7 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
const char *buf, size_t count)
{
struct hw_stats_attribute *hsa;
+ struct rdma_hw_stats *stats;
int msecs;
int jiffies;
int ret;
@@ -854,11 +881,18 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
if (!hsa->port_num) {
struct ib_device *dev = container_of((struct device *)kobj,
struct ib_device, dev);
- dev->hw_stats->lifespan = jiffies;
+
+ stats = dev->hw_stats;
} else {
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
- p->hw_stats->lifespan = jiffies;
+
+ stats = p->hw_stats;
}
+
+ mutex_lock(&stats->lock);
+ stats->lifespan = jiffies;
+ mutex_unlock(&stats->lock);
+
return count;
}
@@ -951,6 +985,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
sysfs_attr_init(hsag->attrs[i]);
}
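+ /* The lock serializes counter updates with lifespan reads/writes */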
+ mutex_init(&stats->lock);
/* treat an error here as non-fatal */
hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num);
if (hsag->attrs[i])
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 0170226..9eef96d 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -430,7 +430,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
uevent->resp.id = ctx->id;
}
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&uevent->resp, sizeof(uevent->resp))) {
result = -EFAULT;
goto done;
@@ -441,7 +441,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
result = -ENOMEM;
goto done;
}
- if (copy_to_user((void __user *)(unsigned long)cmd.data,
+ if (copy_to_user(u64_to_user_ptr(cmd.data),
uevent->data, uevent->data_len)) {
result = -EFAULT;
goto done;
@@ -453,7 +453,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
result = -ENOMEM;
goto done;
}
- if (copy_to_user((void __user *)(unsigned long)cmd.info,
+ if (copy_to_user(u64_to_user_ptr(cmd.info),
uevent->info, uevent->info_len)) {
result = -EFAULT;
goto done;
@@ -502,7 +502,7 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
}
resp.id = ctx->id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp))) {
result = -EFAULT;
goto err2;
@@ -556,7 +556,7 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
ib_ucm_cleanup_events(ctx);
resp.events_reported = ctx->events_reported;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
result = -EFAULT;
@@ -588,7 +588,7 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
resp.local_id = ctx->cm_id->local_id;
resp.remote_id = ctx->cm_id->remote_id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
result = -EFAULT;
@@ -625,7 +625,7 @@ static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
result = -EFAULT;
@@ -699,7 +699,7 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
if (!len)
return 0;
- data = memdup_user((void __user *)(unsigned long)src, len);
+ data = memdup_user(u64_to_user_ptr(src), len);
if (IS_ERR(data))
return PTR_ERR(data);
@@ -721,7 +721,7 @@ static int ib_ucm_path_get(struct sa_path_rec **path, u64 src)
if (!sa_path)
return -ENOMEM;
- if (copy_from_user(&upath, (void __user *)(unsigned long)src,
+ if (copy_from_user(&upath, u64_to_user_ptr(src),
sizeof(upath))) {
kfree(sa_path);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index d933336..7432948 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -382,7 +382,11 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
struct ucma_event *uevent;
int ret = 0;
- if (out_len < sizeof uevent->resp)
+ /*
+ * Old 32 bit user space does not send the 4 byte padding in the
+ * reserved field. We don't care, allow it to keep working.
+ */
+ if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
return -ENOSPC;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -416,8 +420,9 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
uevent->resp.id = ctx->id;
}
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &uevent->resp, sizeof uevent->resp)) {
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
+ &uevent->resp,
+ min_t(size_t, out_len, sizeof(uevent->resp)))) {
ret = -EFAULT;
goto done;
}
@@ -477,15 +482,15 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
return -ENOMEM;
ctx->uid = cmd.uid;
- cm_id = rdma_create_id(current->nsproxy->net_ns,
- ucma_event_handler, ctx, cmd.ps, qp_type);
+ cm_id = __rdma_create_id(current->nsproxy->net_ns,
+ ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
goto err1;
}
resp.id = ctx->id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp))) {
ret = -EFAULT;
goto err2;
@@ -615,7 +620,7 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
}
resp.events_reported = ucma_free_ctx(ctx);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -845,7 +850,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
ucma_copy_iw_route(&resp, &ctx->cm_id->route);
out:
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -991,7 +996,7 @@ static ssize_t ucma_query(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- response = (void __user *)(unsigned long) cmd.response;
+ response = u64_to_user_ptr(cmd.response);
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1094,12 +1099,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
if (cmd.conn_param.valid) {
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
mutex_lock(&file->mut);
- ret = rdma_accept(ctx->cm_id, &conn_param);
+ ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
if (!ret)
ctx->uid = cmd.uid;
mutex_unlock(&file->mut);
} else
- ret = rdma_accept(ctx->cm_id, NULL);
+ ret = __rdma_accept(ctx->cm_id, NULL, NULL);
ucma_put_ctx(ctx);
return ret;
@@ -1179,7 +1184,7 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
goto out;
ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -1241,6 +1246,9 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
if (!optlen)
return -EINVAL;
+ if (!ctx->cm_id->device)
+ return -EINVAL;
+
memset(&sa_path, 0, sizeof(sa_path));
sa_path.rec_type = SA_PATH_REC_TYPE_IB;
@@ -1315,7 +1323,7 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
return -EINVAL;
- optval = memdup_user((void __user *) (unsigned long) cmd.optval,
+ optval = memdup_user(u64_to_user_ptr(cmd.optval),
cmd.optlen);
if (IS_ERR(optval)) {
ret = PTR_ERR(optval);
@@ -1395,7 +1403,7 @@ static ssize_t ucma_process_join(struct ucma_file *file,
goto err2;
resp.id = mc->id;
- if (copy_to_user((void __user *)(unsigned long) cmd->response,
+ if (copy_to_user(u64_to_user_ptr(cmd->response),
&resp, sizeof(resp))) {
ret = -EFAULT;
goto err3;
@@ -1500,7 +1508,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
resp.events_reported = mc->events_reported;
kfree(mc);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
out:
@@ -1587,7 +1595,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
ucma_unlock_files(cur_file, new_file);
response:
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index deccefb..cfb5161 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -46,6 +46,10 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_std_types.h>
+
+#define UVERBS_MODULE_NAME ib_uverbs
+#include <rdma/uverbs_named_ioctl.h>
static inline void
ib_uverbs_init_udata(struct ib_udata *udata,
@@ -199,11 +203,18 @@ struct ib_ucq_object {
u32 async_events_reported;
};
+struct ib_uflow_resources;
+struct ib_uflow_object {
+ struct ib_uobject uobject;
+ struct ib_uflow_resources *resources;
+};
+
extern const struct file_operations uverbs_event_fops;
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
struct ib_device *ib_dev);
void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
struct ib_uverbs_completion_event_file *ev_file,
@@ -226,7 +237,13 @@ int uverbs_dealloc_mw(struct ib_mw *mw);
void ib_uverbs_detach_umcast(struct ib_qp *qp,
struct ib_uqp_object *uobj);
+void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata);
+extern const struct uverbs_attr_def uverbs_uhw_compat_in;
+extern const struct uverbs_attr_def uverbs_uhw_compat_out;
long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+int uverbs_destroy_def_handler(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs);
struct ib_uverbs_flow_spec {
union {
@@ -240,13 +257,37 @@ struct ib_uverbs_flow_spec {
};
struct ib_uverbs_flow_spec_eth eth;
struct ib_uverbs_flow_spec_ipv4 ipv4;
+ struct ib_uverbs_flow_spec_esp esp;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
struct ib_uverbs_flow_spec_action_tag flow_tag;
struct ib_uverbs_flow_spec_action_drop drop;
+ struct ib_uverbs_flow_spec_action_handle action;
};
};
+int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
+ const void *kern_spec_mask,
+ const void *kern_spec_val,
+ size_t kern_filter_sz,
+ union ib_flow_spec *ib_spec);
+
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DEVICE);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_PD);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MR);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_CQ);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_QP);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_AH);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MW);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_SRQ);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_WQ);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
+
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
struct ib_device *ib_dev, \
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index a148de3..13cb5e4 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -50,7 +50,7 @@
static struct ib_uverbs_completion_event_file *
ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
{
- struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
+ struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL,
fd, context);
struct ib_uobject_file *uobj_file;
@@ -322,7 +322,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- uobj = uobj_alloc(uobj_get_type(pd), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -372,7 +372,7 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(pd), cmd.pd_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -517,7 +517,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = (struct ib_uxrcd_object *)uobj_alloc(uobj_get_type(xrcd),
+ obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD,
file->ucontext);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
@@ -602,7 +602,7 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -663,11 +663,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
- uobj = uobj_alloc(uobj_get_type(mr), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -693,6 +693,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->pd = pd;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
+ mr->res.type = RDMA_RESTRACK_MR;
+ rdma_restrack_add(&mr->res);
uobj->object = mr;
@@ -756,7 +758,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -770,7 +772,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
if (cmd.flags & IB_MR_REREG_PD) {
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -822,7 +824,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -851,11 +853,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = uobj_alloc(uobj_get_type(mw), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -914,7 +916,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(mw), cmd.mw_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -939,7 +941,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_alloc(uobj_get_type(comp_channel), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -984,7 +986,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd->comp_vector >= file->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = (struct ib_ucq_object *)uobj_alloc(uobj_get_type(cq),
+ obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ,
file->ucontext);
if (IS_ERR(obj))
return obj;
@@ -1173,7 +1175,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1238,7 +1240,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1285,7 +1287,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
@@ -1312,7 +1314,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(cq), cmd.cq_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -1371,7 +1373,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
file->ucontext);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -1382,7 +1384,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
sizeof(cmd->rwq_ind_tbl_handle) &&
(cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
- ind_tbl = uobj_get_obj_read(rwq_ind_table,
+ ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL,
cmd->rwq_ind_tbl_handle,
file->ucontext);
if (!ind_tbl) {
@@ -1409,7 +1411,7 @@ static int create_qp(struct ib_uverbs_file *file,
has_sq = false;
if (cmd->qp_type == IB_QPT_XRC_TGT) {
- xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->pd_handle,
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
file->ucontext);
if (IS_ERR(xrcd_uobj)) {
@@ -1429,7 +1431,7 @@ static int create_qp(struct ib_uverbs_file *file,
cmd->max_recv_sge = 0;
} else {
if (cmd->is_srq) {
- srq = uobj_get_obj_read(srq, cmd->srq_handle,
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle,
file->ucontext);
if (!srq || srq->srq_type == IB_SRQT_XRC) {
ret = -EINVAL;
@@ -1439,7 +1441,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (!ind_tbl) {
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
- rcq = uobj_get_obj_read(cq, cmd->recv_cq_handle,
+ rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle,
file->ucontext);
if (!rcq) {
ret = -EINVAL;
@@ -1450,11 +1452,11 @@ static int create_qp(struct ib_uverbs_file *file,
}
if (has_sq)
- scq = uobj_get_obj_read(cq, cmd->send_cq_handle,
+ scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle,
file->ucontext);
if (!ind_tbl)
rcq = rcq ?: scq;
- pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1751,12 +1753,12 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
file->ucontext);
if (IS_ERR(obj))
return PTR_ERR(obj);
- xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd.pd_handle,
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle,
file->ucontext);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
@@ -1859,7 +1861,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -1964,7 +1966,7 @@ static int modify_qp(struct ib_uverbs_file *file,
if (!attr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, cmd->base.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -1989,6 +1991,13 @@ static int modify_qp(struct ib_uverbs_file *file,
goto release_qp;
}
+ if ((cmd->base.attr_mask & IB_QP_CUR_STATE &&
+ cmd->base.cur_qp_state > IB_QPS_ERR) ||
+ cmd->base.qp_state > IB_QPS_ERR) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+
attr->qp_state = cmd->base.qp_state;
attr->cur_qp_state = cmd->base.cur_qp_state;
attr->path_mtu = cmd->base.path_mtu;
@@ -2112,7 +2121,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
- uobj = uobj_get_write(uobj_get_type(qp), cmd.qp_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -2178,7 +2187,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (!user_wr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
@@ -2214,7 +2223,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- ud->ah = uobj_get_obj_read(ah, user_wr->wr.ud.ah,
+ ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah,
file->ucontext);
if (!ud->ah) {
kfree(ud);
@@ -2449,7 +2458,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp)
goto out;
@@ -2498,7 +2507,7 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
if (!srq)
goto out;
@@ -2555,11 +2564,11 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- uobj = uobj_alloc(uobj_get_type(ah), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err;
@@ -2627,7 +2636,7 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(ah), cmd.ah_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -2650,7 +2659,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
@@ -2701,7 +2710,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp)
return -EINVAL;
@@ -2730,8 +2739,52 @@ out_put:
return ret ? ret : in_len;
}
-static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+struct ib_uflow_resources {
+ size_t max;
+ size_t num;
+ struct ib_flow_action *collection[0];
+};
+
+static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
+{
+ struct ib_uflow_resources *resources;
+
+ resources =
+ kmalloc(sizeof(*resources) +
+ num_specs * sizeof(*resources->collection), GFP_KERNEL);
+
+ if (!resources)
+ return NULL;
+
+ resources->num = 0;
+ resources->max = num_specs;
+
+ return resources;
+}
+
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
+{
+ unsigned int i;
+
+ for (i = 0; i < uflow_res->num; i++)
+ atomic_dec(&uflow_res->collection[i]->usecnt);
+
+ kfree(uflow_res);
+}
+
+static void flow_resources_add(struct ib_uflow_resources *uflow_res,
+ struct ib_flow_action *action)
+{
+ WARN_ON(uflow_res->num >= uflow_res->max);
+
+ atomic_inc(&action->usecnt);
+ uflow_res->collection[uflow_res->num++] = action;
+}
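
Taken together, the three helpers above form a small pin/release lifecycle: the create-flow path sizes the collection from num_of_specs, each ACTION_HANDLE spec pins its flow action into it, and teardown drops every pinned reference in one pass. A minimal userspace analogue of the pattern (a hedged sketch: plain ints stand in for atomic_t usecnt, assert() for WARN_ON):

#include <assert.h>
#include <stdlib.h>

struct obj { int refcnt; };              /* stands in for ib_flow_action */

struct resources {
	size_t max;
	size_t num;
	struct obj *collection[];        /* flexible array member */
};

static struct resources *resources_alloc(size_t num_specs)
{
	struct resources *res = malloc(sizeof(*res) +
				       num_specs * sizeof(*res->collection));

	if (!res)
		return NULL;
	res->num = 0;
	res->max = num_specs;
	return res;
}

static void resources_add(struct resources *res, struct obj *o)
{
	assert(res->num < res->max);     /* mirrors the WARN_ON() */
	o->refcnt++;                     /* mirrors atomic_inc(&usecnt) */
	res->collection[res->num++] = o;
}

static void resources_free(struct resources *res)
{
	for (size_t i = 0; i < res->num; i++)
		res->collection[i]->refcnt--;
	free(res);
}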
+
+static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
+ struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec,
+ struct ib_uflow_resources *uflow_res)
{
ib_spec->type = kern_spec->type;
switch (ib_spec->type) {
@@ -2750,19 +2803,34 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop);
break;
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ if (kern_spec->action.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_handle))
+ return -EOPNOTSUPP;
+ ib_spec->action.act = uobj_get_obj_read(flow_action,
+ UVERBS_OBJECT_FLOW_ACTION,
+ kern_spec->action.handle,
+ ucontext);
+ if (!ib_spec->action.act)
+ return -EINVAL;
+ ib_spec->action.size =
+ sizeof(struct ib_flow_spec_action_handle);
+ flow_resources_add(uflow_res, ib_spec->action.act);
+ uobj_put_obj_read(ib_spec->action.act);
+ break;
default:
return -EINVAL;
}
return 0;
}
-static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
+static size_t kern_spec_filter_sz(const struct ib_uverbs_flow_spec_hdr *spec)
{
/* Returns the user-space filter size, including padding */
return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2;
}
-static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
+static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size,
u16 ib_real_filter_sz)
{
/*
@@ -2780,28 +2848,21 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
return kern_filter_size;
}
-static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
+ const void *kern_spec_mask,
+ const void *kern_spec_val,
+ size_t kern_filter_sz,
+ union ib_flow_spec *ib_spec)
{
ssize_t actual_filter_sz;
- ssize_t kern_filter_sz;
ssize_t ib_filter_sz;
- void *kern_spec_mask;
- void *kern_spec_val;
- if (kern_spec->reserved)
- return -EINVAL;
-
- ib_spec->type = kern_spec->type;
-
- kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
/* User flow spec size must be aligned to 4 bytes */
if (kern_filter_sz != ALIGN(kern_filter_sz, 4))
return -EINVAL;
- kern_spec_val = (void *)kern_spec +
- sizeof(struct ib_uverbs_flow_spec_hdr);
- kern_spec_mask = kern_spec_val + kern_filter_sz;
+ ib_spec->type = type;
+
if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL))
return -EINVAL;
@@ -2870,20 +2931,56 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
(ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24))
return -EINVAL;
break;
+ case IB_FLOW_SPEC_ESP:
+ ib_filter_sz = offsetof(struct ib_flow_esp_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->esp.size = sizeof(struct ib_flow_spec_esp);
+ memcpy(&ib_spec->esp.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->esp.mask, kern_spec_mask, actual_filter_sz);
+ break;
default:
return -EINVAL;
}
return 0;
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ ssize_t kern_filter_sz;
+ void *kern_spec_mask;
+ void *kern_spec_val;
+
+ if (kern_spec->reserved)
+ return -EINVAL;
+
+ kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
+
+ kern_spec_val = (void *)kern_spec +
+ sizeof(struct ib_uverbs_flow_spec_hdr);
+ kern_spec_mask = kern_spec_val + kern_filter_sz;
+
+ return ib_uverbs_kern_spec_to_ib_spec_filter(kern_spec->type,
+ kern_spec_mask,
+ kern_spec_val,
+ kern_filter_sz, ib_spec);
+}
+
+static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
+ struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec,
+ struct ib_uflow_resources *uflow_res)
{
if (kern_spec->reserved)
return -EINVAL;
if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
- return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
+ return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec,
+ uflow_res);
else
return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
}
@@ -2925,18 +3022,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = (struct ib_uwq_object *)uobj_alloc(uobj_get_type(wq),
+ obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ,
file->ucontext);
if (IS_ERR(obj))
return PTR_ERR(obj);
- pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3040,7 +3137,7 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
resp.response_length = required_resp_len;
- uobj = uobj_get_write(uobj_get_type(wq), cmd.wq_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3091,7 +3188,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = uobj_get_obj_read(wq, cmd.wq_handle, file->ucontext);
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext);
if (!wq)
return -EINVAL;
@@ -3185,7 +3282,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
- wq = uobj_get_obj_read(wq, wqs_handles[num_read_wqs],
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs],
file->ucontext);
if (!wq) {
err = -EINVAL;
@@ -3195,7 +3292,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
wqs[num_read_wqs] = wq;
}
- uobj = uobj_alloc(uobj_get_type(rwq_ind_table), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto put_wqs;
@@ -3282,7 +3379,7 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- uobj = uobj_get_write(uobj_get_type(rwq_ind_table), cmd.ind_tbl_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3298,10 +3395,12 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
struct ib_uverbs_create_flow cmd;
struct ib_uverbs_create_flow_resp resp;
struct ib_uobject *uobj;
+ struct ib_uflow_object *uflow;
struct ib_flow *flow_id;
struct ib_uverbs_flow_attr *kern_flow_attr;
struct ib_flow_attr *flow_attr;
struct ib_qp *qp;
+ struct ib_uflow_resources *uflow_res;
int err = 0;
void *kern_spec;
void *ib_spec;
@@ -3361,13 +3460,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kern_flow_attr = &cmd.flow_attr;
}
- uobj = uobj_alloc(uobj_get_type(flow), file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto err_free_attr;
}
- qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
if (!qp) {
err = -EINVAL;
goto err_uobj;
@@ -3379,6 +3478,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = -ENOMEM;
goto err_put;
}
+ uflow_res = flow_resources_alloc(cmd.flow_attr.num_of_specs);
+ if (!uflow_res) {
+ err = -ENOMEM;
+ goto err_free_flow_attr;
+ }
flow_attr->type = kern_flow_attr->type;
flow_attr->priority = kern_flow_attr->priority;
@@ -3393,7 +3497,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
cmd.flow_attr.size >=
((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
- err = kern_spec_to_ib_spec(kern_spec, ib_spec);
+ err = kern_spec_to_ib_spec(file->ucontext, kern_spec, ib_spec,
+ uflow_res);
if (err)
goto err_free;
flow_attr->size +=
@@ -3415,6 +3520,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
}
flow_id->uobject = uobj;
uobj->object = flow_id;
+ uflow = container_of(uobj, typeof(*uflow), uobject);
+ uflow->resources = uflow_res;
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
@@ -3433,6 +3540,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err_copy:
ib_destroy_flow(flow_id);
err_free:
+ ib_uverbs_flow_resources_free(uflow_res);
+err_free_flow_attr:
kfree(flow_attr);
err_put:
uobj_put_obj_read(qp);
@@ -3463,7 +3572,7 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EINVAL;
- uobj = uobj_get_write(uobj_get_type(flow), cmd.flow_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3485,7 +3594,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_srq_init_attr attr;
int ret;
- obj = (struct ib_usrq_object *)uobj_alloc(uobj_get_type(srq),
+ obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ,
file->ucontext);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -3494,7 +3603,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
attr.ext.tag_matching.max_num_tags = cmd->max_num_tags;
if (cmd->srq_type == IB_SRQT_XRC) {
- xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle,
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
file->ucontext);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
@@ -3512,7 +3621,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
}
if (ib_srq_has_cq(cmd->srq_type)) {
- attr.ext.cq = uobj_get_obj_read(cq, cmd->cq_handle,
+ attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle,
file->ucontext);
if (!attr.ext.cq) {
ret = -EINVAL;
@@ -3520,7 +3629,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
}
}
- pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
@@ -3572,7 +3681,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (cmd->srq_type == IB_SRQT_XRC)
resp.srqn = srq->ext.xrc.srq_num;
- if (copy_to_user((void __user *) (unsigned long) cmd->response,
+ if (copy_to_user(u64_to_user_ptr(cmd->response),
&resp, sizeof resp)) {
ret = -EFAULT;
goto err_copy;
@@ -3692,7 +3801,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
- srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
@@ -3723,7 +3832,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
if (!srq)
return -EINVAL;
@@ -3760,7 +3869,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(uobj_get_type(srq), cmd.srq_handle,
+ uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle,
file->ucontext);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3897,6 +4006,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.cq_moderation_caps.max_cq_moderation_period =
attr.cq_caps.max_cq_moderation_period;
resp.response_length += sizeof(resp.cq_moderation_caps);
+
+ if (ucore->outlen < resp.response_length + sizeof(resp.max_dm_size))
+ goto end;
+
+ resp.max_dm_size = attr.max_dm_size;
+ resp.response_length += sizeof(resp.max_dm_size);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
@@ -3933,7 +4048,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
if (cmd.attr_mask > IB_CQ_MODERATE)
return -EOPNOTSUPP;
- cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
if (!cq)
return -EINVAL;
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 339b851..8c93970 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -35,6 +35,17 @@
#include "rdma_core.h"
#include "uverbs.h"
+static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
+ u16 len)
+{
+ if (uattr->len > sizeof(((struct ib_uverbs_attr *)0)->data))
+ return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data) + len,
+ uattr->len - len);
+
+ return !memchr_inv((const void *)&uattr->data + len,
+ 0, uattr->len - len);
+}
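
The helper above enforces the trailing-zero rule for forward-compatible input structs: a newer userspace may hand in a longer struct than this kernel knows, but every byte past the known length must be zero. In kernel context the check is split between inline attribute data (memchr_inv) and user pointers (ib_is_buffer_cleared); a simplified single-buffer sketch of the rule:

#include <stdbool.h>
#include <stddef.h>

/* Hedged sketch: reject any non-zero byte between the length the
 * kernel understands (kernel_len) and what userspace sent (user_len). */
static bool tail_is_cleared(const unsigned char *buf, size_t user_len,
			    size_t kernel_len)
{
	for (size_t i = kernel_len; i < user_len; i++)
		if (buf[i])
			return false;
	return true;
}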
+
static int uverbs_process_attr(struct ib_device *ibdev,
struct ib_ucontext *ucontext,
const struct ib_uverbs_attr *uattr,
@@ -44,14 +55,12 @@ static int uverbs_process_attr(struct ib_device *ibdev,
struct ib_uverbs_attr __user *uattr_ptr)
{
const struct uverbs_attr_spec *spec;
+ const struct uverbs_attr_spec *val_spec;
struct uverbs_attr *e;
const struct uverbs_object_spec *object;
struct uverbs_obj_attr *o_attr;
struct uverbs_attr *elements = attr_bundle_h->attrs;
- if (uattr->reserved)
- return -EINVAL;
-
if (attr_id >= attr_spec_bucket->num_attrs) {
if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
return -EINVAL;
@@ -63,15 +72,46 @@ static int uverbs_process_attr(struct ib_device *ibdev,
return -EINVAL;
spec = &attr_spec_bucket->attrs[attr_id];
+ val_spec = spec;
e = &elements[attr_id];
e->uattr = uattr_ptr;
switch (spec->type) {
+ case UVERBS_ATTR_TYPE_ENUM_IN:
+ if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems)
+ return -EOPNOTSUPP;
+
+ if (uattr->attr_data.enum_data.reserved)
+ return -EINVAL;
+
+ val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id];
+
+ /* Currently we only support PTR_IN based enums */
+ if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN)
+ return -EOPNOTSUPP;
+
+ e->ptr_attr.enum_id = uattr->attr_data.enum_data.elem_id;
+ /* fall through */
case UVERBS_ATTR_TYPE_PTR_IN:
+ /* Ensure that any data provided by userspace beyond the known
+ * struct is zero. Userspace that knows how to use some future
+ * longer struct will fail here if used with an old kernel and
+ * non-zero content, making ABI compat/discovery simpler.
+ */
+ if (uattr->len > val_spec->ptr.len &&
+ val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO &&
+ !uverbs_is_attr_cleared(uattr, val_spec->ptr.len))
+ return -EOPNOTSUPP;
+
+ /* fall through */
case UVERBS_ATTR_TYPE_PTR_OUT:
- if (uattr->len < spec->len ||
- (!(spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ) &&
- uattr->len > spec->len))
+ if (uattr->len < val_spec->ptr.min_len ||
+ (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) &&
+ uattr->len > val_spec->ptr.len))
+ return -EINVAL;
+
+ if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN &&
+ uattr->attr_data.reserved)
return -EINVAL;
e->ptr_attr.data = uattr->data;
@@ -84,6 +124,9 @@ static int uverbs_process_attr(struct ib_device *ibdev,
return -EINVAL;
/* fall through */
case UVERBS_ATTR_TYPE_FD:
+ if (uattr->attr_data.reserved)
+ return -EINVAL;
+
if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX)
return -EINVAL;
@@ -246,6 +289,9 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
size_t ctx_size;
uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)];
+ if (hdr->driver_id != ib_dev->driver_id)
+ return -EINVAL;
+
object_spec = uverbs_get_object(ib_dev, hdr->object_id);
if (!object_spec)
return -EPROTONOSUPPORT;
@@ -350,7 +396,7 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto out;
}
- if (hdr.reserved) {
+ if (hdr.reserved1 || hdr.reserved2) {
err = -EPROTONOSUPPORT;
goto out;
}
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
index 62e1eb1..0f88a19 100644
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ b/drivers/infiniband/core/uverbs_ioctl_merge.c
@@ -379,7 +379,7 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me
"ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n",
min_id) ||
WARN(IS_ATTR_OBJECT(attr) &&
- attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ,
+ attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
"ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n",
min_id)) {
res = -EINVAL;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index b1ca223..4445d8e 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -468,7 +468,7 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
return;
}
- entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
@@ -501,7 +501,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
return;
}
- entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
return;
@@ -635,39 +635,87 @@ err_put_refs:
return filp;
}
-static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
+static bool verify_command_mask(struct ib_device *ib_dev,
+ u32 command, bool extended)
{
- u64 mask;
+ if (!extended)
+ return ib_dev->uverbs_cmd_mask & BIT_ULL(command);
- if (command <= IB_USER_VERBS_CMD_OPEN_QP)
- mask = ib_dev->uverbs_cmd_mask;
- else
- mask = ib_dev->uverbs_ex_cmd_mask;
-
- if (mask & ((u64)1 << command))
- return 0;
-
- return -1;
+ return ib_dev->uverbs_ex_cmd_mask & BIT_ULL(command);
}
static bool verify_command_idx(u32 command, bool extended)
{
if (extended)
- return command < ARRAY_SIZE(uverbs_ex_cmd_table);
+ return command < ARRAY_SIZE(uverbs_ex_cmd_table) &&
+ uverbs_ex_cmd_table[command];
+
+ return command < ARRAY_SIZE(uverbs_cmd_table) &&
+ uverbs_cmd_table[command];
+}
+
+static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr,
+ u32 *command, bool *extended)
+{
+ if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return -EINVAL;
+
+ *command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK;
+ *extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED;
+
+ if (!verify_command_idx(*command, *extended))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
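
process_hdr() splits the single 32-bit command word into an index and an extended-command flag, rejecting any bit it does not recognize. A standalone sketch of that decoding (the mask values below are hypothetical stand-ins for the uapi constants):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CMD_COMMAND_MASK  0xffu          /* low byte: command index */
#define CMD_FLAG_EXTENDED 0x80000000u    /* high bit: extended command */

static int decode(uint32_t word, uint32_t *cmd, bool *extended)
{
	if (word & ~(CMD_FLAG_EXTENDED | CMD_COMMAND_MASK))
		return -1;               /* unknown bits set */
	*cmd = word & CMD_COMMAND_MASK;
	*extended = word & CMD_FLAG_EXTENDED;
	return 0;
}

int main(void)
{
	uint32_t cmd;
	bool extended;

	if (!decode(0x80000001u, &cmd, &extended))
		printf("command %u, extended %d\n", cmd, extended);
	return 0;
}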
+
+static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
+ struct ib_uverbs_ex_cmd_hdr *ex_hdr,
+ size_t count, bool extended)
+{
+ if (extended) {
+ count -= sizeof(*hdr) + sizeof(*ex_hdr);
+
+ if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
+ return -EINVAL;
+
+ if (ex_hdr->cmd_hdr_reserved)
+ return -EINVAL;
- return command < ARRAY_SIZE(uverbs_cmd_table);
+ if (ex_hdr->response) {
+ if (!hdr->out_words && !ex_hdr->provider_out_words)
+ return -EINVAL;
+
+ if (!access_ok(VERIFY_WRITE,
+ u64_to_user_ptr(ex_hdr->response),
+ (hdr->out_words + ex_hdr->provider_out_words) * 8))
+ return -EFAULT;
+ } else {
+ if (hdr->out_words || ex_hdr->provider_out_words)
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ /* not extended command */
+ if (hdr->in_words * 4 != count)
+ return -EINVAL;
+
+ return 0;
}
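
The size rules verify_hdr() applies differ by command class: a legacy command counts the whole write(), header included, in 4-byte words, while an extended command counts only the payload after both headers, in 8-byte words. A worked example under hypothetical header layouts:

#include <assert.h>
#include <stddef.h>

/* Hypothetical mirrors of the two header structs, for arithmetic only. */
struct cmd_hdr { unsigned int command, in_words, out_words; };
struct ex_cmd_hdr {
	unsigned long long response;
	unsigned short provider_in_words, provider_out_words;
	unsigned int cmd_hdr_reserved;
};

int main(void)
{
	struct cmd_hdr hdr = { .in_words = 2 };
	struct ex_cmd_hdr ex_hdr = { .provider_in_words = 1 };
	size_t count = sizeof(hdr) + sizeof(ex_hdr) + 24; /* 24B payload */

	/* extended: strip both headers, then require 8-byte words */
	count -= sizeof(hdr) + sizeof(ex_hdr);
	assert((hdr.in_words + ex_hdr.provider_in_words) * 8 == count);
	return 0;
}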
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
+ struct ib_uverbs_ex_cmd_hdr ex_hdr;
struct ib_device *ib_dev;
struct ib_uverbs_cmd_hdr hdr;
- bool extended_command;
- __u32 command;
- __u32 flags;
+ bool extended;
int srcu_key;
+ u32 command;
ssize_t ret;
if (!ib_safe_file_access(filp)) {
@@ -676,12 +724,31 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
return -EACCES;
}
- if (count < sizeof hdr)
+ if (count < sizeof(hdr))
return -EINVAL;
- if (copy_from_user(&hdr, buf, sizeof hdr))
+ if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
+ ret = process_hdr(&hdr, &command, &extended);
+ if (ret)
+ return ret;
+
+ if (!file->ucontext &&
+ (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended))
+ return -EINVAL;
+
+ if (extended) {
+ if (count < (sizeof(hdr) + sizeof(ex_hdr)))
+ return -EINVAL;
+ if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
+ return -EFAULT;
+ }
+
+ ret = verify_hdr(&hdr, &ex_hdr, count, extended);
+ if (ret)
+ return ret;
+
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
ib_dev = srcu_dereference(file->device->ib_dev,
&file->device->disassociate_srcu);
@@ -690,106 +757,22 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
goto out;
}
- if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
- IB_USER_VERBS_CMD_COMMAND_MASK)) {
- ret = -EINVAL;
- goto out;
- }
-
- command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
- flags = (hdr.command &
- IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
-
- extended_command = flags & IB_USER_VERBS_CMD_FLAG_EXTENDED;
- if (!verify_command_idx(command, extended_command)) {
- ret = -EINVAL;
- goto out;
- }
-
- if (verify_command_mask(ib_dev, command)) {
+ if (!verify_command_mask(ib_dev, command, extended)) {
ret = -EOPNOTSUPP;
goto out;
}
- if (!file->ucontext &&
- command != IB_USER_VERBS_CMD_GET_CONTEXT) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!flags) {
- if (!uverbs_cmd_table[command]) {
- ret = -EINVAL;
- goto out;
- }
-
- if (hdr.in_words * 4 != count) {
- ret = -EINVAL;
- goto out;
- }
+ buf += sizeof(hdr);
- ret = uverbs_cmd_table[command](file, ib_dev,
- buf + sizeof(hdr),
- hdr.in_words * 4,
- hdr.out_words * 4);
-
- } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
- struct ib_uverbs_ex_cmd_hdr ex_hdr;
+ if (!extended) {
+ ret = uverbs_cmd_table[command](file, ib_dev, buf,
+ hdr.in_words * 4,
+ hdr.out_words * 4);
+ } else {
struct ib_udata ucore;
struct ib_udata uhw;
- size_t written_count = count;
- if (!uverbs_ex_cmd_table[command]) {
- ret = -ENOSYS;
- goto out;
- }
-
- if (!file->ucontext) {
- ret = -EINVAL;
- goto out;
- }
-
- if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
- ret = -EINVAL;
- goto out;
- }
-
- if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
- ret = -EFAULT;
- goto out;
- }
-
- count -= sizeof(hdr) + sizeof(ex_hdr);
- buf += sizeof(hdr) + sizeof(ex_hdr);
-
- if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
- ret = -EINVAL;
- goto out;
- }
-
- if (ex_hdr.cmd_hdr_reserved) {
- ret = -EINVAL;
- goto out;
- }
-
- if (ex_hdr.response) {
- if (!hdr.out_words && !ex_hdr.provider_out_words) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!access_ok(VERIFY_WRITE,
- u64_to_user_ptr(ex_hdr.response),
- (hdr.out_words + ex_hdr.provider_out_words) * 8)) {
- ret = -EFAULT;
- goto out;
- }
- } else {
- if (hdr.out_words || ex_hdr.provider_out_words) {
- ret = -EINVAL;
- goto out;
- }
- }
+ buf += sizeof(ex_hdr);
ib_uverbs_init_udata_buf_or_null(&ucore, buf,
u64_to_user_ptr(ex_hdr.response),
@@ -802,10 +785,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
ex_hdr.provider_out_words * 8);
ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw);
- if (!ret)
- ret = written_count;
- } else {
- ret = -ENOSYS;
+ ret = ret ?: count;
}
out:
@@ -953,10 +933,8 @@ static const struct file_operations uverbs_fops = {
.open = ib_uverbs_open,
.release = ib_uverbs_close,
.llseek = no_llseek,
-#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
.unlocked_ioctl = ib_uverbs_ioctl,
.compat_ioctl = ib_uverbs_ioctl,
-#endif
};
static const struct file_operations uverbs_mmap_fops = {
@@ -966,10 +944,8 @@ static const struct file_operations uverbs_mmap_fops = {
.open = ib_uverbs_open,
.release = ib_uverbs_close,
.llseek = no_llseek,
-#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
.unlocked_ioctl = ib_uverbs_ioctl,
.compat_ioctl = ib_uverbs_ioctl,
-#endif
};
static struct ib_client uverbs_client = {
@@ -1032,7 +1008,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (!device->alloc_ucontext)
return;
- uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
+ uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
if (!uverbs_dev)
return;
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index df1360e..569f48bd 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -48,7 +48,16 @@ static int uverbs_free_ah(struct ib_uobject *uobject,
static int uverbs_free_flow(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
- return ib_destroy_flow((struct ib_flow *)uobject->object);
+ int ret;
+ struct ib_flow *flow = (struct ib_flow *)uobject->object;
+ struct ib_uflow_object *uflow =
+ container_of(uobject, struct ib_uflow_object, uobject);
+
+ ret = ib_destroy_flow(flow);
+ if (!ret)
+ ib_uverbs_flow_resources_free(uflow->resources);
+
+ return ret;
}
static int uverbs_free_mw(struct ib_uobject *uobject,
@@ -135,31 +144,6 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
return ret;
}
-static int uverbs_free_cq(struct ib_uobject *uobject,
- enum rdma_remove_reason why)
-{
- struct ib_cq *cq = uobject->object;
- struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
- struct ib_ucq_object *ucq =
- container_of(uobject, struct ib_ucq_object, uobject);
- int ret;
-
- ret = ib_destroy_cq(cq);
- if (!ret || why != RDMA_REMOVE_DESTROY)
- ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
- container_of(ev_queue,
- struct ib_uverbs_completion_event_file,
- ev_queue) : NULL,
- ucq);
- return ret;
-}
-
-static int uverbs_free_mr(struct ib_uobject *uobject,
- enum rdma_remove_reason why)
-{
- return ib_dereg_mr((struct ib_mr *)uobject->object);
-}
-
static int uverbs_free_xrcd(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
@@ -210,18 +194,26 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_
return 0;
};
+int uverbs_destroy_def_handler(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ return 0;
+}
+
/*
* This spec is used in order to pass information to the hardware driver in a
* legacy way. Every verb that could get driver-specific data should get this
* spec.
*/
-static const struct uverbs_attr_def uverbs_uhw_compat_in =
- UVERBS_ATTR_PTR_IN_SZ(UVERBS_UHW_IN, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ));
-static const struct uverbs_attr_def uverbs_uhw_compat_out =
- UVERBS_ATTR_PTR_OUT_SZ(UVERBS_UHW_OUT, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ));
-
-static void create_udata(struct uverbs_attr_bundle *ctx,
- struct ib_udata *udata)
+const struct uverbs_attr_def uverbs_uhw_compat_in =
+ UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
+const struct uverbs_attr_def uverbs_uhw_compat_out =
+ UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
+
+void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
{
/*
* This is for ease of conversion. The purpose is to convert all drivers
@@ -229,9 +221,9 @@ static void create_udata(struct uverbs_attr_bundle *ctx,
* Assume attr == 0 is input and attr == 1 is output.
*/
const struct uverbs_attr *uhw_in =
- uverbs_attr_get(ctx, UVERBS_UHW_IN);
+ uverbs_attr_get(ctx, UVERBS_ATTR_UHW_IN);
const struct uverbs_attr *uhw_out =
- uverbs_attr_get(ctx, UVERBS_UHW_OUT);
+ uverbs_attr_get(ctx, UVERBS_ATTR_UHW_OUT);
if (!IS_ERR(uhw_in)) {
udata->inlen = uhw_in->ptr_attr.len;
@@ -253,207 +245,67 @@ static void create_udata(struct uverbs_attr_bundle *ctx,
}
}
-static int uverbs_create_cq_handler(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL,
+ &UVERBS_TYPE_ALLOC_FD(0,
+ sizeof(struct ib_uverbs_completion_event_file),
+ uverbs_hot_unplug_completion_event_file,
+ &uverbs_event_fops,
+ "[infinibandevent]", O_RDONLY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
+ uverbs_free_qp));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
+ uverbs_free_srq));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
+ 0, uverbs_free_flow));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
+ uverbs_free_wq));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
+ uverbs_free_xrcd));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
+ /* 2 is used in order to free the PD after MRs */
+ &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL);
+
+static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
+ &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
+ &UVERBS_OBJECT(UVERBS_OBJECT_PD),
+ &UVERBS_OBJECT(UVERBS_OBJECT_MR),
+ &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
+ &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_QP),
+ &UVERBS_OBJECT(UVERBS_OBJECT_AH),
+ &UVERBS_OBJECT(UVERBS_OBJECT_MW),
+ &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
+ &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
+ &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
+ &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
+ &UVERBS_OBJECT(UVERBS_OBJECT_DM));
+
+const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
{
- struct ib_ucontext *ucontext = file->ucontext;
- struct ib_ucq_object *obj;
- struct ib_udata uhw;
- int ret;
- u64 user_handle;
- struct ib_cq_init_attr attr = {};
- struct ib_cq *cq;
- struct ib_uverbs_completion_event_file *ev_file = NULL;
- const struct uverbs_attr *ev_file_attr;
- struct ib_uobject *ev_file_uobj;
-
- if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
- return -EOPNOTSUPP;
-
- ret = uverbs_copy_from(&attr.comp_vector, attrs, CREATE_CQ_COMP_VECTOR);
- if (!ret)
- ret = uverbs_copy_from(&attr.cqe, attrs, CREATE_CQ_CQE);
- if (!ret)
- ret = uverbs_copy_from(&user_handle, attrs, CREATE_CQ_USER_HANDLE);
- if (ret)
- return ret;
-
- /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
- if (uverbs_copy_from(&attr.flags, attrs, CREATE_CQ_FLAGS) == -EFAULT)
- return -EFAULT;
-
- ev_file_attr = uverbs_attr_get(attrs, CREATE_CQ_COMP_CHANNEL);
- if (!IS_ERR(ev_file_attr)) {
- ev_file_uobj = ev_file_attr->obj_attr.uobject;
-
- ev_file = container_of(ev_file_uobj,
- struct ib_uverbs_completion_event_file,
- uobj_file.uobj);
- uverbs_uobject_get(ev_file_uobj);
- }
-
- if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) {
- ret = -EINVAL;
- goto err_event_file;
- }
-
- obj = container_of(uverbs_attr_get(attrs, CREATE_CQ_HANDLE)->obj_attr.uobject,
- typeof(*obj), uobject);
- obj->uverbs_file = ucontext->ufile;
- obj->comp_events_reported = 0;
- obj->async_events_reported = 0;
- INIT_LIST_HEAD(&obj->comp_list);
- INIT_LIST_HEAD(&obj->async_list);
-
- /* Temporary, only until drivers get the new uverbs_attr_bundle */
- create_udata(attrs, &uhw);
-
- cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw);
- if (IS_ERR(cq)) {
- ret = PTR_ERR(cq);
- goto err_event_file;
- }
-
- cq->device = ib_dev;
- cq->uobject = &obj->uobject;
- cq->comp_handler = ib_uverbs_comp_handler;
- cq->event_handler = ib_uverbs_cq_event_handler;
- cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
- obj->uobject.object = cq;
- obj->uobject.user_handle = user_handle;
- atomic_set(&cq->usecnt, 0);
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_add(&cq->res);
-
- ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe,
- sizeof(cq->cqe));
- if (ret)
- goto err_cq;
-
- return 0;
-err_cq:
- ib_destroy_cq(cq);
-
-err_event_file:
- if (ev_file)
- uverbs_uobject_put(ev_file_uobj);
- return ret;
-};
-
-static DECLARE_UVERBS_METHOD(
- uverbs_method_cq_create, UVERBS_CQ_CREATE, uverbs_create_cq_handler,
- &UVERBS_ATTR_IDR(CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, UVERBS_ACCESS_NEW,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(CREATE_CQ_CQE, u32,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(CREATE_CQ_USER_HANDLE, u64,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_FD(CREATE_CQ_COMP_CHANNEL, UVERBS_OBJECT_COMP_CHANNEL,
- UVERBS_ACCESS_READ),
- &UVERBS_ATTR_PTR_IN(CREATE_CQ_COMP_VECTOR, u32,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(CREATE_CQ_FLAGS, u32),
- &UVERBS_ATTR_PTR_OUT(CREATE_CQ_RESP_CQE, u32,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
-
-static int uverbs_destroy_cq_handler(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uverbs_destroy_cq_resp resp;
- struct ib_uobject *uobj =
- uverbs_attr_get(attrs, DESTROY_CQ_HANDLE)->obj_attr.uobject;
- struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object,
- uobject);
- int ret;
-
- if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
- return -EOPNOTSUPP;
-
- ret = rdma_explicit_destroy(uobj);
- if (ret)
- return ret;
-
- resp.comp_events_reported = obj->comp_events_reported;
- resp.async_events_reported = obj->async_events_reported;
-
- return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp, sizeof(resp));
+ return &uverbs_default_objects;
}
-
-static DECLARE_UVERBS_METHOD(
- uverbs_method_cq_destroy, UVERBS_CQ_DESTROY, uverbs_destroy_cq_handler,
- &UVERBS_ATTR_IDR(DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
- UVERBS_ACCESS_DESTROY,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_OUT(DESTROY_CQ_RESP, struct ib_uverbs_destroy_cq_resp,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_comp_channel,
- UVERBS_OBJECT_COMP_CHANNEL,
- &UVERBS_TYPE_ALLOC_FD(0,
- sizeof(struct ib_uverbs_completion_event_file),
- uverbs_hot_unplug_completion_event_file,
- &uverbs_event_fops,
- "[infinibandevent]", O_RDONLY));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_cq, UVERBS_OBJECT_CQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0,
- uverbs_free_cq),
- &uverbs_method_cq_create,
- &uverbs_method_cq_destroy);
-
-DECLARE_UVERBS_OBJECT(uverbs_object_qp, UVERBS_OBJECT_QP,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
- uverbs_free_qp));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_mw, UVERBS_OBJECT_MW,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_mr, UVERBS_OBJECT_MR,
- /* 1 is used in order to free the MR after all the MWs */
- &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_srq, UVERBS_OBJECT_SRQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
- uverbs_free_srq));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_ah, UVERBS_OBJECT_AH,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_flow, UVERBS_OBJECT_FLOW,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_wq, UVERBS_OBJECT_WQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
- uverbs_free_wq));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_rwq_ind_table,
- UVERBS_OBJECT_RWQ_IND_TBL,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_xrcd, UVERBS_OBJECT_XRCD,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
- uverbs_free_xrcd));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_pd, UVERBS_OBJECT_PD,
- /* 2 is used in order to free the PD after MRs */
- &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_device, UVERBS_OBJECT_DEVICE, NULL);
-
-DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
- &uverbs_object_device,
- &uverbs_object_pd,
- &uverbs_object_mr,
- &uverbs_object_comp_channel,
- &uverbs_object_cq,
- &uverbs_object_qp,
- &uverbs_object_ah,
- &uverbs_object_mw,
- &uverbs_object_srq,
- &uverbs_object_flow,
- &uverbs_object_wq,
- &uverbs_object_rwq_ind_table,
- &uverbs_object_xrcd);
+EXPORT_SYMBOL_GPL(uverbs_default_get_objects);
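
With the per-object method tables split out into their own files, the default tree is now consumed through this one exported accessor. A hedged sketch of the expected call site, assuming the uverbs_alloc_spec_tree() merge helper from uverbs_ioctl_merge.c; a driver with its own tree would simply pass extra entries in the array:

static struct uverbs_root_spec *build_default_root(void)
{
	const struct uverbs_object_tree_def *trees[] = {
		uverbs_default_get_objects(),
	};

	return uverbs_alloc_spec_tree(ARRAY_SIZE(trees), trees);
}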
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
new file mode 100644
index 0000000..b0dbae9
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_cq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_cq *cq = uobject->object;
+ struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
+ struct ib_ucq_object *ucq =
+ container_of(uobject, struct ib_ucq_object, uobject);
+ int ret;
+
+ ret = ib_destroy_cq(cq);
+ if (!ret || why != RDMA_REMOVE_DESTROY)
+ ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
+ container_of(ev_queue,
+ struct ib_uverbs_completion_event_file,
+ ev_queue) : NULL,
+ ucq);
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_ucontext *ucontext = file->ucontext;
+ struct ib_ucq_object *obj;
+ struct ib_udata uhw;
+ int ret;
+ u64 user_handle;
+ struct ib_cq_init_attr attr = {};
+ struct ib_cq *cq;
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
+ const struct uverbs_attr *ev_file_attr;
+ struct ib_uobject *ev_file_uobj;
+
+ if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.comp_vector, attrs,
+ UVERBS_ATTR_CREATE_CQ_COMP_VECTOR);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.cqe, attrs,
+ UVERBS_ATTR_CREATE_CQ_CQE);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_CQ_USER_HANDLE);
+ if (ret)
+ return ret;
+
+ /* Optional param; if it doesn't exist, we get -ENOENT and skip it */
+ if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&attr.flags, attrs,
+ UVERBS_ATTR_CREATE_CQ_FLAGS)))
+ return -EFAULT;
+
+ ev_file_attr = uverbs_attr_get(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
+ if (!IS_ERR(ev_file_attr)) {
+ ev_file_uobj = ev_file_attr->obj_attr.uobject;
+
+ ev_file = container_of(ev_file_uobj,
+ struct ib_uverbs_completion_event_file,
+ uobj_file.uobj);
+ uverbs_uobject_get(ev_file_uobj);
+ }
+
+ if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) {
+ ret = -EINVAL;
+ goto err_event_file;
+ }
+
+ obj = container_of(uverbs_attr_get(attrs,
+ UVERBS_ATTR_CREATE_CQ_HANDLE)->obj_attr.uobject,
+ typeof(*obj), uobject);
+ obj->uverbs_file = ucontext->ufile;
+ obj->comp_events_reported = 0;
+ obj->async_events_reported = 0;
+ INIT_LIST_HEAD(&obj->comp_list);
+ INIT_LIST_HEAD(&obj->async_list);
+
+ /* Temporary, only until drivers get the new uverbs_attr_bundle */
+ create_udata(attrs, &uhw);
+
+ cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto err_event_file;
+ }
+
+ cq->device = ib_dev;
+ cq->uobject = &obj->uobject;
+ cq->comp_handler = ib_uverbs_comp_handler;
+ cq->event_handler = ib_uverbs_cq_event_handler;
+ cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
+ obj->uobject.object = cq;
+ obj->uobject.user_handle = user_handle;
+ atomic_set(&cq->usecnt, 0);
+ cq->res.type = RDMA_RESTRACK_CQ;
+ rdma_restrack_add(&cq->res);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
+ sizeof(cq->cqe));
+ if (ret)
+ goto err_cq;
+
+ return 0;
+err_cq:
+ ib_destroy_cq(cq);
+
+err_event_file:
+ if (ev_file)
+ uverbs_uobject_put(ev_file_uobj);
+ return ret;
+};
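
The handler above leans on two attribute-copy conventions: mandatory attributes chain their results and fail on any error, while the optional FLAGS attribute only fails on a genuine fault, since uverbs_copy_from() reports a merely absent attribute as -ENOENT. A hedged userspace analogue of that flow (`copy` is a hypothetical per-attribute fetch callback):

#include <errno.h>
#include <stdbool.h>

static bool is_copy_err(int ret)
{
	return ret && ret != -ENOENT;    /* absence is not an error */
}

static int parse(int (*copy)(int attr_id, unsigned int *dst),
		 unsigned int *cqe, unsigned int *flags)
{
	unsigned int comp_vector;
	int ret;

	ret = copy(0, &comp_vector);     /* mandatory */
	if (!ret)
		ret = copy(1, cqe);      /* mandatory */
	if (ret)
		return ret;

	if (comp_vector >= 64)           /* bound check, as above */
		return -EINVAL;

	*flags = 0;                      /* default when absent */
	if (is_copy_err(copy(2, flags))) /* optional */
		return -EFAULT;
	return 0;
}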
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
+ UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_ACCESS_READ),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
+
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_destroy_cq_resp resp;
+ struct ib_uobject *uobj =
+ uverbs_attr_get(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE)->obj_attr.uobject;
+ struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object,
+ uobject);
+ int ret;
+
+ if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
+ return -EOPNOTSUPP;
+
+ ret = rdma_explicit_destroy(uobj);
+ if (ret)
+ return ret;
+
+ resp.comp_events_reported = obj->comp_events_reported;
+ resp.async_events_reported = obj->async_events_reported;
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp,
+ sizeof(resp));
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ,
+ &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0,
+ uverbs_free_cq),
+#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI)
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
+#endif
+ );
+
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
new file mode 100644
index 0000000..8b68157
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_dm(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_dm *dm = uobject->object;
+
+ if (why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt))
+ return -EBUSY;
+
+ return dm->device->dealloc_dm(dm);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_ucontext *ucontext = file->ucontext;
+ struct ib_dm_alloc_attr attr = {};
+ struct ib_uobject *uobj;
+ struct ib_dm *dm;
+ int ret;
+
+ if (!ib_dev->alloc_dm)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.length, attrs,
+ UVERBS_ATTR_ALLOC_DM_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&attr.alignment, attrs,
+ UVERBS_ATTR_ALLOC_DM_ALIGNMENT);
+ if (ret)
+ return ret;
+
+ uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject;
+
+ dm = ib_dev->alloc_dm(ib_dev, ucontext, &attr, attrs);
+ if (IS_ERR(dm))
+ return PTR_ERR(dm);
+
+ dm->device = ib_dev;
+ dm->length = attr.length;
+ dm->uobject = uobj;
+ atomic_set(&dm->usecnt, 0);
+
+ uobj->object = dm;
+
+ return 0;
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE,
+ uverbs_destroy_def_handler,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_DESTROY,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
+ /* 1 is used in order to free the DM after MRs */
+ &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
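
The order argument to UVERBS_TYPE_ALLOC_IDR() is what sequences context teardown: cleanup sweeps destroy orders from 0 upward, so an object that others depend on gets a larger order and is freed last (MW at 0, MR and DM at 1, PD at 2). A hedged sketch of that sweep:

#include <stddef.h>
#include <stdio.h>

struct uobj_type { const char *name; int destroy_order; };

int main(void)
{
	const struct uobj_type types[] = {
		{ "pd", 2 }, { "mr", 1 }, { "mw", 0 }, { "dm", 1 },
	};

	/* lower orders go first, so the pd outlives the mr/dm using it */
	for (int order = 0; order <= 2; order++)
		for (size_t i = 0; i < sizeof(types) / sizeof(types[0]); i++)
			if (types[i].destroy_order == order)
				printf("destroy %s\n", types[i].name);
	return 0;
}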
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
new file mode 100644
index 0000000..cbcec3da
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_flow_action(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_flow_action *action = uobject->object;
+
+ if (why == RDMA_REMOVE_DESTROY &&
+ atomic_read(&action->usecnt))
+ return -EBUSY;
+
+ return action->device->destroy_flow_action(action);
+}
+
+static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs,
+ u32 flags, bool is_modify)
+{
+ u64 verbs_flags = flags;
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN))
+ verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED;
+
+ if (is_modify && uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS))
+ verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS;
+
+ return verbs_flags;
+};
+
+static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat)
+{
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm =
+ &keymat->keymat.aes_gcm;
+
+ if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
+ return -EOPNOTSUPP;
+
+ if (aes_gcm->key_len != 32 &&
+ aes_gcm->key_len != 24 &&
+ aes_gcm->key_len != 16)
+ return -EINVAL;
+
+ if (aes_gcm->icv_len != 16 &&
+ aes_gcm->icv_len != 8 &&
+ aes_gcm->icv_len != 12)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = {
+ [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm,
+};
+
+static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay,
+ bool is_modify)
+{
+ /* This is used to modify an ESP flow action from enabled replay
+ * protection to disabled. It is only supported via modify, since on
+ * create the REPLAY attribute can simply be omitted to achieve the
+ * same thing.
+ */
+ return is_modify ? 0 : -EINVAL;
+}
+
+static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay,
+ bool is_modify)
+{
+ /* Some replay protections can always be enabled without validating
+ * anything.
+ */
+ return 0;
+}
+
+static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay,
+ bool is_modify) = {
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none,
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok,
+};
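
Both validator tables follow the same dispatch shape: an array of function pointers indexed by the wire enum value, with the bounds check done earlier when uverbs_process_attr() rejects elem_id values beyond num_elems. A hedged standalone sketch of the pattern:

enum replay_proto { REPLAY_NONE, REPLAY_BMP, REPLAY_COUNT };

typedef int (*validate_fn)(int is_modify);

static int v_none(int is_modify) { return is_modify ? 0 : -1; }
static int v_bmp(int is_modify) { return 0; }

static const validate_fn validators[REPLAY_COUNT] = {
	[REPLAY_NONE] = v_none,
	[REPLAY_BMP] = v_bmp,
};

static int validate(unsigned int proto, int is_modify)
{
	if (proto >= REPLAY_COUNT || !validators[proto])
		return -1;               /* unknown enum id */
	return validators[proto](is_modify);
}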
+
+static int parse_esp_ip(enum ib_flow_spec_type proto,
+ const void __user *val_ptr,
+ size_t len, union ib_flow_spec *out)
+{
+ int ret;
+ const struct ib_uverbs_flow_ipv4_filter ipv4 = {
+ .src_ip = cpu_to_be32(0xffffffffUL),
+ .dst_ip = cpu_to_be32(0xffffffffUL),
+ .proto = 0xff,
+ .tos = 0xff,
+ .ttl = 0xff,
+ .flags = 0xff,
+ };
+ const struct ib_uverbs_flow_ipv6_filter ipv6 = {
+ .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+ .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+ .flow_label = cpu_to_be32(0xffffffffUL),
+ .next_hdr = 0xff,
+ .traffic_class = 0xff,
+ .hop_limit = 0xff,
+ };
+ union {
+ struct ib_uverbs_flow_ipv4_filter ipv4;
+ struct ib_uverbs_flow_ipv6_filter ipv6;
+ } user_val = {};
+ const void *user_pmask;
+ size_t val_len;
+
+ /* If the IPv4/IPv6 flow specifications are extended, the mask must
+ * be updated to cover the new fields as well.
+ */
+ BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) +
+ sizeof(ipv4.flags) != sizeof(ipv4));
+ BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) +
+ sizeof(ipv6.reserved) != sizeof(ipv6));
+
+ switch (proto) {
+ case IB_FLOW_SPEC_IPV4:
+ if (len > sizeof(user_val.ipv4) &&
+ !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv4),
+ len - sizeof(user_val.ipv4)))
+ return -EOPNOTSUPP;
+
+ val_len = min_t(size_t, len, sizeof(user_val.ipv4));
+ ret = copy_from_user(&user_val.ipv4, val_ptr,
+ val_len);
+ if (ret)
+ return -EFAULT;
+
+ user_pmask = &ipv4;
+ break;
+ case IB_FLOW_SPEC_IPV6:
+ if (len > sizeof(user_val.ipv6) &&
+ !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv6),
+ len - sizeof(user_val.ipv6)))
+ return -EOPNOTSUPP;
+
+ val_len = min_t(size_t, len, sizeof(user_val.ipv6));
+ ret = copy_from_user(&user_val.ipv6, val_ptr,
+ val_len);
+ if (ret)
+ return -EFAULT;
+
+ user_pmask = &ipv6;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask,
+ &user_val,
+ val_len, out);
+}
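
parse_esp_ip() accepts a user buffer longer than the kernel's current filter
struct only when the extra tail bytes are zero; this is the usual uverbs
forward-compatibility rule, so older kernels reject filters that actually use
fields they do not understand. A standalone sketch of that rule, with
all_zero() standing in for ib_is_buffer_cleared():

    #include <stddef.h>

    /* Stand-in for ib_is_buffer_cleared(): true if all n bytes are zero. */
    static int all_zero(const unsigned char *p, size_t n)
    {
            size_t i;

            for (i = 0; i < n; i++)
                    if (p[i])
                            return 0;
            return 1;
    }

    /* Accept len > known_size only if the unknown tail is cleared. */
    static int tail_ok(const unsigned char *buf, size_t len, size_t known_size)
    {
            return len <= known_size ||
                   all_zero(buf + known_size, len - known_size);
    }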
+
+static int flow_action_esp_get_encap(struct ib_flow_spec_list *out,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_flow_action_esp_encap uverbs_encap;
+ int ret;
+
+ ret = uverbs_copy_from(&uverbs_encap, attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP);
+ if (ret)
+ return ret;
+
+ /* We currently support only one encap */
+ if (uverbs_encap.next_ptr)
+ return -EOPNOTSUPP;
+
+ if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 &&
+ uverbs_encap.type != IB_FLOW_SPEC_IPV6)
+ return -EOPNOTSUPP;
+
+ return parse_esp_ip(uverbs_encap.type,
+ u64_to_user_ptr(uverbs_encap.val_ptr),
+ uverbs_encap.len,
+ &out->spec);
+}
+
+struct ib_flow_action_esp_attr {
+ struct ib_flow_action_attrs_esp hdr;
+ struct ib_flow_action_attrs_esp_keymats keymat;
+ struct ib_flow_action_attrs_esp_replays replay;
+ /* We currently support only one spec */
+ struct ib_flow_spec_list encap;
+};
+
+#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW
+static int parse_flow_action_esp(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs,
+ struct ib_flow_action_esp_attr *esp_attr,
+ bool is_modify)
+{
+ struct ib_uverbs_flow_action_esp uverbs_esp = {};
+ int ret;
+
+ /* Optional param; if it is absent, we get -ENOENT and skip it */
+ ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ESN);
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ /* This can be called from FLOW_ACTION_ESP_MODIFY where
+ * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional
+ */
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) {
+ ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS);
+ if (ret)
+ return ret;
+
+ if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1))
+ return -EOPNOTSUPP;
+
+ esp_attr->hdr.spi = uverbs_esp.spi;
+ esp_attr->hdr.seq = uverbs_esp.seq;
+ esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad;
+ esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts;
+ }
+ esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags,
+ is_modify);
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) {
+ esp_attr->keymat.protocol =
+ uverbs_attr_get_enum_id(attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
+ ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat,
+ attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
+ if (ret)
+ return ret;
+
+ ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat);
+ if (ret)
+ return ret;
+
+ esp_attr->hdr.keymat = &esp_attr->keymat;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) {
+ esp_attr->replay.protocol =
+ uverbs_attr_get_enum_id(attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
+
+ ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay,
+ attrs,
+ UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
+ if (ret)
+ return ret;
+
+ ret = flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay,
+ is_modify);
+ if (ret)
+ return ret;
+
+ esp_attr->hdr.replay = &esp_attr->replay;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) {
+ ret = flow_action_esp_get_encap(&esp_attr->encap, attrs);
+ if (ret)
+ return ret;
+
+ esp_attr->hdr.encap = &esp_attr->encap;
+ }
+
+ return 0;
+}
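
The flags test above depends on ESP_LAST_SUPPORTED_FLAG being a single bit:
(LAST << 1) - 1 yields a mask covering every flag up to and including it, so
any set bit outside the mask is an unknown flag and the verb is rejected with
-EOPNOTSUPP. A quick standalone illustration:

    #include <stdio.h>

    #define FLAG_A 0x1u
    #define FLAG_B 0x2u
    #define LAST_SUPPORTED_FLAG FLAG_B      /* must be a single bit */

    static int flags_supported(unsigned int flags)
    {
            /* (LAST << 1) - 1 == 0x3 here: all bits up to LAST inclusive */
            return !(flags & ~((LAST_SUPPORTED_FLAG << 1) - 1));
    }

    int main(void)
    {
            printf("%d\n", flags_supported(FLAG_A | FLAG_B)); /* 1 */
            printf("%d\n", flags_supported(0x4u));            /* 0: unknown */
            return 0;
    }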
+
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ int ret;
+ struct ib_uobject *uobj;
+ struct ib_flow_action *action;
+ struct ib_flow_action_esp_attr esp_attr = {};
+
+ if (!ib_dev->create_flow_action_esp)
+ return -EOPNOTSUPP;
+
+ ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, false);
+ if (ret)
+ return ret;
+
+ /* No need to check as this attribute is marked as MANDATORY */
+ uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+ action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
+ if (IS_ERR(action))
+ return PTR_ERR(action);
+
+ atomic_set(&action->usecnt, 0);
+ action->device = ib_dev;
+ action->type = IB_FLOW_ACTION_ESP;
+ action->uobject = uobj;
+ uobj->object = action;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ int ret;
+ struct ib_uobject *uobj;
+ struct ib_flow_action *action;
+ struct ib_flow_action_esp_attr esp_attr = {};
+
+ if (!ib_dev->modify_flow_action_esp)
+ return -EOPNOTSUPP;
+
+ ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, true);
+ if (ret)
+ return ret;
+
+ uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+ action = uobj->object;
+
+ if (action->type != IB_FLOW_ACTION_ESP)
+ return -EINVAL;
+
+ return ib_dev->modify_flow_action_esp(action,
+ &esp_attr.hdr,
+ attrs);
+}
+
+static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
+ [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
+ .ptr = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm),
+ .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
+ },
+ },
+};
+
+static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
+ .ptr = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ /* No need to specify any data */
+ .len = 0,
+ }
+ },
+ [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
+ .ptr = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size),
+ .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
+ }
+ },
+};
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+ UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
+ &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ uverbs_flow_action_esp_keymat,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ uverbs_flow_action_esp_replay),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_WRITE,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
+ &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ uverbs_flow_action_esp_keymat),
+ &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ uverbs_flow_action_esp_replay),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
+
+static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY,
+ uverbs_destroy_def_handler,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_DESTROY,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
+
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
new file mode 100644
index 0000000..68f7cad
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_mr(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ return ib_dereg_mr((struct ib_mr *)uobject->object);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dm_mr_attr attr = {};
+ struct ib_uobject *uobj;
+ struct ib_dm *dm;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ int ret;
+
+ if (!ib_dev->reg_dm_mr)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.offset, attrs, UVERBS_ATTR_REG_DM_MR_OFFSET);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&attr.length, attrs,
+ UVERBS_ATTR_REG_DM_MR_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&attr.access_flags, attrs,
+ UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS);
+ if (ret)
+ return ret;
+
+ if (!(attr.access_flags & IB_ZERO_BASED))
+ return -EINVAL;
+
+ ret = ib_check_mr_access(attr.access_flags);
+ if (ret)
+ return ret;
+
+ pd = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
+
+ dm = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
+
+ uobj = uverbs_attr_get(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE)->obj_attr.uobject;
+
+ if (attr.offset > dm->length || attr.length > dm->length ||
+ attr.length > dm->length - attr.offset)
+ return -EINVAL;
+
+ mr = pd->device->reg_dm_mr(pd, dm, &attr, attrs);
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->dm = dm;
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&dm->usecnt);
+
+ uobj->object = mr;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_LKEY, &mr->lkey,
+ sizeof(mr->lkey));
+ if (ret)
+ goto err_dereg;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
+ if (ret)
+ goto err_dereg;
+
+ return 0;
+
+err_dereg:
+ ib_dereg_mr(mr);
+
+ return ret;
+}
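
Note how the handler's range check avoids integer overflow: instead of
testing attr.offset + attr.length > dm->length (where the addition can wrap),
it first rules out offset or length exceeding the total and then compares
length against the remaining space. A standalone sketch of the pattern:

    #include <stdint.h>

    /* Return 1 if [offset, offset + length) fits in a region of size total,
     * without ever computing offset + length (which could wrap).
     */
    static int range_ok(uint64_t offset, uint64_t length, uint64_t total)
    {
            if (offset > total || length > total)
                    return 0;
            return length <= total - offset;
    }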
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_READ,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR,
+ /* Destruction order 1 is used so the MR is freed after all the MWs */
+ &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 93025d2..7eff3ae 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -655,7 +655,7 @@ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
return ah->device->modify_ah ?
ah->device->modify_ah(ah, ah_attr) :
- -ENOSYS;
+ -EOPNOTSUPP;
}
EXPORT_SYMBOL(rdma_modify_ah);
@@ -663,7 +663,7 @@ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
return ah->device->query_ah ?
ah->device->query_ah(ah, ah_attr) :
- -ENOSYS;
+ -EOPNOTSUPP;
}
EXPORT_SYMBOL(rdma_query_ah);
@@ -689,7 +689,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
struct ib_srq *srq;
if (!pd->device->create_srq)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
srq = pd->device->create_srq(pd, srq_init_attr, NULL);
@@ -722,7 +722,7 @@ int ib_modify_srq(struct ib_srq *srq,
{
return srq->device->modify_srq ?
srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) :
- -ENOSYS;
+ -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_modify_srq);
@@ -730,7 +730,7 @@ int ib_query_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr)
{
return srq->device->query_srq ?
- srq->device->query_srq(srq, srq_attr) : -ENOSYS;
+ srq->device->query_srq(srq, srq_attr) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_query_srq);
@@ -1263,34 +1263,30 @@ static const struct {
}
};
-int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask,
- enum rdma_link_layer ll)
+bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
+ enum ib_qp_type type, enum ib_qp_attr_mask mask,
+ enum rdma_link_layer ll)
{
enum ib_qp_attr_mask req_param, opt_param;
- if (cur_state < 0 || cur_state > IB_QPS_ERR ||
- next_state < 0 || next_state > IB_QPS_ERR)
- return 0;
-
if (mask & IB_QP_CUR_STATE &&
cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
- return 0;
+ return false;
if (!qp_state_table[cur_state][next_state].valid)
- return 0;
+ return false;
req_param = qp_state_table[cur_state][next_state].req_param[type];
opt_param = qp_state_table[cur_state][next_state].opt_param[type];
if ((mask & req_param) != req_param)
- return 0;
+ return false;
if (mask & ~(req_param | opt_param | IB_QP_STATE))
- return 0;
+ return false;
- return 1;
+ return true;
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
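
With the bool conversion, ib_modify_qp_is_ok() no longer range-checks the
state values itself; callers are expected to pass valid enum values, and the
core of the function is a lookup in a transition-validity table. A miniature
standalone analogue of that table (states and transitions abridged):

    #include <stdbool.h>

    enum st { ST_RESET, ST_INIT, ST_RTR, ST_RTS, ST_MAX };

    /* Which state transitions are legal, qp_state_table style. */
    static const bool valid[ST_MAX][ST_MAX] = {
            [ST_RESET] = { [ST_INIT] = true },
            [ST_INIT]  = { [ST_INIT] = true, [ST_RTR] = true },
            [ST_RTR]   = { [ST_RTS] = true },
            [ST_RTS]   = { [ST_RTS] = true },
    };

    static bool transition_ok(enum st cur, enum st next)
    {
            return valid[cur][next];
    }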
@@ -1457,7 +1453,7 @@ int ib_query_qp(struct ib_qp *qp,
{
return qp->device->query_qp ?
qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
- -ENOSYS;
+ -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_query_qp);
@@ -1594,7 +1590,7 @@ EXPORT_SYMBOL(ib_create_cq);
int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
return cq->device->modify_cq ?
- cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS;
+ cq->device->modify_cq(cq, cq_count, cq_period) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(rdma_set_cq_moderation);
@@ -1611,7 +1607,7 @@ EXPORT_SYMBOL(ib_destroy_cq);
int ib_resize_cq(struct ib_cq *cq, int cqe)
{
return cq->device->resize_cq ?
- cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS;
+ cq->device->resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_resize_cq);
@@ -1620,11 +1616,16 @@ EXPORT_SYMBOL(ib_resize_cq);
int ib_dereg_mr(struct ib_mr *mr)
{
struct ib_pd *pd = mr->pd;
+ struct ib_dm *dm = mr->dm;
int ret;
+ rdma_restrack_del(&mr->res);
ret = mr->device->dereg_mr(mr);
- if (!ret)
+ if (!ret) {
atomic_dec(&pd->usecnt);
+ if (dm)
+ atomic_dec(&dm->usecnt);
+ }
return ret;
}
@@ -1649,7 +1650,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
struct ib_mr *mr;
if (!pd->device->alloc_mr)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
if (!IS_ERR(mr)) {
@@ -1658,6 +1659,8 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
mr->need_inval = false;
+ mr->res.type = RDMA_RESTRACK_MR;
+ rdma_restrack_add(&mr->res);
}
return mr;
@@ -1673,7 +1676,7 @@ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
struct ib_fmr *fmr;
if (!pd->device->alloc_fmr)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
if (!IS_ERR(fmr)) {
@@ -1757,7 +1760,7 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
int ret;
if (!qp->device->attach_mcast)
- return -ENOSYS;
+ return -EOPNOTSUPP;
if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
@@ -1775,7 +1778,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
int ret;
if (!qp->device->detach_mcast)
- return -ENOSYS;
+ return -EOPNOTSUPP;
if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
@@ -1793,7 +1796,7 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
struct ib_xrcd *xrcd;
if (!device->alloc_xrcd)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
xrcd = device->alloc_xrcd(device, NULL, NULL);
if (!IS_ERR(xrcd)) {
@@ -1847,7 +1850,7 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd,
struct ib_wq *wq;
if (!pd->device->create_wq)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
wq = pd->device->create_wq(pd, wq_attr, NULL);
if (!IS_ERR(wq)) {
@@ -1902,7 +1905,7 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
int err;
if (!wq->device->modify_wq)
- return -ENOSYS;
+ return -EOPNOTSUPP;
err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL);
return err;
@@ -1927,7 +1930,7 @@ struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
u32 table_size;
if (!device->create_rwq_ind_table)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
table_size = (1 << init_attr->log_ind_tbl_size);
rwq_ind_table = device->create_rwq_ind_table(device,
@@ -1977,7 +1980,7 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
{
struct ib_flow *flow_id;
if (!qp->device->create_flow)
- return ERR_PTR(-ENOSYS);
+ return ERR_PTR(-EOPNOTSUPP);
flow_id = qp->device->create_flow(qp, flow_attr, domain);
if (!IS_ERR(flow_id)) {
@@ -2004,7 +2007,7 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
{
return mr->device->check_mr_status ?
- mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
+ mr->device->check_mr_status(mr, check_mask, mr_status) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_check_mr_status);
@@ -2012,7 +2015,7 @@ int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
int state)
{
if (!device->set_vf_link_state)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->set_vf_link_state(device, vf, port, state);
}
@@ -2022,7 +2025,7 @@ int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
struct ifla_vf_info *info)
{
if (!device->get_vf_config)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->get_vf_config(device, vf, port, info);
}
@@ -2032,7 +2035,7 @@ int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
struct ifla_vf_stats *stats)
{
if (!device->get_vf_stats)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->get_vf_stats(device, vf, port, stats);
}
@@ -2042,7 +2045,7 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
int type)
{
if (!device->set_vf_guid)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->set_vf_guid(device, vf, port, guid, type);
}
@@ -2077,7 +2080,7 @@ int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size)
{
if (unlikely(!mr->device->map_mr_sg))
- return -ENOSYS;
+ return -EOPNOTSUPP;
mr->page_size = page_size;
@@ -2194,7 +2197,14 @@ static void __ib_drain_sq(struct ib_qp *qp)
struct ib_cq *cq = qp->send_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe sdrain;
- struct ib_send_wr swr = {}, *bad_swr;
+ struct ib_send_wr *bad_swr;
+ struct ib_rdma_wr swr = {
+ .wr = {
+ .next = NULL,
+ { .wr_cqe = &sdrain.cqe, },
+ .opcode = IB_WR_RDMA_WRITE,
+ },
+ };
int ret;
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
@@ -2203,11 +2213,10 @@ static void __ib_drain_sq(struct ib_qp *qp)
return;
}
- swr.wr_cqe = &sdrain.cqe;
sdrain.cqe.done = ib_drain_qp_done;
init_completion(&sdrain.done);
- ret = ib_post_send(qp, &swr, &bad_swr);
+ ret = ib_post_send(qp, &swr.wr, &bad_swr);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
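
The drain WR is now built as an ib_rdma_wr whose embedded ib_send_wr is
filled by a designated initializer; the inner positional braces initialize
the anonymous union that holds wr_cqe. A standalone sketch of that
initialization pattern (simplified, hypothetical types):

    #include <stddef.h>

    struct cqe { void (*done)(void); };

    struct send_wr {
            struct send_wr *next;
            union {                 /* anonymous union, as in ib_send_wr */
                    unsigned long wr_id;
                    struct cqe *wr_cqe;
            };
            int opcode;
    };

    struct rdma_wr {
            struct send_wr wr;      /* embedded base WR */
            unsigned long remote_addr;
    };

    static struct cqe drain_cqe;

    /* The unnamed inner braces select the union; .wr_cqe names its member. */
    static struct rdma_wr swr = {
            .wr = {
                    .next = NULL,
                    { .wr_cqe = &drain_cqe },
                    .opcode = 1,
            },
    };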
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 8301d7e..a76e206 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -314,12 +314,11 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
return rc;
}
-int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
- unsigned int index, void **context)
+int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
{
int rc = 0;
struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev);
struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
struct bnxt_qplib_gid *gid_to_del;
@@ -365,15 +364,14 @@ int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
return rc;
}
-int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num,
- unsigned int index, const union ib_gid *gid,
+int bnxt_re_add_gid(const union ib_gid *gid,
const struct ib_gid_attr *attr, void **context)
{
int rc;
u32 tbl_idx = 0;
u16 vlan_id = 0xFFFF;
struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev);
struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
if ((attr->ndev) && is_vlan_dev(attr->ndev))
@@ -718,8 +716,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
grh->sgid_index);
goto fail;
}
- if (sgid_attr.ndev)
- dev_put(sgid_attr.ndev);
+ dev_put(sgid_attr.ndev);
/* Get network header type for this GID */
nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
switch (nw_type) {
@@ -1540,14 +1537,13 @@ int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, struct ib_recv_wr *wr,
ib_srq);
struct bnxt_qplib_swqe wqe;
unsigned long flags;
- int rc = 0, payload_sz = 0;
+ int rc = 0;
spin_lock_irqsave(&srq->lock, flags);
while (wr) {
/* Transcribe each ib_recv_wr to qplib_swqe */
wqe.num_sge = wr->num_sge;
- payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list,
- wr->num_sge);
+ bnxt_re_build_sgl(wr->sg_list, wqe.sg_list, wr->num_sge);
wqe.wr_id = wr->wr_id;
wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
@@ -1698,7 +1694,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
status = ib_get_cached_gid(&rdev->ibdev, 1,
grh->sgid_index,
&sgid, &sgid_attr);
- if (!status && sgid_attr.ndev) {
+ if (!status) {
memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr,
ETH_ALEN);
dev_put(sgid_attr.ndev);
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index e62b7c2..5c6414c 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -157,10 +157,8 @@ int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str);
int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
u16 index, u16 *pkey);
-int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
- unsigned int index, void **context);
-int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num,
- unsigned int index, const union ib_gid *gid,
+int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context);
+int bnxt_re_add_gid(const union ib_gid *gid,
const struct ib_gid_attr *attr, void **context);
int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
int index, union ib_gid *gid);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index f6e3617..f6c739e 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -574,7 +574,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->get_port_immutable = bnxt_re_get_port_immutable;
ibdev->get_dev_fw_str = bnxt_re_query_fw_str;
ibdev->query_pkey = bnxt_re_query_pkey;
- ibdev->query_gid = bnxt_re_query_gid;
ibdev->get_netdev = bnxt_re_get_netdev;
ibdev->add_gid = bnxt_re_add_gid;
ibdev->del_gid = bnxt_re_del_gid;
@@ -619,6 +618,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats;
ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats;
+ ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
return ib_register_device(ibdev, NULL);
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index ee98e5e..2f3f32ea 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -154,7 +154,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
}
- attr->is_atomic = 0;
+ attr->is_atomic = false;
bail:
bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
return rc;
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig
index 431be73..a7b77cb 100644
--- a/drivers/infiniband/hw/cxgb3/Kconfig
+++ b/drivers/infiniband/hw/cxgb3/Kconfig
@@ -16,12 +16,3 @@ config INFINIBAND_CXGB3
To compile this driver as a module, choose M here: the module
will be called iw_cxgb3.
-
-config INFINIBAND_CXGB3_DEBUG
- bool "Verbose debugging output"
- depends on INFINIBAND_CXGB3
- default n
- ---help---
- This option causes the Chelsio RDMA driver to produce copious
- amounts of debug messages. Select this if you are developing
- the driver or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
index 2c66d35..66fe091 100644
--- a/drivers/infiniband/hw/cxgb3/Makefile
+++ b/drivers/infiniband/hw/cxgb3/Makefile
@@ -5,5 +5,3 @@ obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
iwch_provider.o iwch.o cxio_hal.o cxio_resource.o
-
-ccflags-$(CONFIG_INFINIBAND_CXGB3_DEBUG) += -DDEBUG
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
deleted file mode 100644
index 97dbe72..0000000
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef DEBUG
-#include <linux/types.h>
-#include <linux/slab.h>
-#include "common.h"
-#include "cxgb3_ioctl.h"
-#include "cxio_hal.h"
-#include "cxio_wr.h"
-
-void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag)
-{
- struct ch_mem_range *m;
- u64 *data;
- int rc;
- int size = 32;
-
- m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m)
- return;
-
- m->mem_id = MEM_PMRX;
- m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
- m->len = size;
- pr_debug("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
- if (rc) {
- pr_debug("%s toectl returned error %d\n", __func__, rc);
- kfree(m);
- return;
- }
-
- data = (u64 *)m->buf;
- while (size > 0) {
- pr_debug("TPT %08x: %016llx\n",
- m->addr, (unsigned long long)*data);
- size -= 8;
- data++;
- m->addr += 8;
- }
- kfree(m);
-}
-
-void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift)
-{
- struct ch_mem_range *m;
- u64 *data;
- int rc;
- int size, npages;
-
- shift += 12;
- npages = (len + (1ULL << shift) - 1) >> shift;
- size = npages * sizeof(u64);
-
- m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m)
- return;
-
- m->mem_id = MEM_PMRX;
- m->addr = pbl_addr;
- m->len = size;
- pr_debug("%s PBL addr 0x%x len %d depth %d\n",
- __func__, m->addr, m->len, npages);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
- if (rc) {
- pr_debug("%s toectl returned error %d\n", __func__, rc);
- kfree(m);
- return;
- }
-
- data = (u64 *)m->buf;
- while (size > 0) {
- pr_debug("PBL %08x: %016llx\n",
- m->addr, (unsigned long long)*data);
- size -= 8;
- data++;
- m->addr += 8;
- }
- kfree(m);
-}
-
-void cxio_dump_wqe(union t3_wr *wqe)
-{
- __be64 *data = (__be64 *)wqe;
- uint size = (uint)(be64_to_cpu(*data) & 0xff);
-
- if (size == 0)
- size = 8;
- while (size > 0) {
- pr_debug("WQE %p: %016llx\n",
- data, (unsigned long long)be64_to_cpu(*data));
- size--;
- data++;
- }
-}
-
-void cxio_dump_wce(struct t3_cqe *wce)
-{
- __be64 *data = (__be64 *)wce;
- int size = sizeof(*wce);
-
- while (size > 0) {
- pr_debug("WCE %p: %016llx\n",
- data, (unsigned long long)be64_to_cpu(*data));
- size -= 8;
- data++;
- }
-}
-
-void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents)
-{
- struct ch_mem_range *m;
- int size = nents * 64;
- u64 *data;
- int rc;
-
- m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m)
- return;
-
- m->mem_id = MEM_PMRX;
- m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
- m->len = size;
- pr_debug("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
- if (rc) {
- pr_debug("%s toectl returned error %d\n", __func__, rc);
- kfree(m);
- return;
- }
-
- data = (u64 *)m->buf;
- while (size > 0) {
- pr_debug("RQT %08x: %016llx\n",
- m->addr, (unsigned long long)*data);
- size -= 8;
- data++;
- m->addr += 8;
- }
- kfree(m);
-}
-
-void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid)
-{
- struct ch_mem_range *m;
- int size = TCB_SIZE;
- u32 *data;
- int rc;
-
- m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m)
- return;
-
- m->mem_id = MEM_CM;
- m->addr = hwtid * size;
- m->len = size;
- pr_debug("%s TCB %d len %d\n", __func__, m->addr, m->len);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
- if (rc) {
- pr_debug("%s toectl returned error %d\n", __func__, rc);
- kfree(m);
- return;
- }
-
- data = (u32 *)m->buf;
- while (size > 0) {
- printk("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n",
- m->addr,
- *(data+2), *(data+3), *(data),*(data+1),
- *(data+6), *(data+7), *(data+4), *(data+5));
- size -= 32;
- data += 8;
- m->addr += 32;
- }
- kfree(m);
-}
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 7e70c54..c64e50b 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -202,13 +202,4 @@ int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb);
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#ifdef DEBUG
-void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
-void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint len, u8 shift);
-void cxio_dump_wqe(union t3_wr *wqe);
-void cxio_dump_wce(struct t3_cqe *wce);
-void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents);
-void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid);
-#endif
-
#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index dd5348e..0a8542c 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -200,9 +200,6 @@ int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
spin_lock_irqsave(&chp->lock, flags);
for (npolled = 0; npolled < num_entries; ++npolled) {
-#ifdef DEBUG
- int i=0;
-#endif
/*
* Because T3 can post CQEs that are _not_ associated
@@ -211,9 +208,6 @@ int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
*/
do {
err = iwch_poll_cq_one(rhp, chp, wc + npolled);
-#ifdef DEBUG
- BUG_ON(++i > 1000);
-#endif
} while (err == -EAGAIN);
if (err <= 0)
break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index a578ca5..be097c6 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -440,7 +440,9 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
php->pdid = pdid;
php->rhp = rhp;
if (context) {
- if (ib_copy_to_udata(udata, &php->pdid, sizeof (__u32))) {
+ struct iwch_alloc_pd_resp resp = {.pdid = php->pdid};
+
+ if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
iwch_deallocate_pd(&php->ibpd);
return ERR_PTR(-EFAULT);
}
@@ -1439,6 +1441,7 @@ int iwch_register_device(struct iwch_dev *dev)
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.t3cdev_p->lldev->name,
sizeof(dev->ibdev.iwcm->ifname));
+ dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
goto bail1;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index e96771d..feeb8ee 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -220,14 +220,14 @@ static void set_ep_sin_addrs(struct c4iw_ep *ep,
{
struct iw_cm_id *id = ep->com.cm_id;
- *lsin = (struct sockaddr_in *)&ep->com.local_addr;
- *rsin = (struct sockaddr_in *)&ep->com.remote_addr;
+ *m_lsin = (struct sockaddr_in *)&ep->com.local_addr;
+ *m_rsin = (struct sockaddr_in *)&ep->com.remote_addr;
if (id) {
- *m_lsin = (struct sockaddr_in *)&id->m_local_addr;
- *m_rsin = (struct sockaddr_in *)&id->m_remote_addr;
+ *lsin = (struct sockaddr_in *)&id->local_addr;
+ *rsin = (struct sockaddr_in *)&id->remote_addr;
} else {
- *m_lsin = &zero_sin;
- *m_rsin = &zero_sin;
+ *lsin = &zero_sin;
+ *rsin = &zero_sin;
}
}
@@ -239,14 +239,14 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep,
{
struct iw_cm_id *id = ep->com.cm_id;
- *lsin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
- *rsin6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+ *m_lsin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+ *m_rsin6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
if (id) {
- *m_lsin6 = (struct sockaddr_in6 *)&id->m_local_addr;
- *m_rsin6 = (struct sockaddr_in6 *)&id->m_remote_addr;
+ *lsin6 = (struct sockaddr_in6 *)&id->local_addr;
+ *rsin6 = (struct sockaddr_in6 *)&id->remote_addr;
} else {
- *m_lsin6 = &zero_sin6;
- *m_rsin6 = &zero_sin6;
+ *lsin6 = &zero_sin6;
+ *rsin6 = &zero_sin6;
}
}
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 7e0eb20..e90f2fd 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -391,6 +391,9 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ mhp->ibmr.length = mhp->attr.len;
+ mhp->ibmr.iova = mhp->attr.va_fbo;
+ mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12);
pr_debug("mmid 0x%x mhp %p\n", mmid, mhp);
return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
}
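
The added lines derive ibmr.page_size from the driver's internal encoding,
which stores the page size as a shift above 4 KiB: attr.page_size of 0 means
4 KiB, 1 means 8 KiB, and so on. A one-line standalone helper showing the
decode (names hypothetical):

    #include <stdint.h>

    /* attr_page_size is stored as a shift above 4 KiB (1 << 12). */
    static uint32_t mr_page_size(uint8_t attr_page_size)
    {
            return UINT32_C(1) << (attr_page_size + 12);
    }
    /* mr_page_size(0) == 4096, mr_page_size(4) == 65536 */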
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 1b5c6cd..0b9cc73 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -281,7 +281,9 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
php->pdid = pdid;
php->rhp = rhp;
if (context) {
- if (ib_copy_to_udata(udata, &php->pdid, sizeof(u32))) {
+ struct c4iw_alloc_pd_resp uresp = {.pdid = php->pdid};
+
+ if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
c4iw_deallocate_pd(&php->ibpd);
return ERR_PTR(-EFAULT);
}
@@ -531,6 +533,24 @@ static void get_dev_fw_str(struct ib_device *dev, char *str)
FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers));
}
+static struct net_device *get_netdev(struct ib_device *dev, u8 port)
+{
+ struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, ibdev);
+ struct c4iw_rdev *rdev = &c4iw_dev->rdev;
+ struct net_device *ndev;
+
+ if (!port || port > rdev->lldi.nports)
+ return NULL;
+
+ rcu_read_lock();
+ ndev = rdev->lldi.ports[port - 1];
+ if (ndev)
+ dev_hold(ndev);
+ rcu_read_unlock();
+
+ return ndev;
+}
+
void c4iw_register_device(struct work_struct *work)
{
int ret;
@@ -609,6 +629,7 @@ void c4iw_register_device(struct work_struct *work)
dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
dev->ibdev.get_port_immutable = c4iw_port_immutable;
dev->ibdev.get_dev_fw_str = get_dev_fw_str;
+ dev->ibdev.get_netdev = get_netdev;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm) {
@@ -627,6 +648,7 @@ void c4iw_register_device(struct work_struct *work)
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
sizeof(dev->ibdev.iwcm->ifname));
+ dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
goto err_kfree_iwcm;
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index addc68e..46d1475 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -390,6 +390,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
svc_type = IB_CC_SVCTYPE_UC;
break;
default:
+ rcu_read_unlock();
goto drop;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 90bc8c7..32c4826 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -70,7 +70,6 @@
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <rdma/rdma_vt.h>
-#include <rdma/opa_addr.h>
#include "chip_registers.h"
#include "common.h"
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index d30dd1a..1697d96 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -481,7 +481,6 @@ static void iowait_sdma_drained(struct iowait *wait)
}
/**
- *
* qp_to_sdma_engine - map a qp to a send engine
* @qp: the QP
* @sc5: the 5 bit sc
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index c1c596a..0d5330b 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -473,7 +473,7 @@ nomem:
tinfo->tidcnt = tididx;
tinfo->length = mapped_pages * PAGE_SIZE;
- if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
+ if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
tidlist, sizeof(tidlist[0]) * tididx)) {
/*
* On failure to copy to the user level, we need to undo
@@ -513,7 +513,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
if (unlikely(tinfo->tidcnt > fd->tid_used))
return -EINVAL;
- tidinfo = memdup_user((void __user *)(unsigned long)tinfo->tidlist,
+ tidinfo = memdup_user(u64_to_user_ptr(tinfo->tidlist),
sizeof(tidinfo[0]) * tinfo->tidcnt);
if (IS_ERR(tidinfo))
return PTR_ERR(tidinfo);
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 471d55c..c8cf4d4 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1960,7 +1960,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
i,
ppd->pkeys);
- ret = rvt_register_device(&dd->verbs_dev.rdi);
+ ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1);
if (ret)
goto err_verbs_txreq;
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index 97bf2cd..cf03404 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -7,7 +7,7 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o
hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
- hns_roce_cq.o hns_roce_alloc.o
+ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o
obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o
hns-roce-hw-v1-objs := hns_roce_hw_v1.o
obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 7dd6a66..d749286 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -68,11 +68,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
return ERR_PTR(ret);
}
- if (gid_attr.ndev) {
- if (is_vlan_dev(gid_attr.ndev))
- vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
- dev_put(gid_attr.ndev);
- }
+ if (is_vlan_dev(gid_attr.ndev))
+ vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
+ dev_put(gid_attr.ndev);
if (vlan_tag < 0x1000)
vlan_tag |= (rdma_ah_get_sl(ah_attr) &
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index bccc9b5..14734d0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -315,6 +315,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
struct device *dev = hr_dev->dev;
struct hns_roce_ib_create_cq ucmd;
+ struct hns_roce_ib_create_cq_resp resp = {};
struct hns_roce_cq *hr_cq = NULL;
struct hns_roce_uar *uar = NULL;
int vector = attr->comp_vector;
@@ -354,15 +355,36 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
goto err_cq;
}
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
+ (udata->outlen >= sizeof(resp))) {
+ ret = hns_roce_db_map_user(to_hr_ucontext(context),
+ ucmd.db_addr, &hr_cq->db);
+ if (ret) {
+ dev_err(dev, "cq record doorbell map failed!\n");
+ goto err_mtt;
+ }
+ hr_cq->db_en = 1;
+ resp.cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB;
+ }
+
/* Get user space parameters */
uar = &to_hr_ucontext(context)->uar;
} else {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
+ ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1);
+ if (ret)
+ goto err_cq;
+
+ hr_cq->set_ci_db = hr_cq->db.db_record;
+ *hr_cq->set_ci_db = 0;
+ }
+
/* Init mtt table and write buff address to mtt table */
ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf,
cq_entries);
if (ret) {
dev_err(dev, "Failed to alloc_cq_buf.\n");
- goto err_cq;
+ goto err_db;
}
uar = &hr_dev->priv_uar;
@@ -375,7 +397,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
hr_cq, vector);
if (ret) {
dev_err(dev, "Create CQ: failed to cq_alloc.\n");
- goto err_mtt;
+ goto err_dbmap;
}
/*
@@ -393,10 +415,10 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
hr_cq->cq_depth = cq_entries;
if (context) {
- if (ib_copy_to_udata(udata, &hr_cq->cqn, sizeof(u64))) {
- ret = -EFAULT;
+ resp.cqn = hr_cq->cqn;
+ ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (ret)
goto err_cqc;
- }
}
return &hr_cq->ib_cq;
@@ -404,6 +426,12 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
err_cqc:
hns_roce_free_cq(hr_dev, hr_cq);
+err_dbmap:
+ if (context && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
+ (udata->outlen >= sizeof(resp)))
+ hns_roce_db_unmap_user(to_hr_ucontext(context),
+ &hr_cq->db);
+
err_mtt:
hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
if (context)
@@ -412,6 +440,10 @@ err_mtt:
hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
hr_cq->ib_cq.cqe);
+err_db:
+ if (!context && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
+ hns_roce_free_db(hr_dev, &hr_cq->db);
+
err_cq:
kfree(hr_cq);
return ERR_PTR(ret);
@@ -430,12 +462,20 @@ int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq)
hns_roce_free_cq(hr_dev, hr_cq);
hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
- if (ib_cq->uobject)
+ if (ib_cq->uobject) {
ib_umem_release(hr_cq->umem);
- else
+
+ if (hr_cq->db_en == 1)
+ hns_roce_db_unmap_user(
+ to_hr_ucontext(ib_cq->uobject->context),
+ &hr_cq->db);
+ } else {
/* Free the buffer of the stored CQ */
hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
ib_cq->cqe);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
+ hns_roce_free_db(hr_dev, &hr_cq->db);
+ }
kfree(hr_cq);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
new file mode 100644
index 0000000..ebee278
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2017 Hisilicon Limited.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ */
+
+#include <linux/platform_device.h>
+#include <rdma/ib_umem.h>
+#include "hns_roce_device.h"
+
+int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
+ struct hns_roce_db *db)
+{
+ struct hns_roce_user_db_page *page;
+ int ret = 0;
+
+ mutex_lock(&context->page_mutex);
+
+ list_for_each_entry(page, &context->page_list, list)
+ if (page->user_virt == (virt & PAGE_MASK))
+ goto found;
+
+ page = kmalloc(sizeof(*page), GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ refcount_set(&page->refcount, 1);
+ page->user_virt = (virt & PAGE_MASK);
+ page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
+ PAGE_SIZE, 0, 0);
+ if (IS_ERR(page->umem)) {
+ ret = PTR_ERR(page->umem);
+ kfree(page);
+ goto out;
+ }
+
+ list_add(&page->list, &context->page_list);
+
+found:
+ db->dma = sg_dma_address(page->umem->sg_head.sgl) +
+ (virt & ~PAGE_MASK);
+ db->u.user_page = page;
+ refcount_inc(&page->refcount);
+
+out:
+ mutex_unlock(&context->page_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL(hns_roce_db_map_user);
+
+void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
+ struct hns_roce_db *db)
+{
+ mutex_lock(&context->page_mutex);
+
+ refcount_dec(&db->u.user_page->refcount);
+ if (refcount_dec_if_one(&db->u.user_page->refcount)) {
+ list_del(&db->u.user_page->list);
+ ib_umem_release(db->u.user_page->umem);
+ kfree(db->u.user_page);
+ }
+
+ mutex_unlock(&context->page_mutex);
+}
+EXPORT_SYMBOL(hns_roce_db_unmap_user);
+
+static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir(
+ struct device *dma_device)
+{
+ struct hns_roce_db_pgdir *pgdir;
+
+ pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
+ if (!pgdir)
+ return NULL;
+
+ bitmap_fill(pgdir->order1, HNS_ROCE_DB_PER_PAGE / 2);
+ pgdir->bits[0] = pgdir->order0;
+ pgdir->bits[1] = pgdir->order1;
+ pgdir->page = dma_alloc_coherent(dma_device, PAGE_SIZE,
+ &pgdir->db_dma, GFP_KERNEL);
+ if (!pgdir->page) {
+ kfree(pgdir);
+ return NULL;
+ }
+
+ return pgdir;
+}
+
+static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir,
+ struct hns_roce_db *db, int order)
+{
+ int o;
+ int i;
+
+ for (o = order; o <= 1; ++o) {
+ i = find_first_bit(pgdir->bits[o], HNS_ROCE_DB_PER_PAGE >> o);
+ if (i < HNS_ROCE_DB_PER_PAGE >> o)
+ goto found;
+ }
+
+ return -ENOMEM;
+
+found:
+ clear_bit(i, pgdir->bits[o]);
+
+ i <<= o;
+
+ if (o > order)
+ set_bit(i ^ 1, pgdir->bits[order]);
+
+ db->u.pgdir = pgdir;
+ db->index = i;
+ db->db_record = pgdir->page + db->index;
+ db->dma = pgdir->db_dma + db->index * 4;
+ db->order = order;
+
+ return 0;
+}
+
+int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db,
+ int order)
+{
+ struct hns_roce_db_pgdir *pgdir;
+ int ret = 0;
+
+ mutex_lock(&hr_dev->pgdir_mutex);
+
+ list_for_each_entry(pgdir, &hr_dev->pgdir_list, list)
+ if (!hns_roce_alloc_db_from_pgdir(pgdir, db, order))
+ goto out;
+
+ pgdir = hns_roce_alloc_db_pgdir(hr_dev->dev);
+ if (!pgdir) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ list_add(&pgdir->list, &hr_dev->pgdir_list);
+
+ /* This should never fail -- we just allocated an empty page: */
+ WARN_ON(hns_roce_alloc_db_from_pgdir(pgdir, db, order));
+
+out:
+ mutex_unlock(&hr_dev->pgdir_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hns_roce_alloc_db);
+
+void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db)
+{
+ int o;
+ int i;
+
+ mutex_lock(&hr_dev->pgdir_mutex);
+
+ o = db->order;
+ i = db->index;
+
+ if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) {
+ clear_bit(i ^ 1, db->u.pgdir->order0);
+ ++o;
+ }
+
+ i >>= o;
+ set_bit(i, db->u.pgdir->bits[o]);
+
+ if (bitmap_full(db->u.pgdir->order1, HNS_ROCE_DB_PER_PAGE / 2)) {
+ dma_free_coherent(hr_dev->dev, PAGE_SIZE, db->u.pgdir->page,
+ db->u.pgdir->db_dma);
+ list_del(&db->u.pgdir->list);
+ kfree(db->u.pgdir);
+ }
+
+ mutex_unlock(&hr_dev->pgdir_mutex);
+}
+EXPORT_SYMBOL_GPL(hns_roce_free_db);
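
The pgdir allocator is a two-level buddy scheme over one DMA page: an order-1
slot covers two adjacent order-0 doorbell records, i ^ 1 is a slot's buddy,
splitting an order-1 block frees its buddy at order 0, and freeing merges
buddies back up. A standalone miniature with the same split/merge logic
(sizes hypothetical; i is always an order-0 index):

    #include <stdbool.h>

    #define NSLOT 8                 /* order-0 slots per "page" */

    static bool free0[NSLOT];       /* order-0 free map */
    static bool free1[NSLOT / 2];   /* order-1 free map */

    static void pgdir_init(void)
    {
            int i;

            for (i = 0; i < NSLOT / 2; i++)
                    free1[i] = true;        /* only order-1 blocks at first */
    }

    /* Returns the order-0 index of the allocation, or -1 if full. */
    static int pgdir_alloc(int order)
    {
            int i;

            if (order == 0)
                    for (i = 0; i < NSLOT; i++)
                            if (free0[i]) {
                                    free0[i] = false;
                                    return i;
                            }

            for (i = 0; i < NSLOT / 2; i++)
                    if (free1[i]) {
                            free1[i] = false;
                            if (order == 0)
                                    free0[(i * 2) ^ 1] = true; /* split */
                            return i * 2;
                    }
            return -1;
    }

    static void pgdir_free(int i, int order)
    {
            if (order == 0 && free0[i ^ 1]) {
                    free0[i ^ 1] = false;   /* merge with free buddy */
                    free1[i / 2] = true;
            } else if (order == 0) {
                    free0[i] = true;
            } else {
                    free1[i / 2] = true;
            }
    }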
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 165a09b3..fb305b7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -105,6 +105,14 @@
#define PAGES_SHIFT_24 24
#define PAGES_SHIFT_32 32
+enum {
+ HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
+};
+
+enum {
+ HNS_ROCE_SUPPORT_CQ_RECORD_DB = 1 << 0,
+};
+
enum hns_roce_qp_state {
HNS_ROCE_QP_STATE_RST,
HNS_ROCE_QP_STATE_INIT,
@@ -178,7 +186,8 @@ enum {
enum {
HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0),
HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1),
- HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2)
+ HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
+ HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3)
};
enum hns_roce_mtt_type {
@@ -186,6 +195,10 @@ enum hns_roce_mtt_type {
MTT_TYPE_CQE,
};
+enum {
+ HNS_ROCE_DB_PER_PAGE = PAGE_SIZE / 4
+};
+
#define HNS_ROCE_CMD_SUCCESS 1
#define HNS_ROCE_PORT_DOWN 0
@@ -203,6 +216,8 @@ struct hns_roce_uar {
struct hns_roce_ucontext {
struct ib_ucontext ibucontext;
struct hns_roce_uar uar;
+ struct list_head page_list;
+ struct mutex page_mutex;
};
struct hns_roce_pd {
@@ -335,6 +350,33 @@ struct hns_roce_buf {
int page_shift;
};
+struct hns_roce_db_pgdir {
+ struct list_head list;
+ DECLARE_BITMAP(order0, HNS_ROCE_DB_PER_PAGE);
+ DECLARE_BITMAP(order1, HNS_ROCE_DB_PER_PAGE / 2);
+ unsigned long *bits[2];
+ u32 *page;
+ dma_addr_t db_dma;
+};
+
+struct hns_roce_user_db_page {
+ struct list_head list;
+ struct ib_umem *umem;
+ unsigned long user_virt;
+ refcount_t refcount;
+};
+
+struct hns_roce_db {
+ u32 *db_record;
+ union {
+ struct hns_roce_db_pgdir *pgdir;
+ struct hns_roce_user_db_page *user_page;
+ } u;
+ dma_addr_t dma;
+ int index;
+ int order;
+};
+
struct hns_roce_cq_buf {
struct hns_roce_buf hr_buf;
struct hns_roce_mtt hr_mtt;
@@ -343,6 +385,8 @@ struct hns_roce_cq_buf {
struct hns_roce_cq {
struct ib_cq ib_cq;
struct hns_roce_cq_buf hr_buf;
+ struct hns_roce_db db;
+ u8 db_en;
spinlock_t lock;
struct ib_umem *umem;
void (*comp)(struct hns_roce_cq *cq);
@@ -351,6 +395,7 @@ struct hns_roce_cq {
struct hns_roce_uar *uar;
u32 cq_depth;
u32 cons_index;
+ u32 *set_ci_db;
void __iomem *cq_db_l;
u16 *tptr_addr;
int arm_sn;
@@ -466,6 +511,8 @@ struct hns_roce_qp {
struct ib_qp ibqp;
struct hns_roce_buf hr_buf;
struct hns_roce_wq rq;
+ struct hns_roce_db rdb;
+ u8 rdb_en;
u32 doorbell_qpn;
__le32 sq_signal_bits;
u32 sq_next_wqe;
@@ -725,6 +772,8 @@ struct hns_roce_dev {
spinlock_t bt_cmd_lock;
struct hns_roce_ib_iboe iboe;
+ struct list_head pgdir_list;
+ struct mutex pgdir_mutex;
int irq[HNS_ROCE_MAX_IRQ_NUM];
u8 __iomem *reg_base;
struct hns_roce_caps caps;
@@ -930,6 +979,14 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq);
void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
+int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
+ struct hns_roce_db *db);
+void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
+ struct hns_roce_db *db);
+int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db,
+ int order);
+void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db);
+
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index da13bd7..47e1b6a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1687,13 +1687,13 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M,
ROCEE_MB6_ROCEE_MB_TOKEN_S, token);
- __raw_writeq(cpu_to_le64(in_param), hcr + 0);
- __raw_writeq(cpu_to_le64(out_param), hcr + 2);
- __raw_writel(cpu_to_le32(in_modifier), hcr + 4);
+ writeq(in_param, hcr + 0);
+ writeq(out_param, hcr + 2);
+ writel(in_modifier, hcr + 4);
/* Memory barrier */
wmb();
- __raw_writel(cpu_to_le32(val), hcr + 5);
+ writel(val, hcr + 5);
mmiowb();
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index ec63877..8b84ab7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -498,7 +498,6 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct hns_roce_v2_wqe_data_seg *dseg;
struct hns_roce_rinl_sge *sge_list;
struct device *dev = hr_dev->dev;
- struct hns_roce_v2_db rq_db;
unsigned long flags;
void *wqe = NULL;
int ret = 0;
@@ -509,7 +508,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_lock_irqsave(&hr_qp->rq.lock, flags);
ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1);
- if (hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_ERR) {
+ if (hr_qp->state == IB_QPS_RESET) {
spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
*bad_wr = wr;
return -EINVAL;
@@ -564,17 +563,7 @@ out:
/* Memory barrier */
wmb();
- rq_db.byte_4 = 0;
- rq_db.parameter = 0;
-
- roce_set_field(rq_db.byte_4, V2_DB_BYTE_4_TAG_M,
- V2_DB_BYTE_4_TAG_S, hr_qp->qpn);
- roce_set_field(rq_db.byte_4, V2_DB_BYTE_4_CMD_M,
- V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_RQ_DB);
- roce_set_field(rq_db.parameter, V2_DB_PARAMETER_CONS_IDX_M,
- V2_DB_PARAMETER_CONS_IDX_S, hr_qp->rq.head);
-
- hns_roce_write64_k((__le32 *)&rq_db, hr_qp->rq.db_reg_l);
+ *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff;
}
spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
@@ -1168,7 +1157,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR |
HNS_ROCE_CAP_FLAG_ROCE_V1_V2 |
- HNS_ROCE_CAP_FLAG_RQ_INLINE;
+ HNS_ROCE_CAP_FLAG_RQ_INLINE |
+ HNS_ROCE_CAP_FLAG_RECORD_DB;
caps->pkey_table_len[0] = 1;
caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
@@ -1228,14 +1218,14 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
roce_set_field(val1, HNS_ROCE_VF_MB5_TOKEN_MASK,
HNS_ROCE_VF_MB5_TOKEN_SHIFT, token);
- __raw_writeq(cpu_to_le64(in_param), hcr + 0);
- __raw_writeq(cpu_to_le64(out_param), hcr + 2);
+ writeq(in_param, hcr + 0);
+ writeq(out_param, hcr + 2);
/* Memory barrier */
wmb();
- __raw_writel(cpu_to_le32(val0), hcr + 4);
- __raw_writel(cpu_to_le32(val1), hcr + 5);
+ writel(val0, hcr + 4);
+ writel(val1, hcr + 5);
mmiowb();
@@ -1507,24 +1497,7 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq)
static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
{
- struct hns_roce_v2_cq_db cq_db;
-
- cq_db.byte_4 = 0;
- cq_db.parameter = 0;
-
- roce_set_field(cq_db.byte_4, V2_CQ_DB_BYTE_4_TAG_M,
- V2_CQ_DB_BYTE_4_TAG_S, hr_cq->cqn);
- roce_set_field(cq_db.byte_4, V2_CQ_DB_BYTE_4_CMD_M,
- V2_CQ_DB_BYTE_4_CMD_S, HNS_ROCE_V2_CQ_DB_PTR);
-
- roce_set_field(cq_db.parameter, V2_CQ_DB_PARAMETER_CONS_IDX_M,
- V2_CQ_DB_PARAMETER_CONS_IDX_S,
- cons_index & ((hr_cq->cq_depth << 1) - 1));
- roce_set_field(cq_db.parameter, V2_CQ_DB_PARAMETER_CMD_SN_M,
- V2_CQ_DB_PARAMETER_CMD_SN_S, 1);
-
- hns_roce_write64_k((__be32 *)&cq_db, hr_cq->cq_db_l);
-
+ *hr_cq->set_ci_db = cons_index & 0xffffff;
}
static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
@@ -1637,6 +1610,16 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
roce_set_field(cq_context->byte_40_cqe_ba, V2_CQC_BYTE_40_CQE_BA_M,
V2_CQC_BYTE_40_CQE_BA_S, (dma_handle >> (32 + 3)));
+ if (hr_cq->db_en)
+ roce_set_bit(cq_context->byte_44_db_record,
+ V2_CQC_BYTE_44_DB_RECORD_EN_S, 1);
+
+ roce_set_field(cq_context->byte_44_db_record,
+ V2_CQC_BYTE_44_DB_RECORD_ADDR_M,
+ V2_CQC_BYTE_44_DB_RECORD_ADDR_S,
+ ((u32)hr_cq->db.dma) >> 1);
+ cq_context->db_record_addr = hr_cq->db.dma >> 32;
+
roce_set_field(cq_context->byte_56_cqe_period_maxcnt,
V2_CQC_BYTE_56_CQ_MAX_CNT_M,
V2_CQC_BYTE_56_CQ_MAX_CNT_S,
@@ -2274,6 +2257,23 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
hr_qp->qkey = attr->qkey;
}
+ if (hr_qp->rdb_en) {
+ roce_set_bit(context->byte_68_rq_db,
+ V2_QPC_BYTE_68_RQ_RECORD_EN_S, 1);
+ roce_set_bit(qpc_mask->byte_68_rq_db,
+ V2_QPC_BYTE_68_RQ_RECORD_EN_S, 0);
+ }
+
+ roce_set_field(context->byte_68_rq_db,
+ V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M,
+ V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S,
+ ((u32)hr_qp->rdb.dma) >> 1);
+ roce_set_field(qpc_mask->byte_68_rq_db,
+ V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M,
+ V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S, 0);
+ context->rq_db_record_addr = hr_qp->rdb.dma >> 32;
+ qpc_mask->rq_db_record_addr = 0;
+
roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1);
roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0);
@@ -3211,6 +3211,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
hr_qp->sq.tail = 0;
hr_qp->sq_next_wqe = 0;
hr_qp->next_sge = 0;
+ if (hr_qp->rq.wqe_cnt)
+ *hr_qp->rdb.db_record = 0;
}
out:
@@ -3437,11 +3439,17 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
if (is_user) {
+ if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1))
+ hns_roce_db_unmap_user(
+ to_hr_ucontext(hr_qp->ibqp.uobject->context),
+ &hr_qp->rdb);
ib_umem_release(hr_qp->umem);
} else {
kfree(hr_qp->sq.wrid);
kfree(hr_qp->rq.wrid);
hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
+ if (hr_qp->rq.wqe_cnt)
+ hns_roce_free_db(hr_dev, &hr_qp->rdb);
}
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 2bf8a47..182b672 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -299,6 +299,9 @@ struct hns_roce_v2_cq_context {
#define V2_CQC_BYTE_44_DB_RECORD_EN_S 0
+#define V2_CQC_BYTE_44_DB_RECORD_ADDR_S 1
+#define V2_CQC_BYTE_44_DB_RECORD_ADDR_M GENMASK(31, 1)
+
#define V2_CQC_BYTE_52_CQE_CNT_S 0
#define V2_CQC_BYTE_52_CQE_CNT_M GENMASK(23, 0)
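
The 64-bit record address is split across the CQ context: bit 0 of byte_44 is the enable, bits [31:1] hold the low dword shifted right by one (the doorbell word is at least 2-byte aligned, so bit 0 carries no information), and the high dword lands in db_record_addr. A worked sketch of the packing, assuming a dma_addr_t db_dma:

/* Sketch only: how the byte_44 fields above combine. */
u32 byte_44 = 0;

byte_44 |= 1 << V2_CQC_BYTE_44_DB_RECORD_EN_S;              /* enable */
byte_44 |= (((u32)db_dma) >> 1) << V2_CQC_BYTE_44_DB_RECORD_ADDR_S;
/* byte_44 == (low dword of db_dma & ~1u) | enable bit */
u32 db_record_addr = (u32)(db_dma >> 32);                   /* high dword */
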
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index eb9a69f..9d48bc0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -74,12 +74,11 @@ static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
return hr_dev->hw->set_mac(hr_dev, phy_port, addr);
}
-static int hns_roce_add_gid(struct ib_device *device, u8 port_num,
- unsigned int index, const union ib_gid *gid,
+static int hns_roce_add_gid(const union ib_gid *gid,
const struct ib_gid_attr *attr, void **context)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(device);
- u8 port = port_num - 1;
+ struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
+ u8 port = attr->port_num - 1;
unsigned long flags;
int ret;
@@ -88,20 +87,20 @@ static int hns_roce_add_gid(struct ib_device *device, u8 port_num,
spin_lock_irqsave(&hr_dev->iboe.lock, flags);
- ret = hr_dev->hw->set_gid(hr_dev, port, index, (union ib_gid *)gid,
- attr);
+ ret = hr_dev->hw->set_gid(hr_dev, port, attr->index,
+ (union ib_gid *)gid, attr);
spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
return ret;
}
-static int hns_roce_del_gid(struct ib_device *device, u8 port_num,
- unsigned int index, void **context)
+static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(device);
+ struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
+ struct ib_gid_attr zattr = { };
union ib_gid zgid = { {0} };
- u8 port = port_num - 1;
+ u8 port = attr->port_num - 1;
unsigned long flags;
int ret;
@@ -110,7 +109,7 @@ static int hns_roce_del_gid(struct ib_device *device, u8 port_num,
spin_lock_irqsave(&hr_dev->iboe.lock, flags);
- ret = hr_dev->hw->set_gid(hr_dev, port, index, &zgid, NULL);
+ ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &zgid, &zattr);
spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
@@ -295,12 +294,6 @@ static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device,
return IB_LINK_LAYER_ETHERNET;
}
-static int hns_roce_query_gid(struct ib_device *ib_dev, u8 port_num, int index,
- union ib_gid *gid)
-{
- return 0;
-}
-
static int hns_roce_query_pkey(struct ib_device *ib_dev, u8 port, u16 index,
u16 *pkey)
{
@@ -337,7 +330,7 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
{
int ret = 0;
struct hns_roce_ucontext *context;
- struct hns_roce_ib_alloc_ucontext_resp resp;
+ struct hns_roce_ib_alloc_ucontext_resp resp = {};
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
resp.qp_tab_size = hr_dev->caps.num_qps;
@@ -350,6 +343,11 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
if (ret)
goto error_fail_uar_alloc;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
+ INIT_LIST_HEAD(&context->page_list);
+ mutex_init(&context->page_mutex);
+ }
+
ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (ret)
goto error_fail_copy_to_udata;
@@ -476,7 +474,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->modify_port = hns_roce_modify_port;
ib_dev->get_link_layer = hns_roce_get_link_layer;
ib_dev->get_netdev = hns_roce_get_netdev;
- ib_dev->query_gid = hns_roce_query_gid;
ib_dev->add_gid = hns_roce_add_gid;
ib_dev->del_gid = hns_roce_del_gid;
ib_dev->query_pkey = hns_roce_query_pkey;
@@ -520,6 +517,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
/* OTHERS */
ib_dev->get_port_immutable = hns_roce_port_immutable;
+ ib_dev->driver_id = RDMA_DRIVER_HNS;
ret = ib_register_device(ib_dev, NULL);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
@@ -659,6 +657,11 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
spin_lock_init(&hr_dev->sm_lock);
spin_lock_init(&hr_dev->bt_cmd_lock);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
+ INIT_LIST_HEAD(&hr_dev->pgdir_list);
+ mutex_init(&hr_dev->pgdir_mutex);
+ }
+
ret = hns_roce_init_uar_table(hr_dev);
if (ret) {
dev_err(dev, "Failed to initialize uar table. aborting\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index da86a811..f7256d8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -933,7 +933,7 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
out:
- free_page((unsigned long) pages);
+ free_pages((unsigned long) pages, order);
return ret;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index bdab218..4b41e04 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -32,6 +32,7 @@
#include <linux/platform_device.h>
#include <linux/pci.h>
+#include <uapi/rdma/hns-abi.h>
#include "hns_roce_device.h"
static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn)
@@ -77,7 +78,9 @@ struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
}
if (context) {
- if (ib_copy_to_udata(udata, &pd->pdn, sizeof(u64))) {
+ struct hns_roce_ib_alloc_pd_resp uresp = {.pdn = pd->pdn};
+
+ if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn);
dev_err(dev, "[alloc_pd]ib_copy_to_udata failed!\n");
kfree(pd);
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 088973a..e289a92 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -489,6 +489,15 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
return 0;
}
+static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
+{
+ if (attr->qp_type == IB_QPT_XRC_INI ||
+ attr->qp_type == IB_QPT_XRC_TGT || attr->srq)
+ return 0;
+
+ return 1;
+}
+
static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
struct ib_pd *ib_pd,
struct ib_qp_init_attr *init_attr,
@@ -497,6 +506,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
{
struct device *dev = hr_dev->dev;
struct hns_roce_ib_create_qp ucmd;
+ struct hns_roce_ib_create_qp_resp resp = {};
unsigned long qpn = 0;
int ret = 0;
u32 page_shift;
@@ -602,6 +612,18 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
dev_err(dev, "hns_roce_ib_umem_write_mtt error for create qp\n");
goto err_mtt;
}
+
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
+ (udata->outlen >= sizeof(resp)) &&
+ hns_roce_qp_has_rq(init_attr)) {
+ ret = hns_roce_db_map_user(
+ to_hr_ucontext(ib_pd->uobject->context),
+ ucmd.db_addr, &hr_qp->rdb);
+ if (ret) {
+ dev_err(dev, "rp record doorbell map failed!\n");
+ goto err_mtt;
+ }
+ }
} else {
if (init_attr->create_flags &
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
@@ -630,6 +652,16 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
hr_qp->rq.db_reg_l = hr_dev->reg_base + hr_dev->odb_offset +
DB_REG_OFFSET * hr_dev->priv_uar.index;
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
+ hns_roce_qp_has_rq(init_attr)) {
+ ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
+ if (ret) {
+ dev_err(dev, "rq record doorbell alloc failed!\n");
+ goto err_rq_sge_list;
+ }
+ *hr_qp->rdb.db_record = 0;
+ }
+
/* Allocate QP buf */
page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size,
@@ -637,7 +669,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
&hr_qp->hr_buf, page_shift)) {
dev_err(dev, "hns_roce_buf_alloc error!\n");
ret = -ENOMEM;
- goto err_rq_sge_list;
+ goto err_db;
}
hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
@@ -698,17 +730,44 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
else
hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn);
+ if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) &&
+ (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) {
+
+ /* indicate kernel supports record db */
+ resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB;
+ ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (ret)
+ goto err_qp;
+
+ hr_qp->rdb_en = 1;
+ }
hr_qp->event = hns_roce_ib_qp_event;
return 0;
+err_qp:
+ if (init_attr->qp_type == IB_QPT_GSI &&
+ hr_dev->hw_rev == HNS_ROCE_HW_VER1)
+ hns_roce_qp_remove(hr_dev, hr_qp);
+ else
+ hns_roce_qp_free(hr_dev, hr_qp);
+
err_qpn:
if (!sqpn)
hns_roce_release_range_qp(hr_dev, qpn, 1);
err_wrid:
- kfree(hr_qp->sq.wrid);
- kfree(hr_qp->rq.wrid);
+ if (ib_pd->uobject) {
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
+ (udata->outlen >= sizeof(resp)) &&
+ hns_roce_qp_has_rq(init_attr))
+ hns_roce_db_unmap_user(
+ to_hr_ucontext(ib_pd->uobject->context),
+ &hr_qp->rdb);
+ } else {
+ kfree(hr_qp->sq.wrid);
+ kfree(hr_qp->rq.wrid);
+ }
err_mtt:
hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
@@ -719,6 +778,11 @@ err_buf:
else
hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
+err_db:
+ if (!ib_pd->uobject && hns_roce_qp_has_rq(init_attr) &&
+ (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
+ hns_roce_free_db(hr_dev, &hr_qp->rdb);
+
err_rq_sge_list:
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
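
The new err_db and err_qp labels slot into the usual goto ladder: every acquisition adds one rung, the unwind releases strictly in reverse order, and each label repeats the guard of the matching allocation (kernel vs. user, record-DB capability, QP has an RQ). Reduced to the bare shape:

/* Generic shape of the unwind ladder used above. */
int setup(void)
{
	int ret;

	ret = alloc_a();
	if (ret)
		return ret;

	ret = alloc_b();                /* conditional in the real code */
	if (ret)
		goto err_a;

	ret = alloc_c();
	if (ret)
		goto err_b;

	return 0;

err_b:
	free_b();                       /* same guard as alloc_b() */
err_a:
	free_a();
	return ret;
}
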
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index bcddd70..d5d8c1b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -60,7 +60,7 @@
#include <i40e_client.h>
#include "i40iw_type.h"
#include "i40iw_p.h"
-#include "i40iw_ucontext.h"
+#include <rdma/i40iw-abi.h>
#include "i40iw_pble.h"
#include "i40iw_verbs.h"
#include "i40iw_cm.h"
@@ -559,18 +559,25 @@ void i40iw_next_iw_state(struct i40iw_qp *iwqp,
u8 state, u8 del_hash,
u8 term, u8 term_len);
int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack);
+int i40iw_send_reset(struct i40iw_cm_node *cm_node);
struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
u16 rem_port,
u32 *rem_addr,
u16 loc_port,
u32 *loc_addr,
- bool add_refcnt);
+ bool add_refcnt,
+ bool accelerated_list);
enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
struct i40iw_sc_qp *qp,
struct i40iw_qp_flush_info *info,
bool wait);
+void i40iw_gen_ae(struct i40iw_device *iwdev,
+ struct i40iw_sc_qp *qp,
+ struct i40iw_gen_ae_info *info,
+ bool wait);
+
void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src);
struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *ib_pd,
u64 addr,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index abf4cd8..4cfa8f4 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -539,7 +539,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
* i40iw_send_reset - Send RST packet
* @cm_node: connection's node
*/
-static int i40iw_send_reset(struct i40iw_cm_node *cm_node)
+int i40iw_send_reset(struct i40iw_cm_node *cm_node)
{
struct i40iw_puda_buf *sqbuf;
int flags = SET_RST | SET_ACK;
@@ -1183,6 +1183,26 @@ static void i40iw_handle_close_entry(struct i40iw_cm_node *cm_node, u32 rem_node
}
/**
+ * i40iw_build_timer_list - Add cm_nodes to timer list
+ * @timer_list: ptr to timer list
+ * @hte: ptr to accelerated or non-accelerated list
+ */
+static void i40iw_build_timer_list(struct list_head *timer_list,
+ struct list_head *hte)
+{
+ struct i40iw_cm_node *cm_node;
+ struct list_head *list_core_temp, *list_node;
+
+ list_for_each_safe(list_node, list_core_temp, hte) {
+ cm_node = container_of(list_node, struct i40iw_cm_node, list);
+ if (cm_node->close_entry || cm_node->send_entry) {
+ atomic_inc(&cm_node->ref_count);
+ list_add(&cm_node->timer_entry, timer_list);
+ }
+ }
+}
+
+/**
* i40iw_cm_timer_tick - system's timer expired callback
* @pass: Pointing to cm_core
*/
@@ -1202,15 +1222,10 @@ static void i40iw_cm_timer_tick(struct timer_list *t)
struct list_head timer_list;
INIT_LIST_HEAD(&timer_list);
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
- cm_node = container_of(list_node, struct i40iw_cm_node, list);
- if (cm_node->close_entry || cm_node->send_entry) {
- atomic_inc(&cm_node->ref_count);
- list_add(&cm_node->timer_entry, &timer_list);
- }
- }
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ i40iw_build_timer_list(&timer_list, &cm_core->non_accelerated_list);
+ i40iw_build_timer_list(&timer_list, &cm_core->accelerated_list);
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
list_for_each_safe(list_node, list_core_temp, &timer_list) {
@@ -1406,19 +1421,22 @@ static int i40iw_send_fin(struct i40iw_cm_node *cm_node)
* @loc_port: local tcp port num
* @loc_addr: loc ip addr
* @add_refcnt: flag to increment refcount of cm_node
+ * @accelerated_list: flag for accelerated vs non-accelerated list to search
*/
struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
u16 rem_port,
u32 *rem_addr,
u16 loc_port,
u32 *loc_addr,
- bool add_refcnt)
+ bool add_refcnt,
+ bool accelerated_list)
{
struct list_head *hte;
struct i40iw_cm_node *cm_node;
unsigned long flags;
- hte = &cm_core->connected_nodes;
+ hte = accelerated_list ?
+ &cm_core->accelerated_list : &cm_core->non_accelerated_list;
/* walk list and find cm_node associated with this session ID */
spin_lock_irqsave(&cm_core->ht_lock, flags);
@@ -1487,22 +1505,40 @@ static struct i40iw_cm_listener *i40iw_find_listener(
static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
struct i40iw_cm_node *cm_node)
{
- struct list_head *hte;
unsigned long flags;
if (!cm_node || !cm_core) {
i40iw_pr_err("cm_node or cm_core == NULL\n");
return;
}
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- /* get a handle on the hash table element (list head for this slot) */
- hte = &cm_core->connected_nodes;
- list_add_tail(&cm_node->list, hte);
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_add_tail(&cm_node->list, &cm_core->non_accelerated_list);
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
}
/**
+ * i40iw_find_port - find port that matches reference port
+ * @cm_core: cm's core
+ * @port: port number
+ * @accelerated_list: flag for accelerated vs non-accelerated list
+ */
+static bool i40iw_find_port(struct i40iw_cm_core *cm_core, u16 port,
+ bool accelerated_list)
+{
+ struct list_head *hte;
+ struct i40iw_cm_node *cm_node;
+
+ hte = accelerated_list ?
+ &cm_core->accelerated_list : &cm_core->non_accelerated_list;
+
+ list_for_each_entry(cm_node, hte, list) {
+ if (cm_node->loc_port == port)
+ return true;
+ }
+ return false;
+}
+
+/**
* i40iw_port_in_use - determine if port is in use
* @port: port number
* @active_side: flag for listener side vs active side
@@ -1510,19 +1546,14 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
static bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port, bool active_side)
{
struct i40iw_cm_listener *listen_node;
- struct i40iw_cm_node *cm_node;
unsigned long flags;
bool ret = false;
if (active_side) {
- /* search connected node list */
spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_entry(cm_node, &cm_core->connected_nodes, list) {
- if (cm_node->loc_port == port) {
- ret = true;
- break;
- }
- }
+ ret = i40iw_find_port(cm_core, port, true);
+ if (!ret)
+ ret = i40iw_find_port(cm_core, port, false);
if (!ret)
clear_bit(port, cm_core->active_side_ports);
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
@@ -1829,9 +1860,11 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core,
INIT_LIST_HEAD(&reset_list);
if (free_hanging_nodes) {
spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_pos, list_temp, &cm_core->connected_nodes) {
+ list_for_each_safe(list_pos,
+ list_temp, &cm_core->non_accelerated_list) {
cm_node = container_of(list_pos, struct i40iw_cm_node, list);
- if ((cm_node->listener == listener) && !cm_node->accelerated) {
+ if ((cm_node->listener == listener) &&
+ !cm_node->accelerated) {
atomic_inc(&cm_node->ref_count);
list_add(&cm_node->reset_entry, &reset_list);
}
@@ -3144,7 +3177,8 @@ void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf)
cm_info.rem_addr,
cm_info.loc_port,
cm_info.loc_addr,
- true);
+ true,
+ false);
if (!cm_node) {
/* Only type of packet accepted are for */
@@ -3202,7 +3236,8 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev)
cm_core->iwdev = iwdev;
cm_core->dev = &iwdev->sc_dev;
- INIT_LIST_HEAD(&cm_core->connected_nodes);
+ INIT_LIST_HEAD(&cm_core->accelerated_list);
+ INIT_LIST_HEAD(&cm_core->non_accelerated_list);
INIT_LIST_HEAD(&cm_core->listen_nodes);
timer_setup(&cm_core->tcp_timer, i40iw_cm_timer_tick, 0);
@@ -3585,6 +3620,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct i40iw_qp *iwqp;
struct i40iw_device *iwdev;
struct i40iw_sc_dev *dev;
+ struct i40iw_cm_core *cm_core;
struct i40iw_cm_node *cm_node;
struct ib_qp_attr attr;
int passive_state;
@@ -3594,6 +3630,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct i40iw_kmem_info accept;
enum i40iw_status_code status;
u64 tagged_offset;
+ unsigned long flags;
memset(&attr, 0, sizeof(attr));
ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
@@ -3603,6 +3640,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
iwqp = to_iwqp(ibqp);
iwdev = iwqp->iwdev;
dev = &iwdev->sc_dev;
+ cm_core = &iwdev->cm_core;
cm_node = (struct i40iw_cm_node *)cm_id->provider_data;
if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) {
@@ -3697,6 +3735,10 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
cm_node->accelerated = true;
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_move_tail(&cm_node->list, &cm_core->accelerated_list);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
status =
i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
if (status)
@@ -4026,10 +4068,12 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
{
struct i40iw_qp *iwqp;
struct i40iw_device *iwdev;
+ struct i40iw_cm_core *cm_core;
struct i40iw_cm_node *cm_node;
struct i40iw_sc_dev *dev;
struct ib_qp_attr attr;
struct iw_cm_id *cm_id;
+ unsigned long flags;
int status;
bool read0;
@@ -4038,6 +4082,7 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
iwqp = (struct i40iw_qp *)cm_id->provider_data;
iwdev = to_iwdev(iwqp->ibqp.device);
dev = &iwdev->sc_dev;
+ cm_core = &iwdev->cm_core;
if (iwqp->destroyed) {
status = -ETIMEDOUT;
@@ -4057,6 +4102,9 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
cm_node->accelerated = true;
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_move_tail(&cm_node->list, &cm_core->accelerated_list);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY,
0);
if (status)
@@ -4256,25 +4304,38 @@ void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr,
struct list_head *list_node;
struct i40iw_cm_node *cm_node;
unsigned long flags;
- struct list_head connected_list;
+ struct list_head teardown_list;
struct ib_qp_attr attr;
- INIT_LIST_HEAD(&connected_list);
+ INIT_LIST_HEAD(&teardown_list);
spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
+ list_for_each_safe(list_node, list_core_temp,
+ &cm_core->accelerated_list) {
+ cm_node = container_of(list_node, struct i40iw_cm_node, list);
+ if (disconnect_all ||
+ (nfo->vlan_id == cm_node->vlan_id &&
+ (!memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16) ||
+ !memcmp(cm_node->rem_addr, ipaddr, nfo->ipv4 ? 4 : 16)))) {
+ atomic_inc(&cm_node->ref_count);
+ list_add(&cm_node->teardown_entry, &teardown_list);
+ }
+ }
+ list_for_each_safe(list_node, list_core_temp,
+ &cm_core->non_accelerated_list) {
cm_node = container_of(list_node, struct i40iw_cm_node, list);
if (disconnect_all ||
(nfo->vlan_id == cm_node->vlan_id &&
(!memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16) ||
!memcmp(cm_node->rem_addr, ipaddr, nfo->ipv4 ? 4 : 16)))) {
atomic_inc(&cm_node->ref_count);
- list_add(&cm_node->connected_entry, &connected_list);
+ list_add(&cm_node->teardown_entry, &teardown_list);
}
}
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- list_for_each_safe(list_node, list_core_temp, &connected_list) {
- cm_node = container_of(list_node, struct i40iw_cm_node, connected_entry);
+ list_for_each_safe(list_node, list_core_temp, &teardown_list) {
+ cm_node = container_of(list_node, struct i40iw_cm_node,
+ teardown_entry);
attr.qp_state = IB_QPS_ERR;
i40iw_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
if (iwdev->reset)
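
Splitting connected_nodes into accelerated_list and non_accelerated_list lets each walker visit only the class it can match: i40iw_receive_ilq() looks up non-accelerated nodes (the false above), i40iw_ieq_get_qp() looks up accelerated ones (true, in i40iw_utils.c below), and listener teardown scans only the non-accelerated list. A node migrates exactly once, under the same ht_lock, when its connection is offloaded:

/* The single transition point, as used in i40iw_accept() and
 * i40iw_cm_event_connected() above. */
cm_node->accelerated = true;
spin_lock_irqsave(&cm_core->ht_lock, flags);
list_move_tail(&cm_node->list, &cm_core->accelerated_list);
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
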
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index cf60c45..78ba36a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -341,7 +341,7 @@ struct i40iw_cm_node {
int accept_pend;
struct list_head timer_entry;
struct list_head reset_entry;
- struct list_head connected_entry;
+ struct list_head teardown_entry;
atomic_t passive_state;
bool qhash_set;
u8 user_pri;
@@ -403,7 +403,8 @@ struct i40iw_cm_core {
struct i40iw_sc_dev *dev;
struct list_head listen_nodes;
- struct list_head connected_nodes;
+ struct list_head accelerated_list;
+ struct list_head non_accelerated_list;
struct timer_list tcp_timer;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index c74fd33..4d841a3 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -2614,10 +2614,8 @@ static enum i40iw_status_code i40iw_sc_qp_flush_wqes(
qp->flush_sq |= flush_sq;
qp->flush_rq |= flush_rq;
- if (!flush_sq && !flush_rq) {
- if (info->ae_code != I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR)
- return 0;
- }
+ if (!flush_sq && !flush_rq)
+ return 0;
cqp = qp->pd->dev->cqp;
wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
@@ -2659,6 +2657,49 @@ static enum i40iw_status_code i40iw_sc_qp_flush_wqes(
}
/**
+ * i40iw_sc_gen_ae - generate AE, currently uses flush WQE CQP OP
+ * @qp: sc qp
+ * @info: gen ae information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_gen_ae(
+ struct i40iw_sc_qp *qp,
+ struct i40iw_gen_ae_info *info,
+ u64 scratch,
+ bool post_sq)
+{
+ u64 temp;
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+
+ cqp = qp->pd->dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ temp = info->ae_code |
+ LS_64(info->ae_source, I40IW_CQPSQ_FWQE_AESOURCE);
+
+ set_64bit_val(wqe, 8, temp);
+
+ header = qp->qp_uk.qp_id |
+ LS_64(I40IW_CQP_OP_GEN_AE, I40IW_CQPSQ_OPCODE) |
+ LS_64(1, I40IW_CQPSQ_FWQE_GENERATE_AE) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+ i40iw_insert_wqe_hdr(wqe, header);
+
+ i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "GEN_AE WQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ if (post_sq)
+ i40iw_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
* i40iw_sc_qp_upload_context - upload qp's context
* @dev: sc device struct
* @info: upload context info ptr for return
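
i40iw_sc_gen_ae() reuses the flush-WQE opcode value (0x22, aliased as I40IW_CQP_OP_GEN_AE in i40iw_d.h below) and relies on the GENERATE_AE bit to make the firmware raise the asynchronous event without flushing. LS_64() shifts a value into its field and masks it to the field width; its shape (the real macro lives in i40iw_d.h):

/* Shape of the field-packing helper, for reference. */
#define LS_64(val, field) (((u64)(val) << field##_SHIFT) & field##_MASK)

/* e.g. the WQE header assembled above: */
u64 header = qp_id |
	     LS_64(I40IW_CQP_OP_GEN_AE, I40IW_CQPSQ_OPCODE) |
	     LS_64(1, I40IW_CQPSQ_FWQE_GENERATE_AE) |
	     LS_64(polarity, I40IW_CQPSQ_WQEVALID);
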
@@ -4148,6 +4189,13 @@ static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev,
pcmdinfo->in.u.qp_flush_wqes.
scratch, pcmdinfo->post_sq);
break;
+ case OP_GEN_AE:
+ status = i40iw_sc_gen_ae(
+ pcmdinfo->in.u.gen_ae.qp,
+ &pcmdinfo->in.u.gen_ae.info,
+ pcmdinfo->in.u.gen_ae.scratch,
+ pcmdinfo->post_sq);
+ break;
case OP_ADD_ARP_CACHE_ENTRY:
status = i40iw_sc_add_arp_cache_entry(
pcmdinfo->in.u.add_arp_cache_entry.cqp,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index 4b65e41..6ddaeec 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -418,6 +418,8 @@
#define I40IW_CQP_OP_QUERY_FPM_VALUES 0x20
#define I40IW_CQP_OP_COMMIT_FPM_VALUES 0x21
#define I40IW_CQP_OP_FLUSH_WQES 0x22
+/* I40IW_CQP_OP_GEN_AE is the same value as I40IW_CQP_OP_FLUSH_WQES */
+#define I40IW_CQP_OP_GEN_AE 0x22
#define I40IW_CQP_OP_MANAGE_APBVT 0x23
#define I40IW_CQP_OP_NOP 0x24
#define I40IW_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY 0x25
@@ -1729,6 +1731,7 @@ enum i40iw_alignment {
#define OP_COMMIT_FPM_VALUES 30
#define OP_REQUESTED_COMMANDS 31
#define OP_COMPLETED_COMMANDS 32
-#define OP_SIZE_CQP_STAT_ARRAY 33
+#define OP_GEN_AE 33
+#define OP_SIZE_CQP_STAT_ARRAY 34
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 61540e1..6139836 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -352,6 +352,8 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
else
i40iw_cm_disconn(iwqp);
break;
+ case I40IW_AE_BAD_CLOSE:
+ /* fall through */
case I40IW_AE_RESET_SENT:
i40iw_next_iw_state(iwqp, I40IW_QP_STATE_ERROR, 1, 0, 0);
i40iw_cm_disconn(iwqp);
@@ -668,6 +670,39 @@ enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
}
/**
+ * i40iw_gen_ae - generate AE
+ * @iwdev: iwarp device
+ * @qp: qp associated with AE
+ * @info: info for ae
+ * @wait: wait for completion
+ */
+void i40iw_gen_ae(struct i40iw_device *iwdev,
+ struct i40iw_sc_qp *qp,
+ struct i40iw_gen_ae_info *info,
+ bool wait)
+{
+ struct i40iw_gen_ae_info *ae_info;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ ae_info = &cqp_request->info.in.u.gen_ae.info;
+ memcpy(ae_info, info, sizeof(*ae_info));
+
+ cqp_info->cqp_cmd = OP_GEN_AE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.gen_ae.qp = qp;
+ cqp_info->in.u.gen_ae.scratch = (uintptr_t)cqp_request;
+ if (i40iw_handle_cqp_op(iwdev, cqp_request))
+ i40iw_pr_err("CQP OP failed attempting to generate ae_code=0x%x\n",
+ info->ae_code);
+}
+
+/**
* i40iw_hw_manage_vf_pble_bp - manage vf pbles
* @iwdev: iwarp device
* @info: info for managing pble
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index b088629..9cd0d3e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1560,8 +1560,6 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
enum i40iw_status_code status;
memcpy(&hdl->ldev, ldev, sizeof(*ldev));
- if (resource_profile == 1)
- resource_profile = 2;
iwdev->mpa_version = mpa_version;
iwdev->resource_profile = (resource_profile < I40IW_HMC_PROFILE_EQUAL) ?
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index 4c21197..d9c7ae6 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -348,8 +348,8 @@ enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
spin_lock_irqsave(&rsrc->bufpool_lock, flags);
rsrc->tx_wqe_avail_cnt++;
spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
- if (!list_empty(&rsrc->vsi->ilq->txpend))
- i40iw_puda_send_buf(rsrc->vsi->ilq, NULL);
+ if (!list_empty(&rsrc->txpend))
+ i40iw_puda_send_buf(rsrc, NULL);
}
done:
@@ -1471,10 +1471,6 @@ static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid)
struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)sqwrid;
i40iw_puda_ret_bufpool(ieq, buf);
- if (!list_empty(&ieq->txpend)) {
- buf = i40iw_puda_get_listbuf(&ieq->txpend);
- i40iw_puda_send_buf(ieq, buf);
- }
}
/**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index a27d392..adc8d2e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -1004,6 +1004,11 @@ struct i40iw_cqp_query_fpm_values {
u32 pbl_max;
};
+struct i40iw_gen_ae_info {
+ u16 ae_code;
+ u8 ae_source;
+};
+
struct i40iw_cqp_ops {
enum i40iw_status_code (*cqp_init)(struct i40iw_sc_cqp *,
struct i40iw_cqp_init_info *);
@@ -1291,6 +1296,12 @@ struct cqp_info {
} qp_flush_wqes;
struct {
+ struct i40iw_sc_qp *qp;
+ struct i40iw_gen_ae_info info;
+ u64 scratch;
+ } gen_ae;
+
+ struct {
struct i40iw_sc_cqp *cqp;
void *fpm_values_va;
u64 fpm_values_pa;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index ddc1056..a9ea966 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -1284,15 +1284,13 @@ void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
*/
void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
{
- struct i40iw_qp_flush_info info;
+ struct i40iw_gen_ae_info info;
struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
i40iw_debug(dev, I40IW_DEBUG_AEQ, "%s entered\n", __func__);
- memset(&info, 0, sizeof(info));
info.ae_code = I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR;
- info.generate_ae = true;
- info.ae_source = 0x3;
- (void)i40iw_hw_flush_wqes(iwdev, qp, &info, false);
+ info.ae_source = I40IW_AE_SOURCE_RQ;
+ i40iw_gen_ae(iwdev, qp, &info, false);
}
/**
@@ -1407,7 +1405,7 @@ struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
rem_port = ntohs(tcph->source);
cm_node = i40iw_find_node(&iwdev->cm_core, rem_port, rem_addr, loc_port,
- loc_addr, false);
+ loc_addr, false, true);
if (!cm_node)
return NULL;
iwqp = cm_node->iwqp;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 70024e8..40e4f5a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -38,6 +38,7 @@
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/hugetlb.h>
+#include <linux/irq.h>
#include <asm/byteorder.h>
#include <net/ip.h>
#include <rdma/ib_verbs.h>
@@ -830,10 +831,10 @@ static int i40iw_query_qp(struct ib_qp *ibqp,
void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
struct i40iw_modify_qp_info *info, bool wait)
{
- enum i40iw_status_code status;
struct i40iw_cqp_request *cqp_request;
struct cqp_commands_info *cqp_info;
struct i40iw_modify_qp_info *m_info;
+ struct i40iw_gen_ae_info ae_info;
cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
if (!cqp_request)
@@ -846,9 +847,25 @@ void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
cqp_info->post_sq = 1;
cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp;
cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Modify QP fail");
+ if (!i40iw_handle_cqp_op(iwdev, cqp_request))
+ return;
+
+ switch (m_info->next_iwarp_state) {
+ case I40IW_QP_STATE_RTS:
+ if (iwqp->iwarp_state == I40IW_QP_STATE_IDLE)
+ i40iw_send_reset(iwqp->cm_node);
+ /* fall through */
+ case I40IW_QP_STATE_IDLE:
+ case I40IW_QP_STATE_TERMINATE:
+ case I40IW_QP_STATE_CLOSING:
+ ae_info.ae_code = I40IW_AE_BAD_CLOSE;
+ ae_info.ae_source = 0;
+ i40iw_gen_ae(iwdev, &iwqp->sc_qp, &ae_info, false);
+ break;
+ case I40IW_QP_STATE_ERROR:
+ default:
+ break;
+ }
}
/**
@@ -961,10 +978,6 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
iwqp->ibqp_state = attr->qp_state;
- if (issue_modify_qp)
- iwqp->iwarp_state = info.next_iwarp_state;
- else
- info.next_iwarp_state = iwqp->iwarp_state;
}
if (attr_mask & IB_QP_ACCESS_FLAGS) {
ctx_info->iwarp_info_valid = true;
@@ -1002,9 +1015,14 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
spin_unlock_irqrestore(&iwqp->lock, flags);
- if (issue_modify_qp)
+ if (issue_modify_qp) {
i40iw_hw_modify_qp(iwdev, iwqp, &info, true);
+ spin_lock_irqsave(&iwqp->lock, flags);
+ iwqp->iwarp_state = info.next_iwarp_state;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ }
+
if (issue_modify_qp && (iwqp->ibqp_state > IB_QPS_RTS)) {
if (dont_wait) {
if (iwqp->cm_id && iwqp->hw_tcp_state) {
@@ -2729,6 +2747,25 @@ static int i40iw_destroy_ah(struct ib_ah *ah)
}
/**
+ * i40iw_get_vector_affinity - report IRQ affinity mask
+ * @ibdev: IB device
+ * @comp_vector: completion vector index
+ */
+static const struct cpumask *i40iw_get_vector_affinity(struct ib_device *ibdev,
+ int comp_vector)
+{
+ struct i40iw_device *iwdev = to_iwdev(ibdev);
+ struct i40iw_msix_vector *msix_vec;
+
+ if (iwdev->msix_shared)
+ msix_vec = &iwdev->iw_msixtbl[comp_vector];
+ else
+ msix_vec = &iwdev->iw_msixtbl[comp_vector + 1];
+
+ return irq_get_affinity_mask(msix_vec->irq);
+}
+
+/**
* i40iw_init_rdma_device - initialization of iwarp device
* @iwdev: iwarp device
*/
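
get_vector_affinity lets ULPs place completion work near the CPU servicing the interrupt: i40iw simply indexes its MSI-X table (skipping entry 0, the dedicated control vector, unless msix_shared) and returns the IRQ's affinity mask. A consumer-side sketch via the core wrapper:

/* Hypothetical consumer; ib_get_vector_affinity() dispatches to the
 * driver's get_vector_affinity op. */
const struct cpumask *mask = ib_get_vector_affinity(ibdev, comp_vector);

if (mask)
	pr_info("comp vector %d runs near CPU %u\n",
		comp_vector, cpumask_first(mask));
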
@@ -2824,6 +2861,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
iwibdev->ibdev.post_send = i40iw_post_send;
iwibdev->ibdev.post_recv = i40iw_post_recv;
+ iwibdev->ibdev.get_vector_affinity = i40iw_get_vector_affinity;
return iwibdev;
}
@@ -2889,6 +2927,7 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
return -ENOMEM;
iwibdev = iwdev->iwibdev;
+ iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
ret = ib_register_device(&iwibdev->ibdev, NULL);
if (ret)
goto error;
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 6dee4fd..9345d5b 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -101,12 +101,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
if (ret)
return ERR_PTR(ret);
eth_zero_addr(ah->av.eth.s_mac);
- if (gid_attr.ndev) {
- if (is_vlan_dev(gid_attr.ndev))
- vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
- memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN);
- dev_put(gid_attr.ndev);
- }
+ if (is_vlan_dev(gid_attr.ndev))
+ vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
+ memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN);
+ dev_put(gid_attr.ndev);
if (vlan_tag < 0x1000)
vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 5a0e4fc..5b70744 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -246,14 +246,11 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
}
-static int mlx4_ib_add_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
+static int mlx4_ib_add_gid(const union ib_gid *gid,
const struct ib_gid_attr *attr,
void **context)
{
- struct mlx4_ib_dev *ibdev = to_mdev(device);
+ struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
struct mlx4_port_gid_table *port_gid_table;
int free = -1, found = -1;
@@ -262,16 +259,16 @@ static int mlx4_ib_add_gid(struct ib_device *device,
int i;
struct gid_entry *gids = NULL;
- if (!rdma_cap_roce_gid_table(device, port_num))
+ if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
return -EINVAL;
- if (port_num > MLX4_MAX_PORTS)
+ if (attr->port_num > MLX4_MAX_PORTS)
return -EINVAL;
if (!context)
return -EINVAL;
- port_gid_table = &iboe->gids[port_num - 1];
+ port_gid_table = &iboe->gids[attr->port_num - 1];
spin_lock_bh(&iboe->lock);
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
@@ -318,33 +315,30 @@ static int mlx4_ib_add_gid(struct ib_device *device,
spin_unlock_bh(&iboe->lock);
if (!ret && hw_update) {
- ret = mlx4_ib_update_gids(gids, ibdev, port_num);
+ ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
kfree(gids);
}
return ret;
}
-static int mlx4_ib_del_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- void **context)
+static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
{
struct gid_cache_context *ctx = *context;
- struct mlx4_ib_dev *ibdev = to_mdev(device);
+ struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
struct mlx4_port_gid_table *port_gid_table;
int ret = 0;
int hw_update = 0;
struct gid_entry *gids = NULL;
- if (!rdma_cap_roce_gid_table(device, port_num))
+ if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
return -EINVAL;
- if (port_num > MLX4_MAX_PORTS)
+ if (attr->port_num > MLX4_MAX_PORTS)
return -EINVAL;
- port_gid_table = &iboe->gids[port_num - 1];
+ port_gid_table = &iboe->gids[attr->port_num - 1];
spin_lock_bh(&iboe->lock);
if (ctx) {
ctx->refcount--;
@@ -376,7 +370,7 @@ static int mlx4_ib_del_gid(struct ib_device *device,
spin_unlock_bh(&iboe->lock);
if (!ret && hw_update) {
- ret = mlx4_ib_update_gids(gids, ibdev, port_num);
+ ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
kfree(gids);
}
return ret;
@@ -411,9 +405,6 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
if (attr.ndev)
dev_put(attr.ndev);
- if (!memcmp(&gid, &zgid, sizeof(gid)))
- return -EINVAL;
-
spin_lock_irqsave(&iboe->lock, flags);
port_gid_table = &iboe->gids[port_num - 1];
@@ -429,6 +420,9 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
return real_index;
}
+#define field_avail(type, fld, sz) (offsetof(type, fld) + \
+ sizeof(((type *)0)->fld) <= (sz))
+
static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
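
field_avail() gates each extended-response field on the user buffer actually covering it (offset plus size within outlen), so old userspace built against a shorter response struct keeps working. A worked example with a hypothetical layout:

/* Hypothetical layout, to show the arithmetic: */
struct resp {
	__u32 comp_mask;        /* offset 0, size 4 */
	__u64 hca_core_clock;   /* offset 8, size 8 (4 bytes padding) */
};

/* With uhw->outlen == 8:
 *   field_avail(struct resp, comp_mask, 8)      -> 0 + 4 <= 8, true
 *   field_avail(struct resp, hca_core_clock, 8) -> 8 + 8 <= 8, false
 * so only comp_mask may be copied back to this user.
 */
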
@@ -556,14 +550,19 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->timestamp_mask = 0xFFFFFFFFFFFFULL;
props->max_ah = INT_MAX;
- if ((dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
- (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
- mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET)) {
- props->rss_caps.max_rwq_indirection_tables = props->max_qp;
- props->rss_caps.max_rwq_indirection_table_size =
- dev->dev->caps.max_rss_tbl_sz;
- props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
- props->max_wq_type_rq = props->max_qp;
+ if (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
+ mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET) {
+ if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
+ props->rss_caps.max_rwq_indirection_tables =
+ props->max_qp;
+ props->rss_caps.max_rwq_indirection_table_size =
+ dev->dev->caps.max_rss_tbl_sz;
+ props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
+ props->max_wq_type_rq = props->max_qp;
+ }
+
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
}
props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT;
@@ -575,7 +574,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
resp.response_length += sizeof(resp.hca_core_clock_offset);
if (!err && !mlx4_is_slave(dev->dev)) {
- resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP;
+ resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET;
resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
}
}
@@ -587,8 +586,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
sizeof(struct mlx4_wqe_data_seg);
}
- if (uhw->outlen >= resp.response_length + sizeof(resp.rss_caps)) {
- resp.response_length += sizeof(resp.rss_caps);
+ if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
if (props->rss_caps.supported_qpts) {
resp.rss_caps.rx_hash_function =
MLX4_IB_RX_HASH_FUNC_TOEPLITZ;
@@ -608,6 +606,22 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
resp.rss_caps.rx_hash_fields_mask |=
MLX4_IB_RX_HASH_INNER;
}
+ resp.response_length = offsetof(typeof(resp), rss_caps) +
+ sizeof(resp.rss_caps);
+ }
+
+ if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
+ if (dev->dev->caps.max_gso_sz &&
+ ((mlx4_ib_port_link_layer(ibdev, 1) ==
+ IB_LINK_LAYER_ETHERNET) ||
+ (mlx4_ib_port_link_layer(ibdev, 2) ==
+ IB_LINK_LAYER_ETHERNET))) {
+ resp.tso_caps.max_tso = dev->dev->caps.max_gso_sz;
+ resp.tso_caps.supported_qpts |=
+ 1 << IB_QPT_RAW_PACKET;
+ }
+ resp.response_length = offsetof(typeof(resp), tso_caps) +
+ sizeof(resp.tso_caps);
}
if (uhw->outlen) {
@@ -865,24 +879,9 @@ out:
static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
- int ret;
-
if (rdma_protocol_ib(ibdev, port))
return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
-
- if (!rdma_protocol_roce(ibdev, port))
- return -ENODEV;
-
- if (!rdma_cap_roce_gid_table(ibdev, port))
- return -ENODEV;
-
- ret = ib_get_cached_gid(ibdev, port, index, gid, NULL);
- if (ret == -EAGAIN) {
- memcpy(gid, &zgid, sizeof(*gid));
- return 0;
- }
-
- return ret;
+ return 0;
}
static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
@@ -1330,7 +1329,7 @@ static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
struct mlx4_ib_pd *pd;
int err;
- pd = kmalloc(sizeof *pd, GFP_KERNEL);
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
@@ -1346,7 +1345,6 @@ static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
kfree(pd);
return ERR_PTR(-EFAULT);
}
-
return &pd->ibpd;
}
@@ -1860,6 +1858,9 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
return ERR_PTR(-EINVAL);
+ if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)
+ return ERR_PTR(-EOPNOTSUPP);
+
if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
(flow_attr->type != IB_FLOW_ATTR_NORMAL))
return ERR_PTR(-EOPNOTSUPP);
@@ -2933,6 +2934,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_alloc_diag_counters(ibdev))
goto err_steer_free_bitmap;
+ ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
if (ib_register_device(&ibdev->ib_dev, NULL))
goto err_diag_counters;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e14919c..7b14299 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -189,6 +189,7 @@ enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
+ MLX4_IB_QP_SCATTER_FCS = IB_QP_CREATE_SCATTER_FCS,
/* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
@@ -641,24 +642,6 @@ struct mlx4_uverbs_ex_query_device {
__u32 reserved;
};
-enum query_device_resp_mask {
- QUERY_DEVICE_RESP_MASK_TIMESTAMP = 1UL << 0,
-};
-
-struct mlx4_ib_rss_caps {
- __u64 rx_hash_fields_mask; /* enum mlx4_rx_hash_fields */
- __u8 rx_hash_function; /* enum mlx4_rx_hash_function_flags */
- __u8 reserved[7];
-};
-
-struct mlx4_uverbs_ex_query_device_resp {
- __u32 comp_mask;
- __u32 response_length;
- __u64 hca_core_clock_offset;
- __u32 max_inl_recv_sz;
- struct mlx4_ib_rss_caps rss_caps;
-};
-
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 4975f3e..17f4f15 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -407,6 +407,9 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+ mr->ibmr.length = length;
+ mr->ibmr.iova = virt_addr;
+ mr->ibmr.page_size = 1U << shift;
return &mr->ibmr;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index f045491..50af891 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1096,6 +1096,17 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->inl_recv_sz = ucmd.qp.inl_recv_sz;
}
+ if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) {
+ if (!(dev->dev->caps.flags &
+ MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
+ pr_debug("scatter FCS is unsupported\n");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ qp->flags |= MLX4_IB_QP_SCATTER_FCS;
+ }
+
err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
qp_has_rq(init_attr), qp, qp->inl_recv_sz);
if (err)
@@ -2234,6 +2245,9 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
if (qp->inl_recv_sz)
context->param3 |= cpu_to_be32(1 << 25);
+ if (qp->flags & MLX4_IB_QP_SCATTER_FCS)
+ context->param3 |= cpu_to_be32(1 << 29);
+
if (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI)
context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
else if (qp_type == IB_QPT_RAW_PACKET)
@@ -2356,9 +2370,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
status = ib_get_cached_gid(&dev->ib_dev, port_num,
index, &gid, &gid_attr);
- if (!status && !memcmp(&gid, &zgid, sizeof(gid)))
- status = -ENOENT;
- if (!status && gid_attr.ndev) {
+ if (!status) {
vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev);
memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
dev_put(gid_attr.ndev);
@@ -3880,8 +3892,8 @@ out:
*/
wmb();
- writel(qp->doorbell_qpn,
- to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
+ writel_relaxed(qp->doorbell_qpn,
+ to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
/*
* Make sure doorbells don't leak out of SQ spinlock
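
writel_relaxed() drops the barrier writel() performs before the store; that is safe here because the wmb() above already orders the WQE writes against the doorbell, and the SQ spinlock plus mmiowb() (per the comment) keep the MMIO from escaping the critical section. The contract, as a sketch:

/* Ordering contract around a relaxed doorbell write: */
wmb();                            /* 1. WQE contents globally visible   */
writel_relaxed(db_val, db_reg);   /* 2. accessor adds no extra barrier  */
mmiowb();                         /* 3. order MMIO before the unlock... */
spin_unlock(&sq_lock);            /*    ...so it stays inside the lock  */
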
@@ -4204,7 +4216,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
return ERR_PTR(-EOPNOTSUPP);
}
- if (init_attr->create_flags) {
+ if (init_attr->create_flags & ~IB_WQ_FLAGS_SCATTER_FCS) {
pr_debug("unsupported create_flags %u\n",
init_attr->create_flags);
return ERR_PTR(-EOPNOTSUPP);
@@ -4225,6 +4237,9 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
ib_qp_init_attr.recv_cq = init_attr->cq;
ib_qp_init_attr.send_cq = ib_qp_init_attr.recv_cq; /* Dummy CQ */
+ if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS)
+ ib_qp_init_attr.create_flags |= IB_QP_CREATE_SCATTER_FCS;
+
err = create_qp_common(dev, pd, MLX4_IB_RWQ_SRC, &ib_qp_init_attr,
udata, 0, &qp);
if (err) {
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index fe269f6..e6bde32a 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -36,6 +36,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
struct mlx5_ib_ah *ah,
struct rdma_ah_attr *ah_attr)
{
+ enum ib_gid_type gid_type;
+ int err;
+
if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
@@ -50,6 +53,12 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ err = mlx5_get_roce_gid_type(dev, ah_attr->port_num,
+ ah_attr->grh.sgid_index,
+ &gid_type);
+ if (err)
+ return ERR_PTR(err);
+
memcpy(ah->av.rmac, ah_attr->roce.dmac,
sizeof(ah_attr->roce.dmac));
ah->av.udp_sport =
@@ -57,6 +66,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
rdma_ah_get_port_num(ah_attr),
rdma_ah_read_grh(ah_attr)->sgid_index);
ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1;
+ if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+#define MLX5_ECN_ENABLED BIT(1)
+ ah->av.tclass |= MLX5_ECN_ENABLED;
} else {
ah->av.rlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr));
ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f;
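
MLX5_ECN_ENABLED is BIT(1) of the traffic class: the two low bits of the TOS/tclass octet carry the RFC 3168 ECN codepoint, and 0b10 is ECT(0), so RoCEv2 (UDP-encapsulated) flows get marked ECN-capable for ECN-based RoCE congestion control. The mapping, for reference:

/* ECN codepoints in the two low tclass bits (RFC 3168):
 *   00 Not-ECT    01 ECT(1)    10 ECT(0)    11 CE
 * BIT(1) == 0b10 marks the flow ECT(0).
 */
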
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 6f6712f..188512b 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -66,3 +66,107 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out));
}
+
+int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
+ u64 length, u32 alignment)
+{
+ struct mlx5_core_dev *dev = memic->dev;
+ u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size)
+ >> PAGE_SHIFT;
+ u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
+ u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment);
+ u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+ u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {};
+ u32 mlx5_alignment;
+ u64 page_idx = 0;
+ int ret = 0;
+
+ if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK))
+ return -EINVAL;
+
+ /* mlx5 device sets alignment as 64*2^driver_value
+ * so normalizing is needed.
+ */
+ mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 0 :
+ alignment - MLX5_MEMIC_BASE_ALIGN;
+ if (mlx5_alignment > max_alignment)
+ return -EINVAL;
+
+ MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC);
+ MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE);
+ MLX5_SET(alloc_memic_in, in, memic_size, length);
+ MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment,
+ mlx5_alignment);
+
+ while (page_idx < num_memic_hw_pages) {
+ spin_lock(&memic->memic_lock);
+ page_idx = bitmap_find_next_zero_area(memic->memic_alloc_pages,
+ num_memic_hw_pages,
+ page_idx,
+ num_pages, 0);
+
+ if (page_idx < num_memic_hw_pages)
+ bitmap_set(memic->memic_alloc_pages,
+ page_idx, num_pages);
+
+ spin_unlock(&memic->memic_lock);
+
+ if (page_idx >= num_memic_hw_pages)
+ break;
+
+ MLX5_SET64(alloc_memic_in, in, range_start_addr,
+ hw_start_addr + (page_idx * PAGE_SIZE));
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret) {
+ spin_lock(&memic->memic_lock);
+ bitmap_clear(memic->memic_alloc_pages,
+ page_idx, num_pages);
+ spin_unlock(&memic->memic_lock);
+
+ if (ret == -EAGAIN) {
+ page_idx++;
+ continue;
+ }
+
+ return ret;
+ }
+
+ *addr = pci_resource_start(dev->pdev, 0) +
+ MLX5_GET64(alloc_memic_out, out, memic_start_addr);
+
+ return 0;
+ }
+
+ return -ENOMEM;
+}
+
+int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length)
+{
+ struct mlx5_core_dev *dev = memic->dev;
+ u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
+ u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+ u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {0};
+ u64 start_page_idx;
+ int err;
+
+ addr -= pci_resource_start(dev->pdev, 0);
+ start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT;
+
+ MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC);
+ MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr);
+ MLX5_SET(dealloc_memic_in, in, memic_size, length);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+
+ if (!err) {
+ spin_lock(&memic->memic_lock);
+ bitmap_clear(memic->memic_alloc_pages,
+ start_page_idx, num_pages);
+ spin_unlock(&memic->memic_lock);
+ }
+
+ return err;
+}
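
mlx5_cmd_alloc_memic() pairs a host-side bitmap with the firmware command: a candidate page range is reserved optimistically in the bitmap, ALLOC_MEMIC validates it, and a busy range (-EAGAIN) rolls the reservation back and slides the search window forward; dealloc reverses both steps. The bare idiom, with hypothetical names:

/* Optimistic-reserve / validate / retry, reduced to its shape. */
for (idx = 0; idx < total; ) {
	spin_lock(&lock);
	idx = bitmap_find_next_zero_area(map, total, idx, npages, 0);
	if (idx < total)
		bitmap_set(map, idx, npages);
	spin_unlock(&lock);
	if (idx >= total)
		break;                  /* address space exhausted */

	ret = fw_claim(idx, npages);    /* hypothetical FW command */
	if (!ret)
		return 0;               /* range now owned */

	spin_lock(&lock);
	bitmap_clear(map, idx, npages); /* undo the reservation */
	spin_unlock(&lock);

	if (ret != -EAGAIN)
		return ret;             /* hard failure */
	idx++;                          /* FW busy: probe further along */
}
return -ENOMEM;
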
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 78ffded..e7206c8 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -33,6 +33,7 @@
#ifndef MLX5_IB_CMD_H
#define MLX5_IB_CMD_H
+#include "mlx5_ib.h"
#include <linux/kernel.h>
#include <linux/mlx5/driver.h>
@@ -41,4 +42,7 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
void *out, int out_size);
int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
void *in, int in_size);
+int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
+ u64 length, u32 alignment);
+int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 071fd9a..daa919e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -38,6 +38,7 @@
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
+#include <linux/bitmap.h>
#if defined(CONFIG_X86)
#include <asm/pat.h>
#endif
@@ -51,6 +52,7 @@
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
+#include <linux/mlx5/fs_helpers.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
@@ -60,6 +62,13 @@
#include "ib_rep.h"
#include "cmd.h"
#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/accel.h>
+#include <rdma/uverbs_std_types.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"
@@ -92,6 +101,12 @@ static LIST_HEAD(mlx5_ib_dev_list);
*/
static DEFINE_MUTEX(mlx5_ib_multiport_mutex);
+/* We can't use an array for xlt_emergency_page because dma_map_single
+ * doesn't work on kernel modules memory
+ */
+static unsigned long xlt_emergency_page;
+static struct mutex xlt_emergency_page_mutex;
+
struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi)
{
struct mlx5_ib_dev *dev;
@@ -399,6 +414,9 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
if (err)
goto out;
+ props->active_width = IB_WIDTH_4X;
+ props->active_speed = IB_SPEED_QDR;
+
translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
&props->active_width);
@@ -493,18 +511,19 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
vlan_id, port_num);
}
-static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
- unsigned int index, const union ib_gid *gid,
+static int mlx5_ib_add_gid(const union ib_gid *gid,
const struct ib_gid_attr *attr,
__always_unused void **context)
{
- return set_roce_addr(to_mdev(device), port_num, index, gid, attr);
+ return set_roce_addr(to_mdev(attr->device), attr->port_num,
+ attr->index, gid, attr);
}
-static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
- unsigned int index, __always_unused void **context)
+static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
+ __always_unused void **context)
{
- return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL);
+ return set_roce_addr(to_mdev(attr->device), attr->port_num,
+ attr->index, NULL, NULL);
}
__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
@@ -516,9 +535,6 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
return 0;
- if (!attr.ndev)
- return 0;
-
dev_put(attr.ndev);
if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
@@ -538,9 +554,6 @@ int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
if (ret)
return ret;
- if (!attr.ndev)
- return -ENODEV;
-
dev_put(attr.ndev);
*gid_type = attr.gid_type;
@@ -844,6 +857,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_RX_HASH_SRC_PORT_UDP |
MLX5_RX_HASH_DST_PORT_UDP |
MLX5_RX_HASH_INNER;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE)
+ resp.rss_caps.rx_hash_fields_mask |=
+ MLX5_RX_HASH_IPSEC_SPI;
resp.response_length += sizeof(resp.rss_caps);
}
} else {
@@ -875,6 +892,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
}
+ if (MLX5_CAP_DEV_MEM(mdev, memic)) {
+ props->max_dm_size =
+ MLX5_CAP_DEV_MEM(mdev, max_memic_size);
+ }
+
if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
@@ -980,6 +1002,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_CAP_QOS(mdev, packet_pacing_min_rate);
resp.packet_pacing_caps.supported_qpts |=
1 << IB_QPT_RAW_PACKET;
+ if (MLX5_CAP_QOS(mdev, packet_pacing_burst_bound) &&
+ MLX5_CAP_QOS(mdev, packet_pacing_typical_size))
+ resp.packet_pacing_caps.cap_flags |=
+ MLX5_IB_PP_SUPPORT_BURST;
}
resp.response_length += sizeof(resp.packet_pacing_caps);
}
@@ -1665,6 +1691,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.response_length = min(offsetof(typeof(resp), response_length) +
sizeof(resp.response_length), udata->outlen);
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) {
+ if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS))
+ resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
+ resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
+ if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
+ resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
+ /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
+ }
+
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return ERR_PTR(-ENOMEM);
@@ -1702,17 +1740,10 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
- context->upd_xlt_page = __get_free_page(GFP_KERNEL);
- if (!context->upd_xlt_page) {
- err = -ENOMEM;
- goto out_uars;
- }
- mutex_init(&context->upd_xlt_page_mutex);
-
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
if (err)
- goto out_page;
+ goto out_uars;
}
INIT_LIST_HEAD(&context->vma_private_list);
@@ -1789,9 +1820,6 @@ out_td:
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_ib_dealloc_transport_domain(dev, context->tdn);
-out_page:
- free_page(context->upd_xlt_page);
-
out_uars:
deallocate_uars(dev, context);
@@ -1817,7 +1845,6 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_ib_dealloc_transport_domain(dev, context->tdn);
- free_page(context->upd_xlt_page);
deallocate_uars(dev, context);
kfree(bfregi->sys_pages);
kfree(bfregi->count);
@@ -1993,6 +2020,8 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
return "best effort WC";
case MLX5_IB_MMAP_NC_PAGE:
return "NC";
+ case MLX5_IB_MMAP_DEVICE_MEM:
+ return "Device Memory";
default:
return NULL;
}
@@ -2151,6 +2180,34 @@ free_bfreg:
return err;
}
+static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct mlx5_ib_ucontext *mctx = to_mucontext(context);
+ struct mlx5_ib_dev *dev = to_mdev(context->device);
+ u16 page_idx = get_extended_index(vma->vm_pgoff);
+ size_t map_size = vma->vm_end - vma->vm_start;
+ u32 npages = map_size >> PAGE_SHIFT;
+ phys_addr_t pfn;
+ pgprot_t prot;
+
+ if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
+ page_idx + npages)
+ return -EINVAL;
+
+ pfn = ((pci_resource_start(dev->mdev->pdev, 0) +
+ MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
+ PAGE_SHIFT) +
+ page_idx;
+ prot = pgprot_writecombine(vma->vm_page_prot);
+ vma->vm_page_prot = prot;
+
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size,
+ vma->vm_page_prot))
+ return -EAGAIN;
+
+ return mlx5_ib_set_vma_data(vma, mctx);
+}
+
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
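
dm_mmap() above refuses the mapping unless every page in [page_idx, page_idx + npages) was previously allocated; it leans on find_next_zero_bit() returning its size argument when no clear bit exists in the range. A self-contained model of that check (byte-per-bit instead of the kernel's packed bitmap, purely for illustration):

#include <stdbool.h>
#include <stdio.h>

/* First index in [start, size) whose bit is clear, or 'size' if none —
 * the same contract as the kernel's find_next_zero_bit(). */
static unsigned find_next_zero(const unsigned char *map,
			       unsigned size, unsigned start)
{
	for (unsigned i = start; i < size; i++)
		if (!map[i])
			return i;
	return size;
}

static bool range_fully_allocated(const unsigned char *map,
				  unsigned page_idx, unsigned npages)
{
	return find_next_zero(map, page_idx + npages, page_idx) ==
	       page_idx + npages;
}

int main(void)
{
	unsigned char dm_pages[16] = { [2] = 1, [3] = 1, [4] = 1 };

	printf("%d\n", range_fully_allocated(dm_pages, 2, 3)); /* 1: mappable */
	printf("%d\n", range_fully_allocated(dm_pages, 2, 4)); /* 0: page 5 free */
	return 0;
}
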
@@ -2195,6 +2252,9 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
case MLX5_IB_MMAP_CLOCK_INFO:
return mlx5_ib_mmap_clock_info_page(dev, vma, context);
+ case MLX5_IB_MMAP_DEVICE_MEM:
+ return dm_mmap(ibcontext, vma);
+
default:
return -EINVAL;
}
@@ -2202,6 +2262,87 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
return 0;
}
+struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ u64 act_size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
+ struct mlx5_memic *memic = &to_mdev(ibdev)->memic;
+ phys_addr_t memic_addr;
+ struct mlx5_ib_dm *dm;
+ u64 start_offset;
+ u32 page_idx;
+ int err;
+
+ dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+ if (!dm)
+ return ERR_PTR(-ENOMEM);
+
+ mlx5_ib_dbg(to_mdev(ibdev), "alloc_memic req: user_length=0x%llx act_length=0x%llx log_alignment=%d\n",
+ attr->length, act_size, attr->alignment);
+
+ err = mlx5_cmd_alloc_memic(memic, &memic_addr,
+ act_size, attr->alignment);
+ if (err)
+ goto err_free;
+
+ start_offset = memic_addr & ~PAGE_MASK;
+ page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) -
+ MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
+ PAGE_SHIFT;
+
+ err = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+ if (err)
+ goto err_dealloc;
+
+ err = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ &page_idx, sizeof(page_idx));
+ if (err)
+ goto err_dealloc;
+
+ bitmap_set(to_mucontext(context)->dm_pages, page_idx,
+ DIV_ROUND_UP(act_size, PAGE_SIZE));
+
+ dm->dev_addr = memic_addr;
+
+ return &dm->ibdm;
+
+err_dealloc:
+ mlx5_cmd_dealloc_memic(memic, memic_addr,
+ act_size);
+err_free:
+ kfree(dm);
+ return ERR_PTR(err);
+}
+
+int mlx5_ib_dealloc_dm(struct ib_dm *ibdm)
+{
+ struct mlx5_memic *memic = &to_mdev(ibdm->device)->memic;
+ struct mlx5_ib_dm *dm = to_mdm(ibdm);
+ u64 act_size = roundup(dm->ibdm.length, MLX5_MEMIC_BASE_SIZE);
+ u32 page_idx;
+ int ret;
+
+ ret = mlx5_cmd_dealloc_memic(memic, dm->dev_addr, act_size);
+ if (ret)
+ return ret;
+
+ page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) -
+ MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
+ PAGE_SHIFT;
+ bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages,
+ page_idx,
+ DIV_ROUND_UP(act_size, PAGE_SIZE));
+
+ kfree(dm);
+
+ return 0;
+}
+
static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
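
mlx5_ib_alloc_dm() above reports two values back to userspace: the offset of the allocation inside its page — MEMIC allocations are only MLX5_MEMIC_BASE_SIZE (64-byte) aligned, not page aligned — and the page index relative to the MEMIC BAR window, which later keys the dm_pages bitmap and the mmap offset. A sketch of that decomposition, with made-up BAR and allocation addresses:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_MASK  (~((1ULL << PAGE_SHIFT) - 1))

int main(void)
{
	uint64_t bar_base   = 0x10000000ULL; /* BAR start + memic_bar_start_addr */
	uint64_t memic_addr = 0x10003240ULL; /* a 64-byte-aligned allocation */

	uint64_t start_offset = memic_addr & ~PAGE_MASK; /* offset in its page */
	uint32_t page_idx = (uint32_t)((memic_addr - bar_base) >> PAGE_SHIFT);

	/* Userspace mmaps page 'page_idx', then adds start_offset. */
	printf("start_offset=0x%llx page_idx=%u\n",
	       (unsigned long long)start_offset, page_idx); /* 0x240, 3 */
	return 0;
}
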
@@ -2317,8 +2458,28 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
offsetof(typeof(filter), field) -\
sizeof(filter.field))
+static int parse_flow_flow_action(const union ib_flow_spec *ib_spec,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_act *action)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act);
+
+ switch (maction->ib_action.type) {
+ case IB_FLOW_ACTION_ESP:
+ /* Currently only AES_GCM keymat is supported by the driver */
+ action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
+ action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ?
+ MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
u32 *match_v, const union ib_flow_spec *ib_spec,
+ const struct ib_flow_attr *flow_attr,
struct mlx5_flow_act *action)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
@@ -2328,6 +2489,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
void *headers_c;
void *headers_v;
int match_ipv;
+ int ret;
if (ib_spec->type & IB_FLOW_SPEC_INNER) {
headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
@@ -2478,7 +2640,15 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
ntohl(ib_spec->ipv6.mask.flow_label),
ntohl(ib_spec->ipv6.val.flow_label),
ib_spec->type & IB_FLOW_SPEC_INNER);
+ break;
+ case IB_FLOW_SPEC_ESP:
+ if (ib_spec->esp.mask.seq)
+ return -EOPNOTSUPP;
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
+ ntohl(ib_spec->esp.mask.spi));
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
+ ntohl(ib_spec->esp.val.spi));
break;
case IB_FLOW_SPEC_TCP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
@@ -2546,6 +2716,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
return -EOPNOTSUPP;
action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
break;
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ ret = parse_flow_flow_action(ib_spec, flow_attr, action);
+ if (ret)
+ return ret;
+ break;
default:
return -EINVAL;
}
@@ -2587,6 +2762,46 @@ static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
return false;
}
+enum valid_spec {
+ VALID_SPEC_INVALID,
+ VALID_SPEC_VALID,
+ VALID_SPEC_NA,
+};
+
+static enum valid_spec
+is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
+ const struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_act *flow_act,
+ bool egress)
+{
+ const u32 *match_c = spec->match_criteria;
+ bool is_crypto =
+ (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
+ bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
+ bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ /*
+ * Currently only crypto is supported in egress. Once regular egress
+ * rules are supported, this should always return VALID_SPEC_NA.
+ */
+ if (!is_crypto)
+ return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA;
+
+ return is_crypto && is_ipsec &&
+ (!egress || (!is_drop && !flow_act->has_flow_tag)) ?
+ VALID_SPEC_VALID : VALID_SPEC_INVALID;
+}
+
+static bool is_valid_spec(struct mlx5_core_dev *mdev,
+ const struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_act *flow_act,
+ bool egress)
+{
+ /* We currently only support IPsec egress flows */
+ return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
+}
+
static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
const struct ib_flow_attr *flow_attr,
bool check_inner)
@@ -2711,13 +2926,17 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
log_max_ft_size));
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
+ if (ft_type == MLX5_IB_FT_TX)
+ priority = 0;
+ else if (flow_is_multicast_only(flow_attr) &&
+ !dont_trap)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
priority = ib_prio_to_core_prio(flow_attr->priority,
dont_trap);
ns = mlx5_get_flow_namespace(dev->mdev,
+ ft_type == MLX5_IB_FT_TX ?
+ MLX5_FLOW_NAMESPACE_EGRESS :
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
@@ -2804,6 +3023,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
unsigned int spec_index;
int err = 0;
int dest_num = 1;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
if (!is_valid_attr(dev->mdev, flow_attr))
return ERR_PTR(-EINVAL);
@@ -2820,7 +3040,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(dev->mdev, spec->match_criteria,
spec->match_value,
- ib_flow, &flow_act);
+ ib_flow, flow_attr, &flow_act);
if (err < 0)
goto free;
@@ -2843,12 +3063,23 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
+
+ if (is_egress &&
+ !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
+ err = -EINVAL;
+ goto free;
+ }
+
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
rule_dst = NULL;
dest_num = 0;
} else {
- flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
- MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ if (is_egress)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ else
+ flow_act.action |=
+ dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
if (flow_act.has_flow_tag &&
@@ -3022,6 +3253,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct mlx5_flow_destination *dst = NULL;
struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
struct mlx5_ib_flow_prio *ft_prio;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
int err;
int underlay_qpn;
@@ -3030,7 +3262,13 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
if (domain != IB_FLOW_DOMAIN_USER ||
flow_attr->port > dev->num_ports ||
- (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
+ (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
+ IB_FLOW_ATTR_FLAGS_EGRESS)))
+ return ERR_PTR(-EINVAL);
+
+ if (is_egress &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT))
return ERR_PTR(-EINVAL);
dst = kzalloc(sizeof(*dst), GFP_KERNEL);
@@ -3039,7 +3277,8 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
mutex_lock(&dev->flow_db->lock);
- ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
+ ft_prio = get_flow_table(dev, flow_attr,
+ is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
@@ -3053,11 +3292,15 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
}
}
- dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- if (mqp->flags & MLX5_IB_QP_RSS)
- dst->tir_num = mqp->rss_qp.tirn;
- else
- dst->tir_num = mqp->raw_packet_qp.rq.tirn;
+ if (is_egress) {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ } else {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ if (mqp->flags & MLX5_IB_QP_RSS)
+ dst->tir_num = mqp->rss_qp.tirn;
+ else
+ dst->tir_num = mqp->raw_packet_qp.rq.tirn;
+ }
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
@@ -3102,6 +3345,170 @@ unlock:
return ERR_PTR(err);
}
+static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
+{
+ u32 flags = 0;
+
+ if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
+ flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
+
+ return flags;
+}
+
+#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
+static struct ib_flow_action *
+mlx5_ib_create_flow_action_esp(struct ib_device *device,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dev *mdev = to_mdev(device);
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
+ struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
+ struct mlx5_ib_flow_action *action;
+ u64 action_flags;
+ u64 flags;
+ int err = 0;
+
+ if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&action_flags, attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS)))
+ return ERR_PTR(-EFAULT);
+
+ if (action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
+
+ /* We currently support only a subset of the standard features: a
+ * keymat of type AES_GCM with icv_len == 16, iv_algo == SEQ and ESN
+ * (with overlap). Full offload mode isn't supported.
+ */
+ if (!attr->keymat || attr->replay || attr->encap ||
+ attr->spi || attr->seq || attr->tfc_pad ||
+ attr->hard_limit_pkts ||
+ (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (attr->keymat->protocol !=
+ IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ aes_gcm = &attr->keymat->keymat.aes_gcm;
+
+ if (aes_gcm->icv_len != 16 ||
+ aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ action = kmalloc(sizeof(*action), GFP_KERNEL);
+ if (!action)
+ return ERR_PTR(-ENOMEM);
+
+ action->esp_aes_gcm.ib_flags = attr->flags;
+ memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
+ sizeof(accel_attrs.keymat.aes_gcm.aes_key));
+ accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
+ memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
+ sizeof(accel_attrs.keymat.aes_gcm.salt));
+ memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
+ sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
+ accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
+ accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
+ accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
+
+ accel_attrs.esn = attr->esn;
+ if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
+ accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
+
+ action->esp_aes_gcm.ctx =
+ mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
+ if (IS_ERR(action->esp_aes_gcm.ctx)) {
+ err = PTR_ERR(action->esp_aes_gcm.ctx);
+ goto err_parse;
+ }
+
+ action->esp_aes_gcm.ib_flags = attr->flags;
+
+ return &action->ib_action;
+
+err_parse:
+ kfree(action);
+ return ERR_PTR(err);
+}
+
+static int
+mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+ struct mlx5_accel_esp_xfrm_attrs accel_attrs;
+ int err = 0;
+
+ if (attr->keymat || attr->replay || attr->encap ||
+ attr->spi || attr->seq || attr->tfc_pad ||
+ attr->hard_limit_pkts ||
+ (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
+ return -EOPNOTSUPP;
+
+ /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
+ * be modified.
+ */
+ if (!(maction->esp_aes_gcm.ib_flags &
+ IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
+ attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
+ return -EINVAL;
+
+ memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
+ sizeof(accel_attrs));
+
+ accel_attrs.esn = attr->esn;
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+ else
+ accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+
+ err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
+ &accel_attrs);
+ if (err)
+ return err;
+
+ maction->esp_aes_gcm.ib_flags &=
+ ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
+ maction->esp_aes_gcm.ib_flags |=
+ attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
+
+ return 0;
+}
+
+static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+
+ switch (action->type) {
+ case IB_FLOW_ACTION_ESP:
+ /*
+ * We only support aes_gcm for now, so we implicitly know this is
+ * the underlying crypto.
+ */
+ mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ kfree(maction);
+ return 0;
+}
+
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
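
In mlx5_ib_create_flow_action_esp() above, the single comparison action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) rejects any flag bit above the highest one this kernel understands, without listing unknown bits. A small demonstration of the idiom, with invented flag names:

#include <stdint.h>
#include <stdio.h>

#define FLAG_A         (1ULL << 0)
#define FLAG_B         (1ULL << 1)
#define LAST_SUPPORTED FLAG_B

/* Any set bit strictly above LAST_SUPPORTED pushes the value to
 * LAST_SUPPORTED << 1 or beyond, so one compare covers all unknown bits. */
static int flags_supported(uint64_t flags)
{
	return flags < (LAST_SUPPORTED << 1);
}

int main(void)
{
	printf("%d\n", flags_supported(FLAG_A | FLAG_B)); /* 1: all known */
	printf("%d\n", flags_supported(1ULL << 2));       /* 0: unknown bit */
	return 0;
}
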
@@ -4553,6 +4960,47 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
+ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM,
+ UVERBS_METHOD_DM_ALLOC,
+ &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ UVERBS_ATTR_TYPE(u16),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+ &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
+ UVERBS_ATTR_TYPE(u64),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+#define NUM_TREES 2
+static int populate_specs_root(struct mlx5_ib_dev *dev)
+{
+ const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = {
+ uverbs_default_get_objects()};
+ size_t num_trees = 1;
+
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
+ !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
+ default_root[num_trees++] = &mlx5_ib_flow_action;
+
+ if (MLX5_CAP_DEV_MEM(dev->mdev, memic) &&
+ !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
+ default_root[num_trees++] = &mlx5_ib_dm;
+
+ dev->ib_dev.specs_root =
+ uverbs_alloc_spec_tree(num_trees, default_root);
+
+ return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root);
+}
+
+static void depopulate_specs_root(struct mlx5_ib_dev *dev)
+{
+ uverbs_free_spec_tree(dev->ib_dev.specs_root);
+}
+
void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
@@ -4616,6 +5064,9 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
+ spin_lock_init(&dev->memic.memic_lock);
+ dev->memic.dev = mdev;
+
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
err = init_srcu_struct(&dev->mr_srcu);
if (err)
@@ -4778,11 +5229,21 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ if (MLX5_CAP_DEV_MEM(mdev, memic)) {
+ dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm;
+ dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm;
+ dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr;
+ }
+
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+ dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp;
+ dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
+ dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
+ dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
err = init_node_data(dev);
if (err)
@@ -4997,11 +5458,21 @@ void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
+static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
+{
+ return populate_specs_root(dev);
+}
+
int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
return ib_register_device(&dev->ib_dev, NULL);
}
+static void mlx5_ib_stage_depopulate_specs(struct mlx5_ib_dev *dev)
+{
+ depopulate_specs_root(dev);
+}
+
void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
destroy_umrc_res(dev);
@@ -5136,6 +5607,9 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SPECS,
+ mlx5_ib_stage_populate_specs,
+ mlx5_ib_stage_depopulate_specs),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -5181,6 +5655,9 @@ static const struct mlx5_ib_profile nic_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SPECS,
+ mlx5_ib_stage_populate_specs,
+ mlx5_ib_stage_depopulate_specs),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -5301,13 +5778,32 @@ static struct mlx5_interface mlx5_ib_interface = {
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
+unsigned long mlx5_ib_get_xlt_emergency_page(void)
+{
+ mutex_lock(&xlt_emergency_page_mutex);
+ return xlt_emergency_page;
+}
+
+void mlx5_ib_put_xlt_emergency_page(void)
+{
+ mutex_unlock(&xlt_emergency_page_mutex);
+}
+
static int __init mlx5_ib_init(void)
{
int err;
+ xlt_emergency_page = __get_free_page(GFP_KERNEL);
+ if (!xlt_emergency_page)
+ return -ENOMEM;
+
+ mutex_init(&xlt_emergency_page_mutex);
+
mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0);
- if (!mlx5_ib_event_wq)
+ if (!mlx5_ib_event_wq) {
+ free_page(xlt_emergency_page);
return -ENOMEM;
+ }
mlx5_ib_odp_init();
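
The module-wide emergency page replaces the per-ucontext upd_xlt_page: mlx5_ib_get_xlt_emergency_page() takes the mutex and only mlx5_ib_put_xlt_emergency_page() releases it, so the single fallback buffer is serialized across all users. A pthreads sketch of the same get/put discipline (the driver of course uses a kernel mutex, and allocates the page in mlx5_ib_init()):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static void *emergency_page;
static pthread_mutex_t emergency_lock = PTHREAD_MUTEX_INITIALIZER;

/* The lock is taken on get and released only on put, so at most one
 * caller owns the shared fallback buffer at any time. */
static void *get_emergency_page(void)
{
	pthread_mutex_lock(&emergency_lock);
	return emergency_page;
}

static void put_emergency_page(void)
{
	pthread_mutex_unlock(&emergency_lock);
}

int main(void)
{
	emergency_page = calloc(1, 4096);  /* done once at init time */

	void *xlt = get_emergency_page();  /* fallback when allocation fails */
	/* ... build the XLT update in xlt ... */
	(void)xlt;
	put_emergency_page();

	free(emergency_page);
	return 0;
}
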
@@ -5320,6 +5816,8 @@ static void __exit mlx5_ib_cleanup(void)
{
mlx5_unregister_interface(&mlx5_ib_interface);
destroy_workqueue(mlx5_ib_event_wq);
+ mutex_destroy(&xlt_emergency_page_mutex);
+ free_page(xlt_emergency_page);
}
module_init(mlx5_ib_init);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index c33bf152..49a1aa0 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -45,6 +45,7 @@
#include <linux/mlx5/transobj.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/mlx5-abi.h>
+#include <rdma/uverbs_ioctl.h>
#define mlx5_ib_dbg(dev, format, arg...) \
pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
@@ -108,6 +109,16 @@ enum {
MLX5_IB_INVALID_BFREG = BIT(31),
};
+enum {
+ MLX5_MAX_MEMIC_PAGES = 0x100,
+ MLX5_MEMIC_ALLOC_SIZE_MASK = 0x3f,
+};
+
+enum {
+ MLX5_MEMIC_BASE_ALIGN = 6,
+ MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN,
+};
+
struct mlx5_ib_vma_private_data {
struct list_head list;
struct vm_area_struct *vma;
@@ -130,10 +141,8 @@ struct mlx5_ib_ucontext {
/* protect vma_private_list add/del */
struct mutex vma_private_list_mutex;
- unsigned long upd_xlt_page;
- /* protect ODP/KSM */
- struct mutex upd_xlt_page_mutex;
u64 lib_caps;
+ DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -155,6 +164,7 @@ struct mlx5_ib_pd {
#define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1)
#define MLX5_IB_NUM_SNIFFER_FTS 2
+#define MLX5_IB_NUM_EGRESS_FTS 1
struct mlx5_ib_flow_prio {
struct mlx5_flow_table *flow_table;
unsigned int refcount;
@@ -170,6 +180,7 @@ struct mlx5_ib_flow_handler {
struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS];
+ struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS];
struct mlx5_flow_table *lag_demux_ft;
/* Protect flow steering bypass flow tables
* when add/del flow rules.
@@ -406,7 +417,7 @@ struct mlx5_ib_qp {
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
- u32 rate_limit;
+ struct mlx5_rate_limit rl;
u32 underlay_qpn;
bool tunnel_offload_en;
/* storage for qp sub type when core qp type is IB_QPT_DRIVER */
@@ -522,8 +533,19 @@ enum mlx5_ib_mtt_access_flags {
MLX5_IB_MTT_WRITE = (1 << 1),
};
+struct mlx5_ib_dm {
+ struct ib_dm ibdm;
+ phys_addr_t dev_addr;
+};
+
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
+#define MLX5_IB_DM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\
+ IB_ACCESS_REMOTE_WRITE |\
+ IB_ACCESS_REMOTE_READ |\
+ IB_ACCESS_REMOTE_ATOMIC |\
+ IB_ZERO_BASED)
+
struct mlx5_ib_mr {
struct ib_mr ibmr;
void *descs;
@@ -743,6 +765,7 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_UAR,
MLX5_IB_STAGE_BFREG,
MLX5_IB_STAGE_PRE_IB_REG_UMR,
+ MLX5_IB_STAGE_SPECS,
MLX5_IB_STAGE_IB_REG,
MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
@@ -774,6 +797,22 @@ struct mlx5_ib_multiport_info {
bool unaffiliate;
};
+struct mlx5_ib_flow_action {
+ struct ib_flow_action ib_action;
+ union {
+ struct {
+ u64 ib_flags;
+ struct mlx5_accel_esp_xfrm *ctx;
+ } esp_aes_gcm;
+ };
+};
+
+struct mlx5_memic {
+ struct mlx5_core_dev *dev;
+ spinlock_t memic_lock;
+ DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
@@ -820,6 +859,7 @@ struct mlx5_ib_dev {
u8 umr_fence;
struct list_head ib_dev_list;
u64 sys_image_guid;
+ struct mlx5_memic memic;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -887,6 +927,11 @@ static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
return container_of(msrq, struct mlx5_ib_srq, msrq);
}
+static inline struct mlx5_ib_dm *to_mdm(struct ib_dm *ibdm)
+{
+ return container_of(ibdm, struct mlx5_ib_dm, ibdm);
+}
+
static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct mlx5_ib_mr, ibmr);
@@ -897,6 +942,12 @@ static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw)
return container_of(ibmw, struct mlx5_ib_mw, ibmw);
}
+static inline struct mlx5_ib_flow_action *
+to_mflow_act(struct ib_flow_action *ibact)
+{
+ return container_of(ibact, struct mlx5_ib_flow_action, ib_action);
+}
+
int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
struct mlx5_db *db);
void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
@@ -1025,7 +1076,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
struct ib_udata *udata);
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev);
-
+struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int mlx5_ib_dealloc_dm(struct ib_dm *ibdm);
+struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
+ struct ib_dm_mr_attr *attr,
+ struct uverbs_attr_bundle *attrs);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
@@ -1221,4 +1279,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages;
}
+unsigned long mlx5_ib_get_xlt_emergency_page(void);
+void mlx5_ib_put_xlt_emergency_page(void);
+
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 654bc31..1520a2f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -47,10 +47,25 @@ enum {
#define MLX5_UMR_ALIGN 2048
-static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
-static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
+static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
+static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
+static bool umr_can_modify_entity_size(struct mlx5_ib_dev *dev)
+{
+ return !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled);
+}
+
+static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
+{
+ return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
+}
+
+static bool use_umr(struct mlx5_ib_dev *dev, int order)
+{
+ return order <= mr_cache_max_order(dev) &&
+ umr_can_modify_entity_size(dev);
+}
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
@@ -189,7 +204,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET(mkc, mkc, access_mode, ent->access_mode);
+ MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2,
+ (ent->access_mode >> 2) & 0x7);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
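
Because MLX5_MKC_ACCESS_MODE_MEMIC does not fit the original 2-bit field, the mkey context now carries the access mode as a split value: bits 1:0 in access_mode_1_0 and bits 4:2 in access_mode_4_2, as in the add_keys() hunk above. The split and its reassembly, on an arbitrary 5-bit value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t mode = 0x14;            /* an arbitrary 5-bit access mode */

	uint8_t lo = mode & 0x3;        /* -> access_mode_1_0 */
	uint8_t hi = (mode >> 2) & 0x7; /* -> access_mode_4_2 */

	uint8_t joined = (uint8_t)((hi << 2) | lo);          /* hardware view */
	printf("lo=%u hi=%u joined=0x%x\n", lo, hi, joined); /* 0 5 0x14 */
	return 0;
}
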
@@ -220,26 +237,32 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
+ struct mlx5_ib_mr *tmp_mr;
struct mlx5_ib_mr *mr;
- int err;
+ LIST_HEAD(del_list);
int i;
for (i = 0; i < num; i++) {
spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
spin_unlock_irq(&ent->lock);
- return;
+ break;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_del(&mr->list);
+ list_move(&mr->list, &del_list);
ent->cur--;
ent->size--;
spin_unlock_irq(&ent->lock);
- err = destroy_mkey(dev, mr);
- if (err)
- mlx5_ib_warn(dev, "failed destroy mkey\n");
- else
- kfree(mr);
+ mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+ }
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ synchronize_srcu(&dev->mr_srcu);
+#endif
+
+ list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
+ list_del(&mr->list);
+ kfree(mr);
}
}
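
remove_keys() now unlinks cache MRs onto a private list under the spinlock, destroys the mkeys, waits one SRCU grace period, and frees only afterwards, so an ODP reader that found an mkey before removal never touches freed memory. A hedged userspace sketch of the shape of that pattern — the grace-period wait is stubbed out and the locking elided:

#include <stdio.h>
#include <stdlib.h>

struct mr {
	struct mr *next;
	int key;
};

static struct mr *cache_head;          /* guarded by a lock in the driver */

static void synchronize_readers(void)  /* stand-in for synchronize_srcu() */
{
	/* wait until every reader that started before now has finished */
}

static void remove_keys(int num)
{
	struct mr *del = NULL, *m;

	while (num-- && cache_head) {   /* 1: unlink under the lock */
		m = cache_head;
		cache_head = m->next;
		m->next = del;
		del = m;
	}

	synchronize_readers();          /* 2: wait out pre-existing readers */

	while (del) {                   /* 3: freeing is now safe */
		m = del;
		del = m->next;
		printf("freeing mkey %d\n", m->key);
		free(m);
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct mr *m = malloc(sizeof(*m));

		m->key = i;
		m->next = cache_head;
		cache_head = m;
	}
	remove_keys(2);
	return 0;
}
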
@@ -562,26 +585,32 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
+ struct mlx5_ib_mr *tmp_mr;
struct mlx5_ib_mr *mr;
- int err;
+ LIST_HEAD(del_list);
cancel_delayed_work(&ent->dwork);
while (1) {
spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
spin_unlock_irq(&ent->lock);
- return;
+ break;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_del(&mr->list);
+ list_move(&mr->list, &del_list);
ent->cur--;
ent->size--;
spin_unlock_irq(&ent->lock);
- err = destroy_mkey(dev, mr);
- if (err)
- mlx5_ib_warn(dev, "failed destroy mkey\n");
- else
- kfree(mr);
+ mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+ }
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ synchronize_srcu(&dev->mr_srcu);
+#endif
+
+ list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
+ list_del(&mr->list);
+ kfree(mr);
}
}
@@ -780,7 +809,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
@@ -947,7 +976,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
{
struct mlx5_ib_dev *dev = mr->dev;
struct ib_umem *umem = mr->umem;
+
if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
+ if (!umr_can_use_indirect_mkey(dev))
+ return -EPERM;
mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
return npages;
}
@@ -977,7 +1009,6 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
{
struct mlx5_ib_dev *dev = mr->dev;
struct device *ddev = dev->ib_dev.dev.parent;
- struct mlx5_ib_ucontext *uctx = NULL;
int size;
void *xlt;
dma_addr_t dma;
@@ -993,6 +1024,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
size_t pages_to_map = 0;
size_t pages_iter = 0;
gfp_t gfp;
+ bool use_emergency_page = false;
+
+ if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
+ !umr_can_use_indirect_mkey(dev))
+ return -EPERM;
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
* so we need to align the offset and length accordingly
@@ -1019,12 +1055,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
}
if (!xlt) {
- uctx = to_mucontext(mr->ibmr.pd->uobject->context);
mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
+ xlt = (void *)mlx5_ib_get_xlt_emergency_page();
size = PAGE_SIZE;
- xlt = (void *)uctx->upd_xlt_page;
- mutex_lock(&uctx->upd_xlt_page_mutex);
memset(xlt, 0, size);
+ use_emergency_page = true;
}
pages_iter = size / desc_size;
dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
@@ -1088,8 +1123,8 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
free_xlt:
- if (uctx)
- mutex_unlock(&uctx->upd_xlt_page_mutex);
+ if (use_emergency_page)
+ mlx5_ib_put_xlt_emergency_page();
else
free_pages((unsigned long)xlt, get_order(size));
@@ -1141,7 +1176,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, free, !populate);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
@@ -1197,22 +1232,96 @@ static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
mr->access_flags = access_flags;
}
+static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr,
+ u64 length, int acc)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_mr *mr;
+ void *mkc;
+ u32 *in;
+ int err;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MEMIC & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2,
+ (MLX5_MKC_ACCESS_MODE_MEMIC >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET64(mkc, mkc, len, length);
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET64(mkc, mkc, start_addr,
+ memic_addr - pci_resource_start(dev->mdev->pdev, 0));
+
+ err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
+ if (err)
+ goto err_in;
+
+ kfree(in);
+
+ mr->umem = NULL;
+ set_mr_fileds(dev, mr, 0, length, acc);
+
+ return &mr->ibmr;
+
+err_in:
+ kfree(in);
+
+err_free:
+ kfree(mr);
+
+ return ERR_PTR(err);
+}
+
+struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
+ struct ib_dm_mr_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dm *mdm = to_mdm(dm);
+ u64 memic_addr;
+
+ if (attr->access_flags & ~MLX5_IB_DM_ALLOWED_ACCESS)
+ return ERR_PTR(-EINVAL);
+
+ memic_addr = mdm->dev_addr + attr->offset;
+
+ return mlx5_ib_get_memic_mr(pd, memic_addr, attr->length,
+ attr->access_flags);
+}
+
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
+ bool populate_mtts = false;
struct ib_umem *umem;
int page_shift;
int npages;
int ncont;
int order;
int err;
- bool use_umr = true;
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-EOPNOTSUPP);
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
@@ -1224,6 +1333,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(-EINVAL);
mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+ if (IS_ERR(mr))
+ return ERR_CAST(mr);
return &mr->ibmr;
}
#endif
@@ -1234,26 +1345,29 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (err < 0)
return ERR_PTR(err);
- if (order <= mr_cache_max_order(dev)) {
+ if (use_umr(dev, order)) {
mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
page_shift, order, access_flags);
if (PTR_ERR(mr) == -EAGAIN) {
mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
mr = NULL;
}
+ populate_mtts = false;
} else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
if (access_flags & IB_ACCESS_ON_DEMAND) {
err = -EINVAL;
pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
goto error;
}
- use_umr = false;
+ populate_mtts = true;
}
if (!mr) {
+ if (!umr_can_modify_entity_size(dev))
+ populate_mtts = true;
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
- page_shift, access_flags, !use_umr);
+ page_shift, access_flags, populate_mtts);
mutex_unlock(&dev->slow_path_mutex);
}
@@ -1271,7 +1385,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
update_odp_mr(mr);
#endif
- if (use_umr) {
+ if (!populate_mtts) {
int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
if (access_flags & IB_ACCESS_ON_DEMAND)
@@ -1286,7 +1400,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
}
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
mr->live = 1;
+#endif
return &mr->ibmr;
error:
ib_umem_release(umem);
@@ -1365,36 +1481,34 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
ib_umem_release(mr->umem);
err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
&npages, &page_shift, &ncont, &order);
- if (err < 0) {
- clean_mr(dev, mr);
- return err;
- }
+ if (err)
+ goto err;
}
if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
/*
* UMR can't be used - MKey needs to be replaced.
*/
- if (mr->allocated_from_cache) {
+ if (mr->allocated_from_cache)
err = unreg_umr(dev, mr);
- if (err)
- mlx5_ib_warn(dev, "Failed to unregister MR\n");
- } else {
+ else
err = destroy_mkey(dev, mr);
- if (err)
- mlx5_ib_warn(dev, "Failed to destroy MKey\n");
- }
if (err)
- return err;
+ goto err;
mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
page_shift, access_flags, true);
- if (IS_ERR(mr))
- return PTR_ERR(mr);
+ if (IS_ERR(mr)) {
+ err = PTR_ERR(mr);
+ mr = to_mmr(ib_mr);
+ goto err;
+ }
mr->allocated_from_cache = 0;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
mr->live = 1;
+#endif
} else {
/*
* Send a UMR WQE
@@ -1417,13 +1531,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
err = rereg_umr(pd, mr, access_flags, flags);
}
- if (err) {
- mlx5_ib_warn(dev, "Failed to rereg UMR\n");
- ib_umem_release(mr->umem);
- mr->umem = NULL;
- clean_mr(dev, mr);
- return err;
- }
+ if (err)
+ goto err;
}
set_mr_fileds(dev, mr, npages, len, access_flags);
@@ -1432,6 +1541,14 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
update_odp_mr(mr);
#endif
return 0;
+
+err:
+ if (mr->umem) {
+ ib_umem_release(mr->umem);
+ mr->umem = NULL;
+ }
+ clean_mr(dev, mr);
+ return err;
}
static int
@@ -1480,10 +1597,9 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
}
}
-static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
int allocated_from_cache = mr->allocated_from_cache;
- int err;
if (mr->sig) {
if (mlx5_core_destroy_psv(dev->mdev,
@@ -1500,21 +1616,11 @@ static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
mlx5_free_priv_descs(mr);
- if (!allocated_from_cache) {
- u32 key = mr->mmkey.key;
-
- err = destroy_mkey(dev, mr);
- if (err) {
- mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
- key, err);
- return err;
- }
- }
-
- return 0;
+ if (!allocated_from_cache)
+ destroy_mkey(dev, mr);
}
-static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
int npages = mr->npages;
struct ib_umem *umem = mr->umem;
@@ -1555,16 +1661,12 @@ static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
kfree(mr);
else
mlx5_mr_cache_free(dev, mr);
-
- return 0;
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
- struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
- struct mlx5_ib_mr *mr = to_mmr(ibmr);
-
- return dereg_mr(dev, mr);
+ dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr));
+ return 0;
}
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
@@ -1645,7 +1747,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
goto err_free_in;
}
- MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
+ MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7);
MLX5_SET(mkc, mkc, umr_en, 1);
mr->ibmr.device = pd->device;
@@ -1726,7 +1829,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, lr, 1);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
MLX5_SET(mkc, mkc, qpn, 0xffffff);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 0d0b0b8..7ed4b70 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -86,7 +86,9 @@ struct mlx5_modify_raw_qp_param {
u16 operation;
u32 set_mask; /* raw_qp_set_mask_map */
- u32 rate_limit;
+
+ struct mlx5_rate_limit rl;
+
u8 rq_q_ctr_id;
};
@@ -878,7 +880,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
goto err_free;
}
- err = ib_copy_to_udata(udata, resp, sizeof(*resp));
+ err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp)));
if (err) {
mlx5_ib_dbg(dev, "copy failed\n");
goto err_unmap;
@@ -1411,6 +1413,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
void *tirc;
void *hfso;
u32 selected_fields = 0;
+ u32 outer_l4;
size_t min_resp_len;
u32 tdn = mucontext->tdn;
struct mlx5_ib_create_qp_rss ucmd = {};
@@ -1466,7 +1469,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
return -EOPNOTSUPP;
}
- err = ib_copy_to_udata(udata, &resp, min_resp_len);
+ err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
if (err) {
mlx5_ib_dbg(dev, "copy failed\n");
return -EINVAL;
@@ -1541,10 +1544,14 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
- if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) &&
- ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) {
+ outer_l4 = ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) << 0 |
+ ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) << 1 |
+ (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2;
+
+ /* Check that at most one L4 protocol is set */
+ if (outer_l4 & (outer_l4 - 1)) {
err = -EINVAL;
goto err;
}
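
The outer_l4 word built above collects the TCP, UDP, and IPsec-SPI selections as individual bits, so the classic x & (x - 1) test — which clears the lowest set bit — rejects any request hashing on more than one L4 protocol in a single branch:

#include <stdio.h>

/* Nonzero iff more than one bit is set: x & (x - 1) clears the lowest bit. */
static int multiple_bits_set(unsigned int x)
{
	return (x & (x - 1)) != 0;
}

int main(void)
{
	printf("%d\n", multiple_bits_set(0x0)); /* 0: nothing selected is fine */
	printf("%d\n", multiple_bits_set(0x2)); /* 0: exactly one protocol */
	printf("%d\n", multiple_bits_set(0x3)); /* 1: TCP + UDP -> -EINVAL */
	return 0;
}
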
@@ -1575,6 +1582,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
(ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT;
+ if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI)
+ selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI;
+
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
@@ -2774,8 +2784,9 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
const struct mlx5_modify_raw_qp_param *raw_qp_param)
{
struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
- u32 old_rate = ibqp->rate_limit;
- u32 new_rate = old_rate;
+ struct mlx5_rate_limit old_rl = ibqp->rl;
+ struct mlx5_rate_limit new_rl = old_rl;
+ bool new_rate_added = false;
u16 rl_index = 0;
void *in;
void *sqc;
@@ -2797,39 +2808,43 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n",
__func__);
else
- new_rate = raw_qp_param->rate_limit;
+ new_rl = raw_qp_param->rl;
}
- if (old_rate != new_rate) {
- if (new_rate) {
- err = mlx5_rl_add_rate(dev, new_rate, &rl_index);
+ if (!mlx5_rl_are_equal(&old_rl, &new_rl)) {
+ if (new_rl.rate) {
+ err = mlx5_rl_add_rate(dev, &rl_index, &new_rl);
if (err) {
- pr_err("Failed configuring rate %u: %d\n",
- new_rate, err);
+ pr_err("Failed configuring rate limit(err %d): \
+ rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
+ err, new_rl.rate, new_rl.max_burst_sz,
+ new_rl.typical_pkt_sz);
+
goto out;
}
+ new_rate_added = true;
}
MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
+ /* index 0 means no limit */
MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
}
err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
if (err) {
/* Remove new rate from table if failed */
- if (new_rate &&
- old_rate != new_rate)
- mlx5_rl_remove_rate(dev, new_rate);
+ if (new_rate_added)
+ mlx5_rl_remove_rate(dev, &new_rl);
goto out;
}
/* Only remove the old rate after new rate was set */
- if ((old_rate &&
- (old_rate != new_rate)) ||
+ if ((old_rl.rate &&
+ !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
(new_state != MLX5_SQC_STATE_RDY))
- mlx5_rl_remove_rate(dev, old_rate);
+ mlx5_rl_remove_rate(dev, &old_rl);
- ibqp->rate_limit = new_rate;
+ ibqp->rl = new_rl;
sq->state = new_state;
out:
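
The rewritten rate-limit flow above is careful about table refcounts: the new {rate, burst, packet size} entry is added first, the SQ is pointed at it, and the old entry is released only after the modify succeeds; on failure only the just-added entry is dropped. A toy refcounted table showing that ordering (table layout and helpers are illustrative):

#include <stdio.h>

struct rl_entry { unsigned int rate, burst; int ref; };

static struct rl_entry table[4];

static int rl_add(unsigned int rate, unsigned int burst) /* index or -1 */
{
	int free_idx = -1;

	for (int i = 0; i < 4; i++) {
		if (table[i].ref && table[i].rate == rate &&
		    table[i].burst == burst) {
			table[i].ref++;
			return i;
		}
		if (!table[i].ref && free_idx < 0)
			free_idx = i;
	}
	if (free_idx < 0)
		return -1;
	table[free_idx] = (struct rl_entry){ rate, burst, 1 };
	return free_idx;
}

static void rl_remove(int idx) { table[idx].ref--; }

static int modify_sq(int idx) { (void)idx; return 0; /* firmware says OK */ }

int main(void)
{
	int old_idx = rl_add(100, 16);  /* SQ currently limited to rate 100 */
	int new_idx = rl_add(200, 32);  /* 1: take the new entry first */

	if (modify_sq(new_idx)) {
		rl_remove(new_idx);     /* failure: drop only the new ref */
		return 1;
	}
	rl_remove(old_idx);             /* 2: old ref dropped only on success */
	printf("old.ref=%d new.ref=%d\n",
	       table[old_idx].ref, table[new_idx].ref); /* 0 and 1 */
	return 0;
}
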
@@ -2906,7 +2921,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
- enum ib_qp_state cur_state, enum ib_qp_state new_state)
+ enum ib_qp_state cur_state, enum ib_qp_state new_state,
+ const struct mlx5_ib_modify_qp *ucmd)
{
static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
[MLX5_QP_STATE_RST] = {
@@ -2959,18 +2975,16 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
u16 op;
u8 tx_affinity = 0;
+ mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ?
+ qp->qp_sub_type : ibqp->qp_type);
+ if (mlx5_st < 0)
+ return -EINVAL;
+
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
return -ENOMEM;
- err = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ?
- qp->qp_sub_type : ibqp->qp_type);
- if (err < 0) {
- mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
- goto out;
- }
-
- context->flags = cpu_to_be32(err << 16);
+ context->flags = cpu_to_be32(mlx5_st << 16);
if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
@@ -3124,10 +3138,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
mlx5_cur = to_mlx5_state(cur_state);
mlx5_new = to_mlx5_state(new_state);
- mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ?
- qp->qp_sub_type : ibqp->qp_type);
- if (mlx5_st < 0)
- goto out;
if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
!optab[mlx5_cur][mlx5_new]) {
@@ -3150,7 +3160,30 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_RATE_LIMIT) {
- raw_qp_param.rate_limit = attr->rate_limit;
+ raw_qp_param.rl.rate = attr->rate_limit;
+
+ if (ucmd->burst_info.max_burst_sz) {
+ if (attr->rate_limit &&
+ MLX5_CAP_QOS(dev->mdev, packet_pacing_burst_bound)) {
+ raw_qp_param.rl.max_burst_sz =
+ ucmd->burst_info.max_burst_sz;
+ } else {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (ucmd->burst_info.typical_pkt_sz) {
+ if (attr->rate_limit &&
+ MLX5_CAP_QOS(dev->mdev, packet_pacing_typical_size)) {
+ raw_qp_param.rl.typical_pkt_sz =
+ ucmd->burst_info.typical_pkt_sz;
+ } else {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT;
}
@@ -3178,7 +3211,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
- if (new_state == IB_QPS_RESET && !ibqp->uobject) {
+ if (new_state == IB_QPS_RESET &&
+ !ibqp->uobject && ibqp->qp_type != IB_QPT_XRC_TGT) {
mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
ibqp->srq ? to_msrq(ibqp->srq) : NULL);
if (send_cq != recv_cq)
@@ -3337,8 +3371,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_modify_qp ucmd = {};
enum ib_qp_type qp_type;
enum ib_qp_state cur_state, new_state;
+ size_t required_cmd_sz;
int err = -EINVAL;
int port;
enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
@@ -3346,6 +3382,28 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (ibqp->rwq_ind_tbl)
return -ENOSYS;
+ if (udata && udata->inlen) {
+ required_cmd_sz = offsetof(typeof(ucmd), reserved) +
+ sizeof(ucmd.reserved);
+ if (udata->inlen < required_cmd_sz)
+ return -EINVAL;
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd)))
+ return -EOPNOTSUPP;
+
+ if (ib_copy_from_udata(&ucmd, udata,
+ min(udata->inlen, sizeof(ucmd))))
+ return -EFAULT;
+
+ if (ucmd.comp_mask ||
+ memchr_inv(&ucmd.reserved, 0, sizeof(ucmd.reserved)) ||
+ memchr_inv(&ucmd.burst_info.reserved, 0,
+ sizeof(ucmd.burst_info.reserved)))
+ return -EOPNOTSUPP;
+ }
+
if (unlikely(ibqp->qp_type == IB_QPT_GSI))
return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
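
The new udata handling in mlx5_ib_modify_qp() above accepts commands from both older and newer userspace: it demands at least the bytes up to and including 'reserved', copies min(inlen, sizeof(ucmd)), and tolerates a longer input only when the unknown tail is all zeros. A userspace model of those compatibility rules, with an illustrative struct layout:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ucmd_v1 {                 /* what this "kernel" knows about */
	uint32_t comp_mask;
	uint32_t burst;
	uint32_t reserved;
};

static int parse(const void *in, size_t inlen)
{
	struct ucmd_v1 cmd = {0};
	size_t required = offsetof(struct ucmd_v1, reserved) +
			  sizeof(uint32_t);

	if (inlen < required)
		return -1;                 /* too short to be meaningful */

	if (inlen > sizeof(cmd)) {         /* newer userspace: inspect tail */
		const uint8_t *tail = (const uint8_t *)in + sizeof(cmd);

		for (size_t i = 0; i < inlen - sizeof(cmd); i++)
			if (tail[i])
				return -2; /* requests something unknown */
	}

	memcpy(&cmd, in, inlen < sizeof(cmd) ? inlen : sizeof(cmd));
	return (cmd.comp_mask || cmd.reserved) ? -2 : 0;
}

int main(void)
{
	uint8_t newer[16] = {0};           /* v1 fields plus a zeroed tail */

	printf("%d\n", parse(newer, sizeof(newer))); /* 0: accepted */
	newer[12] = 1;                     /* unknown extension bit */
	printf("%d\n", parse(newer, sizeof(newer))); /* -2: rejected */
	return 0;
}
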
@@ -3426,7 +3484,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
- err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+ err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state,
+ new_state, &ucmd);
out:
mutex_unlock(&qp->mutex);
@@ -3646,8 +3705,19 @@ static __be64 get_umr_update_pd_mask(void)
return cpu_to_be64(result);
}
-static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct ib_send_wr *wr, int atomic)
+static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
+{
+ if ((mask & MLX5_MKEY_MASK_PAGE_SIZE &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) ||
+ (mask & MLX5_MKEY_MASK_A &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)))
+ return -EPERM;
+ return 0;
+}
+
+static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
+ struct mlx5_wqe_umr_ctrl_seg *umr,
+ struct ib_send_wr *wr, int atomic)
{
struct mlx5_umr_wr *umrwr = umr_wr(wr);
@@ -3679,6 +3749,8 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
if (!wr->num_sge)
umr->flags |= MLX5_UMR_INLINE;
+
+ return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
}
static u8 get_umr_flags(int acc)
@@ -4501,7 +4573,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
- set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic)));
+ err = set_reg_umr_segment(dev, seg, wr, !!(MLX5_CAP_GEN(mdev, atomic)));
+ if (unlikely(err))
+ goto out;
seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
if (unlikely((seg == qend)))
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 6fee779..541f237 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1295,6 +1295,7 @@ int mthca_register_device(struct mthca_dev *dev)
mutex_init(&dev->cap_mask_mutex);
+ dev->ib_dev.driver_id = RDMA_DRIVER_MTHCA;
ret = ib_register_device(&dev->ib_dev, NULL);
if (ret)
return ret;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 162475a..1040a6e 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -3854,6 +3854,7 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
struct nes_adapter *nesadapter = nesdev->nesadapter;
int i, ret;
+ nesvnic->nesibdev->ibdev.driver_id = RDMA_DRIVER_NES;
ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL);
if (ret) {
return ret;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index dec6509..3897b645 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -193,11 +193,9 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
__func__, status);
goto av_conf_err;
}
- if (sgid_attr.ndev) {
- if (is_vlan_dev(sgid_attr.ndev))
- vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
- dev_put(sgid_attr.ndev);
- }
+ if (is_vlan_dev(sgid_attr.ndev))
+ vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
+ dev_put(sgid_attr.ndev);
/* Get network header type for this GID */
ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 9904918..2c260e1 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -2014,7 +2014,7 @@ static int ocrdma_mbx_reg_mr_cont(struct ocrdma_dev *dev,
struct ocrdma_hw_mr *hwmr, u32 pbl_cnt,
u32 pbl_offset, u32 last)
{
- int status = -ENOMEM;
+ int status;
int i;
struct ocrdma_reg_nsmr_cont *cmd;
@@ -2033,9 +2033,7 @@ static int ocrdma_mbx_reg_mr_cont(struct ocrdma_dev *dev,
upper_32_bits(hwmr->pbl_table[i + pbl_offset].pa);
}
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
- if (status)
- goto mbx_err;
-mbx_err:
+
kfree(cmd);
return status;
}
@@ -2496,7 +2494,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
{
int status;
struct rdma_ah_attr *ah_attr = &attrs->ah_attr;
- union ib_gid sgid, zgid;
+ union ib_gid sgid;
struct ib_gid_attr sgid_attr;
u32 vlan_id = 0xFFFF;
u8 mac_addr[6], hdr_type;
@@ -2529,16 +2527,12 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index,
&sgid, &sgid_attr);
- if (!status && sgid_attr.ndev) {
+ if (!status) {
vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN);
dev_put(sgid_attr.ndev);
}
- memset(&zgid, 0, sizeof(zgid));
- if (!memcmp(&sgid, &zgid, sizeof(zgid)))
- return -EINVAL;
-
qp->sgid_idx = grh->sgid_index;
memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]);
@@ -3133,12 +3127,12 @@ done:
static int ocrdma_mbx_modify_eqd(struct ocrdma_dev *dev, struct ocrdma_eq *eq,
int num)
{
- int i, status = -ENOMEM;
+ int i, status;
struct ocrdma_modify_eqd_req *cmd;
cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_MODIFY_EQ_DELAY, sizeof(*cmd));
if (!cmd)
- return status;
+ return -ENOMEM;
ocrdma_init_mch(&cmd->cmd.req, OCRDMA_CMD_MODIFY_EQ_DELAY,
OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
@@ -3151,9 +3145,7 @@ static int ocrdma_mbx_modify_eqd(struct ocrdma_dev *dev, struct ocrdma_eq *eq,
(eq[i].aic_obj.prev_eqd * 65)/100;
}
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
- if (status)
- goto mbx_err;
-mbx_err:
+
kfree(cmd);
return status;
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index fbfbd9e..eb8b6a9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -158,10 +158,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.query_device = ocrdma_query_device;
dev->ibdev.query_port = ocrdma_query_port;
dev->ibdev.modify_port = ocrdma_modify_port;
- dev->ibdev.query_gid = ocrdma_query_gid;
dev->ibdev.get_netdev = ocrdma_get_netdev;
- dev->ibdev.add_gid = ocrdma_add_gid;
- dev->ibdev.del_gid = ocrdma_del_gid;
dev->ibdev.get_link_layer = ocrdma_link_layer;
dev->ibdev.alloc_pd = ocrdma_alloc_pd;
dev->ibdev.dealloc_pd = ocrdma_dealloc_pd;
@@ -217,6 +214,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.destroy_srq = ocrdma_destroy_srq;
dev->ibdev.post_srq_recv = ocrdma_post_srq_recv;
}
+ dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA;
return ib_register_device(&dev->ibdev, NULL);
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 8009bda..784ed6b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -62,40 +62,6 @@ int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
return 0;
}
-int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
- int index, union ib_gid *sgid)
-{
- int ret;
-
- memset(sgid, 0, sizeof(*sgid));
- if (index >= OCRDMA_MAX_SGID)
- return -EINVAL;
-
- ret = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
- if (ret == -EAGAIN) {
- memcpy(sgid, &zgid, sizeof(*sgid));
- return 0;
- }
-
- return ret;
-}
-
-int ocrdma_add_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- void **context) {
- return 0;
-}
-
-int ocrdma_del_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- void **context) {
- return 0;
-}
-
int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
struct ib_udata *uhw)
{
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 704ef1e..9a99717 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -61,19 +61,7 @@ enum rdma_protocol_type
ocrdma_query_protocol(struct ib_device *device, u8 port_num);
void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid);
-int ocrdma_query_gid(struct ib_device *, u8 port,
- int index, union ib_gid *gid);
struct net_device *ocrdma_get_netdev(struct ib_device *device, u8 port_num);
-int ocrdma_add_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- void **context);
-int ocrdma_del_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- void **context);
int ocrdma_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *,
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index f9a645c..f4cb60b 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -162,10 +162,6 @@ static int qedr_iw_register_device(struct qedr_dev *dev)
static void qedr_roce_register_device(struct qedr_dev *dev)
{
dev->ibdev.node_type = RDMA_NODE_IB_CA;
- dev->ibdev.query_gid = qedr_query_gid;
-
- dev->ibdev.add_gid = qedr_add_gid;
- dev->ibdev.del_gid = qedr_del_gid;
dev->ibdev.get_port_immutable = qedr_roce_port_immutable;
}
@@ -257,6 +253,7 @@ static int qedr_register_device(struct qedr_dev *dev)
dev->ibdev.get_link_layer = qedr_link_layer;
dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
+ dev->ibdev.driver_id = RDMA_DRIVER_QEDR;
return ib_register_device(&dev->ibdev, NULL);
}
@@ -707,7 +704,7 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
"Error: CQ event with NULL pointer ibcq. Handle=%llx\n",
roce_handle64);
}
- DP_ERR(dev, "CQ event %d on hanlde %p\n", e_code, cq);
+ DP_ERR(dev, "CQ event %d on handle %p\n", e_code, cq);
break;
case EVENT_TYPE_QP:
qp = (struct qedr_qp *)(uintptr_t)roce_handle64;
@@ -723,7 +720,7 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
"Error: QP event with NULL pointer ibqp. Handle=%llx\n",
roce_handle64);
}
- DP_ERR(dev, "QP event %d on hanlde %p\n", e_code, qp);
+ DP_ERR(dev, "QP event %d on handle %p\n", e_code, qp);
break;
default:
break;
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index 2bdbb12..0f14e68 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -412,19 +412,11 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
return rc;
}
- if (sgid_attr.ndev) {
- vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
- if (vlan_id < VLAN_CFI_MASK)
- has_vlan = true;
+ vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
+ if (vlan_id < VLAN_CFI_MASK)
+ has_vlan = true;
- dev_put(sgid_attr.ndev);
- }
-
- if (!memcmp(&sgid, &zgid, sizeof(sgid))) {
- DP_ERR(dev, "gsi post send: GID not found GID index %d\n",
- grh->sgid_index);
- return -ENOENT;
- }
+ dev_put(sgid_attr.ndev);
has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
if (!has_udp) {
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index f9c3cc7..7d3763b 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -84,58 +84,6 @@ int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
return 0;
}
-int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
- union ib_gid *sgid)
-{
- struct qedr_dev *dev = get_qedr_dev(ibdev);
- int rc = 0;
-
- if (!rdma_cap_roce_gid_table(ibdev, port))
- return -ENODEV;
-
- rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
- if (rc == -EAGAIN) {
- memcpy(sgid, &zgid, sizeof(*sgid));
- return 0;
- }
-
- DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
- sgid->global.interface_id, sgid->global.subnet_prefix);
-
- return rc;
-}
-
-int qedr_add_gid(struct ib_device *device, u8 port_num,
- unsigned int index, const union ib_gid *gid,
- const struct ib_gid_attr *attr, void **context)
-{
- if (!rdma_cap_roce_gid_table(device, port_num))
- return -EINVAL;
-
- if (port_num > QEDR_MAX_PORT)
- return -EINVAL;
-
- if (!context)
- return -EINVAL;
-
- return 0;
-}
-
-int qedr_del_gid(struct ib_device *device, u8 port_num,
- unsigned int index, void **context)
-{
- if (!rdma_cap_roce_gid_table(device, port_num))
- return -EINVAL;
-
- if (port_num > QEDR_MAX_PORT)
- return -EINVAL;
-
- if (!context)
- return -EINVAL;
-
- return 0;
-}
-
int qedr_query_device(struct ib_device *ibdev,
struct ib_device_attr *attr, struct ib_udata *udata)
{
@@ -525,9 +473,9 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
pd->pd_id = pd_id;
if (udata && context) {
- struct qedr_alloc_pd_uresp uresp;
-
- uresp.pd_id = pd_id;
+ struct qedr_alloc_pd_uresp uresp = {
+ .pd_id = pd_id,
+ };
rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (rc) {
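
The qedr_alloc_pd change above is more than style: a designated initializer such as { .pd_id = pd_id } zeroes every member that is not named, so a response struct built this way cannot hand uninitialized kernel stack values to the copy toward user space. A minimal sketch of the idiom, with a hypothetical response layout:

#include <stdint.h>

struct demo_uresp {		/* hypothetical response layout */
	uint32_t pd_id;
	uint32_t reserved;
};

/* Risky: 'reserved' keeps whatever happened to be on the stack. */
static void fill_uresp(struct demo_uresp *r, uint32_t id)
{
	r->pd_id = id;
}

/* Safe: every member not named in the initializer is zeroed. */
static struct demo_uresp make_uresp(uint32_t id)
{
	struct demo_uresp r = { .pd_id = id };

	return r;
}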
@@ -856,8 +804,6 @@ static inline void qedr_init_cq_params(struct qedr_cq *cq,
static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
{
- /* Flush data before signalling doorbell */
- wmb();
cq->db.data.agg_flags = flags;
cq->db.data.value = cpu_to_le32(cons);
writeq(cq->db.raw, cq->db_addr);
@@ -1145,46 +1091,41 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
if (rc)
return rc;
- if (!memcmp(&gid, &zgid, sizeof(gid)))
- return -ENOENT;
-
- if (gid_attr.ndev) {
- qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
-
- dev_put(gid_attr.ndev);
- nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
- switch (nw_type) {
- case RDMA_NETWORK_IPV6:
- memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
- sizeof(qp_params->sgid));
- memcpy(&qp_params->dgid.bytes[0],
- &grh->dgid,
- sizeof(qp_params->dgid));
- qp_params->roce_mode = ROCE_V2_IPV6;
- SET_FIELD(qp_params->modify_flags,
- QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
- break;
- case RDMA_NETWORK_IB:
- memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
- sizeof(qp_params->sgid));
- memcpy(&qp_params->dgid.bytes[0],
- &grh->dgid,
- sizeof(qp_params->dgid));
- qp_params->roce_mode = ROCE_V1;
- break;
- case RDMA_NETWORK_IPV4:
- memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
- memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
- ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
- qp_params->sgid.ipv4_addr = ipv4_addr;
- ipv4_addr =
- qedr_get_ipv4_from_gid(grh->dgid.raw);
- qp_params->dgid.ipv4_addr = ipv4_addr;
- SET_FIELD(qp_params->modify_flags,
- QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
- qp_params->roce_mode = ROCE_V2_IPV4;
- break;
- }
+ qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
+
+ dev_put(gid_attr.ndev);
+ nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
+ switch (nw_type) {
+ case RDMA_NETWORK_IPV6:
+ memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
+ sizeof(qp_params->sgid));
+ memcpy(&qp_params->dgid.bytes[0],
+ &grh->dgid,
+ sizeof(qp_params->dgid));
+ qp_params->roce_mode = ROCE_V2_IPV6;
+ SET_FIELD(qp_params->modify_flags,
+ QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
+ break;
+ case RDMA_NETWORK_IB:
+ memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
+ sizeof(qp_params->sgid));
+ memcpy(&qp_params->dgid.bytes[0],
+ &grh->dgid,
+ sizeof(qp_params->dgid));
+ qp_params->roce_mode = ROCE_V1;
+ break;
+ case RDMA_NETWORK_IPV4:
+ memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
+ memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
+ ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
+ qp_params->sgid.ipv4_addr = ipv4_addr;
+ ipv4_addr =
+ qedr_get_ipv4_from_gid(grh->dgid.raw);
+ qp_params->dgid.ipv4_addr = ipv4_addr;
+ SET_FIELD(qp_params->modify_flags,
+ QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
+ qp_params->roce_mode = ROCE_V2_IPV4;
+ break;
}
for (i = 0; i < 4; i++) {
@@ -1870,7 +1811,6 @@ static int qedr_update_qp_state(struct qedr_dev *dev,
*/
if (rdma_protocol_roce(&dev->ibdev, 1)) {
- wmb();
writel(qp->rq.db_data.raw, qp->rq.db);
/* Make sure write takes effect */
mmiowb();
@@ -3274,8 +3214,15 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 * vain. However this is not harmful (as long as the producer value is
* unchanged). For performance reasons we avoid checking for this
* redundant doorbell.
+ *
+ * qp->wqe_wr_id is accessed during qedr_poll_cq, as
+ * soon as we give the doorbell, we could get a completion
+ * for this wr, therefore we need to make sure that the
+ * memory is updated before giving the doorbell.
+ * During qedr_poll_cq, rmb is called before accessing the
+ * cqe. This covers for the smp_rmb as well.
*/
- wmb();
+ smp_wmb();
writel(qp->sq.db_data.raw, qp->sq.db);
/* Make sure write sticks */
@@ -3362,8 +3309,14 @@ int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
qedr_inc_sw_prod(&qp->rq);
- /* Flush all the writes before signalling doorbell */
- wmb();
+ /* qp->rqe_wr_id is accessed during qedr_poll_cq, as
+ * soon as we give the doorbell, we could get a completion
+ * for this wr, therefore we need to make sure that the
+ * memory is updated before giving the doorbell.
+ * During qedr_poll_cq, rmb is called before accessing the
+ * cqe. This covers for the smp_rmb as well.
+ */
+ smp_wmb();
qp->rq.db_data.data.value++;
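
Both new comments describe the same pairing: the post path publishes qp->wqe_wr_id/rqe_wr_id with smp_wmb() before ringing the doorbell, and qedr_poll_cq() issues a read barrier before dereferencing the CQE, so the completion handler always sees the wr_id the doorbell advertised. A minimal producer/consumer sketch of that pairing, with generic names rather than the qedr structures:

#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/barrier.h>

struct demo_ring {
	u64 wr_id[64];
	u32 prod;	/* written by the post path */
	u32 cons;	/* written by the poll path */
};

/* Post path: fill the entry, then publish the producer index. */
static void demo_post(struct demo_ring *r, u64 wr_id)
{
	r->wr_id[r->prod & 63] = wr_id;
	smp_wmb();	/* entry visible before the index (the "doorbell") */
	WRITE_ONCE(r->prod, r->prod + 1);
}

/* Poll path: observe the index, then read the entry it covers. */
static bool demo_poll(struct demo_ring *r, u64 *wr_id)
{
	if (READ_ONCE(r->prod) == r->cons)
		return false;
	smp_rmb();	/* pairs with the smp_wmb() in demo_post() */
	*wr_id = r->wr_id[r->cons++ & 63];
	return true;
}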
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 1a94425..2c57e4c 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -38,7 +38,6 @@ int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
int qedr_modify_port(struct ib_device *, u8 port, int mask,
struct ib_port_modify *props);
-int qedr_query_gid(struct ib_device *, u8 port, int index, union ib_gid *gid);
int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
int index, union ib_gid *gid);
@@ -48,11 +47,6 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *);
int qedr_dealloc_ucontext(struct ib_ucontext *);
int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
-int qedr_del_gid(struct ib_device *device, u8 port_num,
- unsigned int index, void **context);
-int qedr_add_gid(struct ib_device *device, u8 port_num,
- unsigned int index, const union ib_gid *gid,
- const struct ib_gid_attr *attr, void **context);
struct ib_pd *qedr_alloc_pd(struct ib_device *,
struct ib_ucontext *, struct ib_udata *);
int qedr_dealloc_pd(struct ib_pd *pd);
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 0235f76b..4607245 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -472,9 +472,6 @@ enum qib_sdma_events {
qib_sdma_event_e90_timer_tick,
};
-extern char *qib_sdma_state_names[];
-extern char *qib_sdma_event_names[];
-
struct sdma_set_state_action {
unsigned op_enable:1;
unsigned op_intenable:1;
diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c
index a9377ee..11da796 100644
--- a/drivers/infiniband/hw/qib/qib_diag.c
+++ b/drivers/infiniband/hw/qib/qib_diag.c
@@ -614,7 +614,7 @@ static ssize_t qib_diagpkt_write(struct file *fp,
}
if (copy_from_user(tmpbuf,
- (const void __user *) (unsigned long) dp.data,
+ u64_to_user_ptr(dp.data),
dp.len)) {
ret = -EFAULT;
goto bail;
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 52c29db3..6a8800b 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -443,7 +443,7 @@ cleanup:
ret = -EFAULT;
goto cleanup;
}
- if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
+ if (copy_to_user(u64_to_user_ptr(ti->tidmap),
tidmap, sizeof(tidmap))) {
ret = -EFAULT;
goto cleanup;
@@ -490,7 +490,7 @@ static int qib_tid_free(struct qib_ctxtdata *rcd, unsigned subctxt,
goto done;
}
- if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
+ if (copy_from_user(tidmap, u64_to_user_ptr(ti->tidmap),
sizeof(tidmap))) {
ret = -EFAULT;
goto done;
@@ -2168,8 +2168,8 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
ret = qib_do_user_init(fp, &cmd.cmd.user_info);
if (ret)
goto bail;
- ret = qib_get_base_info(fp, (void __user *) (unsigned long)
- cmd.cmd.user_info.spu_base_info,
+ ret = qib_get_base_info(fp, u64_to_user_ptr(
+ cmd.cmd.user_info.spu_base_info),
cmd.cmd.user_info.spu_base_info_size);
break;
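
u64_to_user_ptr() (from linux/kernel.h) replaces the open-coded (void __user *)(unsigned long) double cast in the hunks above; it performs the same conversion but keeps the __user annotation visible to sparse. A minimal sketch of the before/after, assuming a command struct that carries a user pointer in a u64 field:

#include <linux/kernel.h>
#include <linux/uaccess.h>

struct demo_cmd {	/* hypothetical ioctl payload */
	__u64 buf;	/* user pointer carried in a u64 */
	__u32 len;
};

static int demo_copy_in(void *dst, const struct demo_cmd *cmd)
{
	/* Previously: (const void __user *)(unsigned long)cmd->buf */
	if (copy_from_user(dst, u64_to_user_ptr(cmd->buf), cmd->len))
		return -EFAULT;
	return 0;
}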
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 6265dac..8414ae4 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -463,6 +463,16 @@ static u8 ib_rate_to_delay[IB_RATE_120_GBPS + 1] = {
[IB_RATE_40_GBPS] = 1
};
+static const char * const qib_sdma_state_names[] = {
+ [qib_sdma_state_s00_hw_down] = "s00_HwDown",
+ [qib_sdma_state_s10_hw_start_up_wait] = "s10_HwStartUpWait",
+ [qib_sdma_state_s20_idle] = "s20_Idle",
+ [qib_sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait",
+ [qib_sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait",
+ [qib_sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait",
+ [qib_sdma_state_s99_running] = "s99_Running",
+};
+
#define IBA7322_LINKSPEED_SHIFT SYM_LSB(IBCStatusA_0, LinkSpeedActive)
#define IBA7322_LINKWIDTH_SHIFT SYM_LSB(IBCStatusA_0, LinkWidthActive)
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 3990f38..6c68f8a 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -678,11 +678,9 @@ int qib_init(struct qib_devdata *dd, int reinit)
lastfail = qib_create_rcvhdrq(dd, rcd);
if (!lastfail)
lastfail = qib_setup_eagerbufs(rcd);
- if (lastfail) {
+ if (lastfail)
qib_dev_err(dd,
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
- continue;
- }
}
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index c3690bd..d0723d4 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -54,30 +54,6 @@ MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries");
#define SDMA_DESC_COUNT_LSB 16
#define SDMA_DESC_GEN_LSB 30
-char *qib_sdma_state_names[] = {
- [qib_sdma_state_s00_hw_down] = "s00_HwDown",
- [qib_sdma_state_s10_hw_start_up_wait] = "s10_HwStartUpWait",
- [qib_sdma_state_s20_idle] = "s20_Idle",
- [qib_sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait",
- [qib_sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait",
- [qib_sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait",
- [qib_sdma_state_s99_running] = "s99_Running",
-};
-
-char *qib_sdma_event_names[] = {
- [qib_sdma_event_e00_go_hw_down] = "e00_GoHwDown",
- [qib_sdma_event_e10_go_hw_start] = "e10_GoHwStart",
- [qib_sdma_event_e20_hw_started] = "e20_HwStarted",
- [qib_sdma_event_e30_go_running] = "e30_GoRunning",
- [qib_sdma_event_e40_sw_cleaned] = "e40_SwCleaned",
- [qib_sdma_event_e50_hw_cleaned] = "e50_HwCleaned",
- [qib_sdma_event_e60_hw_halted] = "e60_HwHalted",
- [qib_sdma_event_e70_go_idle] = "e70_GoIdle",
- [qib_sdma_event_e7220_err_halted] = "e7220_ErrHalted",
- [qib_sdma_event_e7322_err_halted] = "e7322_ErrHalted",
- [qib_sdma_event_e90_timer_tick] = "e90_TimerTick",
-};
-
/* declare all statics here rather than keep sorting */
static int alloc_sdma(struct qib_pportdata *);
static void sdma_complete(struct kref *);
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index fabee76..3977abb 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1646,7 +1646,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->rcd[ctxt]->pkeys);
}
- ret = rvt_register_device(&dd->verbs_dev.rdi);
+ ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB);
if (ret)
goto err_tx;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index ca56380..f0538a4 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -415,6 +415,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str;
+ us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC;
if (ib_register_device(&us_ibdev->ib_dev, NULL))
goto err_fwd_dealloc;
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
index 67de943..e0a9553 100644
--- a/drivers/infiniband/hw/usnic/usnic_transport.c
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -200,10 +200,8 @@ int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
int usnic_transport_init(void)
{
roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL);
- if (!roce_bitmap) {
- usnic_err("Failed to allocate bit map");
+ if (!roce_bitmap)
return -ENOMEM;
- }
/* Do not ever allocate bit 0, hence set it here */
bitmap_set(roce_bitmap, 0, 1);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index d650a9f..0be33a8 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -62,17 +62,10 @@ static DEFINE_MUTEX(pvrdma_device_list_lock);
static LIST_HEAD(pvrdma_device_list);
static struct workqueue_struct *event_wq;
-static int pvrdma_add_gid(struct ib_device *ibdev,
- u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
+static int pvrdma_add_gid(const union ib_gid *gid,
const struct ib_gid_attr *attr,
void **context);
-static int pvrdma_del_gid(struct ib_device *ibdev,
- u8 port_num,
- unsigned int index,
- void **context);
-
+static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
@@ -276,6 +269,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
if (!dev->srq_tbl)
goto err_qp_free;
}
+ dev->ib_dev.driver_id = RDMA_DRIVER_VMW_PVRDMA;
spin_lock_init(&dev->srq_tbl_lock);
ret = ib_register_device(&dev->ib_dev, NULL);
@@ -656,18 +650,15 @@ static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
return 0;
}
-static int pvrdma_add_gid(struct ib_device *ibdev,
- u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
+static int pvrdma_add_gid(const union ib_gid *gid,
const struct ib_gid_attr *attr,
void **context)
{
- struct pvrdma_dev *dev = to_vdev(ibdev);
+ struct pvrdma_dev *dev = to_vdev(attr->device);
return pvrdma_add_gid_at_index(dev, gid,
ib_gid_type_to_pvrdma(attr->gid_type),
- index);
+ attr->index);
}
static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
@@ -697,17 +688,14 @@ static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
return 0;
}
-static int pvrdma_del_gid(struct ib_device *ibdev,
- u8 port_num,
- unsigned int index,
- void **context)
+static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context)
{
- struct pvrdma_dev *dev = to_vdev(ibdev);
+ struct pvrdma_dev *dev = to_vdev(attr->device);
dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s",
- index, dev->netdev->name);
+ attr->index, dev->netdev->name);
- return pvrdma_del_gid_at_index(dev, index);
+ return pvrdma_del_gid_at_index(dev, attr->index);
}
static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
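
With the reworked hooks above, everything pvrdma used to receive as explicit arguments (device, port, index) now travels inside the const struct ib_gid_attr. A minimal sketch of a provider implementing the new attr-based signatures, assuming as pvrdma does that the core fills in attr->device and attr->index:

#include <rdma/ib_verbs.h>

static int demo_add_gid(const union ib_gid *gid,
			const struct ib_gid_attr *attr, void **context)
{
	struct ib_device *ibdev = attr->device;	/* was an explicit argument */

	dev_dbg(&ibdev->dev, "add gid at index %u\n", attr->index);
	return 0;
}

static int demo_del_gid(const struct ib_gid_attr *attr, void **context)
{
	dev_dbg(&attr->device->dev, "del gid at index %u\n", attr->index);
	return 0;
}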
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 7bf518b..eb5b106 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -489,7 +489,7 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
union pvrdma_cmd_req req;
union pvrdma_cmd_resp rsp;
struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp;
- int cur_state, next_state;
+ enum ib_qp_state cur_state, next_state;
int ret;
/* Sanity checking. Should need lock here */
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index a4553b2..434199d 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -91,7 +91,7 @@ module_exit(rvt_cleanup);
*/
struct rvt_dev_info *rvt_alloc_device(size_t size, int nports)
{
- struct rvt_dev_info *rdi = ERR_PTR(-ENOMEM);
+ struct rvt_dev_info *rdi;
rdi = (struct rvt_dev_info *)ib_alloc_device(size);
if (!rdi)
@@ -730,7 +730,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
*
* Return: 0 on success otherwise an errno.
*/
-int rvt_register_device(struct rvt_dev_info *rdi)
+int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
{
int ret = 0, i;
@@ -831,6 +831,7 @@ int rvt_register_device(struct rvt_dev_info *rdi)
rdi->ibdev.node_type = RDMA_NODE_IB_CA;
rdi->ibdev.num_comp_vectors = 1;
+ rdi->ibdev.driver_id = driver_id;
/* We are now good to announce we exist */
ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback);
if (ret) {
diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h
index 8823b2e..0675ea6 100644
--- a/drivers/infiniband/sw/rdmavt/vt.h
+++ b/drivers/infiniband/sw/rdmavt/vt.h
@@ -59,7 +59,6 @@
#include "mmap.h"
#include "cq.h"
#include "mad.h"
-#include "mmap.h"
#define rvt_pr_info(rdi, fmt, ...) \
__rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index b7debb6f..e493fdb 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -78,7 +78,7 @@ void rxe_release(struct kref *kref)
}
/* initialize rxe device parameters */
-static int rxe_init_device_param(struct rxe_dev *rxe)
+static void rxe_init_device_param(struct rxe_dev *rxe)
{
rxe->max_inline_data = RXE_MAX_INLINE_DATA;
@@ -122,8 +122,6 @@ static int rxe_init_device_param(struct rxe_dev *rxe)
rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY;
rxe->max_ucontext = RXE_MAX_UCONTEXT;
-
- return 0;
}
/* initialize port attributes */
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 7d23261..561ad30 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -59,7 +59,11 @@
#include "rxe_verbs.h"
#include "rxe_loc.h"
-#define RXE_UVERBS_ABI_VERSION (1)
+/*
+ * Version 1 and Version 2 are identical on 64 bit machines, but on 32 bit
+ * machines Version 2 has a different struct layout.
+ */
+#define RXE_UVERBS_ABI_VERSION 2
#define IB_PHYS_STATE_LINK_UP (5)
#define IB_PHYS_STATE_LINK_DOWN (3)
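
The version bump is forced by alignment, not by new fields: a struct with an unpadded 64-bit member has a different size and different member offsets on 32-bit and 64-bit builds, so a 32-bit process and a 64-bit kernel disagree about the ABI. A minimal illustration with hypothetical field names:

#include <stdint.h>

/* On x86-64, uint64_t is 8-byte aligned: size 16, 'val' at offset 8.
 * On i386 it is 4-byte aligned inside structs: size 12, 'val' at offset 4.
 */
struct v1_resp {
	uint32_t index;
	uint64_t val;
};

/* Explicit padding gives both builds the same layout; that is the kind
 * of change an ABI version bump exists to signal.
 */
struct v2_resp {
	uint32_t index;
	uint32_t reserved;
	uint64_t val;
};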
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 7522d1a..7f1ae364 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -74,8 +74,9 @@ void rxe_av_fill_ip_info(struct rxe_av *av,
struct ib_gid_attr *sgid_attr,
union ib_gid *sgid)
{
- rdma_gid2ip(&av->sgid_addr._sockaddr, sgid);
- rdma_gid2ip(&av->dgid_addr._sockaddr, &rdma_ah_read_grh(attr)->dgid);
+ rdma_gid2ip((struct sockaddr *)&av->sgid_addr, sgid);
+ rdma_gid2ip((struct sockaddr *)&av->dgid_addr,
+ &rdma_ah_read_grh(attr)->dgid);
av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index c4aabf7..2ee4b08 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -36,7 +36,7 @@
#include "rxe_queue.h"
int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
- int cqe, int comp_vector, struct ib_udata *udata)
+ int cqe, int comp_vector)
{
int count;
@@ -83,7 +83,7 @@ static void rxe_send_complete(unsigned long data)
int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
int comp_vector, struct ib_ucontext *context,
- struct ib_udata *udata)
+ struct rxe_create_cq_resp __user *uresp)
{
int err;
@@ -94,15 +94,15 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
return -ENOMEM;
}
- err = do_mmap_info(rxe, udata, false, context, cq->queue->buf,
- cq->queue->buf_size, &cq->queue->ip);
+ err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context,
+ cq->queue->buf, cq->queue->buf_size, &cq->queue->ip);
if (err) {
kvfree(cq->queue->buf);
kfree(cq->queue);
return err;
}
- if (udata)
+ if (uresp)
cq->is_user = 1;
cq->is_dying = false;
@@ -114,14 +114,15 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
return 0;
}
-int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, struct ib_udata *udata)
+int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe,
+ struct rxe_resize_cq_resp __user *uresp)
{
int err;
err = rxe_queue_resize(cq->queue, (unsigned int *)&cqe,
sizeof(struct rxe_cqe),
cq->queue->ip ? cq->queue->ip->context : NULL,
- udata, NULL, &cq->cq_lock);
+ uresp ? &uresp->mi : NULL, NULL, &cq->cq_lock);
if (!err)
cq->ibcq.cqe = cqe;
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 4ef75d5..b71023c 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -52,13 +52,14 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt);
/* rxe_cq.c */
int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
- int cqe, int comp_vector, struct ib_udata *udata);
+ int cqe, int comp_vector);
int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
int comp_vector, struct ib_ucontext *context,
- struct ib_udata *udata);
+ struct rxe_create_cq_resp __user *uresp);
-int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata);
+int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe,
+ struct rxe_resize_cq_resp __user *uresp);
int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
@@ -143,8 +144,7 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
/* rxe_net.c */
int rxe_loopback(struct sk_buff *skb);
-int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb);
+int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb);
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
int paylen, struct rxe_pkt_info *pkt);
int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
@@ -159,7 +159,8 @@ int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid);
int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init);
int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
- struct ib_qp_init_attr *init, struct ib_udata *udata,
+ struct ib_qp_init_attr *init,
+ struct rxe_create_qp_resp __user *uresp,
struct ib_pd *ibpd);
int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init);
@@ -227,11 +228,12 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_init_attr *init,
- struct ib_ucontext *context, struct ib_udata *udata);
+ struct ib_ucontext *context,
+ struct rxe_create_srq_resp __user *uresp);
int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
- struct ib_udata *udata);
+ struct rxe_modify_srq_cmd *ucmd);
void rxe_release(struct kref *kref);
@@ -268,7 +270,7 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
err = rxe_loopback(skb);
} else {
- err = rxe_send(rxe, pkt, skb);
+ err = rxe_send(pkt, skb);
}
if (err) {
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 159246b..9da6e37 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -182,11 +182,39 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
#endif
+/*
+ * Derive the net_device from the av.
+ * For physical devices, this will just return rxe->ndev.
+ * But for VLAN devices, it will return the vlan dev.
+ * Caller should dev_put() the returned net_device.
+ */
+static struct net_device *rxe_netdev_from_av(struct rxe_dev *rxe,
+ int port_num,
+ struct rxe_av *av)
+{
+ union ib_gid gid;
+ struct ib_gid_attr attr;
+ struct net_device *ndev = rxe->ndev;
+
+ if (ib_get_cached_gid(&rxe->ib_dev, port_num, av->grh.sgid_index,
+ &gid, &attr) == 0 &&
+ attr.ndev && attr.ndev != ndev)
+ ndev = attr.ndev;
+ else
+ /* Only to ensure that caller may call dev_put() */
+ dev_hold(ndev);
+
+ return ndev;
+}
+
static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
struct rxe_qp *qp,
struct rxe_av *av)
{
struct dst_entry *dst = NULL;
+ struct net_device *ndev;
+
+ ndev = rxe_netdev_from_av(rxe, qp->attr.port_num, av);
if (qp_type(qp) == IB_QPT_RC)
dst = sk_dst_get(qp->sk->sk);
@@ -201,14 +229,14 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
saddr = &av->sgid_addr._sockaddr_in.sin_addr;
daddr = &av->dgid_addr._sockaddr_in.sin_addr;
- dst = rxe_find_route4(rxe->ndev, saddr, daddr);
+ dst = rxe_find_route4(ndev, saddr, daddr);
} else if (av->network_type == RDMA_NETWORK_IPV6) {
struct in6_addr *saddr6;
struct in6_addr *daddr6;
saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr;
daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr;
- dst = rxe_find_route6(rxe->ndev, saddr6, daddr6);
+ dst = rxe_find_route6(ndev, saddr6, daddr6);
#if IS_ENABLED(CONFIG_IPV6)
if (dst)
qp->dst_cookie =
@@ -217,6 +245,7 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
}
}
+ dev_put(ndev);
return dst;
}
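
rxe_netdev_from_av() keeps a simple contract: whichever branch runs, the caller ends up holding exactly one reference, so rxe_find_route() and rxe_init_packet() can call dev_put() unconditionally. A minimal sketch of that "always return a held netdev" idiom, assuming as the code above does that the GID cache lookup already holds a reference on the per-GID device:

#include <linux/netdevice.h>

/* Return a net_device the caller must dev_put(), preferring a
 * per-GID (e.g. VLAN) device over the default one.
 */
static struct net_device *demo_pick_ndev(struct net_device *dflt,
					 struct net_device *per_gid)
{
	struct net_device *ndev = dflt;

	if (per_gid && per_gid != dflt)
		ndev = per_gid;	/* reference came with the GID lookup */
	else
		dev_hold(ndev);	/* balance the caller's dev_put() */

	return ndev;
}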
@@ -224,9 +253,14 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct udphdr *udph;
struct net_device *ndev = skb->dev;
+ struct net_device *rdev = ndev;
struct rxe_dev *rxe = net_to_rxe(ndev);
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+ if (!rxe && is_vlan_dev(rdev)) {
+ rdev = vlan_dev_real_dev(ndev);
+ rxe = net_to_rxe(rdev);
+ }
if (!rxe)
goto drop;
@@ -450,7 +484,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
rxe_drop_ref(qp);
}
-int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb)
+int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb)
{
struct rxe_av *av;
int err;
@@ -498,6 +532,10 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
{
unsigned int hdr_len;
struct sk_buff *skb;
+ struct net_device *ndev;
+ const int port_num = 1;
+
+ ndev = rxe_netdev_from_av(rxe, port_num, av);
if (av->network_type == RDMA_NETWORK_IPV4)
hdr_len = ETH_HLEN + sizeof(struct udphdr) +
@@ -506,26 +544,30 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
hdr_len = ETH_HLEN + sizeof(struct udphdr) +
sizeof(struct ipv6hdr);
- skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(rxe->ndev),
+ skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev),
GFP_ATOMIC);
- if (unlikely(!skb))
+
+ if (unlikely(!skb)) {
+ dev_put(ndev);
return NULL;
+ }
skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev));
- skb->dev = rxe->ndev;
+ skb->dev = ndev;
if (av->network_type == RDMA_NETWORK_IPV4)
skb->protocol = htons(ETH_P_IP);
else
skb->protocol = htons(ETH_P_IPV6);
pkt->rxe = rxe;
- pkt->port_num = 1;
+ pkt->port_num = port_num;
pkt->hdr = skb_put(skb, paylen);
pkt->mask |= RXE_GRH_MASK;
memset(pkt->hdr, 0, paylen);
+ dev_put(ndev);
return skb;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 2fcf1ca..b9f7aa1 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -40,15 +40,6 @@
#include "rxe_queue.h"
#include "rxe_task.h"
-char *rxe_qp_state_name[] = {
- [QP_STATE_RESET] = "RESET",
- [QP_STATE_INIT] = "INIT",
- [QP_STATE_READY] = "READY",
- [QP_STATE_DRAIN] = "DRAIN",
- [QP_STATE_DRAINED] = "DRAINED",
- [QP_STATE_ERROR] = "ERROR",
-};
-
static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
int has_srq)
{
@@ -225,7 +216,8 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_init_attr *init,
- struct ib_ucontext *context, struct ib_udata *udata)
+ struct ib_ucontext *context,
+ struct rxe_create_qp_resp __user *uresp)
{
int err;
int wqe_size;
@@ -250,9 +242,9 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
if (!qp->sq.queue)
return -ENOMEM;
- err = do_mmap_info(rxe, udata, true,
- context, qp->sq.queue->buf,
- qp->sq.queue->buf_size, &qp->sq.queue->ip);
+ err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, context,
+ qp->sq.queue->buf, qp->sq.queue->buf_size,
+ &qp->sq.queue->ip);
if (err) {
kvfree(qp->sq.queue->buf);
@@ -283,7 +275,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_init_attr *init,
- struct ib_ucontext *context, struct ib_udata *udata)
+ struct ib_ucontext *context,
+ struct rxe_create_qp_resp __user *uresp)
{
int err;
int wqe_size;
@@ -303,9 +296,8 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
if (!qp->rq.queue)
return -ENOMEM;
- err = do_mmap_info(rxe, udata, false, context,
- qp->rq.queue->buf,
- qp->rq.queue->buf_size,
+ err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, context,
+ qp->rq.queue->buf, qp->rq.queue->buf_size,
&qp->rq.queue->ip);
if (err) {
kvfree(qp->rq.queue->buf);
@@ -331,14 +323,15 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
/* called by the create qp verb */
int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
- struct ib_qp_init_attr *init, struct ib_udata *udata,
+ struct ib_qp_init_attr *init,
+ struct rxe_create_qp_resp __user *uresp,
struct ib_pd *ibpd)
{
int err;
struct rxe_cq *rcq = to_rcq(init->recv_cq);
struct rxe_cq *scq = to_rcq(init->send_cq);
struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
- struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
+ struct ib_ucontext *context = ibpd->uobject ? ibpd->uobject->context : NULL;
rxe_add_ref(pd);
rxe_add_ref(rcq);
@@ -353,11 +346,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
rxe_qp_init_misc(rxe, qp, init);
- err = rxe_qp_init_req(rxe, qp, init, context, udata);
+ err = rxe_qp_init_req(rxe, qp, init, context, uresp);
if (err)
goto err1;
- err = rxe_qp_init_resp(rxe, qp, init, context, udata);
+ err = rxe_qp_init_resp(rxe, qp, init, context, uresp);
if (err)
goto err2;
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
index d14bf49..f84ab44 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.c
+++ b/drivers/infiniband/sw/rxe/rxe_queue.c
@@ -37,35 +37,21 @@
#include "rxe_queue.h"
int do_mmap_info(struct rxe_dev *rxe,
- struct ib_udata *udata,
- bool is_req,
+ struct mminfo __user *outbuf,
struct ib_ucontext *context,
struct rxe_queue_buf *buf,
size_t buf_size,
struct rxe_mmap_info **ip_p)
{
int err;
- u32 len, offset;
struct rxe_mmap_info *ip = NULL;
- if (udata) {
- if (is_req) {
- len = udata->outlen - sizeof(struct mminfo);
- offset = sizeof(struct mminfo);
- } else {
- len = udata->outlen;
- offset = 0;
- }
-
- if (len < sizeof(ip->info))
- goto err1;
-
+ if (outbuf) {
ip = rxe_create_mmap_info(rxe, buf_size, context, buf);
if (!ip)
goto err1;
- err = copy_to_user(udata->outbuf + offset, &ip->info,
- sizeof(ip->info));
+ err = copy_to_user(outbuf, &ip->info, sizeof(ip->info));
if (err)
goto err2;
@@ -171,7 +157,7 @@ int rxe_queue_resize(struct rxe_queue *q,
unsigned int *num_elem_p,
unsigned int elem_size,
struct ib_ucontext *context,
- struct ib_udata *udata,
+ struct mminfo __user *outbuf,
spinlock_t *producer_lock,
spinlock_t *consumer_lock)
{
@@ -184,7 +170,7 @@ int rxe_queue_resize(struct rxe_queue *q,
if (!new_q)
return -ENOMEM;
- err = do_mmap_info(new_q->rxe, udata, false, context, new_q->buf,
+ err = do_mmap_info(new_q->rxe, outbuf, context, new_q->buf,
new_q->buf_size, &new_q->ip);
if (err) {
vfree(new_q->buf);
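
After this refactor, do_mmap_info() no longer peeks into ib_udata with is_req-dependent offset arithmetic; each verb computes a typed response pointer once and passes either the mminfo slot or NULL. A minimal sketch of the new calling convention, with a hypothetical response struct:

#include <linux/types.h>
#include <linux/uaccess.h>

struct demo_mminfo {		/* mirrors the role of struct mminfo */
	__u64 offset;
	__u64 size;
};

struct demo_create_resp {	/* hypothetical uresp layout */
	struct demo_mminfo mi;
	__u32 num;
};

/* Copy mmap info straight to the caller-chosen slot; NULL means a
 * kernel consumer that wants no copy at all.
 */
static int demo_fill_mminfo(struct demo_mminfo __user *outbuf,
			    const struct demo_mminfo *info)
{
	if (!outbuf)
		return 0;
	if (copy_to_user(outbuf, info, sizeof(*info)))
		return -EFAULT;
	return 0;
}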
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 8c8641c..79ba4b32 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -77,8 +77,7 @@ struct rxe_queue {
};
int do_mmap_info(struct rxe_dev *rxe,
- struct ib_udata *udata,
- bool is_req,
+ struct mminfo __user *outbuf,
struct ib_ucontext *context,
struct rxe_queue_buf *buf,
size_t buf_size,
@@ -94,7 +93,7 @@ int rxe_queue_resize(struct rxe_queue *q,
unsigned int *num_elem_p,
unsigned int elem_size,
struct ib_ucontext *context,
- struct ib_udata *udata,
+ struct mminfo __user *outbuf,
/* Protect producers while resizing queue */
spinlock_t *producer_lock,
/* Protect consumers while resizing queue */
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 4c3f899..dd80c7d 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -276,7 +276,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
{
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
struct rxe_mc_grp *mcg;
- struct sk_buff *skb_copy;
struct rxe_mc_elem *mce;
struct rxe_qp *qp;
union ib_gid dgid;
@@ -309,18 +308,14 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
continue;
/* if *not* the last qp in the list
- * make a copy of the skb to post to the next qp
+ * increase the users of the skb then post to the next qp
*/
- skb_copy = (mce->qp_list.next != &mcg->qp_list) ?
- skb_clone(skb, GFP_ATOMIC) : NULL;
+ if (mce->qp_list.next != &mcg->qp_list)
+ refcount_inc(&skb->users);
pkt->qp = qp;
rxe_add_ref(qp);
rxe_rcv_pkt(rxe, pkt, skb);
-
- skb = skb_copy;
- if (!skb)
- break;
}
spin_unlock_bh(&mcg->mcg_lock);
@@ -328,8 +323,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
err1:
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
}
static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
@@ -347,7 +341,7 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid,
IB_GID_TYPE_ROCE_UDP_ENCAP,
- 1, rxe->ndev, NULL);
+ 1, skb->dev, NULL);
}
/* rxe_rcv is called from the interface driver */
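
The multicast path now hands the same skb to every QP and pays with references instead of clones, bumping skb->users before posting to each QP that is not the last in the list. A minimal, self-contained sketch of the reference-per-consumer idiom (this version takes one reference per delivery and keeps the original for the final kfree_skb(), which is simpler to reason about than donating the original reference to the last consumer as the patch does):

#include <linux/skbuff.h>

/* Fan one skb out to n consumers without skb_clone(): each consumer
 * receives its own reference and releases it when done.
 */
static void demo_fanout(struct sk_buff *skb, int n,
			void (*deliver)(struct sk_buff *))
{
	int i;

	for (i = 0; i < n; i++) {
		refcount_inc(&skb->users);
		deliver(skb);		/* consumes one reference */
	}
	kfree_skb(skb);			/* drop our original reference */
}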
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index d37bb9b..a65c996 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -969,7 +969,6 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
int rc = 0;
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
- struct sk_buff *skb_copy;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct resp_res *res;
@@ -981,14 +980,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
goto out;
}
- skb_copy = skb_clone(skb, GFP_ATOMIC);
- if (skb_copy)
- rxe_add_ref(qp); /* for the new SKB */
- else {
- pr_warn("Could not clone atomic response\n");
- rc = -ENOMEM;
- goto out;
- }
+ rxe_add_ref(qp);
res = &qp->resp.resources[qp->resp.res_head];
free_rd_atomic_resource(qp, res);
@@ -998,19 +990,18 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0,
sizeof(skb->cb) - sizeof(ack_pkt));
+ refcount_inc(&skb->users);
res->type = RXE_ATOMIC_MASK;
res->atomic.skb = skb;
res->first_psn = ack_pkt.psn;
res->last_psn = ack_pkt.psn;
res->cur_psn = ack_pkt.psn;
- rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy);
+ rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
if (rc) {
pr_err_ratelimited("Failed sending ack\n");
rxe_drop_ref(qp);
- kfree_skb(skb_copy);
}
-
out:
return rc;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index efc832a..0d6c04b 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -99,7 +99,8 @@ err1:
int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_init_attr *init,
- struct ib_ucontext *context, struct ib_udata *udata)
+ struct ib_ucontext *context,
+ struct rxe_create_srq_resp __user *uresp)
{
int err;
int srq_wqe_size;
@@ -126,55 +127,41 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
srq->rq.queue = q;
- err = do_mmap_info(rxe, udata, false, context, q->buf,
+ err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, q->buf,
q->buf_size, &q->ip);
if (err)
return err;
- if (udata && udata->outlen >= sizeof(struct mminfo) + sizeof(u32)) {
- if (copy_to_user(udata->outbuf + sizeof(struct mminfo),
- &srq->srq_num, sizeof(u32)))
+ if (uresp) {
+ if (copy_to_user(&uresp->srq_num, &srq->srq_num,
+ sizeof(uresp->srq_num)))
return -EFAULT;
}
+
return 0;
}
int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
- struct ib_udata *udata)
+ struct rxe_modify_srq_cmd *ucmd)
{
int err;
struct rxe_queue *q = srq->rq.queue;
- struct mminfo mi = { .offset = 1, .size = 0};
+ struct mminfo __user *mi = NULL;
if (mask & IB_SRQ_MAX_WR) {
- /* Check that we can write the mminfo struct to user space */
- if (udata && udata->inlen >= sizeof(__u64)) {
- __u64 mi_addr;
-
- /* Get address of user space mminfo struct */
- err = ib_copy_from_udata(&mi_addr, udata,
- sizeof(mi_addr));
- if (err)
- goto err1;
-
- udata->outbuf = (void __user *)(unsigned long)mi_addr;
- udata->outlen = sizeof(mi);
-
- if (!access_ok(VERIFY_WRITE,
- (void __user *)udata->outbuf,
- udata->outlen)) {
- err = -EFAULT;
- goto err1;
- }
- }
+ /*
+ * This is completely screwed up, the response is supposed to
+ * be in the outbuf not like this.
+ */
+ mi = u64_to_user_ptr(ucmd->mmap_info_addr);
err = rxe_queue_resize(q, &attr->max_wr,
rcv_wqe_size(srq->rq.max_sge),
srq->rq.queue->ip ?
srq->rq.queue->ip->context :
NULL,
- udata, &srq->rq.producer_lock,
+ mi, &srq->rq.producer_lock,
&srq->rq.consumer_lock);
if (err)
goto err2;
@@ -188,6 +175,5 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
err2:
rxe_queue_cleanup(q);
srq->rq.queue = NULL;
-err1:
return err;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index f4bab2c..2cb52fd 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -77,40 +77,6 @@ out:
return rc;
}
-static int rxe_query_gid(struct ib_device *device,
- u8 port_num, int index, union ib_gid *gid)
-{
- int ret;
-
- if (index > RXE_PORT_GID_TBL_LEN)
- return -EINVAL;
-
- ret = ib_get_cached_gid(device, port_num, index, gid, NULL);
- if (ret == -EAGAIN) {
- memcpy(gid, &zgid, sizeof(*gid));
- return 0;
- }
-
- return ret;
-}
-
-static int rxe_add_gid(struct ib_device *device, u8 port_num, unsigned int
- index, const union ib_gid *gid,
- const struct ib_gid_attr *attr, void **context)
-{
- if (index >= RXE_PORT_GID_TBL_LEN)
- return -EINVAL;
- return 0;
-}
-
-static int rxe_del_gid(struct ib_device *device, u8 port_num, unsigned int
- index, void **context)
-{
- if (index >= RXE_PORT_GID_TBL_LEN)
- return -EINVAL;
- return 0;
-}
-
static struct net_device *rxe_get_netdev(struct ib_device *device,
u8 port_num)
{
@@ -273,9 +239,7 @@ static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
rxe_av_fill_ip_info(av, attr, &sgid_attr, &sgid);
-
- if (sgid_attr.ndev)
- dev_put(sgid_attr.ndev);
+ dev_put(sgid_attr.ndev);
return 0;
}
@@ -407,6 +371,13 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
struct rxe_pd *pd = to_rpd(ibpd);
struct rxe_srq *srq;
struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
+ struct rxe_create_srq_resp __user *uresp = NULL;
+
+ if (udata) {
+ if (udata->outlen < sizeof(*uresp))
+ return ERR_PTR(-EINVAL);
+ uresp = udata->outbuf;
+ }
err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
if (err)
@@ -422,7 +393,7 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
rxe_add_ref(pd);
srq->pd = pd;
- err = rxe_srq_from_init(rxe, srq, init, context, udata);
+ err = rxe_srq_from_init(rxe, srq, init, context, uresp);
if (err)
goto err2;
@@ -443,12 +414,22 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
int err;
struct rxe_srq *srq = to_rsrq(ibsrq);
struct rxe_dev *rxe = to_rdev(ibsrq->device);
+ struct rxe_modify_srq_cmd ucmd = {};
+
+ if (udata) {
+ if (udata->inlen < sizeof(ucmd))
+ return -EINVAL;
+
+ err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+ if (err)
+ return err;
+ }
err = rxe_srq_chk_attr(rxe, srq, attr, mask);
if (err)
goto err1;
- err = rxe_srq_from_attr(rxe, srq, attr, mask, udata);
+ err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd);
if (err)
goto err1;
@@ -517,6 +498,13 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
struct rxe_dev *rxe = to_rdev(ibpd->device);
struct rxe_pd *pd = to_rpd(ibpd);
struct rxe_qp *qp;
+ struct rxe_create_qp_resp __user *uresp = NULL;
+
+ if (udata) {
+ if (udata->outlen < sizeof(*uresp))
+ return ERR_PTR(-EINVAL);
+ uresp = udata->outbuf;
+ }
err = rxe_qp_chk_init(rxe, init);
if (err)
@@ -538,7 +526,7 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
rxe_add_index(qp);
- err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd);
+ err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd);
if (err)
goto err3;
@@ -711,9 +699,8 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
memcpy(wqe->dma.sge, ibwr->sg_list,
num_sge * sizeof(struct ib_sge));
- wqe->iova = (mask & WR_ATOMIC_MASK) ?
- atomic_wr(ibwr)->remote_addr :
- rdma_wr(ibwr)->remote_addr;
+ wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
+ mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
wqe->mask = mask;
wqe->dma.length = length;
wqe->dma.resid = length;
@@ -889,11 +876,18 @@ static struct ib_cq *rxe_create_cq(struct ib_device *dev,
int err;
struct rxe_dev *rxe = to_rdev(dev);
struct rxe_cq *cq;
+ struct rxe_create_cq_resp __user *uresp = NULL;
+
+ if (udata) {
+ if (udata->outlen < sizeof(*uresp))
+ return ERR_PTR(-EINVAL);
+ uresp = udata->outbuf;
+ }
if (attr->flags)
return ERR_PTR(-EINVAL);
- err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata);
+ err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
if (err)
goto err1;
@@ -904,7 +898,7 @@ static struct ib_cq *rxe_create_cq(struct ib_device *dev,
}
err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
- context, udata);
+ context, uresp);
if (err)
goto err2;
@@ -931,12 +925,19 @@ static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
int err;
struct rxe_cq *cq = to_rcq(ibcq);
struct rxe_dev *rxe = to_rdev(ibcq->device);
+ struct rxe_resize_cq_resp __user *uresp = NULL;
+
+ if (udata) {
+ if (udata->outlen < sizeof(*uresp))
+ return -EINVAL;
+ uresp = udata->outbuf;
+ }
- err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata);
+ err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
if (err)
goto err1;
- err = rxe_cq_resize_queue(cq, cqe, udata);
+ err = rxe_cq_resize_queue(cq, cqe, uresp);
if (err)
goto err1;
@@ -1207,7 +1208,7 @@ int rxe_register_device(struct rxe_dev *rxe)
rxe->ndev->dev_addr);
dev->dev.dma_ops = &dma_virt_ops;
dma_coerce_mask_and_coherent(&dev->dev,
- dma_get_required_mask(dev->dev.parent));
+ dma_get_required_mask(&dev->dev));
dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
@@ -1248,10 +1249,7 @@ int rxe_register_device(struct rxe_dev *rxe)
dev->query_port = rxe_query_port;
dev->modify_port = rxe_modify_port;
dev->get_link_layer = rxe_get_link_layer;
- dev->query_gid = rxe_query_gid;
dev->get_netdev = rxe_get_netdev;
- dev->add_gid = rxe_add_gid;
- dev->del_gid = rxe_del_gid;
dev->query_pkey = rxe_query_pkey;
dev->alloc_ucontext = rxe_alloc_ucontext;
dev->dealloc_ucontext = rxe_dealloc_ucontext;
@@ -1298,6 +1296,7 @@ int rxe_register_device(struct rxe_dev *rxe)
}
rxe->tfm = tfm;
+ dev->driver_id = RDMA_DRIVER_RXE;
err = ib_register_device(dev, NULL);
if (err) {
pr_warn("%s failed with error %d\n", __func__, err);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 1019f5e..af1470d 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -139,8 +139,6 @@ enum rxe_qp_state {
QP_STATE_ERROR
};
-extern char *rxe_qp_state_name[];
-
struct rxe_req_info {
enum rxe_qp_state state;
int wqe_index;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 8033a00..308e0ce 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -193,11 +193,6 @@ struct ipoib_tx_buf {
u64 mapping[MAX_SKB_FRAGS + 1];
};
-struct ipoib_cm_tx_buf {
- struct sk_buff *skb;
- u64 mapping;
-};
-
struct ib_cm_id;
struct ipoib_cm_data {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 10384ea..f47f9ac 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1085,7 +1085,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
netif_addr_unlock_bh(priv->dev);
- err = ib_find_gid(priv->ca, &search_gid, priv->dev, &port, &index);
+ err = ib_find_gid(priv->ca, &search_gid, &port, &index);
netif_addr_lock_bh(priv->dev);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index b488438..c35d2cd 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -327,29 +327,10 @@ static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
return 0;
}
-static const char *inet_ntop(const void *sa, char *dst, unsigned int size)
-{
- switch (((struct sockaddr *)sa)->sa_family) {
- case AF_INET:
- snprintf(dst, size, "%pI4",
- &((struct sockaddr_in *)sa)->sin_addr);
- break;
- case AF_INET6:
- snprintf(dst, size, "%pI6",
- &((struct sockaddr_in6 *)sa)->sin6_addr);
- break;
- default:
- snprintf(dst, size, "???");
- break;
- }
- return dst;
-}
-
static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
{
struct srp_target_port *target = ch->target;
struct rdma_cm_id *new_cm_id;
- char src_addr[64], dst_addr[64];
int ret;
new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
@@ -366,13 +347,8 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
(struct sockaddr *)&target->rdma_cm.dst,
SRP_PATH_REC_TIMEOUT_MS);
if (ret) {
- pr_err("No route available from %s to %s (%d)\n",
- target->rdma_cm.src_specified ?
- inet_ntop(&target->rdma_cm.src, src_addr,
- sizeof(src_addr)) : "(any)",
- inet_ntop(&target->rdma_cm.dst, dst_addr,
- sizeof(dst_addr)),
- ret);
+ pr_err("No route available from %pIS to %pIS (%d)\n",
+ &target->rdma_cm.src, &target->rdma_cm.dst, ret);
goto out;
}
ret = wait_for_completion_interruptible(&ch->done);
@@ -381,10 +357,8 @@ static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
ret = ch->status;
if (ret) {
- pr_err("Resolving address %s failed (%d)\n",
- inet_ntop(&target->rdma_cm.dst, dst_addr,
- sizeof(dst_addr)),
- ret);
+ pr_err("Resolving address %pIS failed (%d)\n",
+ &target->rdma_cm.dst, ret);
goto out;
}
@@ -457,6 +431,7 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
struct srp_fr_desc *d;
struct ib_mr *mr;
int i, ret = -EINVAL;
+ enum ib_mr_type mr_type;
if (pool_size <= 0)
goto err;
@@ -470,9 +445,13 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
spin_lock_init(&pool->lock);
INIT_LIST_HEAD(&pool->free_list);
+ if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+ mr_type = IB_MR_TYPE_SG_GAPS;
+ else
+ mr_type = IB_MR_TYPE_MEM_REG;
+
for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
- mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
- max_page_list_len);
+ mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
if (ret == -ENOMEM)
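
srp_create_fr_pool() now picks IB_MR_TYPE_SG_GAPS when the device advertises IB_DEVICE_SG_GAPS_REG, so fast-registration MRs can map scatterlists with arbitrary gaps, and falls back to IB_MR_TYPE_MEM_REG otherwise. A minimal sketch of the capability-driven allocation:

#include <rdma/ib_verbs.h>

/* Allocate one FR MR, preferring gap-tolerant registration when the
 * HCA supports it.
 */
static struct ib_mr *demo_alloc_fr_mr(struct ib_pd *pd, u32 max_pages)
{
	enum ib_mr_type mr_type = IB_MR_TYPE_MEM_REG;

	if (pd->device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
		mr_type = IB_MR_TYPE_SG_GAPS;

	return ib_alloc_mr(pd, mr_type, max_pages);
}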
@@ -765,19 +744,12 @@ static void srp_path_rec_completion(int status,
static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
{
struct srp_target_port *target = ch->target;
- int ret = -ENODEV;
+ int ret;
ch->ib_cm.path.numb_path = 1;
init_completion(&ch->done);
- /*
- * Avoid that the SCSI host can be removed by srp_remove_target()
- * before srp_path_rec_completion() is called.
- */
- if (!scsi_host_get(target->scsi_host))
- goto out;
-
ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client,
target->srp_host->srp_dev->dev,
target->srp_host->port,
@@ -791,27 +763,21 @@ static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
GFP_KERNEL,
srp_path_rec_completion,
ch, &ch->ib_cm.path_query);
- ret = ch->ib_cm.path_query_id;
- if (ret < 0)
- goto put;
+ if (ch->ib_cm.path_query_id < 0)
+ return ch->ib_cm.path_query_id;
ret = wait_for_completion_interruptible(&ch->done);
if (ret < 0)
- goto put;
+ return ret;
- ret = ch->status;
- if (ret < 0)
+ if (ch->status < 0)
shost_printk(KERN_WARNING, target->scsi_host,
PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw,
be16_to_cpu(target->ib_cm.pkey),
be64_to_cpu(target->ib_cm.service_id));
-put:
- scsi_host_put(target->scsi_host);
-
-out:
- return ret;
+ return ch->status;
}
static int srp_rdma_lookup_path(struct srp_rdma_ch *ch)
@@ -2974,9 +2940,11 @@ static int srp_abort(struct scsi_cmnd *scmnd)
ret = FAST_IO_FAIL;
else
ret = FAILED;
- srp_free_req(ch, req, scmnd, 0);
- scmnd->result = DID_ABORT << 16;
- scmnd->scsi_done(scmnd);
+ if (ret == SUCCESS) {
+ srp_free_req(ch, req, scmnd, 0);
+ scmnd->result = DID_ABORT << 16;
+ scmnd->scsi_done(scmnd);
+ }
return ret;
}
@@ -3033,8 +3001,9 @@ static int srp_slave_alloc(struct scsi_device *sdev)
struct Scsi_Host *shost = sdev->host;
struct srp_target_port *target = host_to_target(shost);
struct srp_device *srp_dev = target->srp_host->srp_dev;
+ struct ib_device *ibdev = srp_dev->dev;
- if (true)
+ if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
blk_queue_virt_boundary(sdev->request_queue,
~srp_dev->mr_page_mask);
@@ -3365,9 +3334,6 @@ static bool srp_conn_unique(struct srp_host *host,
if (t != target &&
target->id_ext == t->id_ext &&
target->ioc_guid == t->ioc_guid &&
- (!target->using_rdma_cm ||
- memcmp(&target->rdma_cm.dst, &t->rdma_cm.dst,
- sizeof(target->rdma_cm.dst)) == 0) &&
target->initiator_ext == t->initiator_ext) {
ret = false;
break;
@@ -3445,18 +3411,37 @@ static const match_table_t srp_opt_tokens = {
{ SRP_OPT_ERR, NULL }
};
+/**
+ * srp_parse_in - parse an IP address and port number combination
+ *
+ * Parse the following address formats:
+ * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
+ * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
+ */
static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
const char *addr_port_str)
{
- char *addr = kstrdup(addr_port_str, GFP_KERNEL);
- char *port_str = addr;
+ char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL);
+ char *port_str;
int ret;
if (!addr)
return -ENOMEM;
- strsep(&port_str, ":");
- ret = inet_pton_with_scope(net, AF_UNSPEC, addr, port_str, sa);
+ port_str = strrchr(addr, ':');
+ if (!port_str) {
+ kfree(addr);
+ return -EINVAL;
+ }
+ *port_str++ = '\0';
+ ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa);
+ if (ret && addr[0]) {
+ addr_end = addr + strlen(addr) - 1;
+ if (addr[0] == '[' && *addr_end == ']') {
+ *addr_end = '\0';
+ ret = inet_pton_with_scope(net, AF_INET6, addr + 1,
+ port_str, sa);
+ }
+ }
kfree(addr);
+ pr_debug("%s -> %pISpfsc\n", addr_port_str, sa);
return ret;
}
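The parser treats the last ':' as the port separator and only strips brackets when falling back to IPv6. A minimal usage sketch of the two accepted forms (the address literals are illustrative; net and sa come from the caller's context):

	struct sockaddr_storage sa;
	int ret;

	/* IPv4: everything before the last ':' is the address. */
	ret = srp_parse_in(net, &sa, "1.2.3.4:5");
	/* IPv6 needs brackets because the address itself contains ':'. */
	if (ret == 0)
		ret = srp_parse_in(net, &sa, "[1::2:3%4]:5");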
@@ -3789,14 +3774,11 @@ static ssize_t srp_create_target(struct device *dev,
if (!srp_conn_unique(target->srp_host, target)) {
if (target->using_rdma_cm) {
- char dst_addr[64];
-
shost_printk(KERN_INFO, target->scsi_host,
- PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%s\n",
+ PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n",
be64_to_cpu(target->id_ext),
be64_to_cpu(target->ioc_guid),
- inet_ntop(&target->rdma_cm.dst, dst_addr,
- sizeof(dst_addr)));
+ &target->rdma_cm.dst);
} else {
shost_printk(KERN_INFO, target->scsi_host,
PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
@@ -3815,26 +3797,36 @@ static ssize_t srp_create_target(struct device *dev,
}
if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
- /*
- * FR and FMR can only map one HCA page per entry. If the
- * start address is not aligned on a HCA page boundary two
- * entries will be used for the head and the tail although
- * these two entries combined contain at most one HCA page of
- * data. Hence the "+ 1" in the calculation below.
- *
- * The indirect data buffer descriptor is contiguous so the
- * memory for that buffer will only be registered if
- * register_always is true. Hence add one to mr_per_cmd if
- * register_always has been set.
- */
+ bool gaps_reg = (ibdev->attrs.device_cap_flags &
+ IB_DEVICE_SG_GAPS_REG);
+
max_sectors_per_mr = srp_dev->max_pages_per_mr <<
(ilog2(srp_dev->mr_page_size) - 9);
- mr_per_cmd = register_always +
- (target->scsi_host->max_sectors + 1 +
- max_sectors_per_mr - 1) / max_sectors_per_mr;
+ if (!gaps_reg) {
+ /*
+ * FR and FMR can only map one HCA page per entry. If
+ * the start address is not aligned on a HCA page
+ * boundary two entries will be used for the head and
+ * the tail although these two entries combined
+ * contain at most one HCA page of data. Hence the "+
+ * 1" in the calculation below.
+ *
+ * The indirect data buffer descriptor is contiguous
+ * so the memory for that buffer will only be
+ * registered if register_always is true. Hence add
+ * one to mr_per_cmd if register_always has been set.
+ */
+ mr_per_cmd = register_always +
+ (target->scsi_host->max_sectors + 1 +
+ max_sectors_per_mr - 1) / max_sectors_per_mr;
+ } else {
+ mr_per_cmd = register_always +
+ (target->sg_tablesize +
+ srp_dev->max_pages_per_mr - 1) /
+ srp_dev->max_pages_per_mr;
+ }
pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
- target->scsi_host->max_sectors,
- srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
+ target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
max_sectors_per_mr, mr_per_cmd);
}
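A worked example of the non-gaps branch above (numbers are illustrative): with mr_page_size = 4096 and max_pages_per_mr = 256, one MR spans 256 << (ilog2(4096) - 9) = 2048 sectors. For max_sectors = 1024 and register_always enabled this gives mr_per_cmd = 1 + (1024 + 1 + 2048 - 1) / 2048 = 2, i.e. one MR for the data plus one for the indirect descriptor.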
@@ -3871,12 +3863,10 @@ static ssize_t srp_create_target(struct device *dev,
num_online_nodes());
const int ch_end = ((node_idx + 1) * target->ch_count /
num_online_nodes());
- const int cv_start = (node_idx * ibdev->num_comp_vectors /
- num_online_nodes() + target->comp_vector)
- % ibdev->num_comp_vectors;
- const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
- num_online_nodes() + target->comp_vector)
- % ibdev->num_comp_vectors;
+ const int cv_start = node_idx * ibdev->num_comp_vectors /
+ num_online_nodes();
+ const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
+ num_online_nodes();
int cpu_idx = 0;
for_each_online_cpu(cpu) {
@@ -3907,8 +3897,8 @@ static ssize_t srp_create_target(struct device *dev,
char dst[64];
if (target->using_rdma_cm)
- inet_ntop(&target->rdma_cm.dst, dst,
- sizeof(dst));
+ snprintf(dst, sizeof(dst), "%pIS",
+ &target->rdma_cm.dst);
else
snprintf(dst, sizeof(dst), "%pI6",
target->ib_cm.orig_dgid.raw);
@@ -3941,14 +3931,11 @@ connected:
if (target->state != SRP_TARGET_REMOVED) {
if (target->using_rdma_cm) {
- char dst[64];
-
- inet_ntop(&target->rdma_cm.dst, dst, sizeof(dst));
shost_printk(KERN_DEBUG, target->scsi_host, PFX
- "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %s\n",
+ "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
be64_to_cpu(target->id_ext),
be64_to_cpu(target->ioc_guid),
- target->sgid.raw, dst);
+ target->sgid.raw, &target->rdma_cm.dst);
} else {
shost_printk(KERN_DEBUG, target->scsi_host, PFX
"new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 0373b7c..dfec0e1 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -41,6 +41,7 @@
#include <linux/string.h>
#include <linux/delay.h>
#include <linux/atomic.h>
+#include <linux/inet.h>
#include <rdma/ib_cache.h>
#include <scsi/scsi_proto.h>
#include <scsi/scsi_tcq.h>
@@ -92,6 +93,11 @@ MODULE_PARM_DESC(srpt_service_guid,
" instead of using the node_guid of the first HCA.");
static struct ib_client srpt_client;
+/* Protects both rdma_cm_port and rdma_cm_id. */
+static DEFINE_MUTEX(rdma_cm_mutex);
+/* Port number RDMA/CM will bind to. */
+static u16 rdma_cm_port;
+static struct rdma_cm_id *rdma_cm_id;
static void srpt_release_cmd(struct se_cmd *se_cmd);
static void srpt_free_ch(struct kref *kref);
static int srpt_queue_status(struct se_cmd *cmd);
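The comment above implies a publish pattern in which the port number and the listener ID change together; a sketch of the sequence (the real code is in srpt_rdma_cm_port_store() further down):

	struct rdma_cm_id *new_id = srpt_create_rdma_id(listen_addr);

	mutex_lock(&rdma_cm_mutex);
	rdma_cm_port = new_port;   /* publish the new port number */
	swap(rdma_cm_id, new_id);  /* exchange listener IDs under the lock */
	mutex_unlock(&rdma_cm_mutex);
	if (new_id)                /* destroy the old listener outside the lock */
		rdma_destroy_id(new_id);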
@@ -220,7 +226,10 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
switch (event->event) {
case IB_EVENT_COMM_EST:
- ib_cm_notify(ch->ib_cm.cm_id, event->event);
+ if (ch->using_rdma_cm)
+ rdma_notify(ch->rdma_cm.cm_id, event->event);
+ else
+ ib_cm_notify(ch->ib_cm.cm_id, event->event);
break;
case IB_EVENT_QP_LAST_WQE_REACHED:
pr_debug("%s-%d, state %s: received Last WQE event.\n",
@@ -838,16 +847,20 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
*/
static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
{
- struct ib_send_wr wr, *bad_wr;
+ struct ib_send_wr *bad_wr;
+ struct ib_rdma_wr wr = {
+ .wr = {
+ .next = NULL,
+ { .wr_cqe = &ch->zw_cqe, },
+ .opcode = IB_WR_RDMA_WRITE,
+ .send_flags = IB_SEND_SIGNALED,
+ }
+ };
pr_debug("%s-%d: queued zerolength write\n", ch->sess_name,
ch->qp->qp_num);
- memset(&wr, 0, sizeof(wr));
- wr.opcode = IB_WR_RDMA_WRITE;
- wr.wr_cqe = &ch->zw_cqe;
- wr.send_flags = IB_SEND_SIGNALED;
- return ib_post_send(ch->qp, &wr, &bad_wr);
+ return ib_post_send(ch->qp, &wr.wr, &bad_wr);
}
static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1057,6 +1070,8 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
struct ib_qp_attr *attr;
int ret;
+ WARN_ON_ONCE(ch->using_rdma_cm);
+
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
if (!attr)
return -ENOMEM;
@@ -1096,6 +1111,8 @@ static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
int attr_mask;
int ret;
+ WARN_ON_ONCE(ch->using_rdma_cm);
+
qp_attr.qp_state = IB_QPS_RTR;
ret = ib_cm_init_qp_attr(ch->ib_cm.cm_id, &qp_attr, &attr_mask);
if (ret)
@@ -1746,18 +1763,33 @@ retry:
qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge;
}
- ch->qp = ib_create_qp(sdev->pd, qp_init);
- if (IS_ERR(ch->qp)) {
- ret = PTR_ERR(ch->qp);
- if (ret == -ENOMEM) {
- sq_size /= 2;
- if (sq_size >= MIN_SRPT_SQ_SIZE) {
- ib_destroy_cq(ch->cq);
- goto retry;
- }
+ if (ch->using_rdma_cm) {
+ ret = rdma_create_qp(ch->rdma_cm.cm_id, sdev->pd, qp_init);
+ ch->qp = ch->rdma_cm.cm_id->qp;
+ } else {
+ ch->qp = ib_create_qp(sdev->pd, qp_init);
+ if (!IS_ERR(ch->qp)) {
+ ret = srpt_init_ch_qp(ch, ch->qp);
+ if (ret)
+ ib_destroy_qp(ch->qp);
+ } else {
+ ret = PTR_ERR(ch->qp);
+ }
+ }
+ if (ret) {
+ bool retry = sq_size > MIN_SRPT_SQ_SIZE;
+
+ if (retry) {
+ pr_debug("failed to create queue pair with sq_size = %d (%d) - retrying\n",
+ sq_size, ret);
+ ib_free_cq(ch->cq);
+ sq_size = max(sq_size / 2, MIN_SRPT_SQ_SIZE);
+ goto retry;
+ } else {
+ pr_err("failed to create queue pair with sq_size = %d (%d)\n",
+ sq_size, ret);
+ goto err_destroy_cq;
}
- pr_err("failed to create_qp ret= %d\n", ret);
- goto err_destroy_cq;
}
atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr);
@@ -1766,10 +1798,6 @@ retry:
__func__, ch->cq->cqe, qp_init->cap.max_send_sge,
qp_init->cap.max_send_wr, ch);
- ret = srpt_init_ch_qp(ch, ch->qp);
- if (ret)
- goto err_destroy_qp;
-
if (!sdev->use_srq)
for (i = 0; i < ch->rq_size; i++)
srpt_post_recv(sdev, ch, ch->ioctx_recv_ring[i]);
@@ -1778,9 +1806,8 @@ out:
kfree(qp_init);
return ret;
-err_destroy_qp:
- ib_destroy_qp(ch->qp);
err_destroy_cq:
+ ch->qp = NULL;
ib_free_cq(ch->cq);
goto out;
}
@@ -1849,9 +1876,13 @@ static int srpt_disconnect_ch(struct srpt_rdma_ch *ch)
if (!srpt_set_ch_state(ch, CH_DISCONNECTING))
return -ENOTCONN;
- ret = ib_send_cm_dreq(ch->ib_cm.cm_id, NULL, 0);
- if (ret < 0)
- ret = ib_send_cm_drep(ch->ib_cm.cm_id, NULL, 0);
+ if (ch->using_rdma_cm) {
+ ret = rdma_disconnect(ch->rdma_cm.cm_id);
+ } else {
+ ret = ib_send_cm_dreq(ch->ib_cm.cm_id, NULL, 0);
+ if (ret < 0)
+ ret = ib_send_cm_drep(ch->ib_cm.cm_id, NULL, 0);
+ }
if (ret < 0 && srpt_close_ch(ch))
ret = 0;
@@ -2002,7 +2033,10 @@ static void srpt_release_channel_work(struct work_struct *w)
transport_deregister_session(se_sess);
ch->sess = NULL;
- ib_destroy_cm_id(ch->ib_cm.cm_id);
+ if (ch->using_rdma_cm)
+ rdma_destroy_id(ch->rdma_cm.cm_id);
+ else
+ ib_destroy_cm_id(ch->ib_cm.cm_id);
srpt_destroy_ch_ib(ch);
@@ -2026,26 +2060,33 @@ static void srpt_release_channel_work(struct work_struct *w)
/**
* srpt_cm_req_recv - process the event IB_CM_REQ_RECEIVED
- * @cm_id: IB/CM connection identifier.
- * @port_num: Port through which the IB/CM REQ message was received.
+ * @sdev: HCA through which the login request was received.
+ * @ib_cm_id: IB/CM connection identifier in case of IB/CM.
+ * @rdma_cm_id: RDMA/CM connection identifier in case of RDMA/CM.
+ * @port_num: Port through which the REQ message was received.
* @pkey: P_Key of the incoming connection.
* @req: SRP login request.
- * @src_addr: GID of the port that submitted the login request.
+ * @src_addr: GID (IB/CM) or IP address (RDMA/CM) of the port that submitted
+ * the login request.
*
* Ownership of the cm_id is transferred to the target session if this
- * functions returns zero. Otherwise the caller remains the owner of cm_id.
+ * function returns zero. Otherwise the caller remains the owner of cm_id.
*/
-static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
+static int srpt_cm_req_recv(struct srpt_device *const sdev,
+ struct ib_cm_id *ib_cm_id,
+ struct rdma_cm_id *rdma_cm_id,
u8 port_num, __be16 pkey,
const struct srp_login_req *req,
const char *src_addr)
{
- struct srpt_device *sdev = cm_id->context;
struct srpt_port *sport = &sdev->port[port_num - 1];
struct srpt_nexus *nexus;
struct srp_login_rsp *rsp = NULL;
struct srp_login_rej *rej = NULL;
- struct ib_cm_rep_param *rep_param = NULL;
+ union {
+ struct rdma_conn_param rdma_cm;
+ struct ib_cm_rep_param ib_cm;
+ } *rep_param = NULL;
struct srpt_rdma_ch *ch;
char i_port_id[36];
u32 it_iu_len;
@@ -2115,8 +2156,14 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
ch->zw_cqe.done = srpt_zerolength_write_done;
INIT_WORK(&ch->release_work, srpt_release_channel_work);
ch->sport = sport;
- ch->ib_cm.cm_id = cm_id;
- cm_id->context = ch;
+ if (ib_cm_id) {
+ ch->ib_cm.cm_id = ib_cm_id;
+ ib_cm_id->context = ch;
+ } else {
+ ch->using_rdma_cm = true;
+ ch->rdma_cm.cm_id = rdma_cm_id;
+ rdma_cm_id->context = ch;
+ }
/*
* ch->rq_size should be at least as large as the initiator queue
* depth to avoid that the initiator driver has to report QUEUE_FULL
@@ -2227,7 +2274,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
mutex_unlock(&sport->mutex);
- ret = srpt_ch_qp_rtr(ch, ch->qp);
+ ret = ch->using_rdma_cm ? 0 : srpt_ch_qp_rtr(ch, ch->qp);
if (ret) {
rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n",
@@ -2251,25 +2298,38 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
atomic_set(&ch->req_lim_delta, 0);
/* create cm reply */
- rep_param->qp_num = ch->qp->qp_num;
- rep_param->private_data = (void *)rsp;
- rep_param->private_data_len = sizeof(*rsp);
- rep_param->rnr_retry_count = 7;
- rep_param->flow_control = 1;
- rep_param->failover_accepted = 0;
- rep_param->srq = 1;
- rep_param->responder_resources = 4;
- rep_param->initiator_depth = 4;
+ if (ch->using_rdma_cm) {
+ rep_param->rdma_cm.private_data = (void *)rsp;
+ rep_param->rdma_cm.private_data_len = sizeof(*rsp);
+ rep_param->rdma_cm.rnr_retry_count = 7;
+ rep_param->rdma_cm.flow_control = 1;
+ rep_param->rdma_cm.responder_resources = 4;
+ rep_param->rdma_cm.initiator_depth = 4;
+ } else {
+ rep_param->ib_cm.qp_num = ch->qp->qp_num;
+ rep_param->ib_cm.private_data = (void *)rsp;
+ rep_param->ib_cm.private_data_len = sizeof(*rsp);
+ rep_param->ib_cm.rnr_retry_count = 7;
+ rep_param->ib_cm.flow_control = 1;
+ rep_param->ib_cm.failover_accepted = 0;
+ rep_param->ib_cm.srq = 1;
+ rep_param->ib_cm.responder_resources = 4;
+ rep_param->ib_cm.initiator_depth = 4;
+ }
/*
* Hold the sport mutex while accepting a connection to avoid that
* srpt_disconnect_ch() is invoked concurrently with this code.
*/
mutex_lock(&sport->mutex);
- if (sport->enabled && ch->state == CH_CONNECTING)
- ret = ib_send_cm_rep(cm_id, rep_param);
- else
+ if (sport->enabled && ch->state == CH_CONNECTING) {
+ if (ch->using_rdma_cm)
+ ret = rdma_accept(rdma_cm_id, &rep_param->rdma_cm);
+ else
+ ret = ib_send_cm_rep(ib_cm_id, &rep_param->ib_cm);
+ } else {
ret = -EINVAL;
+ }
mutex_unlock(&sport->mutex);
switch (ret) {
@@ -2299,7 +2359,8 @@ free_ring:
ch->sport->sdev, ch->rq_size,
ch->max_rsp_size, DMA_TO_DEVICE);
free_ch:
- cm_id->context = NULL;
+ if (ib_cm_id)
+ ib_cm_id->context = NULL;
kfree(ch);
ch = NULL;
@@ -2312,8 +2373,11 @@ reject:
rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
SRP_BUF_FORMAT_INDIRECT);
- ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
- (void *)rej, sizeof(*rej));
+ if (rdma_cm_id)
+ rdma_reject(rdma_cm_id, rej, sizeof(*rej));
+ else
+ ib_send_cm_rej(ib_cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
+ rej, sizeof(*rej));
out:
kfree(rep_param);
@@ -2332,10 +2396,44 @@ static int srpt_ib_cm_req_recv(struct ib_cm_id *cm_id,
srpt_format_guid(sguid, sizeof(sguid),
&param->primary_path->dgid.global.interface_id);
- return srpt_cm_req_recv(cm_id, param->port, param->primary_path->pkey,
+ return srpt_cm_req_recv(cm_id->context, cm_id, NULL, param->port,
+ param->primary_path->pkey,
private_data, sguid);
}
+static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
+{
+ struct srpt_device *sdev;
+ struct srp_login_req req;
+ const struct srp_login_req_rdma *req_rdma;
+ char src_addr[40];
+
+ sdev = ib_get_client_data(cm_id->device, &srpt_client);
+ if (!sdev)
+ return -ECONNREFUSED;
+
+ if (event->param.conn.private_data_len < sizeof(*req_rdma))
+ return -EINVAL;
+
+ /* Transform srp_login_req_rdma into srp_login_req. */
+ req_rdma = event->param.conn.private_data;
+ memset(&req, 0, sizeof(req));
+ req.opcode = req_rdma->opcode;
+ req.tag = req_rdma->tag;
+ req.req_it_iu_len = req_rdma->req_it_iu_len;
+ req.req_buf_fmt = req_rdma->req_buf_fmt;
+ req.req_flags = req_rdma->req_flags;
+ memcpy(req.initiator_port_id, req_rdma->initiator_port_id, 16);
+ memcpy(req.target_port_id, req_rdma->target_port_id, 16);
+
+ snprintf(src_addr, sizeof(src_addr), "%pIS",
+ &cm_id->route.addr.src_addr);
+
+ return srpt_cm_req_recv(sdev, NULL, cm_id, cm_id->port_num,
+ cm_id->route.path_rec->pkey, &req, src_addr);
+}
+
static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch,
enum ib_cm_rej_reason reason,
const u8 *private_data,
@@ -2359,14 +2457,14 @@ static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch,
* srpt_cm_rtu_recv - process an IB_CM_RTU_RECEIVED or USER_ESTABLISHED event
* @ch: SRPT RDMA channel.
*
- * An IB_CM_RTU_RECEIVED message indicates that the connection is established
- * and that the recipient may begin transmitting (RTU = ready to use).
+ * An RTU (ready to use) message indicates that the connection has been
+ * established and that the recipient may begin transmitting.
*/
static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch)
{
int ret;
- ret = srpt_ch_qp_rts(ch, ch->qp);
+ ret = ch->using_rdma_cm ? 0 : srpt_ch_qp_rts(ch, ch->qp);
if (ret < 0) {
pr_err("%s-%d: QP transition to RTS failed\n", ch->sess_name,
ch->qp->qp_num);
@@ -2453,6 +2551,49 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
return ret;
}
+static int srpt_rdma_cm_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
+{
+ struct srpt_rdma_ch *ch = cm_id->context;
+ int ret = 0;
+
+ switch (event->event) {
+ case RDMA_CM_EVENT_CONNECT_REQUEST:
+ ret = srpt_rdma_cm_req_recv(cm_id, event);
+ break;
+ case RDMA_CM_EVENT_REJECTED:
+ srpt_cm_rej_recv(ch, event->status,
+ event->param.conn.private_data,
+ event->param.conn.private_data_len);
+ break;
+ case RDMA_CM_EVENT_ESTABLISHED:
+ srpt_cm_rtu_recv(ch);
+ break;
+ case RDMA_CM_EVENT_DISCONNECTED:
+ if (ch->state < CH_DISCONNECTING)
+ srpt_disconnect_ch(ch);
+ else
+ srpt_close_ch(ch);
+ break;
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+ srpt_close_ch(ch);
+ break;
+ case RDMA_CM_EVENT_UNREACHABLE:
+ pr_info("Received CM REP error for ch %s-%d.\n", ch->sess_name,
+ ch->qp->qp_num);
+ break;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ case RDMA_CM_EVENT_ADDR_CHANGE:
+ break;
+ default:
+ pr_err("received unrecognized RDMA CM event %d\n",
+ event->event);
+ break;
+ }
+
+ return ret;
+}
+
static int srpt_write_pending_status(struct se_cmd *se_cmd)
{
struct srpt_send_ioctx *ioctx;
@@ -2824,7 +2965,7 @@ static void srpt_add_one(struct ib_device *device)
{
struct srpt_device *sdev;
struct srpt_port *sport;
- int i;
+ int i, ret;
pr_debug("device = %p\n", device);
@@ -2848,9 +2989,15 @@ static void srpt_add_one(struct ib_device *device)
if (!srpt_service_guid)
srpt_service_guid = be64_to_cpu(device->node_guid);
- sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
- if (IS_ERR(sdev->cm_id))
- goto err_ring;
+ if (rdma_port_get_link_layer(device, 1) == IB_LINK_LAYER_INFINIBAND)
+ sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
+ if (IS_ERR(sdev->cm_id)) {
+ pr_info("ib_create_cm_id() failed: %ld\n",
+ PTR_ERR(sdev->cm_id));
+ sdev->cm_id = NULL;
+ if (!rdma_cm_id)
+ goto err_ring;
+ }
/* print out target login information */
pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx,"
@@ -2863,8 +3010,14 @@ static void srpt_add_one(struct ib_device *device)
* in the system as service_id; therefore, the target_id will change
* if this HCA is gone bad and replaced by different HCA
*/
- if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0))
+ ret = sdev->cm_id ?
+ ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0) :
+ 0;
+ if (ret < 0) {
+ pr_err("ib_cm_listen() failed: %d (cm_id state = %d)\n", ret,
+ sdev->cm_id->state);
goto err_cm;
+ }
INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
srpt_event_handler);
@@ -2904,7 +3057,8 @@ out:
err_event:
ib_unregister_event_handler(&sdev->event_handler);
err_cm:
- ib_destroy_cm_id(sdev->cm_id);
+ if (sdev->cm_id)
+ ib_destroy_cm_id(sdev->cm_id);
err_ring:
srpt_free_srq(sdev);
ib_dealloc_pd(sdev->pd);
@@ -2939,7 +3093,10 @@ static void srpt_remove_one(struct ib_device *device, void *client_data)
for (i = 0; i < sdev->device->phys_port_cnt; i++)
cancel_work_sync(&sdev->port[i].work);
- ib_destroy_cm_id(sdev->cm_id);
+ if (sdev->cm_id)
+ ib_destroy_cm_id(sdev->cm_id);
+
+ ib_set_client_data(device, &srpt_client, NULL);
/*
* Unregistering a target must happen after destroying sdev->cm_id
@@ -3103,18 +3260,26 @@ static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name)
leading_zero_bytes = 16 - count;
memset(i_port_id, 0, leading_zero_bytes);
ret = hex2bin(i_port_id + leading_zero_bytes, p, count);
- if (ret < 0)
- pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", ret);
+
out:
return ret;
}
/*
- * configfs callback function invoked for
- * mkdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id
+ * configfs callback function invoked for mkdir
+ * /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id
+ *
+ * i_port_id must be an initiator port GUID, GID or IP address. See also the
+ * target_alloc_session() calls in this driver. Examples of valid initiator
+ * port IDs:
+ * 0x0000000000000000505400fffe4a0b7b
+ * 0000000000000000505400fffe4a0b7b
+ * 5054:00ff:fe4a:0b7b
+ * 192.168.122.76
*/
static int srpt_init_nodeacl(struct se_node_acl *se_nacl, const char *name)
{
+ struct sockaddr_storage sa;
u64 guid;
u8 i_port_id[16];
int ret;
@@ -3123,6 +3288,9 @@ static int srpt_init_nodeacl(struct se_node_acl *se_nacl, const char *name)
if (ret < 0)
ret = srpt_parse_i_port_id(i_port_id, name);
if (ret < 0)
+ ret = inet_pton_with_scope(&init_net, AF_UNSPEC, name, NULL,
+ &sa);
+ if (ret < 0)
pr_err("invalid initiator port ID %s\n", name);
return ret;
}
@@ -3296,6 +3464,95 @@ static struct configfs_attribute *srpt_tpg_attrib_attrs[] = {
NULL,
};
+static struct rdma_cm_id *srpt_create_rdma_id(struct sockaddr *listen_addr)
+{
+ struct rdma_cm_id *rdma_cm_id;
+ int ret;
+
+ rdma_cm_id = rdma_create_id(&init_net, srpt_rdma_cm_handler,
+ NULL, RDMA_PS_TCP, IB_QPT_RC);
+ if (IS_ERR(rdma_cm_id)) {
+ pr_err("RDMA/CM ID creation failed: %ld\n",
+ PTR_ERR(rdma_cm_id));
+ goto out;
+ }
+
+ ret = rdma_bind_addr(rdma_cm_id, listen_addr);
+ if (ret) {
+ char addr_str[64];
+
+ snprintf(addr_str, sizeof(addr_str), "%pISp", listen_addr);
+ pr_err("Binding RDMA/CM ID to address %s failed: %d\n",
+ addr_str, ret);
+ rdma_destroy_id(rdma_cm_id);
+ rdma_cm_id = ERR_PTR(ret);
+ goto out;
+ }
+
+ ret = rdma_listen(rdma_cm_id, 128);
+ if (ret) {
+ pr_err("rdma_listen() failed: %d\n", ret);
+ rdma_destroy_id(rdma_cm_id);
+ rdma_cm_id = ERR_PTR(ret);
+ }
+
+out:
+ return rdma_cm_id;
+}
+
+static ssize_t srpt_rdma_cm_port_show(struct config_item *item, char *page)
+{
+ return sprintf(page, "%d\n", rdma_cm_port);
+}
+
+static ssize_t srpt_rdma_cm_port_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct sockaddr_in addr4 = { .sin_family = AF_INET };
+ struct sockaddr_in6 addr6 = { .sin6_family = AF_INET6 };
+ struct rdma_cm_id *new_id = NULL;
+ u16 val;
+ int ret;
+
+ ret = kstrtou16(page, 0, &val);
+ if (ret < 0)
+ return ret;
+ ret = count;
+ if (rdma_cm_port == val)
+ goto out;
+
+ if (val) {
+ addr6.sin6_port = cpu_to_be16(val);
+ new_id = srpt_create_rdma_id((struct sockaddr *)&addr6);
+ if (IS_ERR(new_id)) {
+ addr4.sin_port = cpu_to_be16(val);
+ new_id = srpt_create_rdma_id((struct sockaddr *)&addr4);
+ if (IS_ERR(new_id)) {
+ ret = PTR_ERR(new_id);
+ goto out;
+ }
+ }
+ }
+
+ mutex_lock(&rdma_cm_mutex);
+ rdma_cm_port = val;
+ swap(rdma_cm_id, new_id);
+ mutex_unlock(&rdma_cm_mutex);
+
+ if (new_id)
+ rdma_destroy_id(new_id);
+ ret = count;
+out:
+ return ret;
+}
+
+CONFIGFS_ATTR(srpt_, rdma_cm_port);
+
+static struct configfs_attribute *srpt_da_attrs[] = {
+ &srpt_attr_rdma_cm_port,
+ NULL,
+};
+
static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page)
{
struct se_portal_group *se_tpg = to_tpg(item);
@@ -3441,6 +3698,7 @@ static const struct target_core_fabric_ops srpt_template = {
.fabric_drop_tpg = srpt_drop_tpg,
.fabric_init_nodeacl = srpt_init_nodeacl,
+ .tfc_discovery_attrs = srpt_da_attrs,
.tfc_wwn_attrs = srpt_wwn_attrs,
.tfc_tpg_base_attrs = srpt_tpg_attrs,
.tfc_tpg_attrib_attrs = srpt_tpg_attrib_attrs,
@@ -3494,6 +3752,8 @@ out:
static void __exit srpt_cleanup_module(void)
{
+ if (rdma_cm_id)
+ rdma_destroy_id(rdma_cm_id);
ib_unregister_client(&srpt_client);
target_unregister_template(&srpt_template);
}
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 4d9199f..2361483 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -42,6 +42,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_sa.h>
#include <rdma/ib_cm.h>
+#include <rdma/rdma_cm.h>
#include <rdma/rw.h>
#include <scsi/srp.h>
@@ -261,6 +262,7 @@ enum rdma_ch_state {
* @spinlock: Protects free_list and state.
* @free_list: Head of list with free send I/O contexts.
* @state: channel state. See also enum rdma_ch_state.
+ * @using_rdma_cm: Whether RDMA/CM (true) or IB/CM (false) is used for this channel.
* @processing_wait_list: Whether or not cmd_wait_list is being processed.
* @ioctx_ring: Send ring.
* @ioctx_recv_ring: Receive I/O context ring.
@@ -280,6 +282,9 @@ struct srpt_rdma_ch {
struct {
struct ib_cm_id *cm_id;
} ib_cm;
+ struct {
+ struct rdma_cm_id *cm_id;
+ } rdma_cm;
};
struct ib_cq *cq;
struct ib_cqe zw_cqe;
@@ -300,9 +305,10 @@ struct srpt_rdma_ch {
struct list_head list;
struct list_head cmd_wait_list;
uint16_t pkey;
+ bool using_rdma_cm;
bool processing_wait_list;
struct se_session *sess;
- u8 sess_name[24];
+ u8 sess_name[40];
struct work_struct release_work;
};
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 634f603..de6b3d4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -37,6 +37,7 @@
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/kernel.h>
+#include <uapi/rdma/mlx4-abi.h>
#include "fw.h"
#include "icm.h"
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 5a26851..5af5019 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -46,6 +46,7 @@
#include <linux/etherdevice.h>
#include <net/devlink.h>
+#include <uapi/rdma/mlx4-abi.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index 784e282..db3278c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -70,7 +70,7 @@ static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
return -ENOMEM;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, lr, 1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0aab3af..d68f510 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -357,7 +357,7 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, lr, 1);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
@@ -1212,10 +1212,13 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
{
struct mlx5e_channel *c = sq->channel;
struct mlx5_core_dev *mdev = c->mdev;
+ struct mlx5_rate_limit rl = {0};
mlx5e_destroy_sq(mdev, sq->sqn);
- if (sq->rate_limit)
- mlx5_rl_remove_rate(mdev, sq->rate_limit);
+ if (sq->rate_limit) {
+ rl.rate = sq->rate_limit;
+ mlx5_rl_remove_rate(mdev, &rl);
+ }
mlx5e_free_txqsq_descs(sq);
mlx5e_free_txqsq(sq);
}
@@ -1646,6 +1649,7 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev,
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_modify_sq_param msp = {0};
+ struct mlx5_rate_limit rl = {0};
u16 rl_index = 0;
int err;
@@ -1653,14 +1657,17 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev,
/* nothing to do */
return 0;
- if (sq->rate_limit)
+ if (sq->rate_limit) {
+ rl.rate = sq->rate_limit;
/* remove current rl index to free space to next ones */
- mlx5_rl_remove_rate(mdev, sq->rate_limit);
+ mlx5_rl_remove_rate(mdev, &rl);
+ }
sq->rate_limit = 0;
if (rate) {
- err = mlx5_rl_add_rate(mdev, rate, &rl_index);
+ rl.rate = rate;
+ err = mlx5_rl_add_rate(mdev, &rl_index, &rl);
if (err) {
netdev_err(dev, "Failed configuring rate %u: %d\n",
rate, err);
@@ -1678,7 +1685,7 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev,
rate, err);
/* remove the rate from the table */
if (rate)
- mlx5_rl_remove_rate(mdev, rate);
+ mlx5_rl_remove_rate(mdev, &rl);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index e6175f8..de7fe087 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -232,7 +232,7 @@ static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
return -ENOMEM;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, lr, 1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 7006697..afd9f4f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -195,6 +195,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
if (MLX5_CAP_GEN(dev, qcam_reg))
mlx5_get_qcam_reg(dev);
+ if (MLX5_CAP_GEN(dev, device_memory)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_MEM);
+ if (err)
+ return err;
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index d3c33e9..bc86dff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -107,16 +107,16 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
* If the table is full, return NULL
*/
static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
- u32 rate)
+ struct mlx5_rate_limit *rl)
{
struct mlx5_rl_entry *ret_entry = NULL;
bool empty_found = false;
int i;
for (i = 0; i < table->max_size; i++) {
- if (table->rl_entry[i].rate == rate)
+ if (mlx5_rl_are_equal(&table->rl_entry[i].rl, rl))
return &table->rl_entry[i];
- if (!empty_found && !table->rl_entry[i].rate) {
+ if (!empty_found && !table->rl_entry[i].rl.rate) {
empty_found = true;
ret_entry = &table->rl_entry[i];
}
@@ -126,7 +126,8 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
}
static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
- u32 rate, u16 index)
+ u16 index,
+ struct mlx5_rate_limit *rl)
{
u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0};
u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
@@ -134,7 +135,9 @@ static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
MLX5_SET(set_pp_rate_limit_in, in, opcode,
MLX5_CMD_OP_SET_PP_RATE_LIMIT);
MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
- MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate);
+ MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rl->rate);
+ MLX5_SET(set_pp_rate_limit_in, in, burst_upper_bound, rl->max_burst_sz);
+ MLX5_SET(set_pp_rate_limit_in, in, typical_packet_size, rl->typical_pkt_sz);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
@@ -146,7 +149,17 @@ bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate)
}
EXPORT_SYMBOL(mlx5_rl_is_in_range);
-int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
+bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0,
+ struct mlx5_rate_limit *rl_1)
+{
+ return ((rl_0->rate == rl_1->rate) &&
+ (rl_0->max_burst_sz == rl_1->max_burst_sz) &&
+ (rl_0->typical_pkt_sz == rl_1->typical_pkt_sz));
+}
+EXPORT_SYMBOL(mlx5_rl_are_equal);
+
+int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index,
+ struct mlx5_rate_limit *rl)
{
struct mlx5_rl_table *table = &dev->priv.rl_table;
struct mlx5_rl_entry *entry;
@@ -154,14 +167,14 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
mutex_lock(&table->rl_lock);
- if (!rate || !mlx5_rl_is_in_range(dev, rate)) {
+ if (!rl->rate || !mlx5_rl_is_in_range(dev, rl->rate)) {
mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n",
- rate, table->min_rate, table->max_rate);
+ rl->rate, table->min_rate, table->max_rate);
err = -EINVAL;
goto out;
}
- entry = find_rl_entry(table, rate);
+ entry = find_rl_entry(table, rl);
if (!entry) {
mlx5_core_err(dev, "Max number of %u rates reached\n",
table->max_size);
@@ -173,13 +186,15 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
entry->refcount++;
} else {
/* new rate limit */
- err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index);
+ err = mlx5_set_pp_rate_limit_cmd(dev, entry->index, rl);
if (err) {
- mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
- rate, err);
+ mlx5_core_err(dev, "Failed configuring rate limit(err %d): \
+ rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
+ err, rl->rate, rl->max_burst_sz,
+ rl->typical_pkt_sz);
goto out;
}
- entry->rate = rate;
+ entry->rl = *rl;
entry->refcount = 1;
}
*index = entry->index;
@@ -190,27 +205,30 @@ out:
}
EXPORT_SYMBOL(mlx5_rl_add_rate);
-void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate)
+void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl)
{
struct mlx5_rl_table *table = &dev->priv.rl_table;
struct mlx5_rl_entry *entry = NULL;
+ struct mlx5_rate_limit reset_rl = {0};
/* 0 is a reserved value for unlimited rate */
- if (rate == 0)
+ if (rl->rate == 0)
return;
mutex_lock(&table->rl_lock);
- entry = find_rl_entry(table, rate);
+ entry = find_rl_entry(table, rl);
if (!entry || !entry->refcount) {
- mlx5_core_warn(dev, "Rate %u is not configured\n", rate);
+ mlx5_core_warn(dev, "Rate %u, max_burst_sz %u typical_pkt_sz %u \
+ are not configured\n",
+ rl->rate, rl->max_burst_sz, rl->typical_pkt_sz);
goto out;
}
entry->refcount--;
if (!entry->refcount) {
/* need to remove rate */
- mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index);
- entry->rate = 0;
+ mlx5_set_pp_rate_limit_cmd(dev, entry->index, &reset_rl);
+ entry->rl = reset_rl;
}
out:
@@ -257,13 +275,14 @@ int mlx5_init_rl_table(struct mlx5_core_dev *dev)
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
{
struct mlx5_rl_table *table = &dev->priv.rl_table;
+ struct mlx5_rate_limit rl = {0};
int i;
/* Clear all configured rates */
for (i = 0; i < table->max_size; i++)
- if (table->rl_entry[i].rate)
- mlx5_set_pp_rate_limit_cmd(dev, 0,
- table->rl_entry[i].index);
+ if (table->rl_entry[i].rl.rate)
+ mlx5_set_pp_rate_limit_cmd(dev, table->rl_entry[i].index,
+ &rl);
kfree(dev->priv.rl_table.rl_entry);
}
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index a9b5fed..81d0799 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -257,10 +257,6 @@ enum {
};
enum {
- MLX4_USER_DEV_CAP_LARGE_CQE = 1L << 0
-};
-
-enum {
MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0,
MLX4_FUNC_CAP_EQE_CQE_STRIDE = 1L << 1,
MLX4_FUNC_CAP_DMFS_A0_STATIC = 1L << 2
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 1275859..2bc27f8 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1017,6 +1017,8 @@ enum mlx5_cap_type {
MLX5_CAP_VECTOR_CALC,
MLX5_CAP_QOS,
MLX5_CAP_DEBUG,
+ MLX5_CAP_RESERVED_14,
+ MLX5_CAP_DEV_MEM,
/* NUM OF CAP Types */
MLX5_CAP_NUM
};
@@ -1168,6 +1170,12 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP64_FPGA(mdev, cap) \
MLX5_GET64(fpga_cap, (mdev)->caps.fpga, cap)
+#define MLX5_CAP_DEV_MEM(mdev, cap)\
+ MLX5_GET(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap)
+
+#define MLX5_CAP64_DEV_MEM(mdev, cap)\
+ MLX5_GET64(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap)
+
enum {
MLX5_CMD_STAT_OK = 0x0,
MLX5_CMD_STAT_INT_ERR = 0x1,
@@ -1211,8 +1219,8 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz;
}
-#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 8
-#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 8
+#define MLX5_BY_PASS_NUM_REGULAR_PRIOS 16
+#define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 16
#define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1
#define MLX5_BY_PASS_NUM_PRIOS (MLX5_BY_PASS_NUM_REGULAR_PRIOS +\
MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS +\
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index cded85a..767d193 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -591,8 +591,14 @@ struct mlx5_eswitch;
struct mlx5_lag;
struct mlx5_pagefault;
+struct mlx5_rate_limit {
+ u32 rate;
+ u32 max_burst_sz;
+ u16 typical_pkt_sz;
+};
+
struct mlx5_rl_entry {
- u32 rate;
+ struct mlx5_rate_limit rl;
u16 index;
u16 refcount;
};
@@ -1107,9 +1113,12 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
int mlx5_init_rl_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
-int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index);
-void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate);
+int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u16 *index,
+ struct mlx5_rate_limit *rl);
+void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl);
bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate);
+bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0,
+ struct mlx5_rate_limit *rl_1);
int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
bool map_wc, bool fast_path);
void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
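Because the rate-limit handle is now a struct rather than a bare u32, callers describe the whole shaping tuple when adding and removing rates; a hedged usage sketch against the signatures above (field values are illustrative, and burst/typical-size fields of 0 leave those knobs unset):

	struct mlx5_rate_limit rl = {
		.rate = 100000,
		.max_burst_sz = 0,
		.typical_pkt_sz = 0,
	};
	u16 index;
	int err;

	err = mlx5_rl_add_rate(mdev, &index, &rl); /* index is programmed into the SQ */
	if (!err)
		mlx5_rl_remove_rate(mdev, &rl);    /* drops the refcount; freed at zero */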
diff --git a/include/linux/mlx5/fs_helpers.h b/include/linux/mlx5/fs_helpers.h
index 7b476bb..9db21cd 100644
--- a/include/linux/mlx5/fs_helpers.h
+++ b/include/linux/mlx5/fs_helpers.h
@@ -38,6 +38,14 @@
#define MLX5_FS_IPV4_VERSION 4
#define MLX5_FS_IPV6_VERSION 6
+static inline bool mlx5_fs_is_ipsec_flow(const u32 *match_c)
+{
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters);
+
+ return MLX5_GET(fte_match_set_misc, misc_params_c, outer_esp_spi);
+}
+
static inline bool _mlx5_fs_is_outer_ipproto_flow(const u32 *match_c,
const u32 *match_v, u8 match)
{
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index d25011f..1aad455 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -92,6 +92,8 @@ enum {
MLX5_CMD_OP_DESTROY_MKEY = 0x202,
MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS = 0x203,
MLX5_CMD_OP_PAGE_FAULT_RESUME = 0x204,
+ MLX5_CMD_OP_ALLOC_MEMIC = 0x205,
+ MLX5_CMD_OP_DEALLOC_MEMIC = 0x206,
MLX5_CMD_OP_CREATE_EQ = 0x301,
MLX5_CMD_OP_DESTROY_EQ = 0x302,
MLX5_CMD_OP_QUERY_EQ = 0x303,
@@ -575,7 +577,10 @@ struct mlx5_ifc_qos_cap_bits {
u8 esw_scheduling[0x1];
u8 esw_bw_share[0x1];
u8 esw_rate_limit[0x1];
- u8 reserved_at_4[0x1c];
+ u8 reserved_at_4[0x1];
+ u8 packet_pacing_burst_bound[0x1];
+ u8 packet_pacing_typical_size[0x1];
+ u8 reserved_at_7[0x19];
u8 reserved_at_20[0x20];
@@ -669,6 +674,24 @@ struct mlx5_ifc_roce_cap_bits {
u8 reserved_at_100[0x700];
};
+struct mlx5_ifc_device_mem_cap_bits {
+ u8 memic[0x1];
+ u8 reserved_at_1[0x1f];
+
+ u8 reserved_at_20[0xb];
+ u8 log_min_memic_alloc_size[0x5];
+ u8 reserved_at_30[0x8];
+ u8 log_max_memic_addr_alignment[0x8];
+
+ u8 memic_bar_start_addr[0x40];
+
+ u8 memic_bar_size[0x20];
+
+ u8 max_memic_size[0x20];
+
+ u8 reserved_at_c0[0x740];
+};
+
enum {
MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_1_BYTE = 0x0,
MLX5_ATOMIC_CAPS_ATOMIC_SIZE_QP_2_BYTES = 0x2,
@@ -883,7 +906,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 ets[0x1];
u8 nic_flow_table[0x1];
u8 eswitch_flow_table[0x1];
- u8 early_vf_enable[0x1];
+ u8 device_memory[0x1];
u8 mcam_reg[0x1];
u8 pcam_reg[0x1];
u8 local_ca_ack_delay[0x5];
@@ -927,7 +950,11 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_202[0x1];
u8 ipoib_enhanced_offloads[0x1];
u8 ipoib_basic_offloads[0x1];
- u8 reserved_at_205[0x5];
+ u8 reserved_at_205[0x1];
+ u8 repeated_block_disabled[0x1];
+ u8 umr_modify_entity_size_disabled[0x1];
+ u8 umr_modify_atomic_disabled[0x1];
+ u8 umr_indirect_mkey_disabled[0x1];
u8 umr_fence[0x2];
u8 reserved_at_20c[0x3];
u8 drain_sigerr[0x1];
@@ -2748,12 +2775,17 @@ enum {
MLX5_MKC_ACCESS_MODE_MTT = 0x1,
MLX5_MKC_ACCESS_MODE_KLMS = 0x2,
MLX5_MKC_ACCESS_MODE_KSM = 0x3,
+ MLX5_MKC_ACCESS_MODE_MEMIC = 0x5,
};
struct mlx5_ifc_mkc_bits {
u8 reserved_at_0[0x1];
u8 free[0x1];
- u8 reserved_at_2[0xd];
+ u8 reserved_at_2[0x1];
+ u8 access_mode_4_2[0x3];
+ u8 reserved_at_6[0x7];
+ u8 relaxed_ordering_write[0x1];
+ u8 reserved_at_e[0x1];
u8 small_fence_on_rdma_read_response[0x1];
u8 umr_en[0x1];
u8 a[0x1];
@@ -2761,7 +2793,7 @@ struct mlx5_ifc_mkc_bits {
u8 rr[0x1];
u8 lw[0x1];
u8 lr[0x1];
- u8 access_mode[0x2];
+ u8 access_mode_1_0[0x2];
u8 reserved_at_18[0x8];
u8 qpn[0x18];
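With the access mode split into access_mode_1_0 and access_mode_4_2, modes above 0x3 such as the new MLX5_MKC_ACCESS_MODE_MEMIC (0x5) must program both halves; a sketch of the expected pattern (mkc obtained via MLX5_ADDR_OF() as in the surrounding helpers):

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MEMIC & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, MLX5_MKC_ACCESS_MODE_MEMIC >> 2);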
@@ -7397,7 +7429,12 @@ struct mlx5_ifc_set_pp_rate_limit_in_bits {
u8 rate_limit[0x20];
- u8 reserved_at_a0[0x160];
+ u8 burst_upper_bound[0x20];
+
+ u8 reserved_at_c0[0x10];
+ u8 typical_packet_size[0x10];
+
+ u8 reserved_at_e0[0x120];
};
struct mlx5_ifc_access_register_out_bits {
@@ -8951,4 +8988,57 @@ struct mlx5_ifc_destroy_vport_lag_in_bits {
u8 reserved_at_40[0x40];
};
+struct mlx5_ifc_alloc_memic_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_30[0x20];
+
+ u8 reserved_at_40[0x18];
+ u8 log_memic_addr_alignment[0x8];
+
+ u8 range_start_addr[0x40];
+
+ u8 range_size[0x20];
+
+ u8 memic_size[0x20];
+};
+
+struct mlx5_ifc_alloc_memic_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 memic_start_addr[0x40];
+};
+
+struct mlx5_ifc_dealloc_memic_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+
+ u8 memic_start_addr[0x40];
+
+ u8 memic_size[0x20];
+
+ u8 reserved_at_e0[0x20];
+};
+
+struct mlx5_ifc_dealloc_memic_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
#endif /* MLX5_IFC_H */
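A sketch of how the new MEMIC layouts combine with the device_memory/memic capability bits added earlier (length is a caller-supplied size; error handling and alignment negotiation are elided, so this is illustrative rather than the driver's actual allocator):

	u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {};
	u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {};
	u64 addr;
	int err;

	if (!MLX5_CAP_DEV_MEM(dev, memic))
		return -EOPNOTSUPP;

	MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC);
	MLX5_SET(alloc_memic_in, in, memic_size, length);
	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (!err)
		addr = MLX5_GET64(alloc_memic_out, out, memic_start_addr);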
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 415e099..a08cc72 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -119,10 +119,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
struct rdma_dev_addr *addr, void *context),
void *context);
-int rdma_resolve_ip_route(struct sockaddr *src_addr,
- const struct sockaddr *dst_addr,
- struct rdma_dev_addr *addr);
-
void rdma_addr_cancel(struct rdma_dev_addr *addr);
void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
@@ -133,11 +129,6 @@ int rdma_addr_size(struct sockaddr *addr);
int rdma_addr_size_in6(struct sockaddr_in6 *addr);
int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr);
-int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
- const union ib_gid *dgid,
- u8 *dmac, const struct net_device *ndev,
- int *hoplimit);
-
static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
{
return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9];
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 385ec88..eb49cc8 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -55,20 +55,6 @@ int ib_get_cached_gid(struct ib_device *device,
union ib_gid *gid,
struct ib_gid_attr *attr);
-/**
- * ib_find_cached_gid - Returns the port number and GID table index where
- * a specified GID value occurs.
- * @device: The device to query.
- * @gid: The GID value to search for.
- * @gid_type: The GID type to search for.
- * @ndev: In RoCE, the net device of the device. NULL means ignore.
- * @port_num: The port number of the device where the GID value was found.
- * @index: The index into the cached GID table where the GID was found. This
- * parameter may be NULL.
- *
- * ib_find_cached_gid() searches for the specified GID value in
- * the local software cache.
- */
int ib_find_cached_gid(struct ib_device *device,
const union ib_gid *gid,
enum ib_gid_type gid_type,
@@ -76,21 +62,6 @@ int ib_find_cached_gid(struct ib_device *device,
u8 *port_num,
u16 *index);
-/**
- * ib_find_cached_gid_by_port - Returns the GID table index where a specified
- * GID value occurs
- * @device: The device to query.
- * @gid: The GID value to search for.
- * @gid_type: The GID type to search for.
- * @port_num: The port number of the device where the GID value sould be
- * searched.
- * @ndev: In RoCE, the net device of the device. Null means ignore.
- * @index: The index into the cached GID table where the GID was found. This
- * parameter may be NULL.
- *
- * ib_find_cached_gid() searches for the specified GID value in
- * the local software cache.
- */
int ib_find_cached_gid_by_port(struct ib_device *device,
const union ib_gid *gid,
enum ib_gid_type gid_type,
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 811cfcfc..bacb144 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -163,7 +163,15 @@ struct sa_path_rec_ib {
u8 raw_traffic;
};
+/**
+ * struct sa_path_rec_roce - RoCE specific portion of the path record entry
+ * @route_resolved: When set, it indicates that this route is already
+ * resolved for this path record entry.
+ * @dmac: Destination mac address for the given DGID entry
+ * of the path record entry.
+ */
struct sa_path_rec_roce {
+ bool route_resolved;
u8 dmac[ETH_ALEN];
/* ignored in IB */
int ifindex;
@@ -590,6 +598,11 @@ static inline bool sa_path_is_roce(struct sa_path_rec *rec)
(rec->rec_type == SA_PATH_REC_TYPE_ROCE_V2));
}
+static inline bool sa_path_is_opa(struct sa_path_rec *rec)
+{
+ return (rec->rec_type == SA_PATH_REC_TYPE_OPA);
+}
+
static inline void sa_path_set_slid(struct sa_path_rec *rec, u32 slid)
{
if (rec->rec_type == SA_PATH_REC_TYPE_IB)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6eb1747..9fc8a82 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -64,6 +64,8 @@
#include <linux/cgroup_rdma.h>
#include <uapi/rdma/ib_user_verbs.h>
#include <rdma/restrack.h>
+#include <uapi/rdma/rdma_user_ioctl.h>
+#include <uapi/rdma/ib_user_ioctl_verbs.h>
#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
@@ -90,8 +92,11 @@ enum ib_gid_type {
#define ROCE_V2_UDP_DPORT 4791
struct ib_gid_attr {
- enum ib_gid_type gid_type;
struct net_device *ndev;
+ struct ib_device *device;
+ enum ib_gid_type gid_type;
+ u16 index;
+ u8 port_num;
};
enum rdma_node_type {
@@ -316,6 +321,18 @@ struct ib_cq_caps {
u16 max_cq_moderation_period;
};
+struct ib_dm_mr_attr {
+ u64 length;
+ u64 offset;
+ u32 access_flags;
+};
+
+struct ib_dm_alloc_attr {
+ u64 length;
+ u32 alignment;
+ u32 flags;
+};
+
struct ib_device_attr {
u64 fw_ver;
__be64 sys_image_guid;
@@ -367,6 +384,7 @@ struct ib_device_attr {
u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */
struct ib_tm_caps tm_caps;
struct ib_cq_caps cq_caps;
+ u64 max_dm_size;
};
enum ib_mtu {
@@ -469,6 +487,9 @@ enum ib_port_speed {
/**
* struct rdma_hw_stats
+ * @lock - Mutex to protect parallel write access to lifespan and values
* of counters, which are 64 bits and not guaranteed to be written
* atomically on 32-bit systems.
* @timestamp - Used by the core code to track when the last update was
* @lifespan - Used by the core code to determine how old the counters
* should be before being updated again. Stored in jiffies, defaults
@@ -484,6 +505,7 @@ enum ib_port_speed {
* filled in by the drivers get_stats routine
*/
struct rdma_hw_stats {
+ struct mutex lock; /* Protect lifespan and values[] */
unsigned long timestamp;
unsigned long lifespan;
const char * const *names;
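The new lock turns 64-bit counter updates into a safe read-modify-write on 32-bit machines; a hedged sketch of the pattern it enables (the counter array name is assumed from the surrounding structure, which is not shown in this hunk):

	mutex_lock(&stats->lock);
	stats->value[i] += delta;  /* 64-bit update, no torn reads */
	stats->lifespan = msecs_to_jiffies(ms);
	mutex_unlock(&stats->lock);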
@@ -1755,6 +1777,14 @@ struct ib_qp {
struct rdma_restrack_entry res;
};
+struct ib_dm {
+ struct ib_device *device;
+ u32 length;
+ u32 flags;
+ struct ib_uobject *uobject;
+ atomic_t usecnt;
+};
+
struct ib_mr {
struct ib_device *device;
struct ib_pd *pd;
@@ -1768,6 +1798,13 @@ struct ib_mr {
struct ib_uobject *uobject; /* user */
struct list_head qp_entry; /* FR */
};
+
+ struct ib_dm *dm;
+
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
};
struct ib_mw {
@@ -1810,6 +1847,7 @@ enum ib_flow_spec_type {
/* L3 header*/
IB_FLOW_SPEC_IPV4 = 0x30,
IB_FLOW_SPEC_IPV6 = 0x31,
+ IB_FLOW_SPEC_ESP = 0x34,
/* L4 headers*/
IB_FLOW_SPEC_TCP = 0x40,
IB_FLOW_SPEC_UDP = 0x41,
@@ -1818,6 +1856,7 @@ enum ib_flow_spec_type {
/* Actions */
IB_FLOW_SPEC_ACTION_TAG = 0x1000,
IB_FLOW_SPEC_ACTION_DROP = 0x1001,
+ IB_FLOW_SPEC_ACTION_HANDLE = 0x1002,
};
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
#define IB_FLOW_SPEC_SUPPORT_LAYERS 8
@@ -1835,7 +1874,8 @@ enum ib_flow_domain {
enum ib_flow_flags {
IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
- IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 2 /* Must be last */
+ IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */
+ IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 3 /* Must be last */
};
struct ib_flow_eth_filter {
@@ -1940,6 +1980,20 @@ struct ib_flow_spec_tunnel {
struct ib_flow_tunnel_filter mask;
};
+struct ib_flow_esp_filter {
+ __be32 spi;
+ __be32 seq;
+ /* Must be last */
+ u8 real_sz[0];
+};
+
+struct ib_flow_spec_esp {
+ u32 type;
+ u16 size;
+ struct ib_flow_esp_filter val;
+ struct ib_flow_esp_filter mask;
+};
+
struct ib_flow_spec_action_tag {
enum ib_flow_spec_type type;
u16 size;
@@ -1951,6 +2005,12 @@ struct ib_flow_spec_action_drop {
u16 size;
};
+struct ib_flow_spec_action_handle {
+ enum ib_flow_spec_type type;
+ u16 size;
+ struct ib_flow_action *act;
+};
+
union ib_flow_spec {
struct {
u32 type;
@@ -1962,8 +2022,10 @@ union ib_flow_spec {
struct ib_flow_spec_tcp_udp tcp_udp;
struct ib_flow_spec_ipv6 ipv6;
struct ib_flow_spec_tunnel tunnel;
+ struct ib_flow_spec_esp esp;
struct ib_flow_spec_action_tag flow_tag;
struct ib_flow_spec_action_drop drop;
+ struct ib_flow_spec_action_handle action;
};
struct ib_flow_attr {
@@ -1984,6 +2046,64 @@ struct ib_flow {
struct ib_uobject *uobject;
};
+enum ib_flow_action_type {
+ IB_FLOW_ACTION_UNSPECIFIED,
+ IB_FLOW_ACTION_ESP = 1,
+};
+
+struct ib_flow_action_attrs_esp_keymats {
+ enum ib_uverbs_flow_action_esp_keymat protocol;
+ union {
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm aes_gcm;
+ } keymat;
+};
+
+struct ib_flow_action_attrs_esp_replays {
+ enum ib_uverbs_flow_action_esp_replay protocol;
+ union {
+ struct ib_uverbs_flow_action_esp_replay_bmp bmp;
+ } replay;
+};
+
+enum ib_flow_action_attrs_esp_flags {
+ /* All user-space flags at the top: Use enum ib_uverbs_flow_action_esp_flags
+ * This is done in order to share the same flags between user space and
+ * the kernel and to avoid an unnecessary translation.
+ */
+
+ /* Kernel flags */
+ IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED = 1ULL << 32,
+ IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS = 1ULL << 33,
+};
+
+struct ib_flow_spec_list {
+ struct ib_flow_spec_list *next;
+ union ib_flow_spec spec;
+};
+
+struct ib_flow_action_attrs_esp {
+ struct ib_flow_action_attrs_esp_keymats *keymat;
+ struct ib_flow_action_attrs_esp_replays *replay;
+ struct ib_flow_spec_list *encap;
+ /* Used only if IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED is enabled.
+ * A value of 0 is valid.
+ */
+ u32 esn;
+ u32 spi;
+ u32 seq;
+ u32 tfc_pad;
+ /* Use enum ib_flow_action_attrs_esp_flags */
+ u64 flags;
+ u64 hard_limit_pkts;
+};
+
+struct ib_flow_action {
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ enum ib_flow_action_type type;
+ atomic_t usecnt;
+};
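Once created, a flow action is referenced from steering rules through the new handle spec; a minimal sketch using the types above (esp_action stands for an object returned by the create_flow_action_esp() driver hook):

	union ib_flow_spec spec = {};

	spec.action.type = IB_FLOW_SPEC_ACTION_HANDLE;
	spec.action.size = sizeof(spec.action);
	spec.action.act = esp_action;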
+
struct ib_mad_hdr;
struct ib_grh;
@@ -2060,6 +2180,8 @@ struct ib_port_pkey_list {
struct list_head pkey_list;
};
+struct uverbs_attr_bundle;
+
struct ib_device {
/* Do not access @dma_device directly from ULP nor from HW drivers. */
struct device *dma_device;
@@ -2127,37 +2249,36 @@ struct ib_device {
*/
struct net_device *(*get_netdev)(struct ib_device *device,
u8 port_num);
+ /* query_gid should return the GID value for @device when the
+ * @port_num link layer is either IB or iWARP. It is a no-op if the
+ * @port_num port is a RoCE link layer.
+ */
int (*query_gid)(struct ib_device *device,
u8 port_num, int index,
union ib_gid *gid);
- /* When calling add_gid, the HW vendor's driver should
- * add the gid of device @device at gid index @index of
- * port @port_num to be @gid. Meta-info of that gid (for example,
- * the network device related to this gid is available
- * at @attr. @context allows the HW vendor driver to store extra
- * information together with a GID entry. The HW vendor may allocate
- * memory to contain this information and store it in @context when a
- * new GID entry is written to. Params are consistent until the next
- * call of add_gid or delete_gid. The function should return 0 on
+ /* When calling add_gid, the HW vendor's driver should add the gid
+ * of the device port at the gid index described by @attr. Meta-info of
+ * that gid (for example, the network device related to this gid) is
+ * available at @attr. @context allows the HW vendor driver to store
+ * extra information together with a GID entry. The HW vendor driver may
+ * allocate memory to contain this information and store it in @context
+ * when a new GID entry is written to. Params are consistent until the
+ * next call of add_gid or delete_gid. The function should return 0 on
* success or error otherwise. The function could be called
- * concurrently for different ports. This function is only called
- * when roce_gid_table is used.
+ * concurrently for different ports. This function is only called when
+ * roce_gid_table is used.
*/
- int (*add_gid)(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
+ int (*add_gid)(const union ib_gid *gid,
const struct ib_gid_attr *attr,
void **context);
/* When calling del_gid, the HW vendor's driver should delete the
- * gid of device @device at gid index @index of port @port_num.
+ * gid of device @device at the gid index and port number available
+ * in @attr.
* Upon the deletion of a GID entry, the HW vendor must free any
* allocated memory. The caller will clear @context afterwards.
* This function is only called when roce_gid_table is used.
*/
- int (*del_gid)(struct ib_device *device,
- u8 port_num,
- unsigned int index,
+ int (*del_gid)(const struct ib_gid_attr *attr,
void **context);
int (*query_pkey)(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey);
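Since the GID coordinates now travel inside struct ib_gid_attr, a provider callback no longer needs explicit device/port/index parameters; a hypothetical driver hook might look like:

	static int foo_del_gid(const struct ib_gid_attr *attr, void **context)
	{
		pr_debug("%s: port %u, index %u\n", attr->device->name,
			 attr->port_num, attr->index);
		kfree(*context); /* release metadata stored by add_gid */
		*context = NULL;
		return 0;
	}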
@@ -2315,6 +2436,21 @@ struct ib_device {
struct ib_rwq_ind_table_init_attr *init_attr,
struct ib_udata *udata);
int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
+ struct ib_flow_action * (*create_flow_action_esp)(struct ib_device *device,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*destroy_flow_action)(struct ib_flow_action *action);
+ int (*modify_flow_action_esp)(struct ib_flow_action *action,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs);
+ struct ib_dm * (*alloc_dm)(struct ib_device *device,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+ int (*dealloc_dm)(struct ib_dm *dm);
+ struct ib_mr * (*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
+ struct ib_dm_mr_attr *attr,
+ struct uverbs_attr_bundle *attrs);
/**
* rdma netdev operation
*
@@ -2376,6 +2512,7 @@ struct ib_device {
int comp_vector);
struct uverbs_root_spec *specs_root;
+ enum rdma_driver_id driver_id;
};
struct ib_client {
@@ -2435,11 +2572,9 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
}
-static inline bool ib_is_udata_cleared(struct ib_udata *udata,
- size_t offset,
- size_t len)
+static inline bool ib_is_buffer_cleared(const void __user *p,
+ size_t len)
{
- const void __user *p = udata->inbuf + offset;
bool ret;
u8 *buf;
@@ -2455,6 +2590,13 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
return ret;
}
+static inline bool ib_is_udata_cleared(struct ib_udata *udata,
+ size_t offset,
+ size_t len)
+{
+ return ib_is_buffer_cleared(udata->inbuf + offset, len);
+}
+
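
[Editorial illustration: ib_is_udata_cleared() is now a thin wrapper over
ib_is_buffer_cleared(), so new code can check arbitrary user buffers while
existing callers keep the same idiom. A sketch with a hypothetical helper
name, following the usual "reject unknown non-zero tail bytes" pattern:]

static int foo_check_cmd_tail(struct ib_udata *udata, size_t known_len)
{
	if (udata->inlen > known_len &&
	    !ib_is_udata_cleared(udata, known_len,
				 udata->inlen - known_len))
		return -EOPNOTSUPP;
	return 0;
}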
/**
* ib_modify_qp_is_ok - Check that the supplied attribute mask
* contains all required attributes and no attributes not allowed for
@@ -2471,9 +2613,9 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
* transition from cur_state to next_state is allowed by the IB spec,
* and that the attribute mask supplied is allowed for the transition.
*/
-int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask,
- enum rdma_link_layer ll);
+bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
+ enum ib_qp_type type, enum ib_qp_attr_mask mask,
+ enum rdma_link_layer ll);
void ib_register_event_handler(struct ib_event_handler *event_handler);
void ib_unregister_event_handler(struct ib_event_handler *event_handler);
@@ -2848,7 +2990,7 @@ int ib_modify_port(struct ib_device *device,
struct ib_port_modify *port_modify);
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- struct net_device *ndev, u8 *port_num, u16 *index);
+ u8 *port_num, u16 *index);
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index);
@@ -3217,18 +3359,6 @@ static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
}
/**
- * ib_peek_cq - Returns the number of unreaped completions currently
- * on the specified CQ.
- * @cq: The CQ to peek.
- * @wc_cnt: A minimum number of unreaped completions to check for.
- *
- * If the number of unreaped completions is greater than or equal to wc_cnt,
- * this function returns wc_cnt, otherwise, it returns the actual number of
- * unreaped completions.
- */
-int ib_peek_cq(struct ib_cq *cq, int wc_cnt);
-
-/**
* ib_req_notify_cq - Request completion notification on a CQ.
* @cq: The CQ to generate an event for.
* @flags:
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 6538a5c..6909347 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -38,6 +38,7 @@
#include <linux/in6.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_sa.h>
+#include <uapi/rdma/rdma_user_cm.h>
/*
* Upon receiving a device removal event, users must destroy the associated
@@ -64,14 +65,6 @@ enum rdma_cm_event_type {
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event);
-enum rdma_port_space {
- RDMA_PS_SDP = 0x0001,
- RDMA_PS_IPOIB = 0x0002,
- RDMA_PS_IB = 0x013F,
- RDMA_PS_TCP = 0x0106,
- RDMA_PS_UDP = 0x0111,
-};
-
#define RDMA_IB_IP_PS_MASK 0xFFFFFFFFFFFF0000ULL
#define RDMA_IB_IP_PS_TCP 0x0000000001060000ULL
#define RDMA_IB_IP_PS_UDP 0x0000000001110000ULL
@@ -120,20 +113,6 @@ struct rdma_cm_event {
} param;
};
-enum rdma_cm_state {
- RDMA_CM_IDLE,
- RDMA_CM_ADDR_QUERY,
- RDMA_CM_ADDR_RESOLVED,
- RDMA_CM_ROUTE_QUERY,
- RDMA_CM_ROUTE_RESOLVED,
- RDMA_CM_CONNECT,
- RDMA_CM_DISCONNECT,
- RDMA_CM_ADDR_BOUND,
- RDMA_CM_LISTEN,
- RDMA_CM_DEVICE_REMOVAL,
- RDMA_CM_DESTROYING
-};
-
struct rdma_cm_id;
/**
@@ -152,11 +131,17 @@ struct rdma_cm_id {
struct ib_qp *qp;
rdma_cm_event_handler event_handler;
struct rdma_route route;
- enum rdma_port_space ps;
+ enum rdma_ucm_port_space ps;
enum ib_qp_type qp_type;
u8 port_num;
};
+struct rdma_cm_id *__rdma_create_id(struct net *net,
+ rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type,
+ const char *caller);
+
/**
* rdma_create_id - Create an RDMA identifier.
*
@@ -169,10 +154,9 @@ struct rdma_cm_id {
*
* The id holds a reference on the network namespace until it is destroyed.
*/
-struct rdma_cm_id *rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_port_space ps,
- enum ib_qp_type qp_type);
+#define rdma_create_id(net, event_handler, context, ps, qp_type) \
+ __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
+ KBUILD_MODNAME)
/**
* rdma_destroy_id - Destroys an RDMA identifier.
@@ -284,6 +268,9 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
*/
int rdma_listen(struct rdma_cm_id *id, int backlog);
+int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ const char *caller);
+
/**
* rdma_accept - Called to accept a connection request or response.
* @id: Connection identifier associated with the request.
@@ -299,7 +286,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog);
* state of the qp associated with the id is modified to error, such that any
* previously posted receive buffers would be flushed.
*/
-int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
+#define rdma_accept(id, conn_param) \
+ __rdma_accept((id), (conn_param), KBUILD_MODNAME)
/**
* rdma_notify - Notifies the RDMA CM of an asynchronous event that has
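
[Editorial illustration: the macro indirection above is how a cm_id learns
which kernel module created it — every call site expands KBUILD_MODNAME
into the new caller argument without any source change. A sketch under
assumptions (the ULP name is invented):]

#include <rdma/rdma_cm.h>

/* In a ULP built as my_ulp.ko this compiles to
 * __rdma_create_id(net, handler, NULL, RDMA_PS_TCP, IB_QPT_RC, "my_ulp").
 */
static struct rdma_cm_id *my_ulp_create(struct net *net,
					rdma_cm_event_handler handler)
{
	return rdma_create_id(net, handler, NULL, RDMA_PS_TCP, IB_QPT_RC);
}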
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 4118324..3f4c187 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -538,7 +538,7 @@ static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
void rvt_dealloc_device(struct rvt_dev_info *rdi);
-int rvt_register_device(struct rvt_dev_info *rvd);
+int rvt_register_device(struct rvt_dev_info *rvd, u32 driver_id);
void rvt_unregister_device(struct rvt_dev_info *rvd);
int rvt_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 2cdf8dc..f3b3e35 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/kref.h>
#include <linux/completion.h>
+#include <linux/sched/task.h>
/**
* enum rdma_restrack_type - HW objects to track
@@ -29,6 +30,14 @@ enum rdma_restrack_type {
*/
RDMA_RESTRACK_QP,
/**
+ * @RDMA_RESTRACK_CM_ID: Connection Manager ID (CM_ID)
+ */
+ RDMA_RESTRACK_CM_ID,
+ /**
+ * @RDMA_RESTRACK_MR: Memory Region (MR)
+ */
+ RDMA_RESTRACK_MR,
+ /**
* @RDMA_RESTRACK_MAX: Last entry, used for array declarations
*/
RDMA_RESTRACK_MAX
@@ -146,8 +155,23 @@ static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res)
int __must_check rdma_restrack_get(struct rdma_restrack_entry *res);
/**
- * rdma_restrack_put() - relase resource
+ * rdma_restrack_put() - release resource
* @res: resource entry
*/
int rdma_restrack_put(struct rdma_restrack_entry *res);
+
+/**
+ * rdma_restrack_set_task() - set the task for this resource
+ * @res: resource entry
+ * @task: task struct
+ */
+static inline void rdma_restrack_set_task(struct rdma_restrack_entry *res,
+ struct task_struct *task)
+{
+ if (res->task)
+ put_task_struct(res->task);
+ get_task_struct(task);
+ res->task = task;
+}
+
#endif /* _RDMA_RESTRACK_H_ */
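
[Editorial illustration: rdma_restrack_set_task() releases the reference
on any previous owner before taking one on the new @task, so repeated
calls on the same entry do not leak. A hypothetical caller sketch:]

/* Hand ownership of a tracked resource to the current process, e.g.
 * after a cm_id migrates between contexts.
 */
static void foo_reown_res(struct rdma_restrack_entry *res)
{
	rdma_restrack_set_task(res, current);
}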
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 38287d9..4a4201d 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <rdma/rdma_user_ioctl.h>
#include <rdma/ib_user_ioctl_verbs.h>
+#include <rdma/ib_user_ioctl_cmds.h>
/*
* =======================================
@@ -50,6 +51,7 @@ enum uverbs_attr_type {
UVERBS_ATTR_TYPE_PTR_OUT,
UVERBS_ATTR_TYPE_IDR,
UVERBS_ATTR_TYPE_FD,
+ UVERBS_ATTR_TYPE_ENUM_IN,
};
enum uverbs_obj_access {
@@ -61,15 +63,32 @@ enum uverbs_obj_access {
enum {
UVERBS_ATTR_SPEC_F_MANDATORY = 1U << 0,
- /* Support extending attributes by length */
- UVERBS_ATTR_SPEC_F_MIN_SZ = 1U << 1,
+ /* Support extending attributes by length; all unknown trailing bytes must be zero */
+ UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO = 1U << 1,
};
+/* Specification of a single attribute inside the ioctl message */
struct uverbs_attr_spec {
- enum uverbs_attr_type type;
union {
- u16 len;
+ /* Header shared by all following union members - to reduce space. */
struct {
+ enum uverbs_attr_type type;
+ /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
+ u8 flags;
+ };
+ struct {
+ enum uverbs_attr_type type;
+ /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
+ u8 flags;
+ /* Current known size to kernel */
+ u16 len;
+ /* User isn't allowed to provide something < min_len */
+ u16 min_len;
+ } ptr;
+ struct {
+ enum uverbs_attr_type type;
+ /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
+ u8 flags;
/*
* higher bits mean the namespace and lower bits mean
* the type id within the namespace.
@@ -77,9 +96,19 @@ struct uverbs_attr_spec {
u16 obj_type;
u8 access;
} obj;
+ struct {
+ enum uverbs_attr_type type;
+ /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
+ u8 flags;
+ u8 num_elems;
+ /*
+ * The enum attribute can select one of the attributes
+ * contained in the ids array. Currently only PTR_IN
+ * attributes are supported in the ids array.
+ */
+ const struct uverbs_attr_spec *ids;
+ } enum_def;
};
- /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
- u8 flags;
};
struct uverbs_attr_spec_hash {
@@ -164,30 +193,45 @@ struct uverbs_object_tree_def {
};
#define UA_FLAGS(_flags) .flags = _flags
-#define __UVERBS_ATTR0(_id, _len, _type, ...) \
+#define __UVERBS_ATTR0(_id, _type, _fld, _attr, ...) \
((const struct uverbs_attr_def) \
- {.id = _id, .attr = {.type = _type, {.len = _len}, .flags = 0, } })
-#define __UVERBS_ATTR1(_id, _len, _type, _flags) \
+ {.id = _id, .attr = {{._fld = {.type = _type, _attr, .flags = 0, } }, } })
+#define __UVERBS_ATTR1(_id, _type, _fld, _attr, _extra1, ...) \
((const struct uverbs_attr_def) \
- {.id = _id, .attr = {.type = _type, {.len = _len}, _flags, } })
-#define __UVERBS_ATTR(_id, _len, _type, _flags, _n, ...) \
- __UVERBS_ATTR##_n(_id, _len, _type, _flags)
+ {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1 } },} })
+#define __UVERBS_ATTR2(_id, _type, _fld, _attr, _extra1, _extra2) \
+ ((const struct uverbs_attr_def) \
+ {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1, _extra2 } },} })
+#define __UVERBS_ATTR(_id, _type, _fld, _attr, _extra1, _extra2, _n, ...) \
+ __UVERBS_ATTR##_n(_id, _type, _fld, _attr, _extra1, _extra2)
+
+#define UVERBS_ATTR_TYPE(_type) \
+ .min_len = sizeof(_type), .len = sizeof(_type)
+#define UVERBS_ATTR_STRUCT(_type, _last) \
+ .min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .len = sizeof(_type)
+#define UVERBS_ATTR_SIZE(_min_len, _len) \
+ .min_len = _min_len, .len = _len
+
/*
* In new compiler, UVERBS_ATTR could be simplified by declaring it as
* [_id] = {.type = _type, .len = _len, ##__VA_ARGS__}
* But since we support older compilers too, we need the more complex code.
*/
-#define UVERBS_ATTR(_id, _len, _type, ...) \
- __UVERBS_ATTR(_id, _len, _type, ##__VA_ARGS__, 1, 0)
+#define UVERBS_ATTR(_id, _type, _fld, _attr, ...) \
+ __UVERBS_ATTR(_id, _type, _fld, _attr, ##__VA_ARGS__, 2, 1, 0)
#define UVERBS_ATTR_PTR_IN_SZ(_id, _len, ...) \
- UVERBS_ATTR(_id, _len, UVERBS_ATTR_TYPE_PTR_IN, ##__VA_ARGS__)
+ UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_IN, ptr, _len, ##__VA_ARGS__)
/* If sizeof(_type) <= sizeof(u64), this will be inlined rather than a pointer */
#define UVERBS_ATTR_PTR_IN(_id, _type, ...) \
- UVERBS_ATTR_PTR_IN_SZ(_id, sizeof(_type), ##__VA_ARGS__)
+ UVERBS_ATTR_PTR_IN_SZ(_id, _type, ##__VA_ARGS__)
#define UVERBS_ATTR_PTR_OUT_SZ(_id, _len, ...) \
- UVERBS_ATTR(_id, _len, UVERBS_ATTR_TYPE_PTR_OUT, ##__VA_ARGS__)
+ UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_OUT, ptr, _len, ##__VA_ARGS__)
#define UVERBS_ATTR_PTR_OUT(_id, _type, ...) \
- UVERBS_ATTR_PTR_OUT_SZ(_id, sizeof(_type), ##__VA_ARGS__)
+ UVERBS_ATTR_PTR_OUT_SZ(_id, _type, ##__VA_ARGS__)
+#define UVERBS_ATTR_ENUM_IN(_id, _enum_arr, ...) \
+ UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_ENUM_IN, enum_def, \
+ .ids = (_enum_arr), \
+ .num_elems = ARRAY_SIZE(_enum_arr), ##__VA_ARGS__)
/*
* In new compiler, UVERBS_ATTR_IDR (and FD) could be simplified by declaring
@@ -202,15 +246,13 @@ struct uverbs_object_tree_def {
#define ___UVERBS_ATTR_OBJ0(_id, _obj_class, _obj_type, _access, ...)\
((const struct uverbs_attr_def) \
{.id = _id, \
- .attr = {.type = _obj_class, \
- {.obj = {.obj_type = _obj_type, .access = _access } },\
- .flags = 0} })
+ .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type, \
+ .access = _access, .flags = 0 } }, } })
#define ___UVERBS_ATTR_OBJ1(_id, _obj_class, _obj_type, _access, _flags)\
((const struct uverbs_attr_def) \
{.id = _id, \
- .attr = {.type = _obj_class, \
- {.obj = {.obj_type = _obj_type, .access = _access} }, \
- _flags} })
+ .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type, \
+ .access = _access, _flags} }, } })
#define ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, _flags, \
_n, ...) \
___UVERBS_ATTR_OBJ##_n(_id, _obj_class, _obj_type, _access, _flags)
@@ -229,6 +271,11 @@ struct uverbs_object_tree_def {
#define DECLARE_UVERBS_ATTR_SPEC(_name, ...) \
const struct uverbs_attr_def _name = __VA_ARGS__
+#define DECLARE_UVERBS_ENUM(_name, ...) \
+ const struct uverbs_enum_spec _name = { \
+ .len = ARRAY_SIZE(((struct uverbs_attr_spec[]){__VA_ARGS__})),\
+ .ids = {__VA_ARGS__}, \
+ }
#define _UVERBS_METHOD_ATTRS_SZ(...) \
(sizeof((const struct uverbs_attr_def * const []){__VA_ARGS__}) /\
sizeof(const struct uverbs_attr_def *))
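
[Editorial illustration of the size macros above, modeled on how the core
declares its own attributes; the struct, attribute id and spec name are
invented. min_len pins the last field old userspace must supply, while
len covers the full current struct so newer tail fields stay optional:]

struct foo_cmd {
	__aligned_u64 addr;
	__u32 flags;		/* last mandatory field */
	__u32 comp_mask;	/* newer, optional tail */
};

static DECLARE_UVERBS_ATTR_SPEC(
	foo_cmd_spec,
	UVERBS_ATTR_PTR_IN(FOO_ATTR_CMD,
			   UVERBS_ATTR_STRUCT(struct foo_cmd, flags),
			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));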
@@ -280,6 +327,7 @@ struct uverbs_ptr_attr {
u16 len;
/* Combination of bits from enum UVERBS_ATTR_F_XXXX */
u16 flags;
+ u8 enum_id;
};
struct uverbs_obj_attr {
@@ -336,6 +384,8 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b
idx & ~UVERBS_ID_NS_MASK);
}
+#define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT)
+
static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle,
u16 idx)
{
@@ -347,6 +397,29 @@ static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr
return &attrs_bundle->hash[idx_bucket].attrs[idx & ~UVERBS_ID_NS_MASK];
}
+static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs_bundle,
+ u16 idx)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ return attr->ptr_attr.enum_id;
+}
+
+static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle,
+					u16 idx)
+{
+	const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+	/* check the lookup result before dereferencing it */
+	if (IS_ERR(attr))
+		return ERR_CAST(attr);
+
+	return attr->obj_attr.uobject->object;
+}
+
static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
size_t idx, const void *from, size_t size)
{
@@ -385,8 +458,8 @@ static inline int _uverbs_copy_from(void *to,
/*
* Validation ensures attr->ptr_attr.len >= size. If the caller is
- * using UVERBS_ATTR_SPEC_F_MIN_SZ then it must call copy_from with
- * the right size.
+ * using UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO then it must call
+ * uverbs_copy_from_or_zero.
*/
if (unlikely(size < attr->ptr_attr.len))
return -EINVAL;
@@ -400,9 +473,37 @@ static inline int _uverbs_copy_from(void *to,
return 0;
}
+static inline int _uverbs_copy_from_or_zero(void *to,
+ const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx,
+ size_t size)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+ size_t min_size;
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ min_size = min_t(size_t, size, attr->ptr_attr.len);
+
+ if (uverbs_attr_ptr_is_inline(attr))
+ memcpy(to, &attr->ptr_attr.data, min_size);
+ else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data),
+ min_size))
+ return -EFAULT;
+
+ if (size > min_size)
+ memset(to + min_size, 0, size - min_size);
+
+ return 0;
+}
+
#define uverbs_copy_from(to, attrs_bundle, idx) \
_uverbs_copy_from(to, attrs_bundle, idx, sizeof(*to))
+#define uverbs_copy_from_or_zero(to, attrs_bundle, idx) \
+ _uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to))
+
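
[Editorial illustration, reusing the hypothetical foo_cmd/FOO_ATTR_CMD
names from earlier: with UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO an older
userspace may pass a shorter struct; uverbs_copy_from_or_zero() copies
what was provided and zero-fills the rest, so new fields read as zero:]

static int foo_method_handler(struct ib_device *ib_dev,
			      struct ib_uverbs_file *file,
			      struct uverbs_attr_bundle *attrs)
{
	struct foo_cmd cmd;
	int ret;

	ret = uverbs_copy_from_or_zero(&cmd, attrs, FOO_ATTR_CMD);
	if (ret)
		return ret;

	/* cmd.comp_mask is zero when userspace sent the short layout */
	return 0;
}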
/* =================================================
* Definitions -> Specs infrastructure
* =================================================
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
new file mode 100644
index 0000000..c5bb4eb
--- /dev/null
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _UVERBS_NAMED_IOCTL_
+#define _UVERBS_NAMED_IOCTL_
+
+#include <rdma/uverbs_ioctl.h>
+
+#ifndef UVERBS_MODULE_NAME
+#error "Please #define UVERBS_MODULE_NAME before including rdma/uverbs_named_ioctl.h"
+#endif
+
+#define _UVERBS_PASTE(x, y) x ## y
+#define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y)
+#define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id)
+#define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
+
+#define DECLARE_UVERBS_NAMED_METHOD(id, ...) \
+ DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, UVERBS_HANDLER(id), ##__VA_ARGS__)
+
+#define DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(id, handler, ...) \
+ DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, handler, ##__VA_ARGS__)
+
+#define DECLARE_UVERBS_NAMED_METHOD_NO_OVERRIDE(id, handler, ...) \
+ DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, NULL, ##__VA_ARGS__)
+
+#define DECLARE_UVERBS_NAMED_OBJECT(id, ...) \
+ DECLARE_UVERBS_OBJECT(UVERBS_OBJECT(id), id, ##__VA_ARGS__)
+
+#define _UVERBS_COMP_NAME(x, y, z) _UVERBS_NAME(_UVERBS_NAME(x, y), z)
+
+#define UVERBS_NO_OVERRIDE NULL
+
+/* This declares a parsing tree with one object and one method. This is usually
+ * used for merging driver attributes into the common attributes. The driver has
+ * a chance to override the handler and type attrs of the original object.
+ * The __VA_ARGS__ just contains a list of attributes.
+ */
+#define ADD_UVERBS_ATTRIBUTES(_name, _object, _method, _type_attrs, _handler, ...) \
+static DECLARE_UVERBS_METHOD(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \
+ _method_, _name), \
+ _method, _handler, ##__VA_ARGS__); \
+ \
+static DECLARE_UVERBS_OBJECT(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \
+ _object_, _name), \
+ _object, _type_attrs, \
+ &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \
+ _method_, _name)); \
+ \
+static DECLARE_UVERBS_OBJECT_TREE(_name, \
+ &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \
+ _object_, _name))
+
+/* A very common use case is that the driver doesn't override the handler and
+ * type_attrs. Therefore, we provide a simplified macro for this common case.
+ */
+#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object, _method, ...) \
+ ADD_UVERBS_ATTRIBUTES(_name, _object, _method, UVERBS_NO_OVERRIDE, \
+ UVERBS_NO_OVERRIDE, ##__VA_ARGS__)
+
+#endif
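
[Editorial usage illustration, modeled on how mlx5 adopts this header;
the driver prefix and attribute id are invented. A provider defines
UVERBS_MODULE_NAME before the include so the macros mint uniquely named
symbols, then grafts its own attributes onto a common method:]

#define UVERBS_MODULE_NAME foo_ib
#include <rdma/uverbs_named_ioctl.h>

ADD_UVERBS_ATTRIBUTES_SIMPLE(foo_ib_dm, UVERBS_OBJECT_DM,
			     UVERBS_METHOD_DM_ALLOC,
			     &UVERBS_ATTR_PTR_OUT(FOO_IB_ATTR_ALLOC_DM_RESP_OFFSET,
						  UVERBS_ATTR_TYPE(u64),
						  UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));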
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 5f8e20b..9d56cdb 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -37,26 +37,10 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/ib_user_ioctl_verbs.h>
+#define UVERBS_OBJECT(id) uverbs_object_##id
+
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-extern const struct uverbs_object_def uverbs_object_comp_channel;
-extern const struct uverbs_object_def uverbs_object_cq;
-extern const struct uverbs_object_def uverbs_object_qp;
-extern const struct uverbs_object_def uverbs_object_rwq_ind_table;
-extern const struct uverbs_object_def uverbs_object_wq;
-extern const struct uverbs_object_def uverbs_object_srq;
-extern const struct uverbs_object_def uverbs_object_ah;
-extern const struct uverbs_object_def uverbs_object_flow;
-extern const struct uverbs_object_def uverbs_object_mr;
-extern const struct uverbs_object_def uverbs_object_mw;
-extern const struct uverbs_object_def uverbs_object_pd;
-extern const struct uverbs_object_def uverbs_object_xrcd;
-extern const struct uverbs_object_def uverbs_object_device;
-
-extern const struct uverbs_object_tree_def uverbs_default_objects;
-static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
-{
- return &uverbs_default_objects;
-}
+const struct uverbs_object_tree_def *uverbs_default_get_objects(void);
#else
static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
{
@@ -72,22 +56,22 @@ static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type,
return rdma_lookup_get_uobject(type, ucontext, id, write);
}
-#define uobj_get_type(_object) uverbs_object_##_object.type_attrs
+#define uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs
#define uobj_get_read(_type, _id, _ucontext) \
- __uobj_get(_type, false, _ucontext, _id)
+ __uobj_get(uobj_get_type(_type), false, _ucontext, _id)
-#define uobj_get_obj_read(_object, _id, _ucontext) \
+#define uobj_get_obj_read(_object, _type, _id, _ucontext) \
({ \
struct ib_uobject *__uobj = \
- __uobj_get(uverbs_object_##_object.type_attrs, \
+ __uobj_get(uobj_get_type(_type), \
false, _ucontext, _id); \
\
(struct ib_##_object *)(IS_ERR(__uobj) ? NULL : __uobj->object);\
})
#define uobj_get_write(_type, _id, _ucontext) \
- __uobj_get(_type, true, _ucontext, _id)
+ __uobj_get(uobj_get_type(_type), true, _ucontext, _id)
static inline void uobj_put_read(struct ib_uobject *uobj)
{
@@ -124,7 +108,7 @@ static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type
}
#define uobj_alloc(_type, ucontext) \
- __uobj_alloc(_type, ucontext)
+ __uobj_alloc(uobj_get_type(_type), ucontext)
#endif
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index db54115..a7a6111 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -53,15 +53,20 @@ struct bnxt_re_uctx_resp {
__u32 rsvd;
};
+/*
+ * This struct is placed after the ib_uverbs_alloc_pd_resp struct, which is
+ * not 8 byte aligned. To avoid undesired padding in various cases, this
+ * struct must be marked as packed.
+ */
struct bnxt_re_pd_resp {
__u32 pdid;
__u32 dpi;
__u64 dbr;
-};
+} __attribute__((packed, aligned(4)));
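
[Editorial illustration, not part of the patch: with packed+aligned(4)
the struct keeps the same 16-byte layout everywhere but only demands
4-byte alignment, so no hole appears when it directly follows the
4-byte ib_uverbs_alloc_pd_resp in a response. A compile-time sketch:]

static inline void bnxt_re_abi_layout_check(void)
{
	BUILD_BUG_ON(sizeof(struct bnxt_re_pd_resp) != 16);
	BUILD_BUG_ON(offsetof(struct bnxt_re_pd_resp, dbr) != 8);
}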
struct bnxt_re_cq_req {
- __u64 cq_va;
- __u64 cq_handle;
+ __aligned_u64 cq_va;
+ __aligned_u64 cq_handle;
};
struct bnxt_re_cq_resp {
@@ -72,9 +77,9 @@ struct bnxt_re_cq_resp {
};
struct bnxt_re_qp_req {
- __u64 qpsva;
- __u64 qprva;
- __u64 qp_handle;
+ __aligned_u64 qpsva;
+ __aligned_u64 qprva;
+ __aligned_u64 qp_handle;
};
struct bnxt_re_qp_resp {
@@ -83,8 +88,8 @@ struct bnxt_re_qp_resp {
};
struct bnxt_re_srq_req {
- __u64 srqva;
- __u64 srq_handle;
+ __aligned_u64 srqva;
+ __aligned_u64 srq_handle;
};
struct bnxt_re_srq_resp {
diff --git a/include/uapi/rdma/cxgb3-abi.h b/include/uapi/rdma/cxgb3-abi.h
index d5745e4..9acb4b7 100644
--- a/include/uapi/rdma/cxgb3-abi.h
+++ b/include/uapi/rdma/cxgb3-abi.h
@@ -41,21 +41,21 @@
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
+ * In particular do not use pointer types -- pass pointers in __aligned_u64
* instead.
*/
struct iwch_create_cq_req {
- __u64 user_rptr_addr;
+ __aligned_u64 user_rptr_addr;
};
struct iwch_create_cq_resp_v0 {
- __u64 key;
+ __aligned_u64 key;
__u32 cqid;
__u32 size_log2;
};
struct iwch_create_cq_resp {
- __u64 key;
+ __aligned_u64 key;
__u32 cqid;
__u32 size_log2;
__u32 memsize;
@@ -63,8 +63,8 @@ struct iwch_create_cq_resp {
};
struct iwch_create_qp_resp {
- __u64 key;
- __u64 db_key;
+ __aligned_u64 key;
+ __aligned_u64 db_key;
__u32 qpid;
__u32 size_log2;
__u32 sq_size_log2;
@@ -74,4 +74,9 @@ struct iwch_create_qp_resp {
struct iwch_reg_user_mr_resp {
__u32 pbl_addr;
};
+
+struct iwch_alloc_pd_resp {
+ __u32 pdid;
+};
+
#endif /* CXGB3_ABI_USER_H */
diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h
index 05f71f1..1fefd01 100644
--- a/include/uapi/rdma/cxgb4-abi.h
+++ b/include/uapi/rdma/cxgb4-abi.h
@@ -41,13 +41,13 @@
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
+ * In particular do not use pointer types -- pass pointers in __aligned_u64
* instead.
*/
struct c4iw_create_cq_resp {
- __u64 key;
- __u64 gts_key;
- __u64 memsize;
+ __aligned_u64 key;
+ __aligned_u64 gts_key;
+ __aligned_u64 memsize;
__u32 cqid;
__u32 size;
__u32 qid_mask;
@@ -59,13 +59,13 @@ enum {
};
struct c4iw_create_qp_resp {
- __u64 ma_sync_key;
- __u64 sq_key;
- __u64 rq_key;
- __u64 sq_db_gts_key;
- __u64 rq_db_gts_key;
- __u64 sq_memsize;
- __u64 rq_memsize;
+ __aligned_u64 ma_sync_key;
+ __aligned_u64 sq_key;
+ __aligned_u64 rq_key;
+ __aligned_u64 sq_db_gts_key;
+ __aligned_u64 rq_db_gts_key;
+ __aligned_u64 sq_memsize;
+ __aligned_u64 rq_memsize;
__u32 sqid;
__u32 rqid;
__u32 sq_size;
@@ -75,8 +75,13 @@ struct c4iw_create_qp_resp {
};
struct c4iw_alloc_ucontext_resp {
- __u64 status_page_key;
+ __aligned_u64 status_page_key;
__u32 status_page_size;
__u32 reserved; /* explicit padding (optional for i386) */
};
+
+struct c4iw_alloc_pd_resp {
+ __u32 pdid;
+};
+
#endif /* CXGB4_ABI_USER_H */
diff --git a/include/uapi/rdma/hfi/hfi1_ioctl.h b/include/uapi/rdma/hfi/hfi1_ioctl.h
index 9de78c5..8f3d9fe 100644
--- a/include/uapi/rdma/hfi/hfi1_ioctl.h
+++ b/include/uapi/rdma/hfi/hfi1_ioctl.h
@@ -79,7 +79,7 @@ struct hfi1_user_info {
};
struct hfi1_ctxt_info {
- __u64 runtime_flags; /* chip/drv runtime flags (HFI1_CAP_*) */
+ __aligned_u64 runtime_flags; /* chip/drv runtime flags (HFI1_CAP_*) */
__u32 rcvegr_size; /* size of each eager buffer */
__u16 num_active; /* number of active units */
__u16 unit; /* unit (chip) assigned to caller */
@@ -98,9 +98,9 @@ struct hfi1_ctxt_info {
struct hfi1_tid_info {
/* virtual address of first page in transfer */
- __u64 vaddr;
+ __aligned_u64 vaddr;
/* pointer to tid array. this array is big enough */
- __u64 tidlist;
+ __aligned_u64 tidlist;
/* number of tids programmed by this request */
__u32 tidcnt;
/* length of transfer buffer programmed by this request */
@@ -131,23 +131,23 @@ struct hfi1_base_info {
*/
__u32 bthqp;
/* PIO credit return address, */
- __u64 sc_credits_addr;
+ __aligned_u64 sc_credits_addr;
/*
* Base address of write-only pio buffers for this process.
* Each buffer has sendpio_credits*64 bytes.
*/
- __u64 pio_bufbase_sop;
+ __aligned_u64 pio_bufbase_sop;
/*
* Base address of write-only pio buffers for this process.
* Each buffer has sendpio_credits*64 bytes.
*/
- __u64 pio_bufbase;
+ __aligned_u64 pio_bufbase;
/* address where receive buffer queue is mapped into */
- __u64 rcvhdr_bufbase;
+ __aligned_u64 rcvhdr_bufbase;
/* base address of Eager receive buffers. */
- __u64 rcvegr_bufbase;
+ __aligned_u64 rcvegr_bufbase;
/* base address of SDMA completion ring */
- __u64 sdma_comp_bufbase;
+ __aligned_u64 sdma_comp_bufbase;
/*
* User register base for init code, not to be used directly by
* protocol or applications. Always maps real chip register space.
@@ -155,20 +155,20 @@ struct hfi1_base_info {
* ur_rcvhdrhead, ur_rcvhdrtail, ur_rcvegrhead, ur_rcvegrtail,
* ur_rcvtidflow
*/
- __u64 user_regbase;
+ __aligned_u64 user_regbase;
/* notification events */
- __u64 events_bufbase;
+ __aligned_u64 events_bufbase;
/* status page */
- __u64 status_bufbase;
+ __aligned_u64 status_bufbase;
/* rcvhdrtail update */
- __u64 rcvhdrtail_base;
+ __aligned_u64 rcvhdrtail_base;
/*
* shared memory pages for subctxts if ctxt is shared; these cover
* all the processes in the group sharing a single context.
* all have enough space for the num_subcontexts value on this job.
*/
- __u64 subctxt_uregbase;
- __u64 subctxt_rcvegrbuf;
- __u64 subctxt_rcvhdrbuf;
+ __aligned_u64 subctxt_uregbase;
+ __aligned_u64 subctxt_rcvegrbuf;
+ __aligned_u64 subctxt_rcvhdrbuf;
};
#endif /* _LINIUX__HFI1_IOCTL_H */
diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h
index 791bea2..c6a984c 100644
--- a/include/uapi/rdma/hfi/hfi1_user.h
+++ b/include/uapi/rdma/hfi/hfi1_user.h
@@ -177,8 +177,8 @@ struct hfi1_sdma_comp_entry {
* Device status and notifications from driver to user-space.
*/
struct hfi1_status {
- __u64 dev; /* device/hw status bits */
- __u64 port; /* port state and status bits */
+ __aligned_u64 dev; /* device/hw status bits */
+ __aligned_u64 port; /* port state and status bits */
char freezemsg[0];
};
@@ -219,7 +219,7 @@ struct sdma_req_info {
* in charge of managing its own ring.
*/
__u16 comp_idx;
-} __packed;
+} __attribute__((__packed__));
/*
* SW KDETH header.
@@ -230,7 +230,7 @@ struct hfi1_kdeth_header {
__le16 jkey;
__le16 hcrc;
__le32 swdata[7];
-} __packed;
+} __attribute__((__packed__));
/*
* Structure describing the headers that User space uses. The
@@ -241,7 +241,7 @@ struct hfi1_pkt_header {
__be16 lrh[4];
__be32 bth[3];
struct hfi1_kdeth_header kdeth;
-} __packed;
+} __attribute__((__packed__));
/*
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index a9c03b0..7092c8d 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -37,19 +37,35 @@
#include <linux/types.h>
struct hns_roce_ib_create_cq {
- __u64 buf_addr;
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
+};
+
+struct hns_roce_ib_create_cq_resp {
+ __aligned_u64 cqn; /* Only 32 bits used, 64 for compat */
+ __aligned_u64 cap_flags;
};
struct hns_roce_ib_create_qp {
- __u64 buf_addr;
- __u64 db_addr;
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
__u8 log_sq_bb_count;
__u8 log_sq_stride;
__u8 sq_no_prefetch;
__u8 reserved[5];
};
+struct hns_roce_ib_create_qp_resp {
+ __aligned_u64 cap_flags;
+};
+
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
+ __u32 reserved;
};
+
+struct hns_roce_ib_alloc_pd_resp {
+ __u32 pdn;
+};
+
#endif /* HNS_ABI_USER_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ucontext.h b/include/uapi/rdma/i40iw-abi.h
index 57d3f1d..79890ba 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ucontext.h
+++ b/include/uapi/rdma/i40iw-abi.h
@@ -34,8 +34,8 @@
*
*/
-#ifndef I40IW_USER_CONTEXT_H
-#define I40IW_USER_CONTEXT_H
+#ifndef I40IW_ABI_H
+#define I40IW_ABI_H
#include <linux/types.h>
@@ -61,17 +61,17 @@ struct i40iw_alloc_pd_resp {
};
struct i40iw_create_cq_req {
- __u64 user_cq_buffer;
- __u64 user_shadow_area;
+ __aligned_u64 user_cq_buffer;
+ __aligned_u64 user_shadow_area;
};
struct i40iw_create_qp_req {
- __u64 user_wqe_buffers;
- __u64 user_compl_ctx;
+ __aligned_u64 user_wqe_buffers;
+ __aligned_u64 user_compl_ctx;
/* UDA QP PHB */
- __u64 user_sq_phb; /* place for VA of the sq phb buff */
- __u64 user_rq_phb; /* place for VA of the rq phb buff */
+ __aligned_u64 user_sq_phb; /* place for VA of the sq phb buff */
+ __aligned_u64 user_rq_phb; /* place for VA of the rq phb buff */
};
enum i40iw_memreg_type {
diff --git a/include/uapi/rdma/ib_user_cm.h b/include/uapi/rdma/ib_user_cm.h
index f4041bd..4a8f956 100644
--- a/include/uapi/rdma/ib_user_cm.h
+++ b/include/uapi/rdma/ib_user_cm.h
@@ -73,8 +73,8 @@ struct ib_ucm_cmd_hdr {
};
struct ib_ucm_create_id {
- __u64 uid;
- __u64 response;
+ __aligned_u64 uid;
+ __aligned_u64 response;
};
struct ib_ucm_create_id_resp {
@@ -82,7 +82,7 @@ struct ib_ucm_create_id_resp {
};
struct ib_ucm_destroy_id {
- __u64 response;
+ __aligned_u64 response;
__u32 id;
__u32 reserved;
};
@@ -92,7 +92,7 @@ struct ib_ucm_destroy_id_resp {
};
struct ib_ucm_attr_id {
- __u64 response;
+ __aligned_u64 response;
__u32 id;
__u32 reserved;
};
@@ -105,7 +105,7 @@ struct ib_ucm_attr_id_resp {
};
struct ib_ucm_init_qp_attr {
- __u64 response;
+ __aligned_u64 response;
__u32 id;
__u32 qp_state;
};
@@ -123,7 +123,7 @@ struct ib_ucm_notify {
};
struct ib_ucm_private_data {
- __u64 data;
+ __aligned_u64 data;
__u32 id;
__u8 len;
__u8 reserved[3];
@@ -135,9 +135,9 @@ struct ib_ucm_req {
__u32 qp_type;
__u32 psn;
__be64 sid;
- __u64 data;
- __u64 primary_path;
- __u64 alternate_path;
+ __aligned_u64 data;
+ __aligned_u64 primary_path;
+ __aligned_u64 alternate_path;
__u8 len;
__u8 peer_to_peer;
__u8 responder_resources;
@@ -153,8 +153,8 @@ struct ib_ucm_req {
};
struct ib_ucm_rep {
- __u64 uid;
- __u64 data;
+ __aligned_u64 uid;
+ __aligned_u64 data;
__u32 id;
__u32 qpn;
__u32 psn;
@@ -172,15 +172,15 @@ struct ib_ucm_rep {
struct ib_ucm_info {
__u32 id;
__u32 status;
- __u64 info;
- __u64 data;
+ __aligned_u64 info;
+ __aligned_u64 data;
__u8 info_len;
__u8 data_len;
__u8 reserved[6];
};
struct ib_ucm_mra {
- __u64 data;
+ __aligned_u64 data;
__u32 id;
__u8 len;
__u8 timeout;
@@ -188,8 +188,8 @@ struct ib_ucm_mra {
};
struct ib_ucm_lap {
- __u64 path;
- __u64 data;
+ __aligned_u64 path;
+ __aligned_u64 data;
__u32 id;
__u8 len;
__u8 reserved[3];
@@ -199,8 +199,8 @@ struct ib_ucm_sidr_req {
__u32 id;
__u32 timeout;
__be64 sid;
- __u64 data;
- __u64 path;
+ __aligned_u64 data;
+ __aligned_u64 path;
__u16 reserved_pkey;
__u8 len;
__u8 max_cm_retries;
@@ -212,8 +212,8 @@ struct ib_ucm_sidr_rep {
__u32 qpn;
__u32 qkey;
__u32 status;
- __u64 info;
- __u64 data;
+ __aligned_u64 info;
+ __aligned_u64 data;
__u8 info_len;
__u8 data_len;
__u8 reserved[6];
@@ -222,9 +222,9 @@ struct ib_ucm_sidr_rep {
* event notification ABI structures.
*/
struct ib_ucm_event_get {
- __u64 response;
- __u64 data;
- __u64 info;
+ __aligned_u64 response;
+ __aligned_u64 data;
+ __aligned_u64 info;
__u8 data_len;
__u8 info_len;
__u8 reserved[6];
@@ -303,7 +303,7 @@ struct ib_ucm_sidr_rep_event_resp {
#define IB_UCM_PRES_ALTERNATE 0x08
struct ib_ucm_event_resp {
- __u64 uid;
+ __aligned_u64 uid;
__u32 id;
__u32 event;
__u32 present;
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
new file mode 100644
index 0000000..83e3890
--- /dev/null
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_USER_IOCTL_CMDS_H
+#define IB_USER_IOCTL_CMDS_H
+
+#define UVERBS_ID_NS_MASK 0xF000
+#define UVERBS_ID_NS_SHIFT 12
+
+#define UVERBS_UDATA_DRIVER_DATA_NS 1
+#define UVERBS_UDATA_DRIVER_DATA_FLAG (1UL << UVERBS_ID_NS_SHIFT)
+
+enum uverbs_default_objects {
+ UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */
+ UVERBS_OBJECT_PD,
+ UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_OBJECT_CQ,
+ UVERBS_OBJECT_QP,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_OBJECT_AH,
+ UVERBS_OBJECT_MR,
+ UVERBS_OBJECT_MW,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UVERBS_OBJECT_WQ,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_OBJECT_DM,
+};
+
+enum {
+ UVERBS_ATTR_UHW_IN = UVERBS_UDATA_DRIVER_DATA_FLAG,
+ UVERBS_ATTR_UHW_OUT,
+};
+
+enum uverbs_attrs_create_cq_cmd_attr_ids {
+ UVERBS_ATTR_CREATE_CQ_HANDLE,
+ UVERBS_ATTR_CREATE_CQ_CQE,
+ UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
+ UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
+ UVERBS_ATTR_CREATE_CQ_COMP_VECTOR,
+ UVERBS_ATTR_CREATE_CQ_FLAGS,
+ UVERBS_ATTR_CREATE_CQ_RESP_CQE,
+};
+
+enum uverbs_attrs_destroy_cq_cmd_attr_ids {
+ UVERBS_ATTR_DESTROY_CQ_HANDLE,
+ UVERBS_ATTR_DESTROY_CQ_RESP,
+};
+
+enum uverbs_attrs_create_flow_action_esp {
+ UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
+ UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+};
+
+enum uverbs_attrs_destroy_flow_action_esp {
+ UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
+};
+
+enum uverbs_methods_cq {
+ UVERBS_METHOD_CQ_CREATE,
+ UVERBS_METHOD_CQ_DESTROY,
+};
+
+enum uverbs_methods_actions_flow_action_ops {
+ UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+ UVERBS_METHOD_FLOW_ACTION_DESTROY,
+ UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
+};
+
+enum uverbs_attrs_alloc_dm_cmd_attr_ids {
+ UVERBS_ATTR_ALLOC_DM_HANDLE,
+ UVERBS_ATTR_ALLOC_DM_LENGTH,
+ UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
+};
+
+enum uverbs_attrs_free_dm_cmd_attr_ids {
+ UVERBS_ATTR_FREE_DM_HANDLE,
+};
+
+enum uverbs_methods_dm {
+ UVERBS_METHOD_DM_ALLOC,
+ UVERBS_METHOD_DM_FREE,
+};
+
+enum uverbs_attrs_reg_dm_mr_cmd_attr_ids {
+ UVERBS_ATTR_REG_DM_MR_HANDLE,
+ UVERBS_ATTR_REG_DM_MR_OFFSET,
+ UVERBS_ATTR_REG_DM_MR_LENGTH,
+ UVERBS_ATTR_REG_DM_MR_PD_HANDLE,
+ UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+ UVERBS_ATTR_REG_DM_MR_DM_HANDLE,
+ UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
+ UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+};
+
+enum uverbs_methods_mr {
+ UVERBS_METHOD_DM_MR_REG,
+};
+
+#endif
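
[Editorial illustration; the helper name is invented. Attribute ids are
namespaced by their top nibble, so a driver-private attribute is any id
whose namespace bits decode to UVERBS_UDATA_DRIVER_DATA_NS:]

static inline bool foo_attr_is_driver_data(u16 attr_id)
{
	return ((attr_id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT) ==
	       UVERBS_UDATA_DRIVER_DATA_NS;
}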
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 842792e..04e46ea 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
/*
- * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2017-2018, Mellanox Technologies inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -33,52 +34,69 @@
#ifndef IB_USER_IOCTL_VERBS_H
#define IB_USER_IOCTL_VERBS_H
-#include <rdma/rdma_user_ioctl.h>
-
-#define UVERBS_UDATA_DRIVER_DATA_NS 1
-#define UVERBS_UDATA_DRIVER_DATA_FLAG (1UL << UVERBS_ID_NS_SHIFT)
-
-enum uverbs_default_objects {
- UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */
- UVERBS_OBJECT_PD,
- UVERBS_OBJECT_COMP_CHANNEL,
- UVERBS_OBJECT_CQ,
- UVERBS_OBJECT_QP,
- UVERBS_OBJECT_SRQ,
- UVERBS_OBJECT_AH,
- UVERBS_OBJECT_MR,
- UVERBS_OBJECT_MW,
- UVERBS_OBJECT_FLOW,
- UVERBS_OBJECT_XRCD,
- UVERBS_OBJECT_RWQ_IND_TBL,
- UVERBS_OBJECT_WQ,
- UVERBS_OBJECT_LAST,
+#include <linux/types.h>
+
+#ifndef RDMA_UAPI_PTR
+#define RDMA_UAPI_PTR(_type, _name) __aligned_u64 _name
+#endif
+
+enum ib_uverbs_flow_action_esp_keymat {
+ IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM,
};
-enum {
- UVERBS_UHW_IN = UVERBS_UDATA_DRIVER_DATA_FLAG,
- UVERBS_UHW_OUT,
+enum ib_uverbs_flow_action_esp_keymat_aes_gcm_iv_algo {
+ IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ,
};
-enum uverbs_create_cq_cmd_attr_ids {
- CREATE_CQ_HANDLE,
- CREATE_CQ_CQE,
- CREATE_CQ_USER_HANDLE,
- CREATE_CQ_COMP_CHANNEL,
- CREATE_CQ_COMP_VECTOR,
- CREATE_CQ_FLAGS,
- CREATE_CQ_RESP_CQE,
+struct ib_uverbs_flow_action_esp_keymat_aes_gcm {
+ __aligned_u64 iv;
+ __u32 iv_algo; /* Use enum ib_uverbs_flow_action_esp_keymat_aes_gcm_iv_algo */
+
+ __u32 salt;
+ __u32 icv_len;
+
+ __u32 key_len;
+ __u32 aes_key[256 / 32];
};
-enum uverbs_destroy_cq_cmd_attr_ids {
- DESTROY_CQ_HANDLE,
- DESTROY_CQ_RESP,
+enum ib_uverbs_flow_action_esp_replay {
+ IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE,
+ IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP,
};
-enum uverbs_actions_cq_ops {
- UVERBS_CQ_CREATE,
- UVERBS_CQ_DESTROY,
+struct ib_uverbs_flow_action_esp_replay_bmp {
+ __u32 size;
};
-#endif
+enum ib_uverbs_flow_action_esp_flags {
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_INLINE_CRYPTO = 0UL << 0, /* Default */
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_FULL_OFFLOAD = 1UL << 0,
+
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_TUNNEL = 0UL << 1, /* Default */
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_TRANSPORT = 1UL << 1,
+
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_DECRYPT = 0UL << 2, /* Default */
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT = 1UL << 2,
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW = 1UL << 3,
+};
+
+struct ib_uverbs_flow_action_esp_encap {
+ /* This struct represents a list of pointers to flow_xxxx_filter
+ * structs that encapsulate the payload in ESP tunnel mode.
+ */
+ RDMA_UAPI_PTR(void *, val_ptr); /* pointer to a flow_xxxx_filter */
+ RDMA_UAPI_PTR(struct ib_uverbs_flow_action_esp_encap *, next_ptr);
+ __u16 len; /* Len of the filter struct val_ptr points to */
+ __u16 type; /* Use flow_spec_type enum */
+};
+
+struct ib_uverbs_flow_action_esp {
+ __u32 spi;
+ __u32 seq;
+ __u32 tfc_pad;
+ __u32 flags;
+ __aligned_u64 hard_limit_pkts;
+};
+
+#endif
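
[Editorial userspace sketch of the chained-pointer convention above; the
0x30 type value is assumed to match the in-kernel flow_spec_type for
IPv4. Each list node carries a u64-encoded pointer, so the wire layout
is identical for 32- and 64-bit processes:]

#include <stdint.h>
#include <rdma/ib_user_ioctl_verbs.h>
#include <rdma/ib_user_verbs.h>

static void build_esp_encap(struct ib_uverbs_flow_ipv4_filter *outer,
			    struct ib_uverbs_flow_action_esp_encap *encap)
{
	encap->val_ptr  = (uintptr_t)outer;	/* filter payload */
	encap->next_ptr = 0;			/* single-node chain */
	encap->len      = sizeof(*outer);
	encap->type     = 0x30;	/* assumed flow_spec_type value for IPV4 */
}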
diff --git a/include/uapi/rdma/ib_user_mad.h b/include/uapi/rdma/ib_user_mad.h
index 330a3c5..ef92118 100644
--- a/include/uapi/rdma/ib_user_mad.h
+++ b/include/uapi/rdma/ib_user_mad.h
@@ -143,7 +143,7 @@ struct ib_user_mad_hdr {
*/
struct ib_user_mad {
struct ib_user_mad_hdr hdr;
- __u64 data[0];
+ __aligned_u64 data[0];
};
/*
@@ -225,7 +225,7 @@ struct ib_user_mad_reg_req2 {
__u8 mgmt_class_version;
__u16 res;
__u32 flags;
- __u64 method_mask[2];
+ __aligned_u64 method_mask[2];
__u32 oui;
__u8 rmpp_version;
__u8 reserved[3];
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 04d0e67..9be0739 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -117,13 +117,13 @@ enum {
*/
struct ib_uverbs_async_event_desc {
- __u64 element;
+ __aligned_u64 element;
__u32 event_type; /* enum ib_event_type */
__u32 reserved;
};
struct ib_uverbs_comp_event_desc {
- __u64 cq_handle;
+ __aligned_u64 cq_handle;
};
struct ib_uverbs_cq_moderation_caps {
@@ -141,10 +141,7 @@ struct ib_uverbs_cq_moderation_caps {
*/
#define IB_USER_VERBS_CMD_COMMAND_MASK 0xff
-#define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u
-#define IB_USER_VERBS_CMD_FLAGS_SHIFT 24
-
-#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80
+#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80000000u
struct ib_uverbs_cmd_hdr {
__u32 command;
@@ -153,15 +150,15 @@ struct ib_uverbs_cmd_hdr {
};
struct ib_uverbs_ex_cmd_hdr {
- __u64 response;
+ __aligned_u64 response;
__u16 provider_in_words;
__u16 provider_out_words;
__u32 cmd_hdr_reserved;
};
struct ib_uverbs_get_context {
- __u64 response;
- __u64 driver_data[0];
+ __aligned_u64 response;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_get_context_resp {
@@ -170,16 +167,16 @@ struct ib_uverbs_get_context_resp {
};
struct ib_uverbs_query_device {
- __u64 response;
- __u64 driver_data[0];
+ __aligned_u64 response;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_query_device_resp {
- __u64 fw_ver;
+ __aligned_u64 fw_ver;
__be64 node_guid;
__be64 sys_image_guid;
- __u64 max_mr_size;
- __u64 page_size_cap;
+ __aligned_u64 max_mr_size;
+ __aligned_u64 page_size_cap;
__u32 vendor_id;
__u32 vendor_part_id;
__u32 hw_ver;
@@ -224,7 +221,7 @@ struct ib_uverbs_ex_query_device {
};
struct ib_uverbs_odp_caps {
- __u64 general_caps;
+ __aligned_u64 general_caps;
struct {
__u32 rc_odp_caps;
__u32 uc_odp_caps;
@@ -263,21 +260,22 @@ struct ib_uverbs_ex_query_device_resp {
__u32 comp_mask;
__u32 response_length;
struct ib_uverbs_odp_caps odp_caps;
- __u64 timestamp_mask;
- __u64 hca_core_clock; /* in KHZ */
- __u64 device_cap_flags_ex;
+ __aligned_u64 timestamp_mask;
+ __aligned_u64 hca_core_clock; /* in KHZ */
+ __aligned_u64 device_cap_flags_ex;
struct ib_uverbs_rss_caps rss_caps;
__u32 max_wq_type_rq;
__u32 raw_packet_caps;
struct ib_uverbs_tm_caps tm_caps;
struct ib_uverbs_cq_moderation_caps cq_moderation_caps;
+ __aligned_u64 max_dm_size;
};
struct ib_uverbs_query_port {
- __u64 response;
+ __aligned_u64 response;
__u8 port_num;
__u8 reserved[7];
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_query_port_resp {
@@ -305,8 +303,8 @@ struct ib_uverbs_query_port_resp {
};
struct ib_uverbs_alloc_pd {
- __u64 response;
- __u64 driver_data[0];
+ __aligned_u64 response;
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_alloc_pd_resp {
@@ -318,10 +316,10 @@ struct ib_uverbs_dealloc_pd {
};
struct ib_uverbs_open_xrcd {
- __u64 response;
+ __aligned_u64 response;
__u32 fd;
__u32 oflags;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_open_xrcd_resp {
@@ -333,13 +331,13 @@ struct ib_uverbs_close_xrcd {
};
struct ib_uverbs_reg_mr {
- __u64 response;
- __u64 start;
- __u64 length;
- __u64 hca_va;
+ __aligned_u64 response;
+ __aligned_u64 start;
+ __aligned_u64 length;
+ __aligned_u64 hca_va;
__u32 pd_handle;
__u32 access_flags;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_reg_mr_resp {
@@ -349,12 +347,12 @@ struct ib_uverbs_reg_mr_resp {
};
struct ib_uverbs_rereg_mr {
- __u64 response;
+ __aligned_u64 response;
__u32 mr_handle;
__u32 flags;
- __u64 start;
- __u64 length;
- __u64 hca_va;
+ __aligned_u64 start;
+ __aligned_u64 length;
+ __aligned_u64 hca_va;
__u32 pd_handle;
__u32 access_flags;
};
@@ -369,7 +367,7 @@ struct ib_uverbs_dereg_mr {
};
struct ib_uverbs_alloc_mw {
- __u64 response;
+ __aligned_u64 response;
__u32 pd_handle;
__u8 mw_type;
__u8 reserved[3];
@@ -385,7 +383,7 @@ struct ib_uverbs_dealloc_mw {
};
struct ib_uverbs_create_comp_channel {
- __u64 response;
+ __aligned_u64 response;
};
struct ib_uverbs_create_comp_channel_resp {
@@ -393,13 +391,13 @@ struct ib_uverbs_create_comp_channel_resp {
};
struct ib_uverbs_create_cq {
- __u64 response;
- __u64 user_handle;
+ __aligned_u64 response;
+ __aligned_u64 user_handle;
__u32 cqe;
__u32 comp_vector;
__s32 comp_channel;
__u32 reserved;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
enum ib_uverbs_ex_create_cq_flags {
@@ -408,7 +406,7 @@ enum ib_uverbs_ex_create_cq_flags {
};
struct ib_uverbs_ex_create_cq {
- __u64 user_handle;
+ __aligned_u64 user_handle;
__u32 cqe;
__u32 comp_vector;
__s32 comp_channel;
@@ -429,26 +427,26 @@ struct ib_uverbs_ex_create_cq_resp {
};
struct ib_uverbs_resize_cq {
- __u64 response;
+ __aligned_u64 response;
__u32 cq_handle;
__u32 cqe;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_resize_cq_resp {
__u32 cqe;
__u32 reserved;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_poll_cq {
- __u64 response;
+ __aligned_u64 response;
__u32 cq_handle;
__u32 ne;
};
struct ib_uverbs_wc {
- __u64 wr_id;
+ __aligned_u64 wr_id;
__u32 status;
__u32 opcode;
__u32 vendor_err;
@@ -480,7 +478,7 @@ struct ib_uverbs_req_notify_cq {
};
struct ib_uverbs_destroy_cq {
- __u64 response;
+ __aligned_u64 response;
__u32 cq_handle;
__u32 reserved;
};
@@ -549,8 +547,8 @@ struct ib_uverbs_qp_attr {
};
struct ib_uverbs_create_qp {
- __u64 response;
- __u64 user_handle;
+ __aligned_u64 response;
+ __aligned_u64 user_handle;
__u32 pd_handle;
__u32 send_cq_handle;
__u32 recv_cq_handle;
@@ -564,7 +562,7 @@ struct ib_uverbs_create_qp {
__u8 qp_type;
__u8 is_srq;
__u8 reserved;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
enum ib_uverbs_create_qp_mask {
@@ -590,7 +588,7 @@ enum {
};
struct ib_uverbs_ex_create_qp {
- __u64 user_handle;
+ __aligned_u64 user_handle;
__u32 pd_handle;
__u32 send_cq_handle;
__u32 recv_cq_handle;
@@ -611,13 +609,13 @@ struct ib_uverbs_ex_create_qp {
};
struct ib_uverbs_open_qp {
- __u64 response;
- __u64 user_handle;
+ __aligned_u64 response;
+ __aligned_u64 user_handle;
__u32 pd_handle;
__u32 qpn;
__u8 qp_type;
__u8 reserved[7];
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
/* also used for open response */
@@ -658,10 +656,10 @@ struct ib_uverbs_qp_dest {
};
struct ib_uverbs_query_qp {
- __u64 response;
+ __aligned_u64 response;
__u32 qp_handle;
__u32 attr_mask;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_query_qp_resp {
@@ -695,7 +693,7 @@ struct ib_uverbs_query_qp_resp {
__u8 alt_timeout;
__u8 sq_sig_all;
__u8 reserved[5];
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_modify_qp {
@@ -725,7 +723,7 @@ struct ib_uverbs_modify_qp {
__u8 alt_port_num;
__u8 alt_timeout;
__u8 reserved[2];
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_ex_modify_qp {
@@ -743,7 +741,7 @@ struct ib_uverbs_ex_modify_qp_resp {
};
struct ib_uverbs_destroy_qp {
- __u64 response;
+ __aligned_u64 response;
__u32 qp_handle;
__u32 reserved;
};
@@ -759,13 +757,13 @@ struct ib_uverbs_destroy_qp_resp {
* document the ABI.
*/
struct ib_uverbs_sge {
- __u64 addr;
+ __aligned_u64 addr;
__u32 length;
__u32 lkey;
};
struct ib_uverbs_send_wr {
- __u64 wr_id;
+ __aligned_u64 wr_id;
__u32 num_sge;
__u32 opcode;
__u32 send_flags;
@@ -775,14 +773,14 @@ struct ib_uverbs_send_wr {
} ex;
union {
struct {
- __u64 remote_addr;
+ __aligned_u64 remote_addr;
__u32 rkey;
__u32 reserved;
} rdma;
struct {
- __u64 remote_addr;
- __u64 compare_add;
- __u64 swap;
+ __aligned_u64 remote_addr;
+ __aligned_u64 compare_add;
+ __aligned_u64 swap;
__u32 rkey;
__u32 reserved;
} atomic;
@@ -796,7 +794,7 @@ struct ib_uverbs_send_wr {
};
struct ib_uverbs_post_send {
- __u64 response;
+ __aligned_u64 response;
__u32 qp_handle;
__u32 wr_count;
__u32 sge_count;
@@ -809,13 +807,13 @@ struct ib_uverbs_post_send_resp {
};
struct ib_uverbs_recv_wr {
- __u64 wr_id;
+ __aligned_u64 wr_id;
__u32 num_sge;
__u32 reserved;
};
struct ib_uverbs_post_recv {
- __u64 response;
+ __aligned_u64 response;
__u32 qp_handle;
__u32 wr_count;
__u32 sge_count;
@@ -828,7 +826,7 @@ struct ib_uverbs_post_recv_resp {
};
struct ib_uverbs_post_srq_recv {
- __u64 response;
+ __aligned_u64 response;
__u32 srq_handle;
__u32 wr_count;
__u32 sge_count;
@@ -841,8 +839,8 @@ struct ib_uverbs_post_srq_recv_resp {
};
struct ib_uverbs_create_ah {
- __u64 response;
- __u64 user_handle;
+ __aligned_u64 response;
+ __aligned_u64 user_handle;
__u32 pd_handle;
__u32 reserved;
struct ib_uverbs_ah_attr attr;
@@ -861,7 +859,7 @@ struct ib_uverbs_attach_mcast {
__u32 qp_handle;
__u16 mlid;
__u16 reserved;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_detach_mcast {
@@ -869,7 +867,7 @@ struct ib_uverbs_detach_mcast {
__u32 qp_handle;
__u16 mlid;
__u16 reserved;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_flow_spec_hdr {
@@ -877,7 +875,7 @@ struct ib_uverbs_flow_spec_hdr {
__u16 size;
__u16 reserved;
/* followed by flow_spec */
- __u64 flow_spec_data[0];
+ __aligned_u64 flow_spec_data[0];
};
struct ib_uverbs_flow_eth_filter {
@@ -987,6 +985,19 @@ struct ib_uverbs_flow_spec_action_drop {
};
};
+struct ib_uverbs_flow_spec_action_handle {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ __u32 handle;
+ __u32 reserved1;
+};
+
struct ib_uverbs_flow_tunnel_filter {
__be32 tunnel_id;
};
@@ -1004,6 +1015,24 @@ struct ib_uverbs_flow_spec_tunnel {
struct ib_uverbs_flow_tunnel_filter mask;
};
+struct ib_uverbs_flow_spec_esp_filter {
+ __u32 spi;
+ __u32 seq;
+};
+
+struct ib_uverbs_flow_spec_esp {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_spec_esp_filter val;
+ struct ib_uverbs_flow_spec_esp_filter mask;
+};
+
struct ib_uverbs_flow_attr {
__u32 type;
__u16 size;
@@ -1036,18 +1065,18 @@ struct ib_uverbs_destroy_flow {
};
struct ib_uverbs_create_srq {
- __u64 response;
- __u64 user_handle;
+ __aligned_u64 response;
+ __aligned_u64 user_handle;
__u32 pd_handle;
__u32 max_wr;
__u32 max_sge;
__u32 srq_limit;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_create_xsrq {
- __u64 response;
- __u64 user_handle;
+ __aligned_u64 response;
+ __aligned_u64 user_handle;
__u32 srq_type;
__u32 pd_handle;
__u32 max_wr;
@@ -1056,7 +1085,7 @@ struct ib_uverbs_create_xsrq {
__u32 max_num_tags;
__u32 xrcd_handle;
__u32 cq_handle;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_create_srq_resp {
@@ -1071,14 +1100,14 @@ struct ib_uverbs_modify_srq {
__u32 attr_mask;
__u32 max_wr;
__u32 srq_limit;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_query_srq {
- __u64 response;
+ __aligned_u64 response;
__u32 srq_handle;
__u32 reserved;
- __u64 driver_data[0];
+ __aligned_u64 driver_data[0];
};
struct ib_uverbs_query_srq_resp {
@@ -1089,7 +1118,7 @@ struct ib_uverbs_query_srq_resp {
};
struct ib_uverbs_destroy_srq {
- __u64 response;
+ __aligned_u64 response;
__u32 srq_handle;
__u32 reserved;
};
@@ -1101,7 +1130,7 @@ struct ib_uverbs_destroy_srq_resp {
struct ib_uverbs_ex_create_wq {
__u32 comp_mask;
__u32 wq_type;
- __u64 user_handle;
+ __aligned_u64 user_handle;
__u32 pd_handle;
__u32 cq_handle;
__u32 max_wr;
diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h
index 7f9c373..04f64bc 100644
--- a/include/uapi/rdma/mlx4-abi.h
+++ b/include/uapi/rdma/mlx4-abi.h
@@ -59,6 +59,10 @@ struct mlx4_ib_alloc_ucontext_resp_v3 {
__u16 bf_regs_per_page;
};
+enum {
+ MLX4_USER_DEV_CAP_LARGE_CQE = 1L << 0,
+};
+
struct mlx4_ib_alloc_ucontext_resp {
__u32 dev_caps;
__u32 qp_tab_size;
@@ -73,8 +77,8 @@ struct mlx4_ib_alloc_pd_resp {
};
struct mlx4_ib_create_cq {
- __u64 buf_addr;
- __u64 db_addr;
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
};
struct mlx4_ib_create_cq_resp {
@@ -83,12 +87,12 @@ struct mlx4_ib_create_cq_resp {
};
struct mlx4_ib_resize_cq {
- __u64 buf_addr;
+ __aligned_u64 buf_addr;
};
struct mlx4_ib_create_srq {
- __u64 buf_addr;
- __u64 db_addr;
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
};
struct mlx4_ib_create_srq_resp {
@@ -97,7 +101,7 @@ struct mlx4_ib_create_srq_resp {
};
struct mlx4_ib_create_qp_rss {
- __u64 rx_hash_fields_mask; /* Use enum mlx4_ib_rx_hash_fields */
+ __aligned_u64 rx_hash_fields_mask; /* Use enum mlx4_ib_rx_hash_fields */
__u8 rx_hash_function; /* Use enum mlx4_ib_rx_hash_function_flags */
__u8 reserved[7];
__u8 rx_hash_key[40];
@@ -106,8 +110,8 @@ struct mlx4_ib_create_qp_rss {
};
struct mlx4_ib_create_qp {
- __u64 buf_addr;
- __u64 db_addr;
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
__u8 log_sq_bb_count;
__u8 log_sq_stride;
__u8 sq_no_prefetch;
@@ -116,8 +120,8 @@ struct mlx4_ib_create_qp {
};
struct mlx4_ib_create_wq {
- __u64 buf_addr;
- __u64 db_addr;
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
__u8 log_range_size;
__u8 reserved[3];
__u32 comp_mask;
@@ -156,4 +160,32 @@ enum mlx4_ib_rx_hash_fields {
MLX4_IB_RX_HASH_INNER = 1ULL << 31,
};
+struct mlx4_ib_rss_caps {
+ __aligned_u64 rx_hash_fields_mask; /* enum mlx4_ib_rx_hash_fields */
+ __u8 rx_hash_function; /* enum mlx4_ib_rx_hash_function_flags */
+ __u8 reserved[7];
+};
+
+enum query_device_resp_mask {
+ MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
+};
+
+struct mlx4_ib_tso_caps {
+ __u32 max_tso; /* Maximum tso payload size in bytes */
+ /* The corresponding bit will be set if the QP type from
+ * 'enum ib_qp_type' is supported.
+ */
+ __u32 supported_qpts;
+};
+
+struct mlx4_uverbs_ex_query_device_resp {
+ __u32 comp_mask;
+ __u32 response_length;
+ __aligned_u64 hca_core_clock_offset;
+ __u32 max_inl_recv_sz;
+ __u32 reserved;
+ struct mlx4_ib_rss_caps rss_caps;
+ struct mlx4_ib_tso_caps tso_caps;
+};
+
#endif /* MLX4_ABI_USER_H */
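
The new mlx4_uverbs_ex_query_device_resp exposes RSS and TSO capabilities to userspace. A hedged sketch of how a consumer might test the new bits; the helper names and response sources are illustrative only:

#include <rdma/mlx4-abi.h>

/* Sketch only: `ctx` is assumed to come back from the mlx4
 * ALLOC_UCONTEXT command and `dev` from EX_QUERY_DEVICE. */
static int mlx4_has_large_cqe(const struct mlx4_ib_alloc_ucontext_resp *ctx)
{
	return !!(ctx->dev_caps & MLX4_USER_DEV_CAP_LARGE_CQE);
}

static int mlx4_tso_on_raw_qp(const struct mlx4_uverbs_ex_query_device_resp *dev)
{
	/* supported_qpts is indexed by the kernel's enum ib_qp_type,
	 * where IB_QPT_RAW_PACKET is 8. */
	return dev->tso_caps.max_tso &&
	       (dev->tso_caps.supported_qpts & (1 << 8 /* IB_QPT_RAW_PACKET */));
}
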
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 1111aa4..cb4a02c 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -84,7 +84,7 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
__u8 reserved0;
__u16 reserved1;
__u32 reserved2;
- __u64 lib_caps;
+ __aligned_u64 lib_caps;
};
enum mlx5_ib_alloc_ucontext_resp_mask {
@@ -107,6 +107,14 @@ enum mlx5_user_inline_mode {
MLX5_USER_INLINE_MODE_TCP_UDP,
};
+enum {
+ MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM = 1 << 0,
+ MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA = 1 << 1,
+ MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING = 1 << 2,
+ MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD = 1 << 3,
+ MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN = 1 << 4,
+};
+
struct mlx5_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 bf_reg_size;
@@ -118,14 +126,14 @@ struct mlx5_ib_alloc_ucontext_resp {
__u32 max_recv_wr;
__u32 max_srq_recv_wr;
__u16 num_ports;
- __u16 reserved1;
+ __u16 flow_action_flags;
__u32 comp_mask;
__u32 response_length;
__u8 cqe_version;
__u8 cmds_supp_uhw;
__u8 eth_min_inline;
__u8 clock_info_versions;
- __u64 hca_core_clock_offset;
+ __aligned_u64 hca_core_clock_offset;
__u32 log_uar_size;
__u32 num_uars_per_page;
__u32 num_dyn_bfregs;
@@ -147,7 +155,7 @@ struct mlx5_ib_tso_caps {
};
struct mlx5_ib_rss_caps {
- __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
+ __aligned_u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
__u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */
__u8 reserved[7];
};
@@ -163,6 +171,10 @@ struct mlx5_ib_cqe_comp_caps {
__u32 supported_format; /* enum mlx5_ib_cqe_comp_res_format */
};
+enum mlx5_ib_packet_pacing_cap_flags {
+ MLX5_IB_PP_SUPPORT_BURST = 1 << 0,
+};
+
struct mlx5_packet_pacing_caps {
__u32 qp_rate_limit_min;
__u32 qp_rate_limit_max; /* In kbps */
@@ -172,7 +184,8 @@ struct mlx5_packet_pacing_caps {
* supported_qpts |= 1 << IB_QPT_RAW_PACKET
*/
__u32 supported_qpts;
- __u32 reserved;
+ __u8 cap_flags; /* enum mlx5_ib_packet_pacing_cap_flags */
+ __u8 reserved[3];
};
enum mlx5_ib_mpw_caps {
@@ -243,8 +256,8 @@ enum mlx5_ib_create_cq_flags {
};
struct mlx5_ib_create_cq {
- __u64 buf_addr;
- __u64 db_addr;
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
__u32 cqe_size;
__u8 cqe_comp_en;
__u8 cqe_comp_res_format;
@@ -257,15 +270,15 @@ struct mlx5_ib_create_cq_resp {
};
struct mlx5_ib_resize_cq {
- __u64 buf_addr;
+ __aligned_u64 buf_addr;
__u16 cqe_size;
__u16 reserved0;
__u32 reserved1;
};
struct mlx5_ib_create_srq {
- __u64 buf_addr;
- __u64 db_addr;
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
__u32 flags;
__u32 reserved0; /* explicit padding (optional on i386) */
__u32 uidx;
@@ -278,8 +291,8 @@ struct mlx5_ib_create_srq_resp {
};
struct mlx5_ib_create_qp {
- __u64 buf_addr;
- __u64 db_addr;
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
__u32 sq_wqe_count;
__u32 rq_wqe_count;
__u32 rq_wqe_shift;
@@ -287,8 +300,8 @@ struct mlx5_ib_create_qp {
__u32 uidx;
__u32 bfreg_index;
union {
- __u64 sq_buf_addr;
- __u64 access_key;
+ __aligned_u64 sq_buf_addr;
+ __aligned_u64 access_key;
};
};
@@ -314,12 +327,13 @@ enum mlx5_rx_hash_fields {
MLX5_RX_HASH_DST_PORT_TCP = 1 << 5,
MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6,
MLX5_RX_HASH_DST_PORT_UDP = 1 << 7,
+ MLX5_RX_HASH_IPSEC_SPI = 1 << 8,
/* Save bits for future fields */
MLX5_RX_HASH_INNER = (1UL << 31),
};
struct mlx5_ib_create_qp_rss {
- __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
+ __aligned_u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */
__u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */
__u8 rx_key_len; /* valid only for Toeplitz */
__u8 reserved[6];
@@ -330,6 +344,7 @@ struct mlx5_ib_create_qp_rss {
struct mlx5_ib_create_qp_resp {
__u32 bfreg_index;
+ __u32 reserved;
};
struct mlx5_ib_alloc_mw {
@@ -344,8 +359,8 @@ enum mlx5_ib_create_wq_mask {
};
struct mlx5_ib_create_wq {
- __u64 buf_addr;
- __u64 db_addr;
+ __aligned_u64 buf_addr;
+ __aligned_u64 db_addr;
__u32 rq_wqe_count;
__u32 rq_wqe_shift;
__u32 user_index;
@@ -362,6 +377,18 @@ struct mlx5_ib_create_ah_resp {
__u8 reserved[6];
};
+struct mlx5_ib_burst_info {
+ __u32 max_burst_sz;
+ __u16 typical_pkt_sz;
+ __u16 reserved;
+};
+
+struct mlx5_ib_modify_qp {
+ __u32 comp_mask;
+ struct mlx5_ib_burst_info burst_info;
+ __u32 reserved;
+};
+
struct mlx5_ib_modify_qp_resp {
__u32 response_length;
__u32 dctn;
@@ -385,13 +412,13 @@ struct mlx5_ib_modify_wq {
struct mlx5_ib_clock_info {
__u32 sign;
__u32 resv;
- __u64 nsec;
- __u64 cycles;
- __u64 frac;
+ __aligned_u64 nsec;
+ __aligned_u64 cycles;
+ __aligned_u64 frac;
__u32 mult;
__u32 shift;
- __u64 mask;
- __u64 overflow_period;
+ __aligned_u64 mask;
+ __aligned_u64 overflow_period;
};
enum mlx5_ib_mmap_cmd {
@@ -403,6 +430,7 @@ enum mlx5_ib_mmap_cmd {
MLX5_IB_MMAP_CORE_CLOCK = 5,
MLX5_IB_MMAP_ALLOC_WC = 6,
MLX5_IB_MMAP_CLOCK_INFO = 7,
+ MLX5_IB_MMAP_DEVICE_MEM = 8,
};
enum {
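
The mlx5 additions pair a capability bit (MLX5_IB_PP_SUPPORT_BURST in mlx5_packet_pacing_caps.cap_flags) with a new driver-private modify-QP input carrying the burst shape. A sketch of filling it, assuming the device reported the capability; the numeric values are examples only:

#include <rdma/mlx5-abi.h>
#include <string.h>

/* Sketch: fill the driver-private modify-QP payload, assuming the
 * device set MLX5_IB_PP_SUPPORT_BURST in packet_pacing_caps.cap_flags. */
static void fill_burst(struct mlx5_ib_modify_qp *cmd)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->burst_info.max_burst_sz = 4096;	/* example: bytes per burst */
	cmd->burst_info.typical_pkt_sz = 1500;	/* example: typical wire size */
}
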
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
new file mode 100644
index 0000000..f7d685e
--- /dev/null
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_USER_IOCTL_CMDS_H
+#define MLX5_USER_IOCTL_CMDS_H
+
+#include <rdma/ib_user_ioctl_cmds.h>
+
+enum mlx5_ib_create_flow_action_attrs {
+ /* This attribute belongs to the driver namespace */
+ MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_alloc_dm_attrs {
+ MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+};
+
+#endif
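
MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS starts at 1 << UVERBS_ID_NS_SHIFT because driver-specific ioctl attributes occupy their own ID namespace above the common verbs attributes. A sketch, under the assumption that the UVERBS_ID_NS_* definitions live in the ib_user_ioctl_cmds.h added elsewhere in this patch:

#include <rdma/mlx5_user_ioctl_cmds.h>

/* Assumption: UVERBS_ID_NS_MASK and UVERBS_ID_NS_SHIFT are provided
 * by ib_user_ioctl_cmds.h, which this header includes. */
static int is_driver_attr(__u16 attr_id)
{
	return !!(attr_id & UVERBS_ID_NS_MASK);
}
/* MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS == 1 << UVERBS_ID_NS_SHIFT,
 * i.e. the first ID in the driver namespace. */
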
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
new file mode 100644
index 0000000..8a2fb33
--- /dev/null
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_USER_IOCTL_VERBS_H
+#define MLX5_USER_IOCTL_VERBS_H
+
+#include <linux/types.h>
+
+enum mlx5_ib_uapi_flow_action_flags {
+ MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA = 1 << 0,
+};
+
+#endif
+
diff --git a/include/uapi/rdma/mthca-abi.h b/include/uapi/rdma/mthca-abi.h
index 3020d8a..ac756cd 100644
--- a/include/uapi/rdma/mthca-abi.h
+++ b/include/uapi/rdma/mthca-abi.h
@@ -74,8 +74,8 @@ struct mthca_reg_mr {
struct mthca_create_cq {
__u32 lkey;
__u32 pdn;
- __u64 arm_db_page;
- __u64 set_db_page;
+ __aligned_u64 arm_db_page;
+ __aligned_u64 set_db_page;
__u32 arm_db_index;
__u32 set_db_index;
};
@@ -93,7 +93,7 @@ struct mthca_resize_cq {
struct mthca_create_srq {
__u32 lkey;
__u32 db_index;
- __u64 db_page;
+ __aligned_u64 db_page;
};
struct mthca_create_srq_resp {
@@ -104,8 +104,8 @@ struct mthca_create_srq_resp {
struct mthca_create_qp {
__u32 lkey;
__u32 reserved;
- __u64 sq_db_page;
- __u64 rq_db_page;
+ __aligned_u64 sq_db_page;
+ __aligned_u64 rq_db_page;
__u32 sq_db_index;
__u32 rq_db_index;
};
diff --git a/include/uapi/rdma/nes-abi.h b/include/uapi/rdma/nes-abi.h
index f5b2437..35bfd40 100644
--- a/include/uapi/rdma/nes-abi.h
+++ b/include/uapi/rdma/nes-abi.h
@@ -72,14 +72,14 @@ struct nes_alloc_pd_resp {
};
struct nes_create_cq_req {
- __u64 user_cq_buffer;
+ __aligned_u64 user_cq_buffer;
__u32 mcrqf;
__u8 reserved[4];
};
struct nes_create_qp_req {
- __u64 user_wqe_buffers;
- __u64 user_qp_buffer;
+ __aligned_u64 user_wqe_buffers;
+ __aligned_u64 user_qp_buffer;
};
enum iwnes_memreg_type {
diff --git a/include/uapi/rdma/ocrdma-abi.h b/include/uapi/rdma/ocrdma-abi.h
index ad64a3c..284d47b 100644
--- a/include/uapi/rdma/ocrdma-abi.h
+++ b/include/uapi/rdma/ocrdma-abi.h
@@ -55,17 +55,17 @@ struct ocrdma_alloc_ucontext_resp {
__u32 wqe_size;
__u32 max_inline_data;
__u32 dpp_wqe_size;
- __u64 ah_tbl_page;
+ __aligned_u64 ah_tbl_page;
__u32 ah_tbl_len;
__u32 rqe_size;
__u8 fw_ver[32];
/* for future use/new features in progress */
- __u64 rsvd1;
- __u64 rsvd2;
+ __aligned_u64 rsvd1;
+ __aligned_u64 rsvd2;
};
struct ocrdma_alloc_pd_ureq {
- __u64 rsvd1;
+ __u32 rsvd[2];
};
struct ocrdma_alloc_pd_uresp {
@@ -73,7 +73,7 @@ struct ocrdma_alloc_pd_uresp {
__u32 dpp_enabled;
__u32 dpp_page_addr_hi;
__u32 dpp_page_addr_lo;
- __u64 rsvd1;
+ __u32 rsvd[2];
};
struct ocrdma_create_cq_ureq {
@@ -87,13 +87,13 @@ struct ocrdma_create_cq_uresp {
__u32 page_size;
__u32 num_pages;
__u32 max_hw_cqe;
- __u64 page_addr[MAX_CQ_PAGES];
- __u64 db_page_addr;
+ __aligned_u64 page_addr[MAX_CQ_PAGES];
+ __aligned_u64 db_page_addr;
__u32 db_page_size;
__u32 phase_change;
/* for future use/new features in progress */
- __u64 rsvd1;
- __u64 rsvd2;
+ __aligned_u64 rsvd1;
+ __aligned_u64 rsvd2;
};
#define MAX_QP_PAGES 8
@@ -115,9 +115,9 @@ struct ocrdma_create_qp_uresp {
__u32 rq_page_size;
__u32 num_sq_pages;
__u32 num_rq_pages;
- __u64 sq_page_addr[MAX_QP_PAGES];
- __u64 rq_page_addr[MAX_QP_PAGES];
- __u64 db_page_addr;
+ __aligned_u64 sq_page_addr[MAX_QP_PAGES];
+ __aligned_u64 rq_page_addr[MAX_QP_PAGES];
+ __aligned_u64 db_page_addr;
__u32 db_page_size;
__u32 dpp_credit;
__u32 dpp_offset;
@@ -126,8 +126,8 @@ struct ocrdma_create_qp_uresp {
__u32 db_sq_offset;
__u32 db_rq_offset;
__u32 db_shift;
- __u64 rsvd[11];
-} __packed;
+ __aligned_u64 rsvd[11];
+};
struct ocrdma_create_srq_uresp {
__u16 rq_dbid;
@@ -137,16 +137,16 @@ struct ocrdma_create_srq_uresp {
__u32 rq_page_size;
__u32 num_rq_pages;
- __u64 rq_page_addr[MAX_QP_PAGES];
- __u64 db_page_addr;
+ __aligned_u64 rq_page_addr[MAX_QP_PAGES];
+ __aligned_u64 db_page_addr;
__u32 db_page_size;
__u32 num_rqe_allocated;
__u32 db_rq_offset;
__u32 db_shift;
- __u64 rsvd2;
- __u64 rsvd3;
+ __aligned_u64 rsvd2;
+ __aligned_u64 rsvd3;
};
#endif /* OCRDMA_ABI_USER_H */
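
Dropping __packed from ocrdma_create_qp_uresp is safe because every hole in the struct is now filled explicitly; once the padding is explicit, packing changes nothing about the layout and only forces slower byte-wise access on strict-alignment architectures. A generic sketch (hypothetical structs) of the distinction:

#include <linux/types.h>

struct holey {			/* unpacked on x86_64: b at 8, size 16 */
	__u32 a;
	__u64 b;
} __attribute__((packed));	/* packed: b at 4, size 12 everywhere */

struct padded {
	__u32 a;
	__u32 pad;		/* the hole, made explicit */
	__aligned_u64 b;	/* offset 8 on every ABI, no packing needed */
};
_Static_assert(sizeof(struct padded) == 16, "layout is fixed");
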
diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h
index 261c6db..8ba0989 100644
--- a/include/uapi/rdma/qedr-abi.h
+++ b/include/uapi/rdma/qedr-abi.h
@@ -40,7 +40,7 @@
/* user kernel communication data structures. */
struct qedr_alloc_ucontext_resp {
- __u64 db_pa;
+ __aligned_u64 db_pa;
__u32 db_size;
__u32 max_send_wr;
@@ -53,24 +53,27 @@ struct qedr_alloc_ucontext_resp {
__u8 dpm_enabled;
__u8 wids_enabled;
__u16 wid_count;
+ __u32 reserved;
};
struct qedr_alloc_pd_ureq {
- __u64 rsvd1;
+ __aligned_u64 rsvd1;
};
struct qedr_alloc_pd_uresp {
__u32 pd_id;
+ __u32 reserved;
};
struct qedr_create_cq_ureq {
- __u64 addr;
- __u64 len;
+ __aligned_u64 addr;
+ __aligned_u64 len;
};
struct qedr_create_cq_uresp {
__u32 db_offset;
__u16 icid;
+ __u16 reserved;
};
struct qedr_create_qp_ureq {
@@ -79,17 +82,17 @@ struct qedr_create_qp_ureq {
/* SQ */
/* user space virtual address of SQ buffer */
- __u64 sq_addr;
+ __aligned_u64 sq_addr;
/* length of SQ buffer */
- __u64 sq_len;
+ __aligned_u64 sq_len;
/* RQ */
/* user space virtual address of RQ buffer */
- __u64 rq_addr;
+ __aligned_u64 rq_addr;
/* length of RQ buffer */
- __u64 rq_len;
+ __aligned_u64 rq_len;
};
struct qedr_create_qp_uresp {
@@ -105,6 +108,7 @@ struct qedr_create_qp_uresp {
__u16 rq_icid;
__u32 rq_db2_offset;
+ __u32 reserved;
};
#endif /* __QEDR_USER_H__ */
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 4c77e2a..0ce0943 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -238,6 +238,14 @@ enum rdma_nldev_command {
RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */
+ RDMA_NLDEV_CMD_RES_CM_ID_GET, /* can dump */
+
+ RDMA_NLDEV_CMD_RES_CQ_GET, /* can dump */
+
+ RDMA_NLDEV_CMD_RES_MR_GET, /* can dump */
+
+ RDMA_NLDEV_CMD_RES_PD_GET, /* can dump */
+
RDMA_NLDEV_NUM_OPS
};
@@ -350,6 +358,49 @@ enum rdma_nldev_attr {
*/
RDMA_NLDEV_ATTR_RES_KERN_NAME, /* string */
+ RDMA_NLDEV_ATTR_RES_CM_ID, /* nested table */
+ RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, /* nested table */
+ /*
+ * rdma_cm_id port space.
+ */
+ RDMA_NLDEV_ATTR_RES_PS, /* u32 */
+ /*
+ * Source and destination socket addresses
+ */
+ RDMA_NLDEV_ATTR_RES_SRC_ADDR, /* __kernel_sockaddr_storage */
+ RDMA_NLDEV_ATTR_RES_DST_ADDR, /* __kernel_sockaddr_storage */
+
+ RDMA_NLDEV_ATTR_RES_CQ, /* nested table */
+ RDMA_NLDEV_ATTR_RES_CQ_ENTRY, /* nested table */
+ RDMA_NLDEV_ATTR_RES_CQE, /* u32 */
+ RDMA_NLDEV_ATTR_RES_USECNT, /* u64 */
+ RDMA_NLDEV_ATTR_RES_POLL_CTX, /* u8 */
+
+ RDMA_NLDEV_ATTR_RES_MR, /* nested table */
+ RDMA_NLDEV_ATTR_RES_MR_ENTRY, /* nested table */
+ RDMA_NLDEV_ATTR_RES_RKEY, /* u32 */
+ RDMA_NLDEV_ATTR_RES_LKEY, /* u32 */
+ RDMA_NLDEV_ATTR_RES_IOVA, /* u64 */
+ RDMA_NLDEV_ATTR_RES_MRLEN, /* u64 */
+
+ RDMA_NLDEV_ATTR_RES_PD, /* nested table */
+ RDMA_NLDEV_ATTR_RES_PD_ENTRY, /* nested table */
+ RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, /* u32 */
+ RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, /* u32 */
+
+ /*
+ * Provides the logical name and index of the netdevice which is
+ * connected to the physical port. This information is relevant
+ * for RoCE and iWARP.
+ *
+ * Netdevices associated with containers are expected to be
+ * exported together with the GID table once it is exposed
+ * through netlink, since the associated netdevices are
+ * properties of GIDs.
+ */
+ RDMA_NLDEV_ATTR_NDEV_INDEX, /* u32 */
+ RDMA_NLDEV_ATTR_NDEV_NAME, /* string */
+
RDMA_NLDEV_ATTR_MAX
};
#endif /* _UAPI_RDMA_NETLINK_H */
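
The four new RDMA_NLDEV_CMD_RES_*_GET commands are dumpable: userspace composes the nlmsg type from the NLDEV client ID and the command index and sets NLM_F_DUMP. A sketch using the RDMA_NL_GET_TYPE() helper from this header over a NETLINK_RDMA socket; the reply-parsing loop is omitted:

#include <linux/netlink.h>
#include <rdma/rdma_netlink.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Sketch: request a dump of all tracked CQs from the nldev subsystem. */
static int request_cq_dump(void)
{
	struct nlmsghdr nlh;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_RDMA);

	if (fd < 0)
		return -1;
	memset(&nlh, 0, sizeof(nlh));
	nlh.nlmsg_len = NLMSG_LENGTH(0);
	nlh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					  RDMA_NLDEV_CMD_RES_CQ_GET);
	nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	if (send(fd, &nlh, nlh.nlmsg_len, 0) < 0) {
		close(fd);
		return -1;
	}
	/* recv() loop parsing RDMA_NLDEV_ATTR_RES_CQ nests omitted. */
	return fd;
}
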
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index c83ef00..e126902 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -70,6 +70,14 @@ enum {
RDMA_USER_CM_CMD_JOIN_MCAST
};
+/* See IBTA Annex A11, service ID bytes 4 & 5 */
+enum rdma_ucm_port_space {
+ RDMA_PS_IPOIB = 0x0002,
+ RDMA_PS_IB = 0x013F,
+ RDMA_PS_TCP = 0x0106,
+ RDMA_PS_UDP = 0x0111,
+};
+
/*
* command ABI structures.
*/
@@ -80,9 +88,9 @@ struct rdma_ucm_cmd_hdr {
};
struct rdma_ucm_create_id {
- __u64 uid;
- __u64 response;
- __u16 ps;
+ __aligned_u64 uid;
+ __aligned_u64 response;
+ __u16 ps; /* use enum rdma_ucm_port_space */
__u8 qp_type;
__u8 reserved[5];
};
@@ -92,7 +100,7 @@ struct rdma_ucm_create_id_resp {
};
struct rdma_ucm_destroy_id {
- __u64 response;
+ __aligned_u64 response;
__u32 id;
__u32 reserved;
};
@@ -102,7 +110,7 @@ struct rdma_ucm_destroy_id_resp {
};
struct rdma_ucm_bind_ip {
- __u64 response;
+ __aligned_u64 response;
struct sockaddr_in6 addr;
__u32 id;
};
@@ -143,13 +151,13 @@ enum {
};
struct rdma_ucm_query {
- __u64 response;
+ __aligned_u64 response;
__u32 id;
__u32 option;
};
struct rdma_ucm_query_route_resp {
- __u64 node_guid;
+ __aligned_u64 node_guid;
struct ib_user_path_rec ib_route[2];
struct sockaddr_in6 src_addr;
struct sockaddr_in6 dst_addr;
@@ -159,7 +167,7 @@ struct rdma_ucm_query_route_resp {
};
struct rdma_ucm_query_addr_resp {
- __u64 node_guid;
+ __aligned_u64 node_guid;
__u8 port_num;
__u8 reserved;
__u16 pkey;
@@ -210,7 +218,7 @@ struct rdma_ucm_listen {
};
struct rdma_ucm_accept {
- __u64 uid;
+ __aligned_u64 uid;
struct rdma_ucm_conn_param conn_param;
__u32 id;
__u32 reserved;
@@ -228,7 +236,7 @@ struct rdma_ucm_disconnect {
};
struct rdma_ucm_init_qp_attr {
- __u64 response;
+ __aligned_u64 response;
__u32 id;
__u32 qp_state;
};
@@ -239,8 +247,8 @@ struct rdma_ucm_notify {
};
struct rdma_ucm_join_ip_mcast {
- __u64 response; /* rdma_ucm_create_id_resp */
- __u64 uid;
+ __aligned_u64 response; /* rdma_ucm_create_id_resp */
+ __aligned_u64 uid;
struct sockaddr_in6 addr;
__u32 id;
};
@@ -253,8 +261,8 @@ enum {
};
struct rdma_ucm_join_mcast {
- __u64 response; /* rdma_ucma_create_id_resp */
- __u64 uid;
+ __aligned_u64 response; /* rdma_ucm_create_id_resp */
+ __aligned_u64 uid;
__u32 id;
__u16 addr_size;
__u16 join_flags;
@@ -262,18 +270,23 @@ struct rdma_ucm_join_mcast {
};
struct rdma_ucm_get_event {
- __u64 response;
+ __aligned_u64 response;
};
struct rdma_ucm_event_resp {
- __u64 uid;
+ __aligned_u64 uid;
__u32 id;
__u32 event;
__u32 status;
+ /*
+ * NOTE: This union is not aligned to 8 bytes so none of the union
+ * members may contain a u64 or anything with higher alignment than 4.
+ */
union {
struct rdma_ucm_conn_param conn;
struct rdma_ucm_ud_param ud;
} param;
+ __u32 reserved;
};
/* Option levels */
@@ -291,7 +304,7 @@ enum {
};
struct rdma_ucm_set_option {
- __u64 optval;
+ __aligned_u64 optval;
__u32 id;
__u32 level;
__u32 optname;
@@ -299,7 +312,7 @@ struct rdma_ucm_set_option {
};
struct rdma_ucm_migrate_id {
- __u64 response;
+ __aligned_u64 response;
__u32 id;
__u32 fd;
};
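
The NOTE in rdma_ucm_event_resp can be checked at compile time: uid(8) + id(4) + event(4) + status(4) puts param at offset 20, which is only 4-byte aligned, so any 8-byte union member would shift the layout between 32- and 64-bit ABIs. A sketch:

#include <stddef.h>
#include <rdma/rdma_user_cm.h>

_Static_assert(offsetof(struct rdma_ucm_event_resp, param) == 20,
	       "param is only 4-byte aligned; no u64 members allowed");
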
diff --git a/include/uapi/rdma/rdma_user_ioctl.h b/include/uapi/rdma/rdma_user_ioctl.h
index 46de088..d223f41 100644
--- a/include/uapi/rdma/rdma_user_ioctl.h
+++ b/include/uapi/rdma/rdma_user_ioctl.h
@@ -34,49 +34,13 @@
#ifndef RDMA_USER_IOCTL_H
#define RDMA_USER_IOCTL_H
-#include <linux/types.h>
-#include <linux/ioctl.h>
#include <rdma/ib_user_mad.h>
#include <rdma/hfi/hfi1_ioctl.h>
+#include <rdma/rdma_user_ioctl_cmds.h>
-/* Documentation/ioctl/ioctl-number.txt */
-#define RDMA_IOCTL_MAGIC 0x1b
/* Legacy name, for user space applications which already use it */
#define IB_IOCTL_MAGIC RDMA_IOCTL_MAGIC
-#define RDMA_VERBS_IOCTL \
- _IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr)
-
-#define UVERBS_ID_NS_MASK 0xF000
-#define UVERBS_ID_NS_SHIFT 12
-
-enum {
- /* User input */
- UVERBS_ATTR_F_MANDATORY = 1U << 0,
- /*
- * Valid output bit should be ignored and considered set in
- * mandatory fields. This bit is kernel output.
- */
- UVERBS_ATTR_F_VALID_OUTPUT = 1U << 1,
-};
-
-struct ib_uverbs_attr {
- __u16 attr_id; /* command specific type attribute */
- __u16 len; /* only for pointers */
- __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */
- __u16 reserved;
- __aligned_u64 data; /* ptr to command, inline data or idr/fd */
-};
-
-struct ib_uverbs_ioctl_hdr {
- __u16 length;
- __u16 object_id;
- __u16 method_id;
- __u16 num_attrs;
- __aligned_u64 reserved;
- struct ib_uverbs_attr attrs[0];
-};
-
/*
* General block assignments
* It is closed on purpose; do not expose it to user space
diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h
new file mode 100644
index 0000000..1da5a1e
--- /dev/null
+++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_USER_IOCTL_CMDS_H
+#define RDMA_USER_IOCTL_CMDS_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/* Documentation/ioctl/ioctl-number.txt */
+#define RDMA_IOCTL_MAGIC 0x1b
+#define RDMA_VERBS_IOCTL \
+ _IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr)
+
+enum {
+ /* User input */
+ UVERBS_ATTR_F_MANDATORY = 1U << 0,
+ /*
+ * Valid output bit should be ignored and considered set in
+ * mandatory fields. This bit is kernel output.
+ */
+ UVERBS_ATTR_F_VALID_OUTPUT = 1U << 1,
+};
+
+struct ib_uverbs_attr {
+ __u16 attr_id; /* command specific type attribute */
+ __u16 len; /* only for pointers */
+ __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */
+ union {
+ struct {
+ __u8 elem_id;
+ __u8 reserved;
+ } enum_data;
+ __u16 reserved;
+ } attr_data;
+ __aligned_u64 data; /* ptr to command, inline data or idr/fd */
+};
+
+struct ib_uverbs_ioctl_hdr {
+ __u16 length;
+ __u16 object_id;
+ __u16 method_id;
+ __u16 num_attrs;
+ __aligned_u64 reserved1;
+ __u32 driver_id;
+ __u32 reserved2;
+ struct ib_uverbs_attr attrs[0];
+};
+
+enum rdma_driver_id {
+ RDMA_DRIVER_UNKNOWN,
+ RDMA_DRIVER_MLX5,
+ RDMA_DRIVER_MLX4,
+ RDMA_DRIVER_CXGB3,
+ RDMA_DRIVER_CXGB4,
+ RDMA_DRIVER_MTHCA,
+ RDMA_DRIVER_BNXT_RE,
+ RDMA_DRIVER_OCRDMA,
+ RDMA_DRIVER_NES,
+ RDMA_DRIVER_I40IW,
+ RDMA_DRIVER_VMW_PVRDMA,
+ RDMA_DRIVER_QEDR,
+ RDMA_DRIVER_HNS,
+ RDMA_DRIVER_USNIC,
+ RDMA_DRIVER_RXE,
+ RDMA_DRIVER_HFI1,
+ RDMA_DRIVER_QIB,
+};
+
+#endif
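
ib_uverbs_ioctl_hdr is the wire header for RDMA_VERBS_IOCTL, with attrs[] following inline and the new driver_id naming the dialect of the driver attributes. A userspace sketch of issuing one method; the object and method IDs are caller-supplied placeholders, not values defined by this patch:

#include <rdma/rdma_user_ioctl_cmds.h>
#include <string.h>
#include <sys/ioctl.h>

static int call_method(int cmd_fd, __u16 object_id, __u16 method_id,
		       const struct ib_uverbs_attr *attrs, __u16 num_attrs)
{
	struct {
		struct ib_uverbs_ioctl_hdr hdr;
		struct ib_uverbs_attr attrs[16];
	} req;

	if (num_attrs > 16)
		return -1;
	memset(&req, 0, sizeof(req));
	req.hdr.length = sizeof(req.hdr) + num_attrs * sizeof(*attrs);
	req.hdr.object_id = object_id;
	req.hdr.method_id = method_id;
	req.hdr.num_attrs = num_attrs;
	req.hdr.driver_id = RDMA_DRIVER_MLX5;	/* example; match the device */
	memcpy(req.attrs, attrs, num_attrs * sizeof(*attrs));
	return ioctl(cmd_fd, RDMA_VERBS_IOCTL, &req);
}
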
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index bdeea94..1f8a9e7 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -35,6 +35,9 @@
#define RDMA_USER_RXE_H
#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/in6.h>
union rxe_gid {
__u8 raw[16];
@@ -55,16 +58,17 @@ struct rxe_global_route {
struct rxe_av {
__u8 port_num;
__u8 network_type;
+ __u16 reserved1;
+ __u32 reserved2;
struct rxe_global_route grh;
union {
- struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
};
struct rxe_send_wr {
- __u64 wr_id;
+ __aligned_u64 wr_id;
__u32 num_sge;
__u32 opcode;
__u32 send_flags;
@@ -74,36 +78,42 @@ struct rxe_send_wr {
} ex;
union {
struct {
- __u64 remote_addr;
+ __aligned_u64 remote_addr;
__u32 rkey;
+ __u32 reserved;
} rdma;
struct {
- __u64 remote_addr;
- __u64 compare_add;
- __u64 swap;
+ __aligned_u64 remote_addr;
+ __aligned_u64 compare_add;
+ __aligned_u64 swap;
__u32 rkey;
+ __u32 reserved;
} atomic;
struct {
__u32 remote_qpn;
__u32 remote_qkey;
__u16 pkey_index;
} ud;
+ /* reg is only used by the kernel and is not part of the uapi */
struct {
- struct ib_mr *mr;
+ union {
+ struct ib_mr *mr;
+ __aligned_u64 reserved;
+ };
__u32 key;
- int access;
+ __u32 access;
} reg;
} wr;
};
struct rxe_sge {
- __u64 addr;
+ __aligned_u64 addr;
__u32 length;
__u32 lkey;
};
struct mminfo {
- __u64 offset;
+ __aligned_u64 offset;
__u32 size;
__u32 pad;
};
@@ -114,6 +124,7 @@ struct rxe_dma_info {
__u32 cur_sge;
__u32 num_sge;
__u32 sge_offset;
+ __u32 reserved;
union {
__u8 inline_data[0];
struct rxe_sge sge[0];
@@ -125,7 +136,7 @@ struct rxe_send_wqe {
struct rxe_av av;
__u32 status;
__u32 state;
- __u64 iova;
+ __aligned_u64 iova;
__u32 mask;
__u32 first_psn;
__u32 last_psn;
@@ -136,10 +147,33 @@ struct rxe_send_wqe {
};
struct rxe_recv_wqe {
- __u64 wr_id;
+ __aligned_u64 wr_id;
__u32 num_sge;
__u32 padding;
struct rxe_dma_info dma;
};
+struct rxe_create_cq_resp {
+ struct mminfo mi;
+};
+
+struct rxe_resize_cq_resp {
+ struct mminfo mi;
+};
+
+struct rxe_create_qp_resp {
+ struct mminfo rq_mi;
+ struct mminfo sq_mi;
+};
+
+struct rxe_create_srq_resp {
+ struct mminfo mi;
+ __u32 srq_num;
+ __u32 reserved;
+};
+
+struct rxe_modify_srq_cmd {
+ __aligned_u64 mmap_info_addr;
+};
+
#endif /* RDMA_USER_RXE_H */
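
The rxe_av rework drops the bare struct sockaddr member (smaller than sockaddr_in6 and redundant in the union) and adds explicit reserved fields so grh sits at the same offset on every ABI; likewise the reg branch overlays the kernel-only struct ib_mr pointer with an __aligned_u64 so the wr union keeps one size on 32- and 64-bit kernels. A compile-time sketch of the new layout:

#include <stddef.h>
#include <rdma/rdma_user_rxe.h>

/* port_num(1) + network_type(1) + reserved1(2) + reserved2(4) == 8 */
_Static_assert(offsetof(struct rxe_av, grh) == 8,
	       "rxe_av layout must match on 32- and 64-bit ABIs");
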
diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h
index 02ca0d0..d13fd49 100644
--- a/include/uapi/rdma/vmw_pvrdma-abi.h
+++ b/include/uapi/rdma/vmw_pvrdma-abi.h
@@ -143,7 +143,7 @@ struct pvrdma_alloc_pd_resp {
};
struct pvrdma_create_cq {
- __u64 buf_addr;
+ __aligned_u64 buf_addr;
__u32 buf_size;
__u32 reserved;
};
@@ -154,13 +154,13 @@ struct pvrdma_create_cq_resp {
};
struct pvrdma_resize_cq {
- __u64 buf_addr;
+ __aligned_u64 buf_addr;
__u32 buf_size;
__u32 reserved;
};
struct pvrdma_create_srq {
- __u64 buf_addr;
+ __aligned_u64 buf_addr;
__u32 buf_size;
__u32 reserved;
};
@@ -171,25 +171,25 @@ struct pvrdma_create_srq_resp {
};
struct pvrdma_create_qp {
- __u64 rbuf_addr;
- __u64 sbuf_addr;
+ __aligned_u64 rbuf_addr;
+ __aligned_u64 sbuf_addr;
__u32 rbuf_size;
__u32 sbuf_size;
- __u64 qp_addr;
+ __aligned_u64 qp_addr;
};
/* PVRDMA masked atomic compare and swap */
struct pvrdma_ex_cmp_swap {
- __u64 swap_val;
- __u64 compare_val;
- __u64 swap_mask;
- __u64 compare_mask;
+ __aligned_u64 swap_val;
+ __aligned_u64 compare_val;
+ __aligned_u64 swap_mask;
+ __aligned_u64 compare_mask;
};
/* PVRDMA masked atomic fetch and add */
struct pvrdma_ex_fetch_add {
- __u64 add_val;
- __u64 field_boundary;
+ __aligned_u64 add_val;
+ __aligned_u64 field_boundary;
};
/* PVRDMA address vector. */
@@ -207,14 +207,14 @@ struct pvrdma_av {
/* PVRDMA scatter/gather entry */
struct pvrdma_sge {
- __u64 addr;
+ __aligned_u64 addr;
__u32 length;
__u32 lkey;
};
/* PVRDMA receive queue work request */
struct pvrdma_rq_wqe_hdr {
- __u64 wr_id; /* wr id */
+ __aligned_u64 wr_id; /* wr id */
__u32 num_sge; /* size of s/g array */
__u32 total_len; /* reserved */
};
@@ -222,7 +222,7 @@ struct pvrdma_rq_wqe_hdr {
/* PVRDMA send queue work request */
struct pvrdma_sq_wqe_hdr {
- __u64 wr_id; /* wr id */
+ __aligned_u64 wr_id; /* wr id */
__u32 num_sge; /* size of s/g array */
__u32 total_len; /* reserved */
__u32 opcode; /* operation type */
@@ -234,19 +234,19 @@ struct pvrdma_sq_wqe_hdr {
__u32 reserved;
union {
struct {
- __u64 remote_addr;
+ __aligned_u64 remote_addr;
__u32 rkey;
__u8 reserved[4];
} rdma;
struct {
- __u64 remote_addr;
- __u64 compare_add;
- __u64 swap;
+ __aligned_u64 remote_addr;
+ __aligned_u64 compare_add;
+ __aligned_u64 swap;
__u32 rkey;
__u32 reserved;
} atomic;
struct {
- __u64 remote_addr;
+ __aligned_u64 remote_addr;
__u32 log_arg_sz;
__u32 rkey;
union {
@@ -255,13 +255,14 @@ struct pvrdma_sq_wqe_hdr {
} wr_data;
} masked_atomics;
struct {
- __u64 iova_start;
- __u64 pl_pdir_dma;
+ __aligned_u64 iova_start;
+ __aligned_u64 pl_pdir_dma;
__u32 page_shift;
__u32 page_list_len;
__u32 length;
__u32 access_flags;
__u32 rkey;
+ __u32 reserved;
} fast_reg;
struct {
__u32 remote_qpn;
@@ -274,8 +275,8 @@ struct pvrdma_sq_wqe_hdr {
/* Completion queue element. */
struct pvrdma_cqe {
- __u64 wr_id;
- __u64 qp;
+ __aligned_u64 wr_id;
+ __aligned_u64 qp;
__u32 opcode;
__u32 status;
__u32 byte_len;