From 417608c20a4c8397bc5307d949ec01ea0a0dd8e5 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 12 Nov 2009 11:19:44 -0800 Subject: IB/mlx4: Remove limitation on LSO header size Current code has a limitation: an LSO header is not allowed to cross a 64-byte boundary. This patch removes this limitation by setting the WQE RR for large headers, thus allowing LSO headers of any size. The extra buffer reserved for MLX4_IB_QP_LSO QPs has been doubled, from 64 to 128 bytes, assuming this is a reasonable upper limit for header length. Also, this patch will cause IB_DEVICE_UD_TSO to be set only for HCA FW versions that set MLX4_DEV_CAP_FLAG_BLH; e.g. FW version 2.6.000 and higher. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ce7cc6c..e92d1bf 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -61,6 +61,7 @@ enum { MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8, MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9, MLX4_DEV_CAP_FLAG_DPDP = 1 << 12, + MLX4_DEV_CAP_FLAG_BLH = 1 << 15, MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16, MLX4_DEV_CAP_FLAG_APM = 1 << 17, MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18, -- cgit v1.1 From a7ca1f00ed2921b804d7ebda0f6fca8c9078fa42 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 16 Nov 2009 09:30:33 -0800 Subject: RDMA/ucma: Add option to manually set IB path Export rdma_set_ib_paths to user space to allow applications to manually set the IB path used for connections. This allows alternative ways for a user space application or library to obtain path record information, including retrieving path information from cached data, avoiding direct interaction with the IB SA. The IB SA is a single, centralized entity that can limit scaling on large clusters running MPI applications. 
Future changes to the rdma cm can expand on this framework to support the full range of features allowed by the IB CM, such as separate forward and reverse paths and APM. Signed-off-by: Sean Hefty Reviewed-By: Jason Gunthorpe Signed-off-by: Roland Dreier --- include/rdma/ib_sa.h | 6 ++++++ include/rdma/ib_user_sa.h | 16 ++++++++++++++++ include/rdma/rdma_user_cm.h | 6 ++++-- 3 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 3841c1a..1082afa 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -379,4 +379,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr); +/** + * ib_sa_unpack_path - Convert a path record from MAD format to struct + * ib_sa_path_rec. + */ +void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec); + #endif /* IB_SA_H */ diff --git a/include/rdma/ib_user_sa.h b/include/rdma/ib_user_sa.h index 6591201..cfc7c9b 100644 --- a/include/rdma/ib_user_sa.h +++ b/include/rdma/ib_user_sa.h @@ -35,6 +35,22 @@ #include +enum { + IB_PATH_GMP = 1, + IB_PATH_PRIMARY = (1<<1), + IB_PATH_ALTERNATE = (1<<2), + IB_PATH_OUTBOUND = (1<<3), + IB_PATH_INBOUND = (1<<4), + IB_PATH_INBOUND_REVERSE = (1<<5), + IB_PATH_BIDIRECTIONAL = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE +}; + +struct ib_path_rec_data { + __u32 flags; + __u32 reserved; + __u32 path_rec[16]; +}; + struct ib_user_path_rec { __u8 dgid[16]; __u8 sgid[16]; diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h index c557054..1d16502 100644 --- a/include/rdma/rdma_user_cm.h +++ b/include/rdma/rdma_user_cm.h @@ -215,12 +215,14 @@ struct rdma_ucm_event_resp { /* Option levels */ enum { - RDMA_OPTION_ID = 0 + RDMA_OPTION_ID = 0, + RDMA_OPTION_IB = 1 }; /* Option details */ enum { - RDMA_OPTION_ID_TOS = 0 + RDMA_OPTION_ID_TOS = 0, + RDMA_OPTION_IB_PATH = 1 }; struct rdma_ucm_set_option { -- cgit v1.1 From 
6266ed6e4164466177238b11ecb825a3a108a3e4 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 12:55:22 -0800 Subject: RDMA/cma: Replace net_device pointer with index Provide the device interface when resolving route information to ensure that the correct outbound device is used. This will also simplify processing of sin6_scope_id for IPv6 support. Based on work from: David Wilder Jason Gunthorpe Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/ib_addr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 483057b..27f17cc 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -61,7 +61,7 @@ struct rdma_dev_addr { unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; enum rdma_node_type dev_type; - struct net_device *src_dev; + int bound_dev_if; }; /** -- cgit v1.1 From c4315d85f9b76834289fd503796c01b8311c4b84 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 12:57:18 -0800 Subject: IB/addr: Store net_device type instead of translating to RDMA transport The struct rdma_dev_addr stores net_device address information: the source device address, destination hardware address, and broadcast address. For consistency, store the net_device type rather than converting it to the rdma_node_type. The type indicates the format of the various hardware addresses, which is what we're concerned with, and not the RDMA node type that the address may map to. 
Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/ib_addr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 27f17cc..3a39c55 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -60,7 +61,7 @@ struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; - enum rdma_node_type dev_type; + unsigned short dev_type; int bound_dev_if; }; -- cgit v1.1 From 6f8372b69c3198e06cecb1df2cb9682d0c55e657 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 13:26:06 -0800 Subject: RDMA/cm: fix loopback address support The RDMA CM is intended to support the use of a loopback address when establishing a connection; however, the behavior of the CM when loopback addresses are used is confusing and does not always work, depending on whether loopback was specified by the server, the client, or both. The defined behavior of rdma_bind_addr is to associate an RDMA device with an rdma_cm_id, as long as the user specified a non-zero address (i.e., they weren't just trying to reserve a port). Currently, if the loopback address is passed to rdma_bind_addr, no device is associated with the rdma_cm_id. Fix this. If a loopback address is specified by the client as the destination address for a connection, it will fail to establish a connection. This is true even if the server is listening across all addresses or on the loopback address itself. The issue is that the server tries to translate the IP address carried in the REQ message to a local net_device address, which fails. The translation is not needed in this case, since the REQ carries the actual HW address that should be used. Finally, clean up loopback support to be more transport neutral. 
Replace separate calls to get/set the sgid and dgid from the device address to a single call that behaves correctly depending on the format of the device address. And support both IPv4 and IPv6 address formats. Signed-off-by: Sean Hefty [ Fixed RDS build by s/ib_addr_get/rdma_addr_get/ - Roland ] Signed-off-by: Roland Dreier --- include/rdma/ib_addr.h | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 3a39c55..fa0d52b 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -122,40 +122,29 @@ static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); } -static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) { - memcpy(gid, dev_addr->src_dev_addr + 4, sizeof *gid); + return dev_addr->dev_type == ARPHRD_INFINIBAND ? 
4 : 0; } -static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid); + memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } -static inline void ib_addr_get_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(gid, dev_addr->dst_dev_addr + 4, sizeof *gid); + memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } -static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid); + memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } -static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) -{ - memcpy(gid, dev_addr->src_dev_addr, sizeof *gid); -} - -static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid); + memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } #endif /* IB_ADDR_H */ -- cgit v1.1 From 55464d461bdcffc4422aebfb750eacf99e3c0f27 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Dec 2009 14:20:04 -0800 Subject: IB: Clarify the documentation of ib_post_send() Clarify the behavior of ib_post_send() when a list of work requests is passed in and an immediate error is returned. 
Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c179318..09509ed 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1425,6 +1425,11 @@ int ib_destroy_qp(struct ib_qp *qp); * @send_wr: A list of work requests to post on the send queue. * @bad_send_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the QP. + * + * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate + * error is returned, the QP state shall not be affected, + * ib_post_send() will return an immediate error after queueing any + * earlier work requests in the list. */ static inline int ib_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, -- cgit v1.1