From 24d44a391f1b5d56e9c7a4fc1edd085687864ff9 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 4 Jul 2013 16:10:44 +0530 Subject: RDMA/cma: Add IPv6 support for iWARP Modify the type of local_addr and remote_addr fields in struct iw_cm_id from struct sockaddr_in to struct sockaddr_storage to hold IPv6 and IPv4 addresses uniformly. Change the references of local_addr and remote_addr in cxgb4, cxgb3, nes and amso drivers to match this. However, to be able to actually run traffic over IPv6, low-level drivers have to add code to support this. Signed-off-by: Steve Wise Reviewed-by: Sean Hefty [ Fix unused variable warnings when INFINIBAND_NES_DEBUG not set. - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 44 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 7c0f953..3a2c3c3 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1385,8 +1385,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) { struct rdma_id_private *id_priv = iw_id->context; struct rdma_cm_event event; - struct sockaddr_in *sin; int ret = 0; + struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; + struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) return 0; @@ -1397,10 +1398,10 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IW_CM_EVENT_CONNECT_REPLY: - sin = (struct sockaddr_in *) cma_src_addr(id_priv); - *sin = iw_event->local_addr; - sin = (struct sockaddr_in *) cma_dst_addr(id_priv); - *sin = iw_event->remote_addr; + memcpy(cma_src_addr(id_priv), laddr, + rdma_addr_size(laddr)); + memcpy(cma_dst_addr(id_priv), raddr, + rdma_addr_size(raddr)); switch (iw_event->status) { case 0: 
event.event = RDMA_CM_EVENT_ESTABLISHED; @@ -1450,11 +1451,12 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, { struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; - struct sockaddr_in *sin; struct net_device *dev = NULL; struct rdma_cm_event event; int ret; struct ib_device_attr attr; + struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; + struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; listen_id = cm_id->context; if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) @@ -1472,14 +1474,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); conn_id->state = RDMA_CM_CONNECT; - dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); - if (!dev) { - ret = -EADDRNOTAVAIL; - mutex_unlock(&conn_id->handler_mutex); - rdma_destroy_id(new_cm_id); - goto out; - } - ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); + ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); @@ -1497,10 +1492,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, cm_id->context = conn_id; cm_id->cm_handler = cma_iw_handler; - sin = (struct sockaddr_in *) cma_src_addr(conn_id); - *sin = iw_event->local_addr; - sin = (struct sockaddr_in *) cma_dst_addr(conn_id); - *sin = iw_event->remote_addr; + memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); + memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); ret = ib_query_device(conn_id->id.device, &attr); if (ret) { @@ -1576,7 +1569,6 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) { int ret; - struct sockaddr_in *sin; struct iw_cm_id *id; id = iw_create_cm_id(id_priv->id.device, @@ -1587,8 +1579,8 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) id_priv->cm_id.iw = id; - sin 
= (struct sockaddr_in *) cma_src_addr(id_priv); - id_priv->cm_id.iw->local_addr = *sin; + memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), + rdma_addr_size(cma_src_addr(id_priv))); ret = iw_cm_listen(id_priv->cm_id.iw, backlog); @@ -2803,7 +2795,6 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct iw_cm_id *cm_id; - struct sockaddr_in* sin; int ret; struct iw_cm_conn_param iw_param; @@ -2813,11 +2804,10 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, id_priv->cm_id.iw = cm_id; - sin = (struct sockaddr_in *) cma_src_addr(id_priv); - cm_id->local_addr = *sin; - - sin = (struct sockaddr_in *) cma_dst_addr(id_priv); - cm_id->remote_addr = *sin; + memcpy(&cm_id->local_addr, cma_src_addr(id_priv), + rdma_addr_size(cma_src_addr(id_priv))); + memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), + rdma_addr_size(cma_dst_addr(id_priv))); ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) -- cgit v1.1 From 73c40c616a33fcb7961b3c90a91b550813129b3e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2013 18:49:53 +0300 Subject: IB/core: Add locking around event dispatching on XRC target QPs Fix a potential race when an event occurs on a target XRC QP and, in the middle of reporting it to its shared QPs, one of them is destroyed by a user space application. Also add a note for kernel consumers in ib_verbs.h that they must not destroy the QP from within the handler. 
Signed-off-by: Yishai Hadas Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 22192de..077fd64 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -346,10 +346,13 @@ EXPORT_SYMBOL(ib_destroy_srq); static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) { struct ib_qp *qp = context; + unsigned long flags; + spin_lock_irqsave(&qp->device->event_handler_lock, flags); list_for_each_entry(event->element.qp, &qp->open_list, open_list) if (event->element.qp->event_handler) event->element.qp->event_handler(event, event->element.qp->qp_context); + spin_unlock_irqrestore(&qp->device->event_handler_lock, flags); } static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) -- cgit v1.1 From 846be90d810c285f6474f53abf1f928e1113830e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 1 Aug 2013 18:49:54 +0300 Subject: IB/core: Fixes to XRC reference counting in uverbs Added reference counting mechanism for XRC target QPs between ib_uqp_object and its ib_uxrcd_object. This prevents closing an XRC domain that is still attached to a QP. In addition, add missing code in ib_uverbs_destroy_srq() to handle ib_uxrcd_object reference counting correctly when destroying an xsrq. 
Signed-off-by: Yishai Hadas Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 0fcd7aa..b8431d6 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -135,6 +135,7 @@ struct ib_usrq_object { struct ib_uqp_object { struct ib_uevent_object uevent; struct list_head mcast_list; + struct ib_uxrcd_object *uxrcd; }; struct ib_ucq_object { diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b3c07b0..b105140 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1526,7 +1526,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - obj = kmalloc(sizeof *obj, GFP_KERNEL); + obj = kzalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; @@ -1642,8 +1642,13 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, goto err_copy; } - if (xrcd) + if (xrcd) { + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, + uobject); + atomic_inc(&obj->uxrcd->refcnt); put_xrcd_read(xrcd_uobj); + } + if (pd) put_pd_read(pd); if (scq) @@ -1753,6 +1758,8 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, goto err_remove; } + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); + atomic_inc(&obj->uxrcd->refcnt); put_xrcd_read(xrcd_uobj); mutex_lock(&file->mutex); @@ -2019,6 +2026,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, if (ret) return ret; + if (obj->uxrcd) + atomic_dec(&obj->uxrcd->refcnt); + idr_remove_uobj(&ib_uverbs_qp_idr, uobj); mutex_lock(&file->mutex); @@ -2860,6 +2870,8 @@ ssize_t 
ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_srq *srq; struct ib_uevent_object *obj; int ret = -EINVAL; + struct ib_usrq_object *us; + enum ib_srq_type srq_type; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; @@ -2869,6 +2881,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, return -EINVAL; srq = uobj->object; obj = container_of(uobj, struct ib_uevent_object, uobject); + srq_type = srq->srq_type; ret = ib_destroy_srq(srq); if (!ret) @@ -2879,6 +2892,11 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, if (ret) return ret; + if (srq_type == IB_SRQT_XRC) { + us = container_of(obj, struct ib_usrq_object, uevent); + atomic_dec(&us->uxrcd->refcnt); + } + idr_remove_uobj(&ib_uverbs_srq_idr, uobj); mutex_lock(&file->mutex); -- cgit v1.1 From 319a441d1361ea703b091caf92418f8121eadfc5 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 7 Aug 2013 14:01:59 +0300 Subject: IB/core: Add receive flow steering support The RDMA stack allows for applications to create IB_QPT_RAW_PACKET QPs, which receive plain Ethernet packets, specifically packets that don't carry any QPN to be matched by the receiving side. Applications using these QPs must be provided with a method to program some steering rule with the HW so packets arriving at the local port can be routed to them. This patch adds ib_create_flow(), which allows providing a flow specification for a QP. When there's a match between the specification and a received packet, the packet is forwarded to that QP, in the same way one uses ib_attach_multicast() for IB UD multicast handling. Flow specifications are provided as instances of struct ib_flow_spec_yyy, which describe L2, L3 and L4 headers. Currently specs for Ethernet, IPv4, TCP and UDP are defined. Flow specs are made of values and masks. The input to ib_create_flow() is a struct ib_flow_attr, which contains a few mandatory control elements and optional flow specs. 
struct ib_flow_attr { enum ib_flow_attr_type type; u16 size; u16 priority; u32 flags; u8 num_of_specs; u8 port; /* Following are the optional layers according to user request * struct ib_flow_spec_yyy * struct ib_flow_spec_zzz */ }; As these specs are eventually coming from user space, they are defined and used in a way which allows adding new spec types without kernel/user ABI change, just with a little API enhancement which defines the newly added spec. The flow spec structures are defined with TLV (Type-Length-Value) entries, which allows calling ib_create_flow() with a list of variable length of optional specs. For the actual processing of ib_flow_attr the driver uses the number of specs and the size mandatory fields along with the TLV nature of the specs. Steering rules processing order is according to the domain over which the rule is set and the rule priority. All rules set by user space applications fall into the IB_FLOW_DOMAIN_USER domain; other domains could be used by future IPoIB RFS and ethtool flow-steering interface implementations. A lower numerical value for the priority field means higher priority. The returned value from ib_create_flow() is a struct ib_flow, which contains a database pointer (handle) provided by the HW driver to be used when calling ib_destroy_flow(). Applications that offload TCP/IP traffic can also be written over IB UD QPs. The ib_create_flow() / ib_destroy_flow() API is designed to support UD QPs too. A HW driver can set IB_DEVICE_MANAGED_FLOW_STEERING to denote support for flow steering. 
The ib_flow_attr enum type supports usage of flow steering for promiscuous and sniffer purposes: IB_FLOW_ATTR_NORMAL - "regular" rule, steering according to rule specification IB_FLOW_ATTR_ALL_DEFAULT - default unicast and multicast rule, receive all Ethernet traffic which isn't steered to any QP IB_FLOW_ATTR_MC_DEFAULT - same as IB_FLOW_ATTR_ALL_DEFAULT but only for multicast IB_FLOW_ATTR_SNIFFER - sniffer rule, receive all port traffic ALL_DEFAULT and MC_DEFAULT rules options are valid only for Ethernet link type. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/verbs.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 22192de..87a8102 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1254,3 +1254,30 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd) return xrcd->device->dealloc_xrcd(xrcd); } EXPORT_SYMBOL(ib_dealloc_xrcd); + +struct ib_flow *ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain) +{ + struct ib_flow *flow_id; + if (!qp->device->create_flow) + return ERR_PTR(-ENOSYS); + + flow_id = qp->device->create_flow(qp, flow_attr, domain); + if (!IS_ERR(flow_id)) + atomic_inc(&qp->usecnt); + return flow_id; +} +EXPORT_SYMBOL(ib_create_flow); + +int ib_destroy_flow(struct ib_flow *flow_id) +{ + int err; + struct ib_qp *qp = flow_id->qp; + + err = qp->device->destroy_flow(flow_id); + if (!err) + atomic_dec(&qp->usecnt); + return err; +} +EXPORT_SYMBOL(ib_destroy_flow); -- cgit v1.1 From 400dbc96583ff3b8ad4c09bd7e9dcd35a6215922 Mon Sep 17 00:00:00 2001 From: Igor Ivanov Date: Wed, 14 Aug 2013 13:58:29 +0300 Subject: IB/core: Infrastructure for extensible uverbs commands Add infrastructure to support extended uverbs capabilities in a forward/backward manner. 
Uverbs command opcodes which are based on the verbs extensions approach should be greater than or equal to IB_USER_VERBS_CMD_THRESHOLD. They have a new header format and are processed a bit differently. Whenever a specific IB_USER_VERBS_CMD_XXX is extended, which practically means it needs to have additional arguments, we will be able to add them without creating a completely new IB_USER_VERBS_CMD_YYY command or bumping the uverbs ABI version. This patch by itself doesn't provide the whole scheme, which is also dependent on adding a comp_mask field to each extended uverbs command struct. The new header framework allows for future extension of the CMD arguments (ib_uverbs_cmd_hdr.in_words, ib_uverbs_cmd_hdr.out_words) for an existing new command (that is, a command that supports the new uverbs command header format suggested in this patch) w/o bumping the ABI version and while maintaining backward and forward compatibility with new and old libibverbs versions. In the uverbs command we are passing both uverbs arguments and the provider arguments. We split the ib_uverbs_cmd_hdr.in_words to ib_uverbs_cmd_hdr.in_words which will now carry only uverbs input argument struct size and ib_uverbs_cmd_hdr.provider_in_words that will carry the provider input argument size. Same goes for the response (the uverbs CMD output argument). For example take the create_cq call and the mlx4_ib provider: The uverbs layer gets libibverbs' struct ibv_create_cq (named struct ib_uverbs_create_cq in the kernel), mlx4_ib gets libmlx4's struct mlx4_create_cq (which includes struct ibv_create_cq and is named struct mlx4_ib_create_cq in the kernel) and in_words = sizeof(mlx4_create_cq)/4 . Thus ib_uverbs_cmd_hdr.in_words carries both uverbs plus mlx4_ib input argument sizes, where uverbs assumes it knows the size of its input argument - struct ibv_create_cq. 
Now, if we wish to add a variable to struct ibv_create_cq, we can add a comp_mask field to the struct which is basically bit field indicating which fields exists in the struct (as done for the libibverbs API extension), but we need a way to tell what is the total size of the struct and not assume the struct size is predefined (since we may get different struct sizes from different user libibverbs versions). So we know at which point the provider input argument (struct mlx4_create_cq) begins. Same goes for extending the provider struct mlx4_create_cq. Thus we split the ib_uverbs_cmd_hdr.in_words to ib_uverbs_cmd_hdr.in_words which will now carry only uverbs input argument struct size and ib_uverbs_cmd_hdr.provider_in_words that will carry the provider (mlx4_ib) input argument size. Signed-off-by: Igor Ivanov Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_main.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 2c6f0f2..e4e7b24 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -583,9 +583,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof hdr)) return -EFAULT; - if (hdr.in_words * 4 != count) - return -EINVAL; - if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || !uverbs_cmd_table[hdr.command]) return -EINVAL; @@ -597,8 +594,30 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) return -ENOSYS; - return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr, - hdr.in_words * 4, hdr.out_words * 4); + if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) { + struct ib_uverbs_cmd_hdr_ex hdr_ex; + + if (copy_from_user(&hdr_ex, buf, 
sizeof(hdr_ex))) + return -EFAULT; + + if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count) + return -EINVAL; + + return uverbs_cmd_table[hdr.command](file, + buf + sizeof(hdr_ex), + (hdr_ex.in_words + + hdr_ex.provider_in_words) * 4, + (hdr_ex.out_words + + hdr_ex.provider_out_words) * 4); + } else { + if (hdr.in_words * 4 != count) + return -EINVAL; + + return uverbs_cmd_table[hdr.command](file, + buf + sizeof(hdr), + hdr.in_words * 4, + hdr.out_words * 4); + } } static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) -- cgit v1.1 From 436f2ad05a0b65b1467ddf51bc68171c381bf844 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 14 Aug 2013 13:58:30 +0300 Subject: IB/core: Export ib_create/destroy_flow through uverbs Implement ib_uverbs_create_flow() and ib_uverbs_destroy_flow() to support flow steering for user space applications. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 3 + drivers/infiniband/core/uverbs_cmd.c | 214 ++++++++++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 13 ++- 3 files changed, 229 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 0fcd7aa..ad9d102 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -155,6 +155,7 @@ extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; extern struct idr ib_uverbs_xrcd_idr; +extern struct idr ib_uverbs_rule_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -215,5 +216,7 @@ IB_UVERBS_DECLARE_CMD(destroy_srq); IB_UVERBS_DECLARE_CMD(create_xsrq); IB_UVERBS_DECLARE_CMD(open_xrcd); IB_UVERBS_DECLARE_CMD(close_xrcd); +IB_UVERBS_DECLARE_CMD(create_flow); +IB_UVERBS_DECLARE_CMD(destroy_flow); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c 
b/drivers/infiniband/core/uverbs_cmd.c index b3c07b0..6e98df9 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -54,6 +54,7 @@ static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; +static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ @@ -330,6 +331,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); INIT_LIST_HEAD(&ucontext->xrcd_list); + INIT_LIST_HEAD(&ucontext->rule_list); ucontext->closing = 0; resp.num_comp_vectors = file->device->num_comp_vectors; @@ -2587,6 +2589,218 @@ out_put: return ret ? ret : in_len; } +static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec, + union ib_flow_spec *ib_spec) +{ + ib_spec->type = kern_spec->type; + + switch (ib_spec->type) { + case IB_FLOW_SPEC_ETH: + ib_spec->eth.size = sizeof(struct ib_flow_spec_eth); + if (ib_spec->eth.size != kern_spec->eth.size) + return -EINVAL; + memcpy(&ib_spec->eth.val, &kern_spec->eth.val, + sizeof(struct ib_flow_eth_filter)); + memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask, + sizeof(struct ib_flow_eth_filter)); + break; + case IB_FLOW_SPEC_IPV4: + ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4); + if (ib_spec->ipv4.size != kern_spec->ipv4.size) + return -EINVAL; + memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val, + sizeof(struct ib_flow_ipv4_filter)); + memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, + sizeof(struct ib_flow_ipv4_filter)); + break; + case IB_FLOW_SPEC_TCP: + case IB_FLOW_SPEC_UDP: + ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); + if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size) + return -EINVAL; + 
memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val, + sizeof(struct ib_flow_tcp_udp_filter)); + memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask, + sizeof(struct ib_flow_tcp_udp_filter)); + break; + default: + return -EINVAL; + } + return 0; +} + +ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_flow cmd; + struct ib_uverbs_create_flow_resp resp; + struct ib_uobject *uobj; + struct ib_flow *flow_id; + struct ib_kern_flow_attr *kern_flow_attr; + struct ib_flow_attr *flow_attr; + struct ib_qp *qp; + int err = 0; + void *kern_spec; + void *ib_spec; + int i; + int kern_attr_size; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER && + !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW)) + return -EPERM; + + if (cmd.flow_attr.num_of_specs) { + kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL); + if (!kern_flow_attr) + return -ENOMEM; + + memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); + kern_attr_size = cmd.flow_attr.size - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr_ex); + if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd), + kern_attr_size)) { + err = -EFAULT; + goto err_free_attr; + } + } else { + kern_flow_attr = &cmd.flow_attr; + kern_attr_size = sizeof(cmd.flow_attr); + } + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) { + err = -ENOMEM; + goto err_free_attr; + } + init_uobj(uobj, 0, file->ucontext, &rule_lock_class); + down_write(&uobj->mutex); + + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) { + err = -EINVAL; + goto err_uobj; + } + + flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL); + if (!flow_attr) { + err = -ENOMEM; + goto err_put; + } + + flow_attr->type = kern_flow_attr->type; + flow_attr->priority = kern_flow_attr->priority; + flow_attr->num_of_specs = kern_flow_attr->num_of_specs; 
+ flow_attr->port = kern_flow_attr->port; + flow_attr->flags = kern_flow_attr->flags; + flow_attr->size = sizeof(*flow_attr); + + kern_spec = kern_flow_attr + 1; + ib_spec = flow_attr + 1; + for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) { + err = kern_spec_to_ib_spec(kern_spec, ib_spec); + if (err) + goto err_free; + flow_attr->size += + ((union ib_flow_spec *) ib_spec)->size; + kern_attr_size -= ((struct ib_kern_spec *) kern_spec)->size; + kern_spec += ((struct ib_kern_spec *) kern_spec)->size; + ib_spec += ((union ib_flow_spec *) ib_spec)->size; + } + if (kern_attr_size) { + pr_warn("create flow failed, %d bytes left from uverb cmd\n", + kern_attr_size); + goto err_free; + } + flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); + if (IS_ERR(flow_id)) { + err = PTR_ERR(flow_id); + goto err_free; + } + flow_id->qp = qp; + flow_id->uobject = uobj; + uobj->object = flow_id; + + err = idr_add_uobj(&ib_uverbs_rule_idr, uobj); + if (err) + goto destroy_flow; + + memset(&resp, 0, sizeof(resp)); + resp.flow_handle = uobj->id; + + if (copy_to_user((void __user *)(unsigned long) cmd.response, + &resp, sizeof(resp))) { + err = -EFAULT; + goto err_copy; + } + + put_qp_read(qp); + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->rule_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + kfree(flow_attr); + if (cmd.flow_attr.num_of_specs) + kfree(kern_flow_attr); + return in_len; +err_copy: + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); +destroy_flow: + ib_destroy_flow(flow_id); +err_free: + kfree(flow_attr); +err_put: + put_qp_read(qp); +err_uobj: + put_uobj_write(uobj); +err_free_attr: + if (cmd.flow_attr.num_of_specs) + kfree(kern_flow_attr); + return err; +} + +ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) { + struct ib_uverbs_destroy_flow cmd; + struct ib_flow *flow_id; + struct ib_uobject *uobj; + int ret; + + if 
(copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + flow_id = uobj->object; + + ret = ib_destroy_flow(flow_id); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + + return ret ? ret : in_len; +} + static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index e4e7b24..75ad86c 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -73,6 +73,7 @@ DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrcd_idr); +DEFINE_IDR(ib_uverbs_rule_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -113,7 +114,9 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, - [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp + [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, + [IB_USER_VERBS_CMD_CREATE_FLOW] = ib_uverbs_create_flow, + [IB_USER_VERBS_CMD_DESTROY_FLOW] = ib_uverbs_destroy_flow }; static void ib_uverbs_add_one(struct ib_device *device); @@ -212,6 +215,14 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uobj); } + list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { + struct ib_flow *flow_id = uobj->object; + + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + ib_destroy_flow(flow_id); + kfree(uobj); + } + list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = 
uobj->object; struct ib_uqp_object *uqp = -- cgit v1.1 From 22878dbc9173a7f0322dd697b1b5b49a83a1d4d5 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 1 Sep 2013 18:39:52 +0300 Subject: IB/core: Better checking of userspace values for receive flow steering - Don't allow unsupported comp_mask values, user should check ibv_query_device to know which features are supported. - Add a check in ib_uverbs_create_flow() to verify the size passed from the user space. Signed-off-by: Matan Barak Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_cmd.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6e98df9..9112410 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2652,17 +2652,31 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; + if (cmd.comp_mask) + return -EINVAL; + if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER && !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW)) return -EPERM; + if (cmd.flow_attr.num_of_specs < 0 || + cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) + return -EINVAL; + + kern_attr_size = cmd.flow_attr.size - sizeof(cmd) - + sizeof(struct ib_uverbs_cmd_hdr_ex); + + if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len || + kern_attr_size < 0 || kern_attr_size > + (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec))) + return -EINVAL; + if (cmd.flow_attr.num_of_specs) { kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL); if (!kern_flow_attr) return -ENOMEM; memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); - kern_attr_size = cmd.flow_attr.size - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr_ex); if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd), kern_attr_size)) { err = -EFAULT; -- cgit v1.1