diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-11 19:43:13 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-11 19:43:13 -0700 |
commit | ce9d3c9a6a9aef61525be07fe6ba27d937236aa2 (patch) | |
tree | 1b29bcb8f60fc6b59fa0d7b833cc733b8ebe17c9 /drivers/infiniband/core | |
parent | 038a5008b2f395c85e6e71d6ddf3c684e7c405b0 (diff) | |
parent | 3d73c2884f45f9a297cbc956cea101405a9703f2 (diff) | |
download | op-kernel-dev-ce9d3c9a6a9aef61525be07fe6ba27d937236aa2.zip op-kernel-dev-ce9d3c9a6a9aef61525be07fe6ba27d937236aa2.tar.gz |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (87 commits)
mlx4_core: Fix section mismatches
IPoIB: Allow setting policy to ignore multicast groups
IB/mthca: Mark error paths as unlikely() in post_srq_recv functions
IB/ipath: Minor fix to ordering of freeing and zeroing of tid pages.
IB/ipath: Remove redundant link state checks
IB/ipath: Fix IB_EVENT_PORT_ERR event
IB/ipath: Better handling of unexpected GPIO interrupts
IB/ipath: Maintain active time on all chips
IB/ipath: Fix QHT7040 serial number check
IB/ipath: Indicate a couple of chip bugs to userspace
IB/ipath: iba6110 rev4 no longer needs recv header overrun workaround
IB/ipath: Use counters in ipath_poll and cleanup interrupts in ipath_close
IB/ipath: Remove duplicate copy of LMC
IB/ipath: Add ability to set the LMC via the sysfs debugging interface
IB/ipath: Optimize completion queue entry insertion and polling
IB/ipath: Implement IB_EVENT_QP_LAST_WQE_REACHED
IB/ipath: Generate flush CQE when QP is in error state
IB/ipath: Remove redundant code
IB/ipath: Future proof eeprom checksum code (contents reading)
IB/ipath: UC RDMA WRITE with IMMEDIATE doesn't send the immediate
...
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r-- | drivers/infiniband/core/addr.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/core/cm.c | 51 | ||||
-rw-r--r-- | drivers/infiniband/core/cma.c | 46 | ||||
-rw-r--r-- | drivers/infiniband/core/device.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/core/fmr_pool.c | 22 | ||||
-rw-r--r-- | drivers/infiniband/core/multicast.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/core/sa_query.c | 12 | ||||
-rw-r--r-- | drivers/infiniband/core/ucma.c | 74 | ||||
-rw-r--r-- | drivers/infiniband/core/umem.c | 20 | ||||
-rw-r--r-- | drivers/infiniband/core/user_mad.c | 151 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs.h | 1 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_main.c | 16 |
12 files changed, 291 insertions, 111 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index c5c33d3..5381c80 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -161,8 +161,7 @@ static void addr_send_arp(struct sockaddr_in *dst_in) if (ip_route_output_key(&rt, &fl)) return; - arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev, - rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL); + neigh_event_send(rt->u.dst.neighbour, NULL); ip_rt_put(rt); } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 4df269f..2e39236 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -2219,6 +2219,9 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; + enum ib_cm_state cm_state; + enum ib_cm_lap_state lap_state; + enum cm_msg_response msg_response; void *data; unsigned long flags; int ret; @@ -2235,48 +2238,40 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); switch(cm_id_priv->id.state) { case IB_CM_REQ_RCVD: - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto error1; - - cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, - CM_MSG_RESPONSE_REQ, service_timeout, - private_data, private_data_len); - ret = ib_post_send_mad(msg, NULL); - if (ret) - goto error2; - cm_id->state = IB_CM_MRA_REQ_SENT; + cm_state = IB_CM_MRA_REQ_SENT; + lap_state = cm_id->lap_state; + msg_response = CM_MSG_RESPONSE_REQ; break; case IB_CM_REP_RCVD: - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto error1; - - cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, - CM_MSG_RESPONSE_REP, service_timeout, - private_data, private_data_len); - ret = ib_post_send_mad(msg, NULL); - if (ret) - goto error2; - cm_id->state = IB_CM_MRA_REP_SENT; + cm_state = IB_CM_MRA_REP_SENT; + lap_state = cm_id->lap_state; + msg_response = CM_MSG_RESPONSE_REP; break; case IB_CM_ESTABLISHED: + cm_state = cm_id->state; + lap_state = IB_CM_MRA_LAP_SENT; + msg_response = CM_MSG_RESPONSE_OTHER; + break; + default: + ret = -EINVAL; + goto error1; + } + + if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) { ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto error1; cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, - CM_MSG_RESPONSE_OTHER, service_timeout, + msg_response, service_timeout, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; - cm_id->lap_state = IB_CM_MRA_LAP_SENT; - break; - default: - ret = -EINVAL; - goto error1; } + + cm_id->state = cm_state; + cm_id->lap_state = lap_state; cm_id_priv->service_timeout = service_timeout; cm_set_private_data(cm_id_priv, data, private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2e641b2..93644f8 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -52,6 +52,7 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 #define CMA_MAX_CM_RETRIES 15 +#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device); @@ -138,6 +139,7 @@ struct rdma_id_private { u32 qkey; u32 qp_num; u8 srq; + u8 tos; }; struct cma_multicast { @@ -1089,6 +1091,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.param.ud.private_data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; } else { + ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); conn_id = cma_new_conn_id(&listen_id->id, ib_event); cma_set_req_event_data(&event, &ib_event->param.req_rcvd, ib_event->private_data, offset); @@ -1474,6 +1477,15 @@ err: } EXPORT_SYMBOL(rdma_listen); +void rdma_set_service_type(struct rdma_cm_id *id, int tos) +{ + struct rdma_id_private *id_priv; + + id_priv = container_of(id, struct rdma_id_private, id); + id_priv->tos = (u8) tos; +} +EXPORT_SYMBOL(rdma_set_service_type); + static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, void *context) { @@ -1498,23 +1510,37 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, struct cma_work *work) { - struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr; + struct rdma_addr *addr = &id_priv->id.route.addr; struct ib_sa_path_rec path_rec; + ib_sa_comp_mask comp_mask; + struct sockaddr_in6 *sin6; memset(&path_rec, 0, sizeof path_rec); - ib_addr_get_sgid(addr, &path_rec.sgid); - ib_addr_get_dgid(addr, &path_rec.dgid); - path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr)); + ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); + ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); + path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; + path_rec.service_id = cma_get_service_id(id_priv->id.ps, &addr->dst_addr); + + comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | + IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | + IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; + + if (addr->src_addr.sa_family == AF_INET) { + path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); + comp_mask |= IB_SA_PATH_REC_QOS_CLASS; + } else { + sin6 = (struct sockaddr_in6 *) &addr->src_addr; + path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); + comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; + } id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, - id_priv->id.port_num, &path_rec, - IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | - IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | - IB_SA_PATH_REC_REVERSIBLE, - timeout_ms, GFP_KERNEL, - cma_query_handler, work, &id_priv->query); + id_priv->id.port_num, &path_rec, + comp_mask, timeout_ms, + GFP_KERNEL, cma_query_handler, + work, &id_priv->query); return (id_priv->query_id < 0) ? id_priv->query_id : 0; } diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 2506c43..5ac5ffe 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -120,12 +120,12 @@ static struct ib_device *__ib_device_get_by_name(const char *name) static int alloc_name(char *name) { - long *inuse; + unsigned long *inuse; char buf[IB_DEVICE_NAME_MAX]; struct ib_device *device; int i; - inuse = (long *) get_zeroed_page(GFP_KERNEL); + inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL); if (!inuse) return -ENOMEM; diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index a06bcc6..d7f6452 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -152,7 +152,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) #ifdef DEBUG if (fmr->ref_count !=0) { - printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d", + printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d\n", fmr, fmr->ref_count); } #endif @@ -170,7 +170,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) ret = ib_unmap_fmr(&fmr_list); if (ret) - printk(KERN_WARNING PFX "ib_unmap_fmr returned %d", ret); + printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret); spin_lock_irq(&pool->pool_lock); list_splice(&unmap_list, &pool->free_list); @@ -235,13 +235,13 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) { - printk(KERN_WARNING PFX "couldn't allocate device attr struct"); + printk(KERN_WARNING PFX "couldn't allocate device attr struct\n"); return ERR_PTR(-ENOMEM); } ret = ib_query_device(device, attr); if (ret) { - printk(KERN_WARNING PFX "couldn't query device: %d", ret); + printk(KERN_WARNING PFX "couldn't query device: %d\n", ret); kfree(attr); return ERR_PTR(ret); } @@ -255,7 +255,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, pool = kmalloc(sizeof *pool, GFP_KERNEL); if (!pool) { - printk(KERN_WARNING PFX "couldn't allocate pool struct"); + printk(KERN_WARNING PFX "couldn't allocate pool struct\n"); return ERR_PTR(-ENOMEM); } @@ -272,7 +272,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket, GFP_KERNEL); if (!pool->cache_bucket) { - printk(KERN_WARNING PFX "Failed to allocate cache in pool"); + printk(KERN_WARNING PFX "Failed to allocate cache in pool\n"); ret = -ENOMEM; goto out_free_pool; } @@ -296,7 +296,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, "ib_fmr(%s)", device->name); if (IS_ERR(pool->thread)) { - printk(KERN_WARNING PFX "couldn't start cleanup thread"); + printk(KERN_WARNING PFX "couldn't start cleanup thread\n"); ret = PTR_ERR(pool->thread); goto out_free_pool; } @@ -314,7 +314,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, GFP_KERNEL); if (!fmr) { printk(KERN_WARNING PFX "failed to allocate fmr " - "struct for FMR %d", i); + "struct for FMR %d\n", i); goto out_fail; } @@ -326,7 +326,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr); if (IS_ERR(fmr->fmr)) { printk(KERN_WARNING PFX "fmr_create failed " - "for FMR %d", i); + "for FMR %d\n", i); kfree(fmr); goto out_fail; } @@ -381,7 +381,7 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) } if (i < pool->pool_size) - printk(KERN_WARNING PFX "pool still has %d regions registered", + printk(KERN_WARNING PFX "pool still has %d regions registered\n", pool->pool_size - i); kfree(pool->cache_bucket); @@ -518,7 +518,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) #ifdef DEBUG if (fmr->ref_count < 0) - printk(KERN_WARNING PFX "FMR %p has ref count %d < 0", + printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n", fmr, fmr->ref_count); #endif diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 15b4c4d..1bc1fe6 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -196,7 +196,7 @@ static void queue_join(struct mcast_member *member) unsigned long flags; spin_lock_irqsave(&group->lock, flags); - list_add(&member->list, &group->pending_list); + list_add_tail(&member->list, &group->pending_list); if (group->state == MCAST_IDLE) { group->state = MCAST_BUSY; atomic_inc(&group->refcount); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index d271bd7..cf474ec 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -123,14 +123,10 @@ static u32 tid; .field_name = "sa_path_rec:" #field static const struct ib_field path_rec_table[] = { - { RESERVED, + { PATH_REC_FIELD(service_id), .offset_words = 0, .offset_bits = 0, - .size_bits = 32 }, - { RESERVED, - .offset_words = 1, - .offset_bits = 0, - .size_bits = 32 }, + .size_bits = 64 }, { PATH_REC_FIELD(dgid), .offset_words = 2, .offset_bits = 0, @@ -179,7 +175,7 @@ static const struct ib_field path_rec_table[] = { .offset_words = 12, .offset_bits = 16, .size_bits = 16 }, - { RESERVED, + { PATH_REC_FIELD(qos_class), .offset_words = 13, .offset_bits = 0, .size_bits = 12 }, @@ -531,7 +527,7 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) query->sm_ah->pkey_index, 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, gfp_mask); - if (!query->mad_buf) { + if (IS_ERR(query->mad_buf)) { kref_put(&query->sm_ah->ref, free_sm_ah); return -ENOMEM; } diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 53b4c94..90d675a 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -792,6 +792,78 @@ out: return ret; } +static int ucma_set_option_id(struct ucma_context *ctx, int optname, + void *optval, size_t optlen) +{ + int ret = 0; + + switch (optname) { + case RDMA_OPTION_ID_TOS: + if (optlen != sizeof(u8)) { + ret = -EINVAL; + break; + } + rdma_set_service_type(ctx->cm_id, *((u8 *) optval)); + break; + default: + ret = -ENOSYS; + } + + return ret; +} + +static int ucma_set_option_level(struct ucma_context *ctx, int level, + int optname, void *optval, size_t optlen) +{ + int ret; + + switch (level) { + case RDMA_OPTION_ID: + ret = ucma_set_option_id(ctx, optname, optval, optlen); + break; + default: + ret = -ENOSYS; + } + + return ret; +} + +static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_set_option cmd; + struct ucma_context *ctx; + void *optval; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + optval = kmalloc(cmd.optlen, GFP_KERNEL); + if (!optval) { + ret = -ENOMEM; + goto out1; + } + + if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval, + cmd.optlen)) { + ret = -EFAULT; + goto out2; + } + + ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval, + cmd.optlen); +out2: + kfree(optval); +out1: + ucma_put_ctx(ctx); + return ret; +} + static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { @@ -936,7 +1008,7 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, [RDMA_USER_CM_CMD_GET_OPTION] = NULL, - [RDMA_USER_CM_CMD_SET_OPTION] = NULL, + [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 664d2faa..2f54e29 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -37,6 +37,7 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/sched.h> +#include <linux/hugetlb.h> #include "uverbs.h" @@ -75,6 +76,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, { struct ib_umem *umem; struct page **page_list; + struct vm_area_struct **vma_list; struct ib_umem_chunk *chunk; unsigned long locked; unsigned long lock_limit; @@ -104,6 +106,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, */ umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ); + /* We assume the memory is from hugetlb until proved otherwise */ + umem->hugetlb = 1; + INIT_LIST_HEAD(&umem->chunk_list); page_list = (struct page **) __get_free_page(GFP_KERNEL); @@ -112,6 +117,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, return ERR_PTR(-ENOMEM); } + /* + * if we can't alloc the vma_list, it's not so bad; + * just assume the memory is not hugetlb memory + */ + vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL); + if (!vma_list) + umem->hugetlb = 0; + npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT; down_write(¤t->mm->mmap_sem); @@ -131,7 +144,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, ret = get_user_pages(current, current->mm, cur_base, min_t(int, npages, PAGE_SIZE / sizeof (struct page *)), - 1, !umem->writable, page_list, NULL); + 1, !umem->writable, page_list, vma_list); if (ret < 0) goto out; @@ -152,6 +165,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK); for (i = 0; i < chunk->nents; ++i) { + if (vma_list && + !is_vm_hugetlb_page(vma_list[i + off])) + umem->hugetlb = 0; chunk->page_list[i].page = page_list[i + off]; chunk->page_list[i].offset = 0; chunk->page_list[i].length = PAGE_SIZE; @@ -186,6 +202,8 @@ out: current->mm->locked_vm = locked; up_write(¤t->mm->mmap_sem); + if (vma_list) + free_page((unsigned long) vma_list); free_page((unsigned long) page_list); return ret < 0 ? ERR_PTR(ret) : umem; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index d97ded2..b53eac4 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -44,6 +44,7 @@ #include <linux/poll.h> #include <linux/rwsem.h> #include <linux/kref.h> +#include <linux/compat.h> #include <asm/uaccess.h> #include <asm/semaphore.h> @@ -118,6 +119,8 @@ struct ib_umad_file { wait_queue_head_t recv_wait; struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; int agents_dead; + u8 use_pkey_index; + u8 already_used; }; struct ib_umad_packet { @@ -147,6 +150,12 @@ static void ib_umad_release_dev(struct kref *ref) kfree(dev); } +static int hdr_size(struct ib_umad_file *file) +{ + return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) : + sizeof (struct ib_user_mad_hdr_old); +} + /* caller must hold port->mutex at least for reading */ static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) { @@ -221,13 +230,13 @@ static void recv_handler(struct ib_mad_agent *agent, packet->length = mad_recv_wc->mad_len; packet->recv_wc = mad_recv_wc; - packet->mad.hdr.status = 0; - packet->mad.hdr.length = sizeof (struct ib_user_mad) + - mad_recv_wc->mad_len; - packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); - packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); - packet->mad.hdr.sl = mad_recv_wc->wc->sl; - packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; + packet->mad.hdr.status = 0; + packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; + packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); + packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); + packet->mad.hdr.sl = mad_recv_wc->wc->sl; + packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; + packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); if (packet->mad.hdr.grh_present) { struct ib_ah_attr ah_attr; @@ -253,8 +262,8 @@ err1: ib_free_recv_mad(mad_recv_wc); } -static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet, - size_t count) +static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, + struct ib_umad_packet *packet, size_t count) { struct ib_mad_recv_buf *recv_buf; int left, seg_payload, offset, max_seg_payload; @@ -262,15 +271,15 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet, /* We need enough room to copy the first (or only) MAD segment. */ recv_buf = &packet->recv_wc->recv_buf; if ((packet->length <= sizeof (*recv_buf->mad) && - count < sizeof (packet->mad) + packet->length) || + count < hdr_size(file) + packet->length) || (packet->length > sizeof (*recv_buf->mad) && - count < sizeof (packet->mad) + sizeof (*recv_buf->mad))) + count < hdr_size(file) + sizeof (*recv_buf->mad))) return -EINVAL; - if (copy_to_user(buf, &packet->mad, sizeof (packet->mad))) + if (copy_to_user(buf, &packet->mad, hdr_size(file))) return -EFAULT; - buf += sizeof (packet->mad); + buf += hdr_size(file); seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad)); if (copy_to_user(buf, recv_buf->mad, seg_payload)) return -EFAULT; @@ -280,7 +289,7 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet, * Multipacket RMPP MAD message. Copy remainder of message. * Note that last segment may have a shorter payload. */ - if (count < sizeof (packet->mad) + packet->length) { + if (count < hdr_size(file) + packet->length) { /* * The buffer is too small, return the first RMPP segment, * which includes the RMPP message length. @@ -300,18 +309,23 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet, return -EFAULT; } } - return sizeof (packet->mad) + packet->length; + return hdr_size(file) + packet->length; } -static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet, - size_t count) +static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf, + struct ib_umad_packet *packet, size_t count) { - ssize_t size = sizeof (packet->mad) + packet->length; + ssize_t size = hdr_size(file) + packet->length; if (count < size) return -EINVAL; - if (copy_to_user(buf, &packet->mad, size)) + if (copy_to_user(buf, &packet->mad, hdr_size(file))) + return -EFAULT; + + buf += hdr_size(file); + + if (copy_to_user(buf, packet->mad.data, packet->length)) return -EFAULT; return size; @@ -324,7 +338,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, struct ib_umad_packet *packet; ssize_t ret; - if (count < sizeof (struct ib_user_mad)) + if (count < hdr_size(file)) return -EINVAL; spin_lock_irq(&file->recv_lock); @@ -348,9 +362,9 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, spin_unlock_irq(&file->recv_lock); if (packet->recv_wc) - ret = copy_recv_mad(buf, packet, count); + ret = copy_recv_mad(file, buf, packet, count); else - ret = copy_send_mad(buf, packet, count); + ret = copy_send_mad(file, buf, packet, count); if (ret < 0) { /* Requeue packet */ @@ -442,15 +456,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, __be64 *tid; int ret, data_len, hdr_len, copy_offset, rmpp_active; - if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR) + if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) return -EINVAL; packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; - if (copy_from_user(&packet->mad, buf, - sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) { + if (copy_from_user(&packet->mad, buf, hdr_size(file))) { ret = -EFAULT; goto err; } @@ -461,6 +474,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, goto err; } + buf += hdr_size(file); + + if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) { + ret = -EFAULT; + goto err; + } + down_read(&file->port->mutex); agent = __get_agent(file, packet->mad.hdr.id); @@ -500,11 +520,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, IB_MGMT_RMPP_FLAG_ACTIVE; } - data_len = count - sizeof (struct ib_user_mad) - hdr_len; + data_len = count - hdr_size(file) - hdr_len; packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), - 0, rmpp_active, hdr_len, - data_len, GFP_KERNEL); + packet->mad.hdr.pkey_index, rmpp_active, + hdr_len, data_len, GFP_KERNEL); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; @@ -517,7 +537,6 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, /* Copy MAD header. Any RMPP header is already in place. */ memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); - buf += sizeof (struct ib_user_mad); if (!rmpp_active) { if (copy_from_user(packet->msg->mad + copy_offset, @@ -589,7 +608,8 @@ static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wa return mask; } -static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg) +static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, + int compat_method_mask) { struct ib_user_mad_reg_req ureq; struct ib_mad_reg_req req; @@ -604,7 +624,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg) goto out; } - if (copy_from_user(&ureq, (void __user *) arg, sizeof ureq)) { + if (copy_from_user(&ureq, arg, sizeof ureq)) { ret = -EFAULT; goto out; } @@ -625,8 +645,18 @@ found: if (ureq.mgmt_class) { req.mgmt_class = ureq.mgmt_class; req.mgmt_class_version = ureq.mgmt_class_version; - memcpy(req.method_mask, ureq.method_mask, sizeof req.method_mask); - memcpy(req.oui, ureq.oui, sizeof req.oui); + memcpy(req.oui, ureq.oui, sizeof req.oui); + + if (compat_method_mask) { + u32 *umm = (u32 *) ureq.method_mask; + int i; + + for (i = 0; i < BITS_TO_LONGS(IB_MGMT_MAX_METHODS); ++i) + req.method_mask[i] = + umm[i * 2] | ((u64) umm[i * 2 + 1] << 32); + } else + memcpy(req.method_mask, ureq.method_mask, + sizeof req.method_mask); } agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, @@ -646,6 +676,16 @@ found: goto out; } + if (!file->already_used) { + file->already_used = 1; + if (!file->use_pkey_index) { + printk(KERN_WARNING "user_mad: process %s did not enable " + "P_Key index support.\n", current->comm); + printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt " + "has info on the new ABI.\n"); + } + } + file->agent[agent_id] = agent; ret = 0; @@ -654,13 +694,13 @@ out: return ret; } -static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg) +static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) { struct ib_mad_agent *agent = NULL; u32 id; int ret = 0; - if (get_user(id, (u32 __user *) arg)) + if (get_user(id, arg)) return -EFAULT; down_write(&file->port->mutex); @@ -682,18 +722,51 @@ out: return ret; } +static long ib_umad_enable_pkey(struct ib_umad_file *file) +{ + int ret = 0; + + down_write(&file->port->mutex); + if (file->already_used) + ret = -EINVAL; + else + file->use_pkey_index = 1; + up_write(&file->port->mutex); + + return ret; +} + static long ib_umad_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { case IB_USER_MAD_REGISTER_AGENT: - return ib_umad_reg_agent(filp->private_data, arg); + return ib_umad_reg_agent(filp->private_data, (void __user *) arg, 0); + case IB_USER_MAD_UNREGISTER_AGENT: + return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg); + case IB_USER_MAD_ENABLE_PKEY: + return ib_umad_enable_pkey(filp->private_data); + default: + return -ENOIOCTLCMD; + } +} + +#ifdef CONFIG_COMPAT +static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case IB_USER_MAD_REGISTER_AGENT: + return ib_umad_reg_agent(filp->private_data, compat_ptr(arg), 1); case IB_USER_MAD_UNREGISTER_AGENT: - return ib_umad_unreg_agent(filp->private_data, arg); + return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg)); + case IB_USER_MAD_ENABLE_PKEY: + return ib_umad_enable_pkey(filp->private_data); default: return -ENOIOCTLCMD; } } +#endif static int ib_umad_open(struct inode *inode, struct file *filp) { @@ -782,7 +855,9 @@ static const struct file_operations umad_fops = { .write = ib_umad_write, .poll = ib_umad_poll, .unlocked_ioctl = ib_umad_ioctl, - .compat_ioctl = ib_umad_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ib_umad_compat_ioctl, +#endif .open = ib_umad_open, .release = ib_umad_close }; diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index c33546f..c75eb6c 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -148,7 +148,6 @@ void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, int is_async, int *fd); -void ib_uverbs_release_event_file(struct kref *ref); struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); void ib_uverbs_release_ucq(struct ib_uverbs_file *file, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 14d7ccd..7c2ac39 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -125,6 +125,14 @@ static void ib_uverbs_release_dev(struct kref *ref) complete(&dev->comp); } +static void ib_uverbs_release_event_file(struct kref *ref) +{ + struct ib_uverbs_event_file *file = + container_of(ref, struct ib_uverbs_event_file, ref); + + kfree(file); +} + void ib_uverbs_release_ucq(struct ib_uverbs_file *file, struct ib_uverbs_event_file *ev_file, struct ib_ucq_object *uobj) @@ -331,14 +339,6 @@ static unsigned int ib_uverbs_event_poll(struct file *filp, return pollflags; } -void ib_uverbs_release_event_file(struct kref *ref) -{ - struct ib_uverbs_event_file *file = - container_of(ref, struct ib_uverbs_event_file, ref); - - kfree(file); -} - static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) { struct ib_uverbs_event_file *file = filp->private_data; |