From 018771f435388f22f388eb8658c652086fb3633e Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 21 Sep 2005 21:40:12 -0700 Subject: [IB] mthca: Fix doorbell record resource leak If we allocate a bunch of doorbell records and then free them, we'll end up with completely empty pages, which we then free. However, when we come back to allocate more doorbell pages, we have to reallocate those empty pages rather than always trying to take a slot that we've never used. If we don't, we eventually use up every slot and fail to allocate a doorbell record, even though we have plenty of free space. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_memfree.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 1827400..d0c41fe 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -529,12 +529,25 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) goto found; } + for (i = start; i != end; i += dir) + if (!dev->db_tab->page[i].db_rec) { + page = dev->db_tab->page + i; + goto alloc; + } + if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) { ret = -ENOMEM; goto out; } + if (group == 0) + ++dev->db_tab->max_group1; + else + --dev->db_tab->min_group2; + page = dev->db_tab->page + end; + +alloc: page->db_rec = dma_alloc_coherent(&dev->pdev->dev, 4096, &page->mapping, GFP_KERNEL); if (!page->db_rec) { @@ -554,10 +567,6 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) } bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE); - if (group == 0) - ++dev->db_tab->max_group1; - else - --dev->db_tab->min_group2; found: j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE); -- cgit v1.1 From f7ed3a5971da98acdc506bdbdef25cfe51c334a2 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 26 Sep 2005 09:29:33 -0700 Subject: [IB] mthca: fix off by one in clr_int calculation We should use the first word of the clear interrupt register if the bit we're after is < 32, not < 31. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_eq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 78152a8..c81fa8e 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -836,7 +836,7 @@ int __devinit mthca_init_eq_table(struct mthca_dev *dev) dev->eq_table.clr_mask = swab32(1 << (dev->eq_table.inta_pin & 31)); dev->eq_table.clr_int = dev->clr_base + - (dev->eq_table.inta_pin < 31 ? 4 : 0); + (dev->eq_table.inta_pin < 32 ? 4 : 0); } dev->eq_table.arm_mask = 0; -- cgit v1.1 From 44dd823b00fa64bf01e53557d28555011f122a88 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 26 Sep 2005 09:42:09 -0700 Subject: [IB] mthca: Fix off by one bug in mthca_map_cmd The loop in mthca_map_cmd() would fill one entry past the end of the mailbox buffer before calling the firmware command. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index cc758a2..f6a8ac0 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -605,7 +605,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, err = -EINVAL; goto out; } - for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) { + for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i) { if (virt != -1) { pages[nent * 2] = cpu_to_be64(virt); virt += 1 << lg; @@ -616,7 +616,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, ts += 1 << (lg - 10); ++tc; - if (nent == MTHCA_MAILBOX_SIZE / 16) { + if (++nent == MTHCA_MAILBOX_SIZE / 16) { err = mthca_cmd(dev, mailbox->dma, nent, 0, op, CMD_TIME_CLASS_B, status); if (err || *status) -- cgit v1.1 From 63c47c286d062d93e0501d60797274c84a587e97 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 26 Sep 2005 13:01:03 -0700 Subject: [IB] uverbs: Close some exploitable races Al Viro pointed out that the current IB userspace verbs interface allows userspace to cause mischief by closing file descriptors before we're ready, or issuing the same command twice at the same time. This patch closes those races, and fixes other obvious problems such as a module reference leak. Some other interface bogosities will require an ABI change to fix properly, so I'm deferring those fixes until 2.6.15. Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 120 +++++++++++++++++++--------------- drivers/infiniband/core/uverbs_main.c | 27 +++++--- 3 files changed, 86 insertions(+), 62 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index b1897be..cc12434 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -69,6 +69,7 @@ struct ib_uverbs_event_file { struct ib_uverbs_file { struct kref ref; + struct semaphore mutex; struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e91ebde..5624451 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -76,8 +76,9 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_uverbs_get_context_resp resp; struct ib_udata udata; struct ib_device *ibdev = file->device->ib_dev; + struct ib_ucontext *ucontext; int i; - int ret = in_len; + int ret; if (out_len < sizeof resp) return -ENOSPC; @@ -85,45 +86,56 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + down(&file->mutex); + + if (file->ucontext) { + ret = -EINVAL; + goto err; + } + INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - file->ucontext = ibdev->alloc_ucontext(ibdev, &udata); - if (IS_ERR(file->ucontext)) { - ret = PTR_ERR(file->ucontext); - file->ucontext = NULL; - return ret; - } + ucontext = ibdev->alloc_ucontext(ibdev, &udata); + if (IS_ERR(ucontext)) + return PTR_ERR(file->ucontext); - file->ucontext->device = ibdev; - INIT_LIST_HEAD(&file->ucontext->pd_list); - INIT_LIST_HEAD(&file->ucontext->mr_list); - INIT_LIST_HEAD(&file->ucontext->mw_list); - INIT_LIST_HEAD(&file->ucontext->cq_list); - INIT_LIST_HEAD(&file->ucontext->qp_list); - INIT_LIST_HEAD(&file->ucontext->srq_list); - INIT_LIST_HEAD(&file->ucontext->ah_list); - spin_lock_init(&file->ucontext->lock); + ucontext->device = ibdev; + INIT_LIST_HEAD(&ucontext->pd_list); + INIT_LIST_HEAD(&ucontext->mr_list); + INIT_LIST_HEAD(&ucontext->mw_list); + INIT_LIST_HEAD(&ucontext->cq_list); + INIT_LIST_HEAD(&ucontext->qp_list); + INIT_LIST_HEAD(&ucontext->srq_list); + INIT_LIST_HEAD(&ucontext->ah_list); resp.async_fd = file->async_file.fd; for (i = 0; i < file->device->num_comp; ++i) if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab + i * sizeof (__u32), - &file->comp_file[i].fd, sizeof (__u32))) - goto err; + &file->comp_file[i].fd, sizeof (__u32))) { + ret = -EFAULT; + goto err_free; + } if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - goto err; + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_free; + } + + file->ucontext = ucontext; + up(&file->mutex); return in_len; -err: - ibdev->dealloc_ucontext(file->ucontext); - file->ucontext = NULL; +err_free: + ibdev->dealloc_ucontext(ucontext); - return -EFAULT; +err: + up(&file->mutex); + return ret; } ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, @@ -352,9 +364,9 @@ retry: if (ret) goto err_pd; - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->pd_list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; @@ -368,9 +380,9 @@ retry: return in_len; err_list: - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); down(&ib_uverbs_idr_mutex); idr_remove(&ib_uverbs_pd_idr, uobj->id); @@ -410,9 +422,9 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); kfree(uobj); @@ -512,9 +524,9 @@ retry: resp.mr_handle = obj->uobject.id; - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { @@ -527,9 +539,9 @@ retry: return in_len; err_list: - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&obj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); err_unreg: ib_dereg_mr(mr); @@ -570,9 +582,9 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&memobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); ib_umem_release(file->device->ib_dev, &memobj->umem); kfree(memobj); @@ -647,9 +659,9 @@ retry: if (ret) goto err_cq; - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); memset(&resp, 0, sizeof resp); resp.cq_handle = uobj->uobject.id; @@ -664,9 +676,9 @@ retry: return in_len; err_list: - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); down(&ib_uverbs_idr_mutex); idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); @@ -712,9 +724,9 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); spin_lock_irq(&file->comp_file[0].lock); list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { @@ -847,9 +859,9 @@ retry: resp.qp_handle = uobj->uobject.id; - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { @@ -862,9 +874,9 @@ retry: return in_len; err_list: - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); err_destroy: ib_destroy_qp(qp); @@ -989,9 +1001,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); spin_lock_irq(&file->async_file.lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { @@ -1136,9 +1148,9 @@ retry: resp.srq_handle = uobj->uobject.id; - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { @@ -1151,9 +1163,9 @@ retry: return in_len; err_list: - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); err_destroy: ib_destroy_srq(srq); @@ -1227,9 +1239,9 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle); - spin_lock_irq(&file->ucontext->lock); + down(&file->mutex); list_del(&uobj->uobject.list); - spin_unlock_irq(&file->ucontext->lock); + up(&file->mutex); spin_lock_irq(&file->async_file.lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index ce5bdb7..1251180 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -448,7 +448,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (hdr.in_words * 4 != count) return -EINVAL; - if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table)) + if (hdr.command < 0 || + hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || + !uverbs_cmd_table[hdr.command]) return -EINVAL; if (!file->ucontext && @@ -484,27 +486,29 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) file = kmalloc(sizeof *file + (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file), GFP_KERNEL); - if (!file) - return -ENOMEM; + if (!file) { + ret = -ENOMEM; + goto err; + } file->device = dev; kref_init(&file->ref); + init_MUTEX(&file->mutex); file->ucontext = NULL; + kref_get(&file->ref); ret = ib_uverbs_event_init(&file->async_file, file); if (ret) - goto err; + goto err_kref; file->async_file.is_async = 1; - kref_get(&file->ref); - for (i = 0; i < dev->num_comp; ++i) { + kref_get(&file->ref); ret = ib_uverbs_event_init(&file->comp_file[i], file); if (ret) goto err_async; - kref_get(&file->ref); file->comp_file[i].is_async = 0; } @@ -524,9 +528,16 @@ err_async: ib_uverbs_event_release(&file->async_file); -err: +err_kref: + /* + * One extra kref_put() because we took a reference before the + * event file creation that failed and got us here. + */ + kref_put(&file->ref, ib_uverbs_release_file); kref_put(&file->ref, ib_uverbs_release_file); +err: + module_put(dev->ib_dev->owner); return ret; } -- cgit v1.1 From f02b16bea2d8411b21a531fc381e066985895387 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 26 Sep 2005 21:12:26 -0700 Subject: [IB] mthca: Round up number of slots in HCA context memory table When allocating a table for mem-free HCA context, don't assume that obj_size * nobj is an even multiple of MTHCA_TABLE_CHUNK_SIZE. In particular, make sure we allocate at least one slot even if the table is smaller than MTHCA_TABLE_CHUNK_SIZE. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_memfree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index d0c41fe..7bd7a4b 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -290,7 +290,7 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, int i; u8 status; - num_icm = obj_size * nobj / MTHCA_TABLE_CHUNK_SIZE; + num_icm = (obj_size * nobj + MTHCA_TABLE_CHUNK_SIZE - 1) / MTHCA_TABLE_CHUNK_SIZE; table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL); if (!table) -- cgit v1.1 From a1c337afaf4ec4d4eabc75a5e1170d03161de4e1 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 27 Sep 2005 13:54:44 -0700 Subject: [IB] mthca: fix hw_ver value returned from mthca_query_device The IB spec defines the field to be 32 bits, not 16 bits. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_provider.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 1c1c2e2..3f5319a 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -84,7 +84,7 @@ static int mthca_query_device(struct ib_device *ibdev, props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); - props->hw_ver = be16_to_cpup((__be16 *) (out_mad->data + 32)); + props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); memcpy(&props->node_guid, out_mad->data + 12, 8); -- cgit v1.1