From d8b9f23b23e080d820e3c0aa5ccd7834c26ebf96 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Tue, 9 May 2006 10:50:28 -0700 Subject: IB: Fix display of 4-bit port counters in sysfs The code to display local_link_integrity_errors and excessive_buffer_overrun_errors in /sys/class/infiniband//ports//counters/ uses the wrong shift to extract the 4 bit values. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/core/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 15121cb..21f9282 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -336,7 +336,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, switch (width) { case 4: ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >> - (offset % 4)) & 0xf); + (4 - (offset % 8))) & 0xf); break; case 8: ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]); -- cgit v1.1 From d945e1df28ca07642b3e1a9b9d07074ba5f76be0 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 9 May 2006 10:50:28 -0700 Subject: IB/srp: Fix tracking of pending requests during error handling If a SCSI abort completes, or the command completes successfully, then the driver must remove the command from its queue of pending commands. Similarly, if a device reset succeeds, then all commands queued for the given device must be removed from the queue. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 195 +++++++++++++++++++++--------------- drivers/infiniband/ulp/srp/ib_srp.h | 4 +- 2 files changed, 115 insertions(+), 84 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 5bb5574..c32ce43 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -409,6 +409,34 @@ static int srp_connect_target(struct srp_target_port *target) } } +static void srp_unmap_data(struct scsi_cmnd *scmnd, + struct srp_target_port *target, + struct srp_request *req) +{ + struct scatterlist *scat; + int nents; + + if (!scmnd->request_buffer || + (scmnd->sc_data_direction != DMA_TO_DEVICE && + scmnd->sc_data_direction != DMA_FROM_DEVICE)) + return; + + /* + * This handling of non-SG commands can be killed when the + * SCSI midlayer no longer generates non-SG commands. + */ + if (likely(scmnd->use_sg)) { + nents = scmnd->use_sg; + scat = scmnd->request_buffer; + } else { + nents = 1; + scat = &req->fake_sg; + } + + dma_unmap_sg(target->srp_host->dev->dma_device, scat, nents, + scmnd->sc_data_direction); +} + static int srp_reconnect_target(struct srp_target_port *target) { struct ib_cm_id *new_cm_id; @@ -455,16 +483,16 @@ static int srp_reconnect_target(struct srp_target_port *target) list_for_each_entry(req, &target->req_queue, list) { req->scmnd->result = DID_RESET << 16; req->scmnd->scsi_done(req->scmnd); + srp_unmap_data(req->scmnd, target, req); } target->rx_head = 0; target->tx_head = 0; target->tx_tail = 0; - target->req_head = 0; - for (i = 0; i < SRP_SQ_SIZE - 1; ++i) - target->req_ring[i].next = i + 1; - target->req_ring[SRP_SQ_SIZE - 1].next = -1; + INIT_LIST_HEAD(&target->free_reqs); INIT_LIST_HEAD(&target->req_queue); + for (i = 0; i < SRP_SQ_SIZE; ++i) + list_add_tail(&target->req_ring[i].list, &target->free_reqs); ret = srp_connect_target(target); if (ret) @@ -589,40 +617,10 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, return len; } -static void srp_unmap_data(struct scsi_cmnd *scmnd, - struct srp_target_port *target, - struct srp_request *req) -{ - struct scatterlist *scat; - int nents; - - if (!scmnd->request_buffer || - (scmnd->sc_data_direction != DMA_TO_DEVICE && - scmnd->sc_data_direction != DMA_FROM_DEVICE)) - return; - - /* - * This handling of non-SG commands can be killed when the - * SCSI midlayer no longer generates non-SG commands. - */ - if (likely(scmnd->use_sg)) { - nents = scmnd->use_sg; - scat = scmnd->request_buffer; - } else { - nents = 1; - scat = &req->fake_sg; - } - - dma_unmap_sg(target->srp_host->dev->dma_device, scat, nents, - scmnd->sc_data_direction); -} - -static void srp_remove_req(struct srp_target_port *target, struct srp_request *req, - int index) +static void srp_remove_req(struct srp_target_port *target, struct srp_request *req) { - list_del(&req->list); - req->next = target->req_head; - target->req_head = index; + srp_unmap_data(req->scmnd, target, req); + list_move_tail(&req->list, &target->free_reqs); } static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) @@ -647,7 +645,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) req->tsk_status = rsp->data[3]; complete(&req->done); } else { - scmnd = req->scmnd; + scmnd = req->scmnd; if (!scmnd) printk(KERN_ERR "Null scmnd for RSP w/tag %016llx\n", (unsigned long long) rsp->tag); @@ -665,14 +663,11 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER)) scmnd->resid = be32_to_cpu(rsp->data_in_res_cnt); - srp_unmap_data(scmnd, target, req); - if (!req->tsk_mgmt) { - req->scmnd = NULL; scmnd->host_scribble = (void *) -1L; scmnd->scsi_done(scmnd); - srp_remove_req(target, req, rsp->tag & ~SRP_TAG_TSK_MGMT); + srp_remove_req(target, req); } else req->cmd_done = 1; } @@ -859,7 +854,6 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, struct srp_request *req; struct srp_iu *iu; struct srp_cmd *cmd; - long req_index; int len; if (target->state == SRP_TARGET_CONNECTING) @@ -879,22 +873,20 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, dma_sync_single_for_cpu(target->srp_host->dev->dma_device, iu->dma, SRP_MAX_IU_LEN, DMA_TO_DEVICE); - req_index = target->req_head; + req = list_entry(target->free_reqs.next, struct srp_request, list); scmnd->scsi_done = done; scmnd->result = 0; - scmnd->host_scribble = (void *) req_index; + scmnd->host_scribble = (void *) (long) req->index; cmd = iu->buf; memset(cmd, 0, sizeof *cmd); cmd->opcode = SRP_CMD; cmd->lun = cpu_to_be64((u64) scmnd->device->lun << 48); - cmd->tag = req_index; + cmd->tag = req->index; memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); - req = &target->req_ring[req_index]; - req->scmnd = scmnd; req->cmd = iu; req->cmd_done = 0; @@ -919,8 +911,7 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, goto err_unmap; } - target->req_head = req->next; - list_add_tail(&req->list, &target->req_queue); + list_move_tail(&req->list, &target->req_queue); return 0; @@ -1143,30 +1134,20 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) return 0; } -static int srp_send_tsk_mgmt(struct scsi_cmnd *scmnd, u8 func) +static int srp_send_tsk_mgmt(struct srp_target_port *target, + struct srp_request *req, u8 func) { - struct srp_target_port *target = host_to_target(scmnd->device->host); - struct srp_request *req; struct srp_iu *iu; struct srp_tsk_mgmt *tsk_mgmt; - int req_index; - int ret = FAILED; spin_lock_irq(target->scsi_host->host_lock); if (target->state == SRP_TARGET_DEAD || target->state == SRP_TARGET_REMOVED) { - scmnd->result = DID_BAD_TARGET << 16; + req->scmnd->result = DID_BAD_TARGET << 16; goto out; } - if (scmnd->host_scribble == (void *) -1L) - goto out; - - req_index = (long) scmnd->host_scribble; - printk(KERN_ERR "Abort for req_index %d\n", req_index); - - req = &target->req_ring[req_index]; init_completion(&req->done); iu = __srp_get_tx_iu(target); @@ -1177,10 +1158,10 @@ static int srp_send_tsk_mgmt(struct scsi_cmnd *scmnd, u8 func) memset(tsk_mgmt, 0, sizeof *tsk_mgmt); tsk_mgmt->opcode = SRP_TSK_MGMT; - tsk_mgmt->lun = cpu_to_be64((u64) scmnd->device->lun << 48); - tsk_mgmt->tag = req_index | SRP_TAG_TSK_MGMT; + tsk_mgmt->lun = cpu_to_be64((u64) req->scmnd->device->lun << 48); + tsk_mgmt->tag = req->index | SRP_TAG_TSK_MGMT; tsk_mgmt->tsk_mgmt_func = func; - tsk_mgmt->task_tag = req_index; + tsk_mgmt->task_tag = req->index; if (__srp_post_send(target, iu, sizeof *tsk_mgmt)) goto out; @@ -1188,37 +1169,85 @@ static int srp_send_tsk_mgmt(struct scsi_cmnd *scmnd, u8 func) req->tsk_mgmt = iu; spin_unlock_irq(target->scsi_host->host_lock); + if (!wait_for_completion_timeout(&req->done, msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS))) - return FAILED; - spin_lock_irq(target->scsi_host->host_lock); + return -1; - if (req->cmd_done) { - srp_remove_req(target, req, req_index); - scmnd->scsi_done(scmnd); - } else if (!req->tsk_status) { - srp_remove_req(target, req, req_index); - scmnd->result = DID_ABORT << 16; - ret = SUCCESS; - } + return 0; out: spin_unlock_irq(target->scsi_host->host_lock); - return ret; + return -1; +} + +static int srp_find_req(struct srp_target_port *target, + struct scsi_cmnd *scmnd, + struct srp_request **req) +{ + if (scmnd->host_scribble == (void *) -1L) + return -1; + + *req = &target->req_ring[(long) scmnd->host_scribble]; + + return 0; } static int srp_abort(struct scsi_cmnd *scmnd) { + struct srp_target_port *target = host_to_target(scmnd->device->host); + struct srp_request *req; + int ret = SUCCESS; + printk(KERN_ERR "SRP abort called\n"); - return srp_send_tsk_mgmt(scmnd, SRP_TSK_ABORT_TASK); + if (srp_find_req(target, scmnd, &req)) + return FAILED; + if (srp_send_tsk_mgmt(target, req, SRP_TSK_ABORT_TASK)) + return FAILED; + + spin_lock_irq(target->scsi_host->host_lock); + + if (req->cmd_done) { + srp_remove_req(target, req); + scmnd->scsi_done(scmnd); + } else if (!req->tsk_status) { + srp_remove_req(target, req); + scmnd->result = DID_ABORT << 16; + } else + ret = FAILED; + + spin_unlock_irq(target->scsi_host->host_lock); + + return ret; } static int srp_reset_device(struct scsi_cmnd *scmnd) { + struct srp_target_port *target = host_to_target(scmnd->device->host); + struct srp_request *req, *tmp; + printk(KERN_ERR "SRP reset_device called\n"); - return srp_send_tsk_mgmt(scmnd, SRP_TSK_LUN_RESET); + if (srp_find_req(target, scmnd, &req)) + return FAILED; + if (srp_send_tsk_mgmt(target, req, SRP_TSK_LUN_RESET)) + return FAILED; + if (req->tsk_status) + return FAILED; + + spin_lock_irq(target->scsi_host->host_lock); + + list_for_each_entry_safe(req, tmp, &target->req_queue, list) + if (req->scmnd->device == scmnd->device) { + req->scmnd->result = DID_RESET << 16; + scmnd->scsi_done(scmnd); + srp_remove_req(target, req); + } + + spin_unlock_irq(target->scsi_host->host_lock); + + return SUCCESS; } static int srp_reset_host(struct scsi_cmnd *scmnd) @@ -1518,10 +1547,12 @@ static ssize_t srp_create_target(struct class_device *class_dev, INIT_WORK(&target->work, srp_reconnect_work, target); - for (i = 0; i < SRP_SQ_SIZE - 1; ++i) - target->req_ring[i].next = i + 1; - target->req_ring[SRP_SQ_SIZE - 1].next = -1; + INIT_LIST_HEAD(&target->free_reqs); INIT_LIST_HEAD(&target->req_queue); + for (i = 0; i < SRP_SQ_SIZE; ++i) { + target->req_ring[i].index = i; + list_add_tail(&target->req_ring[i].list, &target->free_reqs); + } ret = srp_parse_options(buf, target); if (ret) diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index bd7f7c3..c5cd43a 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -101,7 +101,7 @@ struct srp_request { */ struct scatterlist fake_sg; struct completion done; - short next; + short index; u8 cmd_done; u8 tsk_status; }; @@ -133,7 +133,7 @@ struct srp_target_port { unsigned tx_tail; struct srp_iu *tx_ring[SRP_SQ_SIZE + 1]; - int req_head; + struct list_head free_reqs; struct list_head req_queue; struct srp_request req_ring[SRP_SQ_SIZE]; -- cgit v1.1 From a3285aa4eecd722508dab01c4932b11b4ba80134 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 9 May 2006 10:50:29 -0700 Subject: IB/mthca: Fix race in reference counting Fix races in in destroying various objects. If a destroy routine waits for an object to become free by doing wait_event(&obj->wait, !atomic_read(&obj->refcount)); /* now clean up and destroy the object */ and another place drops a reference to the object by doing if (atomic_dec_and_test(&obj->refcount)) wake_up(&obj->wait); then this is susceptible to a race where the wait_event() and final freeing of the object occur between the atomic_dec_and_test() and the wake_up(). And this is a use-after-free, since wake_up() will be called on part of the already-freed object. Fix this in mthca by replacing the atomic_t refcounts with plain old integers protected by a spinlock. This makes it possible to do the decrement of the reference count and the wake_up() so that it appears as a single atomic operation to the code waiting on the wait queue. While touching this code, also simplify mthca_cq_clean(): the CQ being cleaned cannot go away, because it still has a QP attached to it. So there's no reason to be paranoid and look up the CQ by number; it's perfectly safe to use the pointer that the callers already have. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cq.c | 41 ++++++++++++++-------------- drivers/infiniband/hw/mthca/mthca_dev.h | 2 +- drivers/infiniband/hw/mthca/mthca_provider.h | 22 ++++++++------- drivers/infiniband/hw/mthca/mthca_qp.c | 31 +++++++++++++++------ drivers/infiniband/hw/mthca/mthca_srq.c | 23 ++++++++++++---- 5 files changed, 74 insertions(+), 45 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 312cf90..205854e 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -238,9 +238,9 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn, spin_lock(&dev->cq_table.lock); cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); - if (cq) - atomic_inc(&cq->refcount); + ++cq->refcount; + spin_unlock(&dev->cq_table.lock); if (!cq) { @@ -254,8 +254,10 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn, if (cq->ibcq.event_handler) cq->ibcq.event_handler(&event, cq->ibcq.cq_context); - if (atomic_dec_and_test(&cq->refcount)) + spin_lock(&dev->cq_table.lock); + if (!--cq->refcount) wake_up(&cq->wait); + spin_unlock(&dev->cq_table.lock); } static inline int is_recv_cqe(struct mthca_cqe *cqe) @@ -267,23 +269,13 @@ static inline int is_recv_cqe(struct mthca_cqe *cqe) return !(cqe->is_send & 0x80); } -void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, +void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn, struct mthca_srq *srq) { - struct mthca_cq *cq; struct mthca_cqe *cqe; u32 prod_index; int nfreed = 0; - spin_lock_irq(&dev->cq_table.lock); - cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); - if (cq) - atomic_inc(&cq->refcount); - spin_unlock_irq(&dev->cq_table.lock); - - if (!cq) - return; - spin_lock_irq(&cq->lock); /* @@ -301,7 +293,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, if (0) mthca_dbg(dev, "Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n", - qpn, cqn, cq->cons_index, prod_index); + qpn, cq->cqn, cq->cons_index, prod_index); /* * Now sweep backwards through the CQ, removing CQ entries @@ -325,8 +317,6 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, } spin_unlock_irq(&cq->lock); - if (atomic_dec_and_test(&cq->refcount)) - wake_up(&cq->wait); } void mthca_cq_resize_copy_cqes(struct mthca_cq *cq) @@ -821,7 +811,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, } spin_lock_init(&cq->lock); - atomic_set(&cq->refcount, 1); + cq->refcount = 1; init_waitqueue_head(&cq->wait); memset(cq_context, 0, sizeof *cq_context); @@ -896,6 +886,17 @@ err_out: return err; } +static inline int get_cq_refcount(struct mthca_dev *dev, struct mthca_cq *cq) +{ + int c; + + spin_lock_irq(&dev->cq_table.lock); + c = cq->refcount; + spin_unlock_irq(&dev->cq_table.lock); + + return c; +} + void mthca_free_cq(struct mthca_dev *dev, struct mthca_cq *cq) { @@ -929,6 +930,7 @@ void mthca_free_cq(struct mthca_dev *dev, spin_lock_irq(&dev->cq_table.lock); mthca_array_clear(&dev->cq_table.cq, cq->cqn & (dev->limits.num_cqs - 1)); + --cq->refcount; spin_unlock_irq(&dev->cq_table.lock); if (dev->mthca_flags & MTHCA_FLAG_MSI_X) @@ -936,8 +938,7 @@ void mthca_free_cq(struct mthca_dev *dev, else synchronize_irq(dev->pdev->irq); - atomic_dec(&cq->refcount); - wait_event(cq->wait, !atomic_read(&cq->refcount)); + wait_event(cq->wait, !get_cq_refcount(dev, cq)); if (cq->is_kernel) { mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 4c1dcb4..f8160b8 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -496,7 +496,7 @@ void mthca_free_cq(struct mthca_dev *dev, void mthca_cq_completion(struct mthca_dev *dev, u32 cqn); void mthca_cq_event(struct mthca_dev *dev, u32 cqn, enum ib_event_type event_type); -void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, +void mthca_cq_clean(struct mthca_dev *dev, struct mthca_cq *cq, u32 qpn, struct mthca_srq *srq); void mthca_cq_resize_copy_cqes(struct mthca_cq *cq); int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 6676a78..179a8f6 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -139,11 +139,12 @@ struct mthca_ah { * a qp may be locked, with the send cq locked first. No other * nesting should be done. * - * Each struct mthca_cq/qp also has an atomic_t ref count. The - * pointer from the cq/qp_table to the struct counts as one reference. - * This reference also is good for access through the consumer API, so - * modifying the CQ/QP etc doesn't need to take another reference. - * Access because of a completion being polled does need a reference. + * Each struct mthca_cq/qp also has an ref count, protected by the + * corresponding table lock. The pointer from the cq/qp_table to the + * struct counts as one reference. This reference also is good for + * access through the consumer API, so modifying the CQ/QP etc doesn't + * need to take another reference. Access to a QP because of a + * completion being polled does not need a reference either. * * Finally, each struct mthca_cq/qp has a wait_queue_head_t for the * destroy function to sleep on. @@ -159,8 +160,9 @@ struct mthca_ah { * - decrement ref count; if zero, wake up waiters * * To destroy a CQ/QP, we can do the following: - * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock - * - decrement ref count + * - lock cq/qp_table + * - remove pointer and decrement ref count + * - unlock cq/qp_table lock * - wait_event until ref count is zero * * It is the consumer's responsibilty to make sure that no QP @@ -197,7 +199,7 @@ struct mthca_cq_resize { struct mthca_cq { struct ib_cq ibcq; spinlock_t lock; - atomic_t refcount; + int refcount; int cqn; u32 cons_index; struct mthca_cq_buf buf; @@ -217,7 +219,7 @@ struct mthca_cq { struct mthca_srq { struct ib_srq ibsrq; spinlock_t lock; - atomic_t refcount; + int refcount; int srqn; int max; int max_gs; @@ -254,7 +256,7 @@ struct mthca_wq { struct mthca_qp { struct ib_qp ibqp; - atomic_t refcount; + int refcount; u32 qpn; int is_direct; u8 port; /* for SQP and memfree use only */ diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index f37b0e3..19765f6 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -240,7 +240,7 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn, spin_lock(&dev->qp_table.lock); qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1)); if (qp) - atomic_inc(&qp->refcount); + ++qp->refcount; spin_unlock(&dev->qp_table.lock); if (!qp) { @@ -257,8 +257,10 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn, if (qp->ibqp.event_handler) qp->ibqp.event_handler(&event, qp->ibqp.qp_context); - if (atomic_dec_and_test(&qp->refcount)) + spin_lock(&dev->qp_table.lock); + if (!--qp->refcount) wake_up(&qp->wait); + spin_unlock(&dev->qp_table.lock); } static int to_mthca_state(enum ib_qp_state ib_state) @@ -833,10 +835,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) { - mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn, + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); if (qp->ibqp.send_cq != qp->ibqp.recv_cq) - mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn, + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); mthca_wq_init(&qp->sq); @@ -1096,7 +1098,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, int ret; int i; - atomic_set(&qp->refcount, 1); + qp->refcount = 1; init_waitqueue_head(&qp->wait); qp->state = IB_QPS_RESET; qp->atomic_rd_en = 0; @@ -1318,6 +1320,17 @@ int mthca_alloc_sqp(struct mthca_dev *dev, return err; } +static inline int get_qp_refcount(struct mthca_dev *dev, struct mthca_qp *qp) +{ + int c; + + spin_lock_irq(&dev->qp_table.lock); + c = qp->refcount; + spin_unlock_irq(&dev->qp_table.lock); + + return c; +} + void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp) { @@ -1339,14 +1352,14 @@ void mthca_free_qp(struct mthca_dev *dev, spin_lock(&dev->qp_table.lock); mthca_array_clear(&dev->qp_table.qp, qp->qpn & (dev->limits.num_qps - 1)); + --qp->refcount; spin_unlock(&dev->qp_table.lock); if (send_cq != recv_cq) spin_unlock(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); - atomic_dec(&qp->refcount); - wait_event(qp->wait, !atomic_read(&qp->refcount)); + wait_event(qp->wait, !get_qp_refcount(dev, qp)); if (qp->state != IB_QPS_RESET) mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0, @@ -1358,10 +1371,10 @@ void mthca_free_qp(struct mthca_dev *dev, * unref the mem-free tables and free the QPN in our table. */ if (!qp->ibqp.uobject) { - mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn, + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); if (qp->ibqp.send_cq != qp->ibqp.recv_cq) - mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn, + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); mthca_free_memfree(dev, qp); diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index adcaf85..1ea4332 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -241,7 +241,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, goto err_out_mailbox; spin_lock_init(&srq->lock); - atomic_set(&srq->refcount, 1); + srq->refcount = 1; init_waitqueue_head(&srq->wait); if (mthca_is_memfree(dev)) @@ -308,6 +308,17 @@ err_out: return err; } +static inline int get_srq_refcount(struct mthca_dev *dev, struct mthca_srq *srq) +{ + int c; + + spin_lock_irq(&dev->srq_table.lock); + c = srq->refcount; + spin_unlock_irq(&dev->srq_table.lock); + + return c; +} + void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) { struct mthca_mailbox *mailbox; @@ -329,10 +340,10 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) spin_lock_irq(&dev->srq_table.lock); mthca_array_clear(&dev->srq_table.srq, srq->srqn & (dev->limits.num_srqs - 1)); + --srq->refcount; spin_unlock_irq(&dev->srq_table.lock); - atomic_dec(&srq->refcount); - wait_event(srq->wait, !atomic_read(&srq->refcount)); + wait_event(srq->wait, !get_srq_refcount(dev, srq)); if (!srq->ibsrq.uobject) { mthca_free_srq_buf(dev, srq); @@ -414,7 +425,7 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn, spin_lock(&dev->srq_table.lock); srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1)); if (srq) - atomic_inc(&srq->refcount); + ++srq->refcount; spin_unlock(&dev->srq_table.lock); if (!srq) { @@ -431,8 +442,10 @@ void mthca_srq_event(struct mthca_dev *dev, u32 srqn, srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context); out: - if (atomic_dec_and_test(&srq->refcount)) + spin_lock(&dev->srq_table.lock); + if (!--srq->refcount) wake_up(&srq->wait); + spin_unlock(&dev->srq_table.lock); } /* -- cgit v1.1 From 5941d079f2c3bdf0dffed1afb8941678fcd0bcb7 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 9 May 2006 22:54:59 -0700 Subject: IPoIB: Free child interfaces properly When deleting a child interface with a non-default P_Key via /sys/class/net/ibX/delete_child, the interface must be freed with free_netdev() (rather than kfree() on the private data). Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 4ca1755..f887780 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -158,10 +158,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) if (priv->pkey == pkey) { unregister_netdev(priv->dev); ipoib_dev_cleanup(priv->dev); - list_del(&priv->list); - - kfree(priv); + free_netdev(priv->dev); ret = 0; break; -- cgit v1.1 From ce477ae4f8c75c94587c3157deffad8219db09a0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 10 May 2006 17:58:41 +0300 Subject: IB/mthca: FMR ioremap fix Addresses for ioremap must be calculated off of pci_resource_start; we can't directly use the bus address as seen by the HCA. Fix the code that remaps device memory for FMR access. Based on patch by Klaus Smolin. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_mr.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 25e1c1d..a486dec 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -761,6 +761,7 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) int __devinit mthca_init_mr_table(struct mthca_dev *dev) { + unsigned long addr; int err, i; err = mthca_alloc_init(&dev->mr_table.mpt_alloc, @@ -796,9 +797,12 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev) goto err_fmr_mpt; } + addr = pci_resource_start(dev->pdev, 4) + + ((pci_resource_len(dev->pdev, 4) - 1) & + dev->mr_table.mpt_base); + dev->mr_table.tavor_fmr.mpt_base = - ioremap(dev->mr_table.mpt_base, - (1 << i) * sizeof (struct mthca_mpt_entry)); + ioremap(addr, (1 << i) * sizeof(struct mthca_mpt_entry)); if (!dev->mr_table.tavor_fmr.mpt_base) { mthca_warn(dev, "MPT ioremap for FMR failed.\n"); @@ -806,9 +810,12 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev) goto err_fmr_mpt; } + addr = pci_resource_start(dev->pdev, 4) + + ((pci_resource_len(dev->pdev, 4) - 1) & + dev->mr_table.mtt_base); + dev->mr_table.tavor_fmr.mtt_base = - ioremap(dev->mr_table.mtt_base, - (1 << i) * MTHCA_MTT_SEG_SIZE); + ioremap(addr, (1 << i) * MTHCA_MTT_SEG_SIZE); if (!dev->mr_table.tavor_fmr.mtt_base) { mthca_warn(dev, "MTT ioremap for FMR failed.\n"); err = -ENOMEM; -- cgit v1.1 From 6f4bb3d8205d943acafa2f536f37131777524b67 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 12 May 2006 14:57:52 -0700 Subject: IB/ipath: Properly terminate PCI ID table The ipath driver's table of PCI IDs needs a { 0, } entry at the end. This makes all of the device aliases visible to userspace so hotplug loads the module for all supported devices. Without the patch, modinfo ipath_core only shows: alias: pci:v00001FC1d0000000Dsv*sd*bc*sc*i* instead of the correct: alias: pci:v00001FC1d00000010sv*sd*bc*sc*i* alias: pci:v00001FC1d0000000Dsv*sd*bc*sc*i* Signed-off-by: Roland Dreier Signed-off-by: Bryan O'Sullivan --- drivers/infiniband/hw/ipath/ipath_driver.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 398add4..3697eda 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -116,10 +116,9 @@ static int __devinit ipath_init_one(struct pci_dev *, #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10 static const struct pci_device_id ipath_pci_tbl[] = { - {PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, - PCI_DEVICE_ID_INFINIPATH_HT)}, - {PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, - PCI_DEVICE_ID_INFINIPATH_PE800)}, + { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) }, + { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) }, + { 0, } }; MODULE_DEVICE_TABLE(pci, ipath_pci_tbl); -- cgit v1.1 From 1b52fa98edd1c3e663ea4a06519e3d20976084a8 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Fri, 12 May 2006 14:57:52 -0700 Subject: IB: refcount race fixes Fix race condition during destruction calls to avoid possibility of accessing object after it has been freed. Instead of waking up a wait queue directly, which is susceptible to a race where the object is freed between the reference count going to 0 and the wake_up(), use a completion to wait in the function doing the freeing. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 12 ++++++---- drivers/infiniband/core/mad.c | 47 ++++++++++++++++++++------------------ drivers/infiniband/core/mad_priv.h | 5 ++-- drivers/infiniband/core/mad_rmpp.c | 20 ++++++++-------- drivers/infiniband/core/ucm.c | 12 ++++++---- 5 files changed, 52 insertions(+), 44 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 7cfedb8..86fee43 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -34,6 +34,8 @@ * * $Id: cm.c 2821 2005-07-08 17:07:28Z sean.hefty $ */ + +#include #include #include #include @@ -122,7 +124,7 @@ struct cm_id_private { struct rb_node service_node; struct rb_node sidr_id_node; spinlock_t lock; /* Do not acquire inside cm.lock */ - wait_queue_head_t wait; + struct completion comp; atomic_t refcount; struct ib_mad_send_buf *msg; @@ -159,7 +161,7 @@ static void cm_work_handler(void *data); static inline void cm_deref_id(struct cm_id_private *cm_id_priv) { if (atomic_dec_and_test(&cm_id_priv->refcount)) - wake_up(&cm_id_priv->wait); + complete(&cm_id_priv->comp); } static int cm_alloc_msg(struct cm_id_private *cm_id_priv, @@ -559,7 +561,7 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, goto error; spin_lock_init(&cm_id_priv->lock); - init_waitqueue_head(&cm_id_priv->wait); + init_completion(&cm_id_priv->comp); INIT_LIST_HEAD(&cm_id_priv->work_list); atomic_set(&cm_id_priv->work_count, -1); atomic_set(&cm_id_priv->refcount, 1); @@ -724,8 +726,8 @@ retest: } cm_free_id(cm_id->local_id); - atomic_dec(&cm_id_priv->refcount); - wait_event(cm_id_priv->wait, !atomic_read(&cm_id_priv->refcount)); + cm_deref_id(cm_id_priv); + wait_for_completion(&cm_id_priv->comp); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); if (cm_id_priv->private_data && cm_id_priv->private_data_len) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 469b692..5ad41a6 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -352,7 +352,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, INIT_WORK(&mad_agent_priv->local_work, local_completions, mad_agent_priv); atomic_set(&mad_agent_priv->refcount, 1); - init_waitqueue_head(&mad_agent_priv->wait); + init_completion(&mad_agent_priv->comp); return &mad_agent_priv->agent; @@ -467,7 +467,7 @@ struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp; mad_snoop_priv->agent.port_num = port_num; mad_snoop_priv->mad_snoop_flags = mad_snoop_flags; - init_waitqueue_head(&mad_snoop_priv->wait); + init_completion(&mad_snoop_priv->comp); mad_snoop_priv->snoop_index = register_snoop_agent( &port_priv->qp_info[qpn], mad_snoop_priv); @@ -486,6 +486,18 @@ error1: } EXPORT_SYMBOL(ib_register_mad_snoop); +static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv) +{ + if (atomic_dec_and_test(&mad_agent_priv->refcount)) + complete(&mad_agent_priv->comp); +} + +static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv) +{ + if (atomic_dec_and_test(&mad_snoop_priv->refcount)) + complete(&mad_snoop_priv->comp); +} + static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_port_private *port_priv; @@ -509,9 +521,8 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) flush_workqueue(port_priv->wq); ib_cancel_rmpp_recvs(mad_agent_priv); - atomic_dec(&mad_agent_priv->refcount); - wait_event(mad_agent_priv->wait, - !atomic_read(&mad_agent_priv->refcount)); + deref_mad_agent(mad_agent_priv); + wait_for_completion(&mad_agent_priv->comp); kfree(mad_agent_priv->reg_req); ib_dereg_mr(mad_agent_priv->agent.mr); @@ -529,9 +540,8 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) atomic_dec(&qp_info->snoop_count); spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - atomic_dec(&mad_snoop_priv->refcount); - wait_event(mad_snoop_priv->wait, - !atomic_read(&mad_snoop_priv->refcount)); + deref_snoop_agent(mad_snoop_priv); + wait_for_completion(&mad_snoop_priv->comp); kfree(mad_snoop_priv); } @@ -600,8 +610,7 @@ static void snoop_send(struct ib_mad_qp_info *qp_info, spin_unlock_irqrestore(&qp_info->snoop_lock, flags); mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, send_buf, mad_send_wc); - if (atomic_dec_and_test(&mad_snoop_priv->refcount)) - wake_up(&mad_snoop_priv->wait); + deref_snoop_agent(mad_snoop_priv); spin_lock_irqsave(&qp_info->snoop_lock, flags); } spin_unlock_irqrestore(&qp_info->snoop_lock, flags); @@ -626,8 +635,7 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info, spin_unlock_irqrestore(&qp_info->snoop_lock, flags); mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, mad_recv_wc); - if (atomic_dec_and_test(&mad_snoop_priv->refcount)) - wake_up(&mad_snoop_priv->wait); + deref_snoop_agent(mad_snoop_priv); spin_lock_irqsave(&qp_info->snoop_lock, flags); } spin_unlock_irqrestore(&qp_info->snoop_lock, flags); @@ -968,8 +976,7 @@ void ib_free_send_mad(struct ib_mad_send_buf *send_buf) free_send_rmpp_list(mad_send_wr); kfree(send_buf->mad); - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); + deref_mad_agent(mad_agent_priv); } EXPORT_SYMBOL(ib_free_send_mad); @@ -1757,8 +1764,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, mad_recv_wc); if (!mad_recv_wc) { - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); + deref_mad_agent(mad_agent_priv); return; } } @@ -1770,8 +1776,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, if (!mad_send_wr) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); ib_free_recv_mad(mad_recv_wc); - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); + deref_mad_agent(mad_agent_priv); return; } ib_mark_mad_done(mad_send_wr); @@ -1790,8 +1795,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, } else { mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, mad_recv_wc); - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); + deref_mad_agent(mad_agent_priv); } } @@ -2021,8 +2025,7 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, mad_send_wc); /* Release reference on agent taken when sending */ - if (atomic_dec_and_test(&mad_agent_priv->refcount)) - wake_up(&mad_agent_priv->wait); + deref_mad_agent(mad_agent_priv); return; done: spin_unlock_irqrestore(&mad_agent_priv->lock, flags); diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 6c9c133..b4fa28d 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -37,6 +37,7 @@ #ifndef __IB_MAD_PRIV_H__ #define __IB_MAD_PRIV_H__ +#include #include #include #include @@ -108,7 +109,7 @@ struct ib_mad_agent_private { struct list_head rmpp_list; atomic_t refcount; - wait_queue_head_t wait; + struct completion comp; }; struct ib_mad_snoop_private { @@ -117,7 +118,7 @@ struct ib_mad_snoop_private { int snoop_index; int mad_snoop_flags; atomic_t refcount; - wait_queue_head_t wait; + struct completion comp; }; struct ib_mad_send_wr_private { diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index dfd4e58..d4704e0 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -49,7 +49,7 @@ struct mad_rmpp_recv { struct list_head list; struct work_struct timeout_work; struct work_struct cleanup_work; - wait_queue_head_t wait; + struct completion comp; enum rmpp_state state; spinlock_t lock; atomic_t refcount; @@ -69,10 +69,16 @@ struct mad_rmpp_recv { u8 method; }; +static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) +{ + if (atomic_dec_and_test(&rmpp_recv->refcount)) + complete(&rmpp_recv->comp); +} + static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) { - atomic_dec(&rmpp_recv->refcount); - wait_event(rmpp_recv->wait, !atomic_read(&rmpp_recv->refcount)); + deref_rmpp_recv(rmpp_recv); + wait_for_completion(&rmpp_recv->comp); ib_destroy_ah(rmpp_recv->ah); kfree(rmpp_recv); } @@ -253,7 +259,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent, goto error; rmpp_recv->agent = agent; - init_waitqueue_head(&rmpp_recv->wait); + init_completion(&rmpp_recv->comp); INIT_WORK(&rmpp_recv->timeout_work, recv_timeout_handler, rmpp_recv); INIT_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler, rmpp_recv); spin_lock_init(&rmpp_recv->lock); @@ -279,12 +285,6 @@ error: kfree(rmpp_recv); return NULL; } -static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) -{ - if (atomic_dec_and_test(&rmpp_recv->refcount)) - wake_up(&rmpp_recv->wait); -} - static struct mad_rmpp_recv * find_rmpp_recv(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index f6a0596..9164a09 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -32,6 +32,8 @@ * * $Id: ucm.c 2594 2005-06-13 19:46:02Z libor $ */ + +#include #include #include #include @@ -72,7 +74,7 @@ struct ib_ucm_file { struct ib_ucm_context { int id; - wait_queue_head_t wait; + struct completion comp; atomic_t ref; int events_reported; @@ -138,7 +140,7 @@ static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) { if (atomic_dec_and_test(&ctx->ref)) - wake_up(&ctx->wait); + complete(&ctx->comp); } static inline int ib_ucm_new_cm_id(int event) @@ -178,7 +180,7 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) return NULL; atomic_set(&ctx->ref, 1); - init_waitqueue_head(&ctx->wait); + init_completion(&ctx->comp); ctx->file = file; INIT_LIST_HEAD(&ctx->events); @@ -586,8 +588,8 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); - atomic_dec(&ctx->ref); - wait_event(ctx->wait, !atomic_read(&ctx->ref)); + ib_ucm_ctx_put(ctx); + wait_for_completion(&ctx->comp); /* No new events will be generated after destroying the cm_id. */ ib_destroy_cm_id(ctx->cm_id); -- cgit v1.1 From 1db76c14d215c8b26024dd532de3dcaf66ea30f7 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 17 May 2006 07:48:07 -0700 Subject: IB/mthca: Make fw_cmd_doorbell default to 0 Setting fw_cmd_doorbell allows FW command to be queued using posted writes instead of requiring polling on a "go" bit, so it should be a performance boost. However, the option causes problems with at least some device/firmware combinations, so set the default to 0 until we understand what's going on better. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 1985b5d..798e13e 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -182,7 +182,7 @@ struct mthca_cmd_context { u8 status; }; -static int fw_cmd_doorbell = 1; +static int fw_cmd_doorbell = 0; module_param(fw_cmd_doorbell, int, 0644); MODULE_PARM_DESC(fw_cmd_doorbell, "post FW commands through doorbell page if nonzero " "(and supported by FW)"); -- cgit v1.1 From e65810566f3e613d9baa5512b8724ebde42ace0f Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 17 May 2006 09:13:21 -0700 Subject: IB/srp: Don't wait for disconnection if sending DREQ fails Sending a DREQ may fail, for example because the remote target has already broken the connection. If so, then SRP should not wait for the disconnection to complete, because it never will. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index c32ce43..351d66f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -340,7 +340,10 @@ static void srp_disconnect_target(struct srp_target_port *target) /* XXX should send SRP_I_LOGOUT request */ init_completion(&target->done); - ib_send_cm_dreq(target->cm_id, NULL, 0); + if (ib_send_cm_dreq(target->cm_id, NULL, 0)) { + printk(KERN_DEBUG PFX "Sending CM DREQ failed\n"); + return; + } wait_for_completion(&target->done); } -- cgit v1.1 From ec2d7208494fe599a5ff13b40a0a20c9881f2737 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 17 May 2006 09:16:03 -0700 Subject: IB/srp: Get rid of extra scsi_host_put()s if reconnection fails If a reconnection attempt fails, then SRP does two scsi_host_put()s. This is a historical relic from an earlier version of the driver that took a reference on the scsi_host before trying to reconnect, so get rid of the extra scsi_host_put(). Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 351d66f..0f24f04 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -354,7 +354,6 @@ static void srp_remove_work(void *target_ptr) spin_lock_irq(target->scsi_host->host_lock); if (target->state != SRP_TARGET_DEAD) { spin_unlock_irq(target->scsi_host->host_lock); - scsi_host_put(target->scsi_host); return; } target->state = SRP_TARGET_REMOVED; @@ -368,8 +367,6 @@ static void srp_remove_work(void *target_ptr) ib_destroy_cm_id(target->cm_id); srp_free_target_ib(target); scsi_host_put(target->scsi_host); - /* And another put to really free the target port... */ - scsi_host_put(target->scsi_host); } static int srp_connect_target(struct srp_target_port *target) -- cgit v1.1 From 093beac189e4295d968f0d38787b46f76cb0eaaa Mon Sep 17 00:00:00 2001 From: Ishai Rabinovitz Date: Wed, 17 May 2006 09:20:48 -0700 Subject: IB/srp: Complete correct SCSI commands on device reset When flushing out queued commands after a successful device reset, make sure that SRP completes the right commands, instead of calling scsi_done on the command passed into the device reset handler over and over. Signed-off-by: Ishai Rabinovitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 0f24f04..9cbdffa 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1241,7 +1241,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) list_for_each_entry_safe(req, tmp, &target->req_queue, list) if (req->scmnd->device == scmnd->device) { req->scmnd->result = DID_RESET << 16; - scmnd->scsi_done(scmnd); + req->scmnd->scsi_done(req->scmnd); srp_remove_req(target, req); } -- cgit v1.1 From 0cb4fe8d2658dc0bd1accfbb74ee288a9d6788f4 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 17 May 2006 22:20:50 -0700 Subject: IB/uverbs: Don't leak ref to mm on error path In ib_umem_release_on_close(), if the kmalloc() fails, then a reference to current->mm will be leaked. Fix this by adding a mmput() instead of just returning on kmalloc() failure. Signed-off-by: Roland Dreier --- drivers/infiniband/core/uverbs_mem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/uverbs_mem.c index 36a32c3..efe147d 100644 --- a/drivers/infiniband/core/uverbs_mem.c +++ b/drivers/infiniband/core/uverbs_mem.c @@ -211,8 +211,10 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem) */ work = kmalloc(sizeof *work, GFP_KERNEL); - if (!work) + if (!work) { + mmput(mm); return; + } INIT_WORK(&work->work, ib_umem_account, work); work->mm = mm; -- cgit v1.1 From 23f3bc0f2c1e26215b671499c07047c325d54d9c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 18 May 2006 18:32:54 +0300 Subject: IB/mthca: Fix posting lists of 256 receive requests for Tavor If we post a list of length 256 exactly, nreq in doorbell gets set to 256 which is wrong: it should be encoded by 0. This is because we only zero it out on the next WR, which may not be there. The solution is to ring the doorbell after posting a WQE, not before posting the next one. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_qp.c | 35 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 19765f6..07c13be 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1727,23 +1727,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, ind = qp->rq.next_ind; - for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { - nreq = 0; - - doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); - doorbell[1] = cpu_to_be32(qp->qpn << 8); - - wmb(); - - mthca_write64(doorbell, - dev->kar + MTHCA_RECEIVE_DOORBELL, - MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); - - qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; - size0 = 0; - } - + for (nreq = 0; wr; wr = wr->next) { if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { mthca_err(dev, "RQ %06x full (%u head, %u tail," " %d max, %d nreq)\n", qp->qpn, @@ -1797,6 +1781,23 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, ++ind; if (unlikely(ind >= qp->rq.max)) ind -= qp->rq.max; + + ++nreq; + if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { + nreq = 0; + + doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); + doorbell[1] = cpu_to_be32(qp->qpn << 8); + + wmb(); + + mthca_write64(doorbell, + dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + + qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; + size0 = 0; + } } out: -- cgit v1.1