From eeb8461e36c99fdf2d058751be924a2aab215005 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 28 Jan 2014 13:40:15 +0200 Subject: IB: Refactor umem to use linear SG table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch refactors the IB core umem code and vendor drivers to use a linear (chained) SG table instead of chunk list. With this change the relevant code becomes clearer—no need for nested loops to build and use umem. Signed-off-by: Shachar Raindel Signed-off-by: Yishai Hadas Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2_provider.c | 23 +-- drivers/infiniband/hw/cxgb3/iwch_provider.c | 19 +- drivers/infiniband/hw/cxgb4/mem.c | 39 ++--- drivers/infiniband/hw/ehca/ehca_classes.h | 2 +- drivers/infiniband/hw/ehca/ehca_mrmw.c | 245 ++++++++++---------------- drivers/infiniband/hw/ipath/ipath_mr.c | 39 ++--- drivers/infiniband/hw/mlx4/doorbell.c | 4 +- drivers/infiniband/hw/mlx4/mr.c | 39 ++--- drivers/infiniband/hw/mlx5/doorbell.c | 4 +- drivers/infiniband/hw/mlx5/mem.c | 80 ++++----- drivers/infiniband/hw/mthca/mthca_provider.c | 42 ++--- drivers/infiniband/hw/nes/nes_verbs.c | 253 ++++++++++++++------------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 66 ++++--- drivers/infiniband/hw/qib/qib_mr.c | 14 +- 14 files changed, 389 insertions(+), 480 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index 07eb3a8..8af33cf 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -431,9 +431,9 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 *pages; u64 kva = 0; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct c2_pd *c2pd = to_c2pd(pd); struct c2_mr *c2mr; @@ -452,10 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } shift = ffs(c2mr->umem->page_size) - 1; - - n = 0; - list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) - n += chunk->nents; + n = c2mr->umem->nmap; pages = kmalloc(n * sizeof(u64), GFP_KERNEL); if (!pages) { @@ -464,14 +461,12 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } i = 0; - list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) { - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = - sg_dma_address(&chunk->page_list[j]) + - (c2mr->umem->page_size * k); - } + for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = + sg_dma_address(sg) + + (c2mr->umem->page_size * k); } } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index d228383..811b24a 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -618,14 +618,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { __be64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; struct iwch_dev *rhp; struct iwch_pd *php; struct iwch_mr *mhp; struct iwch_reg_user_mr_resp uresp; - + struct scatterlist *sg; PDBG("%s ib_pd %p\n", __func__, pd); php = to_iwch_pd(pd); @@ -645,9 +644,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = ffs(mhp->umem->page_size) - 1; - n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - n += chunk->nents; + n = mhp->umem->nmap; err = iwch_alloc_pbl(mhp, n); if (err) @@ -661,12 +658,10 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; + for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; for (k = 0; k < len; ++k) { - pages[i++] = cpu_to_be64(sg_dma_address( - &chunk->page_list[j]) + + pages[i++] = cpu_to_be64(sg_dma_address(sg) + mhp->umem->page_size * k); if (i == PAGE_SIZE / sizeof *pages) { err = iwch_write_pbl(mhp, pages, i, n); @@ -676,7 +671,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = 0; } } - } + } if (i) err = iwch_write_pbl(mhp, pages, i, n); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 41b1195..392d422 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -678,9 +678,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { __be64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct c4iw_dev *rhp; struct c4iw_pd *php; struct c4iw_mr *mhp; @@ -710,10 +710,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = ffs(mhp->umem->page_size) - 1; - n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - n += chunk->nents; - + n = mhp->umem->nmap; err = alloc_pbl(mhp, n); if (err) goto err; @@ -726,24 +723,22 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = cpu_to_be64(sg_dma_address( - &chunk->page_list[j]) + - mhp->umem->page_size * k); - if (i == PAGE_SIZE / sizeof *pages) { - err = write_pbl(&mhp->rhp->rdev, - pages, - mhp->attr.pbl_addr + (n << 3), i); - if (err) - goto pbl_done; - n += i; - i = 0; - } + for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = cpu_to_be64(sg_dma_address(sg) + + mhp->umem->page_size * k); + if (i == PAGE_SIZE / sizeof *pages) { + err = write_pbl(&mhp->rhp->rdev, + pages, + mhp->attr.pbl_addr + (n << 3), i); + if (err) + goto pbl_done; + n += i; + i = 0; } } + } if (i) err = write_pbl(&mhp->rhp->rdev, pages, diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index f08f6ea..bd45e0f 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -322,7 +322,7 @@ struct ehca_mr_pginfo { } phy; struct { /* type EHCA_MR_PGI_USER section */ struct ib_umem *region; - struct ib_umem_chunk *next_chunk; + struct scatterlist *next_sg; u64 next_nmap; } usr; struct { /* type EHCA_MR_PGI_FMR section */ diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index bcfb0c1..7168f59 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -400,10 +400,7 @@ reg_user_mr_fallback: pginfo.num_hwpages = num_hwpages; pginfo.u.usr.region = e_mr->umem; pginfo.next_hwpage = e_mr->umem->offset / hwpage_size; - pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk, - (&e_mr->umem->chunk_list), - list); - + pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl; ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); @@ -1858,61 +1855,39 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, u64 *kpage) { int ret = 0; - struct ib_umem_chunk *prev_chunk; - struct ib_umem_chunk *chunk; u64 pgaddr; - u32 i = 0; u32 j = 0; int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; - - /* loop over desired chunk entries */ - chunk = pginfo->u.usr.next_chunk; - prev_chunk = pginfo->u.usr.next_chunk; - list_for_each_entry_continue( - chunk, (&(pginfo->u.usr.region->chunk_list)), list) { - for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { - pgaddr = page_to_pfn(sg_page(&chunk->page_list[i])) - << PAGE_SHIFT ; - *kpage = pgaddr + (pginfo->next_hwpage * - pginfo->hwpage_size); - if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%llx " - "chunk->page_list[i]=%llx " - "i=%x next_hwpage=%llx", - pgaddr, (u64)sg_dma_address( - &chunk->page_list[i]), - i, pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - kpage++; - if (pginfo->next_hwpage % hwpages_per_kpage == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.usr.next_nmap)++; - pginfo->next_hwpage = 0; - i++; - } - j++; - if (j >= number) break; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { + pgaddr = page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT; + *kpage = pgaddr + (pginfo->next_hwpage * + pginfo->hwpage_size); + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx " + "sg_dma_address=%llx " + "entry=%llx next_hwpage=%llx", + pgaddr, (u64)sg_dma_address(*sg), + pginfo->u.usr.next_nmap, + pginfo->next_hwpage); + return -EFAULT; } - if ((pginfo->u.usr.next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % hwpages_per_kpage == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + *sg = sg_next(*sg); + } + j++; + if (j >= number) break; - else - prev_chunk = chunk; } - pginfo->u.usr.next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->u.usr.region->chunk_list)), - list); + return ret; } @@ -1920,20 +1895,19 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, * check given pages for contiguous layout * last page addr is returned in prev_pgaddr for further check */ -static int ehca_check_kpages_per_ate(struct scatterlist *page_list, - int start_idx, int end_idx, +static int ehca_check_kpages_per_ate(struct scatterlist **sg, + int num_pages, u64 *prev_pgaddr) { - int t; - for (t = start_idx; t <= end_idx; t++) { - u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT; + for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { + u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT; if (ehca_debug_level >= 3) ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, *(u64 *)__va(pgaddr)); if (pgaddr - PAGE_SIZE != *prev_pgaddr) { ehca_gen_err("uncontiguous page found pgaddr=%llx " - "prev_pgaddr=%llx page_list_i=%x", - pgaddr, *prev_pgaddr, t); + "prev_pgaddr=%llx entries_left_in_hwpage=%x", + pgaddr, *prev_pgaddr, num_pages); return -EINVAL; } *prev_pgaddr = pgaddr; @@ -1947,111 +1921,80 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, u64 *kpage) { int ret = 0; - struct ib_umem_chunk *prev_chunk; - struct ib_umem_chunk *chunk; u64 pgaddr, prev_pgaddr; - u32 i = 0; u32 j = 0; int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; int nr_kpages = kpages_per_hwpage; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { - /* loop over desired chunk entries */ - chunk = pginfo->u.usr.next_chunk; - prev_chunk = pginfo->u.usr.next_chunk; - list_for_each_entry_continue( - chunk, (&(pginfo->u.usr.region->chunk_list)), list) { - for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { - if (nr_kpages == kpages_per_hwpage) { - pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i])) - << PAGE_SHIFT ); - *kpage = pgaddr; - if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%llx i=%x", - pgaddr, i); + if (nr_kpages == kpages_per_hwpage) { + pgaddr = (page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT); + *kpage = pgaddr; + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx entry=%llx", + pgaddr, pginfo->u.usr.next_nmap); + ret = -EFAULT; + return ret; + } + /* + * The first page in a hwpage must be aligned; + * the first MR page is exempt from this rule. + */ + if (pgaddr & (pginfo->hwpage_size - 1)) { + if (pginfo->hwpage_cnt) { + ehca_gen_err( + "invalid alignment " + "pgaddr=%llx entry=%llx " + "mr_pgsize=%llx", + pgaddr, pginfo->u.usr.next_nmap, + pginfo->hwpage_size); ret = -EFAULT; return ret; } - /* - * The first page in a hwpage must be aligned; - * the first MR page is exempt from this rule. - */ - if (pgaddr & (pginfo->hwpage_size - 1)) { - if (pginfo->hwpage_cnt) { - ehca_gen_err( - "invalid alignment " - "pgaddr=%llx i=%x " - "mr_pgsize=%llx", - pgaddr, i, - pginfo->hwpage_size); - ret = -EFAULT; - return ret; - } - /* first MR page */ - pginfo->kpage_cnt = - (pgaddr & - (pginfo->hwpage_size - 1)) >> - PAGE_SHIFT; - nr_kpages -= pginfo->kpage_cnt; - *kpage = pgaddr & - ~(pginfo->hwpage_size - 1); - } - if (ehca_debug_level >= 3) { - u64 val = *(u64 *)__va(pgaddr); - ehca_gen_dbg("kpage=%llx chunk_page=%llx " - "value=%016llx", - *kpage, pgaddr, val); - } - prev_pgaddr = pgaddr; - i++; - pginfo->kpage_cnt++; - pginfo->u.usr.next_nmap++; - nr_kpages--; - if (!nr_kpages) - goto next_kpage; - continue; + /* first MR page */ + pginfo->kpage_cnt = + (pgaddr & + (pginfo->hwpage_size - 1)) >> + PAGE_SHIFT; + nr_kpages -= pginfo->kpage_cnt; + *kpage = pgaddr & + ~(pginfo->hwpage_size - 1); } - if (i + nr_kpages > chunk->nmap) { - ret = ehca_check_kpages_per_ate( - chunk->page_list, i, - chunk->nmap - 1, &prev_pgaddr); - if (ret) return ret; - pginfo->kpage_cnt += chunk->nmap - i; - pginfo->u.usr.next_nmap += chunk->nmap - i; - nr_kpages -= chunk->nmap - i; - break; + if (ehca_debug_level >= 3) { + u64 val = *(u64 *)__va(pgaddr); + ehca_gen_dbg("kpage=%llx page=%llx " + "value=%016llx", + *kpage, pgaddr, val); } + prev_pgaddr = pgaddr; + *sg = sg_next(*sg); + pginfo->kpage_cnt++; + pginfo->u.usr.next_nmap++; + nr_kpages--; + if (!nr_kpages) + goto next_kpage; + continue; + } + + ret = ehca_check_kpages_per_ate(sg, nr_kpages, + &prev_pgaddr); + if (ret) + return ret; + pginfo->kpage_cnt += nr_kpages; + pginfo->u.usr.next_nmap += nr_kpages; - ret = ehca_check_kpages_per_ate(chunk->page_list, i, - i + nr_kpages - 1, - &prev_pgaddr); - if (ret) return ret; - i += nr_kpages; - pginfo->kpage_cnt += nr_kpages; - pginfo->u.usr.next_nmap += nr_kpages; next_kpage: - nr_kpages = kpages_per_hwpage; - (pginfo->hwpage_cnt)++; - kpage++; - j++; - if (j >= number) break; - } - if ((pginfo->u.usr.next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) + nr_kpages = kpages_per_hwpage; + (pginfo->hwpage_cnt)++; + kpage++; + j++; + if (j >= number) break; - else - prev_chunk = chunk; } - pginfo->u.usr.next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->u.usr.region->chunk_list)), - list); + return ret; } diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index e346d38..5e61e9b 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -188,8 +188,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct ipath_mr *mr; struct ib_umem *umem; - struct ib_umem_chunk *chunk; - int n, m, i; + int n, m, entry; + struct scatterlist *sg; struct ib_mr *ret; if (length == 0) { @@ -202,10 +202,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ERR(umem)) return (void *) umem; - n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - n += chunk->nents; - + n = umem->nmap; mr = alloc_mr(n, &to_idev(pd->device)->lk_table); if (!mr) { ret = ERR_PTR(-ENOMEM); @@ -224,22 +221,20 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, m = 0; n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) { - for (i = 0; i < chunk->nents; i++) { - void *vaddr; - - vaddr = page_address(sg_page(&chunk->page_list[i])); - if (!vaddr) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - mr->mr.map[m]->segs[n].vaddr = vaddr; - mr->mr.map[m]->segs[n].length = umem->page_size; - n++; - if (n == IPATH_SEGSZ) { - m++; - n = 0; - } + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + void *vaddr; + + vaddr = page_address(sg_page(sg)); + if (!vaddr) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + mr->mr.map[m]->segs[n].vaddr = vaddr; + mr->mr.map[m]->segs[n].length = umem->page_size; + n++; + if (n == IPATH_SEGSZ) { + m++; + n = 0; } } ret = &mr->ibmr; diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 8aee423..c517409 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -45,7 +45,6 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, struct mlx4_db *db) { struct mlx4_ib_user_db_page *page; - struct ib_umem_chunk *chunk; int err = 0; mutex_lock(&context->db_page_mutex); @@ -73,8 +72,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, list_add(&page->list, &context->db_page_list); found: - chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); - db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); + db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); db->u.user_page = page; ++page->refcnt; diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index e471f08..cb2a872 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -90,11 +90,11 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_umem *umem) { u64 *pages; - struct ib_umem_chunk *chunk; - int i, j, k; + int i, k, entry; int n; int len; int err = 0; + struct scatterlist *sg; pages = (u64 *) __get_free_page(GFP_KERNEL); if (!pages) @@ -102,26 +102,25 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, i = n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift; - for (k = 0; k < len; ++k) { - pages[i++] = sg_dma_address(&chunk->page_list[j]) + - umem->page_size * k; - /* - * Be friendly to mlx4_write_mtt() and - * pass it chunks of appropriate size. - */ - if (i == PAGE_SIZE / sizeof (u64)) { - err = mlx4_write_mtt(dev->dev, mtt, n, - i, pages); - if (err) - goto out; - n += i; - i = 0; - } + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> mtt->page_shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(sg) + + umem->page_size * k; + /* + * Be friendly to mlx4_write_mtt() and + * pass it chunks of appropriate size. + */ + if (i == PAGE_SIZE / sizeof (u64)) { + err = mlx4_write_mtt(dev->dev, mtt, n, + i, pages); + if (err) + goto out; + n += i; + i = 0; } } + } if (i) err = mlx4_write_mtt(dev->dev, mtt, n, i, pages); diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 256a233..ece028f 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -47,7 +47,6 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, struct mlx5_db *db) { struct mlx5_ib_user_db_page *page; - struct ib_umem_chunk *chunk; int err = 0; mutex_lock(&context->db_page_mutex); @@ -75,8 +74,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, list_add(&page->list, &context->db_page_list); found: - chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); - db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); + db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); db->u.user_page = page; ++page->refcnt; diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 3a53228..8499aec 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -44,16 +44,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, int *ncont, int *order) { - struct ib_umem_chunk *chunk; unsigned long tmp; unsigned long m; - int i, j, k; + int i, k; u64 base = 0; int p = 0; int skip; int mask; u64 len; u64 pfn; + struct scatterlist *sg; + int entry; addr = addr >> PAGE_SHIFT; tmp = (unsigned long)addr; @@ -61,32 +62,31 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, skip = 1 << m; mask = skip - 1; i = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; j++) { - len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; - pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT; - for (k = 0; k < len; k++) { - if (!(i & mask)) { - tmp = (unsigned long)pfn; - m = min(m, find_first_bit(&tmp, sizeof(tmp))); + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + pfn = sg_dma_address(sg) >> PAGE_SHIFT; + for (k = 0; k < len; k++) { + if (!(i & mask)) { + tmp = (unsigned long)pfn; + m = min(m, find_first_bit(&tmp, sizeof(tmp))); + skip = 1 << m; + mask = skip - 1; + base = pfn; + p = 0; + } else { + if (base + p != pfn) { + tmp = (unsigned long)p; + m = find_first_bit(&tmp, sizeof(tmp)); skip = 1 << m; mask = skip - 1; base = pfn; p = 0; - } else { - if (base + p != pfn) { - tmp = (unsigned long)p; - m = find_first_bit(&tmp, sizeof(tmp)); - skip = 1 << m; - mask = skip - 1; - base = pfn; - p = 0; - } } - p++; - i++; } + p++; + i++; } + } if (i) { m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); @@ -112,32 +112,32 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, { int shift = page_shift - PAGE_SHIFT; int mask = (1 << shift) - 1; - struct ib_umem_chunk *chunk; - int i, j, k; + int i, k; u64 cur = 0; u64 base; int len; + struct scatterlist *sg; + int entry; i = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; j++) { - len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; - base = sg_dma_address(&chunk->page_list[j]); - for (k = 0; k < len; k++) { - if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); - if (umr) - cur |= 3; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + base = sg_dma_address(sg); + for (k = 0; k < len; k++) { + if (!(i & mask)) { + cur = base + (k << PAGE_SHIFT); + if (umr) + cur |= 3; - pas[i >> shift] = cpu_to_be64(cur); - mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", - i >> shift, be64_to_cpu(pas[i >> shift])); - } else - mlx5_ib_dbg(dev, "=====> 0x%llx\n", - base + (k << PAGE_SHIFT)); - i++; - } + pas[i >> shift] = cpu_to_be64(cur); + mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", + i >> shift, be64_to_cpu(pas[i >> shift])); + } else + mlx5_ib_dbg(dev, "=====> 0x%llx\n", + base + (k << PAGE_SHIFT)); + i++; } + } } int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 5b71d43..6440800 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -976,12 +976,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(pd->device); - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct mthca_mr *mr; struct mthca_reg_mr ucmd; u64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; int write_mtt_size; @@ -1009,10 +1009,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } shift = ffs(mr->umem->page_size) - 1; - - n = 0; - list_for_each_entry(chunk, &mr->umem->chunk_list, list) - n += chunk->nents; + n = mr->umem->nmap; mr->mtt = mthca_alloc_mtt(dev, n); if (IS_ERR(mr->mtt)) { @@ -1030,25 +1027,24 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); - list_for_each_entry(chunk, &mr->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = sg_dma_address(&chunk->page_list[j]) + - mr->umem->page_size * k; - /* - * Be friendly to write_mtt and pass it chunks - * of appropriate size. - */ - if (i == write_mtt_size) { - err = mthca_write_mtt(dev, mr->mtt, n, pages, i); - if (err) - goto mtt_done; - n += i; - i = 0; - } + for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(sg) + + mr->umem->page_size * k; + /* + * Be friendly to write_mtt and pass it chunks + * of appropriate size. + */ + if (i == write_mtt_size) { + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); + if (err) + goto mtt_done; + n += i; + i = 0; } } + } if (i) err = mthca_write_mtt(dev, mr->mtt, n, pages, i); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 8308e36..32d3682 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2307,7 +2307,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; struct ib_mr *ibmr = ERR_PTR(-EINVAL); - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct nes_ucontext *nes_ucontext; struct nes_pbl *nespbl; struct nes_mr *nesmr; @@ -2315,7 +2315,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct nes_mem_reg_req req; struct nes_vpbl vpbl; struct nes_root_vpbl root_vpbl; - int nmap_index, page_index; + int entry, page_index; int page_count = 0; int err, pbl_depth = 0; int chunk_pages; @@ -2330,6 +2330,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u16 pbl_count; u8 single_page = 1; u8 stag_key; + int first_page = 1; region = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(region)) { @@ -2380,128 +2381,125 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } nesmr->region = region; - list_for_each_entry(chunk, ®ion->chunk_list, list) { - nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n", - chunk->nents, chunk->nmap); - for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { - if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) { - ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n", - (unsigned int) sg_dma_address(&chunk->page_list[nmap_index])); - ibmr = ERR_PTR(-EINVAL); - kfree(nesmr); - goto reg_user_mr_err; - } + for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) { + if (sg_dma_address(sg) & ~PAGE_MASK) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n", + (unsigned int) sg_dma_address(sg)); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_user_mr_err; + } - if (!sg_dma_len(&chunk->page_list[nmap_index])) { - ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - nes_debug(NES_DBG_MR, "Invalid Buffer Size\n"); - ibmr = ERR_PTR(-EINVAL); - kfree(nesmr); - goto reg_user_mr_err; - } + if (!sg_dma_len(sg)) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); + nes_debug(NES_DBG_MR, "Invalid Buffer Size\n"); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_user_mr_err; + } - region_length += sg_dma_len(&chunk->page_list[nmap_index]); - chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12; - region_length -= skip_pages << 12; - for (page_index=skip_pages; page_index < chunk_pages; page_index++) { - skip_pages = 0; - if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length) - goto enough_pages; - if ((page_count&0x01FF) == 0) { - if (page_count >= 1024 * 512) { + region_length += sg_dma_len(sg); + chunk_pages = sg_dma_len(sg) >> 12; + region_length -= skip_pages << 12; + for (page_index = skip_pages; page_index < chunk_pages; page_index++) { + skip_pages = 0; + if ((page_count != 0) && (page_count<<12)-(region->offset&(4096-1)) >= region->length) + goto enough_pages; + if ((page_count&0x01FF) == 0) { + if (page_count >= 1024 * 512) { + ib_umem_release(region); + nes_free_resource(nesadapter, + nesadapter->allocated_mrs, stag_index); + kfree(nesmr); + ibmr = ERR_PTR(-E2BIG); + goto reg_user_mr_err; + } + if (root_pbl_index == 1) { + root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, + 8192, &root_vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", + root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); + if (!root_vpbl.pbl_vbase) { ib_umem_release(region); - nes_free_resource(nesadapter, - nesadapter->allocated_mrs, stag_index); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); kfree(nesmr); - ibmr = ERR_PTR(-E2BIG); + ibmr = ERR_PTR(-ENOMEM); goto reg_user_mr_err; } - if (root_pbl_index == 1) { - root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, - 8192, &root_vpbl.pbl_pbase); - nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", - root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); - if (!root_vpbl.pbl_vbase) { - ib_umem_release(region); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - kfree(nesmr); - ibmr = ERR_PTR(-ENOMEM); - goto reg_user_mr_err; - } - root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, - GFP_KERNEL); - if (!root_vpbl.leaf_vpbl) { - ib_umem_release(region); - pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, - root_vpbl.pbl_pbase); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - kfree(nesmr); - ibmr = ERR_PTR(-ENOMEM); - goto reg_user_mr_err; - } - root_vpbl.pbl_vbase[0].pa_low = - cpu_to_le32((u32)vpbl.pbl_pbase); - root_vpbl.pbl_vbase[0].pa_high = - cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); - root_vpbl.leaf_vpbl[0] = vpbl; - } - vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, - &vpbl.pbl_pbase); - nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n", - vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase); - if (!vpbl.pbl_vbase) { + root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, + GFP_KERNEL); + if (!root_vpbl.leaf_vpbl) { ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - ibmr = ERR_PTR(-ENOMEM); + pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, + root_vpbl.pbl_pbase); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); kfree(nesmr); + ibmr = ERR_PTR(-ENOMEM); goto reg_user_mr_err; } - if (1 <= root_pbl_index) { - root_vpbl.pbl_vbase[root_pbl_index].pa_low = - cpu_to_le32((u32)vpbl.pbl_pbase); - root_vpbl.pbl_vbase[root_pbl_index].pa_high = - cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32))); - root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; - } - root_pbl_index++; - cur_pbl_index = 0; + root_vpbl.pbl_vbase[0].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[0].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); + root_vpbl.leaf_vpbl[0] = vpbl; } - if (single_page) { - if (page_count != 0) { - if ((last_dma_addr+4096) != - (sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))) - single_page = 0; - last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096); - } else { - first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096); - last_dma_addr = first_dma_addr; - } + vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, + &vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n", + vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase); + if (!vpbl.pbl_vbase) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + ibmr = ERR_PTR(-ENOMEM); + kfree(nesmr); + goto reg_user_mr_err; + } + if (1 <= root_pbl_index) { + root_vpbl.pbl_vbase[root_pbl_index].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[root_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32))); + root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; + } + root_pbl_index++; + cur_pbl_index = 0; + } + if (single_page) { + if (page_count != 0) { + if ((last_dma_addr+4096) != + (sg_dma_address(sg)+ + (page_index*4096))) + single_page = 0; + last_dma_addr = sg_dma_address(sg)+ + (page_index*4096); + } else { + first_dma_addr = sg_dma_address(sg)+ + (page_index*4096); + last_dma_addr = first_dma_addr; } - - vpbl.pbl_vbase[cur_pbl_index].pa_low = - cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))); - vpbl.pbl_vbase[cur_pbl_index].pa_high = - cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))) >> 32))); - cur_pbl_index++; - page_count++; } + + vpbl.pbl_vbase[cur_pbl_index].pa_low = + cpu_to_le32((u32)(sg_dma_address(sg)+ + (page_index*4096))); + vpbl.pbl_vbase[cur_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)(sg_dma_address(sg)+ + (page_index*4096))) >> 32))); + cur_pbl_index++; + page_count++; } } + enough_pages: nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x," " stag_key=0x%08x\n", @@ -2613,25 +2611,28 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase, (void *) nespbl->pbl_vbase, nespbl->user_base); - list_for_each_entry(chunk, ®ion->chunk_list, list) { - for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { - chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12; - chunk_pages += (sg_dma_len(&chunk->page_list[nmap_index]) & (4096-1)) ? 1 : 0; - nespbl->page = sg_page(&chunk->page_list[0]); - for (page_index=0; page_indexpage_list[nmap_index])+ - (page_index*4096))); - ((__le32 *)pbl)[1] = cpu_to_le32(((u64) - (sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096)))>>32); - nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl, - (unsigned long long)*pbl, - le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0])); - pbl++; - } + for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) { + chunk_pages = sg_dma_len(sg) >> 12; + chunk_pages += (sg_dma_len(sg) & (4096-1)) ? 1 : 0; + if (first_page) { + nespbl->page = sg_page(sg); + first_page = 0; + } + + for (page_index = 0; page_index < chunk_pages; page_index++) { + ((__le32 *)pbl)[0] = cpu_to_le32((u32) + (sg_dma_address(sg)+ + (page_index*4096))); + ((__le32 *)pbl)[1] = cpu_to_le32(((u64) + (sg_dma_address(sg)+ + (page_index*4096)))>>32); + nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl, + (unsigned long long)*pbl, + le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0])); + pbl++; } } + if (req.reg_type == IWNES_MEMREG_TYPE_QP) { list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list); } else { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e0cc201..0de3473 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -726,10 +726,10 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, u32 num_pbes) { struct ocrdma_pbe *pbe; - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table; struct ib_umem *umem = mr->umem; - int i, shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0; + int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0; if (!mr->hwmr.num_pbes) return; @@ -739,39 +739,37 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, shift = ilog2(umem->page_size); - list_for_each_entry(chunk, &umem->chunk_list, list) { - /* get all the dma regions from the chunk. */ - for (i = 0; i < chunk->nmap; i++) { - pages = sg_dma_len(&chunk->page_list[i]) >> shift; - for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { - /* store the page address in pbe */ - pbe->pa_lo = - cpu_to_le32(sg_dma_address - (&chunk->page_list[i]) + - (umem->page_size * pg_cnt)); - pbe->pa_hi = - cpu_to_le32(upper_32_bits - ((sg_dma_address - (&chunk->page_list[i]) + - umem->page_size * pg_cnt))); - pbe_cnt += 1; - total_num_pbes += 1; - pbe++; - - /* if done building pbes, issue the mbx cmd. */ - if (total_num_pbes == num_pbes) - return; - - /* if the given pbl is full storing the pbes, - * move to next pbl. - */ - if (pbe_cnt == - (mr->hwmr.pbl_size / sizeof(u64))) { - pbl_tbl++; - pbe = (struct ocrdma_pbe *)pbl_tbl->va; - pbe_cnt = 0; - } + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + pages = sg_dma_len(sg) >> shift; + for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { + /* store the page address in pbe */ + pbe->pa_lo = + cpu_to_le32(sg_dma_address + (sg) + + (umem->page_size * pg_cnt)); + pbe->pa_hi = + cpu_to_le32(upper_32_bits + ((sg_dma_address + (sg) + + umem->page_size * pg_cnt))); + pbe_cnt += 1; + total_num_pbes += 1; + pbe++; + + /* if done building pbes, issue the mbx cmd. */ + if (total_num_pbes == num_pbes) + return; + + /* if the given pbl is full storing the pbes, + * move to next pbl. + */ + if (pbe_cnt == + (mr->hwmr.pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + pbe_cnt = 0; } + } } } diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index e6687de..9bbb553 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -232,8 +232,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct qib_mr *mr; struct ib_umem *umem; - struct ib_umem_chunk *chunk; - int n, m, i; + struct scatterlist *sg; + int n, m, entry; struct ib_mr *ret; if (length == 0) { @@ -246,9 +246,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ERR(umem)) return (void *) umem; - n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - n += chunk->nents; + n = umem->nmap; mr = alloc_mr(n, pd); if (IS_ERR(mr)) { @@ -268,11 +266,10 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mr.page_shift = ilog2(umem->page_size); m = 0; n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) { - for (i = 0; i < chunk->nents; i++) { + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { void *vaddr; - vaddr = page_address(sg_page(&chunk->page_list[i])); + vaddr = page_address(sg_page(sg)); if (!vaddr) { ret = ERR_PTR(-EINVAL); goto bail; @@ -284,7 +281,6 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, m++; n = 0; } - } } ret = &mr->ibmr; -- cgit v1.1 From 3121e3c441b5eccdd15e6c320ec32215b334b9ec Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:06 +0200 Subject: mlx5: Implement create_mr and destroy_mr Support create_mr and destroy_mr verbs. Creating ib_mr may be done for either ib_mr that will register regular page lists like alloc_fast_reg_mr routine, or indirect ib_mrs that can register other (pre-registered) ib_mrs in an indirect manner. In addition user may request signature enable, that will mean that the created ib_mr may be attached with signature attributes (BSF, PSVs). Currently we only allow direct/indirect registration modes. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 2 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++ drivers/infiniband/hw/mlx5/mr.c | 111 +++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index aa03e73..7260a29 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1423,9 +1423,11 @@ static int init_one(struct pci_dev *pdev, dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; + dev->ib_dev.destroy_mr = mlx5_ib_destroy_mr; dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; dev->ib_dev.process_mad = mlx5_ib_process_mad; + dev->ib_dev.create_mr = mlx5_ib_create_mr; dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr; dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 389e319..79c4f14 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -265,6 +265,7 @@ struct mlx5_ib_mr { enum ib_wc_status status; struct mlx5_ib_dev *dev; struct mlx5_create_mkey_mbox_out out; + struct mlx5_core_sig_ctx *sig; }; struct mlx5_ib_fast_reg_page_list { @@ -495,6 +496,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr); +int mlx5_ib_destroy_mr(struct ib_mr *ibmr); +struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 7c95ca1..032445c 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -992,6 +992,117 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr) return 0; } +struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int access_mode, err; + int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + in->seg.status = 1 << 6; /* free */ + in->seg.xlt_oct_size = cpu_to_be32(ndescs); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + access_mode = MLX5_ACCESS_MODE_MTT; + + if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) { + u32 psv_index[2]; + + in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) | + MLX5_MKEY_BSF_EN); + in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); + mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); + if (!mr->sig) { + err = -ENOMEM; + goto err_free_in; + } + + /* create mem & wire PSVs */ + err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn, + 2, psv_index); + if (err) + goto err_free_sig; + + access_mode = MLX5_ACCESS_MODE_KLM; + mr->sig->psv_memory.psv_idx = psv_index[0]; + mr->sig->psv_wire.psv_idx = psv_index[1]; + } + + in->seg.flags = MLX5_PERM_UMR_EN | access_mode; + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), + NULL, NULL, NULL); + if (err) + goto err_destroy_psv; + + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + mr->umem = NULL; + kfree(in); + + return &mr->ibmr; + +err_destroy_psv: + if (mr->sig) { + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); + } +err_free_sig: + kfree(mr->sig); +err_free_in: + kfree(in); +err_free: + kfree(mr); + return ERR_PTR(err); +} + +int mlx5_ib_destroy_mr(struct ib_mr *ibmr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibmr->device); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + int err; + + if (mr->sig) { + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); + kfree(mr->sig); + } + + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) { + mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", + mr->mmr.key, err); + return err; + } + + kfree(mr); + + return err; +} + struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { -- cgit v1.1 From e1e66cc26457c2e9412f67618646ec2a441fc409 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:07 +0200 Subject: IB/mlx5: Initialize mlx5_ib_qp signature-related members If user requested signature enable we initialize relevant mlx5_ib_qp members. We mark the qp as sig_enable and we increase the effective SQ size, but still limit the user max_send_wr to original size computed. We also allow the create_qp routine to accept sig_enable create flag. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/qp.c | 12 +++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 79c4f14..e438f08 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -189,6 +189,9 @@ struct mlx5_ib_qp { int create_type; u32 pa_lkey; + + /* Store signature errors */ + bool signature_en; }; struct mlx5_ib_cq_buf { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7dfe8a1..01999f3 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -256,8 +256,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) } size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); - - return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN && + ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE) + return MLX5_SIG_WQE_SIZE; + else + return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); } static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, @@ -284,6 +287,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, sizeof(struct mlx5_wqe_inline_seg); attr->cap.max_inline_data = qp->max_inline_data; + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) + qp->signature_en = true; + wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) { @@ -665,7 +671,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, int err; uuari = &dev->mdev.priv.uuari; - if (init_attr->create_flags) + if (init_attr->create_flags & ~IB_QP_CREATE_SIGNATURE_EN) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) -- cgit v1.1 From 6e5eadace1da7c57c1f10d55a484bda13c86dfed Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:08 +0200 Subject: IB/mlx5: Break up wqe handling into begin & finish routines As a preliminary step for signature feature which will require posting multiple (3) WQEs for a single WR, we break post_send routine WQE indexing into begin and finish routines. This patch does not change any functionality. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 97 ++++++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 36 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 01999f3..a029009 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2047,6 +2047,59 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr) } } +static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, + struct ib_send_wr *wr, int *idx, + int *size, int nreq) +{ + int err = 0; + + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { + err = -ENOMEM; + return err; + } + + *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + *seg = mlx5_get_send_wqe(qp, *idx); + *ctrl = *seg; + *(uint32_t *)(*seg + 8) = 0; + (*ctrl)->imm = send_ieth(wr); + (*ctrl)->fm_ce_se = qp->sq_signal_bits | + (wr->send_flags & IB_SEND_SIGNALED ? + MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (wr->send_flags & IB_SEND_SOLICITED ? + MLX5_WQE_CTRL_SOLICITED : 0); + + *seg += sizeof(**ctrl); + *size = sizeof(**ctrl) / 16; + + return err; +} + +static void finish_wqe(struct mlx5_ib_qp *qp, + struct mlx5_wqe_ctrl_seg *ctrl, + u8 size, unsigned idx, u64 wr_id, + int nreq, u8 fence, u8 next_fence, + u32 mlx5_opcode) +{ + u8 opmod = 0; + + ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | + mlx5_opcode | ((u32)opmod << 24)); + ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); + ctrl->fm_ce_se |= fence; + qp->fm_cache = next_fence; + if (unlikely(qp->wq_sig)) + ctrl->signature = wq_sig(ctrl); + + qp->sq.wrid[idx] = wr_id; + qp->sq.w_list[idx].opcode = mlx5_opcode; + qp->sq.wqe_head[idx] = qp->sq.head + nreq; + qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); + qp->sq.w_list[idx].next = qp->sq.cur_post; +} + + int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -2060,7 +2113,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int uninitialized_var(size); void *qend = qp->sq.qend; unsigned long flags; - u32 mlx5_opcode; unsigned idx; int err = 0; int inl = 0; @@ -2069,7 +2121,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq; int i; u8 next_fence = 0; - u8 opmod = 0; u8 fence; spin_lock_irqsave(&qp->sq.lock, flags); @@ -2082,36 +2133,23 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { + fence = qp->fm_cache; + num_sge = wr->num_sge; + if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; *bad_wr = wr; goto out; } - fence = qp->fm_cache; - num_sge = wr->num_sge; - if (unlikely(num_sge > qp->sq.max_gs)) { + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); + if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; *bad_wr = wr; goto out; } - idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); - seg = mlx5_get_send_wqe(qp, idx); - ctrl = seg; - *(uint32_t *)(seg + 8) = 0; - ctrl->imm = send_ieth(wr); - ctrl->fm_ce_se = qp->sq_signal_bits | - (wr->send_flags & IB_SEND_SIGNALED ? - MLX5_WQE_CTRL_CQ_UPDATE : 0) | - (wr->send_flags & IB_SEND_SOLICITED ? - MLX5_WQE_CTRL_SOLICITED : 0); - - seg += sizeof(*ctrl); - size = sizeof(*ctrl) / 16; - switch (ibqp->qp_type) { case IB_QPT_XRC_INI: xrc = seg; @@ -2244,22 +2282,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } } - mlx5_opcode = mlx5_ib_opcode[wr->opcode]; - ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | - mlx5_opcode | - ((u32)opmod << 24)); - ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); - ctrl->fm_ce_se |= get_fence(fence, wr); - qp->fm_cache = next_fence; - if (unlikely(qp->wq_sig)) - ctrl->signature = wq_sig(ctrl); - - qp->sq.wrid[idx] = wr->wr_id; - qp->sq.w_list[idx].opcode = mlx5_opcode; - qp->sq.wqe_head[idx] = qp->sq.head + nreq; - qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); - qp->sq.w_list[idx].next = qp->sq.cur_post; - + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + get_fence(fence, wr), next_fence, + mlx5_ib_opcode[wr->opcode]); if (0) dump_wqe(qp, idx, size); } -- cgit v1.1 From 2ac45934f8700e0c2a579f6ee85a56c6e9ea89d5 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:09 +0200 Subject: IB/mlx5: Remove MTT access mode from umr flags helper function get_umr_flags helper function might be used for types of access modes other than ACCESS_MODE_MTT, such as ACCESS_MODE_KLM. So remove it from helper, and callers will add their own access mode flag. This commit does not add/change functionality. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a029009..1dbadbf 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1832,7 +1832,7 @@ static u8 get_umr_flags(int acc) (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | - MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; + MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; } static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, @@ -1844,7 +1844,8 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, return; } - seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags); + seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) | + MLX5_ACCESS_MODE_MTT; *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); -- cgit v1.1 From e6631814fb3ac454fbbf47ea343c2b9508e4e1ba Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:11 +0200 Subject: IB/mlx5: Support IB_WR_REG_SIG_MR This patch implements IB_WR_REG_SIG_MR posted by the user. Baisically this WR involves 3 WQEs in order to prepare and properly register the signature layout: 1. post UMR WR to register the sig_mr in one of two possible ways: * In case the user registered a single MR for data so the UMR data segment consists of: - single klm (data MR) passed by the user - BSF with signature attributes requested by the user. * In case the user registered 2 MRs, one for data and one for protection, the UMR consists of: - strided block format which includes data and protection MRs and their repetitive block format. - BSF with signature attributes requested by the user. 2. post SET_PSV in order to set the memory domain initial signature parameters passed by the user. SET_PSV is not signaled and solicited CQE. 3. post SET_PSV in order to set the wire domain initial signature parameters passed by the user. SET_PSV is not signaled and solicited CQE. * After this compound WR we place a small fence for next WR to come. This patch also introduces some helper functions to set the BSF correctly and determining the signature format selectors. Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 422 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 422 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1dbadbf..67e7998 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1777,6 +1777,26 @@ static __be64 frwr_mkey_mask(void) return cpu_to_be64(result); } +static __be64 sig_mkey_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_RINVAL | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_SMALL_FENCE | + MLX5_MKEY_MASK_FREE | + MLX5_MKEY_MASK_BSF_EN; + + return cpu_to_be64(result); +} + static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr, int li) { @@ -1961,6 +1981,339 @@ static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr, return 0; } +static u16 prot_field_size(enum ib_signature_type type) +{ + switch (type) { + case IB_SIG_TYPE_T10_DIF: + return MLX5_DIF_SIZE; + default: + return 0; + } +} + +static u8 bs_selector(int block_size) +{ + switch (block_size) { + case 512: return 0x1; + case 520: return 0x2; + case 4096: return 0x3; + case 4160: return 0x4; + case 1073741824: return 0x5; + default: return 0; + } +} + +static int format_selector(struct ib_sig_attrs *attr, + struct ib_sig_domain *domain, + int *selector) +{ + +#define FORMAT_DIF_NONE 0 +#define FORMAT_DIF_CRC_INC 8 +#define FORMAT_DIF_CRC_NO_INC 12 +#define FORMAT_DIF_CSUM_INC 13 +#define FORMAT_DIF_CSUM_NO_INC 14 + + switch (domain->sig.dif.type) { + case IB_T10DIF_NONE: + /* No DIF */ + *selector = FORMAT_DIF_NONE; + break; + case IB_T10DIF_TYPE1: /* Fall through */ + case IB_T10DIF_TYPE2: + switch (domain->sig.dif.bg_type) { + case IB_T10DIF_CRC: + *selector = FORMAT_DIF_CRC_INC; + break; + case IB_T10DIF_CSUM: + *selector = FORMAT_DIF_CSUM_INC; + break; + default: + return 1; + } + break; + case IB_T10DIF_TYPE3: + switch (domain->sig.dif.bg_type) { + case IB_T10DIF_CRC: + *selector = domain->sig.dif.type3_inc_reftag ? + FORMAT_DIF_CRC_INC : + FORMAT_DIF_CRC_NO_INC; + break; + case IB_T10DIF_CSUM: + *selector = domain->sig.dif.type3_inc_reftag ? + FORMAT_DIF_CSUM_INC : + FORMAT_DIF_CSUM_NO_INC; + break; + default: + return 1; + } + break; + default: + return 1; + } + + return 0; +} + +static int mlx5_set_bsf(struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_bsf *bsf, u32 data_size) +{ + struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; + struct mlx5_bsf_basic *basic = &bsf->basic; + struct ib_sig_domain *mem = &sig_attrs->mem; + struct ib_sig_domain *wire = &sig_attrs->wire; + int ret, selector; + + switch (sig_attrs->mem.sig_type) { + case IB_SIG_TYPE_T10_DIF: + if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) + return -EINVAL; + + /* Input domain check byte mask */ + basic->check_byte_mask = sig_attrs->check_mask; + if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && + mem->sig.dif.type == wire->sig.dif.type) { + /* Same block structure */ + basic->bsf_size_sbs = 1 << 4; + if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) + basic->wire.copy_byte_mask = 0xff; + else + basic->wire.copy_byte_mask = 0x3f; + } else + basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); + + basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); + basic->raw_data_size = cpu_to_be32(data_size); + + ret = format_selector(sig_attrs, mem, &selector); + if (ret) + return -EINVAL; + basic->m_bfs_psv = cpu_to_be32(selector << 24 | + msig->psv_memory.psv_idx); + + ret = format_selector(sig_attrs, wire, &selector); + if (ret) + return -EINVAL; + basic->w_bfs_psv = cpu_to_be32(selector << 24 | + msig->psv_wire.psv_idx); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size) +{ + struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs; + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + struct mlx5_bsf *bsf; + u32 data_len = wr->sg_list->length; + u32 data_key = wr->sg_list->lkey; + u64 data_va = wr->sg_list->addr; + int ret; + int wqe_size; + + if (!wr->wr.sig_handover.prot) { + /** + * Source domain doesn't contain signature information + * So need construct: + * ------------------ + * | data_klm | + * ------------------ + * | BSF | + * ------------------ + **/ + struct mlx5_klm *data_klm = *seg; + + data_klm->bcount = cpu_to_be32(data_len); + data_klm->key = cpu_to_be32(data_key); + data_klm->va = cpu_to_be64(data_va); + wqe_size = ALIGN(sizeof(*data_klm), 64); + } else { + /** + * Source domain contains signature information + * So need construct a strided block format: + * --------------------------- + * | stride_block_ctrl | + * --------------------------- + * | data_klm | + * --------------------------- + * | prot_klm | + * --------------------------- + * | BSF | + * --------------------------- + **/ + struct mlx5_stride_block_ctrl_seg *sblock_ctrl; + struct mlx5_stride_block_entry *data_sentry; + struct mlx5_stride_block_entry *prot_sentry; + u32 prot_key = wr->wr.sig_handover.prot->lkey; + u64 prot_va = wr->wr.sig_handover.prot->addr; + u16 block_size = sig_attrs->mem.sig.dif.pi_interval; + int prot_size; + + sblock_ctrl = *seg; + data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); + prot_sentry = (void *)data_sentry + sizeof(*data_sentry); + + prot_size = prot_field_size(sig_attrs->mem.sig_type); + if (!prot_size) { + pr_err("Bad block size given: %u\n", block_size); + return -EINVAL; + } + sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + + prot_size); + sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); + sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); + sblock_ctrl->num_entries = cpu_to_be16(2); + + data_sentry->bcount = cpu_to_be16(block_size); + data_sentry->key = cpu_to_be32(data_key); + data_sentry->va = cpu_to_be64(data_va); + prot_sentry->bcount = cpu_to_be16(prot_size); + prot_sentry->key = cpu_to_be32(prot_key); + + if (prot_key == data_key && prot_va == data_va) { + /** + * The data and protection are interleaved + * in a single memory region + **/ + prot_sentry->va = cpu_to_be64(data_va + block_size); + prot_sentry->stride = cpu_to_be16(block_size + prot_size); + data_sentry->stride = prot_sentry->stride; + } else { + /* The data and protection are two different buffers */ + prot_sentry->va = cpu_to_be64(prot_va); + data_sentry->stride = cpu_to_be16(block_size); + prot_sentry->stride = cpu_to_be16(prot_size); + } + wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + + sizeof(*prot_sentry), 64); + } + + *seg += wqe_size; + *size += wqe_size / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + bsf = *seg; + ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); + if (ret) + return -EINVAL; + + *seg += sizeof(*bsf); + *size += sizeof(*bsf) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + return 0; +} + +static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, + struct ib_send_wr *wr, u32 nelements, + u32 length, u32 pdn) +{ + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + u32 sig_key = sig_mr->rkey; + + memset(seg, 0, sizeof(*seg)); + + seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | + MLX5_ACCESS_MODE_KLM; + seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | + MLX5_MKEY_BSF_EN | pdn); + seg->len = cpu_to_be64(length); + seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements))); + seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); +} + +static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr, u32 nelements) +{ + memset(umr, 0, sizeof(*umr)); + + umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; + umr->klm_octowords = get_klm_octo(nelements); + umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); + umr->mkey_mask = sig_mkey_mask(); +} + + +static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size) +{ + struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr); + u32 pdn = get_pd(qp)->pdn; + u32 klm_oct_size; + int region_len, ret; + + if (unlikely(wr->num_sge != 1) || + unlikely(wr->wr.sig_handover.access_flags & + IB_ACCESS_REMOTE_ATOMIC) || + unlikely(!sig_mr->sig) || unlikely(!qp->signature_en)) + return -EINVAL; + + /* length of the protected region, data + protection */ + region_len = wr->sg_list->length; + if (wr->wr.sig_handover.prot) + region_len += wr->wr.sig_handover.prot->length; + + /** + * KLM octoword size - if protection was provided + * then we use strided block format (3 octowords), + * else we use single KLM (1 octoword) + **/ + klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1; + + set_sig_umr_segment(*seg, wr, klm_oct_size); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + ret = set_sig_data_segment(wr, qp, seg, size); + if (ret) + return ret; + + return 0; +} + +static int set_psv_wr(struct ib_sig_domain *domain, + u32 psv_idx, void **seg, int *size) +{ + struct mlx5_seg_set_psv *psv_seg = *seg; + + memset(psv_seg, 0, sizeof(*psv_seg)); + psv_seg->psv_num = cpu_to_be32(psv_idx); + switch (domain->sig_type) { + case IB_SIG_TYPE_T10_DIF: + psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | + domain->sig.dif.app_tag); + psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); + + *seg += sizeof(*psv_seg); + *size += sizeof(*psv_seg) / 16; + break; + + default: + pr_err("Bad signature type given.\n"); + return 1; + } + + return 0; +} + static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) { @@ -2108,6 +2461,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = &dev->mdev; struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf = qp->bf; @@ -2203,6 +2557,73 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, num_sge = 0; break; + case IB_WR_REG_SIG_MR: + qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; + mr = to_mmr(wr->wr.sig_handover.sig_mr); + + ctrl->imm = cpu_to_be32(mr->ibmr.rkey); + err = set_sig_umr_wr(wr, qp, &seg, &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_UMR); + /* + * SET_PSV WQEs are not signaled and solicited + * on error + */ + wr->send_flags &= ~IB_SEND_SIGNALED; + wr->send_flags |= IB_SEND_SOLICITED; + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem, + mr->sig->psv_memory.psv_idx, &seg, + &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_SET_PSV); + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire, + mr->sig->psv_wire.psv_idx, &seg, + &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_SET_PSV); + num_sge = 0; + goto skip_psv; + default: break; } @@ -2286,6 +2707,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, get_fence(fence, wr), next_fence, mlx5_ib_opcode[wr->opcode]); +skip_psv: if (0) dump_wqe(qp, idx, size); } -- cgit v1.1 From d5436ba01075ef4629015f7a00914d64ffd795d6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:12 +0200 Subject: IB/mlx5: Collect signature error completion This commit takes care of the generated signature error CQE generated by the HW (if happened). The underlying mlx5 driver will handle signature error completions and will mark the relevant memory region as dirty. Once the consumer gets the completion for the transaction, it must check for signature errors on signature memory region using a new lightweight verb ib_check_mr_status(). In case the user doesn't check for signature error (i.e. doesn't call ib_check_mr_status() with status check IB_MR_CHECK_SIG_STATUS), the memory region cannot be used for another signature operation (REG_SIG_MR work request will fail). Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/cq.c | 62 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 ++++ drivers/infiniband/hw/mlx5/mr.c | 46 ++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/qp.c | 8 +++-- 5 files changed, 122 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b1705ce..62bb6b4 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -366,6 +366,38 @@ static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) mlx5_buf_free(&dev->mdev, &buf->buf); } +static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, + struct ib_sig_err *item) +{ + u16 syndrome = be16_to_cpu(cqe->syndrome); + +#define GUARD_ERR (1 << 13) +#define APPTAG_ERR (1 << 12) +#define REFTAG_ERR (1 << 11) + + if (syndrome & GUARD_ERR) { + item->err_type = IB_SIG_BAD_GUARD; + item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16; + item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16; + } else + if (syndrome & REFTAG_ERR) { + item->err_type = IB_SIG_BAD_REFTAG; + item->expected = be32_to_cpu(cqe->expected_reftag); + item->actual = be32_to_cpu(cqe->actual_reftag); + } else + if (syndrome & APPTAG_ERR) { + item->err_type = IB_SIG_BAD_APPTAG; + item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff; + item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff; + } else { + pr_err("Got signature completion error with bad syndrome %04x\n", + syndrome); + } + + item->sig_err_offset = be64_to_cpu(cqe->err_offset); + item->key = be32_to_cpu(cqe->mkey); +} + static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_ib_qp **cur_qp, struct ib_wc *wc) @@ -375,6 +407,9 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_cqe64 *cqe64; struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; + struct mlx5_sig_err_cqe *sig_err_cqe; + struct mlx5_core_mr *mmr; + struct mlx5_ib_mr *mr; uint8_t opcode; uint32_t qpn; u16 wqe_ctr; @@ -475,6 +510,33 @@ repoll: } } break; + case MLX5_CQE_SIG_ERR: + sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; + + read_lock(&dev->mdev.priv.mr_table.lock); + mmr = __mlx5_mr_lookup(&dev->mdev, + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); + if (unlikely(!mmr)) { + read_unlock(&dev->mdev.priv.mr_table.lock); + mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", + cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); + return -EINVAL; + } + + mr = to_mibmr(mmr); + get_sig_err_item(sig_err_cqe, &mr->sig->err_item); + mr->sig->sig_err_exists = true; + mr->sig->sigerr_count++; + + mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", + cq->mcq.cqn, mr->sig->err_item.key, + mr->sig->err_item.err_type, + mr->sig->err_item.sig_err_offset, + mr->sig->err_item.expected, + mr->sig->err_item.actual); + + read_unlock(&dev->mdev.priv.mr_table.lock); + goto repoll; } return 0; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7260a29..ba3ecec 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1431,6 +1431,7 @@ static int init_one(struct pci_dev *pdev, dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr; dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; + dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e438f08..5054158 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -400,6 +400,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) return container_of(mqp, struct mlx5_ib_qp, mqp); } +static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) +{ + return container_of(mmr, struct mlx5_ib_mr, mmr); +} + static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) { return container_of(ibpd, struct mlx5_ib_pd, ibpd); @@ -537,6 +542,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, + struct ib_mr_status *mr_status); static inline void init_query_mad(struct ib_smp *mad) { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 032445c..81392b2 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1038,6 +1038,11 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, access_mode = MLX5_ACCESS_MODE_KLM; mr->sig->psv_memory.psv_idx = psv_index[0]; mr->sig->psv_wire.psv_idx = psv_index[1]; + + mr->sig->sig_status_checked = true; + mr->sig->sig_err_exists = false; + /* Next UMR, Arm SIGERR */ + ++mr->sig->sigerr_count; } in->seg.flags = MLX5_PERM_UMR_EN | access_mode; @@ -1188,3 +1193,44 @@ void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); } + +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, + struct ib_mr_status *mr_status) +{ + struct mlx5_ib_mr *mmr = to_mmr(ibmr); + int ret = 0; + + if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { + pr_err("Invalid status check mask\n"); + ret = -EINVAL; + goto done; + } + + mr_status->fail_status = 0; + if (check_mask & IB_MR_CHECK_SIG_STATUS) { + if (!mmr->sig) { + ret = -EINVAL; + pr_err("signature status check requested on a non-signature enabled MR\n"); + goto done; + } + + mmr->sig->sig_status_checked = true; + if (!mmr->sig->sig_err_exists) + goto done; + + if (ibmr->lkey == mmr->sig->err_item.key) + memcpy(&mr_status->sig_err, &mmr->sig->err_item, + sizeof(mr_status->sig_err)); + else { + mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; + mr_status->sig_err.sig_err_offset = 0; + mr_status->sig_err.key = mmr->sig->err_item.key; + } + + mmr->sig->sig_err_exists = false; + mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; + } + +done: + return ret; +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 67e7998..ae788d27 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1784,6 +1784,7 @@ static __be64 sig_mkey_mask(void) result = MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_PAGE_SIZE | MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_SIGERR | MLX5_MKEY_MASK_EN_RINVAL | MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_LR | @@ -2219,13 +2220,14 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, { struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; u32 sig_key = sig_mr->rkey; + u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; memset(seg, 0, sizeof(*seg)); seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | MLX5_ACCESS_MODE_KLM; seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); - seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | MLX5_MKEY_BSF_EN | pdn); seg->len = cpu_to_be64(length); seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements))); @@ -2255,7 +2257,8 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, if (unlikely(wr->num_sge != 1) || unlikely(wr->wr.sig_handover.access_flags & IB_ACCESS_REMOTE_ATOMIC) || - unlikely(!sig_mr->sig) || unlikely(!qp->signature_en)) + unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || + unlikely(!sig_mr->sig->sig_status_checked)) return -EINVAL; /* length of the protected region, data + protection */ @@ -2286,6 +2289,7 @@ static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, if (ret) return ret; + sig_mr->sig->sig_status_checked = false; return 0; } -- cgit v1.1 From 2dea909444c294f55316c068906945ef38980ef3 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:19:13 +0200 Subject: IB/mlx5: Expose support for signature MR feature Currently support only T10-DIF types of signature handover operations (types 1|2|3). Signed-off-by: Sagi Grimberg Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ba3ecec..7b9c078 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -273,6 +273,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (flags & MLX5_DEV_CAP_FLAG_XRC) props->device_cap_flags |= IB_DEVICE_XRC; props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) { + props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; + /* At this stage no support for signature handover */ + props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | + IB_PROT_T10DIF_TYPE_2 | + IB_PROT_T10DIF_TYPE_3; + props->sig_guard_cap = IB_GUARD_T10DIF_CRC | + IB_GUARD_T10DIF_CSUM; + } props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) & 0xffffff; -- cgit v1.1 From 4ac79a70033cbb9b7fa2bb2f3770308359af8c12 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 11 Mar 2014 14:17:25 -0500 Subject: RDMA/nes: Fixes for IRD/ORD negotiation with MPA v2 Fixes to enable the negotiation of the supported IRD/ORD sizes with the peer when exchanging MPA v2 messages in connection establishment. Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 119 ++++++++++++++++++++++++++++--------- drivers/infiniband/hw/nes/nes_cm.h | 3 + 2 files changed, 93 insertions(+), 29 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 9c9f2f5..e852a3a 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -128,6 +128,7 @@ static void build_mpa_v1(struct nes_cm_node *, void *, u8); static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **); static void print_core(struct nes_cm_core *core); +static void record_ird_ord(struct nes_cm_node *, u16, u16); /* External CM API Interface */ /* instance of function pointers for client API */ @@ -317,7 +318,6 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, } } - if (priv_data_len + mpa_hdr_len != len) { nes_debug(NES_DBG_CM, "The received ietf buffer was not right" " complete (%x + %x != %x)\n", @@ -356,25 +356,57 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, /* send reset */ return -EINVAL; } + if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) + cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD; - if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { + if (cm_node->mpav2_ird_ord != IETF_NO_IRD_ORD) { /* responder */ - if (cm_node->ord_size > ird_size) - cm_node->ord_size = ird_size; - } else { - /* initiator */ - if (cm_node->ord_size > ird_size) - cm_node->ord_size = ird_size; - - if (cm_node->ird_size < ord_size) { - /* no resources available */ - /* send terminate message */ - return -EINVAL; + if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { + /* we are still negotiating */ + if (ord_size > NES_MAX_IRD) { + cm_node->ird_size = NES_MAX_IRD; + } else { + cm_node->ird_size = ord_size; + if (ord_size == 0 && + (rtr_ctrl_ord & IETF_RDMA0_READ)) { + cm_node->ird_size = 1; + nes_debug(NES_DBG_CM, + "%s: Remote peer doesn't support RDMA0_READ (ord=%u)\n", + __func__, ord_size); + } + } + if (ird_size > NES_MAX_ORD) + cm_node->ord_size = NES_MAX_ORD; + else + cm_node->ord_size = ird_size; + } else { /* initiator */ + if (ord_size > NES_MAX_IRD) { + nes_debug(NES_DBG_CM, + "%s: Unable to support the requested (ord =%u)\n", + __func__, ord_size); + return -EINVAL; + } + cm_node->ird_size = ord_size; + + if (ird_size > NES_MAX_ORD) { + cm_node->ord_size = NES_MAX_ORD; + } else { + if (ird_size == 0 && + (rtr_ctrl_ord & IETF_RDMA0_READ)) { + nes_debug(NES_DBG_CM, + "%s: Remote peer doesn't support RDMA0_READ (ird=%u)\n", + __func__, ird_size); + return -EINVAL; + } else { + cm_node->ord_size = ird_size; + } + } } } if (rtr_ctrl_ord & IETF_RDMA0_READ) { cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; + } else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) { cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO; } else { /* Not supported RDMA0 operation */ @@ -514,6 +546,19 @@ static void print_core(struct nes_cm_core *core) nes_debug(NES_DBG_CM, "-------------- end core ---------------\n"); } +static void record_ird_ord(struct nes_cm_node *cm_node, + u16 conn_ird, u16 conn_ord) +{ + if (conn_ird > NES_MAX_IRD) + conn_ird = NES_MAX_IRD; + + if (conn_ord > NES_MAX_ORD) + conn_ord = NES_MAX_ORD; + + cm_node->ird_size = conn_ird; + cm_node->ord_size = conn_ord; +} + /** * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame */ @@ -557,11 +602,13 @@ static void build_mpa_v2(struct nes_cm_node *cm_node, mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE); /* initialize RTR msg */ - ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? - IETF_NO_IRD_ORD : cm_node->ird_size; - ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? - IETF_NO_IRD_ORD : cm_node->ord_size; - + if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { + ctrl_ird = IETF_NO_IRD_ORD; + ctrl_ord = IETF_NO_IRD_ORD; + } else { + ctrl_ird = cm_node->ird_size & IETF_NO_IRD_ORD; + ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD; + } ctrl_ird |= IETF_PEER_TO_PEER; ctrl_ird |= IETF_FLPDU_ZERO_LEN; @@ -1409,8 +1456,9 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->mpa_frame_rev = mpa_version; cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; - cm_node->ird_size = IETF_NO_IRD_ORD; - cm_node->ord_size = IETF_NO_IRD_ORD; + cm_node->mpav2_ird_ord = 0; + cm_node->ird_size = 0; + cm_node->ord_size = 0; nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n", &cm_node->loc_addr, cm_node->loc_port, @@ -3027,11 +3075,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) rem_ref_cm_node(cm_node->cm_core, cm_node); return -ECONNRESET; } - /* associate the node with the QP */ nesqp->cm_node = (void *)cm_node; cm_node->nesqp = nesqp; + nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n", nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener); atomic_inc(&cm_accepts); @@ -3054,6 +3102,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (cm_node->mpa_frame_rev == IETF_MPA_V1) mpa_frame_offset = 4; + if (cm_node->mpa_frame_rev == IETF_MPA_V1 || + cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { + record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); + } + memcpy(mpa_v2_frame->priv_data, conn_param->private_data, conn_param->private_data_len); @@ -3117,7 +3170,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } nesqp->skip_lsmm = 1; - /* Cache the cm_id in the qp */ nesqp->cm_id = cm_id; cm_node->cm_id = cm_id; @@ -3154,7 +3206,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); nesqp->nesqp_context->ird_ord_sizes |= - cpu_to_le32((u32)conn_param->ord); + cpu_to_le32((u32)cm_node->ord_size); memset(&nes_quad, 0, sizeof(nes_quad)); nes_quad.DstIpAdrIndex = @@ -3194,6 +3246,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_event.remote_addr = cm_id->remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; + cm_event.ird = cm_node->ird_size; + cm_event.ord = cm_node->ord_size; + ret = cm_id->event_handler(cm_id, &cm_event); attr.qp_state = IB_QPS_RTS; nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); @@ -3290,14 +3345,8 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) /* cache the cm_id in the qp */ nesqp->cm_id = cm_id; - cm_id->provider_data = nesqp; - nesqp->private_data_len = conn_param->private_data_len; - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); - /* space for rdma0 read msg */ - if (conn_param->ord == 0) - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(1); nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); nes_debug(NES_DBG_CM, "mpa private data len =%u\n", @@ -3334,6 +3383,11 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -ENOMEM; } + record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); + if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO && + cm_node->ord_size == 0) + cm_node->ord_size = 1; + cm_node->apbvt_set = apbvt_set; nesqp->cm_node = cm_node; cm_node->nesqp = nesqp; @@ -3530,6 +3584,8 @@ static void cm_event_connected(struct nes_cm_event *event) nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((u32)cm_node->ord_size); /* Adjust tail for not having a LSMM */ /*nesqp->hwqp.sq_tail = 1;*/ @@ -3742,8 +3798,13 @@ static void cm_event_mpa_req(struct nes_cm_event *event) cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr); cm_event.private_data = cm_node->mpa_frame_buf; cm_event.private_data_len = (u8)cm_node->mpa_frame_size; + if (cm_node->mpa_frame_rev == IETF_MPA_V1) { + cm_event.ird = NES_MAX_IRD; + cm_event.ord = NES_MAX_ORD; + } else { cm_event.ird = cm_node->ird_size; cm_event.ord = cm_node->ord_size; + } ret = cm_id->event_handler(cm_id, &cm_event); if (ret) diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 4646e66..522c99c 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -58,6 +58,8 @@ #define IETF_RDMA0_WRITE 0x8000 #define IETF_RDMA0_READ 0x4000 #define IETF_NO_IRD_ORD 0x3FFF +#define NES_MAX_IRD 0x40 +#define NES_MAX_ORD 0x7F enum ietf_mpa_flags { IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */ @@ -333,6 +335,7 @@ struct nes_cm_node { enum mpa_frame_version mpa_frame_rev; u16 ird_size; u16 ord_size; + u16 mpav2_ird_ord; u16 mpa_frame_size; struct iw_cm_id *cm_id; -- cgit v1.1 From 43adff3979c8f887b364c887e447c443e2f835e8 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Tue, 11 Mar 2014 14:20:17 -0500 Subject: RDMA/nes: Fix for passing a valid QP pointer to the user space library Signed-off-by: Tatyana Nikolova Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 2 +- drivers/infiniband/hw/nes/nes_user.h | 5 +++-- drivers/infiniband/hw/nes/nes_verbs.c | 2 ++ drivers/infiniband/hw/nes/nes_verbs.h | 1 + 4 files changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index e852a3a..dfa9df4 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -657,7 +657,7 @@ static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_a struct nes_qp *nesqp = *nesqp_addr; struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0]; - u64temp = (unsigned long)nesqp; + u64temp = (unsigned long)nesqp->nesuqp_addr; u64temp |= NES_SW_CONTEXT_ALIGN >> 1; set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp); diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h index 4926de7..529c421 100644 --- a/drivers/infiniband/hw/nes/nes_user.h +++ b/drivers/infiniband/hw/nes/nes_user.h @@ -39,8 +39,8 @@ #include -#define NES_ABI_USERSPACE_VER 1 -#define NES_ABI_KERNEL_VER 1 +#define NES_ABI_USERSPACE_VER 2 +#define NES_ABI_KERNEL_VER 2 /* * Make sure that all structs defined in this file remain laid out so @@ -78,6 +78,7 @@ struct nes_create_cq_req { struct nes_create_qp_req { __u64 user_wqe_buffers; + __u64 user_qp_buffer; }; enum iwnes_memreg_type { diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 8308e36..797d887 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1191,6 +1191,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, if (req.user_wqe_buffers) { virt_wqs = 1; } + if (req.user_qp_buffer) + nesqp->nesuqp_addr = req.user_qp_buffer; if ((ibpd->uobject) && (ibpd->uobject->context)) { nesqp->user_mode = 1; nes_ucontext = to_nesucontext(ibpd->uobject->context); diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index 0eff7c4..309b31c 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -184,5 +184,6 @@ struct nes_qp { u8 pau_busy; u8 pau_pending; u8 pau_state; + __u64 nesuqp_addr; }; #endif /* NES_VERBS_H */ -- cgit v1.1 From 1c20c81909455f64f2df6107cb099ee5569d9f62 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 20 Feb 2014 11:02:48 -0500 Subject: IB/qib: Fix potential buffer overrun in sending diag packet routine Guard against a potential buffer overrun. Right now the qib driver is protected by the fact that the data structure in question is only 16 bits. Should that ever change the problem will be exposed. There is a similar defect in the ipath driver and this brings the two code paths into sync. Reported-by: Nico Golde Reported-by: Fabian Yamaguchi Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_diag.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 1686fd4..07f9030 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -546,7 +546,7 @@ static ssize_t qib_diagpkt_write(struct file *fp, size_t count, loff_t *off) { u32 __iomem *piobuf; - u32 plen, clen, pbufn; + u32 plen, pbufn, maxlen_reserve; struct qib_diag_xpkt dp; u32 *tmpbuf = NULL; struct qib_devdata *dd; @@ -590,15 +590,20 @@ static ssize_t qib_diagpkt_write(struct file *fp, } ppd = &dd->pport[dp.port - 1]; - /* need total length before first word written */ - /* +1 word is for the qword padding */ - plen = sizeof(u32) + dp.len; - clen = dp.len >> 2; - - if ((plen + 4) > ppd->ibmaxlen) { + /* + * need total length before first word written, plus 2 Dwords. One Dword + * is for padding so we get the full user data when not aligned on + * a word boundary. The other Dword is to make sure we have room for the + * ICRC which gets tacked on later. + */ + maxlen_reserve = 2 * sizeof(u32); + if (dp.len > ppd->ibmaxlen - maxlen_reserve) { ret = -EINVAL; - goto bail; /* before writing pbc */ + goto bail; } + + plen = sizeof(u32) + dp.len; + tmpbuf = vmalloc(plen); if (!tmpbuf) { qib_devinfo(dd->pcidev, @@ -638,11 +643,11 @@ static ssize_t qib_diagpkt_write(struct file *fp, */ if (dd->flags & QIB_PIO_FLUSH_WC) { qib_flush_wc(); - qib_pio_copy(piobuf + 2, tmpbuf, clen - 1); + qib_pio_copy(piobuf + 2, tmpbuf, plen - 1); qib_flush_wc(); - __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); } else - qib_pio_copy(piobuf + 2, tmpbuf, clen); + qib_pio_copy(piobuf + 2, tmpbuf, plen); if (dd->flags & QIB_USE_SPCL_TRIG) { u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023; -- cgit v1.1 From a2cb0eb8a64adb29a99fd864013de957028f36ae Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Thu, 20 Feb 2014 11:02:53 -0500 Subject: IB/ipath: Fix potential buffer overrun in sending diag packet routine Guard against a potential buffer overrun. The size to read from the user is passed in, and due to the padding that needs to be taken into account, as well as the place holder for the ICRC it is possible to overflow the 32bit value which would cause more data to be copied from user space than is allocated in the buffer. Cc: Reported-by: Nico Golde Reported-by: Fabian Yamaguchi Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_diag.c | 66 ++++++++++++-------------------- 1 file changed, 25 insertions(+), 41 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 714293b..e2f9a51 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -326,7 +326,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp, size_t count, loff_t *off) { u32 __iomem *piobuf; - u32 plen, clen, pbufn; + u32 plen, pbufn, maxlen_reserve; struct ipath_diag_pkt odp; struct ipath_diag_xpkt dp; u32 *tmpbuf = NULL; @@ -335,51 +335,29 @@ static ssize_t ipath_diagpkt_write(struct file *fp, u64 val; u32 l_state, lt_state; /* LinkState, LinkTrainingState */ - if (count < sizeof(odp)) { - ret = -EINVAL; - goto bail; - } if (count == sizeof(dp)) { if (copy_from_user(&dp, data, sizeof(dp))) { ret = -EFAULT; goto bail; } - } else if (copy_from_user(&odp, data, sizeof(odp))) { - ret = -EFAULT; + } else if (count == sizeof(odp)) { + if (copy_from_user(&odp, data, sizeof(odp))) { + ret = -EFAULT; + goto bail; + } + } else { + ret = -EINVAL; goto bail; } - /* - * Due to padding/alignment issues (lessened with new struct) - * the old and new structs are the same length. We need to - * disambiguate them, which we can do because odp.len has never - * been less than the total of LRH+BTH+DETH so far, while - * dp.unit (same offset) unit is unlikely to get that high. - * Similarly, dp.data, the pointer to user at the same offset - * as odp.unit, is almost certainly at least one (512byte)page - * "above" NULL. The if-block below can be omitted if compatibility - * between a new driver and older diagnostic code is unimportant. - * compatibility the other direction (new diags, old driver) is - * handled in the diagnostic code, with a warning. - */ - if (dp.unit >= 20 && dp.data < 512) { - /* very probable version mismatch. Fix it up */ - memcpy(&odp, &dp, sizeof(odp)); - /* We got a legacy dp, copy elements to dp */ - dp.unit = odp.unit; - dp.data = odp.data; - dp.len = odp.len; - dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */ - } - /* send count must be an exact number of dwords */ if (dp.len & 3) { ret = -EINVAL; goto bail; } - clen = dp.len >> 2; + plen = dp.len >> 2; dd = ipath_lookup(dp.unit); if (!dd || !(dd->ipath_flags & IPATH_PRESENT) || @@ -422,16 +400,22 @@ static ssize_t ipath_diagpkt_write(struct file *fp, goto bail; } - /* need total length before first word written */ - /* +1 word is for the qword padding */ - plen = sizeof(u32) + dp.len; - - if ((plen + 4) > dd->ipath_ibmaxlen) { + /* + * need total length before first word written, plus 2 Dwords. One Dword + * is for padding so we get the full user data when not aligned on + * a word boundary. The other Dword is to make sure we have room for the + * ICRC which gets tacked on later. + */ + maxlen_reserve = 2 * sizeof(u32); + if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) { ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n", - plen - 4, dd->ipath_ibmaxlen); + dp.len, dd->ipath_ibmaxlen); ret = -EINVAL; - goto bail; /* before writing pbc */ + goto bail; } + + plen = sizeof(u32) + dp.len; + tmpbuf = vmalloc(plen); if (!tmpbuf) { dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, " @@ -473,11 +457,11 @@ static ssize_t ipath_diagpkt_write(struct file *fp, */ if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) { ipath_flush_wc(); - __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1); + __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1); ipath_flush_wc(); - __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); } else - __iowrite32_copy(piobuf + 2, tmpbuf, clen); + __iowrite32_copy(piobuf + 2, tmpbuf, plen); ipath_flush_wc(); -- cgit v1.1 From f8b6c47a44c063062317646683a73371c24c69ee Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 7 Mar 2014 08:32:31 -0500 Subject: IB/qib: Fix debugfs ordering issue with multiple HCAs The debugfs init code was incorrectly called before the idr mechanism is used to get the unit number, so the dd->unit hasn't been initialized. This caused the unit relative directory creation to fail after the first. This patch moves the init for the debugfs stuff until after all of the failures and after the unit number has been determined. A bug in unwind code in qib_alloc_devdata() is also fixed. Cc: Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_init.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 24e802f..76c3e17 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1097,14 +1097,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) int ret; dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); - if (!dd) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } + if (!dd) + return ERR_PTR(-ENOMEM); -#ifdef CONFIG_DEBUG_FS - qib_dbg_ibdev_init(&dd->verbs_dev); -#endif + INIT_LIST_HEAD(&dd->list); idr_preload(GFP_KERNEL); spin_lock_irqsave(&qib_devs_lock, flags); @@ -1121,11 +1117,6 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) if (ret < 0) { qib_early_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); -#ifdef CONFIG_DEBUG_FS - qib_dbg_ibdev_exit(&dd->verbs_dev); -#endif - ib_dealloc_device(&dd->verbs_dev.ibdev); - dd = ERR_PTR(ret); goto bail; } @@ -1139,9 +1130,15 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) qib_early_err(&pdev->dev, "Could not alloc cpulist info, cpu affinity might be wrong\n"); } - -bail: +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_init(&dd->verbs_dev); +#endif return dd; +bail: + if (!list_empty(&dd->list)) + list_del_init(&dd->list); + ib_dealloc_device(&dd->verbs_dev.ibdev); + return ERR_PTR(ret);; } /* -- cgit v1.1 From 1ed88dd7d0b361e677b2690f573e5c274bb25c87 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 7 Mar 2014 08:40:49 -0500 Subject: IB/qib: Add percpu counter replacing qib_devdata int_counter This patch replaces the dd->int_counter with a percpu counter. The maintanance of qib_stats.sps_ints and int_counter are combined into the new counter. There are two new functions added to read the counter: - qib_int_counter (for a particular qib_devdata) - qib_sps_ints (for all HCAs) A z_int_counter is added to allow the interrupt detection logic to determine if interrupts have occured since z_int_counter was "reset". Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib.h | 10 +++++++-- drivers/infiniband/hw/qib/qib_fs.c | 1 + drivers/infiniband/hw/qib/qib_iba6120.c | 7 +++---- drivers/infiniband/hw/qib/qib_iba7220.c | 8 +++----- drivers/infiniband/hw/qib/qib_iba7322.c | 31 +++++++++------------------- drivers/infiniband/hw/qib/qib_init.c | 36 ++++++++++++++++++++++++++++++++- 6 files changed, 59 insertions(+), 34 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 1946101..f044430 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -868,8 +868,10 @@ struct qib_devdata { /* last buffer for user use */ u32 lastctxt_piobuf; - /* saturating counter of (non-port-specific) device interrupts */ - u32 int_counter; + /* reset value */ + u64 z_int_counter; + /* percpu intcounter */ + u64 __percpu *int_counter; /* pio bufs allocated per ctxt */ u32 pbufsctxt; @@ -1449,6 +1451,10 @@ void qib_nomsi(struct qib_devdata *); void qib_nomsix(struct qib_devdata *); void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *); void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8); +/* interrupts for device */ +u64 qib_int_counter(struct qib_devdata *); +/* interrupt for all devices */ +u64 qib_sps_ints(void); /* * dma_addr wrappers - all 0's invalid for hw diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index c61e2a9..cab610c 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -105,6 +105,7 @@ static int create_file(const char *name, umode_t mode, static ssize_t driver_stats_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + qib_stats.sps_ints = qib_sps_ints(); return simple_read_from_buffer(buf, count, ppos, &qib_stats, sizeof qib_stats); } diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 84e593d..b9bea2e 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1634,9 +1634,7 @@ static irqreturn_t qib_6120intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) @@ -1808,7 +1806,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) * isn't set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); val = dd->control | QLOGIC_IB_C_RESET; writeq(val, &dd->kregbase[kr_control]); mb(); /* prevent compiler re-ordering around actual reset */ diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 454c2e7..28063d4 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1962,10 +1962,7 @@ static irqreturn_t qib_7220intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; - + this_cpu_inc(*dd->int_counter); if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) unlikely_7220_intr(dd, istat); @@ -2120,7 +2117,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) * isn't set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); val = dd->control | QLOGIC_IB_C_RESET; writeq(val, &dd->kregbase[kr_control]); mb(); /* prevent compiler reordering around actual reset */ diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index d1bd213..8441579 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -3115,9 +3115,7 @@ static irqreturn_t qib_7322intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* handle "errors" of various kinds first, device ahead of port */ if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO | @@ -3186,9 +3184,7 @@ static irqreturn_t qib_7322pintr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) | @@ -3215,9 +3211,7 @@ static irqreturn_t qib_7322bufavail(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL); @@ -3248,9 +3242,7 @@ static irqreturn_t sdma_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3277,9 +3269,7 @@ static irqreturn_t sdma_idle_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3306,9 +3296,7 @@ static irqreturn_t sdma_progress_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3336,9 +3324,7 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3723,7 +3709,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd) dd->pport->cpspec->ibsymdelta = 0; dd->pport->cpspec->iblnkerrdelta = 0; dd->pport->cpspec->ibmalfdelta = 0; - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); /* * Keep chip from being accessed until we are ready. Use diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 76c3e17..6d26299 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -525,6 +525,7 @@ static void enable_chip(struct qib_devdata *dd) static void verify_interrupt(unsigned long opaque) { struct qib_devdata *dd = (struct qib_devdata *) opaque; + u64 int_counter; if (!dd) return; /* being torn down */ @@ -533,7 +534,8 @@ static void verify_interrupt(unsigned long opaque) * If we don't have a lid or any interrupts, let the user know and * don't bother checking again. */ - if (dd->int_counter == 0) { + int_counter = qib_int_counter(dd) - dd->z_int_counter; + if (int_counter == 0) { if (!dd->f_intr_fallback(dd)) dev_err(&dd->pcidev->dev, "No interrupts detected, not usable.\n"); @@ -1079,9 +1081,34 @@ void qib_free_devdata(struct qib_devdata *dd) #ifdef CONFIG_DEBUG_FS qib_dbg_ibdev_exit(&dd->verbs_dev); #endif + free_percpu(dd->int_counter); ib_dealloc_device(&dd->verbs_dev.ibdev); } +u64 qib_int_counter(struct qib_devdata *dd) +{ + int cpu; + u64 int_counter = 0; + + for_each_possible_cpu(cpu) + int_counter += *per_cpu_ptr(dd->int_counter, cpu); + return int_counter; +} + +u64 qib_sps_ints(void) +{ + unsigned long flags; + struct qib_devdata *dd; + u64 sps_ints = 0; + + spin_lock_irqsave(&qib_devs_lock, flags); + list_for_each_entry(dd, &qib_dev_list, list) { + sps_ints += qib_int_counter(dd); + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + return sps_ints; +} + /* * Allocate our primary per-unit data structure. Must be done via verbs * allocator, because the verbs cleanup process both does cleanup and @@ -1119,6 +1146,13 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) "Could not allocate unit ID: error %d\n", -ret); goto bail; } + dd->int_counter = alloc_percpu(u64); + if (!dd->int_counter) { + ret = -ENOMEM; + qib_early_err(&pdev->dev, + "Could not allocate per-cpu int_counter\n"); + goto bail; + } if (!qib_cpulist_count) { u32 count = num_online_cpus(); -- cgit v1.1 From 7d7632add8dd99f68b21546efff08a5a162de184 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 7 Mar 2014 08:40:55 -0500 Subject: IB/qib: Modify software pma counters to use percpu variables The counters, unicast_xmit, unicast_rcv, multicast_xmit, multicast_rcv are now maintained as percpu variables. The mad code is modified to add a z_ latch so that the percpu counters monotonically increase with appropriate adjustments in the reset, read logic to maintain the z_ latch. This patch also corrects the fact the unitcast_xmit wasn't handled at all for UC and RC QPs. Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib.h | 2 +- drivers/infiniband/hw/qib/qib_iba6120.c | 4 ++- drivers/infiniband/hw/qib/qib_iba7220.c | 4 ++- drivers/infiniband/hw/qib/qib_iba7322.c | 6 ++++- drivers/infiniband/hw/qib/qib_init.c | 22 ++++++++++++----- drivers/infiniband/hw/qib/qib_mad.c | 44 +++++++++++++++++++++++++++------ drivers/infiniband/hw/qib/qib_rc.c | 2 +- drivers/infiniband/hw/qib/qib_ruc.c | 1 + drivers/infiniband/hw/qib/qib_ud.c | 6 ++--- drivers/infiniband/hw/qib/qib_verbs.c | 4 +-- drivers/infiniband/hw/qib/qib_verbs.h | 16 +++++++++--- 11 files changed, 83 insertions(+), 28 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index f044430..c00ae09 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1186,7 +1186,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *); void qib_set_ctxtcnt(struct qib_devdata *); int qib_create_ctxts(struct qib_devdata *dd); struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int); -void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); +int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index b9bea2e..d68266a 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3265,7 +3265,9 @@ static int init_6120_variables(struct qib_devdata *dd) dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated); - qib_init_pportdata(ppd, dd, 0, 1); + ret = qib_init_pportdata(ppd, dd, 0, 1); + if (ret) + goto bail; ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_speed_supported = QIB_IB_SDR; ppd->link_width_enabled = IB_WIDTH_4X; diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 28063d4..7dec89f 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4059,7 +4059,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd) init_waitqueue_head(&cpspec->autoneg_wait); INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work); - qib_init_pportdata(ppd, dd, 0, 1); + ret = qib_init_pportdata(ppd, dd, 0, 1); + if (ret) + goto bail; ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 8441579..a7eb3251 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -6544,7 +6544,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd) } dd->num_pports++; - qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + if (ret) { + dd->num_pports--; + goto bail; + } ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_width_enabled = IB_WIDTH_4X; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 6d26299..7fbf466 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -233,7 +233,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, /* * Common code for initializing the physical port structure. */ -void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, +int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, u8 hw_pidx, u8 port) { int size; @@ -243,6 +243,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, spin_lock_init(&ppd->sdma_lock); spin_lock_init(&ppd->lflags_lock); + spin_lock_init(&ppd->cc_shadow_lock); init_waitqueue_head(&ppd->state_wait); init_timer(&ppd->symerr_clear_timer); @@ -250,8 +251,10 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, ppd->symerr_clear_timer.data = (unsigned long)ppd; ppd->qib_wq = NULL; - - spin_lock_init(&ppd->cc_shadow_lock); + ppd->ibport_data.pmastats = + alloc_percpu(struct qib_pma_counters); + if (!ppd->ibport_data.pmastats) + return -ENOMEM; if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) goto bail; @@ -299,7 +302,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, goto bail_3; } - return; + return 0; bail_3: kfree(ppd->ccti_entries_shadow); @@ -313,7 +316,7 @@ bail_1: bail: /* User is intentionally disabling the congestion control agent */ if (!qib_cc_table_size) - return; + return 0; if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) { qib_cc_table_size = 0; @@ -324,7 +327,7 @@ bail: qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port); - return; + return 0; } static int init_pioavailregs(struct qib_devdata *dd) @@ -635,6 +638,12 @@ wq_error: return -ENOMEM; } +static void qib_free_pportdata(struct qib_pportdata *ppd) +{ + free_percpu(ppd->ibport_data.pmastats); + ppd->ibport_data.pmastats = NULL; +} + /** * qib_init - do the actual initialization sequence on the chip * @dd: the qlogic_ib device @@ -922,6 +931,7 @@ static void qib_shutdown_device(struct qib_devdata *dd) destroy_workqueue(ppd->qib_wq); ppd->qib_wq = NULL; } + qib_free_pportdata(ppd); } qib_update_eeprom_log(dd); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index ccb1191..edad991 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1634,6 +1634,23 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp, return reply((struct ib_smp *)pmp); } +static void qib_snapshot_pmacounters( + struct qib_ibport *ibp, + struct qib_pma_counters *pmacounters) +{ + struct qib_pma_counters *p; + int cpu; + + memset(pmacounters, 0, sizeof(*pmacounters)); + for_each_possible_cpu(cpu) { + p = per_cpu_ptr(ibp->pmastats, cpu); + pmacounters->n_unicast_xmit += p->n_unicast_xmit; + pmacounters->n_unicast_rcv += p->n_unicast_rcv; + pmacounters->n_multicast_xmit += p->n_multicast_xmit; + pmacounters->n_multicast_rcv += p->n_multicast_rcv; + } +} + static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { @@ -1642,6 +1659,7 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; u8 port_select = p->port_select; memset(pmp->data, 0, sizeof(pmp->data)); @@ -1664,10 +1682,17 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, p->port_rcv_data = cpu_to_be64(rwords); p->port_xmit_packets = cpu_to_be64(spkts); p->port_rcv_packets = cpu_to_be64(rpkts); - p->port_unicast_xmit_packets = cpu_to_be64(ibp->n_unicast_xmit); - p->port_unicast_rcv_packets = cpu_to_be64(ibp->n_unicast_rcv); - p->port_multicast_xmit_packets = cpu_to_be64(ibp->n_multicast_xmit); - p->port_multicast_rcv_packets = cpu_to_be64(ibp->n_multicast_rcv); + + qib_snapshot_pmacounters(ibp, &pma); + + p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit + - ibp->z_unicast_xmit); + p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv + - ibp->z_unicast_rcv); + p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit + - ibp->z_multicast_xmit); + p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv + - ibp->z_multicast_rcv); bail: return reply((struct ib_smp *) pmp); @@ -1795,6 +1820,7 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait); @@ -1810,17 +1836,19 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS) ibp->z_port_rcv_packets = rpkts; + qib_snapshot_pmacounters(ibp, &pma); + if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS) - ibp->n_unicast_xmit = 0; + ibp->z_unicast_xmit = pma.n_unicast_xmit; if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS) - ibp->n_unicast_rcv = 0; + ibp->z_unicast_rcv = pma.n_unicast_rcv; if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS) - ibp->n_multicast_xmit = 0; + ibp->z_multicast_xmit = pma.n_multicast_xmit; if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS) - ibp->n_multicast_rcv = 0; + ibp->z_multicast_rcv = pma.n_multicast_rcv; return pma_get_portcounters_ext(pmp, ibdev, port); } diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 3ab3413..2f25018 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -752,7 +752,7 @@ void qib_send_rc_ack(struct qib_qp *qp) qib_flush_wc(); qib_sendbuf_done(dd, pbufn); - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); goto done; queue_ack: diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 357b6cfc..4c07a8b 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -703,6 +703,7 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(bth2); + this_cpu_inc(ibp->pmastats->n_unicast_xmit); } /** diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 3ad651c..aaf7039 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -280,11 +280,11 @@ int qib_make_ud_req(struct qib_qp *qp) ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) { if (ah_attr->dlid != QIB_PERMISSIVE_LID) - ibp->n_multicast_xmit++; + this_cpu_inc(ibp->pmastats->n_multicast_xmit); else - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); } else { - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); if (unlikely(lid == ppd->lid)) { /* diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 092b0bb..1b00734 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -662,7 +662,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid); if (mcast == NULL) goto drop; - ibp->n_multicast_rcv++; + this_cpu_inc(ibp->pmastats->n_multicast_rcv); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); /* @@ -689,7 +689,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) rcd->lookaside_qpn = qp_num; } else qp = rcd->lookaside_qp; - ibp->n_unicast_rcv++; + this_cpu_inc(ibp->pmastats->n_unicast_rcv); qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp); } return; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index a01c7d2..bfc8948 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -664,6 +664,13 @@ struct qib_opcode_stats_perctx { struct qib_opcode_stats stats[128]; }; +struct qib_pma_counters { + u64 n_unicast_xmit; /* total unicast packets sent */ + u64 n_unicast_rcv; /* total unicast packets received */ + u64 n_multicast_xmit; /* total multicast packets sent */ + u64 n_multicast_rcv; /* total multicast packets received */ +}; + struct qib_ibport { struct qib_qp __rcu *qp0; struct qib_qp __rcu *qp1; @@ -680,10 +687,11 @@ struct qib_ibport { __be64 mkey; __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */ u64 tid; /* TID for traps */ - u64 n_unicast_xmit; /* total unicast packets sent */ - u64 n_unicast_rcv; /* total unicast packets received */ - u64 n_multicast_xmit; /* total multicast packets sent */ - u64 n_multicast_rcv; /* total multicast packets received */ + struct qib_pma_counters __percpu *pmastats; + u64 z_unicast_xmit; /* starting count for PMA */ + u64 z_unicast_rcv; /* starting count for PMA */ + u64 z_multicast_xmit; /* starting count for PMA */ + u64 z_multicast_rcv; /* starting count for PMA */ u64 z_symbol_error_counter; /* starting count for PMA */ u64 z_link_error_recovery_counter; /* starting count for PMA */ u64 z_link_downed_counter; /* starting count for PMA */ -- cgit v1.1 From 37a967651caf99dd267017023737bd442f5acb3d Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 10 Mar 2014 23:06:28 +0100 Subject: IB/qib: add missing braces in do_qib_user_sdma_queue_create() Commit c804f07248895ff9c moved qib_assign_ctxt() to do_qib_user_sdma_queue_create() but dropped the braces around the statements. This was spotted by coccicheck (coccinelle/spatch): $ make C=2 CHECK=scripts/coccicheck drivers/infiniband/hw/qib/ CHECK drivers/infiniband/hw/qib/qib_file_ops.c drivers/infiniband/hw/qib/qib_file_ops.c:1583:2-23: code aligned with following code on line 1587 This patch adds braces back. Link: http://marc.info/?i=cover.1394485254.git.ydroneaud@opteya.com Cc: Mike Marciniszyn Cc: infinipath@intel.com Cc: Julia Lawall Cc: cocci@systeme.lip6.fr Cc: stable@vger.kernel.org Signed-off-by: Yann Droneaud Tested-by: Mike Marciniszyn Acked-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_file_ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 275f247..2023cd6 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1578,7 +1578,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp) struct qib_ctxtdata *rcd = fd->rcd; struct qib_devdata *dd = rcd->dd; - if (dd->flags & QIB_HAS_SEND_DMA) + if (dd->flags & QIB_HAS_SEND_DMA) { fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, dd->unit, @@ -1586,6 +1586,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp) fd->subctxt); if (!fd->pq) return -ENOMEM; + } return 0; } -- cgit v1.1 From 8572de9732b6d1fd211873ffab8c60b3c1745ee8 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 10 Mar 2014 23:06:29 +0100 Subject: IB/qib: fixup indentation in qib_ib_rcv() Commit af061a644a0e4d4778 add some code in qib_ib_rcv() which trigger a warning from coccicheck (coccinelle/spatch): $ make C=2 CHECK=scripts/coccicheck drivers/infiniband/hw/qib/ CHECK drivers/infiniband/hw/qib/qib_verbs.c drivers/infiniband/hw/qib/qib_verbs.c:679:5-32: code aligned with following code on line 681 CC [M] drivers/infiniband/hw/qib/qib_verbs.o In fact, according to similar code in qib_kreceive(), qib_ib_rcv() code is correct but improperly indented. This patch fix indentation for the misaligned portion. Link: http://marc.info/?i=cover.1394485254.git.ydroneaud@opteya.com Cc: Mike Marciniszyn Cc: infinipath@intel.com Cc: Julia Lawall Cc: cocci@systeme.lip6.fr Signed-off-by: Yann Droneaud Tested-by: Mike Marciniszyn Acked-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 1b00734..9bcfbd8 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -678,8 +678,8 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) &rcd->lookaside_qp->refcount)) wake_up( &rcd->lookaside_qp->wait); - rcd->lookaside_qp = NULL; - } + rcd->lookaside_qp = NULL; + } } if (!rcd->lookaside_qp) { qp = qib_lookup_qpn(ibp, qp_num); -- cgit v1.1 From 06064a103f6bd5b409ffed6248983270c0681c21 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 17 Mar 2014 14:07:16 -0400 Subject: IB/qib: Fix memory leak of recv context when driver fails to initialize. In qib_create_ctxts() we allocate an array to hold recv contexts. Then attempt to create data for those recv contexts. If that call to qib_create_ctxtdata() fails then an error is returned but the previously allocated memory is not freed. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_init.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 7fbf466..5b7aeb2 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -130,7 +130,6 @@ void qib_set_ctxtcnt(struct qib_devdata *dd) int qib_create_ctxts(struct qib_devdata *dd) { unsigned i; - int ret; int local_node_id = pcibus_to_node(dd->pcidev->bus); if (local_node_id < 0) @@ -145,8 +144,7 @@ int qib_create_ctxts(struct qib_devdata *dd) if (!dd->rcd) { qib_dev_err(dd, "Unable to allocate ctxtdata array, failing\n"); - ret = -ENOMEM; - goto done; + return -ENOMEM; } /* create (one or more) kctxt */ @@ -163,15 +161,14 @@ int qib_create_ctxts(struct qib_devdata *dd) if (!rcd) { qib_dev_err(dd, "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); - ret = -ENOMEM; - goto done; + kfree(dd->rcd); + dd->rcd = NULL; + return -ENOMEM; } rcd->pkeys[0] = QIB_DEFAULT_P_KEY; rcd->seq_cnt = 1; } - ret = 0; -done: - return ret; + return 0; } /* -- cgit v1.1 From 9d194d1025f463392feafa26ff8c2d8247f71be1 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 10 Mar 2014 23:06:27 +0100 Subject: IB/nes: Return an error on ib_copy_from_udata() failure instead of NULL In case of error while accessing to userspace memory, function nes_create_qp() returns NULL instead of an error code wrapped through ERR_PTR(). But NULL is not expected by ib_uverbs_create_qp(), as it check for error with IS_ERR(). As page 0 is likely not mapped, it is going to trigger an Oops when the kernel will try to dereference NULL pointer to access to struct ib_qp's fields. In some rare cases, page 0 could be mapped by userspace, which could turn this bug to a vulnerability that could be exploited: the function pointers in struct ib_device will be under userspace total control. This was caught when using spatch (aka. coccinelle) to rewrite calls to ib_copy_{from,to}_udata(). Link: https://www.gitorious.org/opteya/ib-hw-nes-create-qp-null Link: https://www.gitorious.org/opteya/coccib/source/75ebf2c1033c64c1d81df13e4ae44ee99c989eba:ib_copy_udata.cocci Link: http://marc.info/?i=cover.1394485254.git.ydroneaud@opteya.com Cc: Signed-off-by: Yann Droneaud Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 8308e36..eb62461 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1186,7 +1186,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); kfree(nesqp->allocated_buffer); nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n"); - return NULL; + return ERR_PTR(-EFAULT); } if (req.user_wqe_buffers) { virt_wqs = 1; -- cgit v1.1 From 970918b32b030e9b3966e5ccb5f4a5a5b515a5b1 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 18 Feb 2014 09:54:27 -0300 Subject: IB/usnic: Remove '0x' when using %pa format %pa format already prints in hexadecimal format, so remove the '0x' annotation to avoid a double '0x0x' pattern. Signed-off-by: Fabio Estevam Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_uiom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 16755cd..801a1d6 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -286,7 +286,7 @@ iter_chunk: err = iommu_map(pd->domain, va_start, pa_start, size, flags); if (err) { - usnic_err("Failed to map va 0x%lx pa 0x%pa size 0x%zx with err %d\n", + usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", va_start, &pa_start, size, err); goto err_out; } -- cgit v1.1 From db498827ff62611c12c03f6d33bcc532d9fb497e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Feb 2014 14:09:37 +0300 Subject: IB/qib: Remove duplicate check in get_a_ctxt() We already know "pusable" is non-zero, no need to check again. Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_file_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 2023cd6..b15e34e 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1459,7 +1459,7 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo, cused++; else cfree++; - if (pusable && cfree && cused < inuse) { + if (cfree && cused < inuse) { udd = dd; inuse = cused; } -- cgit v1.1 From 349850f0a9188cb87b4ef6c01c057cb42cca478e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 6 Feb 2014 15:48:23 +0300 Subject: RDMA/nes: Clean up a condition We don't need to test "ret" twice and also the white space is messed up. Signed-off-by: Dan Carpenter Reviewed-by: Tatyana Nikolova Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_verbs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 797d887..9722eee 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3136,9 +3136,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); - if ((!ret) || - ((original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) && - (ret))) { + if (!ret || original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) { if (dont_wait) { if (nesqp->cm_id && nesqp->hw_tcp_state != 0) { nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d)," -- cgit v1.1 From bc1b04ab34a1485339571242cb0fbad823835685 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 19 Feb 2014 15:05:16 -0500 Subject: RDMA/ocrdma: Fix compiler warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/infiniband/hw/ocrdma/ocrdma_verbs.c: In function ‘_ocrdma_modify_qp’: drivers/infiniband/hw/ocrdma/ocrdma_verbs.c:1299:31: error: ‘old_qps’ may be used uninitialized in this function [-Werror=maybe-uninitialized] status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps); ocrdma_mbx_modify_qp() (and subsequent calls) doesn't appear to use old_qps so it doesn't need to be passed on. Removing the variable results in the warning going away. Signed-off-by: Prarit Bhargava Acked-by: Devesh Sharma (Devesh.sharma@emulex.com) Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 8 +++----- drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 3 +-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 1664d64..ac3fbf2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2127,8 +2127,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, static int ocrdma_set_qp_params(struct ocrdma_qp *qp, struct ocrdma_modify_qp *cmd, - struct ib_qp_attr *attrs, int attr_mask, - enum ib_qp_state old_qps) + struct ib_qp_attr *attrs, int attr_mask) { int status = 0; @@ -2233,8 +2232,7 @@ pmtu_err: } int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp, - struct ib_qp_attr *attrs, int attr_mask, - enum ib_qp_state old_qps) + struct ib_qp_attr *attrs, int attr_mask) { int status = -ENOMEM; struct ocrdma_modify_qp *cmd; @@ -2257,7 +2255,7 @@ int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp, OCRDMA_QP_PARAMS_STATE_MASK; } - status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask, old_qps); + status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask); if (status) goto mbx_err; status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index 82fe332..db3d55f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -112,8 +112,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs, u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset, u16 *dpp_credit_lmt); int ocrdma_mbx_modify_qp(struct ocrdma_dev *, struct ocrdma_qp *, - struct ib_qp_attr *attrs, int attr_mask, - enum ib_qp_state old_qps); + struct ib_qp_attr *attrs, int attr_mask); int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *, struct ocrdma_qp_params *param); int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e0cc201..f1108eb 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1296,7 +1296,7 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, */ if (status < 0) return status; - status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps); + status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); if (!status && attr_mask & IB_QP_STATE && attr->qp_state == IB_QPS_RTR) ocrdma_flush_rq_db(qp); -- cgit v1.1 From 0e9855dbf43a9cd31df5f5f61a18e031ac1c4a82 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 10 Mar 2014 10:33:05 +0100 Subject: IB/mlx4: Fix a sparse endianness warning Fix the following warning for the mlx4 driver: $ make M=drivers/infiniband C=2 CF=-D__CHECK_ENDIAN__ drivers/infiniband/hw/mlx4/qp.c:1885:31: warning: restricted __be16 degrades to integer Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index d8f4d1f..7499325 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1882,7 +1882,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, return err; } - if (ah->av.eth.vlan != 0xffff) { + if (ah->av.eth.vlan != cpu_to_be16(0xffff)) { vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff; is_vlan = 1; } -- cgit v1.1 From ff1706f4feb8e0e1a2e56a8dd57e17a4b45649b5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 19 Oct 2013 12:14:12 +0300 Subject: RDMA/cxgb4: Fix underflows in c4iw_create_qp() These sizes should be unsigned so we don't allow negative values and have underflow bugs. These can come from the user so there may be security implications, but I have not tested this. Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 5829367..72ea152 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1533,7 +1533,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct c4iw_cq *schp; struct c4iw_cq *rchp; struct c4iw_create_qp_resp uresp; - int sqsize, rqsize; + unsigned int sqsize, rqsize; struct c4iw_ucontext *ucontext; int ret; struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; -- cgit v1.1 From e24a72a3302a638d4c6e77f0b40c45cc61c3f089 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 19 Oct 2013 12:14:35 +0300 Subject: RDMA/cxgb4: Fix four byte info leak in c4iw_create_cq() There is a four byte hole at the end of the "uresp" struct after the ->qid_mask member. Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cq.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 88de3aa..e436ead 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -930,6 +930,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, if (!mm2) goto err4; + memset(&uresp, 0, sizeof(uresp)); uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; uresp.size = chp->cq.size; -- cgit v1.1 From ffd435924c86de055d33fe59941841819eef9f6a Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:38 +0530 Subject: RDMA/cxgb4: Cap CQ size at T4_MAX_IQ_SIZE Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index e436ead..906119f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -881,7 +881,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, /* * Make actual HW queue 2x to avoid cdix_inc overflows. */ - hwentries = entries * 2; + hwentries = min(entries * 2, T4_MAX_IQ_SIZE); /* * Make HW queue at least 64 entries so GTS updates aren't too -- cgit v1.1 From f8e819081f797df355cffbdedb9301ea50ae76b2 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:39 +0530 Subject: RDMA/cxgb4: Allow loopback connections find_route() must treat loopback as a valid egress interface. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index d286bde..360807e 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -400,7 +400,8 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, n = dst_neigh_lookup(&rt->dst, &peer_ip); if (!n) return NULL; - if (!our_interface(dev, n->dev)) { + if (!our_interface(dev, n->dev) && + !(n->dev->flags & IFF_LOOPBACK)) { dst_release(&rt->dst); return NULL; } -- cgit v1.1 From ebf00060c33b9d0946384fa6f440df7ea35a569e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:40 +0530 Subject: RDMA/cxgb4: Always release neigh entry Always release the neigh entry in rx_pkt(). Based on original work by Santosh Rastapur . Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 360807e..2b2af96 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3347,13 +3347,13 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) pi = (struct port_info *)netdev_priv(pdev); tx_chan = cxgb4_port_chan(pdev); } + neigh_release(neigh); if (!e) { pr_err("%s - failed to allocate l2t entry!\n", __func__); goto free_dst; } - neigh_release(neigh); step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; window = (__force u16) htons((__force u16)tcph->window); -- cgit v1.1 From 8a9c399eeee8c2d99e22b975f6023001a1fde88f Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:42 +0530 Subject: RDMA/cxgb4: Fix incorrect BUG_ON conditions Based on original work from Jay Hernandez Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 906119f..d6a7db2 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -603,7 +603,7 @@ proc_cqe: */ if (SQ_TYPE(hw_cqe)) { int idx = CQE_WRID_SQ_IDX(hw_cqe); - BUG_ON(idx > wq->sq.size); + BUG_ON(idx >= wq->sq.size); /* * Account for any unsignaled completions completed by @@ -617,7 +617,7 @@ proc_cqe: wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx; else wq->sq.in_use -= idx - wq->sq.cidx; - BUG_ON(wq->sq.in_use < 0 && wq->sq.in_use < wq->sq.size); + BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size); wq->sq.cidx = (uint16_t)idx; PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx); -- cgit v1.1 From ba32de9d8d8173a1d6dd1ed608c519d5d0a623bb Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:43 +0530 Subject: RDMA/cxgb4: Mind the sq_sig_all/sq_sig_type QP attributes Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + drivers/infiniband/hw/cxgb4/qp.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 23eaeab..b810d2a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -441,6 +441,7 @@ struct c4iw_qp { atomic_t refcnt; wait_queue_head_t wait; struct timer_list timer; + int sq_sig_all; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 72ea152..723ad290 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -675,7 +675,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_SIGNALED) + if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all) fw_flags |= FW_RI_COMPLETION_FLAG; swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; switch (wr->opcode) { @@ -736,7 +736,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } swsqe->idx = qhp->wq.sq.pidx; swsqe->complete = 0; - swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED); + swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; swsqe->flushed = 0; swsqe->wr_id = wr->wr_id; @@ -1605,6 +1606,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.enable_bind = 1; qhp->attr.max_ord = 1; qhp->attr.max_ird = 1; + qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; spin_lock_init(&qhp->lock); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); -- cgit v1.1 From df2d5130ece9118591c2f3fbf0ee4a79183b4ccc Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:44 +0530 Subject: RDMA/cxgb4: Default peer2peer mode to 1 Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 2b2af96..e1fc5c5 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -98,9 +98,9 @@ int c4iw_debug; module_param(c4iw_debug, int, 0644); MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)"); -static int peer2peer; +static int peer2peer = 1; module_param(peer2peer, int, 0644); -MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); +MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)"); static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; module_param(p2p_type, int, 0644); -- cgit v1.1 From eda6d1d1b7932f90d55583f8f3835dd7d6b32543 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 19 Mar 2014 17:44:45 +0530 Subject: RDMA/cxgb4: Save the correct map length for fast_reg_page_lists We cannot save the mapped length using the rdma max_page_list_len field of the ib_fast_reg_page_list struct because the core code uses it. This results in an incorrect unmap of the page list in c4iw_free_fastreg_pbl(). I found this with dma mapping debugging enabled in the kernel. The fix is to save the length in the c4iw_fr_page_list struct. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + drivers/infiniband/hw/cxgb4/mem.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index b810d2a..a1e8f13 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -369,6 +369,7 @@ struct c4iw_fr_page_list { DEFINE_DMA_UNMAP_ADDR(mapping); dma_addr_t dma_addr; struct c4iw_dev *dev; + int pll_len; }; static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 41b1195..22a2e3e 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -903,7 +903,11 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, dma_unmap_addr_set(c4pl, mapping, dma_addr); c4pl->dma_addr = dma_addr; c4pl->dev = dev; - c4pl->ibpl.max_page_list_len = pll_len; + c4pl->pll_len = pll_len; + + PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", + __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, + &c4pl->dma_addr); return &c4pl->ibpl; } @@ -912,8 +916,12 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) { struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); + PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", + __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, + &c4pl->dma_addr); + dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, - c4pl->ibpl.max_page_list_len, + c4pl->pll_len, c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping)); kfree(c4pl); } -- cgit v1.1 From 49c0e2414b20d868cf006addf14152570aef2605 Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Thu, 30 Jan 2014 17:36:00 -0500 Subject: IB/qib: Change SDMA progression mode depending on single- or multi-rail Improve performance by changing the behavour of the driver when all SDMA descriptors are in use, and the processes adding new descriptors are single- or multi-rail. For single-rail processes, the driver will block the call and finish posting all SDMA descriptors onto the hardware queue before returning back to PSM. Repeated kernel calls are slower than blocking. For multi-rail processes, the driver will return to PSM as quick as possible so PSM can feed packets to other rail. If all hardware queues are full, PSM will buffer the remaining SDMA descriptors until notified by interrupt that space is available. This patch builds a red-black tree to track the number rails opened by a particular PID. If the number is more than one, it is a multi-rail PSM process, otherwise, it is a single-rail process. Reviewed-by: Dean Luick Reviewed-by: John A Gregor Reviewed-by: Mitko Haralanov Signed-off-by: CQ Tang Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_user_sdma.c | 136 +++++++++++++++++++++++++++--- 1 file changed, 123 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 165aee2..d2806ca 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -52,6 +52,17 @@ /* attempt to drain the queue for 5secs */ #define QIB_USER_SDMA_DRAIN_TIMEOUT 500 +/* + * track how many times a process open this driver. + */ +static struct rb_root qib_user_sdma_rb_root = RB_ROOT; + +struct qib_user_sdma_rb_node { + struct rb_node node; + int refcount; + pid_t pid; +}; + struct qib_user_sdma_pkt { struct list_head list; /* list element */ @@ -120,15 +131,60 @@ struct qib_user_sdma_queue { /* dma page table */ struct rb_root dma_pages_root; + struct qib_user_sdma_rb_node *sdma_rb_node; + /* protect everything above... */ struct mutex lock; }; +static struct qib_user_sdma_rb_node * +qib_user_sdma_rb_search(struct rb_root *root, pid_t pid) +{ + struct qib_user_sdma_rb_node *sdma_rb_node; + struct rb_node *node = root->rb_node; + + while (node) { + sdma_rb_node = container_of(node, + struct qib_user_sdma_rb_node, node); + if (pid < sdma_rb_node->pid) + node = node->rb_left; + else if (pid > sdma_rb_node->pid) + node = node->rb_right; + else + return sdma_rb_node; + } + return NULL; +} + +static int +qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new) +{ + struct rb_node **node = &(root->rb_node); + struct rb_node *parent = NULL; + struct qib_user_sdma_rb_node *got; + + while (*node) { + got = container_of(*node, struct qib_user_sdma_rb_node, node); + parent = *node; + if (new->pid < got->pid) + node = &((*node)->rb_left); + else if (new->pid > got->pid) + node = &((*node)->rb_right); + else + return 0; + } + + rb_link_node(&new->node, parent, node); + rb_insert_color(&new->node, root); + return 1; +} + struct qib_user_sdma_queue * qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) { struct qib_user_sdma_queue *pq = kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL); + struct qib_user_sdma_rb_node *sdma_rb_node; if (!pq) goto done; @@ -138,6 +194,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) pq->num_pending = 0; pq->num_sending = 0; pq->added = 0; + pq->sdma_rb_node = NULL; INIT_LIST_HEAD(&pq->sent); spin_lock_init(&pq->sent_lock); @@ -163,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) pq->dma_pages_root = RB_ROOT; + sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root, + current->pid); + if (sdma_rb_node) { + sdma_rb_node->refcount++; + } else { + int ret; + sdma_rb_node = kmalloc(sizeof( + struct qib_user_sdma_rb_node), GFP_KERNEL); + if (!sdma_rb_node) + goto err_rb; + + sdma_rb_node->refcount = 1; + sdma_rb_node->pid = current->pid; + + ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, + sdma_rb_node); + BUG_ON(ret == 0); + } + pq->sdma_rb_node = sdma_rb_node; + goto done; +err_rb: + dma_pool_destroy(pq->header_cache); err_slab: kmem_cache_destroy(pq->pkt_slab); err_kfree: @@ -1020,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq) if (!pq) return; - kmem_cache_destroy(pq->pkt_slab); + pq->sdma_rb_node->refcount--; + if (pq->sdma_rb_node->refcount == 0) { + rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root); + kfree(pq->sdma_rb_node); + } dma_pool_destroy(pq->header_cache); + kmem_cache_destroy(pq->pkt_slab); kfree(pq); } @@ -1241,26 +1325,52 @@ static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, struct qib_user_sdma_queue *pq, struct list_head *pktlist, int count) { - int ret = 0; unsigned long flags; if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) return -ECOMM; - spin_lock_irqsave(&ppd->sdma_lock, flags); - - if (unlikely(!__qib_sdma_running(ppd))) { - ret = -ECOMM; - goto unlock; + /* non-blocking mode */ + if (pq->sdma_rb_node->refcount > 1) { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + pq->num_pending += count; + list_splice_tail_init(pktlist, &ppd->sdma_userpending); + qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return 0; } + /* In this case, descriptors from this process are not + * linked to ppd pending queue, interrupt handler + * won't update this process, it is OK to directly + * modify without sdma lock. + */ + + pq->num_pending += count; - list_splice_tail_init(pktlist, &ppd->sdma_userpending); - qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + /* + * Blocking mode for single rail process, we must + * release/regain sdma_lock to give other process + * chance to make progress. This is important for + * performance. + */ + do { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + qib_user_sdma_send_desc(ppd, pktlist); + if (!list_empty(pktlist)) + qib_sdma_make_progress(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } while (!list_empty(pktlist)); -unlock: - spin_unlock_irqrestore(&ppd->sdma_lock, flags); - return ret; + return 0; } int qib_user_sdma_writev(struct qib_ctxtdata *rcd, @@ -1290,7 +1400,7 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd, qib_user_sdma_queue_clean(ppd, pq); while (dim) { - int mxp = 8; + int mxp = 1; int ndesc = 0; ret = qib_user_sdma_queue_pkts(dd, ppd, pq, -- cgit v1.1 From 186f8ba062f796221d51077342f3ba5202838e9f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 30 Jan 2014 15:12:31 +0300 Subject: IB/qib: Cleanup qib_register_observer() Returning directly is easier to read than do-nothing gotos. Remove the duplicative check on "olp" and pull the code in one indent level. Signed-off-by: Dan Carpenter Acked-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_diag.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 07f9030..5dfda4c 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -694,28 +694,23 @@ int qib_register_observer(struct qib_devdata *dd, const struct diag_observer *op) { struct diag_observer_list_elt *olp; - int ret = -EINVAL; + unsigned long flags; if (!dd || !op) - goto bail; - ret = -ENOMEM; + return -EINVAL; olp = vmalloc(sizeof *olp); if (!olp) { pr_err("vmalloc for observer failed\n"); - goto bail; + return -ENOMEM; } - if (olp) { - unsigned long flags; - spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); - olp->op = op; - olp->next = dd->diag_observer_list; - dd->diag_observer_list = olp; - spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); - ret = 0; - } -bail: - return ret; + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + olp->op = op; + olp->next = dd->diag_observer_list; + dd->diag_observer_list = olp; + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + + return 0; } /* Remove all registered observers when device is closed */ -- cgit v1.1 From dbb084cc5f52152f53b5fd22fa76b9bf69904594 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:30 +0530 Subject: RDMA/cxgb4: Don't leak skb in c4iw_uld_rx_handler() Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 4a03385..982f815 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -897,11 +897,13 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, } opcode = *(u8 *)rsp; - if (c4iw_handlers[opcode]) + if (c4iw_handlers[opcode]) { c4iw_handlers[opcode](dev, skb); - else + } else { pr_info("%s no handler opcode 0x%x...\n", __func__, opcode); + kfree_skb(skb); + } return 0; nomem: -- cgit v1.1 From 1ce1d471acb7ad8e8b8e3a2972de9fbb5f2be79a Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:31 +0530 Subject: RDMA/cxgb4: Fix possible memory leak in RX_PKT processing If cxgb4_ofld_send() returns < 0, then send_fw_pass_open_req() must free the request skb and the saved skb with the tcp header. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e1fc5c5..773d010 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3204,6 +3204,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, struct sk_buff *req_skb; struct fw_ofld_connection_wr *req; struct cpl_pass_accept_req *cpl = cplhdr(skb); + int ret; req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req)); @@ -3240,7 +3241,13 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, req->cookie = (unsigned long)skb; set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); - cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); + ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); + if (ret < 0) { + pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__, + ret); + kfree_skb(skb); + kfree_skb(req_skb); + } } /* -- cgit v1.1 From 70b9c66053ecadde421658b8ec808c981f2eef11 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:32 +0530 Subject: RDMA/cxgb4: Ignore read reponse type 1 CQEs These are generated by HW in some error cases and need to be silently discarded. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cq.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index d6a7db2..ce468e5 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -365,8 +365,14 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp) if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) { - /* - * drop peer2peer RTR reads. + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) + goto next_cqe; + + /* drop peer2peer RTR reads. */ if (CQE_WRID_STAG(hw_cqe) == 1) goto next_cqe; @@ -511,8 +517,18 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) { - /* - * If this is an unsolicited read response, then the read + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) { + if (CQE_STATUS(hw_cqe)) + t4_set_wq_in_error(wq); + ret = -EAGAIN; + goto skip_cqe; + } + + /* If this is an unsolicited read response, then the read * was generated by the kernel driver as part of peer-2-peer * connection setup. So ignore the completion. */ -- cgit v1.1 From be13b2dff8c4e41846477b22cc5c164ea5a6ac2e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:33 +0530 Subject: RDMA/cxgb4: Connect_request_upcall fixes When processing an MPA Start Request, if the listening endpoint is DEAD, then abort the connection. If the IWCM returns an error, then we must abort the connection and release resources. Also abort_connection() should not post a CLOSE event, so clean that up too. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 773d010..6bfef31 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -968,13 +968,14 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } -static void close_complete_upcall(struct c4iw_ep *ep) +static void close_complete_upcall(struct c4iw_ep *ep, int status) { struct iw_cm_event event; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CLOSE; + event.status = status; if (ep->com.cm_id) { PDBG("close complete delivered ep %p cm_id %p tid %u\n", ep, ep->com.cm_id, ep->hwtid); @@ -988,7 +989,6 @@ static void close_complete_upcall(struct c4iw_ep *ep) static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - close_complete_upcall(ep); state_set(&ep->com, ABORTING); set_bit(ABORT_CONN, &ep->com.history); return send_abort(ep, skb, gfp); @@ -1067,9 +1067,10 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) } } -static void connect_request_upcall(struct c4iw_ep *ep) +static int connect_request_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; + int ret; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); @@ -1094,15 +1095,14 @@ static void connect_request_upcall(struct c4iw_ep *ep) event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); } - if (state_read(&ep->parent_ep->com) != DEAD) { - c4iw_get_ep(&ep->com); - ep->parent_ep->com.cm_id->event_handler( - ep->parent_ep->com.cm_id, - &event); - } + c4iw_get_ep(&ep->com); + ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, + &event); + if (ret) + c4iw_put_ep(&ep->com); set_bit(CONNREQ_UPCALL, &ep->com.history); c4iw_put_ep(&ep->parent_ep->com); - ep->parent_ep = NULL; + return ret; } static void established_upcall(struct c4iw_ep *ep) @@ -1401,7 +1401,6 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) return; PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - stop_ep_timer(ep); mpa = (struct mpa_message *) ep->mpa_pkt; /* @@ -1494,9 +1493,17 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) ep->mpa_attr.p2p_type); state_set(&ep->com, MPA_REQ_RCVD); + stop_ep_timer(ep); /* drive upcall */ - connect_request_upcall(ep); + mutex_lock(&ep->parent_ep->com.mutex); + if (ep->parent_ep->com.state != DEAD) { + if (connect_request_upcall(ep)) + abort_connection(ep, skb, GFP_KERNEL); + } else { + abort_connection(ep, skb, GFP_KERNEL); + } + mutex_unlock(&ep->parent_ep->com.mutex); return; } @@ -2247,7 +2254,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } - close_complete_upcall(ep); + close_complete_upcall(ep, 0); __state_set(&ep->com, DEAD); release = 1; disconnect = 0; @@ -2426,7 +2433,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } - close_complete_upcall(ep); + close_complete_upcall(ep, 0); __state_set(&ep->com, DEAD); release = 1; break; @@ -2981,7 +2988,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) rdev = &ep->com.dev->rdev; if (c4iw_fatal_error(rdev)) { fatal = 1; - close_complete_upcall(ep); + close_complete_upcall(ep, -EIO); ep->com.state = DEAD; } switch (ep->com.state) { @@ -3023,7 +3030,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (close) { if (abrupt) { set_bit(EP_DISC_ABORT, &ep->com.history); - close_complete_upcall(ep); + close_complete_upcall(ep, -ECONNRESET); ret = send_abort(ep, NULL, gfp); } else { set_bit(EP_DISC_CLOSE, &ep->com.history); @@ -3435,6 +3442,7 @@ static void process_timeout(struct c4iw_ep *ep) &attrs, 1); } __state_set(&ep->com, ABORTING); + close_complete_upcall(ep, -ETIMEDOUT); break; default: WARN(1, "%s unexpected state ep %p tid %u state %u\n", -- cgit v1.1 From 9c88aa003d26e9f1e9ea6e08511768c2ef666654 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:34 +0530 Subject: RDMA/cxgb4: Update snd_seq when sending MPA messages Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6bfef31..a1bc41d 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -762,6 +762,7 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, start_ep_timer(ep); state_set(&ep->com, MPA_REQ_SENT); ep->mpa_attr.initiator = 1; + ep->snd_seq += mpalen; return; } @@ -841,6 +842,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) t4_set_arp_err_handler(skb, NULL, arp_failure_discard); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; + ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -925,6 +927,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) t4_set_arp_err_handler(skb, NULL, arp_failure_discard); ep->mpa_skb = skb; state_set(&ep->com, MPA_REP_SENT); + ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } -- cgit v1.1 From 08e74c4b00c30c232d535ff368554959403d0432 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 10 Mar 2014 23:06:26 +0100 Subject: IB/mthca: Return an error on ib_copy_to_udata() failure In case of error when writing to userspace, the function mthca_create_cq() does not set an error code before following its error path. This patch sets the error code to -EFAULT when ib_copy_to_udata() fails. This was caught when using spatch (aka. coccinelle) to rewrite call to ib_copy_{from,to}_udata(). Link: https://www.gitorious.org/opteya/coccib/source/75ebf2c1033c64c1d81df13e4ae44ee99c989eba:ib_copy_udata.cocci Link: http://marc.info/?i=cover.1394485254.git.ydroneaud@opteya.com Cc: Signed-off-by: Yann Droneaud Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_provider.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 5b71d43..42dde06 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -695,6 +695,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { mthca_free_cq(to_mdev(ibdev), cq); + err = -EFAULT; goto err_free; } -- cgit v1.1 From 5bdb0f02add5994b0bc17494f4726925ca5d6ba1 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 10 Mar 2014 23:06:25 +0100 Subject: IB/ehca: Returns an error on ib_copy_to_udata() failure In case of error when writing to userspace, function ehca_create_cq() does not set an error code before following its error path. This patch sets the error code to -EFAULT when ib_copy_to_udata() fails. This was caught when using spatch (aka. coccinelle) to rewrite call to ib_copy_{from,to}_udata(). Link: https://www.gitorious.org/opteya/coccib/source/75ebf2c1033c64c1d81df13e4ae44ee99c989eba:ib_copy_udata.cocci Link: http://marc.info/?i=cover.1394485254.git.ydroneaud@opteya.com Cc: Signed-off-by: Yann Droneaud Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_cq.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 212150c..8cc8375 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -283,6 +283,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { ehca_err(device, "Copy to udata failed."); + cq = ERR_PTR(-EFAULT); goto create_cq_exit4; } } -- cgit v1.1 From 3839d8ac1bac55922f6632e36040fdc0de374bba Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Mar 2014 11:21:39 +0300 Subject: mlx4_core: Fix some indenting in mlx4_ib_add() The code was indented too far and also kernel style says we should have curly braces. Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e81c554..6eb0d44 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2056,8 +2056,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]); if (err) ibdev->counters[i] = -1; - } else - ibdev->counters[i] = -1; + } else { + ibdev->counters[i] = -1; + } } mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) -- cgit v1.1 From 4661bd798f1b58ee2755bfa04003638345802680 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Mar 2014 11:23:07 +0300 Subject: mlx4_core: Make buffer larger to avoid overflow warning My static checker complains that the sprintf() here can overflow. drivers/infiniband/hw/mlx4/main.c:1836 mlx4_ib_alloc_eqs() error: format string overflow. buf_size: 32 length: 69 This seems like a valid complaint. The "dev->pdev->bus->name" string can be 48 characters long. I just made the buffer 80 characters instead of 69 and I changed the sprintf() to snprintf(). Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 6eb0d44..20b4d7a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1803,7 +1803,7 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev) static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { - char name[32]; + char name[80]; int eq_per_port = 0; int added_eqs = 0; int total_eqs = 0; @@ -1833,8 +1833,8 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) eq = 0; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { for (j = 0; j < eq_per_port; j++) { - sprintf(name, "mlx4-ib-%d-%d@%s", - i, j, dev->pdev->bus->name); + snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", + i, j, dev->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, &ibdev->eq_table[eq])) { -- cgit v1.1 From 446bf432a9b084d9f3471eca309cc53fa434ccc7 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 28 Mar 2014 13:26:42 -0400 Subject: IB/qib: Remove ib_sg_dma_address() and ib_sg_dma_len() overloads Remove the overload for .dma_len and .dma_address The removal of these methods is compensated for by code changes to .map_sg to insure that the vanilla sg_dma_address() and sg_dma_len() will do the same thing as the equivalent former ib_sg_dma_address() and ib_sg_dma_len() calls into the drivers. Suggested-by: Bart Van Assche Reviewed-by: Dennis Dalessandro Tested-by: Vinod Kumar Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_dma.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c index 2920bb3..59fe092 100644 --- a/drivers/infiniband/hw/qib/qib_dma.c +++ b/drivers/infiniband/hw/qib/qib_dma.c @@ -108,6 +108,10 @@ static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl, ret = 0; break; } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif } return ret; } @@ -119,21 +123,6 @@ static void qib_unmap_sg(struct ib_device *dev, BUG_ON(!valid_dma_direction(direction)); } -static u64 qib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) -{ - u64 addr = (u64) page_address(sg_page(sg)); - - if (addr) - addr += sg->offset; - return addr; -} - -static unsigned int qib_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) -{ - return sg->length; -} - static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { @@ -173,8 +162,6 @@ struct ib_dma_mapping_ops qib_dma_mapping_ops = { .unmap_page = qib_dma_unmap_page, .map_sg = qib_map_sg, .unmap_sg = qib_unmap_sg, - .dma_address = qib_sg_dma_address, - .dma_len = qib_sg_dma_len, .sync_single_for_cpu = qib_sync_single_for_cpu, .sync_single_for_device = qib_sync_single_for_device, .alloc_coherent = qib_dma_alloc_coherent, -- cgit v1.1 From 49c5c27e05c915f0da4c0e756da313cf09ae2c55 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 28 Mar 2014 15:04:43 -0400 Subject: IB/ipath: Remove ib_sg_dma_address() and ib_sg_dma_len() overloads The removal of these methods is compensated for by code changes to .map_sg to insure that the vanilla sg_dma_address() and sg_dma_len() will do the same thing as the equivalent former ib_sg_dma_address() and ib_sg_dma_len() calls into the drivers. The introduction of this patch required that the struct ipath_dma_mapping_ops be converted to a C99 initializer. Suggested-by: Bart Van Assche Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_dma.c | 43 ++++++++++++--------------------- 1 file changed, 15 insertions(+), 28 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c index 644c2c7..123a8c0 100644 --- a/drivers/infiniband/hw/ipath/ipath_dma.c +++ b/drivers/infiniband/hw/ipath/ipath_dma.c @@ -115,6 +115,10 @@ static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl, ret = 0; break; } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif } return ret; } @@ -126,21 +130,6 @@ static void ipath_unmap_sg(struct ib_device *dev, BUG_ON(!valid_dma_direction(direction)); } -static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) -{ - u64 addr = (u64) page_address(sg_page(sg)); - - if (addr) - addr += sg->offset; - return addr; -} - -static unsigned int ipath_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) -{ - return sg->length; -} - static void ipath_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, @@ -176,17 +165,15 @@ static void ipath_dma_free_coherent(struct ib_device *dev, size_t size, } struct ib_dma_mapping_ops ipath_dma_mapping_ops = { - ipath_mapping_error, - ipath_dma_map_single, - ipath_dma_unmap_single, - ipath_dma_map_page, - ipath_dma_unmap_page, - ipath_map_sg, - ipath_unmap_sg, - ipath_sg_dma_address, - ipath_sg_dma_len, - ipath_sync_single_for_cpu, - ipath_sync_single_for_device, - ipath_dma_alloc_coherent, - ipath_dma_free_coherent + .mapping_error = ipath_mapping_error, + .map_single = ipath_dma_map_single, + .unmap_single = ipath_dma_unmap_single, + .map_page = ipath_dma_map_page, + .unmap_page = ipath_dma_unmap_page, + .map_sg = ipath_map_sg, + .unmap_sg = ipath_unmap_sg, + .sync_single_for_cpu = ipath_sync_single_for_cpu, + .sync_single_for_device = ipath_sync_single_for_device, + .alloc_coherent = ipath_dma_alloc_coherent, + .free_coherent = ipath_dma_free_coherent }; -- cgit v1.1 From f3585a6ae31cb0f0ebda53b161fbed7c9a679572 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 28 Mar 2014 13:26:53 -0400 Subject: IB/ehca: Remove ib_sg_dma_address() and ib_sg_dma_len() overloads These methods appear to only mimic the sg_dma_address() and sg_dma_len() behavior. They can be safely removed. Suggested-by: Bart Van Assche Cc: Bart Van Assche Cc: Hoang-Nam Nguyen Cc: Christoph Raisch Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ehca/ehca_mrmw.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index bcfb0c1..65873ee 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -2591,16 +2591,6 @@ static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, /* This is only a stub; nothing to be done here */ } -static u64 ehca_dma_address(struct ib_device *dev, struct scatterlist *sg) -{ - return sg->dma_address; -} - -static unsigned int ehca_dma_len(struct ib_device *dev, struct scatterlist *sg) -{ - return sg->length; -} - static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) @@ -2653,8 +2643,6 @@ struct ib_dma_mapping_ops ehca_dma_mapping_ops = { .unmap_page = ehca_dma_unmap_page, .map_sg = ehca_dma_map_sg, .unmap_sg = ehca_dma_unmap_sg, - .dma_address = ehca_dma_address, - .dma_len = ehca_dma_len, .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, .sync_single_for_device = ehca_dma_sync_single_for_device, .alloc_coherent = ehca_dma_alloc_coherent, -- cgit v1.1 From a7db89eb89cd6a444b16fdd602e818eed34d8222 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:35 +0530 Subject: RDMA/cxgb4: Lock around accept/reject downcalls There is a race between ULP threads doing an accept/reject, and the ingress processing thread handling close/abort for the same connection. The accept/reject path needs to hold the lock to serialize these paths. Signed-off-by: Steve Wise [ Fold in locking fix found by Dan Carpenter . - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a1bc41d..6836d11 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -760,7 +760,7 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, ep->mpa_skb = skb; c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); start_ep_timer(ep); - state_set(&ep->com, MPA_REQ_SENT); + __state_set(&ep->com, MPA_REQ_SENT); ep->mpa_attr.initiator = 1; ep->snd_seq += mpalen; return; @@ -926,7 +926,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) skb_get(skb); t4_set_arp_err_handler(skb, NULL, arp_failure_discard); ep->mpa_skb = skb; - state_set(&ep->com, MPA_REP_SENT); + __state_set(&ep->com, MPA_REP_SENT); ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -944,6 +944,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid, be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn)); + mutex_lock(&ep->com.mutex); dst_confirm(ep->dst); /* setup the hwtid for this connection */ @@ -967,7 +968,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) send_mpa_req(ep, skb, 1); else send_mpa_req(ep, skb, mpa_rev); - + mutex_unlock(&ep->com.mutex); return 0; } @@ -2511,22 +2512,28 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { - int err; + int err = 0; + int disconnect = 0; struct c4iw_ep *ep = to_ep(cm_id); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { + mutex_lock(&ep->com.mutex); + if (ep->com.state == DEAD) { + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return -ECONNRESET; } set_bit(ULP_REJECT, &ep->com.history); - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + BUG_ON(ep->com.state != MPA_REQ_RCVD); if (mpa_rev == 0) abort_connection(ep, NULL, GFP_KERNEL); else { err = send_mpa_reject(ep, pdata, pdata_len); - err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + disconnect = 1; } + mutex_unlock(&ep->com.mutex); + if (disconnect) + err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); c4iw_put_ep(&ep->com); return 0; } @@ -2541,12 +2548,14 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { + + mutex_lock(&ep->com.mutex); + if (ep->com.state == DEAD) { err = -ECONNRESET; goto err; } - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + BUG_ON(ep->com.state != MPA_REQ_RCVD); BUG_ON(!qp); set_bit(ULP_ACCEPT, &ep->com.history); @@ -2615,14 +2624,16 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (err) goto err1; - state_set(&ep->com, FPDU_MODE); + __state_set(&ep->com, FPDU_MODE); established_upcall(ep); + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return 0; err1: ep->com.cm_id = NULL; cm_id->rem_ref(cm_id); err: + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return err; } -- cgit v1.1 From 977116c69862a6062f302395cb3546544d7e1bc1 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:36 +0530 Subject: RDMA/cxgb4: Drop RX_DATA packets if the endpoint is gone Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6836d11..8a645d8 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1521,6 +1521,8 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) __u8 status = hdr->status; ep = lookup_tid(t, tid); + if (!ep) + return 0; PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); -- cgit v1.1 From c529fb50463992982c246155e095577aa0485f57 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 21 Mar 2014 20:40:37 +0530 Subject: RDMA/cxgb4: rx_data() needs to hold the ep mutex To avoid racing with other threads doing close/flush/whatever, rx_data() should hold the endpoint mutex. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 8a645d8..26046c2 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1170,7 +1170,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * the connection. */ stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_SENT) + if (ep->com.state != MPA_REQ_SENT) return; /* @@ -1245,7 +1245,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * start reply message including private data. And * the MPA header is valid. */ - state_set(&ep->com, FPDU_MODE); + __state_set(&ep->com, FPDU_MODE); ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; @@ -1360,7 +1360,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) } goto out; err: - state_set(&ep->com, ABORTING); + __state_set(&ep->com, ABORTING); send_abort(ep, skb, GFP_KERNEL); out: connect_reply_upcall(ep, err); @@ -1375,7 +1375,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) != MPA_REQ_WAIT) + if (ep->com.state != MPA_REQ_WAIT) return; /* @@ -1496,7 +1496,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, ep->mpa_attr.p2p_type); - state_set(&ep->com, MPA_REQ_RCVD); + __state_set(&ep->com, MPA_REQ_RCVD); stop_ep_timer(ep); /* drive upcall */ @@ -1526,11 +1526,12 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); + mutex_lock(&ep->com.mutex); /* update RX credits */ update_rx_credits(ep, dlen); - switch (state_read(&ep->com)) { + switch (ep->com.state) { case MPA_REQ_SENT: ep->rcv_seq += dlen; process_mpa_reply(ep, skb); @@ -1546,7 +1547,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) pr_err("%s Unexpected streaming data." \ " qpid %u ep %p state %d tid %u status %d\n", __func__, ep->com.qp->wq.sq.qid, ep, - state_read(&ep->com), ep->hwtid, status); + ep->com.state, ep->hwtid, status); attrs.next_state = C4IW_QP_STATE_TERMINATE; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); @@ -1555,6 +1556,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) default: break; } + mutex_unlock(&ep->com.mutex); return 0; } -- cgit v1.1 From 96bb2706c883a21d7da6a261d7e60d3bab4cf6bd Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 27 Mar 2014 12:03:47 -0500 Subject: RDMA/cxgb4: Disable DSGL use by default Current hardware doesn't correctly support DSGL. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 22a2e3e..2630838 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -37,9 +37,9 @@ #include "iw_cxgb4.h" -int use_dsgl = 1; +int use_dsgl = 0; module_param(use_dsgl, int, 0644); -MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1)"); +MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)"); #define T4_ULPTX_MIN_IO 32 #define C4IW_MAX_INLINE_SIZE 96 -- cgit v1.1 From ea61762679cd4d409dcaa6f502f190f4c8156d09 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:56:54 +0530 Subject: RDMA/ocrdma: EQ full catastrophe avoidance Stale entries in the CQ being destroyed causes hardware to generate EQEs indefinitely for a given CQ. Thus causing uncontrolled execution of irq_handler. This patch fixes this using following sementics: * irq_handler will ring EQ doorbell atleast once and implement budgeting scheme. * cq_destroy will count number of valid entires during destroy and ring cq-db so that hardware does not generate uncontrolled EQE. * cq_destroy will synchronize with last running irq_handler instance. * arm_cq will always defer arming CQ till poll_cq, except for the first arm_cq call. * poll_cq will always ring cq-db with arm=SET if arm_cq was called prior to enter poll_cq. * poll_cq will always ring cq-db with arm=UNSET if arm_cq was not called prior to enter poll_cq. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 18 ++++++- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 53 +++++++++---------- drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 1 + drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 81 ++++++++++++++++++++++------- 4 files changed, 103 insertions(+), 50 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 7c001b9..61f508e7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -209,8 +209,8 @@ struct ocrdma_cq { */ u32 max_hw_cqe; bool phase_change; - bool armed, solicited; - bool arm_needed; + bool deferred_arm, deferred_sol; + bool first_arm; spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization * to cq polling @@ -223,6 +223,7 @@ struct ocrdma_cq { struct ocrdma_ucontext *ucontext; dma_addr_t pa; u32 len; + u32 cqe_cnt; /* head of all qp's sq and rq for which cqes need to be flushed * by the software. @@ -436,4 +437,17 @@ static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev, return 0; } +static inline int ocrdma_get_eq_table_index(struct ocrdma_dev *dev, + int eqid) +{ + int indx; + + for (indx = 0; indx < dev->eq_cnt; indx++) { + if (dev->eq_tbl[indx].q.id == eqid) + return indx; + } + + return -EINVAL; +} + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index ac3fbf2..e3c75e0 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -444,7 +444,7 @@ mbx_err: return status; } -static int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) +int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) { int irq; @@ -574,6 +574,7 @@ static int ocrdma_create_mq(struct ocrdma_dev *dev) if (status) goto alloc_err; + dev->eq_tbl[0].cq_cnt++; status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->eq_tbl[0].q); if (status) goto mbx_cq_free; @@ -858,16 +859,8 @@ static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx) BUG(); cq = dev->cq_tbl[cq_idx]; - if (cq == NULL) { - pr_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx); + if (cq == NULL) return; - } - spin_lock_irqsave(&cq->cq_lock, flags); - cq->armed = false; - cq->solicited = false; - spin_unlock_irqrestore(&cq->cq_lock, flags); - - ocrdma_ring_cq_db(dev, cq->id, false, false, 0); if (cq->ibcq.comp_handler) { spin_lock_irqsave(&cq->comp_handler_lock, flags); @@ -892,27 +885,35 @@ static irqreturn_t ocrdma_irq_handler(int irq, void *handle) struct ocrdma_dev *dev = eq->dev; struct ocrdma_eqe eqe; struct ocrdma_eqe *ptr; - u16 eqe_popped = 0; u16 cq_id; - while (1) { + int budget = eq->cq_cnt; + + do { ptr = ocrdma_get_eqe(eq); eqe = *ptr; ocrdma_le32_to_cpu(&eqe, sizeof(eqe)); if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0) break; - eqe_popped += 1; + ptr->id_valid = 0; + /* ring eq doorbell as soon as its consumed. */ + ocrdma_ring_eq_db(dev, eq->q.id, false, true, 1); /* check whether its CQE or not. */ if ((eqe.id_valid & OCRDMA_EQE_FOR_CQE_MASK) == 0) { cq_id = eqe.id_valid >> OCRDMA_EQE_RESOURCE_ID_SHIFT; ocrdma_cq_handler(dev, cq_id); } ocrdma_eq_inc_tail(eq); - } - ocrdma_ring_eq_db(dev, eq->q.id, true, true, eqe_popped); - /* Ring EQ doorbell with num_popped to 0 to enable interrupts again. */ - if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) - ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0); + + /* There can be a stale EQE after the last bound CQ is + * destroyed. EQE valid and budget == 0 implies this. + */ + if (budget) + budget--; + + } while (budget); + + ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0); return IRQ_HANDLED; } @@ -1357,12 +1358,10 @@ static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id) int i; mutex_lock(&dev->dev_lock); - for (i = 0; i < dev->eq_cnt; i++) { - if (dev->eq_tbl[i].q.id != eq_id) - continue; - dev->eq_tbl[i].cq_cnt -= 1; - break; - } + i = ocrdma_get_eq_table_index(dev, eq_id); + if (i == -EINVAL) + BUG(); + dev->eq_tbl[i].cq_cnt -= 1; mutex_unlock(&dev->dev_lock); } @@ -1417,6 +1416,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, cq->eqn = ocrdma_bind_eq(dev); cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3; cqe_count = cq->len / cqe_size; + cq->cqe_cnt = cqe_count; if (cqe_count > 1024) { /* Set cnt to 3 to indicate more than 1024 cq entries */ cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT); @@ -1484,12 +1484,9 @@ int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq) (cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) & OCRDMA_DESTROY_CQ_QID_MASK; - ocrdma_unbind_eq(dev, cq->eqn); status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); - if (status) - goto mbx_err; + ocrdma_unbind_eq(dev, cq->eqn); dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa); -mbx_err: kfree(cmd); return status; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index db3d55f..77da536 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -131,5 +131,6 @@ int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state, bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *); bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *); void ocrdma_flush_qp(struct ocrdma_qp *); +int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq); #endif /* __OCRDMA_HW_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index f1108eb..7d59ed3 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -910,6 +910,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, spin_lock_init(&cq->comp_handler_lock); INIT_LIST_HEAD(&cq->sq_head); INIT_LIST_HEAD(&cq->rq_head); + cq->first_arm = true; if (ib_ctx) { uctx = get_ocrdma_ucontext(ib_ctx); @@ -927,9 +928,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, goto ctx_err; } cq->phase = OCRDMA_CQE_VALID; - cq->arm_needed = true; dev->cq_tbl[cq->id] = cq; - return &cq->ibcq; ctx_err: @@ -952,15 +951,52 @@ int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt, return status; } +static void ocrdma_flush_cq(struct ocrdma_cq *cq) +{ + int cqe_cnt; + int valid_count = 0; + unsigned long flags; + + struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device); + struct ocrdma_cqe *cqe = NULL; + + cqe = cq->va; + cqe_cnt = cq->cqe_cnt; + + /* Last irq might have scheduled a polling thread + * sync-up with it before hard flushing. + */ + spin_lock_irqsave(&cq->cq_lock, flags); + while (cqe_cnt) { + if (is_cqe_valid(cq, cqe)) + valid_count++; + cqe++; + cqe_cnt--; + } + ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count); + spin_unlock_irqrestore(&cq->cq_lock, flags); +} + int ocrdma_destroy_cq(struct ib_cq *ibcq) { int status; struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); + struct ocrdma_eq *eq = NULL; struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); int pdid = 0; + u32 irq, indx; - status = ocrdma_mbx_destroy_cq(dev, cq); + dev->cq_tbl[cq->id] = NULL; + indx = ocrdma_get_eq_table_index(dev, cq->eqn); + if (indx == -EINVAL) + BUG(); + eq = &dev->eq_tbl[indx]; + irq = ocrdma_get_irq(dev, eq); + synchronize_irq(irq); + ocrdma_flush_cq(cq); + + status = ocrdma_mbx_destroy_cq(dev, cq); if (cq->ucontext) { pdid = cq->ucontext->cntxt_pd->id; ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, @@ -969,7 +1005,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq) ocrdma_get_db_addr(dev, pdid), dev->nic_info.db_page_size); } - dev->cq_tbl[cq->id] = NULL; kfree(cq); return status; @@ -2705,10 +2740,18 @@ expand_cqe: } stop_cqe: cq->getp = cur_getp; - if (polled_hw_cqes || expand || stop) { - ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited, + if (cq->deferred_arm) { + ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol, + polled_hw_cqes); + cq->deferred_arm = false; + cq->deferred_sol = false; + } else { + /* We need to pop the CQE. No need to arm */ + ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol, polled_hw_cqes); + cq->deferred_sol = false; } + return i; } @@ -2780,30 +2823,28 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); u16 cq_id; - u16 cur_getp; - struct ocrdma_cqe *cqe; unsigned long flags; + bool arm_needed = false, sol_needed = false; cq_id = cq->id; spin_lock_irqsave(&cq->cq_lock, flags); if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED) - cq->armed = true; + arm_needed = true; if (cq_flags & IB_CQ_SOLICITED) - cq->solicited = true; - - cur_getp = cq->getp; - cqe = cq->va + cur_getp; + sol_needed = true; - /* check whether any valid cqe exist or not, if not then safe to - * arm. If cqe is not yet consumed, then let it get consumed and then - * we arm it to avoid false interrupts. - */ - if (!is_cqe_valid(cq, cqe) || cq->arm_needed) { - cq->arm_needed = false; - ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0); + if (cq->first_arm) { + ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0); + cq->first_arm = false; + goto skip_defer; } + cq->deferred_arm = true; + +skip_defer: + cq->deferred_sol = sol_needed; spin_unlock_irqrestore(&cq->cq_lock, flags); + return 0; } -- cgit v1.1 From 2df84fa87f4d00299031b1335748c54176edfe87 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:56:55 +0530 Subject: RDMA/ocrdma: SQ and RQ doorbell offset clean up Introducing new macros to define SQ and RQ doorbell offset. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 7 ------- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 5 ++++- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 23 +++++++---------------- 3 files changed, 11 insertions(+), 24 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 61f508e7..1b51e67 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -386,13 +386,6 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct ocrdma_srq, ibsrq); } - -static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp) -{ - return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY && - qp->id < 128) ? 24 : 16); -} - static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe) { int cqe_valid; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 60d5ac2..e71685a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -103,7 +103,10 @@ enum { OCRDMA_DB_GEN2_SRQ_OFFSET = OCRDMA_DB_GEN2_RQ_OFFSET, OCRDMA_DB_CQ_OFFSET = 0x120, OCRDMA_DB_EQ_OFFSET = OCRDMA_DB_CQ_OFFSET, - OCRDMA_DB_MQ_OFFSET = 0x140 + OCRDMA_DB_MQ_OFFSET = 0x140, + + OCRDMA_DB_SQ_SHIFT = 16, + OCRDMA_DB_RQ_SHIFT = 24 }; #define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 7d59ed3..be75d2b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1127,15 +1127,9 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, } uresp.db_page_addr = usr_db; uresp.db_page_size = dev->nic_info.db_page_size; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { - uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET; - uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; - uresp.db_shift = 24; - } else { - uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET; - uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET; - uresp.db_shift = 16; - } + uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET; + uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; + uresp.db_shift = OCRDMA_DB_RQ_SHIFT; if (qp->dpp_enabled) { uresp.dpp_credit = dpp_credit_lmt; @@ -1308,7 +1302,7 @@ static void ocrdma_flush_rq_db(struct ocrdma_qp *qp) { if (qp->db_cache) { u32 val = qp->rq.dbid | (qp->db_cache << - ocrdma_get_num_posted_shift(qp)); + OCRDMA_DB_RQ_SHIFT); iowrite32(val, qp->rq_db); qp->db_cache = 0; } @@ -2052,7 +2046,7 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, static void ocrdma_ring_sq_db(struct ocrdma_qp *qp) { - u32 val = qp->sq.dbid | (1 << 16); + u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT); iowrite32(val, qp->sq_db); } @@ -2157,12 +2151,9 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) { - u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp)); + u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT); - if (qp->state != OCRDMA_QPS_INIT) - iowrite32(val, qp->rq_db); - else - qp->db_cache++; + iowrite32(val, qp->rq_db); } static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr, -- cgit v1.1 From 21c3391a9adfaddd00481a1d03bf30fc1304e292 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:56:56 +0530 Subject: RDMA/ocrdma: Read ASIC_ID register to select asic_gen ocrdma driver selects execution path based on sli_family and asic generation number. This introduces code to read the asic gen number from pci register instead of obtaining it from the Emulex NIC driver. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 13 +++++++++++++ drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 6 +++--- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 17 +++++++++++++++-- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 6 +++--- 5 files changed, 35 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 1b51e67..24fe248 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -197,6 +197,7 @@ struct ocrdma_dev { int id; struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG]; u16 pvid; + u32 asic_id; }; struct ocrdma_cq { @@ -443,4 +444,16 @@ static inline int ocrdma_get_eq_table_index(struct ocrdma_dev *dev, return -EINVAL; } +static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev) +{ + if (dev->nic_info.dev_family == 0xF && !dev->asic_id) { + pci_read_config_dword( + dev->nic_info.pdev, + OCRDMA_SLI_ASIC_ID_OFFSET, &dev->asic_id); + } + + return (dev->asic_id & OCRDMA_SLI_ASIC_GEN_NUM_MASK) >> + OCRDMA_SLI_ASIC_GEN_NUM_SHIFT; +} + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index e3c75e0..ec310d2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1037,7 +1037,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_inline_data = attr->wqe_size - (sizeof(struct ocrdma_hdr_wqe) + sizeof(struct ocrdma_sge)); - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { attr->ird = 1; attr->ird_page_size = OCRDMA_MIN_Q_PAGE_SIZE; attr->num_ird_pages = MAX_OCRDMA_IRD_PAGES; @@ -1379,7 +1379,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, __func__, dev->id, dev->attr.max_cqe, entries); return -EINVAL; } - if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY)) + if (dpp_cq && (ocrdma_get_asic_type(dev) != OCRDMA_ASIC_GEN_SKH_R)) return -EINVAL; if (dpp_cq) { @@ -1439,7 +1439,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, } /* shared eq between all the consumer cqs. */ cmd->cmd.eqn = cq->eqn; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { if (dpp_cq) cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP << OCRDMA_CREATE_CQ_TYPE_SHIFT; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 1a8a945..b9e1478 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -286,7 +286,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.process_mad = ocrdma_process_mad; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= OCRDMA_UVERBS(CREATE_SRQ) | OCRDMA_UVERBS(MODIFY_SRQ) | diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index e71685a..de4ebfc 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -30,8 +30,16 @@ #define Bit(_b) (1 << (_b)) -#define OCRDMA_GEN1_FAMILY 0xB -#define OCRDMA_GEN2_FAMILY 0x0F +enum { + OCRDMA_ASIC_GEN_SKH_R = 0x04, + OCRDMA_ASIC_GEN_LANCER = 0x0B +}; + +enum { + OCRDMA_ASIC_REV_A0 = 0x00, + OCRDMA_ASIC_REV_B0 = 0x10, + OCRDMA_ASIC_REV_C0 = 0x20 +}; #define OCRDMA_SUBSYS_ROCE 10 enum { @@ -141,6 +149,11 @@ enum { #define OCRDMA_MIN_Q_PAGE_SIZE (4096) #define OCRDMA_MAX_Q_PAGES (8) +#define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C +#define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF +#define OCRDMA_SLI_ASIC_GEN_NUM_MASK 0x0000FF00 +#define OCRDMA_SLI_ASIC_GEN_NUM_SHIFT 0x08 + /* # 0: 4K Bytes # 1: 8K Bytes diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index be75d2b..c5b4058 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -267,7 +267,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, if (udata && uctx) { pd->dpp_enabled = - dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY; + ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R; pd->num_dpp_qp = pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; } @@ -1161,7 +1161,7 @@ err: static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp, struct ocrdma_pd *pd) { - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { qp->sq_db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size) + OCRDMA_DB_GEN2_SQ_OFFSET; @@ -1688,7 +1688,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq, (srq->pd->id * dev->nic_info.db_page_size); uresp.db_page_size = dev->nic_info.db_page_size; uresp.num_rqe_allocated = srq->rq.max_cnt; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; uresp.db_shift = 24; } else { -- cgit v1.1 From 1eebbb6ec3eb760f77240af7e850e93222e89afe Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:56:57 +0530 Subject: RDMA/ocrdma: Allow DPP QP creation Allow creating DPP QP even if inline-data is not requested. This is an optimization to lower latency. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index ec310d2..6eedd2d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2026,8 +2026,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs, OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK; qp->rq_cq = cq; - if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp && - (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) { + if (pd->dpp_enabled && pd->num_dpp_qp) { ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq, dpp_cq_id); } -- cgit v1.1 From b6b87d2e6930b6dbb926998d3250c61cef1ec562 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:56:58 +0530 Subject: RDMA/ocrdma: ABI versioning between ocrdma and be2net While loading RoCE driver be2net driver should check for ABI version to catch functional incompatibilities. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_abi.h | 1 + drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h index fbac8eb..2a14d4a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -29,6 +29,7 @@ #define __OCRDMA_ABI_H__ #define OCRDMA_ABI_VERSION 1 +#define OCRDMA_BE_ROCE_ABI_VERSION 1 /* user kernel communication data structures. */ struct ocrdma_alloc_ucontext_resp { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index b9e1478..fc9d71e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -478,6 +478,7 @@ static struct ocrdma_driver ocrdma_drv = { .add = ocrdma_add, .remove = ocrdma_remove, .state_change_handler = ocrdma_event_handler, + .be_abi_version = OCRDMA_BE_ROCE_ABI_VERSION, }; static void ocrdma_unregister_inet6addr_notifier(void) -- cgit v1.1 From 0154410bd456affe21f31ab27278847461316e0b Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:57:00 +0530 Subject: RDMA/ocrdma: Update version string Update the driver vrsion string and node description string Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 4 +++- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 24fe248..00b3a29 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -39,7 +39,9 @@ #include #include "ocrdma_sli.h" -#define OCRDMA_ROCE_DEV_VERSION "1.0.0" +#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u" + +#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" #define OCRDMA_MAX_AH 512 diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index fc9d71e..ae17a36 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -41,8 +41,8 @@ #include "ocrdma_hw.h" #include "ocrdma_abi.h" -MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION); -MODULE_DESCRIPTION("Emulex RoCE HCA Driver"); +MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION); +MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("GPL"); -- cgit v1.1 From 2e6e9f2bb894e902b7fbd3f6865719eaa8306b6c Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:57:01 +0530 Subject: RDMA/ocrdma: Increment abi version count Increment the ABI version count for driver/library interface. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h index 2a14d4a..5a82ce5 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -28,7 +28,7 @@ #ifndef __OCRDMA_ABI_H__ #define __OCRDMA_ABI_H__ -#define OCRDMA_ABI_VERSION 1 +#define OCRDMA_ABI_VERSION 2 #define OCRDMA_BE_ROCE_ABI_VERSION 1 /* user kernel communication data structures. */ -- cgit v1.1 From 9d1878a369b23f48a5ca5bcbd89abb0e569c58cc Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:02 +0530 Subject: RDMA/ocrdma: Memory leak fix in ocrdma_dereg_mr() Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index c5b4058..8cc00d2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -840,8 +840,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey); - if (mr->hwmr.fr_mr == 0) - ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); + ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); /* it could be user registered memory. */ if (mr->umem) -- cgit v1.1 From cf5788ade718a2cc654170ff11c7d6f6f1ecbdcc Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:03 +0530 Subject: RDMA/ocrdma: Use non-zero tag in SRQ posting As part of SRQ receive buffers posting we populate a non-zero tag which will be returned in SRQ receive completions. Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 8cc00d2..2b56c42 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1538,7 +1538,7 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq) int discard_cnt = 0; u32 cur_getp, stop_getp; struct ocrdma_cqe *cqe; - u32 qpn = 0; + u32 qpn = 0, wqe_idx = 0; spin_lock_irqsave(&cq->cq_lock, cq_flags); @@ -1567,24 +1567,29 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq) if (qpn == 0 || qpn != qp->id) goto skip_cqe; - /* mark cqe discarded so that it is not picked up later - * in the poll_cq(). - */ - discard_cnt += 1; - cqe->cmn.qpn = 0; if (is_cqe_for_sq(cqe)) { ocrdma_hwq_inc_tail(&qp->sq); } else { if (qp->srq) { + wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >> + OCRDMA_CQE_BUFTAG_SHIFT) & + qp->srq->rq.max_wqe_idx; + if (wqe_idx < 1) + BUG(); spin_lock_irqsave(&qp->srq->q_lock, flags); ocrdma_hwq_inc_tail(&qp->srq->rq); - ocrdma_srq_toggle_bit(qp->srq, cur_getp); + ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1); spin_unlock_irqrestore(&qp->srq->q_lock, flags); } else { ocrdma_hwq_inc_tail(&qp->rq); } } + /* mark cqe discarded so that it is not picked up later + * in the poll_cq(). + */ + discard_cnt += 1; + cqe->cmn.qpn = 0; skip_cqe: cur_getp = (cur_getp + 1) % cq->max_hw_cqe; } while (cur_getp != stop_getp); @@ -2238,7 +2243,7 @@ static int ocrdma_srq_get_idx(struct ocrdma_srq *srq) if (row == srq->bit_fields_len) BUG(); - return indx; + return indx + 1; /* Use from index 1 */ } static void ocrdma_ring_srq_db(struct ocrdma_srq *srq) @@ -2575,10 +2580,13 @@ static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc, srq = get_ocrdma_srq(qp->ibqp.srq); wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >> - OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx; + OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx; + if (wqe_idx < 1) + BUG(); + ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx]; spin_lock_irqsave(&srq->q_lock, flags); - ocrdma_srq_toggle_bit(srq, wqe_idx); + ocrdma_srq_toggle_bit(srq, wqe_idx - 1); spin_unlock_irqrestore(&srq->q_lock, flags); ocrdma_hwq_inc_tail(&srq->rq); } -- cgit v1.1 From ac578aef8b9f6095016a12390f0ff638a3a54988 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:04 +0530 Subject: RDMA/ocrdma: Display proper value for max_mw Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 1 + drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 1 + drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 00b3a29..4ea8496 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -67,6 +67,7 @@ struct ocrdma_dev_attr { int max_mr; u64 max_mr_size; u32 max_num_mr_pbl; + int max_mw; int max_fmr; int max_map_per_fmr; int max_pages_per_frmr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 6eedd2d..f1ec9d9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1016,6 +1016,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->local_ca_ack_delay = (rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK) >> OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT; + attr->max_mw = rsp->max_mw; attr->max_mr = rsp->max_mr; attr->max_mr_size = ~0ull; attr->max_fmr = 0; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 2b56c42..95eeeeb 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -89,7 +89,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; attr->max_mr = dev->attr.max_mr; - attr->max_mw = 0; + attr->max_mw = dev->attr.max_mw; attr->max_pd = dev->attr.max_pd; attr->atomic_cap = 0; attr->max_fmr = 0; -- cgit v1.1 From 1228056bcff2e4029c3d9d5d31cf122eea3a4c6a Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:05 +0530 Subject: RDMA/ocrdma: Handle CQ overrun error Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index f1ec9d9..ebcb260 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -640,7 +640,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, { struct ocrdma_qp *qp = NULL; struct ocrdma_cq *cq = NULL; - struct ib_event ib_evt; + struct ib_event ib_evt = { 0 }; int cq_event = 0; int qp_event = 1; int srq_event = 0; @@ -665,6 +665,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, case OCRDMA_CQ_OVERRUN_ERROR: ib_evt.element.cq = &cq->ibcq; ib_evt.event = IB_EVENT_CQ_ERR; + cq_event = 1; + qp_event = 0; break; case OCRDMA_CQ_QPCAT_ERROR: ib_evt.element.qp = &qp->ibqp; @@ -726,6 +728,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, qp->srq->ibsrq. srq_context); } else if (dev_event) { + pr_err("%s: Fatal event received\n", dev->ibdev.name); ib_dispatch_event(&ib_evt); } -- cgit v1.1 From bbc5ec524eecf8af95b81c3c1d15cbc672568b4e Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:06 +0530 Subject: RDMA/ocrdma: Support non-embedded mailbox commands Added a routine to issue non-embedded mailbox commands for handling large mailbox request/response data. Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 55 +++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index ebcb260..e6797ff 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -953,7 +953,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) { int status = 0; u16 cqe_status, ext_status; - struct ocrdma_mqe *rsp; + struct ocrdma_mqe *rsp_mqe; + struct ocrdma_mbx_rsp *rsp = NULL; mutex_lock(&dev->mqe_ctx.lock); ocrdma_post_mqe(dev, mqe); @@ -962,23 +963,61 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) goto mbx_err; cqe_status = dev->mqe_ctx.cqe_status; ext_status = dev->mqe_ctx.ext_status; - rsp = ocrdma_get_mqe_rsp(dev); - ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe))); + rsp_mqe = ocrdma_get_mqe_rsp(dev); + ocrdma_copy_le32_to_cpu(mqe, rsp_mqe, (sizeof(*mqe))); + if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >> + OCRDMA_MQE_HDR_EMB_SHIFT) + rsp = &mqe->u.rsp; + if (cqe_status || ext_status) { - pr_err("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n", - __func__, - (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> - OCRDMA_MBX_RSP_OPCODE_SHIFT, cqe_status, ext_status); + pr_err("%s() cqe_status=0x%x, ext_status=0x%x,", + __func__, cqe_status, ext_status); + if (rsp) { + /* This is for embedded cmds. */ + pr_err("opcode=0x%x, subsystem=0x%x\n", + (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> + OCRDMA_MBX_RSP_OPCODE_SHIFT, + (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >> + OCRDMA_MBX_RSP_SUBSYS_SHIFT); + } status = ocrdma_get_mbx_cqe_errno(cqe_status); goto mbx_err; } - if (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK) + /* For non embedded, rsp errors are handled in ocrdma_nonemb_mbx_cmd */ + if (rsp && (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK)) status = ocrdma_get_mbx_errno(mqe->u.rsp.status); mbx_err: mutex_unlock(&dev->mqe_ctx.lock); return status; } +static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe, + void *payload_va) +{ + int status = 0; + struct ocrdma_mbx_rsp *rsp = payload_va; + + if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >> + OCRDMA_MQE_HDR_EMB_SHIFT) + BUG(); + + status = ocrdma_mbx_cmd(dev, mqe); + if (!status) + /* For non embedded, only CQE failures are handled in + * ocrdma_mbx_cmd. We need to check for RSP errors. + */ + if (rsp->status & OCRDMA_MBX_RSP_STATUS_MASK) + status = ocrdma_get_mbx_errno(rsp->status); + + if (status) + pr_err("opcode=0x%x, subsystem=0x%x\n", + (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> + OCRDMA_MBX_RSP_OPCODE_SHIFT, + (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >> + OCRDMA_MBX_RSP_SUBSYS_SHIFT); + return status; +} + static void ocrdma_get_attr(struct ocrdma_dev *dev, struct ocrdma_dev_attr *attr, struct ocrdma_mbx_query_config *rsp) -- cgit v1.1 From a51f06e1679e2abac2e8a817884e60edc18c5c86 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:07 +0530 Subject: RDMA/ocrdma: Query controller information Issue mailbox commands to query ocrdma controller information and phy information and print them while adding ocrdma device. Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/Makefile | 2 +- drivers/infiniband/hw/ocrdma/ocrdma.h | 61 +++ drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 148 +++++++ drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 2 + drivers/infiniband/hw/ocrdma/ocrdma_main.c | 14 + drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 233 ++++++++++- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 623 ++++++++++++++++++++++++++++ drivers/infiniband/hw/ocrdma/ocrdma_stats.h | 54 +++ 8 files changed, 1135 insertions(+), 2 deletions(-) create mode 100644 drivers/infiniband/hw/ocrdma/ocrdma_stats.c create mode 100644 drivers/infiniband/hw/ocrdma/ocrdma_stats.h (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/Makefile b/drivers/infiniband/hw/ocrdma/Makefile index 06a5bed..d1bfd4f 100644 --- a/drivers/infiniband/hw/ocrdma/Makefile +++ b/drivers/infiniband/hw/ocrdma/Makefile @@ -2,4 +2,4 @@ ccflags-y := -Idrivers/net/ethernet/emulex/benet obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma.o -ocrdma-y := ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o +ocrdma-y := ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o ocrdma_stats.o diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 4ea8496..3042c87 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -44,10 +44,17 @@ #define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" +#define OC_NAME_SH OCRDMA_NODE_DESC "(Skyhawk)" +#define OC_NAME_UNKNOWN OCRDMA_NODE_DESC "(Unknown)" + +#define OC_SKH_DEVICE_PF 0x720 +#define OC_SKH_DEVICE_VF 0x728 #define OCRDMA_MAX_AH 512 #define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME) +#define convert_to_64bit(lo, hi) ((u64)hi << 32 | (u64)lo) + struct ocrdma_dev_attr { u8 fw_ver[32]; u32 vendor_id; @@ -86,6 +93,12 @@ struct ocrdma_dev_attr { u8 num_ird_pages; }; +struct ocrdma_dma_mem { + void *va; + dma_addr_t pa; + u32 size; +}; + struct ocrdma_pbl { void *va; dma_addr_t pa; @@ -151,6 +164,26 @@ struct ocrdma_mr { struct ocrdma_hw_mr hwmr; }; +struct ocrdma_stats { + u8 type; + struct ocrdma_dev *dev; +}; + +struct stats_mem { + struct ocrdma_mqe mqe; + void *va; + dma_addr_t pa; + u32 size; + char *debugfs_mem; +}; + +struct phy_info { + u16 auto_speeds_supported; + u16 fixed_speeds_supported; + u16 phy_type; + u16 interface_type; +}; + struct ocrdma_dev { struct ib_device ibdev; struct ocrdma_dev_attr attr; @@ -194,6 +227,9 @@ struct ocrdma_dev { struct mqe_ctx mqe_ctx; struct be_dev_info nic_info; + struct phy_info phy; + char model_number[32]; + u32 hba_port_num; struct list_head entry; struct rcu_head rcu; @@ -201,6 +237,20 @@ struct ocrdma_dev { struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG]; u16 pvid; u32 asic_id; + + ulong last_stats_time; + struct mutex stats_lock; /* provide synch for debugfs operations */ + struct stats_mem stats_mem; + struct ocrdma_stats rsrc_stats; + struct ocrdma_stats rx_stats; + struct ocrdma_stats wqe_stats; + struct ocrdma_stats tx_stats; + struct ocrdma_stats db_err_stats; + struct ocrdma_stats tx_qp_err_stats; + struct ocrdma_stats rx_qp_err_stats; + struct ocrdma_stats tx_dbg_stats; + struct ocrdma_stats rx_dbg_stats; + struct dentry *dir; }; struct ocrdma_cq { @@ -434,6 +484,17 @@ static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev, return 0; } +static inline char *hca_name(struct ocrdma_dev *dev) +{ + switch (dev->nic_info.pdev->device) { + case OC_SKH_DEVICE_PF: + case OC_SKH_DEVICE_VF: + return OC_NAME_SH; + default: + return OC_NAME_UNKNOWN; + } +} + static inline int ocrdma_get_eq_table_index(struct ocrdma_dev *dev, int eqid) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index e6797ff..5d34858 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -243,6 +243,23 @@ static int ocrdma_get_mbx_errno(u32 status) return err_num; } +char *port_speed_string(struct ocrdma_dev *dev) +{ + char *str = ""; + u16 speeds_supported; + + speeds_supported = dev->phy.fixed_speeds_supported | + dev->phy.auto_speeds_supported; + if (speeds_supported & OCRDMA_PHY_SPEED_40GBPS) + str = "40Gbps "; + else if (speeds_supported & OCRDMA_PHY_SPEED_10GBPS) + str = "10Gbps "; + else if (speeds_supported & OCRDMA_PHY_SPEED_1GBPS) + str = "1Gbps "; + + return str; +} + static int ocrdma_get_mbx_cqe_errno(u16 cqe_status) { int err_num = -EINVAL; @@ -332,6 +349,11 @@ static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len) return mqe; } +static void *ocrdma_alloc_mqe(void) +{ + return kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL); +} + static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q) { dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma); @@ -1154,6 +1176,96 @@ mbx_err: return status; } +int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset) +{ + struct ocrdma_rdma_stats_req *req = dev->stats_mem.va; + struct ocrdma_mqe *mqe = &dev->stats_mem.mqe; + struct ocrdma_rdma_stats_resp *old_stats = NULL; + int status; + + old_stats = kzalloc(sizeof(*old_stats), GFP_KERNEL); + if (old_stats == NULL) + return -ENOMEM; + + memset(mqe, 0, sizeof(*mqe)); + mqe->hdr.pyld_len = dev->stats_mem.size; + mqe->hdr.spcl_sge_cnt_emb |= + (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) & + OCRDMA_MQE_HDR_SGE_CNT_MASK; + mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dev->stats_mem.pa & 0xffffffff); + mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dev->stats_mem.pa); + mqe->u.nonemb_req.sge[0].len = dev->stats_mem.size; + + /* Cache the old stats */ + memcpy(old_stats, req, sizeof(struct ocrdma_rdma_stats_resp)); + memset(req, 0, dev->stats_mem.size); + + ocrdma_init_mch((struct ocrdma_mbx_hdr *)req, + OCRDMA_CMD_GET_RDMA_STATS, + OCRDMA_SUBSYS_ROCE, + dev->stats_mem.size); + if (reset) + req->reset_stats = reset; + + status = ocrdma_nonemb_mbx_cmd(dev, mqe, dev->stats_mem.va); + if (status) + /* Copy from cache, if mbox fails */ + memcpy(req, old_stats, sizeof(struct ocrdma_rdma_stats_resp)); + else + ocrdma_le32_to_cpu(req, dev->stats_mem.size); + + kfree(old_stats); + return status; +} + +static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) +{ + int status = -ENOMEM; + struct ocrdma_dma_mem dma; + struct ocrdma_mqe *mqe; + struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp; + struct mgmt_hba_attribs *hba_attribs; + + mqe = ocrdma_alloc_mqe(); + if (!mqe) + return status; + memset(mqe, 0, sizeof(*mqe)); + + dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp); + dma.va = dma_alloc_coherent(&dev->nic_info.pdev->dev, + dma.size, &dma.pa, GFP_KERNEL); + if (!dma.va) + goto free_mqe; + + mqe->hdr.pyld_len = dma.size; + mqe->hdr.spcl_sge_cnt_emb |= + (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) & + OCRDMA_MQE_HDR_SGE_CNT_MASK; + mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dma.pa & 0xffffffff); + mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dma.pa); + mqe->u.nonemb_req.sge[0].len = dma.size; + + memset(dma.va, 0, dma.size); + ocrdma_init_mch((struct ocrdma_mbx_hdr *)dma.va, + OCRDMA_CMD_GET_CTRL_ATTRIBUTES, + OCRDMA_SUBSYS_COMMON, + dma.size); + + status = ocrdma_nonemb_mbx_cmd(dev, mqe, dma.va); + if (!status) { + ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va; + hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs; + + dev->hba_port_num = hba_attribs->phy_port; + strncpy(dev->model_number, + hba_attribs->controller_model_number, 31); + } + dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa); +free_mqe: + kfree(mqe); + return status; +} + static int ocrdma_mbx_query_dev(struct ocrdma_dev *dev) { int status = -ENOMEM; @@ -1201,6 +1313,35 @@ mbx_err: return status; } +static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev) +{ + int status = -ENOMEM; + struct ocrdma_mqe *cmd; + struct ocrdma_get_phy_info_rsp *rsp; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_PHY_DETAILS, sizeof(*cmd)); + if (!cmd) + return status; + + ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0], + OCRDMA_CMD_PHY_DETAILS, OCRDMA_SUBSYS_COMMON, + sizeof(*cmd)); + + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + + rsp = (struct ocrdma_get_phy_info_rsp *)cmd; + dev->phy.phy_type = le16_to_cpu(rsp->phy_type); + dev->phy.auto_speeds_supported = + le16_to_cpu(rsp->auto_speeds_supported); + dev->phy.fixed_speeds_supported = + le16_to_cpu(rsp->fixed_speeds_supported); +mbx_err: + kfree(cmd); + return status; +} + int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd) { int status = -ENOMEM; @@ -2570,6 +2711,13 @@ int ocrdma_init_hw(struct ocrdma_dev *dev) status = ocrdma_mbx_create_ah_tbl(dev); if (status) goto conf_err; + status = ocrdma_mbx_get_phy_info(dev); + if (status) + goto conf_err; + status = ocrdma_mbx_get_ctrl_attribs(dev); + if (status) + goto conf_err; + return 0; conf_err: diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index 77da536..e513f72 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -133,4 +133,6 @@ bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *); void ocrdma_flush_qp(struct ocrdma_qp *); int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq); +int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset); +char *port_speed_string(struct ocrdma_dev *dev); #endif /* __OCRDMA_HW_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index ae17a36..7d18b3a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -39,6 +39,7 @@ #include "ocrdma_ah.h" #include "be_roce.h" #include "ocrdma_hw.h" +#include "ocrdma_stats.h" #include "ocrdma_abi.h" MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION); @@ -372,6 +373,15 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) spin_lock(&ocrdma_devlist_lock); list_add_tail_rcu(&dev->entry, &ocrdma_dev_list); spin_unlock(&ocrdma_devlist_lock); + /* Init stats */ + ocrdma_add_port_stats(dev); + + pr_info("%s %s: %s \"%s\" port %d\n", + dev_name(&dev->nic_info.pdev->dev), hca_name(dev), + port_speed_string(dev), dev->model_number, + dev->hba_port_num); + pr_info("%s ocrdma%d driver loaded successfully\n", + dev_name(&dev->nic_info.pdev->dev), dev->id); return dev; alloc_err: @@ -400,6 +410,7 @@ static void ocrdma_remove(struct ocrdma_dev *dev) /* first unregister with stack to stop all the active traffic * of the registered clients. */ + ocrdma_rem_port_stats(dev); ib_unregister_device(&dev->ibdev); spin_lock(&ocrdma_devlist_lock); @@ -492,6 +503,8 @@ static int __init ocrdma_init_module(void) { int status; + ocrdma_init_debugfs(); + status = register_inetaddr_notifier(&ocrdma_inetaddr_notifier); if (status) return status; @@ -513,6 +526,7 @@ static void __exit ocrdma_exit_module(void) { be_roce_unregister_driver(&ocrdma_drv); ocrdma_unregister_inet6addr_notifier(); + ocrdma_rem_debugfs(); } module_init(ocrdma_init_module); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index de4ebfc..6e048b7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -72,6 +72,7 @@ enum { OCRDMA_CMD_ATTACH_MCAST, OCRDMA_CMD_DETACH_MCAST, + OCRDMA_CMD_GET_RDMA_STATS, OCRDMA_CMD_MAX }; @@ -82,12 +83,14 @@ enum { OCRDMA_CMD_CREATE_CQ = 12, OCRDMA_CMD_CREATE_EQ = 13, OCRDMA_CMD_CREATE_MQ = 21, + OCRDMA_CMD_GET_CTRL_ATTRIBUTES = 32, OCRDMA_CMD_GET_FW_VER = 35, OCRDMA_CMD_DELETE_MQ = 53, OCRDMA_CMD_DELETE_CQ = 54, OCRDMA_CMD_DELETE_EQ = 55, OCRDMA_CMD_GET_FW_CONFIG = 58, - OCRDMA_CMD_CREATE_MQ_EXT = 90 + OCRDMA_CMD_CREATE_MQ_EXT = 90, + OCRDMA_CMD_PHY_DETAILS = 102 }; enum { @@ -578,6 +581,30 @@ enum { OCRDMA_FN_MODE_RDMA = 0x4 }; +struct ocrdma_get_phy_info_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u16 phy_type; + u16 interface_type; + u32 misc_params; + u16 ext_phy_details; + u16 rsvd; + u16 auto_speeds_supported; + u16 fixed_speeds_supported; + u32 future_use[2]; +}; + +enum { + OCRDMA_PHY_SPEED_ZERO = 0x0, + OCRDMA_PHY_SPEED_10MBPS = 0x1, + OCRDMA_PHY_SPEED_100MBPS = 0x2, + OCRDMA_PHY_SPEED_1GBPS = 0x4, + OCRDMA_PHY_SPEED_10GBPS = 0x8, + OCRDMA_PHY_SPEED_40GBPS = 0x20 +}; + + struct ocrdma_get_link_speed_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; @@ -1719,4 +1746,208 @@ struct ocrdma_av { u32 valid; } __packed; +struct ocrdma_rsrc_stats { + u32 dpp_pds; + u32 non_dpp_pds; + u32 rc_dpp_qps; + u32 uc_dpp_qps; + u32 ud_dpp_qps; + u32 rc_non_dpp_qps; + u32 rsvd; + u32 uc_non_dpp_qps; + u32 ud_non_dpp_qps; + u32 rsvd1; + u32 srqs; + u32 rbqs; + u32 r64K_nsmr; + u32 r64K_to_2M_nsmr; + u32 r2M_to_44M_nsmr; + u32 r44M_to_1G_nsmr; + u32 r1G_to_4G_nsmr; + u32 nsmr_count_4G_to_32G; + u32 r32G_to_64G_nsmr; + u32 r64G_to_128G_nsmr; + u32 r128G_to_higher_nsmr; + u32 embedded_nsmr; + u32 frmr; + u32 prefetch_qps; + u32 ondemand_qps; + u32 phy_mr; + u32 mw; + u32 rsvd2[7]; +}; + +struct ocrdma_db_err_stats { + u32 sq_doorbell_errors; + u32 cq_doorbell_errors; + u32 rq_srq_doorbell_errors; + u32 cq_overflow_errors; + u32 rsvd[4]; +}; + +struct ocrdma_wqe_stats { + u32 large_send_rc_wqes_lo; + u32 large_send_rc_wqes_hi; + u32 large_write_rc_wqes_lo; + u32 large_write_rc_wqes_hi; + u32 rsvd[4]; + u32 read_wqes_lo; + u32 read_wqes_hi; + u32 frmr_wqes_lo; + u32 frmr_wqes_hi; + u32 mw_bind_wqes_lo; + u32 mw_bind_wqes_hi; + u32 invalidate_wqes_lo; + u32 invalidate_wqes_hi; + u32 rsvd1[2]; + u32 dpp_wqe_drops; + u32 rsvd2[5]; +}; + +struct ocrdma_tx_stats { + u32 send_pkts_lo; + u32 send_pkts_hi; + u32 write_pkts_lo; + u32 write_pkts_hi; + u32 read_pkts_lo; + u32 read_pkts_hi; + u32 read_rsp_pkts_lo; + u32 read_rsp_pkts_hi; + u32 ack_pkts_lo; + u32 ack_pkts_hi; + u32 send_bytes_lo; + u32 send_bytes_hi; + u32 write_bytes_lo; + u32 write_bytes_hi; + u32 read_req_bytes_lo; + u32 read_req_bytes_hi; + u32 read_rsp_bytes_lo; + u32 read_rsp_bytes_hi; + u32 ack_timeouts; + u32 rsvd[5]; +}; + + +struct ocrdma_tx_qp_err_stats { + u32 local_length_errors; + u32 local_protection_errors; + u32 local_qp_operation_errors; + u32 retry_count_exceeded_errors; + u32 rnr_retry_count_exceeded_errors; + u32 rsvd[3]; +}; + +struct ocrdma_rx_stats { + u32 roce_frame_bytes_lo; + u32 roce_frame_bytes_hi; + u32 roce_frame_icrc_drops; + u32 roce_frame_payload_len_drops; + u32 ud_drops; + u32 qp1_drops; + u32 psn_error_request_packets; + u32 psn_error_resp_packets; + u32 rnr_nak_timeouts; + u32 rnr_nak_receives; + u32 roce_frame_rxmt_drops; + u32 nak_count_psn_sequence_errors; + u32 rc_drop_count_lookup_errors; + u32 rq_rnr_naks; + u32 srq_rnr_naks; + u32 roce_frames_lo; + u32 roce_frames_hi; + u32 rsvd; +}; + +struct ocrdma_rx_qp_err_stats { + u32 nak_invalid_requst_errors; + u32 nak_remote_operation_errors; + u32 nak_count_remote_access_errors; + u32 local_length_errors; + u32 local_protection_errors; + u32 local_qp_operation_errors; + u32 rsvd[2]; +}; + +struct ocrdma_tx_dbg_stats { + u32 data[100]; +}; + +struct ocrdma_rx_dbg_stats { + u32 data[200]; +}; + +struct ocrdma_rdma_stats_req { + struct ocrdma_mbx_hdr hdr; + u8 reset_stats; + u8 rsvd[3]; +} __packed; + +struct ocrdma_rdma_stats_resp { + struct ocrdma_mbx_hdr hdr; + struct ocrdma_rsrc_stats act_rsrc_stats; + struct ocrdma_rsrc_stats th_rsrc_stats; + struct ocrdma_db_err_stats db_err_stats; + struct ocrdma_wqe_stats wqe_stats; + struct ocrdma_tx_stats tx_stats; + struct ocrdma_tx_qp_err_stats tx_qp_err_stats; + struct ocrdma_rx_stats rx_stats; + struct ocrdma_rx_qp_err_stats rx_qp_err_stats; + struct ocrdma_tx_dbg_stats tx_dbg_stats; + struct ocrdma_rx_dbg_stats rx_dbg_stats; +} __packed; + + +struct mgmt_hba_attribs { + u8 flashrom_version_string[32]; + u8 manufacturer_name[32]; + u32 supported_modes; + u32 rsvd0[3]; + u8 ncsi_ver_string[12]; + u32 default_extended_timeout; + u8 controller_model_number[32]; + u8 controller_description[64]; + u8 controller_serial_number[32]; + u8 ip_version_string[32]; + u8 firmware_version_string[32]; + u8 bios_version_string[32]; + u8 redboot_version_string[32]; + u8 driver_version_string[32]; + u8 fw_on_flash_version_string[32]; + u32 functionalities_supported; + u16 max_cdblength; + u8 asic_revision; + u8 generational_guid[16]; + u8 hba_port_count; + u16 default_link_down_timeout; + u8 iscsi_ver_min_max; + u8 multifunction_device; + u8 cache_valid; + u8 hba_status; + u8 max_domains_supported; + u8 phy_port; + u32 firmware_post_status; + u32 hba_mtu[8]; + u32 rsvd1[4]; +}; + +struct mgmt_controller_attrib { + struct mgmt_hba_attribs hba_attribs; + u16 pci_vendor_id; + u16 pci_device_id; + u16 pci_sub_vendor_id; + u16 pci_sub_system_id; + u8 pci_bus_number; + u8 pci_device_number; + u8 pci_function_number; + u8 interface_type; + u64 unique_identifier; + u32 rsvd0[5]; +}; + +struct ocrdma_get_ctrl_attribs_rsp { + struct ocrdma_mbx_hdr hdr; + struct mgmt_controller_attrib ctrl_attribs; +}; + + #endif /* __OCRDMA_SLI_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c new file mode 100644 index 0000000..6c54106 --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -0,0 +1,623 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2014 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#include +#include "ocrdma_stats.h" + +static struct dentry *ocrdma_dbgfs_dir; + +static int ocrdma_add_stat(char *start, char *pcur, + char *name, u64 count) +{ + char buff[128] = {0}; + int cpy_len = 0; + + snprintf(buff, 128, "%s: %llu\n", name, count); + cpy_len = strlen(buff); + + if (pcur + cpy_len > start + OCRDMA_MAX_DBGFS_MEM) { + pr_err("%s: No space in stats buff\n", __func__); + return 0; + } + + memcpy(pcur, buff, cpy_len); + return cpy_len; +} + +static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev) +{ + struct stats_mem *mem = &dev->stats_mem; + + /* Alloc mbox command mem*/ + mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req), + sizeof(struct ocrdma_rdma_stats_resp)); + + mem->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, mem->size, + &mem->pa, GFP_KERNEL); + if (!mem->va) { + pr_err("%s: stats mbox allocation failed\n", __func__); + return false; + } + + memset(mem->va, 0, mem->size); + + /* Alloc debugfs mem */ + mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL); + if (!mem->debugfs_mem) { + pr_err("%s: stats debugfs mem allocation failed\n", __func__); + return false; + } + + return true; +} + +static void ocrdma_release_stats_mem(struct ocrdma_dev *dev) +{ + struct stats_mem *mem = &dev->stats_mem; + + if (mem->va) + dma_free_coherent(&dev->nic_info.pdev->dev, mem->size, + mem->va, mem->pa); + kfree(mem->debugfs_mem); +} + +static char *ocrdma_resource_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "active_dpp_pds", + (u64)rsrc_stats->dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "active_non_dpp_pds", + (u64)rsrc_stats->non_dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "active_rc_dpp_qps", + (u64)rsrc_stats->rc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_uc_dpp_qps", + (u64)rsrc_stats->uc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_ud_dpp_qps", + (u64)rsrc_stats->ud_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_rc_non_dpp_qps", + (u64)rsrc_stats->rc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_uc_non_dpp_qps", + (u64)rsrc_stats->uc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_ud_non_dpp_qps", + (u64)rsrc_stats->ud_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_srqs", + (u64)rsrc_stats->srqs); + pcur += ocrdma_add_stat(stats, pcur, "active_rbqs", + (u64)rsrc_stats->rbqs); + pcur += ocrdma_add_stat(stats, pcur, "active_64K_nsmr", + (u64)rsrc_stats->r64K_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_64K_to_2M_nsmr", + (u64)rsrc_stats->r64K_to_2M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_2M_to_44M_nsmr", + (u64)rsrc_stats->r2M_to_44M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_44M_to_1G_nsmr", + (u64)rsrc_stats->r44M_to_1G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_1G_to_4G_nsmr", + (u64)rsrc_stats->r1G_to_4G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_nsmr_count_4G_to_32G", + (u64)rsrc_stats->nsmr_count_4G_to_32G); + pcur += ocrdma_add_stat(stats, pcur, "active_32G_to_64G_nsmr", + (u64)rsrc_stats->r32G_to_64G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_64G_to_128G_nsmr", + (u64)rsrc_stats->r64G_to_128G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_128G_to_higher_nsmr", + (u64)rsrc_stats->r128G_to_higher_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_embedded_nsmr", + (u64)rsrc_stats->embedded_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_frmr", + (u64)rsrc_stats->frmr); + pcur += ocrdma_add_stat(stats, pcur, "active_prefetch_qps", + (u64)rsrc_stats->prefetch_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_ondemand_qps", + (u64)rsrc_stats->ondemand_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_phy_mr", + (u64)rsrc_stats->phy_mr); + pcur += ocrdma_add_stat(stats, pcur, "active_mw", + (u64)rsrc_stats->mw); + + /* Print the threshold stats */ + rsrc_stats = &rdma_stats->th_rsrc_stats; + + pcur += ocrdma_add_stat(stats, pcur, "threshold_dpp_pds", + (u64)rsrc_stats->dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "threshold_non_dpp_pds", + (u64)rsrc_stats->non_dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_dpp_qps", + (u64)rsrc_stats->rc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_dpp_qps", + (u64)rsrc_stats->uc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_dpp_qps", + (u64)rsrc_stats->ud_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_non_dpp_qps", + (u64)rsrc_stats->rc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_non_dpp_qps", + (u64)rsrc_stats->uc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_non_dpp_qps", + (u64)rsrc_stats->ud_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_srqs", + (u64)rsrc_stats->srqs); + pcur += ocrdma_add_stat(stats, pcur, "threshold_rbqs", + (u64)rsrc_stats->rbqs); + pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_nsmr", + (u64)rsrc_stats->r64K_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_to_2M_nsmr", + (u64)rsrc_stats->r64K_to_2M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_2M_to_44M_nsmr", + (u64)rsrc_stats->r2M_to_44M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_44M_to_1G_nsmr", + (u64)rsrc_stats->r44M_to_1G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_1G_to_4G_nsmr", + (u64)rsrc_stats->r1G_to_4G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_nsmr_count_4G_to_32G", + (u64)rsrc_stats->nsmr_count_4G_to_32G); + pcur += ocrdma_add_stat(stats, pcur, "threshold_32G_to_64G_nsmr", + (u64)rsrc_stats->r32G_to_64G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_64G_to_128G_nsmr", + (u64)rsrc_stats->r64G_to_128G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_128G_to_higher_nsmr", + (u64)rsrc_stats->r128G_to_higher_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_embedded_nsmr", + (u64)rsrc_stats->embedded_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_frmr", + (u64)rsrc_stats->frmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_prefetch_qps", + (u64)rsrc_stats->prefetch_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_ondemand_qps", + (u64)rsrc_stats->ondemand_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_phy_mr", + (u64)rsrc_stats->phy_mr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_mw", + (u64)rsrc_stats->mw); + return stats; +} + +static char *ocrdma_rx_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rx_stats *rx_stats = &rdma_stats->rx_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat + (stats, pcur, "roce_frame_bytes", + convert_to_64bit(rx_stats->roce_frame_bytes_lo, + rx_stats->roce_frame_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "roce_frame_icrc_drops", + (u64)rx_stats->roce_frame_icrc_drops); + pcur += ocrdma_add_stat(stats, pcur, "roce_frame_payload_len_drops", + (u64)rx_stats->roce_frame_payload_len_drops); + pcur += ocrdma_add_stat(stats, pcur, "ud_drops", + (u64)rx_stats->ud_drops); + pcur += ocrdma_add_stat(stats, pcur, "qp1_drops", + (u64)rx_stats->qp1_drops); + pcur += ocrdma_add_stat(stats, pcur, "psn_error_request_packets", + (u64)rx_stats->psn_error_request_packets); + pcur += ocrdma_add_stat(stats, pcur, "psn_error_resp_packets", + (u64)rx_stats->psn_error_resp_packets); + pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_timeouts", + (u64)rx_stats->rnr_nak_timeouts); + pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_receives", + (u64)rx_stats->rnr_nak_receives); + pcur += ocrdma_add_stat(stats, pcur, "roce_frame_rxmt_drops", + (u64)rx_stats->roce_frame_rxmt_drops); + pcur += ocrdma_add_stat(stats, pcur, "nak_count_psn_sequence_errors", + (u64)rx_stats->nak_count_psn_sequence_errors); + pcur += ocrdma_add_stat(stats, pcur, "rc_drop_count_lookup_errors", + (u64)rx_stats->rc_drop_count_lookup_errors); + pcur += ocrdma_add_stat(stats, pcur, "rq_rnr_naks", + (u64)rx_stats->rq_rnr_naks); + pcur += ocrdma_add_stat(stats, pcur, "srq_rnr_naks", + (u64)rx_stats->srq_rnr_naks); + pcur += ocrdma_add_stat(stats, pcur, "roce_frames", + convert_to_64bit(rx_stats->roce_frames_lo, + rx_stats->roce_frames_hi)); + + return stats; +} + +static char *ocrdma_tx_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_tx_stats *tx_stats = &rdma_stats->tx_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "send_pkts", + convert_to_64bit(tx_stats->send_pkts_lo, + tx_stats->send_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "write_pkts", + convert_to_64bit(tx_stats->write_pkts_lo, + tx_stats->write_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_pkts", + convert_to_64bit(tx_stats->read_pkts_lo, + tx_stats->read_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_rsp_pkts", + convert_to_64bit(tx_stats->read_rsp_pkts_lo, + tx_stats->read_rsp_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "ack_pkts", + convert_to_64bit(tx_stats->ack_pkts_lo, + tx_stats->ack_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "send_bytes", + convert_to_64bit(tx_stats->send_bytes_lo, + tx_stats->send_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "write_bytes", + convert_to_64bit(tx_stats->write_bytes_lo, + tx_stats->write_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_req_bytes", + convert_to_64bit(tx_stats->read_req_bytes_lo, + tx_stats->read_req_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_rsp_bytes", + convert_to_64bit(tx_stats->read_rsp_bytes_lo, + tx_stats->read_rsp_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "ack_timeouts", + (u64)tx_stats->ack_timeouts); + + return stats; +} + +static char *ocrdma_wqe_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_wqe_stats *wqe_stats = &rdma_stats->wqe_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "large_send_rc_wqes", + convert_to_64bit(wqe_stats->large_send_rc_wqes_lo, + wqe_stats->large_send_rc_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "large_write_rc_wqes", + convert_to_64bit(wqe_stats->large_write_rc_wqes_lo, + wqe_stats->large_write_rc_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_wqes", + convert_to_64bit(wqe_stats->read_wqes_lo, + wqe_stats->read_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "frmr_wqes", + convert_to_64bit(wqe_stats->frmr_wqes_lo, + wqe_stats->frmr_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "mw_bind_wqes", + convert_to_64bit(wqe_stats->mw_bind_wqes_lo, + wqe_stats->mw_bind_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "invalidate_wqes", + convert_to_64bit(wqe_stats->invalidate_wqes_lo, + wqe_stats->invalidate_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "dpp_wqe_drops", + (u64)wqe_stats->dpp_wqe_drops); + return stats; +} + +static char *ocrdma_db_errstats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_db_err_stats *db_err_stats = &rdma_stats->db_err_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "sq_doorbell_errors", + (u64)db_err_stats->sq_doorbell_errors); + pcur += ocrdma_add_stat(stats, pcur, "cq_doorbell_errors", + (u64)db_err_stats->cq_doorbell_errors); + pcur += ocrdma_add_stat(stats, pcur, "rq_srq_doorbell_errors", + (u64)db_err_stats->rq_srq_doorbell_errors); + pcur += ocrdma_add_stat(stats, pcur, "cq_overflow_errors", + (u64)db_err_stats->cq_overflow_errors); + return stats; +} + +static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rx_qp_err_stats *rx_qp_err_stats = + &rdma_stats->rx_qp_err_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_requst_errors", + (u64)rx_qp_err_stats->nak_invalid_requst_errors); + pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors", + (u64)rx_qp_err_stats->nak_remote_operation_errors); + pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors", + (u64)rx_qp_err_stats->nak_count_remote_access_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_length_errors", + (u64)rx_qp_err_stats->local_length_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors", + (u64)rx_qp_err_stats->local_protection_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_qp_operation_errors", + (u64)rx_qp_err_stats->local_qp_operation_errors); + return stats; +} + +static char *ocrdma_txqp_errstats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_tx_qp_err_stats *tx_qp_err_stats = + &rdma_stats->tx_qp_err_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "local_length_errors", + (u64)tx_qp_err_stats->local_length_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors", + (u64)tx_qp_err_stats->local_protection_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_qp_operation_errors", + (u64)tx_qp_err_stats->local_qp_operation_errors); + pcur += ocrdma_add_stat(stats, pcur, "retry_count_exceeded_errors", + (u64)tx_qp_err_stats->retry_count_exceeded_errors); + pcur += ocrdma_add_stat(stats, pcur, "rnr_retry_count_exceeded_errors", + (u64)tx_qp_err_stats->rnr_retry_count_exceeded_errors); + return stats; +} + +static char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev) +{ + int i; + char *pstats = dev->stats_mem.debugfs_mem; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_tx_dbg_stats *tx_dbg_stats = + &rdma_stats->tx_dbg_stats; + + memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + for (i = 0; i < 100; i++) + pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i, + tx_dbg_stats->data[i]); + + return dev->stats_mem.debugfs_mem; +} + +static char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev) +{ + int i; + char *pstats = dev->stats_mem.debugfs_mem; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rx_dbg_stats *rx_dbg_stats = + &rdma_stats->rx_dbg_stats; + + memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + for (i = 0; i < 200; i++) + pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i, + rx_dbg_stats->data[i]); + + return dev->stats_mem.debugfs_mem; +} + +static void ocrdma_update_stats(struct ocrdma_dev *dev) +{ + ulong now = jiffies, secs; + int status = 0; + + secs = jiffies_to_msecs(now - dev->last_stats_time) / 1000U; + if (secs) { + /* update */ + status = ocrdma_mbx_rdma_stats(dev, false); + if (status) + pr_err("%s: stats mbox failed with status = %d\n", + __func__, status); + dev->last_stats_time = jiffies; + } +} + +static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer, + size_t usr_buf_len, loff_t *ppos) +{ + struct ocrdma_stats *pstats = filp->private_data; + struct ocrdma_dev *dev = pstats->dev; + ssize_t status = 0; + char *data = NULL; + + /* No partial reads */ + if (*ppos != 0) + return 0; + + mutex_lock(&dev->stats_lock); + + ocrdma_update_stats(dev); + + switch (pstats->type) { + case OCRDMA_RSRC_STATS: + data = ocrdma_resource_stats(dev); + break; + case OCRDMA_RXSTATS: + data = ocrdma_rx_stats(dev); + break; + case OCRDMA_WQESTATS: + data = ocrdma_wqe_stats(dev); + break; + case OCRDMA_TXSTATS: + data = ocrdma_tx_stats(dev); + break; + case OCRDMA_DB_ERRSTATS: + data = ocrdma_db_errstats(dev); + break; + case OCRDMA_RXQP_ERRSTATS: + data = ocrdma_rxqp_errstats(dev); + break; + case OCRDMA_TXQP_ERRSTATS: + data = ocrdma_txqp_errstats(dev); + break; + case OCRDMA_TX_DBG_STATS: + data = ocrdma_tx_dbg_stats(dev); + break; + case OCRDMA_RX_DBG_STATS: + data = ocrdma_rx_dbg_stats(dev); + break; + + default: + status = -EFAULT; + goto exit; + } + + if (usr_buf_len < strlen(data)) { + status = -ENOSPC; + goto exit; + } + + status = simple_read_from_buffer(buffer, usr_buf_len, ppos, data, + strlen(data)); +exit: + mutex_unlock(&dev->stats_lock); + return status; +} + +static int ocrdma_debugfs_open(struct inode *inode, struct file *file) +{ + if (inode->i_private) + file->private_data = inode->i_private; + return 0; +} + +static const struct file_operations ocrdma_dbg_ops = { + .owner = THIS_MODULE, + .open = ocrdma_debugfs_open, + .read = ocrdma_dbgfs_ops_read, +}; + +void ocrdma_add_port_stats(struct ocrdma_dev *dev) +{ + if (!ocrdma_dbgfs_dir) + return; + + /* Create post stats base dir */ + dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir); + if (!dev->dir) + goto err; + + dev->rsrc_stats.type = OCRDMA_RSRC_STATS; + dev->rsrc_stats.dev = dev; + if (!debugfs_create_file("resource_stats", S_IRUSR, dev->dir, + &dev->rsrc_stats, &ocrdma_dbg_ops)) + goto err; + + dev->rx_stats.type = OCRDMA_RXSTATS; + dev->rx_stats.dev = dev; + if (!debugfs_create_file("rx_stats", S_IRUSR, dev->dir, + &dev->rx_stats, &ocrdma_dbg_ops)) + goto err; + + dev->wqe_stats.type = OCRDMA_WQESTATS; + dev->wqe_stats.dev = dev; + if (!debugfs_create_file("wqe_stats", S_IRUSR, dev->dir, + &dev->wqe_stats, &ocrdma_dbg_ops)) + goto err; + + dev->tx_stats.type = OCRDMA_TXSTATS; + dev->tx_stats.dev = dev; + if (!debugfs_create_file("tx_stats", S_IRUSR, dev->dir, + &dev->tx_stats, &ocrdma_dbg_ops)) + goto err; + + dev->db_err_stats.type = OCRDMA_DB_ERRSTATS; + dev->db_err_stats.dev = dev; + if (!debugfs_create_file("db_err_stats", S_IRUSR, dev->dir, + &dev->db_err_stats, &ocrdma_dbg_ops)) + goto err; + + + dev->tx_qp_err_stats.type = OCRDMA_TXQP_ERRSTATS; + dev->tx_qp_err_stats.dev = dev; + if (!debugfs_create_file("tx_qp_err_stats", S_IRUSR, dev->dir, + &dev->tx_qp_err_stats, &ocrdma_dbg_ops)) + goto err; + + dev->rx_qp_err_stats.type = OCRDMA_RXQP_ERRSTATS; + dev->rx_qp_err_stats.dev = dev; + if (!debugfs_create_file("rx_qp_err_stats", S_IRUSR, dev->dir, + &dev->rx_qp_err_stats, &ocrdma_dbg_ops)) + goto err; + + + dev->tx_dbg_stats.type = OCRDMA_TX_DBG_STATS; + dev->tx_dbg_stats.dev = dev; + if (!debugfs_create_file("tx_dbg_stats", S_IRUSR, dev->dir, + &dev->tx_dbg_stats, &ocrdma_dbg_ops)) + goto err; + + dev->rx_dbg_stats.type = OCRDMA_RX_DBG_STATS; + dev->rx_dbg_stats.dev = dev; + if (!debugfs_create_file("rx_dbg_stats", S_IRUSR, dev->dir, + &dev->rx_dbg_stats, &ocrdma_dbg_ops)) + goto err; + + /* Now create dma_mem for stats mbx command */ + if (!ocrdma_alloc_stats_mem(dev)) + goto err; + + mutex_init(&dev->stats_lock); + + return; +err: + ocrdma_release_stats_mem(dev); + debugfs_remove_recursive(dev->dir); + dev->dir = NULL; +} + +void ocrdma_rem_port_stats(struct ocrdma_dev *dev) +{ + if (!dev->dir) + return; + mutex_destroy(&dev->stats_lock); + ocrdma_release_stats_mem(dev); + debugfs_remove(dev->dir); +} + +void ocrdma_init_debugfs(void) +{ + /* Create base dir in debugfs root dir */ + ocrdma_dbgfs_dir = debugfs_create_dir("ocrdma", NULL); +} + +void ocrdma_rem_debugfs(void) +{ + debugfs_remove_recursive(ocrdma_dbgfs_dir); +} diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h new file mode 100644 index 0000000..5f5e20c --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h @@ -0,0 +1,54 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2014 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#ifndef __OCRDMA_STATS_H__ +#define __OCRDMA_STATS_H__ + +#include +#include "ocrdma.h" +#include "ocrdma_hw.h" + +#define OCRDMA_MAX_DBGFS_MEM 4096 + +enum OCRDMA_STATS_TYPE { + OCRDMA_RSRC_STATS, + OCRDMA_RXSTATS, + OCRDMA_WQESTATS, + OCRDMA_TXSTATS, + OCRDMA_DB_ERRSTATS, + OCRDMA_RXQP_ERRSTATS, + OCRDMA_TXQP_ERRSTATS, + OCRDMA_TX_DBG_STATS, + OCRDMA_RX_DBG_STATS +}; + +void ocrdma_rem_debugfs(void); +void ocrdma_init_debugfs(void); +void ocrdma_rem_port_stats(struct ocrdma_dev *dev); +void ocrdma_add_port_stats(struct ocrdma_dev *dev); + +#endif /* __OCRDMA_STATS_H__ */ -- cgit v1.1 From 334b8db3a6c57bec78bcaacef3b1658bfc6e2efe Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 4 Feb 2014 11:57:09 +0530 Subject: RDMA/ocrdma: Display FW version Adding a sysfs file for getting the FW version. Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 42 +++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 7d18b3a..61248b9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -339,9 +339,42 @@ static void ocrdma_free_resources(struct ocrdma_dev *dev) kfree(dev->sgid_tbl); } +/* OCRDMA sysfs interface */ +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ocrdma_dev *dev = dev_get_drvdata(device); + + return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); +} + +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ocrdma_dev *dev = dev_get_drvdata(device); + + return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); + +static struct device_attribute *ocrdma_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver +}; + +static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) + device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]); +} + static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) { - int status = 0; + int status = 0, i; struct ocrdma_dev *dev; dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); @@ -370,6 +403,9 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) if (status) goto alloc_err; + for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) + if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i])) + goto sysfs_err; spin_lock(&ocrdma_devlist_lock); list_add_tail_rcu(&dev->entry, &ocrdma_dev_list); spin_unlock(&ocrdma_devlist_lock); @@ -384,6 +420,8 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) dev_name(&dev->nic_info.pdev->dev), dev->id); return dev; +sysfs_err: + ocrdma_remove_sysfiles(dev); alloc_err: ocrdma_free_resources(dev); ocrdma_cleanup_hw(dev); @@ -411,6 +449,8 @@ static void ocrdma_remove(struct ocrdma_dev *dev) * of the registered clients. */ ocrdma_rem_port_stats(dev); + ocrdma_remove_sysfiles(dev); + ib_unregister_device(&dev->ibdev); spin_lock(&ocrdma_devlist_lock); -- cgit v1.1 From fad51b7d361b4f1b439aa10fa258a35dac62b297 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 11:57:10 +0530 Subject: RDMA/ocrdma: Code clean-up Clean up code. Also modifying GSI QP to error during ocrdma_close is fixed. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma.h | 6 ++---- drivers/infiniband/hw/ocrdma/ocrdma_abi.h | 4 +--- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 20 ++++++++------------ drivers/infiniband/hw/ocrdma/ocrdma_main.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 16 ++++++++-------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 21 +++------------------ 7 files changed, 24 insertions(+), 47 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 3042c87..19011db 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -35,6 +35,7 @@ #include #include +#include #include #include "ocrdma_sli.h" @@ -234,7 +235,7 @@ struct ocrdma_dev { struct list_head entry; struct rcu_head rcu; int id; - struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG]; + u64 stag_arr[OCRDMA_MAX_STAG]; u16 pvid; u32 asic_id; @@ -287,7 +288,6 @@ struct ocrdma_cq { struct ocrdma_pd { struct ib_pd ibpd; - struct ocrdma_dev *dev; struct ocrdma_ucontext *uctx; u32 id; int num_dpp_qp; @@ -372,10 +372,8 @@ struct ocrdma_qp { bool dpp_enabled; u8 *ird_q_va; bool signaled; - u16 db_cache; }; - struct ocrdma_ucontext { struct ib_ucontext ibucontext; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h index 5a82ce5..1554cca 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -108,9 +108,7 @@ struct ocrdma_create_qp_uresp { u32 db_sq_offset; u32 db_rq_offset; u32 db_shift; - u64 rsvd1; - u64 rsvd2; - u64 rsvd3; + u64 rsvd[11]; } __packed; struct ocrdma_create_srq_uresp { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 3407114..d4cc01f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -100,7 +100,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) if (!(attr->ah_flags & IB_AH_GRH)) return ERR_PTR(-EINVAL); - ah = kzalloc(sizeof *ah, GFP_ATOMIC); + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 5d34858..3bbf201 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -32,7 +32,6 @@ #include #include -#include #include "ocrdma.h" #include "ocrdma_hw.h" @@ -386,8 +385,8 @@ static void ocrdma_build_q_pages(struct ocrdma_pa *q_pa, int cnt, } } -static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q, - int queue_type) +static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, + struct ocrdma_queue_info *q, int queue_type) { u8 opcode = 0; int status; @@ -778,7 +777,6 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev, } } - static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe) { /* async CQE processing */ @@ -825,8 +823,6 @@ static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id) ocrdma_process_acqe(dev, cqe); else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK) ocrdma_process_mcqe(dev, cqe); - else - pr_err("%s() cqe->compl is not set.\n", __func__); memset(cqe, 0, sizeof(struct ocrdma_mcqe)); ocrdma_mcq_inc_tail(dev); } @@ -1050,6 +1046,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_qp = (rsp->qp_srq_cq_ird_ord & OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT; + attr->max_srq = + (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET; attr->max_send_sge = ((rsp->max_write_send_sge & OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT); @@ -1065,9 +1064,6 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT; - attr->max_srq = - (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >> - OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET; attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT; @@ -1411,7 +1407,7 @@ static int ocrdma_build_q_conf(u32 *num_entries, int entry_size, static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev) { - int i ; + int i; int status = 0; int max_ah; struct ocrdma_create_ah_tbl *cmd; @@ -2279,7 +2275,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0], sizeof(cmd->params.dgid)); status = ocrdma_query_gid(&qp->dev->ibdev, 1, - ah_attr->grh.sgid_index, &sgid); + ah_attr->grh.sgid_index, &sgid); if (status) return status; @@ -2666,7 +2662,7 @@ static int ocrdma_create_eqs(struct ocrdma_dev *dev) for (i = 0; i < num_eq; i++) { status = ocrdma_create_eq(dev, &dev->eq_tbl[i], - OCRDMA_EQ_LEN); + OCRDMA_EQ_LEN); if (status) { status = -EINVAL; break; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 61248b9..8bd9db6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -488,7 +488,7 @@ static int ocrdma_close(struct ocrdma_dev *dev) cur_qp = dev->qp_tbl; for (i = 0; i < OCRDMA_MAX_QP; i++) { qp = cur_qp[i]; - if (qp) { + if (qp && qp->ibqp.qp_type != IB_QPT_GSI) { /* change the QP state to ERROR */ _ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 6e048b7..96c9ee6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -152,11 +152,10 @@ enum { #define OCRDMA_MIN_Q_PAGE_SIZE (4096) #define OCRDMA_MAX_Q_PAGES (8) -#define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C -#define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF -#define OCRDMA_SLI_ASIC_GEN_NUM_MASK 0x0000FF00 -#define OCRDMA_SLI_ASIC_GEN_NUM_SHIFT 0x08 - +#define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C +#define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF +#define OCRDMA_SLI_ASIC_GEN_NUM_MASK 0x0000FF00 +#define OCRDMA_SLI_ASIC_GEN_NUM_SHIFT 0x08 /* # 0: 4K Bytes # 1: 8K Bytes @@ -633,7 +632,7 @@ enum { enum { OCRDMA_CREATE_CQ_VER2 = 2, - OCRDMA_CREATE_CQ_VER3 = 3, + OCRDMA_CREATE_CQ_VER3 = 3, OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF, OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16, @@ -1093,6 +1092,7 @@ enum { OCRDMA_MODIFY_QP_RSP_MAX_ORD_MASK = 0xFFFF << OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT }; + struct ocrdma_modify_qp_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; @@ -1105,8 +1105,8 @@ struct ocrdma_query_qp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_hdr req; -#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0 -#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF +#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0 +#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF u32 qp_id; }; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 95eeeeb..6d3990f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -53,7 +53,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port, dev = get_ocrdma_dev(ibdev); memset(sgid, 0, sizeof(*sgid)); - if (index >= OCRDMA_MAX_SGID) + if (index > OCRDMA_MAX_SGID) return -EINVAL; memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid)); @@ -144,7 +144,6 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev, } } - int ocrdma_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { @@ -1210,7 +1209,6 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp, qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; } - static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev, struct ib_qp_init_attr *attrs) { @@ -1296,17 +1294,6 @@ gen_err: return ERR_PTR(status); } - -static void ocrdma_flush_rq_db(struct ocrdma_qp *qp) -{ - if (qp->db_cache) { - u32 val = qp->rq.dbid | (qp->db_cache << - OCRDMA_DB_RQ_SHIFT); - iowrite32(val, qp->rq_db); - qp->db_cache = 0; - } -} - int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) { @@ -1325,8 +1312,6 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (status < 0) return status; status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); - if (!status && attr_mask & IB_QP_STATE && attr->qp_state == IB_QPS_RTR) - ocrdma_flush_rq_db(qp); return status; } @@ -2042,7 +2027,7 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, fast_reg->num_sges = wr->wr.fast_reg.page_list_len; fast_reg->size_sge = get_encoded_page_size(1 << wr->wr.fast_reg.page_shift); - mr = (struct ocrdma_mr *) (unsigned long) qp->dev->stag_arr[(hdr->lkey >> 8) & + mr = (struct ocrdma_mr *)qp->dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)]; build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr); return 0; @@ -2877,7 +2862,7 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len) goto mbx_err; mr->ibmr.rkey = mr->hwmr.lkey; mr->ibmr.lkey = mr->hwmr.lkey; - dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = mr; + dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (u64)mr; return &mr->ibmr; mbx_err: ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); -- cgit v1.1 From 7a1e89d8b793feda96754fd3ba5079c4ba3862ee Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 17 Mar 2014 23:14:17 -0700 Subject: RDMA/ocrdma: Fix warnings about pointer <-> integer casts We should cast pointers to and from unsigned long to turn them into ints. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 6d3990f..dd554a2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -2027,8 +2027,8 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, fast_reg->num_sges = wr->wr.fast_reg.page_list_len; fast_reg->size_sge = get_encoded_page_size(1 << wr->wr.fast_reg.page_shift); - mr = (struct ocrdma_mr *)qp->dev->stag_arr[(hdr->lkey >> 8) & - (OCRDMA_MAX_STAG - 1)]; + mr = (struct ocrdma_mr *) (unsigned long) + qp->dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)]; build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr); return 0; } @@ -2862,7 +2862,8 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len) goto mbx_err; mr->ibmr.rkey = mr->hwmr.lkey; mr->ibmr.lkey = mr->hwmr.lkey; - dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (u64)mr; + dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = + (unsigned long) mr; return &mr->ibmr; mbx_err: ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); -- cgit v1.1 From 2d8f57d56f5850a75d6225519e77f75ad9949f25 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 18 Mar 2014 14:54:56 +0530 Subject: RDMA/ocrdma: Unregister inet notifier when unloading ocrdma Unregister the inet notifier during ocrdma unload to avoid a panic after driver unload. Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 8bd9db6..7c504e0 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -539,6 +539,11 @@ static void ocrdma_unregister_inet6addr_notifier(void) #endif } +static void ocrdma_unregister_inetaddr_notifier(void) +{ + unregister_inetaddr_notifier(&ocrdma_inetaddr_notifier); +} + static int __init ocrdma_init_module(void) { int status; @@ -552,13 +557,19 @@ static int __init ocrdma_init_module(void) #if IS_ENABLED(CONFIG_IPV6) status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier); if (status) - return status; + goto err_notifier6; #endif status = be_roce_register_driver(&ocrdma_drv); if (status) - ocrdma_unregister_inet6addr_notifier(); + goto err_be_reg; + return 0; + +err_be_reg: + ocrdma_unregister_inet6addr_notifier(); +err_notifier6: + ocrdma_unregister_inetaddr_notifier(); return status; } @@ -566,6 +577,7 @@ static void __exit ocrdma_exit_module(void) { be_roce_unregister_driver(&ocrdma_drv); ocrdma_unregister_inet6addr_notifier(); + ocrdma_unregister_inetaddr_notifier(); ocrdma_rem_debugfs(); } -- cgit v1.1