diff options
author | Haggai Eran <haggaie@mellanox.com> | 2014-12-11 17:04:25 +0200 |
---|---|---|
committer | Roland Dreier <roland@purestorage.com> | 2014-12-15 18:19:03 -0800 |
commit | eab668a6d082b90b806efc6da12f9b30e03f401d (patch) | |
tree | c3c3323be75824d3d0c254a61fa70a824a39c80d /drivers/infiniband | |
parent | 7bdf65d411c1715d695be0d9a555d7f48d0a7220 (diff) | |
download | op-kernel-dev-eab668a6d082b90b806efc6da12f9b30e03f401d.zip op-kernel-dev-eab668a6d082b90b806efc6da12f9b30e03f401d.tar.gz |
IB/mlx5: Add support for RDMA read/write responder page faults
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index bd1dbe5..936a6cd 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -35,6 +35,8 @@ #include "mlx5_ib.h" +#define MAX_PREFETCH_LEN (4*1024*1024U) + struct workqueue_struct *mlx5_ib_page_fault_wq; #define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do { \ @@ -490,6 +492,80 @@ resolve_page_fault: free_page((unsigned long)buffer); } +static int pages_in_range(u64 address, u32 length) +{ + return (ALIGN(address + length, PAGE_SIZE) - + (address & PAGE_MASK)) >> PAGE_SHIFT; +} + +static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp, + struct mlx5_ib_pfault *pfault) +{ + struct mlx5_pagefault *mpfault = &pfault->mpfault; + u64 address; + u32 length; + u32 prefetch_len = mpfault->bytes_committed; + int prefetch_activated = 0; + u32 rkey = mpfault->rdma.r_key; + int ret; + + /* The RDMA responder handler handles the page fault in two parts. + * First it brings the necessary pages for the current packet + * (and uses the pfault context), and then (after resuming the QP) + * prefetches more pages. The second operation cannot use the pfault + * context and therefore uses the dummy_pfault context allocated on + * the stack */ + struct mlx5_ib_pfault dummy_pfault = {}; + + dummy_pfault.mpfault.bytes_committed = 0; + + mpfault->rdma.rdma_va += mpfault->bytes_committed; + mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed, + mpfault->rdma.rdma_op_len); + mpfault->bytes_committed = 0; + + address = mpfault->rdma.rdma_va; + length = mpfault->rdma.rdma_op_len; + + /* For some operations, the hardware cannot tell the exact message + * length, and in those cases it reports zero. Use prefetch + * logic. */ + if (length == 0) { + prefetch_activated = 1; + length = mpfault->rdma.packet_size; + prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len); + } + + ret = pagefault_single_data_segment(qp, pfault, rkey, address, length, + NULL); + if (ret == -EAGAIN) { + /* We're racing with an invalidation, don't prefetch */ + prefetch_activated = 0; + } else if (ret < 0 || pages_in_range(address, length) > ret) { + mlx5_ib_page_fault_resume(qp, pfault, 1); + return; + } + + mlx5_ib_page_fault_resume(qp, pfault, 0); + + /* At this point, there might be a new pagefault already arriving in + * the eq, switch to the dummy pagefault for the rest of the + * processing. We're still OK with the objects being alive as the + * work-queue is being fenced. */ + + if (prefetch_activated) { + ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey, + address, + prefetch_len, + NULL); + if (ret < 0) { + pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n", + ret, prefetch_activated, + qp->ibqp.qp_num, address, prefetch_len); + } + } +} + void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault) { @@ -499,6 +575,9 @@ void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp, case MLX5_PFAULT_SUBTYPE_WQE: mlx5_ib_mr_wqe_pfault_handler(qp, pfault); break; + case MLX5_PFAULT_SUBTYPE_RDMA: + mlx5_ib_mr_rdma_pfault_handler(qp, pfault); + break; default: pr_warn("Invalid page fault event subtype: 0x%x\n", event_subtype); |