summaryrefslogtreecommitdiffstats
path: root/drivers/misc
diff options
context:
space:
mode:
authorIan Munsie <imunsie@au1.ibm.com>2015-05-08 22:55:18 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2015-06-03 13:27:15 +1000
commit8ac75b96be71e20ec1785ca18170890c4dfffe87 (patch)
tree8b06520ebf5859bf62fee63e53b2a0581728ec6e /drivers/misc
parente36f6fe1f7aa4238478d4b253aac7d3fcfff6ee0 (diff)
downloadop-kernel-dev-8ac75b96be71e20ec1785ca18170890c4dfffe87.zip
op-kernel-dev-8ac75b96be71e20ec1785ca18170890c4dfffe87.tar.gz
cxl: Use call_rcu to reduce latency when releasing the afu fd
The afu fd release path was identified as a significant bottleneck in the overall performance of cxl. While an optimal AFU design would minimise the need to close & reopen the AFU fd, it is not always practical to avoid. The bottleneck seems to be down to the call to synchronize_rcu(), which will block until every other thread is guaranteed to be out of an RCU critical section. Replace it with call_rcu() to free the context structures later so we can return to the application sooner. This reduces the time spent in the fd release path from 13356 usec to 13.3 usec - about a 100x speed up. Reported-by: Fei K Chen <uchen@cn.ibm.com> Signed-off-by: Ian Munsie <imunsie@au1.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'drivers/misc')
-rw-r--r--drivers/misc/cxl/context.c15
-rw-r--r--drivers/misc/cxl/cxl.h2
2 files changed, 12 insertions, 5 deletions
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index d1b55fe..78ce990 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -232,12 +232,9 @@ void cxl_context_detach_all(struct cxl_afu *afu)
mutex_unlock(&afu->contexts_lock);
}
-void cxl_context_free(struct cxl_context *ctx)
+static void reclaim_ctx(struct rcu_head *rcu)
{
- mutex_lock(&ctx->afu->contexts_lock);
- idr_remove(&ctx->afu->contexts_idr, ctx->pe);
- mutex_unlock(&ctx->afu->contexts_lock);
- synchronize_rcu();
+ struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu);
free_page((u64)ctx->sstp);
ctx->sstp = NULL;
@@ -245,3 +242,11 @@ void cxl_context_free(struct cxl_context *ctx)
put_pid(ctx->pid);
kfree(ctx);
}
+
+void cxl_context_free(struct cxl_context *ctx)
+{
+ mutex_lock(&ctx->afu->contexts_lock);
+ idr_remove(&ctx->afu->contexts_idr, ctx->pe);
+ mutex_unlock(&ctx->afu->contexts_lock);
+ call_rcu(&ctx->rcu, reclaim_ctx);
+}
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index cfee819..b361b48 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -457,6 +457,8 @@ struct cxl_context {
bool pending_irq;
bool pending_fault;
bool pending_afu_err;
+
+ struct rcu_head rcu;
};
struct cxl {
OpenPOWER on IntegriCloud