drm/i915: Store a direct lookup from object handle to vma

The advent of full-ppgtt lead to an extra indirection between the object and its binding. That extra indirection has a noticeable impact on how fast we can convert from the user handles to our internal vma for execbuffer. In order to bypass the extra indirection, we use a resizable hashtable to jump from the object to the per-ctx vma. rhashtable was considered but we don't need the online resizing feature and the extra complexity proved to undermine its usefulness. Instead, we simply reallocate the hastable on demand in a background task and serialize it before iterating. In non-full-ppgtt modes, multiple files and multiple contexts can share the same vma. This leads to having multiple possible handle->vma links, so we only use the first to establish the fast path. The majority of buffers are not shared and so we should still be able to realise speedups with multiple clients. v2: Prettier names, more magic. v3: Many style tweaks, most notably hiding the misuse of execobj[].rsvd2 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
author: Chris Wilson <chris@chris-wilson.co.uk> 2017-06-16 15:05:16 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2017-06-16 16:54:04 +0100
commit: 4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8 (patch)
tree: 19b5eda36527b1f71caa77ada46083263fc01e69 /drivers/gpu/drm/i915/i915_gem_context.c
parent: 4c9c0d09741deab0aac76b83961cfe95b24f3e6f (diff)
download: op-kernel-dev-4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8.zip
op-kernel-dev-4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8.tar.gz
1 files changed, 81 insertions, 1 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 81c73de..23f7401 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -85,6 +85,7 @@
  *
  */
 
+#include <linux/log2.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
@@ -92,6 +93,70 @@
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
+/* Initial size (as log2) to preallocate the handle->object hashtable */
+#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */
+
+static void resize_vma_ht(struct work_struct *work)
+{
+	struct i915_gem_context_vma_lut *lut =
+		container_of(work, typeof(*lut), resize);
+	unsigned int bits, new_bits, size, i;
+	struct hlist_head *new_ht;
+
+	GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));
+
+	bits = 1 + ilog2(4*lut->ht_count/3 + 1);
+	new_bits = min_t(unsigned int,
+			 max(bits, VMA_HT_BITS),
+			 sizeof(unsigned int) * BITS_PER_BYTE - 1);
+	if (new_bits == lut->ht_bits)
+		goto out;
+
+	new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
+	if (!new_ht)
+		new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
+	if (!new_ht)
+		/* Pretend resize succeeded and stop calling us for a bit! */
+		goto out;
+
+	size = BIT(lut->ht_bits);
+	for (i = 0; i < size; i++) {
+		struct i915_vma *vma;
+		struct hlist_node *tmp;
+
+		hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
+			hlist_add_head(&vma->ctx_node,
+				       &new_ht[hash_32(vma->ctx_handle,
+						       new_bits)]);
+	}
+	kvfree(lut->ht);
+	lut->ht = new_ht;
+	lut->ht_bits = new_bits;
+out:
+	smp_store_release(&lut->ht_size, BIT(bits));
+	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
+}
+
+static void vma_lut_free(struct i915_gem_context *ctx)
+{
+	struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
+	unsigned int i, size;
+
+	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
+		cancel_work_sync(&lut->resize);
+
+	size = BIT(lut->ht_bits);
+	for (i = 0; i < size; i++) {
+		struct i915_vma *vma;
+
+		hlist_for_each_entry(vma, &lut->ht[i], ctx_node) {
+			vma->obj->vma_hashed = NULL;
+			vma->ctx = NULL;
+		}
+	}
+	kvfree(lut->ht);
+}
+
 void i915_gem_context_free(struct kref *ctx_ref)
 {
 	struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -101,6 +166,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
 	trace_i915_context_free(ctx);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
+	vma_lut_free(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
 
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -118,6 +184,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
 
 	kfree(ctx->name);
 	put_pid(ctx->pid);
+
 	list_del(&ctx->link);
 
 	ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
@@ -201,13 +268,24 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	ctx->i915 = dev_priv;
 	ctx->priority = I915_PRIORITY_NORMAL;
 
+	ctx->vma_lut.ht_bits = VMA_HT_BITS;
+	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
+	BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
+	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
+				  sizeof(*ctx->vma_lut.ht),
+				  GFP_KERNEL);
+	if (!ctx->vma_lut.ht)
+		goto err_out;
+
+	INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);
+
 	/* Default context will never have a file_priv */
 	ret = DEFAULT_CONTEXT_HANDLE;
 	if (file_priv) {
 		ret = idr_alloc(&file_priv->context_idr, ctx,
 				DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
 		if (ret < 0)
-			goto err_out;
+			goto err_lut;
 	}
 	ctx->user_handle = ret;
 
@@ -248,6 +326,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 err_pid:
 	put_pid(ctx->pid);
 	idr_remove(&file_priv->context_idr, ctx->user_handle);
+err_lut:
+	kvfree(ctx->vma_lut.ht);
 err_out:
 	context_close(ctx);
 	return ERR_PTR(ret);
author	Chris Wilson <chris@chris-wilson.co.uk>	2017-06-16 15:05:16 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2017-06-16 16:54:04 +0100
commit	4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8 (patch)
tree	19b5eda36527b1f71caa77ada46083263fc01e69 /drivers/gpu/drm/i915/i915_gem_context.c
parent	4c9c0d09741deab0aac76b83961cfe95b24f3e6f (diff)
download	op-kernel-dev-4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8.zip op-kernel-dev-4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8.tar.gz