summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem_context.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2017-08-16 09:52:08 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2017-08-18 11:59:02 +0100
commitd1b48c1e7184d9bc4ae6d7f9fe2eed9efed11ffc (patch)
treecbb1cb9f87c16ba6c17cbd47279979c823555d70 /drivers/gpu/drm/i915/i915_gem_context.c
parent170fa29b14fadf2deb361589cefe6a78b21b1b22 (diff)
downloadop-kernel-dev-d1b48c1e7184d9bc4ae6d7f9fe2eed9efed11ffc.zip
op-kernel-dev-d1b48c1e7184d9bc4ae6d7f9fe2eed9efed11ffc.tar.gz
drm/i915: Replace execbuf vma ht with an idr
This was the competing idea long ago, but it was only with the rewrite of the idr as an radixtree and using the radixtree directly ourselves, along with the realisation that we can store the vma directly in the radixtree and only need a list for the reverse mapping, that made the patch performant enough to displace using a hashtable. Though the vma ht is fast and doesn't require any extra allocation (as we can embed the node inside the vma), it does require a thread for resizing and serialization and will have the occasional slow lookup. That is hairy enough to investigate alternatives and favour them if equivalent in peak performance. One advantage of allocating an indirection entry is that we can support a single shared bo between many clients, something that was done on a first-come first-serve basis for shared GGTT vma previously. To offset the extra allocations, we create yet another kmem_cache for them. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20170816085210.4199-5-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_context.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_context.c87
1 files changed, 19 insertions, 68 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 86ac74a..58a2a44 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -93,69 +93,28 @@
#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
-/* Initial size (as log2) to preallocate the handle->object hashtable */
-#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */
-
-static void resize_vma_ht(struct work_struct *work)
+static void lut_close(struct i915_gem_context *ctx)
{
- struct i915_gem_context_vma_lut *lut =
- container_of(work, typeof(*lut), resize);
- unsigned int bits, new_bits, size, i;
- struct hlist_head *new_ht;
-
- GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));
-
- bits = 1 + ilog2(4*lut->ht_count/3 + 1);
- new_bits = min_t(unsigned int,
- max(bits, VMA_HT_BITS),
- sizeof(unsigned int) * BITS_PER_BYTE - 1);
- if (new_bits == lut->ht_bits)
- goto out;
-
- new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
- if (!new_ht)
- new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
- if (!new_ht)
- /* Pretend resize succeeded and stop calling us for a bit! */
- goto out;
-
- size = BIT(lut->ht_bits);
- for (i = 0; i < size; i++) {
- struct i915_vma *vma;
- struct hlist_node *tmp;
+ struct i915_lut_handle *lut, *ln;
+ struct radix_tree_iter iter;
+ void __rcu **slot;
- hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
- hlist_add_head(&vma->ctx_node,
- &new_ht[hash_32(vma->ctx_handle,
- new_bits)]);
+ list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
+ list_del(&lut->obj_link);
+ kmem_cache_free(ctx->i915->luts, lut);
}
- kvfree(lut->ht);
- lut->ht = new_ht;
- lut->ht_bits = new_bits;
-out:
- smp_store_release(&lut->ht_size, BIT(bits));
- GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
-}
-static void vma_lut_free(struct i915_gem_context *ctx)
-{
- struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
- unsigned int i, size;
+ radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
+ struct i915_vma *vma = rcu_dereference_raw(*slot);
+ struct drm_i915_gem_object *obj = vma->obj;
- if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
- cancel_work_sync(&lut->resize);
+ radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
- size = BIT(lut->ht_bits);
- for (i = 0; i < size; i++) {
- struct i915_vma *vma;
+ if (!i915_vma_is_ggtt(vma))
+ i915_vma_close(vma);
- hlist_for_each_entry(vma, &lut->ht[i], ctx_node) {
- vma->obj->vma_hashed = NULL;
- vma->ctx = NULL;
- i915_vma_put(vma);
- }
+ __i915_gem_object_release_unless_active(obj);
}
- kvfree(lut->ht);
}
static void i915_gem_context_free(struct i915_gem_context *ctx)
@@ -165,7 +124,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
lockdep_assert_held(&ctx->i915->drm.struct_mutex);
GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
- vma_lut_free(ctx);
i915_ppgtt_put(ctx->ppgtt);
for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -239,8 +197,11 @@ void i915_gem_context_release(struct kref *ref)
static void context_close(struct i915_gem_context *ctx)
{
i915_gem_context_set_closed(ctx);
+
+ lut_close(ctx);
if (ctx->ppgtt)
i915_ppgtt_close(&ctx->ppgtt->base);
+
ctx->file_priv = ERR_PTR(-EBADF);
i915_gem_context_put(ctx);
}
@@ -313,16 +274,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
ctx->i915 = dev_priv;
ctx->priority = I915_PRIORITY_NORMAL;
- ctx->vma_lut.ht_bits = VMA_HT_BITS;
- ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
- BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
- ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
- sizeof(*ctx->vma_lut.ht),
- GFP_KERNEL);
- if (!ctx->vma_lut.ht)
- goto err_out;
-
- INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);
+ INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+ INIT_LIST_HEAD(&ctx->handles_list);
/* Default context will never have a file_priv */
ret = DEFAULT_CONTEXT_HANDLE;
@@ -372,8 +325,6 @@ err_pid:
put_pid(ctx->pid);
idr_remove(&file_priv->context_idr, ctx->user_handle);
err_lut:
- kvfree(ctx->vma_lut.ht);
-err_out:
context_close(ctx);
return ERR_PTR(ret);
}
OpenPOWER on IntegriCloud