diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2017-08-16 09:52:08 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2017-08-18 11:59:02 +0100 |
commit | d1b48c1e7184d9bc4ae6d7f9fe2eed9efed11ffc (patch) | |
tree | cbb1cb9f87c16ba6c17cbd47279979c823555d70 /drivers/gpu/drm/i915/i915_gem_context.c | |
parent | 170fa29b14fadf2deb361589cefe6a78b21b1b22 (diff) | |
download | op-kernel-dev-d1b48c1e7184d9bc4ae6d7f9fe2eed9efed11ffc.zip op-kernel-dev-d1b48c1e7184d9bc4ae6d7f9fe2eed9efed11ffc.tar.gz |
drm/i915: Replace execbuf vma ht with an idr
This was the competing idea long ago, but it was only the rewrite
of the idr as a radixtree and using the radixtree directly ourselves,
along with the realisation that we can store the vma directly in the
radixtree and only need a list for the reverse mapping, that made the
patch performant enough to displace using a hashtable. Though the vma ht
is fast and doesn't require any extra allocation (as we can embed the node
inside the vma), it does require a thread for resizing and serialization
and will have the occasional slow lookup. That is hairy enough to
investigate alternatives and favour them if equivalent in peak performance.
One advantage of allocating an indirection entry is that we can support a
single shared bo between many clients, something that was done on a
first-come, first-served basis for shared GGTT vma previously. To offset
the extra allocations, we create yet another kmem_cache for them.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170816085210.4199-5-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_context.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_context.c | 87 |
1 files changed, 19 insertions, 68 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 86ac74a..58a2a44 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -93,69 +93,28 @@ #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -/* Initial size (as log2) to preallocate the handle->object hashtable */ -#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */ - -static void resize_vma_ht(struct work_struct *work) +static void lut_close(struct i915_gem_context *ctx) { - struct i915_gem_context_vma_lut *lut = - container_of(work, typeof(*lut), resize); - unsigned int bits, new_bits, size, i; - struct hlist_head *new_ht; - - GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)); - - bits = 1 + ilog2(4*lut->ht_count/3 + 1); - new_bits = min_t(unsigned int, - max(bits, VMA_HT_BITS), - sizeof(unsigned int) * BITS_PER_BYTE - 1); - if (new_bits == lut->ht_bits) - goto out; - - new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN); - if (!new_ht) - new_ht = vzalloc(sizeof(*new_ht)<<new_bits); - if (!new_ht) - /* Pretend resize succeeded and stop calling us for a bit! 
*/ - goto out; - - size = BIT(lut->ht_bits); - for (i = 0; i < size; i++) { - struct i915_vma *vma; - struct hlist_node *tmp; + struct i915_lut_handle *lut, *ln; + struct radix_tree_iter iter; + void __rcu **slot; - hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node) - hlist_add_head(&vma->ctx_node, - &new_ht[hash_32(vma->ctx_handle, - new_bits)]); + list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) { + list_del(&lut->obj_link); + kmem_cache_free(ctx->i915->luts, lut); } - kvfree(lut->ht); - lut->ht = new_ht; - lut->ht_bits = new_bits; -out: - smp_store_release(&lut->ht_size, BIT(bits)); - GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS); -} -static void vma_lut_free(struct i915_gem_context *ctx) -{ - struct i915_gem_context_vma_lut *lut = &ctx->vma_lut; - unsigned int i, size; + radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) { + struct i915_vma *vma = rcu_dereference_raw(*slot); + struct drm_i915_gem_object *obj = vma->obj; - if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) - cancel_work_sync(&lut->resize); + radix_tree_iter_delete(&ctx->handles_vma, &iter, slot); - size = BIT(lut->ht_bits); - for (i = 0; i < size; i++) { - struct i915_vma *vma; + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); - hlist_for_each_entry(vma, &lut->ht[i], ctx_node) { - vma->obj->vma_hashed = NULL; - vma->ctx = NULL; - i915_vma_put(vma); - } + __i915_gem_object_release_unless_active(obj); } - kvfree(lut->ht); } static void i915_gem_context_free(struct i915_gem_context *ctx) @@ -165,7 +124,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); - vma_lut_free(ctx); i915_ppgtt_put(ctx->ppgtt); for (i = 0; i < I915_NUM_ENGINES; i++) { @@ -239,8 +197,11 @@ void i915_gem_context_release(struct kref *ref) static void context_close(struct i915_gem_context *ctx) { i915_gem_context_set_closed(ctx); + + lut_close(ctx); if (ctx->ppgtt) 
i915_ppgtt_close(&ctx->ppgtt->base); + ctx->file_priv = ERR_PTR(-EBADF); i915_gem_context_put(ctx); } @@ -313,16 +274,8 @@ __create_hw_context(struct drm_i915_private *dev_priv, ctx->i915 = dev_priv; ctx->priority = I915_PRIORITY_NORMAL; - ctx->vma_lut.ht_bits = VMA_HT_BITS; - ctx->vma_lut.ht_size = BIT(VMA_HT_BITS); - BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS); - ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size, - sizeof(*ctx->vma_lut.ht), - GFP_KERNEL); - if (!ctx->vma_lut.ht) - goto err_out; - - INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht); + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + INIT_LIST_HEAD(&ctx->handles_list); /* Default context will never have a file_priv */ ret = DEFAULT_CONTEXT_HANDLE; @@ -372,8 +325,6 @@ err_pid: put_pid(ctx->pid); idr_remove(&file_priv->context_idr, ctx->user_handle); err_lut: - kvfree(ctx->vma_lut.ht); -err_out: context_close(ctx); return ERR_PTR(ret); } |