From e178f7057b81c87a7ceaae0ca204487b6f7eedcf Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:47 -0800 Subject: drm/i915: Provide PDP updates via MMIO The initial implementation of this function used MMIO to write the PDPs. Upon review it was determined (correctly) that the docs say to use LRI. The issue is there are times where we want to do a synchronous write (GPU reset). I've tested this, and it works. I've verified with as many people as possible that it should work. This should fix the failing reset problems. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 056f1f0..0560337 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -197,12 +197,19 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, /* Broadwell Page Directory Pointer Descriptors */ static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry, - uint64_t val) + uint64_t val, bool synchronous) { + struct drm_i915_private *dev_priv = ring->dev->dev_private; int ret; BUG_ON(entry >= 4); + if (synchronous) { + I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32); + I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val); + return 0; + } + ret = intel_ring_begin(ring, 6); if (ret) return ret; @@ -236,7 +243,8 @@ static int gen8_ppgtt_enable(struct drm_device *dev) for (i = used_pd - 1; i >= 0; i--) { dma_addr_t addr = ppgtt->pd_dma_addr[i]; for_each_ring(ring, dev_priv, j) { - ret = gen8_write_pdp(ring, i, addr); + ret = gen8_write_pdp(ring, i, addr, + i915_reset_in_progress(&dev_priv->gpu_error)); if (ret) goto err_out; } -- cgit v1.1 From 6f425321e02a1b6c5e90b70f8fab7c140fcaeefb Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:48 -0800 Subject: drm/i915: Don't unconditionally try to deref aliasing ppgtt Since the beginning, the functions which try to properly reference the aliasing PPGTT have deferences a potentially null aliasing_ppgtt member. Since the accessors are meant to be global, this will not do. Introduced originally in: commit a70a3148b0c61cb7c588ea650db785b261b378a3 Author: Ben Widawsky Date: Wed Jul 31 16:59:56 2013 -0700 drm/i915: Make proper functions for VMs Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 92149bc..1f89a07 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4975,7 +4975,8 @@ unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o, struct drm_i915_private *dev_priv = o->base.dev->dev_private; struct i915_vma *vma; - if (vm == &dev_priv->mm.aliasing_ppgtt->base) + if (!dev_priv->mm.aliasing_ppgtt || + vm == &dev_priv->mm.aliasing_ppgtt->base) vm = &dev_priv->gtt.base; BUG_ON(list_empty(&o->vma_list)); @@ -5016,7 +5017,8 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, struct drm_i915_private *dev_priv = o->base.dev->dev_private; struct i915_vma *vma; - if (vm == &dev_priv->mm.aliasing_ppgtt->base) + if (!dev_priv->mm.aliasing_ppgtt || + vm == &dev_priv->mm.aliasing_ppgtt->base) vm = &dev_priv->gtt.base; BUG_ON(list_empty(&o->vma_list)); -- cgit v1.1 From 6e164c3382314a1f63526fa7a4322a17318d0e32 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:49 -0800 Subject: drm/i915: Allow ggtt lookups to not WARN To be able to effectively use the GGTT object lookup function, we don't want to warn when there is no GGTT mapping. Let the caller deal with it instead. Originally, I had intended to have this behavior, and has not introduced the WARN. It was introduced during review with the addition of the follow commit commit 5c2abbeab798154166d42fce4f71790caa6dd9bc Author: Ben Widawsky Date: Tue Sep 24 09:57:57 2013 -0700 drm/i915: Provide a cheap ggtt vma lookup Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1f89a07..360b68f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5073,7 +5073,7 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) return NULL; vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link); - if (WARN_ON(vma->vm != obj_to_ggtt(obj))) + if (vma->vm != obj_to_ggtt(obj)) return NULL; return vma; -- cgit v1.1 From c39538a88dcfdbb905e60f9168d6d49460cabe57 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:50 -0800 Subject: drm/i915: Takedown drm_mm on failed gtt setup This was found by code inspection. If the GTT setup fails then we are left without properly tearing down the drm_mm. Hopefully this never happens. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 360b68f..1114159 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4508,6 +4508,7 @@ int i915_gem_init(struct drm_device *dev) mutex_unlock(&dev->struct_mutex); if (ret) { i915_gem_cleanup_aliasing_ppgtt(dev); + drm_mm_takedown(&dev_priv->gtt.base.mm); return ret; } -- cgit v1.1 From feb822cfc2540c2d2df7827f40991aa2f86f1130 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:51 -0800 Subject: drm/i915: Handle inactivating objects for all VMAs This came from a patch called, "drm/i915: Move active to vma" When moving an object to the inactive list, we do it for all VMs for which the object is bound. The primary difference from that patch is this time around we don't not track 'active' per vma, but rather by object. Therefore, we only need one unref. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1114159..d8981ec 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2008,13 +2008,17 @@ static void i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - struct i915_address_space *ggtt_vm = &dev_priv->gtt.base; - struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); + struct i915_address_space *vm; + struct i915_vma *vma; BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); BUG_ON(!obj->active); - list_move_tail(&vma->mm_list, &ggtt_vm->inactive_list); + list_for_each_entry(vm, &dev_priv->vm_list, global_link) { + vma = i915_gem_obj_to_vma(obj, vm); + if (vma && !list_empty(&vma->mm_list)) + list_move_tail(&vma->mm_list, &vm->inactive_list); + } list_del_init(&obj->ring_list); obj->ring = NULL; -- cgit v1.1 From a7b910789f77afa40ae0816d22339e9d25723c6e Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:52 -0800 Subject: drm/i915: Add vm to error BO capture formerly: drm/i915: Create VMAs (part 6) - finish error plumbing Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gpu_error.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 79dcb8f..2afd9e0 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -482,6 +482,7 @@ static void i915_error_state_free(struct kref *error_ref) static struct drm_i915_error_object * i915_error_object_create_sized(struct drm_i915_private *dev_priv, struct drm_i915_gem_object *src, + struct i915_address_space *vm, const int num_pages) { struct drm_i915_error_object *dst; @@ -495,7 +496,7 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv, if (dst == NULL) return NULL; - reloc_offset = dst->gtt_offset = i915_gem_obj_ggtt_offset(src); + reloc_offset = dst->gtt_offset = i915_gem_obj_offset(src, vm); for (i = 0; i < num_pages; i++) { unsigned long flags; void *d; @@ -556,8 +557,12 @@ unwind: kfree(dst); return NULL; } -#define i915_error_object_create(dev_priv, src) \ - i915_error_object_create_sized((dev_priv), (src), \ +#define i915_error_object_create(dev_priv, src, vm) \ + i915_error_object_create_sized((dev_priv), (src), (vm), \ + (src)->base.size>>PAGE_SHIFT) + +#define i915_error_ggtt_object_create(dev_priv, src) \ + i915_error_object_create_sized((dev_priv), (src), &(dev_priv)->gtt.base, \ (src)->base.size>>PAGE_SHIFT) static void capture_bo(struct drm_i915_error_buffer *err, @@ -670,7 +675,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, obj = ring->scratch.obj; if (acthd >= i915_gem_obj_ggtt_offset(obj) && acthd < i915_gem_obj_ggtt_offset(obj) + obj->base.size) - return i915_error_object_create(dev_priv, obj); + return i915_error_ggtt_object_create(dev_priv, obj); } seqno = ring->get_seqno(ring, false); @@ -689,7 +694,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, /* We need to copy these to an anonymous buffer as the simplest * method to avoid being overwritten by userspace. */ - return i915_error_object_create(dev_priv, obj); + return i915_error_object_create(dev_priv, obj, vm); } } @@ -765,7 +770,9 @@ static void i915_gem_record_active_context(struct intel_ring_buffer *ring, list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) { ering->ctx = i915_error_object_create_sized(dev_priv, - obj, 1); + obj, + &dev_priv->gtt.base, + 1); break; } } @@ -786,7 +793,7 @@ static void i915_gem_record_rings(struct drm_device *dev, i915_error_first_batchbuffer(dev_priv, ring); error->ring[i].ringbuffer = - i915_error_object_create(dev_priv, ring->obj); + i915_error_ggtt_object_create(dev_priv, ring->obj); i915_gem_record_active_context(ring, error, &error->ring[i]); -- cgit v1.1 From 496bfcb9f174f68802439b15b8f0bad17ebe0558 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:53 -0800 Subject: drm/i915: Don't use gtt mapping for !gtt error objects The existing check was insufficient to determine whether we can use the GTT mapping to read out the object during error capture. The previous condition was, if the object has a GGTT mapping, and the reloc is in the GTT range... the can happen with opjects mapped into multiple vms (one of which being the GTT). There are two solutions to this problem: 1. This patch, which avoid reading the io mapping 2. Use the GGTT offset with the io mapping. Since error capture is about recording the most accurate possible error state, and the error was caused by the object not in the GGTT - I opted for the former. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gpu_error.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2afd9e0..9bc121c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -507,7 +507,8 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv, local_irq_save(flags); if (reloc_offset < dev_priv->gtt.mappable_end && - src->has_global_gtt_mapping) { + src->has_global_gtt_mapping && + i915_is_ggtt(vm)) { void __iomem *s; /* Simply ignore tiling or any overlapping fence. -- cgit v1.1 From 685987c6915222730f45141a89f1cd87fb092e9a Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:54 -0800 Subject: drm/i915: Identify active VM for batchbuffer capture Using the current state of the page directory registers, we can determine which of our address spaces was active when the hang occurred. This allows us to scan through all the address spaces to identify the "active" one during error capture. v2: Rebased for BDW error detection. BDW error detection is similar except instead of PP_DIR_BASE, we can use the PDP registers. Signed-off-by: Ben Widawsky [danvet: Add FIXME about global gtt misuse.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gpu_error.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9bc121c..9932243 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -655,6 +655,32 @@ static void i915_gem_record_fences(struct drm_device *dev, } } +/* This assumes all batchbuffers are executed from the PPGTT. It might have to + * change in the future. */ +static bool is_active_vm(struct i915_address_space *vm, + struct intel_ring_buffer *ring) +{ + struct drm_device *dev = vm->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_hw_ppgtt *ppgtt; + + if (INTEL_INFO(dev)->gen < 7) + return i915_is_ggtt(vm); + + /* FIXME: This ignores that the global gtt vm is also on this list. */ + ppgtt = container_of(vm, struct i915_hw_ppgtt, base); + + if (INTEL_INFO(dev)->gen >= 8) { + u64 pdp0 = (u64)I915_READ(GEN8_RING_PDP_UDW(ring, 0)) << 32; + pdp0 |= I915_READ(GEN8_RING_PDP_LDW(ring, 0)); + return pdp0 == ppgtt->pd_dma_addr[0]; + } else { + u32 pp_db; + pp_db = I915_READ(RING_PP_DIR_BASE(ring)); + return (pp_db >> 10) == ppgtt->pd_offset; + } +} + static struct drm_i915_error_object * i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, struct intel_ring_buffer *ring) @@ -662,6 +688,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, struct i915_address_space *vm; struct i915_vma *vma; struct drm_i915_gem_object *obj; + bool found_active = false; u32 seqno; if (!ring->get_seqno) @@ -681,6 +708,11 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, seqno = ring->get_seqno(ring, false); list_for_each_entry(vm, &dev_priv->vm_list, global_link) { + if (!is_active_vm(vm, ring)) + continue; + + found_active = true; + list_for_each_entry(vma, &vm->active_list, mm_list) { obj = vma->obj; if (obj->ring != ring) @@ -699,6 +731,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, } } + WARN_ON(!found_active); return NULL; } -- cgit v1.1 From d7f46fc4e7323887494db13f063a8e59861fefb0 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:55 -0800 Subject: drm/i915: Make pin count per VMA Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 14 ++++++--- drivers/gpu/drm/i915/i915_drv.h | 34 +++++++++++++-------- drivers/gpu/drm/i915/i915_gem.c | 49 ++++++++++++++++-------------- drivers/gpu/drm/i915/i915_gem_context.c | 14 ++++----- drivers/gpu/drm/i915/i915_gem_evict.c | 5 +-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 ++-- drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 6 ++-- drivers/gpu/drm/i915/intel_fbdev.c | 4 +-- drivers/gpu/drm/i915/intel_overlay.c | 8 ++--- drivers/gpu/drm/i915/intel_pm.c | 6 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 12 ++++---- 12 files changed, 90 insertions(+), 70 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 13accf7..4c610ee 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -100,7 +100,7 @@ static const char *get_pin_flag(struct drm_i915_gem_object *obj) { if (obj->user_pin_count > 0) return "P"; - else if (obj->pin_count > 0) + else if (i915_gem_obj_is_pinned(obj)) return "p"; else return " "; @@ -125,6 +125,8 @@ static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { struct i915_vma *vma; + int pin_count = 0; + seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s", &obj->base, get_pin_flag(obj), @@ -141,8 +143,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); if (obj->base.name) seq_printf(m, " (name: %d)", obj->base.name); - if (obj->pin_count) - seq_printf(m, " (pinned x %d)", obj->pin_count); + list_for_each_entry(vma, &obj->vma_list, vma_link) + if (vma->pin_count > 0) + pin_count++; + seq_printf(m, " (pinned x %d)", pin_count); if (obj->pin_display) seq_printf(m, " (display)"); if (obj->fence_reg != I915_FENCE_REG_NONE) @@ -439,7 +443,7 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data) total_obj_size = total_gtt_size = count = 0; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - if (list == PINNED_LIST && obj->pin_count == 0) + if (list == PINNED_LIST && !i915_gem_obj_is_pinned(obj)) continue; seq_puts(m, " "); @@ -2843,7 +2847,7 @@ i915_drop_caches_set(void *data, u64 val) list_for_each_entry(vm, &dev_priv->vm_list, global_link) { list_for_each_entry_safe(vma, x, &vm->inactive_list, mm_list) { - if (vma->obj->pin_count) + if (vma->pin_count) continue; ret = i915_vma_unbind(vma); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 780f815..bf022c4a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -651,6 +651,19 @@ struct i915_vma { unsigned long exec_handle; struct drm_i915_gem_exec_object2 *exec_entry; + /** + * How many users have pinned this object in GTT space. The following + * users can each hold at most one reference: pwrite/pread, pin_ioctl + * (via user_pin_count), execbuffer (objects are not allowed multiple + * times for the same batchbuffer), and the framebuffer code. When + * switching/pageflipping, the framebuffer code has at most two buffers + * pinned per crtc. + * + * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 + * bits with absolutely no headroom. So use 4 bits. + */ + unsigned int pin_count:4; +#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf }; struct i915_ctx_hang_stats { @@ -1617,18 +1630,6 @@ struct drm_i915_gem_object { */ unsigned int fence_dirty:1; - /** How many users have pinned this object in GTT space. The following - * users can each hold at most one reference: pwrite/pread, pin_ioctl - * (via user_pin_count), execbuffer (objects are not allowed multiple - * times for the same batchbuffer), and the framebuffer code. When - * switching/pageflipping, the framebuffer code has at most two buffers - * pinned per crtc. - * - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 - * bits with absolutely no headroom. So use 4 bits. */ - unsigned int pin_count:4; -#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf - /** * Is the object at the current location in the gtt mappable and * fenceable? Used to avoid costly recalculations. @@ -2005,7 +2006,7 @@ int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, uint32_t alignment, bool map_and_fenceable, bool nonblocking); -void i915_gem_object_unpin(struct drm_i915_gem_object *obj); +void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj); int __must_check i915_vma_unbind(struct i915_vma *vma); int __must_check i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj); int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); @@ -2168,6 +2169,13 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm); struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj); +static inline bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) { + struct i915_vma *vma; + list_for_each_entry(vma, &obj->vma_list, vma_link) + if (vma->pin_count > 0) + return true; + return false; +} /* Some GGTT VM helpers */ #define obj_to_ggtt(obj) \ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d8981ec..6dc96bc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -204,7 +204,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, pinned = 0; mutex_lock(&dev->struct_mutex); list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) - if (obj->pin_count) + if (i915_gem_obj_is_pinned(obj)) pinned += i915_gem_obj_ggtt_size(obj); mutex_unlock(&dev->struct_mutex); @@ -651,7 +651,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, } out_unpin: - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); out: return ret; } @@ -1418,7 +1418,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Finally, remap it using the new GTT offset */ ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); unpin: - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); unlock: mutex_unlock(&dev->struct_mutex); out: @@ -2721,7 +2721,7 @@ int i915_vma_unbind(struct i915_vma *vma) return 0; } - if (obj->pin_count) + if (vma->pin_count) return -EBUSY; BUG_ON(obj->pages == NULL); @@ -2785,7 +2785,7 @@ i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj) if (!i915_gem_obj_ggtt_bound(obj)) return 0; - if (obj->pin_count) + if (i915_gem_obj_to_ggtt(obj)->pin_count) return -EBUSY; BUG_ON(obj->pages == NULL); @@ -3486,7 +3486,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, if (obj->cache_level == cache_level) return 0; - if (obj->pin_count) { + if (i915_gem_obj_is_pinned(obj)) { DRM_DEBUG("can not change the cache level of pinned objects\n"); return -EBUSY; } @@ -3646,7 +3646,7 @@ static bool is_pin_display(struct drm_i915_gem_object *obj) * subtracting the potential reference by the user, any pin_count * remains, it must be due to another use by the display engine. */ - return obj->pin_count - !!obj->user_pin_count; + return i915_gem_obj_to_ggtt(obj)->pin_count - !!obj->user_pin_count; } /* @@ -3720,7 +3720,7 @@ err_unpin_display: void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj) { - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); obj->pin_display = is_pin_display(obj); } @@ -3853,18 +3853,18 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) - return -EBUSY; - WARN_ON(map_and_fenceable && !i915_is_ggtt(vm)); vma = i915_gem_obj_to_vma(obj, vm); if (vma) { + if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) + return -EBUSY; + if ((alignment && vma->node.start & (alignment - 1)) || (map_and_fenceable && !obj->map_and_fenceable)) { - WARN(obj->pin_count, + WARN(vma->pin_count, "bo is already pinned with incorrect alignment:" " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," " obj->map_and_fenceable=%d\n", @@ -3893,19 +3893,22 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, if (!obj->has_global_gtt_mapping && map_and_fenceable) i915_gem_gtt_bind_object(obj, obj->cache_level); - obj->pin_count++; + i915_gem_obj_to_vma(obj, vm)->pin_count++; obj->pin_mappable |= map_and_fenceable; return 0; } void -i915_gem_object_unpin(struct drm_i915_gem_object *obj) +i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj) { - BUG_ON(obj->pin_count == 0); - BUG_ON(!i915_gem_obj_bound_any(obj)); + struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); - if (--obj->pin_count == 0) + BUG_ON(!vma); + BUG_ON(vma->pin_count == 0); + BUG_ON(!i915_gem_obj_ggtt_bound(obj)); + + if (--vma->pin_count == 0) obj->pin_mappable = false; } @@ -3989,7 +3992,7 @@ i915_gem_unpin_ioctl(struct drm_device *dev, void *data, obj->user_pin_count--; if (obj->user_pin_count == 0) { obj->pin_filp = NULL; - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); } out: @@ -4069,7 +4072,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, goto unlock; } - if (obj->pin_count) { + if (i915_gem_obj_is_pinned(obj)) { ret = -EINVAL; goto out; } @@ -4178,12 +4181,14 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) if (obj->phys_obj) i915_gem_detach_phys_object(dev, obj); - obj->pin_count = 0; /* NB: 0 or 1 elements */ WARN_ON(!list_empty(&obj->vma_list) && !list_is_singular(&obj->vma_list)); list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { - int ret = i915_vma_unbind(vma); + int ret; + + vma->pin_count = 0; + ret = i915_vma_unbind(vma); if (WARN_ON(ret == -ERESTARTSYS)) { bool was_interruptible; @@ -4963,7 +4968,7 @@ i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc) if (obj->active) continue; - if (obj->pin_count == 0 && obj->pages_pin_count == 0) + if (!i915_gem_obj_is_pinned(obj) && obj->pages_pin_count == 0) count += obj->base.size >> PAGE_SHIFT; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 4187704..b061991 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -241,7 +241,7 @@ static int create_default_context(struct drm_i915_private *dev_priv) return 0; err_unpin: - i915_gem_object_unpin(ctx->obj); + i915_gem_object_ggtt_unpin(ctx->obj); err_destroy: i915_gem_context_unreference(ctx); return ret; @@ -300,11 +300,11 @@ void i915_gem_context_fini(struct drm_device *dev) if (dev_priv->ring[RCS].last_context == dctx) { /* Fake switch to NULL context */ WARN_ON(dctx->obj->active); - i915_gem_object_unpin(dctx->obj); + i915_gem_object_ggtt_unpin(dctx->obj); i915_gem_context_unreference(dctx); } - i915_gem_object_unpin(dctx->obj); + i915_gem_object_ggtt_unpin(dctx->obj); i915_gem_context_unreference(dctx); dev_priv->ring[RCS].default_context = NULL; dev_priv->ring[RCS].last_context = NULL; @@ -412,7 +412,7 @@ static int do_switch(struct i915_hw_context *to) u32 hw_flags = 0; int ret, i; - BUG_ON(from != NULL && from->obj != NULL && from->obj->pin_count == 0); + BUG_ON(from != NULL && from->obj != NULL && !i915_gem_obj_is_pinned(from->obj)); if (from == to && !to->remap_slice) return 0; @@ -428,7 +428,7 @@ static int do_switch(struct i915_hw_context *to) * XXX: We need a real interface to do this instead of trickery. */ ret = i915_gem_object_set_to_gtt_domain(to->obj, false); if (ret) { - i915_gem_object_unpin(to->obj); + i915_gem_object_ggtt_unpin(to->obj); return ret; } @@ -440,7 +440,7 @@ static int do_switch(struct i915_hw_context *to) ret = mi_set_context(ring, to, hw_flags); if (ret) { - i915_gem_object_unpin(to->obj); + i915_gem_object_ggtt_unpin(to->obj); return ret; } @@ -476,7 +476,7 @@ static int do_switch(struct i915_hw_context *to) BUG_ON(from->obj->ring != ring); /* obj is kept alive until the next request by its active ref */ - i915_gem_object_unpin(from->obj); + i915_gem_object_ggtt_unpin(from->obj); i915_gem_context_unreference(from); } diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index b737653..5cb0aa4 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -34,7 +34,8 @@ static bool mark_free(struct i915_vma *vma, struct list_head *unwind) { - if (vma->obj->pin_count) + /* Freeing up memory requires no VMAs are pinned */ + if (i915_gem_obj_is_pinned(vma->obj)) return false; if (WARN_ON(!list_empty(&vma->exec_list))) @@ -186,7 +187,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) } list_for_each_entry_safe(vma, next, &vm->inactive_list, mm_list) - if (vma->obj->pin_count == 0) + if (vma->pin_count == 0) WARN_ON(i915_vma_unbind(vma)); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 9282b4c..a2d6eb5 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -566,7 +566,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) i915_gem_object_unpin_fence(obj); if (entry->flags & __EXEC_OBJECT_HAS_PIN) - i915_gem_object_unpin(obj); + vma->pin_count--; entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); } @@ -923,7 +923,9 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, if (obj->base.write_domain) { obj->dirty = 1; obj->last_write_seqno = intel_ring_get_seqno(ring); - if (obj->pin_count) /* check for potential scanout */ + /* check for potential scanout */ + if (i915_gem_obj_ggtt_bound(obj) && + i915_gem_obj_to_ggtt(obj)->pin_count) intel_mark_fb_busy(obj, ring); } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index b139053..eb99358 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -308,7 +308,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, return -EINVAL; } - if (obj->pin_count || obj->framebuffer_references) { + if (i915_gem_obj_is_pinned(obj) || obj->framebuffer_references) { drm_gem_object_unreference_unlocked(&obj->base); return -EBUSY; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9932243..5dede92 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -578,7 +578,7 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->write_domain = obj->base.write_domain; err->fence_reg = obj->fence_reg; err->pinned = 0; - if (obj->pin_count > 0) + if (i915_gem_obj_is_pinned(obj)) err->pinned = 1; if (obj->user_pin_count > 0) err->pinned = -1; @@ -611,7 +611,7 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err, int i = 0; list_for_each_entry(obj, head, global_list) { - if (obj->pin_count == 0) + if (!i915_gem_obj_is_pinned(obj)) continue; capture_bo(err++, obj); @@ -875,7 +875,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, i++; error->active_bo_count[ndx] = i; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) - if (obj->pin_count) + if (i915_gem_obj_is_pinned(obj)) i++; error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx]; diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 284c3eb..d53c17d 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -104,7 +104,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, return 0; out_unpin: - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); out_unref: drm_gem_object_unreference(&obj->base); out: @@ -208,7 +208,7 @@ static int intelfb_create(struct drm_fb_helper *helper, return 0; out_unpin: - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); drm_gem_object_unreference(&obj->base); out_unlock: mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index a98a990..a1397b1 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -293,7 +293,7 @@ static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay) { struct drm_i915_gem_object *obj = overlay->old_vid_bo; - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); drm_gem_object_unreference(&obj->base); overlay->old_vid_bo = NULL; @@ -306,7 +306,7 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay) /* never have the overlay hw on without showing a frame */ BUG_ON(!overlay->vid_bo); - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); drm_gem_object_unreference(&obj->base); overlay->vid_bo = NULL; @@ -782,7 +782,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, return 0; out_unpin: - i915_gem_object_unpin(new_bo); + i915_gem_object_ggtt_unpin(new_bo); return ret; } @@ -1386,7 +1386,7 @@ void intel_setup_overlay(struct drm_device *dev) out_unpin_bo: if (!OVERLAY_NEEDS_PHYSICAL(dev)) - i915_gem_object_unpin(reg_bo); + i915_gem_object_ggtt_unpin(reg_bo); out_free_bo: drm_gem_object_unreference(®_bo->base); out_free: diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 41b6e08..cba4be8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3298,7 +3298,7 @@ intel_alloc_context_page(struct drm_device *dev) return ctx; err_unpin: - i915_gem_object_unpin(ctx); + i915_gem_object_ggtt_unpin(ctx); err_unref: drm_gem_object_unreference(&ctx->base); return NULL; @@ -4166,13 +4166,13 @@ void ironlake_teardown_rc6(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; if (dev_priv->ips.renderctx) { - i915_gem_object_unpin(dev_priv->ips.renderctx); + i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx); drm_gem_object_unreference(&dev_priv->ips.renderctx->base); dev_priv->ips.renderctx = NULL; } if (dev_priv->ips.pwrctx) { - i915_gem_object_unpin(dev_priv->ips.pwrctx); + i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx); drm_gem_object_unreference(&dev_priv->ips.pwrctx->base); dev_priv->ips.pwrctx = NULL; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e05a021..75c8883 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -549,7 +549,7 @@ init_pipe_control(struct intel_ring_buffer *ring) return 0; err_unpin: - i915_gem_object_unpin(ring->scratch.obj); + i915_gem_object_ggtt_unpin(ring->scratch.obj); err_unref: drm_gem_object_unreference(&ring->scratch.obj->base); err: @@ -625,7 +625,7 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring) if (INTEL_INFO(dev)->gen >= 5) { kunmap(sg_page(ring->scratch.obj->pages->sgl)); - i915_gem_object_unpin(ring->scratch.obj); + i915_gem_object_ggtt_unpin(ring->scratch.obj); } drm_gem_object_unreference(&ring->scratch.obj->base); @@ -1250,7 +1250,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring) return; kunmap(sg_page(obj->pages->sgl)); - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); drm_gem_object_unreference(&obj->base); ring->status_page.obj = NULL; } @@ -1290,7 +1290,7 @@ static int init_status_page(struct intel_ring_buffer *ring) return 0; err_unpin: - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); err_unref: drm_gem_object_unreference(&obj->base); err: @@ -1387,7 +1387,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, err_unmap: iounmap(ring->virtual_start); err_unpin: - i915_gem_object_unpin(obj); + i915_gem_object_ggtt_unpin(obj); err_unref: drm_gem_object_unreference(&obj->base); ring->obj = NULL; @@ -1415,7 +1415,7 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring) iounmap(ring->virtual_start); - i915_gem_object_unpin(ring->obj); + i915_gem_object_ggtt_unpin(ring->obj); drm_gem_object_unreference(&ring->obj->base); ring->obj = NULL; ring->preallocated_lazy_request = NULL; -- cgit v1.1 From 6f65e29acad7499920cf1e49b675fac7cde24166 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:56 -0800 Subject: drm/i915: Create bind/unbind abstraction for VMAs To sum up what goes on here, we abstract the vma binding, similarly to the previous object binding. This helps for distinguishing legacy binding, versus modern binding. To keep the code churn as minimal as possible, I am leaving in insert_entries(). It serves as the per platform pte writing basically. bind_vma and insert_entries do share a lot of similarities, and I did have designs to combine the two, but as mentioned already... too much churn in an already massive patchset. What follows are the 3 commits which existed discretely in the original submissions. Upon rebasing on Broadwell support, it became clear that separation was not good, and only made for more error prone code. Below are the 3 commit messages with all their history. drm/i915: Add bind/unbind object functions to VMA drm/i915: Use the new vm [un]bind functions drm/i915: reduce vm->insert_entries() usage drm/i915: Add bind/unbind object functions to VMA As we plumb the code with more VM information, it has become more obvious that the easiest way to deal with bind and unbind is to simply put the function pointers in the vm, and let those choose the correct way to handle the page table updates. This change allows many places in the code to simply be vm->bind, and not have to worry about distinguishing PPGTT vs GGTT. Notice that this patch has no impact on functionality. I've decided to save the actual change until the next patch because I think it's easier to review that way. I'm happy to squash the two, or let Daniel do it on merge. v2: Make ggtt handle the quirky aliasing ppgtt Add flags to bind object to support above Don't ever call bind/unbind directly for PPGTT until we have real, full PPGTT (use NULLs to assert this) Make sure we rebind the ggtt if there already is a ggtt binding. This happens on set cache levels. Use VMA for bind/unbind (Daniel, Ben) v3: Reorganize ggtt_vma_bind to be more concise and easier to read (Ville). Change logic in unbind to only unbind ggtt when there is a global mapping, and to remove a redundant check if the aliasing ppgtt exists. v4: Make the bind function a bit smarter about the cache levels to avoid unnecessary multiple remaps. "I accept it is a wart, I think unifying the pin_vma / bind_vma could be unified later" (Chris) Removed the git notes, and put version info here. (Daniel) v5: Update the comment to not suck (Chris) v6: Move bind/unbind to the VMA. It makes more sense in the VMA structure (always has, but I was previously lazy). With this change, it will allow us to keep a distinct insert_entries. Reviewed-by: Chris Wilson Signed-off-by: Ben Widawsky drm/i915: Use the new vm [un]bind functions Building on the last patch which created the new function pointers in the VM for bind/unbind, here we actually put those new function pointers to use. Split out as a separate patch to aid in review. I'm fine with squashing into the previous patch if people request it. v2: Updated to address the smart ggtt which can do aliasing as needed Make sure we bind to global gtt when mappable and fenceable. I thought we could get away without this initialy, but we cannot. v3: Make the global GTT binding explicitly use the ggtt VM for bind_vma(). While at it, use the new ggtt_vma helper (Chris) At this point the original mailing list thread diverges. ie. v4^: use target_obj instead of obj for gen6 relocate_entry vma->bind_vma() can be called safely during pin. So simply do that instead of the complicated conditionals. Don't restore PPGTT bound objects on resume path Bug fix in resume path for globally bound Bos Properly handle secure dispatch Rebased on vma bind/unbind conversion Signed-off-by: Ben Widawsky drm/i915: reduce vm->insert_entries() usage FKA: drm/i915: eliminate vm->insert_entries() With bind/unbind function pointers in place, we no longer need insert_entries. We could, and want, to remove clear_range, however it's not totally easy at this point. Since it's used in a couple of place still that don't only deal in objects: setup, ppgtt init, and restore gtt mappings. v2: Don't actually remove insert_entries, just limit its usage. It will be useful when we introduce gen8. It will always be called from the vma bind/unbind. Reviewed-by: Chris Wilson (v1) Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 103 ++++++++-------- drivers/gpu/drm/i915/i915_gem.c | 61 ++------- drivers/gpu/drm/i915/i915_gem_context.c | 8 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 43 ++++--- drivers/gpu/drm/i915/i915_gem_gtt.c | 192 +++++++++++++++++++++++------ 5 files changed, 245 insertions(+), 162 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bf022c4a..9fe078b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -523,6 +523,57 @@ enum i915_cache_level { typedef uint32_t gen6_gtt_pte_t; +/** + * A VMA represents a GEM BO that is bound into an address space. Therefore, a + * VMA's presence cannot be guaranteed before binding, or after unbinding the + * object into/from the address space. + * + * To make things as simple as possible (ie. no refcounting), a VMA's lifetime + * will always be <= an objects lifetime. So object refcounting should cover us. + */ +struct i915_vma { + struct drm_mm_node node; + struct drm_i915_gem_object *obj; + struct i915_address_space *vm; + + /** This object's place on the active/inactive lists */ + struct list_head mm_list; + + struct list_head vma_link; /* Link in the object's VMA list */ + + /** This vma's place in the batchbuffer or on the eviction list */ + struct list_head exec_list; + + /** + * Used for performing relocations during execbuffer insertion. + */ + struct hlist_node exec_node; + unsigned long exec_handle; + struct drm_i915_gem_exec_object2 *exec_entry; + + /** + * How many users have pinned this object in GTT space. The following + * users can each hold at most one reference: pwrite/pread, pin_ioctl + * (via user_pin_count), execbuffer (objects are not allowed multiple + * times for the same batchbuffer), and the framebuffer code. When + * switching/pageflipping, the framebuffer code has at most two buffers + * pinned per crtc. + * + * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 + * bits with absolutely no headroom. So use 4 bits. */ + unsigned int pin_count:4; +#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf + + /** Unmap an object from an address space. This usually consists of + * setting the valid PTE entries to a reserved scratch page. */ + void (*unbind_vma)(struct i915_vma *vma); + /* Map an object into an address space with the given cache flags. */ +#define GLOBAL_BIND (1<<0) + void (*bind_vma)(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); +}; + struct i915_address_space { struct drm_mm mm; struct drm_device *dev; @@ -623,49 +674,6 @@ struct i915_hw_ppgtt { int (*enable)(struct drm_device *dev); }; -/** - * A VMA represents a GEM BO that is bound into an address space. Therefore, a - * VMA's presence cannot be guaranteed before binding, or after unbinding the - * object into/from the address space. - * - * To make things as simple as possible (ie. no refcounting), a VMA's lifetime - * will always be <= an objects lifetime. So object refcounting should cover us. - */ -struct i915_vma { - struct drm_mm_node node; - struct drm_i915_gem_object *obj; - struct i915_address_space *vm; - - /** This object's place on the active/inactive lists */ - struct list_head mm_list; - - struct list_head vma_link; /* Link in the object's VMA list */ - - /** This vma's place in the batchbuffer or on the eviction list */ - struct list_head exec_list; - - /** - * Used for performing relocations during execbuffer insertion. - */ - struct hlist_node exec_node; - unsigned long exec_handle; - struct drm_i915_gem_exec_object2 *exec_entry; - - /** - * How many users have pinned this object in GTT space. The following - * users can each hold at most one reference: pwrite/pread, pin_ioctl - * (via user_pin_count), execbuffer (objects are not allowed multiple - * times for the same batchbuffer), and the framebuffer code. When - * switching/pageflipping, the framebuffer code has at most two buffers - * pinned per crtc. - * - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 - * bits with absolutely no headroom. So use 4 bits. - */ - unsigned int pin_count:4; -#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf -}; - struct i915_ctx_hang_stats { /* This context had batch pending when hang was declared */ unsigned batch_pending; @@ -2242,19 +2250,10 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, /* i915_gem_gtt.c */ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev); -void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_object *obj, - enum i915_cache_level cache_level); -void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_object *obj); - void i915_check_and_clear_faults(struct drm_device *dev); void i915_gem_suspend_gtt_mappings(struct drm_device *dev); void i915_gem_restore_gtt_mappings(struct drm_device *dev); int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj); -void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, - enum i915_cache_level cache_level); -void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj); void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj); void i915_gem_init_global_gtt(struct drm_device *dev); void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6dc96bc..2b92e89 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2743,12 +2743,8 @@ int i915_vma_unbind(struct i915_vma *vma) trace_i915_vma_unbind(vma); - if (obj->has_global_gtt_mapping) - i915_gem_gtt_unbind_object(obj); - if (obj->has_aliasing_ppgtt_mapping) { - i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); - obj->has_aliasing_ppgtt_mapping = 0; - } + vma->unbind_vma(vma); + i915_gem_gtt_finish_object(obj); list_del(&vma->mm_list); @@ -3479,7 +3475,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { struct drm_device *dev = obj->base.dev; - drm_i915_private_t *dev_priv = dev->dev_private; struct i915_vma *vma; int ret; @@ -3518,11 +3513,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, return ret; } - if (obj->has_global_gtt_mapping) - i915_gem_gtt_bind_object(obj, cache_level); - if (obj->has_aliasing_ppgtt_mapping) - i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, - obj, cache_level); + list_for_each_entry(vma, &obj->vma_list, vma_link) + vma->bind_vma(vma, cache_level, 0); } list_for_each_entry(vma, &obj->vma_list, vma_link) @@ -3850,6 +3842,7 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, bool map_and_fenceable, bool nonblocking) { + const u32 flags = map_and_fenceable ? GLOBAL_BIND : 0; struct i915_vma *vma; int ret; @@ -3878,20 +3871,17 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, } if (!i915_gem_obj_bound(obj, vm)) { - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - ret = i915_gem_object_bind_to_vm(obj, vm, alignment, map_and_fenceable, nonblocking); if (ret) return ret; - if (!dev_priv->mm.aliasing_ppgtt) - i915_gem_gtt_bind_object(obj, obj->cache_level); } - if (!obj->has_global_gtt_mapping && map_and_fenceable) - i915_gem_gtt_bind_object(obj, obj->cache_level); + vma = i915_gem_obj_to_vma(obj, vm); + + vma->bind_vma(vma, obj->cache_level, flags); i915_gem_obj_to_vma(obj, vm)->pin_count++; obj->pin_mappable |= map_and_fenceable; @@ -4235,41 +4225,6 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, return NULL; } -static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) -{ - struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); - if (vma == NULL) - return ERR_PTR(-ENOMEM); - - INIT_LIST_HEAD(&vma->vma_link); - INIT_LIST_HEAD(&vma->mm_list); - INIT_LIST_HEAD(&vma->exec_list); - vma->vm = vm; - vma->obj = obj; - - /* Keep GGTT vmas first to make debug easier */ - if (i915_is_ggtt(vm)) - list_add(&vma->vma_link, &obj->vma_list); - else - list_add_tail(&vma->vma_link, &obj->vma_list); - - return vma; -} - -struct i915_vma * -i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) -{ - struct i915_vma *vma; - - vma = i915_gem_obj_to_vma(obj, vm); - if (!vma) - vma = __i915_gem_vma_create(obj, vm); - - return vma; -} - void i915_gem_vma_destroy(struct i915_vma *vma) { WARN_ON(vma->node.allocated); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index b061991..0640ab8 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -408,6 +408,7 @@ mi_set_context(struct intel_ring_buffer *ring, static int do_switch(struct i915_hw_context *to) { struct intel_ring_buffer *ring = to->ring; + struct drm_i915_private *dev_priv = ring->dev->dev_private; struct i915_hw_context *from = ring->last_context; u32 hw_flags = 0; int ret, i; @@ -432,8 +433,11 @@ static int do_switch(struct i915_hw_context *to) return ret; } - if (!to->obj->has_global_gtt_mapping) - i915_gem_gtt_bind_object(to->obj, to->obj->cache_level); + if (!to->obj->has_global_gtt_mapping) { + struct i915_vma *vma = i915_gem_obj_to_vma(to->obj, + &dev_priv->gtt.base); + vma->bind_vma(vma, to->obj->cache_level, GLOBAL_BIND); + } if (!to->is_initialized || is_default_context(to)) hw_flags |= MI_RESTORE_INHIBIT; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a2d6eb5..c093a2b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -88,6 +88,7 @@ eb_lookup_vmas(struct eb_vmas *eb, struct i915_address_space *vm, struct drm_file *file) { + struct drm_i915_private *dev_priv = vm->dev->dev_private; struct drm_i915_gem_object *obj; struct list_head objects; int i, ret = 0; @@ -122,6 +123,15 @@ eb_lookup_vmas(struct eb_vmas *eb, i = 0; list_for_each_entry(obj, &objects, obj_exec_link) { struct i915_vma *vma; + struct i915_address_space *bind_vm = vm; + + /* If we have secure dispatch, or the userspace assures us that + * they know what they're doing, use the GGTT VM. + */ + if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT || + ((args->flags & I915_EXEC_SECURE) && + (i == (args->buffer_count - 1)))) + bind_vm = &dev_priv->gtt.base; /* * NOTE: We can leak any vmas created here when something fails @@ -131,7 +141,7 @@ eb_lookup_vmas(struct eb_vmas *eb, * from the (obj, vm) we don't run the risk of creating * duplicated vmas for the same vm. */ - vma = i915_gem_obj_lookup_or_create_vma(obj, vm); + vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm); if (IS_ERR(vma)) { DRM_DEBUG("Failed to lookup VMA\n"); ret = PTR_ERR(vma); @@ -315,8 +325,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, if (unlikely(IS_GEN6(dev) && reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && !target_i915_obj->has_global_gtt_mapping)) { - i915_gem_gtt_bind_object(target_i915_obj, - target_i915_obj->cache_level); + struct i915_vma *vma = i915_gem_obj_to_vma(target_i915_obj, vm); + vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND); } /* Validate that the target is in a valid r/w GPU domain */ @@ -493,11 +503,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, struct intel_ring_buffer *ring, bool *need_reloc) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; bool need_fence, need_mappable; - struct drm_i915_gem_object *obj = vma->obj; + u32 flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) && + !vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0; int ret; need_fence = @@ -526,14 +537,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, } } - /* Ensure ppgtt mapping exists if needed */ - if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { - i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, - obj, obj->cache_level); - - obj->has_aliasing_ppgtt_mapping = 1; - } - if (entry->offset != vma->node.start) { entry->offset = vma->node.start; *need_reloc = true; @@ -544,9 +547,7 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER; } - if (entry->flags & EXEC_OBJECT_NEEDS_GTT && - !obj->has_global_gtt_mapping) - i915_gem_gtt_bind_object(obj, obj->cache_level); + vma->bind_vma(vma, obj->cache_level, flags); return 0; } @@ -1171,8 +1172,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ - if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping) - i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level); + if (flags & I915_DISPATCH_SECURE && + !batch_obj->has_global_gtt_mapping) { + /* When we have multiple VMs, we'll need to make sure that we + * allocate space first */ + struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj); + BUG_ON(!vma); + vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND); + } ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0560337..73117ec 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -68,6 +68,11 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; #define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ #define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */ +static void ppgtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags); +static void ppgtt_unbind_vma(struct i915_vma *vma); + static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level, bool valid) @@ -746,22 +751,26 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev) dev_priv->mm.aliasing_ppgtt = NULL; } -void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_object *obj, - enum i915_cache_level cache_level) +static void __always_unused +ppgtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { - ppgtt->base.insert_entries(&ppgtt->base, obj->pages, - i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT, - cache_level); + const unsigned long entry = vma->node.start >> PAGE_SHIFT; + + WARN_ON(flags); + + vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level); } -void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_object *obj) +static void __always_unused ppgtt_unbind_vma(struct i915_vma *vma) { - ppgtt->base.clear_range(&ppgtt->base, - i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT, - obj->base.size >> PAGE_SHIFT, - true); + const unsigned long entry = vma->node.start >> PAGE_SHIFT; + + vma->vm->clear_range(vma->vm, + entry, + vma->obj->base.size >> PAGE_SHIFT, + true); } extern int intel_iommu_gfx_mapped; @@ -863,8 +872,18 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) true); list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { + struct i915_vma *vma = i915_gem_obj_to_vma(obj, + &dev_priv->gtt.base); + if (!vma) + continue; + i915_gem_clflush_object(obj, obj->pin_display); - i915_gem_gtt_bind_object(obj, obj->cache_level); + /* The bind_vma code tries to be smart about tracking mappings. + * Unfortunately above, we've just wiped out the mappings + * without telling our object about it. So we need to fake it. + */ + obj->has_global_gtt_mapping = 0; + vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND); } i915_gem_chipset_flush(dev); @@ -1023,16 +1042,18 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, readl(gtt_base); } -static void i915_ggtt_insert_entries(struct i915_address_space *vm, - struct sg_table *st, - unsigned int pg_start, - enum i915_cache_level cache_level) + +static void i915_ggtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 unused) { + const unsigned long entry = vma->node.start >> PAGE_SHIFT; unsigned int flags = (cache_level == I915_CACHE_NONE) ? AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - intel_gtt_insert_sg_entries(st, pg_start, flags); - + BUG_ON(!i915_is_ggtt(vma->vm)); + intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags); + vma->obj->has_global_gtt_mapping = 1; } static void i915_ggtt_clear_range(struct i915_address_space *vm, @@ -1043,33 +1064,77 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm, intel_gtt_clear_range(first_entry, num_entries); } +static void i915_ggtt_unbind_vma(struct i915_vma *vma) +{ + const unsigned int first = vma->node.start >> PAGE_SHIFT; + const unsigned int size = vma->obj->base.size >> PAGE_SHIFT; -void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, - enum i915_cache_level cache_level) + BUG_ON(!i915_is_ggtt(vma->vm)); + vma->obj->has_global_gtt_mapping = 0; + intel_gtt_clear_range(first, size); +} + +static void ggtt_bind_vma(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) { - struct drm_device *dev = obj->base.dev; + struct drm_device *dev = vma->vm->dev; struct drm_i915_private *dev_priv = dev->dev_private; - const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; + struct drm_i915_gem_object *obj = vma->obj; + const unsigned long entry = vma->node.start >> PAGE_SHIFT; - dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages, - entry, - cache_level); + /* If there is no aliasing PPGTT, or the caller needs a global mapping, + * or we have a global mapping already but the cacheability flags have + * changed, set the global PTEs. + * + * If there is an aliasing PPGTT it is anecdotally faster, so use that + * instead if none of the above hold true. + * + * NB: A global mapping should only be needed for special regions like + * "gtt mappable", SNB errata, or if specified via special execbuf + * flags. At all other times, the GPU will use the aliasing PPGTT. + */ + if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) { + if (!obj->has_global_gtt_mapping || + (cache_level != obj->cache_level)) { + vma->vm->insert_entries(vma->vm, obj->pages, entry, + cache_level); + obj->has_global_gtt_mapping = 1; + } + } - obj->has_global_gtt_mapping = 1; + if (dev_priv->mm.aliasing_ppgtt && + (!obj->has_aliasing_ppgtt_mapping || + (cache_level != obj->cache_level))) { + struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; + appgtt->base.insert_entries(&appgtt->base, + vma->obj->pages, entry, cache_level); + vma->obj->has_aliasing_ppgtt_mapping = 1; + } } -void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) +static void ggtt_unbind_vma(struct i915_vma *vma) { - struct drm_device *dev = obj->base.dev; + struct drm_device *dev = vma->vm->dev; struct drm_i915_private *dev_priv = dev->dev_private; - const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; - - dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, - entry, - obj->base.size >> PAGE_SHIFT, - true); + struct drm_i915_gem_object *obj = vma->obj; + const unsigned long entry = vma->node.start >> PAGE_SHIFT; + + if (obj->has_global_gtt_mapping) { + vma->vm->clear_range(vma->vm, entry, + vma->obj->base.size >> PAGE_SHIFT, + true); + obj->has_global_gtt_mapping = 0; + } - obj->has_global_gtt_mapping = 0; + if (obj->has_aliasing_ppgtt_mapping) { + struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; + appgtt->base.clear_range(&appgtt->base, + entry, + obj->base.size >> PAGE_SHIFT, + true); + obj->has_aliasing_ppgtt_mapping = 0; + } } void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) @@ -1444,7 +1509,6 @@ static int i915_gmch_probe(struct drm_device *dev, dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev); dev_priv->gtt.base.clear_range = i915_ggtt_clear_range; - dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries; return 0; } @@ -1496,3 +1560,57 @@ int i915_gem_gtt_init(struct drm_device *dev) return 0; } + +static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) +{ + struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); + if (vma == NULL) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&vma->vma_link); + INIT_LIST_HEAD(&vma->mm_list); + INIT_LIST_HEAD(&vma->exec_list); + vma->vm = vm; + vma->obj = obj; + + switch (INTEL_INFO(vm->dev)->gen) { + case 8: + case 7: + case 6: + vma->unbind_vma = ggtt_unbind_vma; + vma->bind_vma = ggtt_bind_vma; + break; + case 5: + case 4: + case 3: + case 2: + BUG_ON(!i915_is_ggtt(vm)); + vma->unbind_vma = i915_ggtt_unbind_vma; + vma->bind_vma = i915_ggtt_bind_vma; + break; + default: + BUG(); + } + + /* Keep GGTT vmas first to make debug easier */ + if (i915_is_ggtt(vm)) + list_add(&vma->vma_link, &obj->vma_list); + else + list_add_tail(&vma->vma_link, &obj->vma_list); + + return vma; +} + +struct i915_vma * +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm) +{ + struct i915_vma *vma; + + vma = i915_gem_obj_to_vma(obj, vm); + if (!vma) + vma = __i915_gem_vma_create(obj, vm); + + return vma; +} -- cgit v1.1 From 3e7a032295f178d1db4e4b9ac25b6d6bc6d5826e Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:57 -0800 Subject: drm/i915: Remove vm arg from relocate entry The only place we were using it was for GEN6, which won't have PPGTT support anyway (ie. the VM is always the same). To clear things up, (it only added confusion for me since it doesn't allow us to assert vma->vm is what we always want, when just looking at the code). Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c093a2b..0999981 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -300,8 +300,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_vmas *eb, - struct drm_i915_gem_relocation_entry *reloc, - struct i915_address_space *vm) + struct drm_i915_gem_relocation_entry *reloc) { struct drm_device *dev = obj->base.dev; struct drm_gem_object *target_obj; @@ -325,7 +324,9 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, if (unlikely(IS_GEN6(dev) && reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && !target_i915_obj->has_global_gtt_mapping)) { - struct i915_vma *vma = i915_gem_obj_to_vma(target_i915_obj, vm); + struct i915_vma *vma = + list_first_entry(&target_i915_obj->vma_list, + typeof(*vma), vma_link); vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND); } @@ -424,8 +425,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, do { u64 offset = r->presumed_offset; - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, - vma->vm); + ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r); if (ret) return ret; @@ -454,8 +454,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma, int i, ret; for (i = 0; i < entry->relocation_count; i++) { - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], - vma->vm); + ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]); if (ret) return ret; } -- cgit v1.1 From e422b888ebda24f8aeeece032875c640acba2cdc Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:58 -0800 Subject: drm/i915: Add a context open function We'll be doing a bit more stuff with each file, so having our own open function should make things clean. This also allows us to easily add conditionals for stuff we don't want to do when we don't have HW contexts. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 7 +++++-- drivers/gpu/drm/i915/i915_gem_context.c | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9fe078b..2c0115e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2225,6 +2225,7 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, /* i915_gem_context.c */ int __must_check i915_gem_context_init(struct drm_device *dev); void i915_gem_context_fini(struct drm_device *dev); +int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); int i915_switch_context(struct intel_ring_buffer *ring, struct drm_file *file, int to_id); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2b92e89..254f575 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4859,6 +4859,7 @@ i915_gem_file_idle_work_handler(struct work_struct *work) int i915_gem_open(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv; + int ret; DRM_DEBUG_DRIVER("\n"); @@ -4874,9 +4875,11 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file) INIT_DELAYED_WORK(&file_priv->mm.idle_work, i915_gem_file_idle_work_handler); - idr_init(&file_priv->context_idr); + ret = i915_gem_context_open(dev, file); + if (ret) + kfree(file_priv); - return 0; + return ret; } static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 0640ab8..2ae6e4f 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -341,10 +341,25 @@ i915_gem_context_get_hang_stats(struct drm_device *dev, return &ctx->hang_stats; } +int i915_gem_context_open(struct drm_device *dev, struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + + if (!HAS_HW_CONTEXTS(dev)) + return 0; + + idr_init(&file_priv->context_idr); + + return 0; +} + void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + if (!HAS_HW_CONTEXTS(dev)) + return; + mutex_lock(&dev->struct_mutex); idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); idr_destroy(&file_priv->context_idr); -- cgit v1.1 From b731d33d05dd5ce6b387cbadb0d9d24cb3732b40 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:10:59 -0800 Subject: drm/i915: relax context alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the introduction of contexts per fd in the future, one can easily envision more contexts being used. We do not have an easy remedy to reduce the space requirements of the contexts, we can make things slightly better by using less stringent alignments on later hardware. Ville: Since I can almost predict you'll point this out. I can no longer find the docs which specify the 64k requirement on certain gen6 SKUs. If you'd like to change that too, be my guest. CC: Ville Syrjälä Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 2ae6e4f..4041370 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -93,12 +93,21 @@ * I've seen in a spec to date, and that was a workaround for a non-shipping * part. It should be safe to decrease this, but it's more future proof as is. */ -#define CONTEXT_ALIGN (64<<10) +#define GEN6_CONTEXT_ALIGN (64<<10) +#define GEN7_CONTEXT_ALIGN 4096 static struct i915_hw_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); static int do_switch(struct i915_hw_context *to); +static size_t get_context_alignment(struct drm_device *dev) +{ + if (IS_GEN6(dev)) + return GEN6_CONTEXT_ALIGN; + + return GEN7_CONTEXT_ALIGN; +} + static int get_context_size(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -206,14 +215,15 @@ static inline bool is_default_context(struct i915_hw_context *ctx) * context state of the GPU for applications that don't utilize HW contexts, as * well as an idle case. */ -static int create_default_context(struct drm_i915_private *dev_priv) +static int create_default_context(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; struct i915_hw_context *ctx; int ret; - BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); + BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - ctx = create_hw_context(dev_priv->dev, NULL); + ctx = create_hw_context(dev, NULL); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -223,7 +233,8 @@ static int create_default_context(struct drm_i915_private *dev_priv) * may not be available. To avoid this we always pin the * default context. */ - ret = i915_gem_obj_ggtt_pin(ctx->obj, CONTEXT_ALIGN, false, false); + ret = i915_gem_obj_ggtt_pin(ctx->obj, get_context_alignment(dev), + false, false); if (ret) { DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret); goto err_destroy; @@ -266,7 +277,7 @@ int i915_gem_context_init(struct drm_device *dev) return -E2BIG; } - ret = create_default_context(dev_priv); + ret = create_default_context(dev); if (ret) { DRM_DEBUG_DRIVER("Disabling HW Contexts; create failed %d\n", ret); @@ -433,7 +444,8 @@ static int do_switch(struct i915_hw_context *to) if (from == to && !to->remap_slice) return 0; - ret = i915_gem_obj_ggtt_pin(to->obj, CONTEXT_ALIGN, false, false); + ret = i915_gem_obj_ggtt_pin(to->obj, get_context_alignment(ring->dev), + false, false); if (ret) return ret; -- cgit v1.1 From ca01b12b401a0e17a265a5ee18bf33e2cfbd32aa Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:00 -0800 Subject: drm/i915: Simplify ring handling in execbuf Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 41 ++++++++---------------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0999981..dfe7cb9 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1006,41 +1006,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args->flags & I915_EXEC_IS_PINNED) flags |= I915_DISPATCH_PINNED; - switch (args->flags & I915_EXEC_RING_MASK) { - case I915_EXEC_DEFAULT: - case I915_EXEC_RENDER: - ring = &dev_priv->ring[RCS]; - break; - case I915_EXEC_BSD: - ring = &dev_priv->ring[VCS]; - if (ctx_id != DEFAULT_CONTEXT_ID) { - DRM_DEBUG("Ring %s doesn't support contexts\n", - ring->name); - return -EPERM; - } - break; - case I915_EXEC_BLT: - ring = &dev_priv->ring[BCS]; - if (ctx_id != DEFAULT_CONTEXT_ID) { - DRM_DEBUG("Ring %s doesn't support contexts\n", - ring->name); - return -EPERM; - } - break; - case I915_EXEC_VEBOX: - ring = &dev_priv->ring[VECS]; - if (ctx_id != DEFAULT_CONTEXT_ID) { - DRM_DEBUG("Ring %s doesn't support contexts\n", - ring->name); - return -EPERM; - } - break; - - default: + if ((args->flags & I915_EXEC_RING_MASK) > I915_NUM_RINGS) { DRM_DEBUG("execbuf with unknown ring: %d\n", (int)(args->flags & I915_EXEC_RING_MASK)); return -EINVAL; } + if (ctx_id != DEFAULT_CONTEXT_ID && + (args->flags & I915_EXEC_RING_MASK) > I915_EXEC_RENDER) + return -EPERM; + + if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT) + ring = &dev_priv->ring[RCS]; + else + ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1]; + if (!intel_ring_initialized(ring)) { DRM_DEBUG("execbuf with invalid ring: %d\n", (int)(args->flags & I915_EXEC_RING_MASK)); -- cgit v1.1 From 67e3d2979be1bf42d1818b2961c671eb31e0b4d9 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:01 -0800 Subject: drm/i915: Permit contexts on all rings If we want to use contexts in more abstract terms (specifically with PPGTT in mind), we need to allow them to be specified for any ring. Since the upcoming patches will bring about the use of multiple address spaces, and each ring needs to have an address space programmed (which we intend to do at context switch time), we can no longer only use RCS. With multiple rings having a last context, we must now unreference these contexts. NOTE: This commit requires an update to intel-gpu-tools to make it not fail. v2: Rebased with some logical conflicts. Squashed in the context fini refcount patch Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 54 +++++++++++++++++++++++------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 -- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 4041370..7a5311c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -98,7 +98,8 @@ static struct i915_hw_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); -static int do_switch(struct i915_hw_context *to); +static int do_switch(struct intel_ring_buffer *ring, + struct i915_hw_context *to); static size_t get_context_alignment(struct drm_device *dev) { @@ -240,7 +241,7 @@ static int create_default_context(struct drm_device *dev) goto err_destroy; } - ret = do_switch(ctx); + ret = do_switch(&dev_priv->ring[RCS], ctx); if (ret) { DRM_DEBUG_DRIVER("Switch failed %d\n", ret); goto err_unpin; @@ -261,7 +262,8 @@ err_destroy: int i915_gem_context_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int ret; + struct intel_ring_buffer *ring; + int i, ret; if (!HAS_HW_CONTEXTS(dev)) return 0; @@ -284,6 +286,16 @@ int i915_gem_context_init(struct drm_device *dev) return ret; } + for (i = RCS + 1; i < I915_NUM_RINGS; i++) { + if (!(INTEL_INFO(dev)->ring_mask & (1<ring[i]; + + /* NB: RCS will hold a ref for all rings */ + ring->default_context = dev_priv->ring[RCS].default_context; + } + DRM_DEBUG_DRIVER("HW context support initialized\n"); return 0; } @@ -292,6 +304,7 @@ void i915_gem_context_fini(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct i915_hw_context *dctx = dev_priv->ring[RCS].default_context; + int i; if (!HAS_HW_CONTEXTS(dev)) return; @@ -313,12 +326,22 @@ void i915_gem_context_fini(struct drm_device *dev) WARN_ON(dctx->obj->active); i915_gem_object_ggtt_unpin(dctx->obj); i915_gem_context_unreference(dctx); + dev_priv->ring[RCS].last_context = NULL; + } + + for (i = 0; i < I915_NUM_RINGS; i++) { + struct intel_ring_buffer *ring = &dev_priv->ring[i]; + if (!(INTEL_INFO(dev)->ring_mask & (1<last_context) + i915_gem_context_unreference(ring->last_context); + + ring->default_context = NULL; } i915_gem_object_ggtt_unpin(dctx->obj); i915_gem_context_unreference(dctx); - dev_priv->ring[RCS].default_context = NULL; - dev_priv->ring[RCS].last_context = NULL; } static int context_idr_cleanup(int id, void *p, void *data) @@ -431,19 +454,28 @@ mi_set_context(struct intel_ring_buffer *ring, return ret; } -static int do_switch(struct i915_hw_context *to) +static int do_switch(struct intel_ring_buffer *ring, + struct i915_hw_context *to) { - struct intel_ring_buffer *ring = to->ring; struct drm_i915_private *dev_priv = ring->dev->dev_private; struct i915_hw_context *from = ring->last_context; u32 hw_flags = 0; int ret, i; - BUG_ON(from != NULL && from->obj != NULL && !i915_gem_obj_is_pinned(from->obj)); + if (from != NULL && ring == &dev_priv->ring[RCS]) { + BUG_ON(from->obj == NULL); + BUG_ON(!i915_gem_obj_is_pinned(from->obj)); + } if (from == to && !to->remap_slice) return 0; + if (ring != &dev_priv->ring[RCS]) { + if (from) + i915_gem_context_unreference(from); + goto done; + } + ret = i915_gem_obj_ggtt_pin(to->obj, get_context_alignment(ring->dev), false, false); if (ret) @@ -511,6 +543,7 @@ static int do_switch(struct i915_hw_context *to) i915_gem_context_unreference(from); } +done: i915_gem_context_reference(to); ring->last_context = to; to->is_initialized = true; @@ -541,9 +574,6 @@ int i915_switch_context(struct intel_ring_buffer *ring, WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); - if (ring != &dev_priv->ring[RCS]) - return 0; - if (to_id == DEFAULT_CONTEXT_ID) { to = ring->default_context; } else { @@ -555,7 +585,7 @@ int i915_switch_context(struct intel_ring_buffer *ring, return -ENOENT; } - return do_switch(to); + return do_switch(ring, to); } int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index dfe7cb9..d608a07 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1011,9 +1011,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, (int)(args->flags & I915_EXEC_RING_MASK)); return -EINVAL; } - if (ctx_id != DEFAULT_CONTEXT_ID && - (args->flags & I915_EXEC_RING_MASK) > I915_EXEC_RENDER) - return -EPERM; if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT) ring = &dev_priv->ring[RCS]; -- cgit v1.1 From 0009e46cd54324c4af20b0b52b89973b1b914167 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:02 -0800 Subject: drm/i915: Track which ring a context ran on Previously we dropped the association of a context to a ring. It is however very important to know which ring a context ran on (we could have reused the other member, but I was nitpicky). This is very important when we switch address spaces, which unlike context objects, do change per ring. As an example, if we have: RCS BCS ctx A ctx A ctx B ctx B Without tracking the last ring B ran on, we wouldn't know to switch the address space on BCS in the last row. As a result, we no longer need to track which ring a context "belongs" to, as it never really made much sense anyway. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2c0115e..2b16c29 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -696,7 +696,7 @@ struct i915_hw_context { bool is_initialized; uint8_t remap_slice; struct drm_i915_file_private *file_priv; - struct intel_ring_buffer *ring; + struct intel_ring_buffer *last_ring; struct drm_i915_gem_object *obj; struct i915_ctx_hang_stats hang_stats; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 7a5311c..5f8bc06e 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -176,11 +176,6 @@ create_hw_context(struct drm_device *dev, goto err_out; } - /* The ring associated with the context object is handled by the normal - * object tracking code. We give an initial ring value simple to pass an - * assertion in the context switch code. - */ - ctx->ring = &dev_priv->ring[RCS]; list_add_tail(&ctx->link, &dev_priv->context_list); /* Default context will never have a file_priv */ @@ -208,7 +203,8 @@ err_out: static inline bool is_default_context(struct i915_hw_context *ctx) { - return (ctx == ctx->ring->default_context); + /* Cheap trick to determine default contexts */ + return ctx->file_priv ? false : true; } /** @@ -338,6 +334,7 @@ void i915_gem_context_fini(struct drm_device *dev) i915_gem_context_unreference(ring->last_context); ring->default_context = NULL; + ring->last_context = NULL; } i915_gem_object_ggtt_unpin(dctx->obj); @@ -467,7 +464,7 @@ static int do_switch(struct intel_ring_buffer *ring, BUG_ON(!i915_gem_obj_is_pinned(from->obj)); } - if (from == to && !to->remap_slice) + if (from == to && from->last_ring == ring && !to->remap_slice) return 0; if (ring != &dev_priv->ring[RCS]) { @@ -547,6 +544,7 @@ done: i915_gem_context_reference(to); ring->last_context = to; to->is_initialized = true; + to->last_ring = ring; return 0; } -- cgit v1.1 From acce9ffa4807027965ebd948456fa8385bbee32e Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:03 -0800 Subject: drm/i915: Better reset handling for contexts This patch adds to changes for contexts on reset: Sets last context to default - this will prevent the context switch happening after a reset. That switch is not possible because the rings are hung during reset and context switch requires reset. This behavior will need to be reworked in the future, but this is what we want for now. In the future, we'll also want to reset the guilty context to uninitialized. We should wait for ARB_Robustness related code to land for that. This is somewhat for paranoia. Because we really don't know what the GPU was doing when it hung, or the state it was in (mid context write, for example), later restoring the context is a bad idea. By setting the flag to not initialized, the next load of that context will not restore the state, and thus on the subsequent switch away from the context will overwrite the old data. NOTE: This code needs a fixup when we actually have multiple VMs. The issue that can occur is inactive objects in a VM will need to be destroyed before the last context unref. This can now happen via the fake switch introduced in this patch (and it other ways in the future) Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 2 ++ drivers/gpu/drm/i915/i915_gem_context.c | 43 +++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2b16c29..40acdde 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2225,6 +2225,7 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, /* i915_gem_context.c */ int __must_check i915_gem_context_init(struct drm_device *dev); void i915_gem_context_fini(struct drm_device *dev); +void i915_gem_context_reset(struct drm_device *dev); int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); int i915_switch_context(struct intel_ring_buffer *ring, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 254f575..fe17c62 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2412,6 +2412,8 @@ void i915_gem_reset(struct drm_device *dev) i915_gem_cleanup_ringbuffer(dev); + i915_gem_context_reset(dev); + i915_gem_restore_fences(dev); } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 5f8bc06e..509e460 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -255,6 +255,49 @@ err_destroy: return ret; } +void i915_gem_context_reset(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; + int i; + + if (!HAS_HW_CONTEXTS(dev)) + return; + + /* Prevent the hardware from restoring the last context (which hung) on + * the next switch */ + for (i = 0; i < I915_NUM_RINGS; i++) { + struct i915_hw_context *dctx; + if (!(INTEL_INFO(dev)->ring_mask & (1<ring[i]; + dctx = ring->default_context; + if (WARN_ON(!dctx)) + continue; + + if (!ring->last_context) + continue; + + if (ring->last_context == dctx) + continue; + + if (i == RCS) { + WARN_ON(i915_gem_obj_ggtt_pin(dctx->obj, + get_context_alignment(dev), + false, false)); + /* Fake a finish/inactive */ + dctx->obj->base.write_domain = 0; + dctx->obj->active = 0; + } + + i915_gem_context_unreference(ring->last_context); + i915_gem_context_reference(dctx); + ring->last_context = dctx; + } +} + int i915_gem_context_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; -- cgit v1.1 From 2fa48d8d4a0b09ec397a57a0f5717eddea8fb009 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:04 -0800 Subject: drm/i915: Split context enabling from init We **need** to do this for exactly 1 reason, because we want to embed a PPGTT into the context, but we don't want to special case the default context. To achieve that, we must be able to initialize contexts after the GTT is setup (so we can allocate and pin the default context's BO), but before the PPGTT and rings are initialized. This is because, currently, context initialization requires ring usage. We don't have rings until after the GTT is setup. If we split the enabling part of context initialization, the part requiring the ringbuffer, we can untangle this, and then later embed the PPGTT Incidentally this allows us to also adhere to the original design of context init/fini in future patches: they were only ever meant to be called at driver load and unload. v2: Move hw_contexts_disabled test in i915_gem_context_enable() (Chris) v3: BUG_ON after checking for disabled contexts. Or else it blows up pre gen6 (Ben) v4: Forward port Modified enable for each ring, since that patch is earlier in the series Dropped ring arg from create_default_context so it can be used by others Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 24 ++++++++++++++++------ drivers/gpu/drm/i915/i915_gem_context.c | 35 +++++++++++++++++++++++---------- 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 40acdde..61c0f5c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2227,6 +2227,7 @@ int __must_check i915_gem_context_init(struct drm_device *dev); void i915_gem_context_fini(struct drm_device *dev); void i915_gem_context_reset(struct drm_device *dev); int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); +int i915_gem_context_enable(struct drm_i915_private *dev_priv); void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); int i915_switch_context(struct intel_ring_buffer *ring, struct drm_file *file, int to_id); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fe17c62..e3431e7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4433,14 +4433,16 @@ i915_gem_init_hw(struct drm_device *dev) i915_gem_l3_remap(&dev_priv->ring[RCS], i); /* - * XXX: There was some w/a described somewhere suggesting loading - * contexts before PPGTT. + * XXX: Contexts should only be initialized once. Doing a switch to the + * default context switch however is something we'd like to do after + * reset or thaw (the latter may not actually be necessary for HW, but + * goes with our code better). Context switching requires rings (for + * the do_switch), but before enabling PPGTT. So don't move this. */ - ret = i915_gem_context_init(dev); + ret = i915_gem_context_enable(dev_priv); if (ret) { - i915_gem_cleanup_ringbuffer(dev); - DRM_ERROR("Context initialization failed %d\n", ret); - return ret; + DRM_ERROR("Context enable failed %d\n", ret); + goto err_out; } if (dev_priv->mm.aliasing_ppgtt) { @@ -4448,10 +4450,15 @@ i915_gem_init_hw(struct drm_device *dev) if (ret) { i915_gem_cleanup_aliasing_ppgtt(dev); DRM_INFO("PPGTT enable failed. This is not fatal, but unexpected\n"); + ret = 0; } } return 0; + +err_out: + i915_gem_cleanup_ringbuffer(dev); + return ret; } int i915_gem_init(struct drm_device *dev) @@ -4470,9 +4477,14 @@ int i915_gem_init(struct drm_device *dev) i915_gem_init_global_gtt(dev); + ret = i915_gem_context_init(dev); + if (ret) + return ret; + ret = i915_gem_init_hw(dev); mutex_unlock(&dev->struct_mutex); if (ret) { + i915_gem_context_fini(dev); i915_gem_cleanup_aliasing_ppgtt(dev); drm_mm_takedown(&dev_priv->gtt.base.mm); return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 509e460..08e48b2 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -237,19 +237,11 @@ static int create_default_context(struct drm_device *dev) goto err_destroy; } - ret = do_switch(&dev_priv->ring[RCS], ctx); - if (ret) { - DRM_DEBUG_DRIVER("Switch failed %d\n", ret); - goto err_unpin; - } - dev_priv->ring[RCS].default_context = ctx; DRM_DEBUG_DRIVER("Default HW context loaded\n"); return 0; -err_unpin: - i915_gem_object_ggtt_unpin(ctx->obj); err_destroy: i915_gem_context_unreference(ctx); return ret; @@ -307,8 +299,9 @@ int i915_gem_context_init(struct drm_device *dev) if (!HAS_HW_CONTEXTS(dev)) return 0; - /* If called from reset, or thaw... we've been here already */ - if (dev_priv->ring[RCS].default_context) + /* Init should only be called once per module load. Eventually the + * restriction on the context_disabled check can be loosened. */ + if (WARN_ON(dev_priv->ring[RCS].default_context)) return 0; dev_priv->hw_context_size = round_up(get_context_size(dev), 4096); @@ -384,6 +377,28 @@ void i915_gem_context_fini(struct drm_device *dev) i915_gem_context_unreference(dctx); } +int i915_gem_context_enable(struct drm_i915_private *dev_priv) +{ + struct intel_ring_buffer *ring; + int ret, i; + + if (!HAS_HW_CONTEXTS(dev_priv->dev)) + return 0; + + /* FIXME: We should make this work, even in reset */ + if (i915_reset_in_progress(&dev_priv->gpu_error)) + return 0; + + BUG_ON(!dev_priv->ring[RCS].default_context); + for_each_ring(ring, dev_priv, i) { + ret = do_switch(ring, ring->default_context); + if (ret) + return ret; + } + + return 0; +} + static int context_idr_cleanup(int id, void *p, void *data) { struct i915_hw_context *ctx = p; -- cgit v1.1 From a45d0f6a7fbfcffeb76b8910ee166affcb4b8229 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:05 -0800 Subject: drm/i915: Generalize default context setup The plan to to make every file descriptor have a default context. To accommodate this, generalize out default context setup function so it can be used at file open time. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 08e48b2..149cf00 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -212,9 +212,9 @@ static inline bool is_default_context(struct i915_hw_context *ctx) * context state of the GPU for applications that don't utilize HW contexts, as * well as an idle case. */ -static int create_default_context(struct drm_device *dev) +static struct i915_hw_context * +create_default_context(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; struct i915_hw_context *ctx; int ret; @@ -222,7 +222,7 @@ static int create_default_context(struct drm_device *dev) ctx = create_hw_context(dev, NULL); if (IS_ERR(ctx)) - return PTR_ERR(ctx); + return ctx; /* We may need to do things with the shrinker which require us to * immediately switch back to the default context. This can cause a @@ -237,14 +237,12 @@ static int create_default_context(struct drm_device *dev) goto err_destroy; } - dev_priv->ring[RCS].default_context = ctx; - DRM_DEBUG_DRIVER("Default HW context loaded\n"); - return 0; + return ctx; err_destroy: i915_gem_context_unreference(ctx); - return ret; + return ERR_PTR(ret); } void i915_gem_context_reset(struct drm_device *dev) @@ -294,7 +292,7 @@ int i915_gem_context_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ring_buffer *ring; - int i, ret; + int i; if (!HAS_HW_CONTEXTS(dev)) return 0; @@ -311,11 +309,12 @@ int i915_gem_context_init(struct drm_device *dev) return -E2BIG; } - ret = create_default_context(dev); - if (ret) { - DRM_DEBUG_DRIVER("Disabling HW Contexts; create failed %d\n", - ret); - return ret; + + dev_priv->ring[RCS].default_context = create_default_context(dev); + if (IS_ERR_OR_NULL(dev_priv->ring[RCS].default_context)) { + DRM_DEBUG_DRIVER("Disabling HW Contexts; create failed %ld\n", + PTR_ERR(dev_priv->ring[RCS].default_context)); + return PTR_ERR(dev_priv->ring[RCS].default_context); } for (i = RCS + 1; i < I915_NUM_RINGS; i++) { -- cgit v1.1 From a3d67d2396e1d8563dbd420a427bed704bcaff09 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:06 -0800 Subject: drm/i915: PPGTT vfuncs should take a ppgtt argument Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/i915_gem.c | 4 +++- drivers/gpu/drm/i915/i915_gem_gtt.c | 8 ++++---- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 61c0f5c..3d26c4c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -671,7 +671,8 @@ struct i915_hw_ppgtt { dma_addr_t *pt_dma_addr; dma_addr_t *gen8_pt_dma_addr[4]; }; - int (*enable)(struct drm_device *dev); + + int (*enable)(struct i915_hw_ppgtt *ppgtt); }; struct i915_ctx_hang_stats { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e3431e7..cc1ac79 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4404,6 +4404,7 @@ int i915_gem_init_hw(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; + struct i915_hw_ppgtt *ppgtt; int ret, i; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) @@ -4446,7 +4447,8 @@ i915_gem_init_hw(struct drm_device *dev) } if (dev_priv->mm.aliasing_ppgtt) { - ret = dev_priv->mm.aliasing_ppgtt->enable(dev); + ppgtt = dev_priv->mm.aliasing_ppgtt; + ret = ppgtt->enable(ppgtt); if (ret) { i915_gem_cleanup_aliasing_ppgtt(dev); DRM_INFO("PPGTT enable failed. This is not fatal, but unexpected\n"); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 73117ec..976bc1e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -230,11 +230,11 @@ static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry, return 0; } -static int gen8_ppgtt_enable(struct drm_device *dev) +static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) { + struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ring_buffer *ring; - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; int i, j, ret; /* bit of a hack to find the actual last used pd */ @@ -491,12 +491,12 @@ static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) readl(pd_addr); } -static int gen6_ppgtt_enable(struct drm_device *dev) +static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) { + struct drm_device *dev = ppgtt->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; uint32_t pd_offset; struct intel_ring_buffer *ring; - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; int i; BUG_ON(ppgtt->pd_offset & 0x3f); -- cgit v1.1 From c8d4c0d6683d9270c3f1b69b73848f6fadf0d78b Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:07 -0800 Subject: drm/i915: Use drm_mm for PPGTT PDEs When PPGTT support was originally enabled, it was only designed to support 1 PPGTT. It therefore made sense to simply hide the GGTT space required to enable this from the drm_mm allocator. Since we intend to support full PPGTT, which means more than 1, and they can be created and destroyed ad hoc it will be required to use the proper allocation techniques we already have. The first step here is to make the existing single PPGTT use the allocator. The astute observer will notice that we are reserving space in the GGTT for the PDEs for the lifetime of the address space, and would be right to question whether or not this is a good idea. It does not make a difference with this current patch only the aliasing PPGTT (indeed the PDEs should still be hidden from the shrinker). For the future, we are allocating from top to bottom to avoid using the precious "gtt space" The GGTT space at that point should only be used for scanout, HW contexts, ringbuffers, HWSP, PDEs, and a couple of other small buffers (potentially) used by the kernel. Everything else should be mapped into a PPGTT. To put the consumption in more tangible terms, it takes approximately 4 sets of PDEs to equal one 19x10 framebuffer (with no fancy stride or alignment constraints). 3/4 of the total [average] GGTT can be used for PDEs, and hopefully never touch the 1/4 that the framebuffer needs. The astute, and persistent observer might ask about the page tables which are also pinned for the address space. This waste is unfortunate. We use 2MB of memory per address space. We leave wrapping the PDEs as a real GEM object as a TODO. v2: Align PDEs to 64b in GTT Allocate the node dynamically so we can use drm_mm_put_block Now tested on IGT Allocate node at the top to avoid fragmentation (Chris) v3: Use Chris' top down allocator v4: Embed drm_mm_node into ppgtt struct (Jesse) Remove hunks which didn't belong (Jesse) v5: Don't subtract guard page since we now killed the guard page prior to this patch. (Ben) v6: Rebased and removed guard page stuff. Added a chunk to the commit message Allow adding a context to mappable region v7: Undo v3, so we can make the drm patch last in the series Cc: Chris Wilson Reviewed-by: Jesse Barnes (v4) Signed-off-by: Ben Widawsky squash: drm/i915: allow PPGTT to use mappable Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_gtt.c | 56 ++++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3d26c4c..ab65308 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -655,6 +655,7 @@ struct i915_gtt { struct i915_hw_ppgtt { struct i915_address_space base; + struct drm_mm_node node; unsigned num_pd_entries; union { struct page **pt_pages; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 976bc1e..926b2a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -618,6 +618,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) int i; drm_mm_takedown(&ppgtt->base.mm); + drm_mm_remove_node(&ppgtt->node); if (ppgtt->pt_dma_addr) { for (i = 0; i < ppgtt->num_pd_entries; i++) @@ -635,16 +636,27 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) { +#define GEN6_PD_ALIGN (PAGE_SIZE * 16) +#define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE) struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - unsigned first_pd_entry_in_global_pt; - int i; - int ret = -ENOMEM; + int i, ret; - /* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024 - * entries. For aliasing ppgtt support we just steal them at the end for - * now. */ - first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt); + /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The + * allocator works in address space sizes, so it's multiplied by page + * size. We allocate at the top of the GTT to avoid fragmentation. + */ + BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm)); + ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm, + &ppgtt->node, GEN6_PD_SIZE, + GEN6_PD_ALIGN, 0, + 0, dev_priv->gtt.base.total, + DRM_MM_SEARCH_DEFAULT); + if (ret) + return ret; + + if (ppgtt->node.start < dev_priv->gtt.mappable_end) + DRM_DEBUG("Forced to use aperture for PDEs\n"); ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode; ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES; @@ -657,8 +669,10 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE; ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *), GFP_KERNEL); - if (!ppgtt->pt_pages) + if (!ppgtt->pt_pages) { + drm_mm_remove_node(&ppgtt->node); return -ENOMEM; + } for (i = 0; i < ppgtt->num_pd_entries; i++) { ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL); @@ -688,7 +702,11 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true); - ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t); + DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n", + ppgtt->node.size >> 20, + ppgtt->node.start / PAGE_SIZE); + ppgtt->pd_offset = + ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t); return 0; @@ -705,6 +723,7 @@ err_pt_alloc: __free_page(ppgtt->pt_pages[i]); } kfree(ppgtt->pt_pages); + drm_mm_remove_node(&ppgtt->node); return ret; } @@ -1249,27 +1268,14 @@ void i915_gem_init_global_gtt(struct drm_device *dev) gtt_size = dev_priv->gtt.base.total; mappable_size = dev_priv->gtt.mappable_end; + i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { int ret; - if (INTEL_INFO(dev)->gen <= 7) { - /* PPGTT pdes are stolen from global gtt ptes, so shrink the - * aperture accordingly when using aliasing ppgtt. */ - gtt_size -= GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE; - } - - i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); - ret = i915_gem_init_aliasing_ppgtt(dev); - if (!ret) - return; - - DRM_ERROR("Aliased PPGTT setup failed %d\n", ret); - drm_mm_takedown(&dev_priv->gtt.base.mm); - if (INTEL_INFO(dev)->gen < 8) - gtt_size += GEN6_PPGTT_PD_ENTRIES*PAGE_SIZE; + if (ret) + DRM_ERROR("Aliased PPGTT setup failed %d\n", ret); } - i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); } static int setup_scratch_page(struct drm_device *dev) -- cgit v1.1 From e3cc19957f519dede119d6fc2fc51869bfb09e0e Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:08 -0800 Subject: drm/i915: One hopeful eviction on PPGTT alloc The patch before this changed the way in which we allocate space for the PPGTT PDEs. It began carving out the PPGTT PDEs (which live in the Global GTT) from the GGTT's drm_mm. Prior to that patch, the PDEs were hidden from the drm_mm, and therefore could never fail to be allocated. In unfortunate cases, the drm_mm may be full when we want to allocate the space. This can technically occur whenever we try to allocate, which happens in two places currently. Practically, it can only really ever happen at GPU reset. Later, when we allocate more PDEs for multiple PPGTTs this will potentially even more useful. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 926b2a6..91a76df 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -640,6 +640,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) #define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE) struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; + bool retried = false; int i, ret; /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The @@ -647,13 +648,22 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) * size. We allocate at the top of the GTT to avoid fragmentation. */ BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm)); +alloc: ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm, &ppgtt->node, GEN6_PD_SIZE, GEN6_PD_ALIGN, 0, 0, dev_priv->gtt.base.total, DRM_MM_SEARCH_DEFAULT); - if (ret) - return ret; + if (ret == -ENOSPC && !retried) { + ret = i915_gem_evict_something(dev, &dev_priv->gtt.base, + GEN6_PD_SIZE, GEN6_PD_ALIGN, + I915_CACHE_NONE, false, true); + if (ret) + return ret; + + retried = true; + goto alloc; + } if (ppgtt->node.start < dev_priv->gtt.mappable_end) DRM_DEBUG("Forced to use aperture for PDEs\n"); -- cgit v1.1 From b4a74e3adf616c5deb3c3c319352d89e62ff9ecc Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:09 -0800 Subject: drm/i915: Use platform specific ppgtt enable Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 93 ++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 91a76df..fbad915 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -491,61 +491,73 @@ static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) readl(pd_addr); } -static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) +static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) +{ + BUG_ON(ppgtt->pd_offset & 0x3f); + + return (ppgtt->pd_offset / 64) << 16; +} + +static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) { struct drm_device *dev = ppgtt->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; - uint32_t pd_offset; struct intel_ring_buffer *ring; + uint32_t ecochk, ecobits; int i; - BUG_ON(ppgtt->pd_offset & 0x3f); - gen6_write_pdes(ppgtt); - pd_offset = ppgtt->pd_offset; - pd_offset /= 64; /* in cachelines, */ - pd_offset <<= 16; + ecobits = I915_READ(GAC_ECO_BITS); + I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); - if (INTEL_INFO(dev)->gen == 6) { - uint32_t ecochk, gab_ctl, ecobits; + ecochk = I915_READ(GAM_ECOCHK); + if (IS_HASWELL(dev)) { + ecochk |= ECOCHK_PPGTT_WB_HSW; + } else { + ecochk |= ECOCHK_PPGTT_LLC_IVB; + ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; + } + I915_WRITE(GAM_ECOCHK, ecochk); + /* GFX_MODE is per-ring on gen7+ */ - ecobits = I915_READ(GAC_ECO_BITS); - I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | - ECOBITS_PPGTT_CACHE64B); + for_each_ring(ring, dev_priv, i) { + I915_WRITE(RING_MODE_GEN7(ring), + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - gab_ctl = I915_READ(GAB_CTL); - I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); + I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); + I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); + } + return 0; +} - ecochk = I915_READ(GAM_ECOCHK); - I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | - ECOCHK_PPGTT_CACHE64B); - I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - } else if (INTEL_INFO(dev)->gen >= 7) { - uint32_t ecochk, ecobits; +static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) +{ + struct drm_device *dev = ppgtt->base.dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; + uint32_t ecochk, gab_ctl, ecobits; + int i; - ecobits = I915_READ(GAC_ECO_BITS); - I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); + gen6_write_pdes(ppgtt); - ecochk = I915_READ(GAM_ECOCHK); - if (IS_HASWELL(dev)) { - ecochk |= ECOCHK_PPGTT_WB_HSW; - } else { - ecochk |= ECOCHK_PPGTT_LLC_IVB; - ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; - } - I915_WRITE(GAM_ECOCHK, ecochk); - /* GFX_MODE is per-ring on gen7+ */ - } + ecobits = I915_READ(GAC_ECO_BITS); + I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | + ECOBITS_PPGTT_CACHE64B); - for_each_ring(ring, dev_priv, i) { - if (INTEL_INFO(dev)->gen >= 7) - I915_WRITE(RING_MODE_GEN7(ring), - _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + gab_ctl = I915_READ(GAB_CTL); + I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); + + ecochk = I915_READ(GAM_ECOCHK); + I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); + + I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + for_each_ring(ring, dev_priv, i) { I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); - I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset); + I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); } + return 0; } @@ -670,7 +682,12 @@ alloc: ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode; ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES; - ppgtt->enable = gen6_ppgtt_enable; + if (IS_GEN6(dev)) + ppgtt->enable = gen6_ppgtt_enable; + if (IS_GEN7(dev)) + ppgtt->enable = gen7_ppgtt_enable; + else + BUG(); ppgtt->base.clear_range = gen6_ppgtt_clear_range; ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.cleanup = gen6_ppgtt_cleanup; -- cgit v1.1 From eeb9488e751a0a6401e7516a893efaf9d1f77fb5 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:10 -0800 Subject: drm/i915: Extract mm switching to function In order to do the full context switch with address space, it's convenient to have a way to switch the address space. We already have this in our code - just pull it out to be called by the context switch code later. v2: Rebased on BDW support. Required adding BDW. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 3 ++ drivers/gpu/drm/i915/i915_gem_gtt.c | 85 +++++++++++++++++++++++++------------ 2 files changed, 61 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ab65308..99ef5ed 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -674,6 +674,9 @@ struct i915_hw_ppgtt { }; int (*enable)(struct i915_hw_ppgtt *ppgtt); + int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, + struct intel_ring_buffer *ring, + bool synchronous); }; struct i915_ctx_hang_stats { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index fbad915..bf6abf1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -72,6 +72,7 @@ static void ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); static void ppgtt_unbind_vma(struct i915_vma *vma); +static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt); static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level, @@ -230,37 +231,23 @@ static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry, return 0; } -static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) +static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, + struct intel_ring_buffer *ring, + bool synchronous) { - struct drm_device *dev = ppgtt->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_ring_buffer *ring; - int i, j, ret; + int i, ret; /* bit of a hack to find the actual last used pd */ int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE; - for_each_ring(ring, dev_priv, j) { - I915_WRITE(RING_MODE_GEN7(ring), - _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - } - for (i = used_pd - 1; i >= 0; i--) { dma_addr_t addr = ppgtt->pd_dma_addr[i]; - for_each_ring(ring, dev_priv, j) { - ret = gen8_write_pdp(ring, i, addr, - i915_reset_in_progress(&dev_priv->gpu_error)); - if (ret) - goto err_out; - } + ret = gen8_write_pdp(ring, i, addr, synchronous); + if (ret) + return ret; } - return 0; -err_out: - for_each_ring(ring, dev_priv, j) - I915_WRITE(RING_MODE_GEN7(ring), - _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE)); - return ret; + return 0; } static void gen8_ppgtt_clear_range(struct i915_address_space *vm, @@ -397,6 +384,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT); ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE; ppgtt->enable = gen8_ppgtt_enable; + ppgtt->switch_mm = gen8_mm_switch; ppgtt->base.clear_range = gen8_ppgtt_clear_range; ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; ppgtt->base.cleanup = gen8_ppgtt_cleanup; @@ -498,6 +486,45 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) return (ppgtt->pd_offset / 64) << 16; } +static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, + struct intel_ring_buffer *ring, + bool synchronous) +{ + struct drm_device *dev = ppgtt->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); + I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); + + POSTING_READ(RING_PP_DIR_DCLV(ring)); + + return 0; +} + +static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) +{ + struct drm_device *dev = ppgtt->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; + int j, ret; + + for_each_ring(ring, dev_priv, j) { + I915_WRITE(RING_MODE_GEN7(ring), + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + ret = ppgtt->switch_mm(ppgtt, ring, true); + if (ret) + goto err_out; + } + + return 0; + +err_out: + for_each_ring(ring, dev_priv, j) + I915_WRITE(RING_MODE_GEN7(ring), + _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE)); + return ret; +} + static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) { struct drm_device *dev = ppgtt->base.dev; @@ -519,14 +546,16 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; } I915_WRITE(GAM_ECOCHK, ecochk); - /* GFX_MODE is per-ring on gen7+ */ for_each_ring(ring, dev_priv, i) { + int ret; + /* GFX_MODE is per-ring on gen7+ */ I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + ret = ppgtt->switch_mm(ppgtt, ring, true); + if (ret) + return ret; - I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); - I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); } return 0; } @@ -554,8 +583,9 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); for_each_ring(ring, dev_priv, i) { - I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); - I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); + int ret = ppgtt->switch_mm(ppgtt, ring, true); + if (ret) + return ret; } return 0; @@ -688,6 +718,7 @@ alloc: ppgtt->enable = gen7_ppgtt_enable; else BUG(); + ppgtt->switch_mm = gen6_mm_switch; ppgtt->base.clear_range = gen6_ppgtt_clear_range; ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.cleanup = gen6_ppgtt_cleanup; -- cgit v1.1 From 48a10389c82df842658c5d2560768eb674b71258 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:11 -0800 Subject: drm/i915: Use LRI for switching PP_DIR_BASE The docs seem to suggest this is the appropriate method (though it doesn't say so outright). In other words, we probably should have done this before. We certainly must do this for switching VMs on the fly, since synchronizing the rings to MMIO updates isn't acceptable. v2: Make the reset code actually work for all rings. Note that this was fixed in subsequent commits, but was indeed broken for this commit. Add a posting read to the reset case. It probably should have existed before hand, but since we have no failures; there is no reason to make it a separate commit. Make IS_GEN6 not use the ring because I am seeing crashes when using it. It is a bit of a hack in this patch, it will get fixed up in a couple of patches. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 56 ++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index bf6abf1..08a706d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -486,6 +486,50 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) return (ppgtt->pd_offset / 64) << 16; } +static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, + struct intel_ring_buffer *ring, + bool synchronous) +{ + struct drm_device *dev = ppgtt->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + /* If we're in reset, we can assume the GPU is sufficiently idle to + * manually frob these bits. Ideally we could use the ring functions, + * except our error handling makes it quite difficult (can't use + * intel_ring_begin, ring->flush, or intel_ring_advance) + * + * FIXME: We should try not to special case reset + */ + if (synchronous || + i915_reset_in_progress(&dev_priv->gpu_error)) { + WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt); + I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); + I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); + POSTING_READ(RING_PP_DIR_BASE(ring)); + return 0; + } + + /* NB: TLBs must be flushed and invalidated before a switch */ + ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + if (ret) + return ret; + + ret = intel_ring_begin(ring, 6); + if (ret) + return ret; + + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); + intel_ring_emit(ring, RING_PP_DIR_DCLV(ring)); + intel_ring_emit(ring, PP_DIR_DCLV_2G); + intel_ring_emit(ring, RING_PP_DIR_BASE(ring)); + intel_ring_emit(ring, get_pd_offset(ppgtt)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + + return 0; +} + static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, struct intel_ring_buffer *ring, bool synchronous) @@ -493,6 +537,9 @@ static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; + if (!synchronous) + return 0; + I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); @@ -712,13 +759,14 @@ alloc: ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode; ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES; - if (IS_GEN6(dev)) + if (IS_GEN6(dev)) { ppgtt->enable = gen6_ppgtt_enable; - if (IS_GEN7(dev)) + ppgtt->switch_mm = gen6_mm_switch; + } else if (IS_GEN7(dev)) { ppgtt->enable = gen7_ppgtt_enable; - else + ppgtt->switch_mm = gen7_mm_switch; + } else BUG(); - ppgtt->switch_mm = gen6_mm_switch; ppgtt->base.clear_range = gen6_ppgtt_clear_range; ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.cleanup = gen6_ppgtt_cleanup; -- cgit v1.1 From 90252e5c680c8181500ea32864bb45f65f904ffd Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:12 -0800 Subject: drm/i915: Flush TLBs after !RCS PP_DIR_BASE I've found this by accident. The docs don't really come out and say you need to do this. What the docs do tell you is you need to flush the TLBs before you set the PP_DIR_BASE, and that the RCS will invalidate its TLBs upon setting the new PP_DIR_BASE. It makes no such comment about any of the other rings. Empirically, this indeed fixes a really obvious bug whereby the batches being sent to the blitter were not executing (we were executing the HSWP somehow instead). NOTE: This should make no difference with the current code. It only applies when we start using multiple VMs. NOTE2: HSW appears to be immune to this. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 54 +++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 08a706d..0218e34 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -486,6 +486,50 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) return (ppgtt->pd_offset / 64) << 16; } +static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, + struct intel_ring_buffer *ring, + bool synchronous) +{ + struct drm_device *dev = ppgtt->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + /* If we're in reset, we can assume the GPU is sufficiently idle to + * manually frob these bits. Ideally we could use the ring functions, + * except our error handling makes it quite difficult (can't use + * intel_ring_begin, ring->flush, or intel_ring_advance) + * + * FIXME: We should try not to special case reset + */ + if (synchronous || + i915_reset_in_progress(&dev_priv->gpu_error)) { + WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt); + I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); + I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); + POSTING_READ(RING_PP_DIR_BASE(ring)); + return 0; + } + + /* NB: TLBs must be flushed and invalidated before a switch */ + ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + if (ret) + return ret; + + ret = intel_ring_begin(ring, 6); + if (ret) + return ret; + + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); + intel_ring_emit(ring, RING_PP_DIR_DCLV(ring)); + intel_ring_emit(ring, PP_DIR_DCLV_2G); + intel_ring_emit(ring, RING_PP_DIR_BASE(ring)); + intel_ring_emit(ring, get_pd_offset(ppgtt)); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + + return 0; +} + static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, struct intel_ring_buffer *ring, bool synchronous) @@ -527,6 +571,13 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); + /* XXX: RCS is the only one to auto invalidate the TLBs? */ + if (ring->id != RCS) { + ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + if (ret) + return ret; + } + return 0; } @@ -762,6 +813,9 @@ alloc: if (IS_GEN6(dev)) { ppgtt->enable = gen6_ppgtt_enable; ppgtt->switch_mm = gen6_mm_switch; + } else if (IS_HASWELL(dev)) { + ppgtt->enable = gen7_ppgtt_enable; + ppgtt->switch_mm = hsw_mm_switch; } else if (IS_GEN7(dev)) { ppgtt->enable = gen7_ppgtt_enable; ppgtt->switch_mm = gen7_mm_switch; -- cgit v1.1 From d6660add648d10e7e35085d8c7d2653e0f9f61b7 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:13 -0800 Subject: drm/i915: Generalize PPGTT init Rearrange the initialization code to try to special case the aliasing PPGTT less, and provide usable interfaces for the general case later. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0218e34..fdeed68 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -888,15 +888,11 @@ err_pt_alloc: return ret; } -static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev) +static int i915_gem_init_ppgtt(struct drm_device *dev, + struct i915_hw_ppgtt *ppgtt) { struct drm_i915_private *dev_priv = dev->dev_private; - struct i915_hw_ppgtt *ppgtt; - int ret; - - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) - return -ENOMEM; + int ret = 0; ppgtt->base.dev = dev; @@ -907,13 +903,9 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev) else BUG(); - if (ret) - kfree(ppgtt); - else { - dev_priv->mm.aliasing_ppgtt = ppgtt; + if (!ret) drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total); - } return ret; } @@ -1430,11 +1422,23 @@ void i915_gem_init_global_gtt(struct drm_device *dev) i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { + struct i915_hw_ppgtt *ppgtt; int ret; - ret = i915_gem_init_aliasing_ppgtt(dev); - if (ret) - DRM_ERROR("Aliased PPGTT setup failed %d\n", ret); + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) { + DRM_ERROR("Aliased PPGTT setup failed -ENOMEM\n"); + return; + } + + ret = i915_gem_init_ppgtt(dev, ppgtt); + if (!ret) { + dev_priv->mm.aliasing_ppgtt = ppgtt; + return; + } + + kfree(ppgtt); + DRM_ERROR("Aliased PPGTT setup failed %d\n", ret); } } -- cgit v1.1 From 246cbfb5fb9a1ca0997fbb135464c1ff5bb9c549 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:14 -0800 Subject: drm/i915: Reorganize intel_enable_ppgtt This patch consolidates the way in which we handle the various supported PPGTT by module parameter in addition to what the hardware supports. It strives to make doing the right thing in the code as simple as possible, with the USES_ macros. I've opted to add the full PPGTT argument simply so one can see how I intend to use this function. It will not/cannot be used until later. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 22 +++++++++++++++++++++- drivers/gpu/drm/i915/i915_gem_gtt.c | 20 ++------------------ 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 99ef5ed..dbea50a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1828,7 +1828,8 @@ struct drm_i915_file_private { #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) -#define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >=6 && !IS_VALLEYVIEW(dev)) +#define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev)) +#define USES_ALIASING_PPGTT(dev) intel_enable_ppgtt(dev, false) #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) @@ -2272,6 +2273,25 @@ static inline void i915_gem_chipset_flush(struct drm_device *dev) if (INTEL_INFO(dev)->gen < 6) intel_gtt_chipset_flush(); } +int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); +static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full) +{ + if (i915_enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev)) + return false; + + BUG_ON(full); + +#ifdef CONFIG_INTEL_IOMMU + /* Disable ppgtt on SNB if VT-d is on. */ + if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) { + DRM_INFO("Disabling PPGTT because VT-d is on\n"); + return false; + } +#endif + + return HAS_ALIASING_PPGTT(dev); +} + /* i915_gem_evict.c */ diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index fdeed68..c69fa2c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -888,8 +888,7 @@ err_pt_alloc: return ret; } -static int i915_gem_init_ppgtt(struct drm_device *dev, - struct i915_hw_ppgtt *ppgtt) +int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) { struct drm_i915_private *dev_priv = dev->dev_private; int ret = 0; @@ -1397,21 +1396,6 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true); } -static bool -intel_enable_ppgtt(struct drm_device *dev) -{ - if (i915_enable_ppgtt >= 0) - return i915_enable_ppgtt; - -#ifdef CONFIG_INTEL_IOMMU - /* Disable ppgtt on SNB if VT-d is on. */ - if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) - return false; -#endif - - return true; -} - void i915_gem_init_global_gtt(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -1421,7 +1405,7 @@ void i915_gem_init_global_gtt(struct drm_device *dev) mappable_size = dev_priv->gtt.mappable_end; i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); - if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { + if (USES_ALIASING_PPGTT(dev)) { struct i915_hw_ppgtt *ppgtt; int ret; -- cgit v1.1 From c7c48dfdff246d65408ff4f336978cc861722ca4 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:15 -0800 Subject: drm/i915: Add VM to context Pretty straightforward so far except for the bit about the refcounting. The PPGTT will potentially be shared amongst multiple contexts. Because contexts themselves have a refcounted lifecycle, the easiest way to manage this will be to refcount the PPGTT. To acheive this, we piggy back off of the existing context refcount, and will increment and decrement the PPGTT refcount with context creation, and destruction. To put it more clearly, if context A, and context B both use PPGTT 0, we can't free the PPGTT until both A, and B are destroyed. Note that because the PPGTT is permanently pinned (for now), it really just matters for the PPGTT destruction, as opposed to making space under memory pressure. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 8 ++++++++ drivers/gpu/drm/i915/i915_gem_context.c | 12 +++++++++++- drivers/gpu/drm/i915/i915_gem_gtt.c | 7 +++++-- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dbea50a..a47a43e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -655,6 +655,7 @@ struct i915_gtt { struct i915_hw_ppgtt { struct i915_address_space base; + struct kref ref; struct drm_mm_node node; unsigned num_pd_entries; union { @@ -704,6 +705,7 @@ struct i915_hw_context { struct intel_ring_buffer *last_ring; struct drm_i915_gem_object *obj; struct i915_ctx_hang_stats hang_stats; + struct i915_address_space *vm; struct list_head link; }; @@ -2292,6 +2294,12 @@ static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full) return HAS_ALIASING_PPGTT(dev); } +static inline void ppgtt_release(struct kref *kref) +{ + struct i915_hw_ppgtt *ppgtt = container_of(kref, struct i915_hw_ppgtt, ref); + + ppgtt->base.cleanup(&ppgtt->base); +} /* i915_gem_evict.c */ diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 149cf00..0b32bcf 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -141,9 +141,19 @@ void i915_gem_context_free(struct kref *ctx_ref) { struct i915_hw_context *ctx = container_of(ctx_ref, typeof(*ctx), ref); + struct i915_hw_ppgtt *ppgtt = NULL; - list_del(&ctx->link); + /* We refcount even the aliasing PPGTT to keep the code symmetric */ + if (USES_ALIASING_PPGTT(ctx->obj->base.dev)) + ppgtt = container_of(ctx->vm, struct i915_hw_ppgtt, base); + + /* XXX: Free up the object before tearing down the address space, in + * case we're bound in the PPGTT */ drm_gem_object_unreference(&ctx->obj->base); + + if (ppgtt) + kref_put(&ppgtt->ref, ppgtt_release); + list_del(&ctx->link); kfree(ctx); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c69fa2c..bd92288 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -902,9 +902,11 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) else BUG(); - if (!ret) + if (!ret) { + kref_init(&ppgtt->ref); drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total); + } return ret; } @@ -917,7 +919,8 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev) if (!ppgtt) return; - ppgtt->base.cleanup(&ppgtt->base); + kref_put(&dev_priv->mm.aliasing_ppgtt->ref, ppgtt_release); + dev_priv->mm.aliasing_ppgtt = NULL; } -- cgit v1.1 From 9f273d48aa98cd193b19db9bb4c16bfb81c39052 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:16 -0800 Subject: drm/i915: Write PDEs at init instead of enable We won't be calling enable() for all PPGTTs. We do need to write PDEs for all PPGTTs however. By moving the writing to init (which is called for all PPGTTs) we should accomplish this. ADD NOTE ABOUT PDE restore TODO: Eventually, we should allocate the page tables on demand. v2: Rebased on BDW. Only do PDEs for pre-gen8 Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index bd92288..2c07795 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -631,8 +631,6 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) uint32_t ecochk, ecobits; int i; - gen6_write_pdes(ppgtt); - ecobits = I915_READ(GAC_ECO_BITS); I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); @@ -666,8 +664,6 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) uint32_t ecochk, gab_ctl, ecobits; int i; - gen6_write_pdes(ppgtt); - ecobits = I915_READ(GAC_ECO_BITS); I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B); @@ -906,6 +902,8 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) kref_init(&ppgtt->ref); drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total); + if (INTEL_INFO(dev)->gen < 8) + gen6_write_pdes(ppgtt); } return ret; @@ -1059,6 +1057,9 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND); } + if (dev_priv->mm.aliasing_ppgtt) + gen6_write_pdes(dev_priv->mm.aliasing_ppgtt); + i915_gem_chipset_flush(dev); } -- cgit v1.1 From 80da2161710cf28bca96c9a03331f8b24616e24d Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:17 -0800 Subject: drm/i915: Restore PDEs for all VMs In following with the old restore code, we must now restore ever PPGTT's PDEs, since they aren't proper GEM ojbects. v2: Rebased on BDW. Only do restore pdes for gen6 & 7 Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2c07795..5e2efca 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1033,6 +1033,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; i915_check_and_clear_faults(dev); @@ -1057,8 +1058,20 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND); } - if (dev_priv->mm.aliasing_ppgtt) - gen6_write_pdes(dev_priv->mm.aliasing_ppgtt); + + if (INTEL_INFO(dev)->gen >= 8) + return; + + list_for_each_entry(vm, &dev_priv->vm_list, global_link) { + /* TODO: Perhaps it shouldn't be gen6 specific */ + if (i915_is_ggtt(vm)) { + if (dev_priv->mm.aliasing_ppgtt) + gen6_write_pdes(dev_priv->mm.aliasing_ppgtt); + continue; + } + + gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base)); + } i915_gem_chipset_flush(dev); } -- cgit v1.1 From bdf4fd7ea0765966c920f62a360532e3929177ca Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:18 -0800 Subject: drm/i915: Do aliasing PPGTT init with contexts We have a default context which suits the aliasing PPGTT well. Tie them together so it looks like any other context/PPGTT pair. This makes the code cleaner as it won't have to special case aliasing as often. The patch has one slightly tricky part in the default context creation function. In the future (and on aliased setup) we create a new VM for a context (potentially). However, if we have aliasing PPGTT, which occurs at this point in time for all platforms GEN6+, we can simply manage the refcounting to allow things to behave as normal. Now is a good time to recall that the aliasing_ppgtt doesn't have a real VM, it uses the GGTT drm_mm. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 13 +---- drivers/gpu/drm/i915/i915_gem_context.c | 100 +++++++++++++++++++++++++++----- drivers/gpu/drm/i915/i915_gem_gtt.c | 32 ---------- 5 files changed, 87 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index a5d010c..0817dd1 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1364,7 +1364,7 @@ cleanup_gem: i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); mutex_unlock(&dev->struct_mutex); - i915_gem_cleanup_aliasing_ppgtt(dev); + WARN_ON(dev_priv->mm.aliasing_ppgtt); drm_mm_takedown(&dev_priv->gtt.base.mm); cleanup_power: intel_display_power_put(dev, POWER_DOMAIN_VGA); @@ -1765,8 +1765,8 @@ int i915_driver_unload(struct drm_device *dev) i915_gem_free_all_phys_object(dev); i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); + WARN_ON(dev_priv->mm.aliasing_ppgtt); mutex_unlock(&dev->struct_mutex); - i915_gem_cleanup_aliasing_ppgtt(dev); i915_gem_cleanup_stolen(dev); if (!I915_NEED_GFX_HWS(dev)) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a47a43e..6dcfa18 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2260,7 +2260,6 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); /* i915_gem_gtt.c */ -void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev); void i915_check_and_clear_faults(struct drm_device *dev); void i915_gem_suspend_gtt_mappings(struct drm_device *dev); void i915_gem_restore_gtt_mappings(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cc1ac79..427596b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4404,7 +4404,6 @@ int i915_gem_init_hw(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; - struct i915_hw_ppgtt *ppgtt; int ret, i; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) @@ -4446,16 +4445,6 @@ i915_gem_init_hw(struct drm_device *dev) goto err_out; } - if (dev_priv->mm.aliasing_ppgtt) { - ppgtt = dev_priv->mm.aliasing_ppgtt; - ret = ppgtt->enable(ppgtt); - if (ret) { - i915_gem_cleanup_aliasing_ppgtt(dev); - DRM_INFO("PPGTT enable failed. This is not fatal, but unexpected\n"); - ret = 0; - } - } - return 0; err_out: @@ -4486,8 +4475,8 @@ int i915_gem_init(struct drm_device *dev) ret = i915_gem_init_hw(dev); mutex_unlock(&dev->struct_mutex); if (ret) { + WARN_ON(dev_priv->mm.aliasing_ppgtt); i915_gem_context_fini(dev); - i915_gem_cleanup_aliasing_ppgtt(dev); drm_mm_takedown(&dev_priv->gtt.base.mm); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 0b32bcf..215a36d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -157,6 +157,25 @@ void i915_gem_context_free(struct kref *ctx_ref) kfree(ctx); } +static struct i915_hw_ppgtt * +create_vm_for_ctx(struct drm_device *dev, struct i915_hw_context *ctx) +{ + struct i915_hw_ppgtt *ppgtt; + int ret; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return ERR_PTR(-ENOMEM); + + ret = i915_gem_init_ppgtt(dev, ppgtt); + if (ret) { + kfree(ppgtt); + return ERR_PTR(ret); + } + + return ppgtt; +} + static struct i915_hw_context * create_hw_context(struct drm_device *dev, struct drm_i915_file_private *file_priv) @@ -223,31 +242,70 @@ static inline bool is_default_context(struct i915_hw_context *ctx) * well as an idle case. */ static struct i915_hw_context * -create_default_context(struct drm_device *dev) +create_default_context(struct drm_device *dev, + struct drm_i915_file_private *file_priv, + bool create_vm) { + struct drm_i915_private *dev_priv = dev->dev_private; struct i915_hw_context *ctx; - int ret; + int ret = 0; BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - ctx = create_hw_context(dev, NULL); + /* Not yet supported */ + BUG_ON(file_priv); + + ctx = create_hw_context(dev, file_priv); if (IS_ERR(ctx)) return ctx; - /* We may need to do things with the shrinker which require us to - * immediately switch back to the default context. This can cause a - * problem as pinning the default context also requires GTT space which - * may not be available. To avoid this we always pin the - * default context. - */ - ret = i915_gem_obj_ggtt_pin(ctx->obj, get_context_alignment(dev), - false, false); - if (ret) { - DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret); - goto err_destroy; + if (create_vm) { + struct i915_hw_ppgtt *ppgtt = create_vm_for_ctx(dev, ctx); + + if (IS_ERR_OR_NULL(ppgtt)) { + DRM_ERROR("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); + ret = PTR_ERR(ppgtt); + goto err_destroy; + } else + ctx->vm = &ppgtt->base; + + /* This case is reserved for the global default context and + * should only happen once. */ + if (!file_priv) { + if (WARN_ON(dev_priv->mm.aliasing_ppgtt)) { + ret = -EEXIST; + goto err_destroy; + } + + dev_priv->mm.aliasing_ppgtt = ppgtt; + + /* We may need to do things with the shrinker which + * require us to immediately switch back to the default + * context. This can cause a problem as pinning the + * default context also requires GTT space which may not + * be available. To avoid this we always pin the default + * context. + */ + ret = i915_gem_obj_ggtt_pin(ctx->obj, + get_context_alignment(dev), + false, false); + if (ret) { + DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret); + goto err_destroy; + } + } + } else if (USES_ALIASING_PPGTT(dev)) { + /* For platforms which only have aliasing PPGTT, we fake the + * address space and refcounting. */ + kref_get(&dev_priv->mm.aliasing_ppgtt->ref); } - DRM_DEBUG_DRIVER("Default HW context loaded\n"); + /* TODO: Until full ppgtt... */ + if (USES_ALIASING_PPGTT(dev)) + ctx->vm = &dev_priv->mm.aliasing_ppgtt->base; + else + ctx->vm = &dev_priv->gtt.base; + return ctx; err_destroy: @@ -319,8 +377,9 @@ int i915_gem_context_init(struct drm_device *dev) return -E2BIG; } + dev_priv->ring[RCS].default_context = + create_default_context(dev, NULL, USES_ALIASING_PPGTT(dev)); - dev_priv->ring[RCS].default_context = create_default_context(dev); if (IS_ERR_OR_NULL(dev_priv->ring[RCS].default_context)) { DRM_DEBUG_DRIVER("Disabling HW Contexts; create failed %ld\n", PTR_ERR(dev_priv->ring[RCS].default_context)); @@ -384,6 +443,7 @@ void i915_gem_context_fini(struct drm_device *dev) i915_gem_object_ggtt_unpin(dctx->obj); i915_gem_context_unreference(dctx); + dev_priv->mm.aliasing_ppgtt = NULL; } int i915_gem_context_enable(struct drm_i915_private *dev_priv) @@ -394,11 +454,19 @@ int i915_gem_context_enable(struct drm_i915_private *dev_priv) if (!HAS_HW_CONTEXTS(dev_priv->dev)) return 0; + /* This is the only place the aliasing PPGTT gets enabled, which means + * it has to happen before we bail on reset */ + if (dev_priv->mm.aliasing_ppgtt) { + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; + ppgtt->enable(ppgtt); + } + /* FIXME: We should make this work, even in reset */ if (i915_reset_in_progress(&dev_priv->gpu_error)) return 0; BUG_ON(!dev_priv->ring[RCS].default_context); + for_each_ring(ring, dev_priv, i) { ret = do_switch(ring, ring->default_context); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 5e2efca..a6211e0 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -909,19 +909,6 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) return ret; } -void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; - - if (!ppgtt) - return; - - kref_put(&dev_priv->mm.aliasing_ppgtt->ref, ppgtt_release); - - dev_priv->mm.aliasing_ppgtt = NULL; -} - static void __always_unused ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, @@ -1422,25 +1409,6 @@ void i915_gem_init_global_gtt(struct drm_device *dev) mappable_size = dev_priv->gtt.mappable_end; i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); - if (USES_ALIASING_PPGTT(dev)) { - struct i915_hw_ppgtt *ppgtt; - int ret; - - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) { - DRM_ERROR("Aliased PPGTT setup failed -ENOMEM\n"); - return; - } - - ret = i915_gem_init_ppgtt(dev, ppgtt); - if (!ret) { - dev_priv->mm.aliasing_ppgtt = ppgtt; - return; - } - - kfree(ppgtt); - DRM_ERROR("Aliased PPGTT setup failed %d\n", ret); - } } static int setup_scratch_page(struct drm_device *dev) -- cgit v1.1 From 0eea67eb26000657079b7fc41079097942339452 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:19 -0800 Subject: drm/i915: Create a per file_priv default context Every file will get it's own context, and we use this context instead of the default context. The default context still exists for future shrinker usage as well as reset handling. v2: Updated to address Mika's recent context guilty changes Some more changes around this come up in later patches as well. v3: Use a fake context to avoid allocation for the !HAS_HW_CONTEXT case. I've tried the alternatives. This looks the best to me. Removed hangstat stuff from v2 - for a separate patch Demote failed PPGTT set to DRM_DEBUG_DRIVER since it can now be invoked easily from userspace. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem_context.c | 62 +++++++++++++++++++-------------- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6dcfa18..b8f187a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1758,6 +1758,7 @@ struct drm_i915_file_private { struct idr context_idr; struct i915_ctx_hang_stats hang_stats; + struct i915_hw_context *private_default_ctx; atomic_t rps_wait_boost; }; @@ -2231,6 +2232,7 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, } /* i915_gem_context.c */ +#define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base) int __must_check i915_gem_context_init(struct drm_device *dev); void i915_gem_context_fini(struct drm_device *dev); void i915_gem_context_reset(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 215a36d..d5d35e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -145,7 +145,7 @@ void i915_gem_context_free(struct kref *ctx_ref) /* We refcount even the aliasing PPGTT to keep the code symmetric */ if (USES_ALIASING_PPGTT(ctx->obj->base.dev)) - ppgtt = container_of(ctx->vm, struct i915_hw_ppgtt, base); + ppgtt = ctx_to_ppgtt(ctx); /* XXX: Free up the object before tearing down the address space, in * case we're bound in the PPGTT */ @@ -177,7 +177,7 @@ create_vm_for_ctx(struct drm_device *dev, struct i915_hw_context *ctx) } static struct i915_hw_context * -create_hw_context(struct drm_device *dev, +__create_hw_context(struct drm_device *dev, struct drm_i915_file_private *file_priv) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -211,7 +211,7 @@ create_hw_context(struct drm_device *dev, if (file_priv == NULL) return ctx; - ret = idr_alloc(&file_priv->context_idr, ctx, DEFAULT_CONTEXT_ID + 1, 0, + ret = idr_alloc(&file_priv->context_idr, ctx, DEFAULT_CONTEXT_ID, 0, GFP_KERNEL); if (ret < 0) goto err_out; @@ -232,8 +232,7 @@ err_out: static inline bool is_default_context(struct i915_hw_context *ctx) { - /* Cheap trick to determine default contexts */ - return ctx->file_priv ? false : true; + return (ctx->id == DEFAULT_CONTEXT_ID); } /** @@ -242,9 +241,9 @@ static inline bool is_default_context(struct i915_hw_context *ctx) * well as an idle case. */ static struct i915_hw_context * -create_default_context(struct drm_device *dev, - struct drm_i915_file_private *file_priv, - bool create_vm) +i915_gem_create_context(struct drm_device *dev, + struct drm_i915_file_private *file_priv, + bool create_vm) { struct drm_i915_private *dev_priv = dev->dev_private; struct i915_hw_context *ctx; @@ -252,10 +251,7 @@ create_default_context(struct drm_device *dev, BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - /* Not yet supported */ - BUG_ON(file_priv); - - ctx = create_hw_context(dev, file_priv); + ctx = __create_hw_context(dev, file_priv); if (IS_ERR(ctx)) return ctx; @@ -263,7 +259,8 @@ create_default_context(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt = create_vm_for_ctx(dev, ctx); if (IS_ERR_OR_NULL(ppgtt)) { - DRM_ERROR("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); + DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", + PTR_ERR(ppgtt)); ret = PTR_ERR(ppgtt); goto err_destroy; } else @@ -378,7 +375,7 @@ int i915_gem_context_init(struct drm_device *dev) } dev_priv->ring[RCS].default_context = - create_default_context(dev, NULL, USES_ALIASING_PPGTT(dev)); + i915_gem_create_context(dev, NULL, USES_ALIASING_PPGTT(dev)); if (IS_ERR_OR_NULL(dev_priv->ring[RCS].default_context)) { DRM_DEBUG_DRIVER("Disabling HW Contexts; create failed %ld\n", @@ -480,7 +477,9 @@ static int context_idr_cleanup(int id, void *p, void *data) { struct i915_hw_context *ctx = p; - BUG_ON(id == DEFAULT_CONTEXT_ID); + /* Ignore the default context because close will handle it */ + if (is_default_context(ctx)) + return 0; i915_gem_context_unreference(ctx); return 0; @@ -516,6 +515,16 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file) idr_init(&file_priv->context_idr); + mutex_lock(&dev->struct_mutex); + file_priv->private_default_ctx = + i915_gem_create_context(dev, file_priv, false); + mutex_unlock(&dev->struct_mutex); + + if (IS_ERR(file_priv->private_default_ctx)) { + idr_destroy(&file_priv->context_idr); + return PTR_ERR(file_priv->private_default_ctx); + } + return 0; } @@ -528,6 +537,7 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) mutex_lock(&dev->struct_mutex); idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); + i915_gem_context_unreference(file_priv->private_default_ctx); idr_destroy(&file_priv->context_idr); mutex_unlock(&dev->struct_mutex); } @@ -702,21 +712,18 @@ int i915_switch_context(struct intel_ring_buffer *ring, struct drm_i915_private *dev_priv = ring->dev->dev_private; struct i915_hw_context *to; + WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); + if (!HAS_HW_CONTEXTS(ring->dev)) return 0; - WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); - - if (to_id == DEFAULT_CONTEXT_ID) { + if (file == NULL) to = ring->default_context; - } else { - if (file == NULL) - return -EINVAL; - + else to = i915_gem_context_get(file->driver_priv, to_id); - if (to == NULL) - return -ENOENT; - } + + if (to == NULL) + return -ENOENT; return do_switch(ring, to); } @@ -739,7 +746,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - ctx = create_hw_context(dev, file_priv); + ctx = i915_gem_create_context(dev, file_priv, false); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -761,6 +768,9 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, if (!(dev->driver->driver_features & DRIVER_GEM)) return -ENODEV; + if (args->ctx_id == DEFAULT_CONTEXT_ID) + return -EPERM; + ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; -- cgit v1.1 From c482972a086e03e6a6d27e4f7af2d868bf659648 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:20 -0800 Subject: drm/i915: Piggy back hangstats off of contexts To simplify the codepaths somewhat, we can simply always create a context. Contexts already keep hangstat information. This prevents us from having to differentiate at other parts in the code. There is allocation overhead, but it should not be measurable. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 7 ++++--- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 26 ++++++++++++++------------ 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b8f187a..c1adb82 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1757,7 +1757,6 @@ struct drm_i915_file_private { } mm; struct idr context_idr; - struct i915_ctx_hang_stats hang_stats; struct i915_hw_context *private_default_ctx; atomic_t rps_wait_boost; }; @@ -2244,12 +2243,14 @@ int i915_switch_context(struct intel_ring_buffer *ring, void i915_gem_context_free(struct kref *ctx_ref); static inline void i915_gem_context_reference(struct i915_hw_context *ctx) { - kref_get(&ctx->ref); + if (ctx->obj && HAS_HW_CONTEXTS(ctx->obj->base.dev)) + kref_get(&ctx->ref); } static inline void i915_gem_context_unreference(struct i915_hw_context *ctx) { - kref_put(&ctx->ref, i915_gem_context_free); + if (ctx->obj && HAS_HW_CONTEXTS(ctx->obj->base.dev)) + kref_put(&ctx->ref, i915_gem_context_free); } struct i915_ctx_hang_stats * __must_check diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 427596b..42647c3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2323,7 +2323,7 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring, if (request->ctx && request->ctx->id != DEFAULT_CONTEXT_ID) hs = &request->ctx->hang_stats; else if (request->file_priv) - hs = &request->file_priv->hang_stats; + hs = &request->file_priv->private_default_ctx->hang_stats; if (hs) { if (guilty) { diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d5d35e2..192a259 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -490,15 +490,8 @@ i915_gem_context_get_hang_stats(struct drm_device *dev, struct drm_file *file, u32 id) { - struct drm_i915_file_private *file_priv = file->driver_priv; struct i915_hw_context *ctx; - if (id == DEFAULT_CONTEXT_ID) - return &file_priv->hang_stats; - - if (!HAS_HW_CONTEXTS(dev)) - return ERR_PTR(-ENOENT); - ctx = i915_gem_context_get(file->driver_priv, id); if (ctx == NULL) return ERR_PTR(-ENOENT); @@ -509,9 +502,15 @@ i915_gem_context_get_hang_stats(struct drm_device *dev, int i915_gem_context_open(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_private *dev_priv = dev->dev_private; - if (!HAS_HW_CONTEXTS(dev)) + if (!HAS_HW_CONTEXTS(dev)) { + /* Cheat for hang stats */ + file_priv->private_default_ctx = + kzalloc(sizeof(struct i915_hw_context), GFP_KERNEL); + file_priv->private_default_ctx->vm = &dev_priv->gtt.base; return 0; + } idr_init(&file_priv->context_idr); @@ -532,8 +531,10 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; - if (!HAS_HW_CONTEXTS(dev)) + if (!HAS_HW_CONTEXTS(dev)) { + kfree(file_priv->private_default_ctx); return; + } mutex_lock(&dev->struct_mutex); idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); @@ -714,9 +715,6 @@ int i915_switch_context(struct intel_ring_buffer *ring, WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); - if (!HAS_HW_CONTEXTS(ring->dev)) - return 0; - if (file == NULL) to = ring->default_context; else @@ -725,6 +723,10 @@ int i915_switch_context(struct intel_ring_buffer *ring, if (to == NULL) return -ENOENT; + /* We have the fake context, but don't supports switching. */ + if (!HAS_HW_CONTEXTS(ring->dev)) + return 0; + return do_switch(ring, to); } -- cgit v1.1 From 41bde5535a7d48876095926bb55b1aed5ccd6b2c Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:21 -0800 Subject: drm/i915: Get context early in execbuf We need to have the address space when reserving space for the objects. Since the address space and context are tied together, and reserve occurs before context switch (for good reason), we must lookup our context earlier in the process. This leaves some room for optimizations where we no longer need to use ctx_id in certain places. This will be addressed in a subsequent patch. Important tricky bit: Because slow relocations during execbuffer drop struct_mutex Perhaps it would be best to acquire the reference when we get the context, but I'll save that for another day (note I have written the patch before, and I found the changes required to be uglier than this). Note that since we currently access everything via context id, and not the data structure this is fine, though not desirable. The next change attempts to get the context only once via the context ID idr lookup, and as such, the following can happen: CTX-A is created, refcount = 1 CTX-A execbuf, mutex dropped close IOCTL called on CTX-A, refcount = 0 CTX-A resumes in execbuf. v2: Rebased on top of commit b6359918b885da7c7b58c050674278dbd06020ab Author: Mika Kuoppala Date: Wed Oct 30 15:44:16 2013 +0200 drm/i915: add i915_get_reset_stats_ioctl v3: Rebased on top of commit 25b3dfc87bff80317d67ddd2cd4cfb91e6fe7d79 Author: Mika Westerberg Date: Tue Nov 12 11:57:30 2013 +0200 Author: Mika Kuoppala Date: Tue Nov 26 16:14:33 2013 +0200 drm/i915: check context reset stats before relocations Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 8 ++---- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 32 ++++------------------ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 44 ++++++++++++++++++------------ drivers/gpu/drm/i915/intel_uncore.c | 8 ++++-- 5 files changed, 41 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c1adb82..4f0b17b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2239,7 +2239,9 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); int i915_gem_context_enable(struct drm_i915_private *dev_priv); void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); int i915_switch_context(struct intel_ring_buffer *ring, - struct drm_file *file, int to_id); + struct drm_file *file, struct i915_hw_context *to); +struct i915_hw_context * +i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); void i915_gem_context_free(struct kref *ctx_ref); static inline void i915_gem_context_reference(struct i915_hw_context *ctx) { @@ -2253,10 +2255,6 @@ static inline void i915_gem_context_unreference(struct i915_hw_context *ctx) kref_put(&ctx->ref, i915_gem_context_free); } -struct i915_ctx_hang_stats * __must_check -i915_gem_context_get_hang_stats(struct drm_device *dev, - struct drm_file *file, - u32 id); int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 42647c3..89e2f92 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2799,7 +2799,7 @@ int i915_gpu_idle(struct drm_device *dev) /* Flush everything onto the inactive list. */ for_each_ring(ring, dev_priv, i) { - ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); + ret = i915_switch_context(ring, NULL, ring->default_context); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 192a259..d3a17ef 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -96,8 +96,6 @@ #define GEN6_CONTEXT_ALIGN (64<<10) #define GEN7_CONTEXT_ALIGN 4096 -static struct i915_hw_context * -i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); static int do_switch(struct intel_ring_buffer *ring, struct i915_hw_context *to); @@ -485,20 +483,6 @@ static int context_idr_cleanup(int id, void *p, void *data) return 0; } -struct i915_ctx_hang_stats * -i915_gem_context_get_hang_stats(struct drm_device *dev, - struct drm_file *file, - u32 id) -{ - struct i915_hw_context *ctx; - - ctx = i915_gem_context_get(file->driver_priv, id); - if (ctx == NULL) - return ERR_PTR(-ENOENT); - - return &ctx->hang_stats; -} - int i915_gem_context_open(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; @@ -543,9 +527,12 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) mutex_unlock(&dev->struct_mutex); } -static struct i915_hw_context * +struct i915_hw_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) { + if (!HAS_HW_CONTEXTS(file_priv->dev_priv->dev)) + return file_priv->private_default_ctx; + return (struct i915_hw_context *)idr_find(&file_priv->context_idr, id); } @@ -708,20 +695,13 @@ done: */ int i915_switch_context(struct intel_ring_buffer *ring, struct drm_file *file, - int to_id) + struct i915_hw_context *to) { struct drm_i915_private *dev_priv = ring->dev->dev_private; - struct i915_hw_context *to; WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); - if (file == NULL) - to = ring->default_context; - else - to = i915_gem_context_get(file->driver_priv, to_id); - - if (to == NULL) - return -ENOENT; + BUG_ON(file && to == NULL); /* We have the fake context, but don't supports switching. */ if (!HAS_HW_CONTEXTS(ring->dev)) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d608a07..e78c5c0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -884,22 +884,24 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, return 0; } -static int +static struct i915_hw_context * i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, const u32 ctx_id) { + struct i915_hw_context *ctx = NULL; struct i915_ctx_hang_stats *hs; - hs = i915_gem_context_get_hang_stats(dev, file, ctx_id); - if (IS_ERR(hs)) - return PTR_ERR(hs); + ctx = i915_gem_context_get(file->driver_priv, ctx_id); + if (IS_ERR_OR_NULL(ctx)) + return ctx; + hs = &ctx->hang_stats; if (hs->banned) { DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id); - return -EIO; + return ERR_PTR(-EIO); } - return 0; + return ctx; } static void @@ -975,14 +977,15 @@ static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, - struct drm_i915_gem_exec_object2 *exec, - struct i915_address_space *vm) + struct drm_i915_gem_exec_object2 *exec) { drm_i915_private_t *dev_priv = dev->dev_private; struct eb_vmas *eb; struct drm_i915_gem_object *batch_obj; struct drm_clip_rect *cliprects = NULL; struct intel_ring_buffer *ring; + struct i915_hw_context *ctx; + struct i915_address_space *vm; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 exec_start, exec_len; u32 mask, flags; @@ -1096,11 +1099,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } - ret = i915_gem_validate_context(dev, file, ctx_id); - if (ret) { + ctx = i915_gem_validate_context(dev, file, ctx_id); + if (IS_ERR_OR_NULL(ctx)) { mutex_unlock(&dev->struct_mutex); + ret = PTR_ERR(ctx); goto pre_mutex_err; - } + } + + i915_gem_context_reference(ctx); + + /* HACK until we have full PPGTT */ + /* vm = ctx->vm; */ + vm = &dev_priv->gtt.base; eb = eb_create(args); if (eb == NULL) { @@ -1160,7 +1170,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret) goto err; - ret = i915_switch_context(ring, file, ctx_id); + ret = i915_switch_context(ring, file, ctx); if (ret) goto err; @@ -1215,6 +1225,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); err: + /* the request owns the ref now */ + i915_gem_context_unreference(ctx); eb_destroy(eb); mutex_unlock(&dev->struct_mutex); @@ -1232,7 +1244,6 @@ int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_execbuffer *args = data; struct drm_i915_gem_execbuffer2 exec2; struct drm_i915_gem_exec_object *exec_list = NULL; @@ -1288,8 +1299,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, exec2.flags = I915_EXEC_RENDER; i915_execbuffer2_set_context_id(exec2, 0); - ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list, - &dev_priv->gtt.base); + ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list); if (!ret) { /* Copy the new buffer offsets back to the user's exec list. */ for (i = 0; i < args->buffer_count; i++) @@ -1315,7 +1325,6 @@ int i915_gem_execbuffer2(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list = NULL; int ret; @@ -1346,8 +1355,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EFAULT; } - ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list, - &dev_priv->gtt.base); + ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list); if (!ret) { /* Copy the new buffer offsets back to the user's exec list. */ ret = copy_to_user(to_user_ptr(args->buffers_ptr), diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index feb2d66..e52fcce 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -836,6 +836,7 @@ int i915_get_reset_stats_ioctl(struct drm_device *dev, struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_reset_stats *args = data; struct i915_ctx_hang_stats *hs; + struct i915_hw_context *ctx; int ret; if (args->flags || args->pad) @@ -848,11 +849,12 @@ int i915_get_reset_stats_ioctl(struct drm_device *dev, if (ret) return ret; - hs = i915_gem_context_get_hang_stats(dev, file, args->ctx_id); - if (IS_ERR(hs)) { + ctx = i915_gem_context_get(file->driver_priv, args->ctx_id); + if (IS_ERR(ctx)) { mutex_unlock(&dev->struct_mutex); - return PTR_ERR(hs); + return PTR_ERR(ctx); } + hs = &ctx->hang_stats; if (capable(CAP_SYS_ADMIN)) args->reset_count = i915_reset_count(&dev_priv->gpu_error); -- cgit v1.1 From e20780439b26ba95aeb29d3e27cd8cc32bc82a4c Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:22 -0800 Subject: drm/i915: Defer request freeing With context destruction, we always want to be able to tear down the underlying address space. This is invoked on the last unreference to the context which could happen before we've moved all objects to the inactive list. To enable a clean tear down the address space, make sure to process the request free lastly. Without this change, we cannot guarantee to we don't still have active objects in the VM. As an example of a failing case: CTX-A is created, count=1 CTX-A is used during execbuf does a context switch count = 2 and add_request count = 3 CTX B runs, switches, CTX-A count = 2 CTX-A is destroyed, count = 1 retire requests is called free_request from CTX-A, count = 0 <--- free context with active object As mentioned above, by doing the free request after processing the active list, we can avoid this case. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 89e2f92..99c05e3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2423,6 +2423,8 @@ void i915_gem_reset(struct drm_device *dev) void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) { + LIST_HEAD(deferred_request_free); + struct drm_i915_gem_request *request; uint32_t seqno; if (list_empty(&ring->request_list)) @@ -2433,8 +2435,6 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) seqno = ring->get_seqno(ring, true); while (!list_empty(&ring->request_list)) { - struct drm_i915_gem_request *request; - request = list_first_entry(&ring->request_list, struct drm_i915_gem_request, list); @@ -2450,7 +2450,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) */ ring->last_retired_head = request->tail; - i915_gem_free_request(request); + list_move_tail(&request->list, &deferred_request_free); } /* Move any buffers on the active list that are no longer referenced @@ -2475,6 +2475,13 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) ring->trace_irq_seqno = 0; } + /* Finish processing active list before freeing request */ + while (!list_empty(&deferred_request_free)) { + request = list_first_entry(&deferred_request_free, + struct drm_i915_gem_request, + list); + i915_gem_free_request(request); + } WARN_ON(i915_verify_lists(ring->dev)); } -- cgit v1.1 From 679845ede0a67a7a7492b28dbd0e11d2a45eda61 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:23 -0800 Subject: drm/i915: Clean up VMAs before freeing It's quite common for an object to simply be on the inactive list (and not unbound) when we want to free the context. This of course happens with lazy unbinding. Simply, this is needed when an object isn't fully unbound but we want to free one VMA of the object, for whatever reason. NOTE: The aliasing PPGTT is not a proper VM, so it needs special casing. This addresses the fixup requirement mentioned in: drm/915: Better reset handling for contexts In the flink, and dmabuf case, we can't assert that the object isn't still active. To keep it more generic, just check the vma's link in the object vma list. If we wanted to do a better job, we could track last seqno (and active) per VMA. It was decided not to do this in the last iteration. Unfortunately this means the assertion can miss real bugs when using flink/dmabuf. v2: Use the newer introduced i915_gem_evict_vm(). Note that handling the aliasing PPGTT is special. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 52 +++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4f0b17b..b10d466 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2260,6 +2260,17 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +/* i915_gem_evict.c */ +int __must_check i915_gem_evict_something(struct drm_device *dev, + struct i915_address_space *vm, + int min_size, + unsigned alignment, + unsigned cache_level, + bool mappable, + bool nonblock); +int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); +int i915_gem_evict_everything(struct drm_device *dev); + /* i915_gem_gtt.c */ void i915_check_and_clear_faults(struct drm_device *dev); void i915_gem_suspend_gtt_mappings(struct drm_device *dev); @@ -2297,22 +2308,39 @@ static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full) static inline void ppgtt_release(struct kref *kref) { struct i915_hw_ppgtt *ppgtt = container_of(kref, struct i915_hw_ppgtt, ref); + struct drm_device *dev = ppgtt->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_address_space *vm = &ppgtt->base; + + if (ppgtt == dev_priv->mm.aliasing_ppgtt || + (list_empty(&vm->active_list) && list_empty(&vm->inactive_list))) { + ppgtt->base.cleanup(&ppgtt->base); + return; + } + + /* + * Make sure vmas are unbound before we take down the drm_mm + * + * FIXME: Proper refcounting should take care of this, this shouldn't be + * needed at all. + */ + if (!list_empty(&vm->active_list)) { + struct i915_vma *vma; + + list_for_each_entry(vma, &vm->active_list, mm_list) + if (WARN_ON(list_empty(&vma->vma_link) || + list_is_singular(&vma->vma_link))) + break; + + i915_gem_evict_vm(&ppgtt->base, true); + } else { + i915_gem_retire_requests(dev); + i915_gem_evict_vm(&ppgtt->base, false); + } ppgtt->base.cleanup(&ppgtt->base); } - -/* i915_gem_evict.c */ -int __must_check i915_gem_evict_something(struct drm_device *dev, - struct i915_address_space *vm, - int min_size, - unsigned alignment, - unsigned cache_level, - bool mappable, - bool nonblock); -int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); -int i915_gem_evict_everything(struct drm_device *dev); - /* i915_gem_stolen.c */ int i915_gem_init_stolen(struct drm_device *dev); int i915_gem_stolen_setup_compression(struct drm_device *dev, int size); -- cgit v1.1 From 4fe9adbc36097317864bfec3c32047da7c45a2fa Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:24 -0800 Subject: drm/i915: Do not allow buffers at offset 0 This is primarily a band aid for an unexplainable error in gem_reloc_vs_gpu/forked-faulting-reloc-thrashing. Essentially as soon as a relocated buffer (which had a non-zero presumed offset) moved to offset 0, something goes bad. Since I have been unable to solve this, and potentially this is a good thing to do anyway, since many things can accidentally write to offset 0, why not? Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 99c05e3..0572257 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3280,9 +3280,11 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, WARN_ON(!list_is_singular(&obj->vma_list)); search_free: + /* FIXME: Some tests are failing when they receive a reloc of 0. To + * prevent this, we simply don't allow the 0th offset. */ ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, size, alignment, - obj->cache_level, 0, gtt_max, + obj->cache_level, 1, gtt_max, DRM_MM_SEARCH_DEFAULT); if (ret) { ret = i915_gem_evict_something(dev, vm, size, alignment, -- cgit v1.1 From 7e0d96bc03c140cb8183955ad6f0290caa731e64 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:26 -0800 Subject: drm/i915: Use multiple VMs -- the point of no return As with processes which run on the CPU, the goal of multiple VMs is to provide process isolation. Specific to GEN, there is also the ability to map more objects per process (2GB each instead of 2Gb-2k total). For the most part, all the pipes have been laid, and all we need to do is remove asserts and actually start changing address spaces with the context switch. Since prior to this we've converted the setting of the page tables to a streamed version, this is quite easy. One important thing to point out (since it'd been hotly contested) is that with this patch, every context created will have it's own address space (provided the HW can do it). v2: Disable BDW on rebase NOTE: I tried to make this commit as small as possible. I needed one place where I could "turn everything on" and that is here. It could be split into finer commits, but I didn't really see much point. Cc: Eric Anholt Cc: Daniel Vetter Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 ++ drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_drv.h | 12 +++++- drivers/gpu/drm/i915/i915_gem.c | 22 ++++------- drivers/gpu/drm/i915/i915_gem_context.c | 60 +++++++++++++++++------------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++--- drivers/gpu/drm/i915/i915_gem_gtt.c | 22 ++++++++--- drivers/gpu/drm/i915/i915_gpu_error.c | 5 --- include/uapi/drm/i915_drm.h | 1 + 9 files changed, 86 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 89af75a..9fedfa0 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1011,6 +1011,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_HANDLE_LUT: value = 1; break; + case I915_PARAM_HAS_FULL_PPGTT: + value = USES_FULL_PPGTT(dev); + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 65b5c83..6cdaa78 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -116,7 +116,8 @@ MODULE_PARM_DESC(enable_hangcheck, int i915_enable_ppgtt __read_mostly = -1; module_param_named(i915_enable_ppgtt, i915_enable_ppgtt, int, 0400); MODULE_PARM_DESC(i915_enable_ppgtt, - "Enable PPGTT (default: true)"); + "Override PPGTT usage. " + "(-1=auto [default], 0=disabled, 1=aliasing, 2=full)"); int i915_enable_psr __read_mostly = 0; module_param_named(enable_psr, i915_enable_psr, int, 0600); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a70d9c8..8fd99ac 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1831,7 +1831,9 @@ struct drm_i915_file_private { #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev)) +#define HAS_PPGTT(dev) (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev) && !IS_BROADWELL(dev)) #define USES_ALIASING_PPGTT(dev) intel_enable_ppgtt(dev, false) +#define USES_FULL_PPGTT(dev) intel_enable_ppgtt(dev, true) #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) @@ -2012,6 +2014,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops); struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, size_t size); +void i915_init_vm(struct drm_i915_private *dev_priv, + struct i915_address_space *vm); void i915_gem_free_object(struct drm_gem_object *obj); void i915_gem_vma_destroy(struct i915_vma *vma); @@ -2290,7 +2294,8 @@ static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full) if (i915_enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev)) return false; - BUG_ON(full); + if (i915_enable_ppgtt == 1 && full) + return false; #ifdef CONFIG_INTEL_IOMMU /* Disable ppgtt on SNB if VT-d is on. */ @@ -2300,7 +2305,10 @@ static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full) } #endif - return HAS_ALIASING_PPGTT(dev); + if (full) + return HAS_PPGTT(dev); + else + return HAS_ALIASING_PPGTT(dev); } static inline void ppgtt_release(struct kref *kref) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a9cabff..f3b0025 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2247,7 +2247,10 @@ request_to_vm(struct drm_i915_gem_request *request) struct drm_i915_private *dev_priv = request->ring->dev->dev_private; struct i915_address_space *vm; - vm = &dev_priv->gtt.base; + if (request->ctx) + vm = request->ctx->vm; + else + vm = &dev_priv->gtt.base; return vm; } @@ -2718,9 +2721,6 @@ int i915_vma_unbind(struct i915_vma *vma) drm_i915_private_t *dev_priv = obj->base.dev->dev_private; int ret; - /* For now we only ever use 1 vma per object */ - WARN_ON(!list_is_singular(&obj->vma_list)); - if (list_empty(&vma->vma_link)) return 0; @@ -3268,17 +3268,12 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, i915_gem_object_pin_pages(obj); - BUG_ON(!i915_is_ggtt(vm)); - vma = i915_gem_obj_lookup_or_create_vma(obj, vm); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_unpin; } - /* For now we only ever use 1 vma per object */ - WARN_ON(!list_is_singular(&obj->vma_list)); - search_free: /* FIXME: Some tests are failing when they receive a reloc of 0. To * prevent this, we simply don't allow the 0th offset. */ @@ -4182,9 +4177,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) if (obj->phys_obj) i915_gem_detach_phys_object(dev, obj); - /* NB: 0 or 1 elements */ - WARN_ON(!list_empty(&obj->vma_list) && - !list_is_singular(&obj->vma_list)); list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { int ret; @@ -4580,9 +4572,11 @@ init_ring_lists(struct intel_ring_buffer *ring) INIT_LIST_HEAD(&ring->request_list); } -static void i915_init_vm(struct drm_i915_private *dev_priv, - struct i915_address_space *vm) +void i915_init_vm(struct drm_i915_private *dev_priv, + struct i915_address_space *vm) { + if (!i915_is_ggtt(vm)) + drm_mm_init(&vm->mm, vm->start, vm->total); vm->dev = dev_priv->dev; INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 165a5c7..ebe0f67 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -288,17 +288,15 @@ i915_gem_create_context(struct drm_device *dev, DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret); goto err_destroy; } + + ctx->vm = &dev_priv->mm.aliasing_ppgtt->base; } } else if (USES_ALIASING_PPGTT(dev)) { /* For platforms which only have aliasing PPGTT, we fake the * address space and refcounting. */ - kref_get(&dev_priv->mm.aliasing_ppgtt->ref); - } - - /* TODO: Until full ppgtt... */ - if (USES_ALIASING_PPGTT(dev)) ctx->vm = &dev_priv->mm.aliasing_ppgtt->base; - else + kref_get(&dev_priv->mm.aliasing_ppgtt->ref); + } else ctx->vm = &dev_priv->gtt.base; return ctx; @@ -500,7 +498,7 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file) mutex_lock(&dev->struct_mutex); file_priv->private_default_ctx = - i915_gem_create_context(dev, file_priv, false); + i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev)); mutex_unlock(&dev->struct_mutex); if (IS_ERR(file_priv->private_default_ctx)) { @@ -587,6 +585,7 @@ static int do_switch(struct intel_ring_buffer *ring, { struct drm_i915_private *dev_priv = ring->dev->dev_private; struct i915_hw_context *from = ring->last_context; + struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(to); u32 hw_flags = 0; int ret, i; @@ -598,17 +597,15 @@ static int do_switch(struct intel_ring_buffer *ring, if (from == to && from->last_ring == ring && !to->remap_slice) return 0; - if (ring != &dev_priv->ring[RCS]) { - if (from) - i915_gem_context_unreference(from); - goto done; + /* Trying to pin first makes error handling easier. */ + if (ring == &dev_priv->ring[RCS]) { + ret = i915_gem_obj_ggtt_pin(to->obj, + get_context_alignment(ring->dev), + false, false); + if (ret) + return ret; } - ret = i915_gem_obj_ggtt_pin(to->obj, get_context_alignment(ring->dev), - false, false); - if (ret) - return ret; - /* * Pin can switch back to the default context if we end up calling into * evict_everything - as a last ditch gtt defrag effort that also @@ -616,6 +613,18 @@ static int do_switch(struct intel_ring_buffer *ring, */ from = ring->last_context; + if (USES_FULL_PPGTT(ring->dev)) { + ret = ppgtt->switch_mm(ppgtt, ring, false); + if (ret) + goto unpin_out; + } + + if (ring != &dev_priv->ring[RCS]) { + if (from) + i915_gem_context_unreference(from); + goto done; + } + /* * Clear this page out of any CPU caches for coherent swap-in/out. Note * that thanks to write = false in this call and us not setting any gpu @@ -625,10 +634,8 @@ static int do_switch(struct intel_ring_buffer *ring, * XXX: We need a real interface to do this instead of trickery. */ ret = i915_gem_object_set_to_gtt_domain(to->obj, false); - if (ret) { - i915_gem_object_ggtt_unpin(to->obj); - return ret; - } + if (ret) + goto unpin_out; if (!to->obj->has_global_gtt_mapping) { struct i915_vma *vma = i915_gem_obj_to_vma(to->obj, @@ -640,10 +647,8 @@ static int do_switch(struct intel_ring_buffer *ring, hw_flags |= MI_RESTORE_INHIBIT; ret = mi_set_context(ring, to, hw_flags); - if (ret) { - i915_gem_object_ggtt_unpin(to->obj); - return ret; - } + if (ret) + goto unpin_out; for (i = 0; i < MAX_L3_SLICES; i++) { if (!(to->remap_slice & (1<last_ring = ring; return 0; + +unpin_out: + if (ring->id == RCS) + i915_gem_object_ggtt_unpin(to->obj); + return ret; } /** @@ -736,7 +746,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - ctx = i915_gem_create_context(dev, file_priv, false); + ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev)); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 8779d75..2e80f8c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -991,7 +991,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_hw_context *ctx; struct i915_address_space *vm; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); - u32 exec_start, exec_len; + u32 exec_start = args->batch_start_offset, exec_len; u32 mask, flags; int ret, mode, i; bool need_relocs; @@ -1112,9 +1112,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, i915_gem_context_reference(ctx); - /* HACK until we have full PPGTT */ - /* vm = ctx->vm; */ - vm = &dev_priv->gtt.base; + vm = ctx->vm; + if (!USES_FULL_PPGTT(dev)) + vm = &dev_priv->gtt.base; eb = eb_create(args); if (eb == NULL) { @@ -1170,6 +1170,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND); } + if (flags & I915_DISPATCH_SECURE) + exec_start += i915_gem_obj_ggtt_offset(batch_obj); + else + exec_start += i915_gem_obj_offset(batch_obj, vm); + ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas); if (ret) goto err; @@ -1199,8 +1204,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; } - exec_start = i915_gem_obj_offset(batch_obj, vm) + - args->batch_start_offset; + exec_len = args->batch_len; if (cliprects) { for (i = 0; i < args->num_cliprects; i++) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 88e49b1..4143efd 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -324,6 +324,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) container_of(vm, struct i915_hw_ppgtt, base); int i, j; + list_del(&vm->global_link); drm_mm_takedown(&vm->mm); for (i = 0; i < ppgtt->num_pd_pages ; i++) { @@ -755,6 +756,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) container_of(vm, struct i915_hw_ppgtt, base); int i; + list_del(&vm->global_link); drm_mm_takedown(&ppgtt->base.mm); drm_mm_remove_node(&ppgtt->node); @@ -901,17 +903,22 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) BUG(); if (!ret) { + struct drm_i915_private *dev_priv = dev->dev_private; kref_init(&ppgtt->ref); drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total); - if (INTEL_INFO(dev)->gen < 8) + i915_init_vm(dev_priv, &ppgtt->base); + if (INTEL_INFO(dev)->gen < 8) { gen6_write_pdes(ppgtt); + DRM_DEBUG("Adding PPGTT at offset %x\n", + ppgtt->pd_offset << 10); + } } return ret; } -static void __always_unused +static void ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) @@ -923,7 +930,7 @@ ppgtt_bind_vma(struct i915_vma *vma, vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level); } -static void __always_unused ppgtt_unbind_vma(struct i915_vma *vma) +static void ppgtt_unbind_vma(struct i915_vma *vma) { const unsigned long entry = vma->node.start >> PAGE_SHIFT; @@ -1719,8 +1726,13 @@ static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj, case 8: case 7: case 6: - vma->unbind_vma = ggtt_unbind_vma; - vma->bind_vma = ggtt_bind_vma; + if (i915_is_ggtt(vm)) { + vma->unbind_vma = ggtt_unbind_vma; + vma->bind_vma = ggtt_bind_vma; + } else { + vma->unbind_vma = ppgtt_unbind_vma; + vma->bind_vma = ppgtt_bind_vma; + } break; case 5: case 4: diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5dede92..80773ec 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -909,11 +909,6 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv, list_for_each_entry(vm, &dev_priv->vm_list, global_link) cnt++; - if (WARN(cnt > 1, "Multiple VMs not yet supported\n")) - cnt = 1; - - vm = &dev_priv->gtt.base; - error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC); error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC); error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count), diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 52aed89..d5b5284 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_NO_RELOC 25 #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 #define I915_PARAM_HAS_WT 27 +#define I915_PARAM_HAS_FULL_PPGTT 28 typedef struct drm_i915_getparam { int param; -- cgit v1.1 From d2ff7192f3cd77fcace0adf99a47ff5e30d6e0d3 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:27 -0800 Subject: drm/i915: Remove extraneous mm_switch in ppgtt enable Originally this commit message said: Now that do_switch does the mm switch, and we always enable the aliasing PPGTT, and contexts at the same time, there is no need to continue doing this during PPGTT enabling. Since originally writing the patch however, I introduced the concept of synchronous mm switching (using MMIO). Since this is generally not recommended in the spec (for reasons unknown), I've isolated its usage as much as possible. As such the "extraneous" switch only ever will occur when we have full PPGTT. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4143efd..ece9d8e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -612,6 +612,12 @@ static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) for_each_ring(ring, dev_priv, j) { I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + + /* We promise to do a switch later with FULL PPGTT. If this is + * aliasing, this is the one and only switch we'll do */ + if (USES_FULL_PPGTT(dev)) + continue; + ret = ppgtt->switch_mm(ppgtt, ring, true); if (ret) goto err_out; @@ -651,11 +657,17 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) /* GFX_MODE is per-ring on gen7+ */ I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + + /* We promise to do a switch later with FULL PPGTT. If this is + * aliasing, this is the one and only switch we'll do */ + if (USES_FULL_PPGTT(dev)) + continue; + ret = ppgtt->switch_mm(ppgtt, ring, true); if (ret) return ret; - } + return 0; } -- cgit v1.1 From 87d60b63e0371529faaed0667d457e5022964010 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:29 -0800 Subject: drm/i915: Add PPGTT dumper Dump the aliasing PPGTT with it. The aliasing PPGTT should actually always be empty. TODO: Broadwell. Since we don't yet use full PPGTT on Broadwell, not having the dumper is okay. Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_gtt.c | 57 +++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4c610ee..6a98b64 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1704,6 +1704,7 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev) seq_puts(m, "aliasing PPGTT:\n"); seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd_offset); + ppgtt->debug_dump(ppgtt, m); } seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK)); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8fd99ac..aab400c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -678,6 +678,7 @@ struct i915_hw_ppgtt { int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, struct intel_ring_buffer *ring, bool synchronous); + void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); }; struct i915_ctx_hang_stats { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ece9d8e..998f9a0 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -460,6 +460,62 @@ err_out: return ret; } +static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) +{ + struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private; + struct i915_address_space *vm = &ppgtt->base; + gen6_gtt_pte_t __iomem *pd_addr; + gen6_gtt_pte_t scratch_pte; + uint32_t pd_entry; + int pte, pde; + + scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); + + pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + + ppgtt->pd_offset / sizeof(gen6_gtt_pte_t); + + seq_printf(m, " VM %p (pd_offset %x-%x):\n", vm, + ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries); + for (pde = 0; pde < ppgtt->num_pd_entries; pde++) { + u32 expected; + gen6_gtt_pte_t *pt_vaddr; + dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde]; + pd_entry = readl(pd_addr + pde); + expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); + + if (pd_entry != expected) + seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", + pde, + pd_entry, + expected); + seq_printf(m, "\tPDE: %x\n", pd_entry); + + pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); + for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) { + unsigned long va = + (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) + + (pte * PAGE_SIZE); + int i; + bool found = false; + for (i = 0; i < 4; i++) + if (pt_vaddr[pte + i] != scratch_pte) + found = true; + if (!found) + continue; + + seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); + for (i = 0; i < 4; i++) { + if (pt_vaddr[pte + i] != scratch_pte) + seq_printf(m, " %08x", pt_vaddr[pte + i]); + else + seq_puts(m, " SCRATCH "); + } + seq_puts(m, "\n"); + } + kunmap_atomic(pt_vaddr); + } +} + static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) { struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private; @@ -873,6 +929,7 @@ alloc: ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true); + ppgtt->debug_dump = gen6_dump_ppgtt; DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n", ppgtt->node.size >> 20, -- cgit v1.1 From 1c60fef535d143860d5bf6593e24ab6417f5227c Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 6 Dec 2013 14:11:30 -0800 Subject: drm/i915: Dump all ppgtt Signed-off-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6a98b64..7273af0a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1657,6 +1657,17 @@ static int i915_swizzle_info(struct seq_file *m, void *data) return 0; } +static int per_file_ctx(int id, void *ptr, void *data) +{ + struct i915_hw_context *ctx = ptr; + struct seq_file *m = data; + struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx); + + ppgtt->debug_dump(ppgtt, m); + + return 0; +} + static void gen8_ppgtt_info(struct seq_file *m, struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -1686,6 +1697,7 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ring_buffer *ring; + struct drm_file *file; int i; if (INTEL_INFO(dev)->gen == 6) @@ -1704,7 +1716,20 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev) seq_puts(m, "aliasing PPGTT:\n"); seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd_offset); + ppgtt->debug_dump(ppgtt, m); + } else + return; + + list_for_each_entry_reverse(file, &dev->filelist, lhead) { + struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_hw_ppgtt *pvt_ppgtt; + + pvt_ppgtt = ctx_to_ppgtt(file_priv->private_default_ctx); + seq_printf(m, "proc: %s\n", + get_pid_task(file->pid, PIDTYPE_PID)->comm); + seq_puts(m, " default context:\n"); + idr_for_each(&file_priv->context_idr, per_file_ctx, m); } seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK)); } -- cgit v1.1 From 02f6bcccf7c324115747aae2f0addd6af5d321cd Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 18 Dec 2013 16:30:22 +0100 Subject: drm/i915: Reject the pin ioctl on gen6+ Especially with ppgtt this kinda stopped making sense. And if we indeed need this to hack around an issue, we need something that also works for non-root. Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f3b0025..9ff3509 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3916,6 +3916,9 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int ret; + if (INTEL_INFO(dev)->gen >= 6) + return -ENODEV; + ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; -- cgit v1.1 From 7d9c477966e739a52d4c9655149958a2671ef376 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 18 Dec 2013 16:32:00 +0100 Subject: drm/i915: Drop I915_PARAM_HAS_FULL_PPGTT again At least for now userspace has no business at all to know that we switch address spaces around. For any need it has to know whether hw ppgtt is enabled (e.g. to set bits in MI commands correctly) it can inquire the existing ppgtt param. v2: Avoid ternary operator precedence fail (Chris). Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 5 +---- include/uapi/drm/i915_drm.h | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 9fedfa0..24a36f2 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -988,7 +988,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = HAS_WT(dev); break; case I915_PARAM_HAS_ALIASING_PPGTT: - value = dev_priv->mm.aliasing_ppgtt ? 1 : 0; + value = dev_priv->mm.aliasing_ppgtt || USES_FULL_PPGTT(dev); break; case I915_PARAM_HAS_WAIT_TIMEOUT: value = 1; @@ -1011,9 +1011,6 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_HANDLE_LUT: value = 1; break; - case I915_PARAM_HAS_FULL_PPGTT: - value = USES_FULL_PPGTT(dev); - break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d5b5284..52aed89 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -337,7 +337,6 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_NO_RELOC 25 #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 #define I915_PARAM_HAS_WT 27 -#define I915_PARAM_HAS_FULL_PPGTT 28 typedef struct drm_i915_getparam { int param; -- cgit v1.1 From 7c9c4b8f5dfe224ce587a470ce8817214c92271e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 18 Dec 2013 16:37:49 +0100 Subject: drm/i915: Reject non-default contexts on non-render again This reverts the abi-change from commit 67e3d2979be1bf42d1818b2961c671eb31e0b4d9 Author: Ben Widawsky Date: Fri Dec 6 14:11:01 2013 -0800 drm/i915: Permit contexts on all rings We don't actually need this, only the internal changes to allow contexts on all rings for the purpose of ppgtt switching are required. And I'm not sure whether this is the right thing to do given some of the hw features in the pipeline. Also, new abi needs userspace patches as a proof-of-need, which is completely lacking here. Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 2e80f8c..f5a1e0c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -890,11 +890,14 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, static struct i915_hw_context * i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, - const u32 ctx_id) + struct intel_ring_buffer *ring, const u32 ctx_id) { struct i915_hw_context *ctx = NULL; struct i915_ctx_hang_stats *hs; + if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_ID) + return ERR_PTR(-EINVAL); + ctx = i915_gem_context_get(file->driver_priv, ctx_id); if (IS_ERR_OR_NULL(ctx)) return ctx; @@ -1103,7 +1106,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } - ctx = i915_gem_validate_context(dev, file, ctx_id); + ctx = i915_gem_validate_context(dev, file, ring, ctx_id); if (IS_ERR_OR_NULL(ctx)) { mutex_unlock(&dev->struct_mutex); ret = PTR_ERR(ctx); -- cgit v1.1 From bfca05275a594920ad5111f5a23ec6fadc0d0780 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 18 Dec 2013 16:40:38 +0100 Subject: Revert "drm/i915: Do not allow buffers at offset 0" This reverts commit 4fe9adbc36097317864bfec3c32047da7c45a2fa. The patch completely lacks a detailed explanation of what exactly blows up and how, so is insufficiently justified as a band-aid. Otoh the justification as a safety measure against userspace botching up relocations is also fairly weak: If we want real project we need to at least make the gab big enough that the gpu doesn't scribble over more important stuff. With 4k screens that would be 32MB. Also I think this would be much better in conjunction with a (debug) switch to disable our use of the scratch page. Hence revert this. Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9ff3509..ef274f6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3275,11 +3275,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, } search_free: - /* FIXME: Some tests are failing when they receive a reloc of 0. To - * prevent this, we simply don't allow the 0th offset. */ ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, size, alignment, - obj->cache_level, 1, gtt_max, + obj->cache_level, 0, gtt_max, DRM_MM_SEARCH_DEFAULT); if (ret) { ret = i915_gem_evict_something(dev, vm, size, alignment, -- cgit v1.1 From 2c9f8d56a1ccc9064180a95cf22531c4b37154be Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 18 Dec 2013 17:38:53 +0100 Subject: drm/i915: Reject NEEDS_GTT relocations with full ppgtt Doesn't make sense. Spotted while fixing an issue Chris noticed in the same area. Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f5a1e0c..2774855 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -128,6 +128,12 @@ eb_lookup_vmas(struct eb_vmas *eb, struct i915_vma *vma; struct i915_address_space *bind_vm = vm; + if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT && + USES_FULL_PPGTT(vm->dev)) { + ret = -EINVAL; + goto out; + } + /* If we have secure dispatch, or the userspace assures us that * they know what they're doing, use the GGTT VM. */ -- cgit v1.1 From a7c1d426ef335ccfb6bd567a3f616fa232418fa2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 18 Dec 2013 17:46:18 +0100 Subject: drm/i915: Don't check for NEEDS_GTT when deciding the address space This means something different and is only relevant for gen6 and the reason why we cant use anything else than aliasing ppgtt there. Note that the currently implemented logic for secure batches is broken: Userspace wants the buffer both in ppgtt (for self-referencing relocations) and in ggtt (for priveledge operations). This is the same issue the command parser is also facing. Unfortunately our coverage for corner-cases of self-referencing batches is spotty. Note that this will break vsync'ed Xv and DRI2 copies. Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 2774855..a36511d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -137,8 +137,7 @@ eb_lookup_vmas(struct eb_vmas *eb, /* If we have secure dispatch, or the userspace assures us that * they know what they're doing, use the GGTT VM. */ - if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT || - ((args->flags & I915_EXEC_SECURE) && + if (((args->flags & I915_EXEC_SECURE) && (i == (args->buffer_count - 1)))) bind_vm = &dev_priv->gtt.base; -- cgit v1.1 From 72ad5c45f0c9036cbc6d23aeff4e8beb6d8b5e33 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 2 Jan 2014 19:50:27 -1000 Subject: drm/i915/ppgtt: Fix ioctl errno for "no such context" Without this fix the ioctls silently succeeded (but actually did nothing). It makes all the code which calls into this function way too confusing. v2: Fix destroy IOCTL as well v3: Clarify the other two callers of i915_gem_context_get() to never check for NULL. (Mika) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72903 Signed-off-by: Ben Widawsky Testcase: igt/gem_ctx_exec/basic [danvet: Fix up the commit message and actually bother to mention the testcase this fixes.] Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 12 +++++++++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index ebe0f67..44dddc00 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -526,10 +526,16 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) struct i915_hw_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) { + struct i915_hw_context *ctx; + if (!HAS_HW_CONTEXTS(file_priv->dev_priv->dev)) return file_priv->private_default_ctx; - return (struct i915_hw_context *)idr_find(&file_priv->context_idr, id); + ctx = (struct i915_hw_context *)idr_find(&file_priv->context_idr, id); + if (!ctx) + return ERR_PTR(-ENOENT); + + return ctx; } static inline int @@ -776,9 +782,9 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, return ret; ctx = i915_gem_context_get(file_priv, args->ctx_id); - if (!ctx) { + if (IS_ERR(ctx)) { mutex_unlock(&dev->struct_mutex); - return -ENOENT; + return PTR_ERR(ctx); } idr_remove(&ctx->file_priv->context_idr, ctx->id); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a36511d..0843e0e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -904,7 +904,7 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ERR_PTR(-EINVAL); ctx = i915_gem_context_get(file->driver_priv, ctx_id); - if (IS_ERR_OR_NULL(ctx)) + if (IS_ERR(ctx)) return ctx; hs = &ctx->hang_stats; @@ -1112,7 +1112,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } ctx = i915_gem_validate_context(dev, file, ring, ctx_id); - if (IS_ERR_OR_NULL(ctx)) { + if (IS_ERR(ctx)) { mutex_unlock(&dev->struct_mutex); ret = PTR_ERR(ctx); goto pre_mutex_err; -- cgit v1.1 From 0e46ce2e7a2dc6a60b321b741d45567e6feb3502 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 8 Jan 2014 16:10:27 +0100 Subject: drm/i915: fix ppgtt dump code for DEBUG_FS=n A regression in the topic/ppgtt branch introduce in commit 87d60b63e0371529faaed0667d457e5022964010 Author: Ben Widawsky Date: Fri Dec 6 14:11:29 2013 -0800 drm/i915: Add PPGTT dumper The issue is that we're missing the definitions for the seq_file functions and hence compilation fails. v2: Just include the right header instead of splattering #ifdefs all over the place (Chris). Cc: Chris Wilson Reported-by: kbuild test robot Reported-by: Antti Koskipaa Cc: Ben Widawsky Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 998f9a0..6e9e621 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -22,6 +22,7 @@ * */ +#include #include #include #include "i915_drv.h" -- cgit v1.1 From c2cf2416cadc13aeccb3df10be893b15fb16ac17 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 24 Dec 2013 16:02:54 -0800 Subject: drm/i915/bdw: Return -ENONENT on default ctx destroy This was an accidental "ABI" change introduced during PPGTT: commit 0eea67eb26000657079b7fc41079097942339452 Author: Ben Widawsky Date: Fri Dec 6 14:11:19 2013 -0800 drm/i915: Create a per file_priv default context The failure test application actually tests the return type. The other option is to simply change the test. Signed-off-by: Ben Widawsky Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 44dddc00..c5975f6 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -775,7 +775,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, return -ENODEV; if (args->ctx_id == DEFAULT_CONTEXT_ID) - return -EPERM; + return -ENOENT; ret = i915_mutex_lock_interruptible(dev); if (ret) -- cgit v1.1 From ad1d219974a3d13412268525309c5892f6779ae9 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sat, 28 Dec 2013 13:31:49 -0800 Subject: drm/i915: set ctx->initialized only after RCS The initialized flag is used to specify a context has been initialized and it's context is safe to load, ie. the 3d state is setup properly. With full PPGTT, we emit the address space loads during context switch and this currently marks a context as initialized. With full PPGTT patches, if a client first emits a batch to !RCS, then later, RCS, the code will mistake the context as initialized and try to reload an uninitialized context. 1. context 1 blit // context marked as initialized, but isn't 2. context 1 render // loads random state from step 2 It is really easy to hit this with a planned upcoming patch which makes default context reuse possible. NOTE: This should only effect full PPGTT branches, ie. current drm-intel-nightly. Thanks to Chris for helping me track this down. Cc: Chris Wilson Signed-off-by: Ben Widawsky Reviewed-by: Chris Wilson [danvet: Simplify the failure scenario in the commit message according to Chris' review a bit.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index c5975f6..112f865 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -692,10 +692,11 @@ static int do_switch(struct intel_ring_buffer *ring, i915_gem_context_unreference(from); } + to->is_initialized = true; + done: i915_gem_context_reference(to); ring->last_context = to; - to->is_initialized = true; to->last_ring = ring; return 0; -- cgit v1.1 From e91030380282240477864bf9d721b8c966acb6d9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Jan 2014 11:45:14 +0000 Subject: drm/i915: Free requests after object release when retiring requests Freeing a request triggers the destruction of the context. This needs to occur after all objects are themselves unbound from the context, and so the free request needs to occur after the object release during retire. This tidies up commit e20780439b26ba95aeb29d3e27cd8cc32bc82a4c Author: Ben Widawsky Date: Fri Dec 6 14:11:22 2013 -0800 drm/i915: Defer request freeing by simply swapping the order of operations rather than introducing further complexity - as noted during review. Signed-off-by: Chris Wilson Cc: Ben Widawsky Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 47 ++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ef274f6..4f54a13 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2426,8 +2426,6 @@ void i915_gem_reset(struct drm_device *dev) void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) { - LIST_HEAD(deferred_request_free); - struct drm_i915_gem_request *request; uint32_t seqno; if (list_empty(&ring->request_list)) @@ -2437,7 +2435,27 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) seqno = ring->get_seqno(ring, true); + /* Move any buffers on the active list that are no longer referenced + * by the ringbuffer to the flushing/inactive lists as appropriate, + * before we free the context associated with the requests. + */ + while (!list_empty(&ring->active_list)) { + struct drm_i915_gem_object *obj; + + obj = list_first_entry(&ring->active_list, + struct drm_i915_gem_object, + ring_list); + + if (!i915_seqno_passed(seqno, obj->last_read_seqno)) + break; + + i915_gem_object_move_to_inactive(obj); + } + + while (!list_empty(&ring->request_list)) { + struct drm_i915_gem_request *request; + request = list_first_entry(&ring->request_list, struct drm_i915_gem_request, list); @@ -2453,23 +2471,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) */ ring->last_retired_head = request->tail; - list_move_tail(&request->list, &deferred_request_free); - } - - /* Move any buffers on the active list that are no longer referenced - * by the ringbuffer to the flushing/inactive lists as appropriate. - */ - while (!list_empty(&ring->active_list)) { - struct drm_i915_gem_object *obj; - - obj = list_first_entry(&ring->active_list, - struct drm_i915_gem_object, - ring_list); - - if (!i915_seqno_passed(seqno, obj->last_read_seqno)) - break; - - i915_gem_object_move_to_inactive(obj); + i915_gem_free_request(request); } if (unlikely(ring->trace_irq_seqno && @@ -2478,13 +2480,6 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) ring->trace_irq_seqno = 0; } - /* Finish processing active list before freeing request */ - while (!list_empty(&deferred_request_free)) { - request = list_first_entry(&deferred_request_free, - struct drm_i915_gem_request, - list); - i915_gem_free_request(request); - } WARN_ON(i915_verify_lists(ring->dev)); } -- cgit v1.1 From f72d21eddfa900bfa2674195dcc0203e18d0cc62 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Jan 2014 22:57:22 +0000 Subject: drm/i915: Place the Global GTT VM first in the list of VM This is useful for debugging as we then know that the first entry is always the global GTT, and all later entries the per-process GTT VM. Signed-off-by: Chris Wilson Reviewed-by: Ben Widawsky Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4f54a13..03c2179 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4577,7 +4577,7 @@ void i915_init_vm(struct drm_i915_private *dev_priv, INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); INIT_LIST_HEAD(&vm->global_link); - list_add(&vm->global_link, &dev_priv->vm_list); + list_add_tail(&vm->global_link, &dev_priv->vm_list); } void -- cgit v1.1