summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem_execbuffer.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2016-10-28 13:58:44 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2016-10-28 20:53:50 +0100
commitd07f0e59b2c762584478920cd2d11fba2980a94a (patch)
treea9fc0cade87570bdec356493e6744ee5223bdaaf /drivers/gpu/drm/i915/i915_gem_execbuffer.c
parentf0cd518206e1a47e57bc251e1faba9d38eadcc59 (diff)
downloadop-kernel-dev-d07f0e59b2c762584478920cd2d11fba2980a94a.zip
op-kernel-dev-d07f0e59b2c762584478920cd2d11fba2980a94a.tar.gz
drm/i915: Move GEM activity tracking into a common struct reservation_object
In preparation to support many distinct timelines, we need to expand the activity tracking on the GEM object to handle more than just a request per engine. We already use the struct reservation_object on the dma-buf to handle many fence contexts, so integrating that into the GEM object itself is the preferred solution. (For example, we can now share the same reservation_object between every consumer/producer using this buffer and skip the manual import/export via dma-buf.) v2: Reimplement busy-ioctl (by walking the reservation object), postpone the ABI change for another day. Similarly use the reservation object to find the last_write request (if active and from i915) for choosing display CS flips. Caveats: * busy-ioctl: busy-ioctl only reports on the native fences, it will not warn of stalls (in set-domain-ioctl, pread/pwrite etc) if the object is being rendered to by external fences. It also will not report the same busy state as wait-ioctl (or polling on the dma-buf) in the same circumstances. On the plus side, it does retain reporting of which *i915* engines are engaged with this object. * non-blocking atomic modesets take a step backwards as the wait for render completion blocks the ioctl. This is fixed in a subsequent patch to use a fence instead for awaiting on the rendering, see "drm/i915: Restore nonblocking awaits for modesetting" * dynamic array manipulation for shared-fences in reservation is slower than the previous lockless static assignment (e.g. gem_exec_lut_handle runtime on ivb goes from 42s to 66s), mainly due to atomic operations (maintaining the fence refcounts). * loss of object-level retirement callbacks, emulated by VMA retirement tracking. * minor loss of object-level last activity information from debugfs, could be replaced with per-vma information if desired Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-21-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c55
1 files changed, 11 insertions, 44 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d95c4e0..c35e847 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,7 +34,6 @@
#include <drm/i915_drm.h>
#include "i915_drv.h"
-#include "i915_gem_dmabuf.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
@@ -1101,45 +1100,20 @@ err:
return ret;
}
-static unsigned int eb_other_engines(struct drm_i915_gem_request *req)
-{
- unsigned int mask;
-
- mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK;
- mask <<= I915_BO_ACTIVE_SHIFT;
-
- return mask;
-}
-
static int
i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
struct list_head *vmas)
{
- const unsigned int other_rings = eb_other_engines(req);
struct i915_vma *vma;
int ret;
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_object *obj = vma->obj;
- struct reservation_object *resv;
- if (obj->flags & other_rings) {
- ret = i915_gem_request_await_object
- (req, obj, obj->base.pending_write_domain);
- if (ret)
- return ret;
- }
-
- resv = i915_gem_object_get_dmabuf_resv(obj);
- if (resv) {
- ret = i915_sw_fence_await_reservation
- (&req->submit, resv, &i915_fence_ops,
- obj->base.pending_write_domain,
- I915_FENCE_TIMEOUT,
- GFP_KERNEL | __GFP_NOWARN);
- if (ret < 0)
- return ret;
- }
+ ret = i915_gem_request_await_object
+ (req, obj, obj->base.pending_write_domain);
+ if (ret)
+ return ret;
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
i915_gem_clflush_object(obj, false);
@@ -1281,8 +1255,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
- obj->mm.dirty = true; /* be paranoid */
-
/* Add a reference if we're newly entering the active list.
* The order in which we add operations to the retirement queue is
* vital here: mark_active adds to the start of the callback list,
@@ -1290,11 +1262,14 @@ void i915_vma_move_to_active(struct i915_vma *vma,
* add the active reference first and queue for it to be dropped
* *last*.
*/
- i915_gem_object_set_active(obj, idx);
- i915_gem_active_set(&obj->last_read[idx], req);
+ if (!i915_vma_is_active(vma))
+ obj->active_count++;
+ i915_vma_set_active(vma, idx);
+ i915_gem_active_set(&vma->last_read[idx], req);
+ list_move_tail(&vma->vm_link, &vma->vm->active_list);
if (flags & EXEC_OBJECT_WRITE) {
- i915_gem_active_set(&obj->last_write, req);
+ i915_gem_active_set(&vma->last_write, req);
intel_fb_obj_invalidate(obj, ORIGIN_CS);
@@ -1304,21 +1279,13 @@ void i915_vma_move_to_active(struct i915_vma *vma,
if (flags & EXEC_OBJECT_NEEDS_FENCE)
i915_gem_active_set(&vma->last_fence, req);
-
- i915_vma_set_active(vma, idx);
- i915_gem_active_set(&vma->last_read[idx], req);
- list_move_tail(&vma->vm_link, &vma->vm->active_list);
}
static void eb_export_fence(struct drm_i915_gem_object *obj,
struct drm_i915_gem_request *req,
unsigned int flags)
{
- struct reservation_object *resv;
-
- resv = i915_gem_object_get_dmabuf_resv(obj);
- if (!resv)
- return;
+ struct reservation_object *resv = obj->resv;
/* Ignore errors from failing to allocate the new fence, we can't
* handle an error right now. Worst case should be missed
OpenPOWER on IntegriCloud