From d07f0e59b2c762584478920cd2d11fba2980a94a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:44 +0100 Subject: drm/i915: Move GEM activity tracking into a common struct reservation_object In preparation for supporting many distinct timelines, we need to expand the activity tracking on the GEM object to handle more than just a request per engine. We already use the struct reservation_object on the dma-buf to handle many fence contexts, so integrating that into the GEM object itself is the preferred solution. (For example, we can now share the same reservation_object between every consumer/producer using this buffer and skip the manual import/export via dma-buf.) v2: Reimplement busy-ioctl (by walking the reservation object) and postpone the ABI change for another day. Similarly, use the reservation object to find the last_write request (if active and from i915) for choosing display CS flips. Caveats: * busy-ioctl: busy-ioctl only reports on the native fences; it will not warn of stalls (in set-domain-ioctl, pread/pwrite, etc.) if the object is being rendered to by external fences. It also will not report the same busy state as wait-ioctl (or polling on the dma-buf) in the same circumstances. On the plus side, it does retain reporting of which *i915* engines are engaged with this object. * non-blocking atomic modesets take a step backwards as the wait for render completion blocks the ioctl. This is fixed in a subsequent patch that uses a fence instead to await the rendering; see "drm/i915: Restore nonblocking awaits for modesetting" * dynamic array manipulation for shared-fences in reservation is slower than the previous lockless static assignment (e.g. gem_exec_lut_handle runtime on ivb goes from 42s to 66s), mainly due to atomic operations (maintaining the fence refcounts). * loss of object-level retirement callbacks, emulated by VMA retirement tracking. 
* minor loss of object-level last activity information from debugfs, could be replaced with per-vma information if desired Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-21-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 53 ++-------------------------------- 1 file changed, 3 insertions(+), 50 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_dmabuf.c') diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 4d45f20..5e38299 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -211,60 +211,17 @@ static const struct dma_buf_ops i915_dmabuf_ops = { .end_cpu_access = i915_gem_end_cpu_access, }; -static void export_fences(struct drm_i915_gem_object *obj, - struct dma_buf *dma_buf) -{ - struct reservation_object *resv = dma_buf->resv; - struct drm_i915_gem_request *req; - unsigned long active; - int idx; - - active = __I915_BO_ACTIVE(obj); - if (!active) - return; - - /* Serialise with execbuf to prevent concurrent fence-loops */ - mutex_lock(&obj->base.dev->struct_mutex); - - /* Mark the object for future fences before racily adding old fences */ - obj->base.dma_buf = dma_buf; - - ww_mutex_lock(&resv->lock, NULL); - - for_each_active(active, idx) { - req = i915_gem_active_get(&obj->last_read[idx], - &obj->base.dev->struct_mutex); - if (!req) - continue; - - if (reservation_object_reserve_shared(resv) == 0) - reservation_object_add_shared_fence(resv, &req->fence); - - i915_gem_request_put(req); - } - - req = i915_gem_active_get(&obj->last_write, - &obj->base.dev->struct_mutex); - if (req) { - reservation_object_add_excl_fence(resv, &req->fence); - i915_gem_request_put(req); - } - - ww_mutex_unlock(&resv->lock); - mutex_unlock(&obj->base.dev->struct_mutex); -} - struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags) { struct 
drm_i915_gem_object *obj = to_intel_bo(gem_obj); DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - struct dma_buf *dma_buf; exp_info.ops = &i915_dmabuf_ops; exp_info.size = gem_obj->size; exp_info.flags = flags; exp_info.priv = gem_obj; + exp_info.resv = obj->resv; if (obj->ops->dmabuf_export) { int ret = obj->ops->dmabuf_export(obj); @@ -272,12 +229,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, return ERR_PTR(ret); } - dma_buf = drm_gem_dmabuf_export(dev, &exp_info); - if (IS_ERR(dma_buf)) - return dma_buf; - - export_fences(obj, dma_buf); - return dma_buf; + return drm_gem_dmabuf_export(dev, &exp_info); } static struct sg_table * @@ -335,6 +287,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, drm_gem_private_object_init(dev, &obj->base, dma_buf->size); i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops); obj->base.import_attach = attach; + obj->resv = dma_buf->resv; /* We use GTT as shorthand for a coherent domain, one that is * neither in the GPU cache nor in the CPU cache, where all -- cgit v1.1