author	Chris Wilson <chris@chris-wilson.co.uk>	2017-05-03 10:39:21 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2017-05-03 11:08:48 +0100
commit	4797948071f607c5b43753cb8f1b7ddcf22e146d
tree	65ee87bef56977d4f056947aaa664fe7b47c11bd /drivers/gpu/drm/i915/i915_gem_timeline.h
parent	ceae14bd4cc4333b9a3b0b6b9457bb16e7ca410a
drm/i915: Squash repeated awaits on the same fence
Track the latest fence waited upon on each context, and only add a new
asynchronous wait if the new fence is more recent than the recorded
fence for that context. This requires us to filter out unordered
timelines, which are noted by DMA_FENCE_NO_CONTEXT. However, in the
absence of a universal identifier, we have to use our own
i915->mm.unordered_timeline token.
v2: Throw around the debug crutches
v3: Inline the likely case of the pre-allocation cache being full.
v4: Drop the pre-allocation support, we can lose the most recent fence
in case of allocation failure -- it just means we may emit more awaits
than strictly necessary but will not break.
v5: Trim allocation size for leaf nodes, they only need an array of u32
not pointers.
v6: Create mock_timeline to tidy selftest writing
v7: s/intel_timeline_sync_get/intel_timeline_sync_is_later/ (Tvrtko)
v8: Prune the stale sync points when we idle.
v9: Include a small benchmark in the kselftests
v10: Separate the idr implementation into its own compartment. (Tvrtko)
v11: Refactor igt_sync kselftests to avoid deep nesting (Tvrtko)
v12: __sync_leaf_idx() to assert that p->height is 0 when checking leaves
v13: kselftests to investigate struct i915_syncmap itself (Tvrtko)
v14: Foray into ascii art graphs
v15: Take into account that the random lookup/insert does 2 prng calls,
not 1, when benchmarking, and use for_each_set_bit() (Tvrtko)
v16: Improved ascii art
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170503093924.5320-4-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_timeline.h')
-rw-r--r--	drivers/gpu/drm/i915/i915_gem_timeline.h	38
1 file changed, 38 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index 6c53e14..ff65c64 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -27,7 +27,9 @@
 
 #include <linux/list.h>
 
+#include "i915_utils.h"
 #include "i915_gem_request.h"
+#include "i915_syncmap.h"
 
 struct i915_gem_timeline;
 
@@ -55,6 +57,17 @@ struct intel_timeline {
 	 * struct_mutex.
 	 */
 	struct i915_gem_active last_request;
+
+	/**
+	 * We track the most recent seqno that we wait on in every context so
+	 * that we only have to emit a new await and dependency on a more
+	 * recent sync point. As the contexts may be executed out-of-order, we
+	 * have to track each individually and can not rely on an absolute
+	 * global_seqno. When we know that all tracked fences are completed
+	 * (i.e. when the driver is idle), we know that the syncmap is
+	 * redundant and we can discard it without loss of generality.
+	 */
+	struct i915_syncmap *sync;
 	u32 sync_seqno[I915_NUM_ENGINES];
 
 	struct i915_gem_timeline *common;
@@ -73,6 +86,31 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
 			   struct i915_gem_timeline *tl,
 			   const char *name);
 int i915_gem_timeline_init__global(struct drm_i915_private *i915);
+void i915_gem_timelines_mark_idle(struct drm_i915_private *i915);
 void i915_gem_timeline_fini(struct i915_gem_timeline *tl);
 
+static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
+					    u64 context, u32 seqno)
+{
+	return i915_syncmap_set(&tl->sync, context, seqno);
+}
+
+static inline int intel_timeline_sync_set(struct intel_timeline *tl,
+					  const struct dma_fence *fence)
+{
+	return __intel_timeline_sync_set(tl, fence->context, fence->seqno);
+}
+
+static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl,
+						  u64 context, u32 seqno)
+{
+	return i915_syncmap_is_later(&tl->sync, context, seqno);
+}
+
+static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
+						const struct dma_fence *fence)
+{
+	return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
+}
+
 #endif
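
For context, a minimal sketch (not part of this patch, whose diff above is limited to the header) of how the new helpers are intended to be used by a request's await path: check whether this timeline has already waited on an equal-or-later seqno from the same fence context, and only record a new sync point once an await has actually been emitted. The function name example_await_fence() and the emit-await placeholder are illustrative assumptions, not code from the patch.

/*
 * Illustrative sketch only: squash repeated awaits using the per-timeline
 * syncmap. Fences on unordered timelines carry no usable context and are
 * filtered out elsewhere (the commit message mentions the driver's own
 * i915->mm.unordered_timeline token for this).
 */
static int example_await_fence(struct intel_timeline *tl,
			       const struct dma_fence *fence)
{
	/* Already awaiting an equal or later seqno from this context? */
	if (intel_timeline_sync_is_later(tl, fence))
		return 0;

	/* ... emit the actual await/dependency on @fence here ... */

	/*
	 * Record the new high-water mark. If the allocation inside
	 * i915_syncmap_set() fails, the only cost is emitting redundant
	 * awaits later (see v4 in the changelog above).
	 */
	return intel_timeline_sync_set(tl, fence);
}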