diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 111 |
1 files changed, 84 insertions, 27 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 32e6aad..f56af0a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1146,23 +1146,74 @@ static bool missed_irq(struct drm_i915_private *dev_priv, return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); } -static int __i915_spin_request(struct drm_i915_gem_request *req) +static unsigned long local_clock_us(unsigned *cpu) +{ + unsigned long t; + + /* Cheaply and approximately convert from nanoseconds to microseconds. + * The result and subsequent calculations are also defined in the same + * approximate microseconds units. The principal source of timing + * error here is from the simple truncation. + * + * Note that local_clock() is only defined wrt to the current CPU; + * the comparisons are no longer valid if we switch CPUs. Instead of + * blocking preemption for the entire busywait, we can detect the CPU + * switch and use that as indicator of system load and a reason to + * stop busywaiting, see busywait_stop(). + */ + *cpu = get_cpu(); + t = local_clock() >> 10; + put_cpu(); + + return t; +} + +static bool busywait_stop(unsigned long timeout, unsigned cpu) +{ + unsigned this_cpu; + + if (time_after(local_clock_us(&this_cpu), timeout)) + return true; + + return this_cpu != cpu; +} + +static int __i915_spin_request(struct drm_i915_gem_request *req, int state) { unsigned long timeout; + unsigned cpu; + + /* When waiting for high frequency requests, e.g. during synchronous + * rendering split between the CPU and GPU, the finite amount of time + * required to set up the irq and wait upon it limits the response + * rate. By busywaiting on the request completion for a short while we + * can service the high frequency waits as quick as possible. However, + * if it is a slow request, we want to sleep as quickly as possible. + * The tradeoff between waiting and sleeping is roughly the time it + * takes to sleep on a request, on the order of a microsecond. + */ - if (i915_gem_request_get_ring(req)->irq_refcount) + if (req->ring->irq_refcount) return -EBUSY; - timeout = jiffies + 1; + /* Only spin if we know the GPU is processing this request */ + if (!i915_gem_request_started(req, true)) + return -EAGAIN; + + timeout = local_clock_us(&cpu) + 5; while (!need_resched()) { if (i915_gem_request_completed(req, true)) return 0; - if (time_after_eq(jiffies, timeout)) + if (signal_pending_state(state, current)) + break; + + if (busywait_stop(timeout, cpu)) break; cpu_relax_lowlatency(); } + if (i915_gem_request_completed(req, false)) return 0; @@ -1197,6 +1248,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, struct drm_i915_private *dev_priv = dev->dev_private; const bool irq_test_in_progress = ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring); + int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; DEFINE_WAIT(wait); unsigned long timeout_expire; s64 before, now; @@ -1229,7 +1281,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, before = ktime_get_raw_ns(); /* Optimistic spin for the next jiffie before touching IRQs */ - ret = __i915_spin_request(req); + ret = __i915_spin_request(req, state); if (ret == 0) goto out; @@ -1241,8 +1293,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, for (;;) { struct timer_list timer; - prepare_to_wait(&ring->irq_queue, &wait, - interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); + prepare_to_wait(&ring->irq_queue, &wait, state); /* We need to check whether any gpu reset happened in between * the caller grabbing the seqno and now ... */ @@ -1260,7 +1311,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, break; } - if (interruptible && signal_pending(current)) { + if (signal_pending_state(state, current)) { ret = -ERESTARTSYS; break; } @@ -2554,6 +2605,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, request->batch_obj = obj; request->emitted_jiffies = jiffies; + request->previous_seqno = ring->last_submitted_seqno; ring->last_submitted_seqno = request->seqno; list_add_tail(&request->list, &ring->request_list); @@ -4080,6 +4132,29 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) return false; } +void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + bool mappable, fenceable; + u32 fence_size, fence_alignment; + + fence_size = i915_gem_get_gtt_size(obj->base.dev, + obj->base.size, + obj->tiling_mode); + fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, + obj->base.size, + obj->tiling_mode, + true); + + fenceable = (vma->node.size == fence_size && + (vma->node.start & (fence_alignment - 1)) == 0); + + mappable = (vma->node.start + fence_size <= + to_i915(obj->base.dev)->gtt.mappable_end); + + obj->map_and_fenceable = mappable && fenceable; +} + static int i915_gem_object_do_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -4147,25 +4222,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && (bound ^ vma->bound) & GLOBAL_BIND) { - bool mappable, fenceable; - u32 fence_size, fence_alignment; - - fence_size = i915_gem_get_gtt_size(obj->base.dev, - obj->base.size, - obj->tiling_mode); - fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, - obj->base.size, - obj->tiling_mode, - true); - - fenceable = (vma->node.size == fence_size && - (vma->node.start & (fence_alignment - 1)) == 0); - - mappable = (vma->node.start + fence_size <= - dev_priv->gtt.mappable_end); - - obj->map_and_fenceable = mappable && fenceable; - + __i915_vma_set_map_and_fenceable(vma); WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); } |