author		Oscar Mateo <oscar.mateo@intel.com>	2014-07-24 17:04:40 +0100
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2014-08-14 22:43:58 +0200
commit		e1fee72c2ea2e9c0c6e6743d32a6832f21337d6c (patch)
tree		2ef77c1d3490d28d0e518e9578916238aa4677e9
parent		e981e7b17f2b41970e7e2367d4225e0bb3310667 (diff)
drm/i915/bdw: Avoid non-lite-restore preemptions
In the current Execlists feeding mechanism, full preemption is not supported yet: only lite-restores are allowed (that is, the GPU simply samples a new tail pointer for the context currently in execution).

But we have identified a scenario in which a full preemption occurs:

1) We submit two contexts for execution (A & B).
2) The GPU finishes with the first one (A), switches to the second one (B) and informs us.
3) We submit B again (hoping to cause a lite restore) together with C, but in the time we spend writing to the ELSP, the GPU finishes B.
4) The GPU starts executing B again (since we told it to).
5) We receive a B-finished interrupt and, mistakenly, we submit C (again) and D, causing a full preemption of B.

The race is avoided by keeping track of how many times a context has been submitted to the hardware and by better discriminating the received context switch interrupts: in the example, once we have submitted B twice, we won't submit C and D as soon as we receive the notification that B is completed, because we were expecting a LITE_RESTORE and didn't get one, so we know a second completion will be received shortly.

Without this explicit checking, the batch buffer execution order somehow gets messed up. This can be verified with the IGT test sent together with the series.

I don't know the exact mechanism by which the preemption messes with the execution order but, since other people are working on the Scheduler + Preemption on Execlists, I didn't try to fix it. In this series, only Lite Restores are supported (other kinds of preemption WARN).

v2: elsp_submitted belongs in the new intel_ctx_submit_request. Several rebase changes.

v3: Clarify how the race is avoided, as requested by Daniel.

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
[danvet: Align function parameters ...]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r--	drivers/gpu/drm/i915/intel_lrc.c	29
-rw-r--r--	drivers/gpu/drm/i915/intel_lrc.h	2
2 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 22f6a7c..0f1b6b2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -268,6 +268,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
} else if (req0->ctx == cursor->ctx) {
/* Same ctx: ignore first request, as second request
* will update tail past first request's workload */
+ cursor->elsp_submitted = req0->elsp_submitted;
list_del(&req0->execlist_link);
queue_work(dev_priv->wq, &req0->work);
req0 = cursor;
@@ -277,9 +278,15 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
}
}
+ WARN_ON(req1 && req1->elsp_submitted);
+
WARN_ON(execlists_submit_context(ring, req0->ctx, req0->tail,
req1 ? req1->ctx : NULL,
req1 ? req1->tail : 0));
+
+ req0->elsp_submitted++;
+ if (req1)
+ req1->elsp_submitted++;
}
static bool execlists_check_remove_request(struct intel_engine_cs *ring,
@@ -298,9 +305,14 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
struct drm_i915_gem_object *ctx_obj =
head_req->ctx->engine[ring->id].state;
if (intel_execlists_ctx_id(ctx_obj) == request_id) {
- list_del(&head_req->execlist_link);
- queue_work(dev_priv->wq, &head_req->work);
- return true;
+ WARN(head_req->elsp_submitted == 0,
+ "Never submitted head request\n");
+
+ if (--head_req->elsp_submitted <= 0) {
+ list_del(&head_req->execlist_link);
+ queue_work(dev_priv->wq, &head_req->work);
+ return true;
+ }
}
}
@@ -333,7 +345,16 @@ void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring)
status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
(read_pointer % 6) * 8 + 4);
- if (status & GEN8_CTX_STATUS_COMPLETE) {
+ if (status & GEN8_CTX_STATUS_PREEMPTED) {
+ if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
+ if (execlists_check_remove_request(ring, status_id))
+ WARN(1, "Lite Restored request removed from queue\n");
+ } else
+ WARN(1, "Preemption without Lite Restore\n");
+ }
+
+ if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
+ (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
if (execlists_check_remove_request(ring, status_id))
submit_contexts++;
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index a3f135c..331c6c2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -66,6 +66,8 @@ struct intel_ctx_submit_request {
struct list_head execlist_link;
struct work_struct work;
+
+ int elsp_submitted;
};
void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring);