From c6a32fcbbbc25d7d4e7178aa984e5ef186ee9589 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 20 Jan 2012 10:43:44 +0100
Subject: drm/i915: clarify gen2 pageflip cmd

I've reviewed gen2 pageflip code to hunt down multiple prepare pageflip
issues. The only thing I've found is a slight but functionally
meaningless confusion about the length of the mi cmd.

Fix it up and add a comment about what this dword should be (according
to docs at least).

Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index ebe71ed..4247a7b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7288,7 +7288,7 @@ static int intel_gen2_queue_flip(struct drm_device *dev,
 		 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 	OUT_RING(fb->pitches[0]);
 	OUT_RING(obj->gtt_offset + offset);
-	OUT_RING(MI_NOOP);
+	OUT_RING(0); /* aux display base address, unused */
 	ADVANCE_LP_RING();
 out:
 	return ret;
-- 
cgit v1.1


From 47842649ef43ba4b81ac2486df8caa0934e01195 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Mon, 16 Jan 2012 11:57:54 -0800
Subject: drm/i915: properly mask and or watermark values for sprites

Now that we're using the sprite WM fields, we need to take care not to
clobber them in the main update_wm functions.  While we're at it, make
sure we mask out the old sprite wm value before or'ing in the new one
when the sprite wm is updated.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Reviewed-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_display.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4247a7b..ec31350 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4548,6 +4548,7 @@ void sandybridge_update_wm(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int latency = SNB_READ_WM0_LATENCY() * 100;	/* In unit 0.1us */
+	u32 val;
 	int fbc_wm, plane_wm, cursor_wm;
 	unsigned int enabled;
 
@@ -4556,8 +4557,10 @@ void sandybridge_update_wm(struct drm_device *dev)
 			    &sandybridge_display_wm_info, latency,
 			    &sandybridge_cursor_wm_info, latency,
 			    &plane_wm, &cursor_wm)) {
-		I915_WRITE(WM0_PIPEA_ILK,
-			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
+		val = I915_READ(WM0_PIPEA_ILK);
+		val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
+		I915_WRITE(WM0_PIPEA_ILK, val |
+			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
 		DRM_DEBUG_KMS("FIFO watermarks For pipe A -"
 			      " plane %d, " "cursor: %d\n",
 			      plane_wm, cursor_wm);
@@ -4568,8 +4571,10 @@ void sandybridge_update_wm(struct drm_device *dev)
 			    &sandybridge_display_wm_info, latency,
 			    &sandybridge_cursor_wm_info, latency,
 			    &plane_wm, &cursor_wm)) {
-		I915_WRITE(WM0_PIPEB_ILK,
-			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
+		val = I915_READ(WM0_PIPEB_ILK);
+		val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
+		I915_WRITE(WM0_PIPEB_ILK, val |
+			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
 		DRM_DEBUG_KMS("FIFO watermarks For pipe B -"
 			      " plane %d, cursor: %d\n",
 			      plane_wm, cursor_wm);
@@ -4582,8 +4587,10 @@ void sandybridge_update_wm(struct drm_device *dev)
 			    &sandybridge_display_wm_info, latency,
 			    &sandybridge_cursor_wm_info, latency,
 			    &plane_wm, &cursor_wm)) {
-		I915_WRITE(WM0_PIPEC_IVB,
-			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
+		val = I915_READ(WM0_PIPEC_IVB);
+		val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
+		I915_WRITE(WM0_PIPEC_IVB, val |
+			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
 		DRM_DEBUG_KMS("FIFO watermarks For pipe C -"
 			      " plane %d, cursor: %d\n",
 			      plane_wm, cursor_wm);
@@ -4727,6 +4734,7 @@ static void sandybridge_update_sprite_wm(struct drm_device *dev, int pipe,
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int latency = SNB_READ_WM0_LATENCY() * 100;	/* In unit 0.1us */
+	u32 val;
 	int sprite_wm, reg;
 	int ret;
 
@@ -4753,7 +4761,9 @@ static void sandybridge_update_sprite_wm(struct drm_device *dev, int pipe,
 		return;
 	}
 
-	I915_WRITE(reg, I915_READ(reg) | (sprite_wm << WM0_PIPE_SPRITE_SHIFT));
+	val = I915_READ(reg);
+	val &= ~WM0_PIPE_SPRITE_MASK;
+	I915_WRITE(reg, val | (sprite_wm << WM0_PIPE_SPRITE_SHIFT));
 	DRM_DEBUG_KMS("sprite watermarks For pipe %d - %d\n", pipe, sprite_wm);
 
 
-- 
cgit v1.1


From aca258482ed7c600b5aeed03aa8727d94d8dd07e Mon Sep 17 00:00:00 2001
From: Eugeni Dodonov <eugeni.dodonov@intel.com>
Date: Tue, 17 Jan 2012 15:25:45 -0200
Subject: drm/i915: print out which pixel format we do not support

Otherwise, we are left with pretty bogus message saying that the pixel
format is not supported while leaving the details to the telepatic powers.

v2: use DRM_DEBUG_KMS instead of DRM_ERROR

Signed-off-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_display.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index ec31350..cfd3a87 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7890,7 +7890,8 @@ int intel_framebuffer_init(struct drm_device *dev,
 	case DRM_FORMAT_VYUY:
 		break;
 	default:
-		DRM_ERROR("unsupported pixel format\n");
+		DRM_DEBUG_KMS("unsupported pixel format %u\n",
+				mode_cmd->pixel_format);
 		return -EINVAL;
 	}
 
-- 
cgit v1.1


From 8e636784b6f76653d358d521af9c2a8c246df38b Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 22 Jan 2012 01:36:48 +0100
Subject: drm/i915: fixup assert_pipe to take the pipe A quirk into account

This was completely spamming dmesg on my i855gm. This issue was just
shortly introduced with:

commit 931872fceabacf2d4f8b6fbd51611c167e83164c
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Jan 16 23:01:13 2012 +0000

    drm/i915: Check that plane/pipe is disabled before removing the fb

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_display.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index cfd3a87..ebb3452 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -936,6 +936,10 @@ void assert_pipe(struct drm_i915_private *dev_priv,
 	u32 val;
 	bool cur_state;
 
+	/* if we need the pipe A quirk it must be always on */
+	if (pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE)
+		state = true;
+
 	reg = PIPECONF(pipe);
 	val = I915_READ(reg);
 	cur_state = !!(val & PIPECONF_ENABLE);
-- 
cgit v1.1


From 8d79c3490aecfe6e51f0ba6f9780746fb1434954 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 19 Jan 2012 10:50:05 -0800
Subject: drm/i915: Remove the MI_FLUSH_ENABLE setting.

We have always been using the wrong bit -- it's bit 12.  However, the
bit also doesn't do anything -- hardware has always accepted the
MI_FLUSH command even when it was specced not to.

Given that there is only one MI_FLUSH emitted in all of the driver
stack on gen6+ (in i965_video.c of the 2d driver, and it should be
using other code to do its flush instead), just remove the MI_FLUSH
enable instead of trying to fix it.

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 77e729d..b3da17a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -399,8 +399,6 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 
 	if (INTEL_INFO(dev)->gen > 3) {
 		int mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH;
-		if (IS_GEN6(dev) || IS_GEN7(dev))
-			mode |= MI_FLUSH_ENABLE << 16 | MI_FLUSH_ENABLE;
 		I915_WRITE(MI_MODE, mode);
 		if (IS_GEN7(dev))
 			I915_WRITE(GFX_MODE_GEN7,
-- 
cgit v1.1


From fc74d8e01165b567922921d110b6d16320a61fa6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 19 Jan 2012 10:50:06 -0800
Subject: drm/i915: Correct the bit number for the MI_FLUSH_ENABLE.

Older specs claimed this was bit 11, but newer specs and the actual
simulator code say it was bit 12.  Regardless, we don't use MI_FLUSH,
or try to enable it any more.

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Anyone trying to use this bit, please read all the relevant
discussions, it's epic.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_reg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c3afb78..bbad788 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -391,7 +391,7 @@
 
 #define MI_MODE		0x0209c
 # define VS_TIMER_DISPATCH				(1 << 6)
-# define MI_FLUSH_ENABLE				(1 << 11)
+# define MI_FLUSH_ENABLE				(1 << 12)
 
 #define GFX_MODE	0x02520
 #define GFX_MODE_GEN7	0x0229c
-- 
cgit v1.1


From b93f9cf14e714c20ce9a544ed1a6070ee7604588 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <ben@bwidawsk.net>
Date: Wed, 25 Jan 2012 15:39:34 -0800
Subject: drm/i915: argument to control retiring behavior

Sometimes it may be the case when we idle the gpu or wait on something
we don't actually want to process the retiring list. This patch allows
callers to choose the behavior.

Reviewed-by: Keith Packard <keithp@keithp.com>
Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_dma.c            |  2 +-
 drivers/gpu/drm/i915/i915_drv.h            |  5 +++--
 drivers/gpu/drm/i915/i915_gem.c            | 28 ++++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_evict.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c        |  2 +-
 drivers/gpu/drm/i915/intel_overlay.c       |  6 ++++--
 7 files changed, 27 insertions(+), 20 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8122738..3f27173 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -2131,7 +2131,7 @@ int i915_driver_unload(struct drm_device *dev)
 		unregister_shrinker(&dev_priv->mm.inactive_shrinker);
 
 	mutex_lock(&dev->struct_mutex);
-	ret = i915_gpu_idle(dev);
+	ret = i915_gpu_idle(dev, true);
 	if (ret)
 		DRM_ERROR("failed to idle hardware: %d\n", ret);
 	mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f02a5f5..1d10b8c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1179,13 +1179,14 @@ void i915_gem_do_init(struct drm_device *dev,
 		      unsigned long start,
 		      unsigned long mappable_end,
 		      unsigned long end);
-int __must_check i915_gpu_idle(struct drm_device *dev);
+int __must_check i915_gpu_idle(struct drm_device *dev, bool do_retire);
 int __must_check i915_gem_idle(struct drm_device *dev);
 int __must_check i915_add_request(struct intel_ring_buffer *ring,
 				  struct drm_file *file,
 				  struct drm_i915_gem_request *request);
 int __must_check i915_wait_request(struct intel_ring_buffer *ring,
-				   uint32_t seqno);
+				   uint32_t seqno,
+				   bool do_retire);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index eb98a7f..ff3066c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1943,7 +1943,8 @@ i915_gem_retire_work_handler(struct work_struct *work)
  */
 int
 i915_wait_request(struct intel_ring_buffer *ring,
-		  uint32_t seqno)
+		  uint32_t seqno,
+		  bool do_retire)
 {
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 	u32 ier;
@@ -2027,7 +2028,7 @@ i915_wait_request(struct intel_ring_buffer *ring,
 	 * buffer to have made it to the inactive list, and we would need
 	 * a separate wait queue to handle that.
 	 */
-	if (ret == 0)
+	if (ret == 0 && do_retire)
 		i915_gem_retire_requests_ring(ring);
 
 	return ret;
@@ -2051,7 +2052,8 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
 	 * it.
 	 */
 	if (obj->active) {
-		ret = i915_wait_request(obj->ring, obj->last_rendering_seqno);
+		ret = i915_wait_request(obj->ring, obj->last_rendering_seqno,
+					true);
 		if (ret)
 			return ret;
 	}
@@ -2172,7 +2174,7 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring,
 	return 0;
 }
 
-static int i915_ring_idle(struct intel_ring_buffer *ring)
+static int i915_ring_idle(struct intel_ring_buffer *ring, bool do_retire)
 {
 	int ret;
 
@@ -2186,18 +2188,18 @@ static int i915_ring_idle(struct intel_ring_buffer *ring)
 			return ret;
 	}
 
-	return i915_wait_request(ring, i915_gem_next_request_seqno(ring));
+	return i915_wait_request(ring, i915_gem_next_request_seqno(ring),
+				 do_retire);
 }
 
-int
-i915_gpu_idle(struct drm_device *dev)
+int i915_gpu_idle(struct drm_device *dev, bool do_retire)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret, i;
 
 	/* Flush everything onto the inactive list. */
 	for (i = 0; i < I915_NUM_RINGS; i++) {
-		ret = i915_ring_idle(&dev_priv->ring[i]);
+		ret = i915_ring_idle(&dev_priv->ring[i], do_retire);
 		if (ret)
 			return ret;
 	}
@@ -2400,7 +2402,8 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
 		if (!ring_passed_seqno(obj->last_fenced_ring,
 				       obj->last_fenced_seqno)) {
 			ret = i915_wait_request(obj->last_fenced_ring,
-						obj->last_fenced_seqno);
+						obj->last_fenced_seqno,
+						true);
 			if (ret)
 				return ret;
 		}
@@ -2541,7 +2544,8 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 				if (!ring_passed_seqno(obj->last_fenced_ring,
 						       reg->setup_seqno)) {
 					ret = i915_wait_request(obj->last_fenced_ring,
-								reg->setup_seqno);
+								reg->setup_seqno,
+								true);
 					if (ret)
 						return ret;
 				}
@@ -3710,7 +3714,7 @@ i915_gem_idle(struct drm_device *dev)
 		return 0;
 	}
 
-	ret = i915_gpu_idle(dev);
+	ret = i915_gpu_idle(dev, true);
 	if (ret) {
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
@@ -4201,7 +4205,7 @@ rescan:
 		 * This has a dramatic impact to reduce the number of
 		 * OOM-killer events whilst running the GPU aggressively.
 		 */
-		if (i915_gpu_idle(dev) == 0)
+		if (i915_gpu_idle(dev, true) == 0)
 			goto rescan;
 	}
 	mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index ead5d00..097119c 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -195,7 +195,7 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
 	trace_i915_gem_evict_everything(dev, purgeable_only);
 
 	/* Flush everything (on to the inactive lists) and evict */
-	ret = i915_gpu_idle(dev);
+	ret = i915_gpu_idle(dev, true);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c01cb20..c649e0f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1186,7 +1186,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			 * so every billion or so execbuffers, we need to stall
 			 * the GPU in order to reset the counters.
 			 */
-			ret = i915_gpu_idle(dev);
+			ret = i915_gpu_idle(dev, true);
 			if (ret)
 				goto err;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6042c5e..e050b90 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -55,7 +55,7 @@ static bool do_idling(struct drm_i915_private *dev_priv)
 
 	if (unlikely(dev_priv->mm.gtt->do_idle_maps)) {
 		dev_priv->mm.interruptible = false;
-		if (i915_gpu_idle(dev_priv->dev)) {
+		if (i915_gpu_idle(dev_priv->dev, true)) {
 			DRM_ERROR("Couldn't idle GPU\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index cdf17d4..23a543c 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -227,7 +227,8 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
 	}
 	overlay->last_flip_req = request->seqno;
 	overlay->flip_tail = tail;
-	ret = i915_wait_request(LP_RING(dev_priv), overlay->last_flip_req);
+	ret = i915_wait_request(LP_RING(dev_priv), overlay->last_flip_req,
+				true);
 	if (ret)
 		return ret;
 
@@ -448,7 +449,8 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 	if (overlay->last_flip_req == 0)
 		return 0;
 
-	ret = i915_wait_request(LP_RING(dev_priv), overlay->last_flip_req);
+	ret = i915_wait_request(LP_RING(dev_priv), overlay->last_flip_req,
+				true);
 	if (ret)
 		return ret;
 
-- 
cgit v1.1


From 8436473a4b10243fd4c3009b97b6646c2ba642f7 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <ben@bwidawsk.net>
Date: Tue, 24 Jan 2012 20:36:15 -0800
Subject: drm/i915: drm/i915: Fix recursive calls to unmap

After the ILK vt-d workaround patches it became clear that we had
introduced a bug.  Chris Wilson tracked down the issue to recursive
calls to unmap. This happens because we try to optimize waiting on
requests by calling retire requests after the wait, which may drop the
last reference on an object and end up freeing the object (and then
unmap the object from the gtt).

After the last patch we can now choose to defer processing the retire
list.

Kudos to Chris Wilson for tracking this one down.

This patch fixes gem_unref_active_buffers from i-g-t. It was tested by
forcing do_idle_maps to true.

This also fixes tests/gem_linear_blits in intel-gpu-tools.

Reported-by: guang.a.yang@intel.com
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=42180
Reviewed-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e050b90..11bddd5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -55,7 +55,7 @@ static bool do_idling(struct drm_i915_private *dev_priv)
 
 	if (unlikely(dev_priv->mm.gtt->do_idle_maps)) {
 		dev_priv->mm.interruptible = false;
-		if (i915_gpu_idle(dev_priv->dev, true)) {
+		if (i915_gpu_idle(dev_priv->dev, false)) {
 			DRM_ERROR("Couldn't idle GPU\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
-- 
cgit v1.1


From 6dc0e816bb7478fd4dfebddcc65257f9ef161f7a Mon Sep 17 00:00:00 2001
From: Ben Widawsky <ben@bwidawsk.net>
Date: Mon, 23 Jan 2012 15:30:02 -0800
Subject: drm/i915: correct lock type in destroy

This is only relevant when using module unloading, and really only helps
get rid of a probably benign warning.

I can't remember if I sent this out already, but it's not turning up in
any of my searches.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_irq.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 5d433fc..275ab6f 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1017,11 +1017,12 @@ void i915_destroy_error_state(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_error_state *error;
+	unsigned long flags;
 
-	spin_lock(&dev_priv->error_lock);
+	spin_lock_irqsave(&dev_priv->error_lock, flags);
 	error = dev_priv->first_error;
 	dev_priv->first_error = NULL;
-	spin_unlock(&dev_priv->error_lock);
+	spin_unlock_irqrestore(&dev_priv->error_lock, flags);
 
 	if (error)
 		i915_error_state_free(dev, error);
-- 
cgit v1.1


From b6daa025b1e1aebf276508eb2a5a9f730391aa84 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Fri, 6 Jan 2012 14:41:31 -0600
Subject: drm/i915: set AUD_CONFIG N_value_index for DisplayPort

It should be programmed to "0" for HDMI or "1" for DisplayPort.

This enables DisplayPort audio for

- HP EliteBook 8460p
  (whose BIOS does not set the N_value_index bit for us)

- DisplayPort monitor hot plugged after boot
  (otherwise most BIOS will fill the N_value_index bit for us)

Tested-by: Robert Lemaire <rlemaire@suse.com>
Reviewed-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_reg.h      | 12 ++++++++++++
 drivers/gpu/drm/i915/intel_display.c |  8 +++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index bbad788..f4daa6e 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3742,4 +3742,16 @@
  */
 #define GEN7_SO_WRITE_OFFSET(n)		(0x5280 + (n) * 4)
 
+#define IBX_AUD_CONFIG_A			0xe2000
+#define CPT_AUD_CONFIG_A			0xe5000
+#define   AUD_CONFIG_N_VALUE_INDEX		(1 << 29)
+#define   AUD_CONFIG_N_PROG_ENABLE		(1 << 28)
+#define   AUD_CONFIG_UPPER_N_SHIFT		20
+#define   AUD_CONFIG_UPPER_N_VALUE		(0xff << 20)
+#define   AUD_CONFIG_LOWER_N_SHIFT		4
+#define   AUD_CONFIG_LOWER_N_VALUE		(0xfff << 4)
+#define   AUD_CONFIG_PIXEL_CLOCK_HDMI_SHIFT	16
+#define   AUD_CONFIG_PIXEL_CLOCK_HDMI		(0xf << 16)
+#define   AUD_CONFIG_DISABLE_NCTS		(1 << 3)
+
 #endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index ebb3452..0770671 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6141,15 +6141,18 @@ static void ironlake_write_eld(struct drm_connector *connector,
 	uint32_t i;
 	int len;
 	int hdmiw_hdmiedid;
+	int aud_config;
 	int aud_cntl_st;
 	int aud_cntrl_st2;
 
 	if (HAS_PCH_IBX(connector->dev)) {
 		hdmiw_hdmiedid = IBX_HDMIW_HDMIEDID_A;
+		aud_config = IBX_AUD_CONFIG_A;
 		aud_cntl_st = IBX_AUD_CNTL_ST_A;
 		aud_cntrl_st2 = IBX_AUD_CNTL_ST2;
 	} else {
 		hdmiw_hdmiedid = CPT_HDMIW_HDMIEDID_A;
+		aud_config = CPT_AUD_CONFIG_A;
 		aud_cntl_st = CPT_AUD_CNTL_ST_A;
 		aud_cntrl_st2 = CPT_AUD_CNTRL_ST2;
 	}
@@ -6157,6 +6160,7 @@ static void ironlake_write_eld(struct drm_connector *connector,
 	i = to_intel_crtc(crtc)->pipe;
 	hdmiw_hdmiedid += i * 0x100;
 	aud_cntl_st += i * 0x100;
+	aud_config += i * 0x100;
 
 	DRM_DEBUG_DRIVER("ELD on pipe %c\n", pipe_name(i));
 
@@ -6176,7 +6180,9 @@ static void ironlake_write_eld(struct drm_connector *connector,
 	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) {
 		DRM_DEBUG_DRIVER("ELD: DisplayPort detected\n");
 		eld[5] |= (1 << 2);	/* Conn_Type, 0x1 = DisplayPort */
-	}
+		I915_WRITE(aud_config, AUD_CONFIG_N_VALUE_INDEX); /* 0x1 = DP */
+	} else
+		I915_WRITE(aud_config, 0);
 
 	if (intel_eld_uptodate(connector,
 			       aud_cntrl_st2, eldv,
-- 
cgit v1.1


From 96154f2faba540281073243d61108d1705d19c6d Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:00 +0100
Subject: drm/i915: switch ring->id to be a real id

... and add a helpr function for the places where we want a flag.

This way we can use ring->id to index into arrays.

v2: Resurrect the missing beautification-space Chris Wilson noted.
I'm moving this space around because I'll reuse ring_str in the next
patch.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  9 +++++----
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 ++--
 drivers/gpu/drm/i915/i915_irq.c            |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 14 +++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.h    | 20 ++++++++++----------
 5 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6c3be86..9c5db4e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -669,9 +669,9 @@ static int i915_ringbuffer_info(struct seq_file *m, void *data)
 static const char *ring_str(int ring)
 {
 	switch (ring) {
-	case RING_RENDER: return " render";
-	case RING_BSD: return " bsd";
-	case RING_BLT: return " blt";
+	case RCS: return "render";
+	case VCS: return "bsd";
+	case BCS: return "blt";
 	default: return "";
 	}
 }
@@ -714,7 +714,7 @@ static void print_error_buffers(struct seq_file *m,
 	seq_printf(m, "%s [%d]:\n", name, count);
 
 	while (count--) {
-		seq_printf(m, "  %08x %8u %04x %04x %08x%s%s%s%s%s%s",
+		seq_printf(m, "  %08x %8u %04x %04x %08x%s%s%s%s%s%s%s",
 			   err->gtt_offset,
 			   err->size,
 			   err->read_domains,
@@ -724,6 +724,7 @@ static void print_error_buffers(struct seq_file *m,
 			   tiling_flag(err->tiling),
 			   dirty_flag(err->dirty),
 			   purgeable_flag(err->purgeable),
+			   err->ring != -1 ? " " : "",
 			   ring_str(err->ring),
 			   cache_level_str(err->cache_level));
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c649e0f..49b3ebc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -203,9 +203,9 @@ i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
 	cd->invalidate_domains |= invalidate_domains;
 	cd->flush_domains |= flush_domains;
 	if (flush_domains & I915_GEM_GPU_DOMAINS)
-		cd->flush_rings |= obj->ring->id;
+		cd->flush_rings |= intel_ring_flag(obj->ring);
 	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-		cd->flush_rings |= ring->id;
+		cd->flush_rings |= intel_ring_flag(ring);
 }
 
 struct eb_objects {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 275ab6f..ab53edb 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -804,7 +804,7 @@ static u32 capture_bo_list(struct drm_i915_error_buffer *err,
 		err->tiling = obj->tiling_mode;
 		err->dirty = obj->dirty;
 		err->purgeable = obj->madv != I915_MADV_WILLNEED;
-		err->ring = obj->ring ? obj->ring->id : 0;
+		err->ring = obj->ring ? obj->ring->id : -1;
 		err->cache_level = obj->cache_level;
 
 		if (++i == count)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b3da17a..48042f3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -729,13 +729,13 @@ void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
 	 */
 	if (IS_GEN7(dev)) {
 		switch (ring->id) {
-		case RING_RENDER:
+		case RCS:
 			mmio = RENDER_HWS_PGA_GEN7;
 			break;
-		case RING_BLT:
+		case BCS:
 			mmio = BLT_HWS_PGA_GEN7;
 			break;
-		case RING_BSD:
+		case VCS:
 			mmio = BSD_HWS_PGA_GEN7;
 			break;
 		}
@@ -1199,7 +1199,7 @@ void intel_ring_advance(struct intel_ring_buffer *ring)
 
 static const struct intel_ring_buffer render_ring = {
 	.name			= "render ring",
-	.id			= RING_RENDER,
+	.id			= RCS,
 	.mmio_base		= RENDER_RING_BASE,
 	.size			= 32 * PAGE_SIZE,
 	.init			= init_render_ring,
@@ -1222,7 +1222,7 @@ static const struct intel_ring_buffer render_ring = {
 
 static const struct intel_ring_buffer bsd_ring = {
 	.name                   = "bsd ring",
-	.id			= RING_BSD,
+	.id			= VCS,
 	.mmio_base		= BSD_RING_BASE,
 	.size			= 32 * PAGE_SIZE,
 	.init			= init_ring_common,
@@ -1332,7 +1332,7 @@ gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring)
 /* ring buffer for Video Codec for Gen6+ */
 static const struct intel_ring_buffer gen6_bsd_ring = {
 	.name			= "gen6 bsd ring",
-	.id			= RING_BSD,
+	.id			= VCS,
 	.mmio_base		= GEN6_BSD_RING_BASE,
 	.size			= 32 * PAGE_SIZE,
 	.init			= init_ring_common,
@@ -1467,7 +1467,7 @@ static void blt_ring_cleanup(struct intel_ring_buffer *ring)
 
 static const struct intel_ring_buffer gen6_blt_ring = {
 	.name			= "blt ring",
-	.id			= RING_BLT,
+	.id			= BCS,
 	.mmio_base		= BLT_RING_BASE,
 	.size			= 32 * PAGE_SIZE,
 	.init			= blt_ring_init,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 68281c9..c8b9cc0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -1,13 +1,6 @@
 #ifndef _INTEL_RINGBUFFER_H_
 #define _INTEL_RINGBUFFER_H_
 
-enum {
-	RCS = 0x0,
-	VCS,
-	BCS,
-	I915_NUM_RINGS,
-};
-
 struct  intel_hw_status_page {
 	u32	__iomem	*page_addr;
 	unsigned int	gfx_addr;
@@ -36,10 +29,11 @@ struct  intel_hw_status_page {
 struct  intel_ring_buffer {
 	const char	*name;
 	enum intel_ring_id {
-		RING_RENDER = 0x1,
-		RING_BSD = 0x2,
-		RING_BLT = 0x4,
+		RCS = 0x0,
+		VCS,
+		BCS,
 	} id;
+#define I915_NUM_RINGS 3
 	u32		mmio_base;
 	void		__iomem *virtual_start;
 	struct		drm_device *dev;
@@ -119,6 +113,12 @@ struct  intel_ring_buffer {
 	void *private;
 };
 
+static inline unsigned
+intel_ring_flag(struct intel_ring_buffer *ring)
+{
+	return 1 << ring->id;
+}
+
 static inline u32
 intel_ring_sync_index(struct intel_ring_buffer *ring,
 		      struct intel_ring_buffer *other)
-- 
cgit v1.1


From d27b1e0ec2a0a04770b2ebf70a2e01281ef93562 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:01 +0100
Subject: drm/i915: refactor ring error state capture to use arrays

The code already got unwieldy and we want to dump more per-ring
registers.

Only functional change is that we now also capture the video
ring registers on ilk.

v2: fixup a refactor fumble spotted by Chris Wilson.

Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 55 +++++++++++++++--------------
 drivers/gpu/drm/i915/i915_drv.h     | 20 +++--------
 drivers/gpu/drm/i915/i915_irq.c     | 70 +++++++++++++++++++------------------
 drivers/gpu/drm/i915/i915_reg.h     | 11 ++----
 4 files changed, 73 insertions(+), 83 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9c5db4e..4002846 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -738,6 +738,26 @@ static void print_error_buffers(struct seq_file *m,
 	}
 }
 
+static void i915_ring_error_state(struct seq_file *m,
+				  struct drm_device *dev,
+				  struct drm_i915_error_state *error,
+				  unsigned ring)
+{
+	seq_printf(m, "%s command stream:\n", ring_str(ring));
+	seq_printf(m, "  ACTHD: 0x%08x\n", error->acthd[ring]);
+	seq_printf(m, "  IPEIR: 0x%08x\n", error->ipeir[ring]);
+	seq_printf(m, "  IPEHR: 0x%08x\n", error->ipehr[ring]);
+	seq_printf(m, "  INSTDONE: 0x%08x\n", error->instdone[ring]);
+	if (ring == RCS) {
+		if (INTEL_INFO(dev)->gen >= 4) {
+			seq_printf(m, "  INSTDONE1: 0x%08x\n", error->instdone1);
+			seq_printf(m, "  INSTPS: 0x%08x\n", error->instps);
+		}
+		seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm);
+	}
+	seq_printf(m, "  seqno: 0x%08x\n", error->seqno[ring]);
+}
+
 static int i915_error_state(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
@@ -760,36 +780,19 @@ static int i915_error_state(struct seq_file *m, void *unused)
 	seq_printf(m, "PCI ID: 0x%04x\n", dev->pci_device);
 	seq_printf(m, "EIR: 0x%08x\n", error->eir);
 	seq_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
-	if (INTEL_INFO(dev)->gen >= 6) {
-		seq_printf(m, "ERROR: 0x%08x\n", error->error);
-		seq_printf(m, "Blitter command stream:\n");
-		seq_printf(m, "  ACTHD:    0x%08x\n", error->bcs_acthd);
-		seq_printf(m, "  IPEIR:    0x%08x\n", error->bcs_ipeir);
-		seq_printf(m, "  IPEHR:    0x%08x\n", error->bcs_ipehr);
-		seq_printf(m, "  INSTDONE: 0x%08x\n", error->bcs_instdone);
-		seq_printf(m, "  seqno:    0x%08x\n", error->bcs_seqno);
-		seq_printf(m, "Video (BSD) command stream:\n");
-		seq_printf(m, "  ACTHD:    0x%08x\n", error->vcs_acthd);
-		seq_printf(m, "  IPEIR:    0x%08x\n", error->vcs_ipeir);
-		seq_printf(m, "  IPEHR:    0x%08x\n", error->vcs_ipehr);
-		seq_printf(m, "  INSTDONE: 0x%08x\n", error->vcs_instdone);
-		seq_printf(m, "  seqno:    0x%08x\n", error->vcs_seqno);
-	}
-	seq_printf(m, "Render command stream:\n");
-	seq_printf(m, "  ACTHD: 0x%08x\n", error->acthd);
-	seq_printf(m, "  IPEIR: 0x%08x\n", error->ipeir);
-	seq_printf(m, "  IPEHR: 0x%08x\n", error->ipehr);
-	seq_printf(m, "  INSTDONE: 0x%08x\n", error->instdone);
-	if (INTEL_INFO(dev)->gen >= 4) {
-		seq_printf(m, "  INSTDONE1: 0x%08x\n", error->instdone1);
-		seq_printf(m, "  INSTPS: 0x%08x\n", error->instps);
-	}
-	seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm);
-	seq_printf(m, "  seqno: 0x%08x\n", error->seqno);
 
 	for (i = 0; i < dev_priv->num_fence_regs; i++)
 		seq_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);
 
+	if (INTEL_INFO(dev)->gen >= 6) 
+		seq_printf(m, "ERROR: 0x%08x\n", error->error);
+
+	i915_ring_error_state(m, dev, error, RCS);
+	if (HAS_BLT(dev))
+		i915_ring_error_state(m, dev, error, BCS);
+	if (HAS_BSD(dev))
+		i915_ring_error_state(m, dev, error, VCS);
+
 	if (error->active_bo)
 		print_error_buffers(m, "Active",
 				    error->active_bo,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1d10b8c..766ef1c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -152,25 +152,15 @@ struct drm_i915_error_state {
 	u32 eir;
 	u32 pgtbl_er;
 	u32 pipestat[I915_MAX_PIPES];
-	u32 ipeir;
-	u32 ipehr;
-	u32 instdone;
-	u32 acthd;
+	u32 ipeir[I915_NUM_RINGS];
+	u32 ipehr[I915_NUM_RINGS];
+	u32 instdone[I915_NUM_RINGS];
+	u32 acthd[I915_NUM_RINGS];
 	u32 error; /* gen6+ */
-	u32 bcs_acthd; /* gen6+ blt engine */
-	u32 bcs_ipehr;
-	u32 bcs_ipeir;
-	u32 bcs_instdone;
-	u32 bcs_seqno;
-	u32 vcs_acthd; /* gen6+ bsd engine */
-	u32 vcs_ipehr;
-	u32 vcs_ipeir;
-	u32 vcs_instdone;
-	u32 vcs_seqno;
 	u32 instpm;
 	u32 instps;
 	u32 instdone1;
-	u32 seqno;
+	u32 seqno[I915_NUM_RINGS];
 	u64 bbaddr;
 	u64 fence[I915_MAX_NUM_FENCES];
 	struct timeval time;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index ab53edb..dffeef8 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -876,6 +876,32 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 	return NULL;
 }
 
+static void i915_record_ring_state(struct drm_device *dev,
+				   struct drm_i915_error_state *error,
+				   struct intel_ring_buffer *ring)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (INTEL_INFO(dev)->gen >= 4) {
+		error->ipeir[ring->id] = I915_READ(RING_IPEIR(ring->mmio_base));
+		error->ipehr[ring->id] = I915_READ(RING_IPEHR(ring->mmio_base));
+		error->instdone[ring->id] = I915_READ(RING_INSTDONE(ring->mmio_base));
+		if (ring->id == RCS) {
+			error->instps = I915_READ(INSTPS);
+			error->instdone1 = I915_READ(INSTDONE1);
+			error->bbaddr = I915_READ64(BB_ADDR);
+		}
+	} else {
+		error->ipeir[ring->id] = I915_READ(IPEIR);
+		error->ipehr[ring->id] = I915_READ(IPEHR);
+		error->instdone[ring->id] = I915_READ(INSTDONE);
+		error->bbaddr = 0;
+	}
+
+	error->seqno[ring->id] = ring->get_seqno(ring);
+	error->acthd[ring->id] = intel_ring_get_active_head(ring);
+}
+
 /**
  * i915_capture_error_state - capture an error record for later analysis
  * @dev: drm device
@@ -909,47 +935,23 @@ static void i915_capture_error_state(struct drm_device *dev)
 	DRM_INFO("capturing error event; look for more information in /debug/dri/%d/i915_error_state\n",
 		 dev->primary->index);
 
-	error->seqno = dev_priv->ring[RCS].get_seqno(&dev_priv->ring[RCS]);
 	error->eir = I915_READ(EIR);
 	error->pgtbl_er = I915_READ(PGTBL_ER);
 	for_each_pipe(pipe)
 		error->pipestat[pipe] = I915_READ(PIPESTAT(pipe));
 	error->instpm = I915_READ(INSTPM);
-	error->error = 0;
-	if (INTEL_INFO(dev)->gen >= 6) {
+
+	if (INTEL_INFO(dev)->gen >= 6)
 		error->error = I915_READ(ERROR_GEN6);
+	else
+		error->error = 0;
+
+	i915_record_ring_state(dev, error, &dev_priv->ring[RCS]);
+	if (HAS_BLT(dev))
+		i915_record_ring_state(dev, error, &dev_priv->ring[BCS]);
+	if (HAS_BSD(dev))
+		i915_record_ring_state(dev, error, &dev_priv->ring[VCS]);
 
-		error->bcs_acthd = I915_READ(BCS_ACTHD);
-		error->bcs_ipehr = I915_READ(BCS_IPEHR);
-		error->bcs_ipeir = I915_READ(BCS_IPEIR);
-		error->bcs_instdone = I915_READ(BCS_INSTDONE);
-		error->bcs_seqno = 0;
-		if (dev_priv->ring[BCS].get_seqno)
-			error->bcs_seqno = dev_priv->ring[BCS].get_seqno(&dev_priv->ring[BCS]);
-
-		error->vcs_acthd = I915_READ(VCS_ACTHD);
-		error->vcs_ipehr = I915_READ(VCS_IPEHR);
-		error->vcs_ipeir = I915_READ(VCS_IPEIR);
-		error->vcs_instdone = I915_READ(VCS_INSTDONE);
-		error->vcs_seqno = 0;
-		if (dev_priv->ring[VCS].get_seqno)
-			error->vcs_seqno = dev_priv->ring[VCS].get_seqno(&dev_priv->ring[VCS]);
-	}
-	if (INTEL_INFO(dev)->gen >= 4) {
-		error->ipeir = I915_READ(IPEIR_I965);
-		error->ipehr = I915_READ(IPEHR_I965);
-		error->instdone = I915_READ(INSTDONE_I965);
-		error->instps = I915_READ(INSTPS);
-		error->instdone1 = I915_READ(INSTDONE1);
-		error->acthd = I915_READ(ACTHD_I965);
-		error->bbaddr = I915_READ64(BB_ADDR);
-	} else {
-		error->ipeir = I915_READ(IPEIR);
-		error->ipehr = I915_READ(IPEHR);
-		error->instdone = I915_READ(INSTDONE);
-		error->acthd = I915_READ(ACTHD);
-		error->bbaddr = 0;
-	}
 	i915_gem_record_fences(dev, error);
 
 	/* Record the active batch and ring buffers */
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f4daa6e..80febc5 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -352,6 +352,9 @@
 #define IPEIR_I965	0x02064
 #define IPEHR_I965	0x02068
 #define INSTDONE_I965	0x0206c
+#define RING_IPEIR(base)	((base)+0x64)
+#define RING_IPEHR(base)	((base)+0x68)
+#define RING_INSTDONE(base)	((base)+0x6c)
 #define INSTPS		0x02070 /* 965+ only */
 #define INSTDONE1	0x0207c /* 965+ only */
 #define ACTHD_I965	0x02074
@@ -365,14 +368,6 @@
 #define INSTDONE	0x02090
 #define NOPID		0x02094
 #define HWSTAM		0x02098
-#define VCS_INSTDONE	0x1206C
-#define VCS_IPEIR	0x12064
-#define VCS_IPEHR	0x12068
-#define VCS_ACTHD	0x12074
-#define BCS_INSTDONE	0x2206C
-#define BCS_IPEIR	0x22064
-#define BCS_IPEHR	0x22068
-#define BCS_ACTHD	0x22074
 
 #define ERROR_GEN6	0x040a0
 
-- 
cgit v1.1


From c1cd90ed7957d1dd8aa6138468d71003fbc095ce Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:02 +0100
Subject: drm/i915: collect more per ring error state

Based on a patch by Ben Widawsky, but with different colors
for the bikeshed.

In contrast to Ben's patch this one doesn't add the fault regs.
Afaics they're for the optional page fault support which
- we're not enabling
- and which seems to be unsupported by the hw team. Recent bspec
  lacks tons of information about this that the public docs released
  half a year back still contain.

Also dump ring HEAD/TAIL registers - I've recently seen a few
error_state where just guessing these is not good enough.

v2: Also dump INSTPM for every ring.

v3: Fix a few really silly goof-ups spotted by Chris Wilson.

Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 16 ++++++++++------
 drivers/gpu/drm/i915/i915_drv.h     |  7 +++++--
 drivers/gpu/drm/i915/i915_irq.c     |  9 +++++++--
 drivers/gpu/drm/i915/i915_reg.h     |  3 +++
 4 files changed, 25 insertions(+), 10 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4002846..5aa2568 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -744,17 +744,21 @@ static void i915_ring_error_state(struct seq_file *m,
 				  unsigned ring)
 {
 	seq_printf(m, "%s command stream:\n", ring_str(ring));
+	seq_printf(m, "  HEAD: 0x%08x\n", error->head[ring]);
+	seq_printf(m, "  TAIL: 0x%08x\n", error->tail[ring]);
 	seq_printf(m, "  ACTHD: 0x%08x\n", error->acthd[ring]);
 	seq_printf(m, "  IPEIR: 0x%08x\n", error->ipeir[ring]);
 	seq_printf(m, "  IPEHR: 0x%08x\n", error->ipehr[ring]);
 	seq_printf(m, "  INSTDONE: 0x%08x\n", error->instdone[ring]);
-	if (ring == RCS) {
-		if (INTEL_INFO(dev)->gen >= 4) {
-			seq_printf(m, "  INSTDONE1: 0x%08x\n", error->instdone1);
-			seq_printf(m, "  INSTPS: 0x%08x\n", error->instps);
-		}
-		seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm);
+	if (ring == RCS && INTEL_INFO(dev)->gen >= 4) {
+		seq_printf(m, "  INSTDONE1: 0x%08x\n", error->instdone1);
+		seq_printf(m, "  BBADDR: 0x%08llx\n", error->bbaddr);
 	}
+	if (INTEL_INFO(dev)->gen >= 4)
+		seq_printf(m, "  INSTPS: 0x%08x\n", error->instps[ring]);
+	seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm[ring]);
+	if (INTEL_INFO(dev)->gen >= 6)
+		seq_printf(m, "  FADDR: 0x%08x\n", error->faddr[ring]);
 	seq_printf(m, "  seqno: 0x%08x\n", error->seqno[ring]);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 766ef1c..733f5f5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -152,16 +152,19 @@ struct drm_i915_error_state {
 	u32 eir;
 	u32 pgtbl_er;
 	u32 pipestat[I915_MAX_PIPES];
+	u32 tail[I915_NUM_RINGS];
+	u32 head[I915_NUM_RINGS];
 	u32 ipeir[I915_NUM_RINGS];
 	u32 ipehr[I915_NUM_RINGS];
 	u32 instdone[I915_NUM_RINGS];
 	u32 acthd[I915_NUM_RINGS];
 	u32 error; /* gen6+ */
-	u32 instpm;
-	u32 instps;
+	u32 instpm[I915_NUM_RINGS];
+	u32 instps[I915_NUM_RINGS];
 	u32 instdone1;
 	u32 seqno[I915_NUM_RINGS];
 	u64 bbaddr;
+	u32 faddr[I915_NUM_RINGS];
 	u64 fence[I915_MAX_NUM_FENCES];
 	struct timeval time;
 	struct drm_i915_error_object {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index dffeef8..1e75c44 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -882,12 +882,15 @@ static void i915_record_ring_state(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	if (INTEL_INFO(dev)->gen >= 6)
+		error->faddr[ring->id] = I915_READ(RING_DMA_FADD(ring->mmio_base));
+
 	if (INTEL_INFO(dev)->gen >= 4) {
 		error->ipeir[ring->id] = I915_READ(RING_IPEIR(ring->mmio_base));
 		error->ipehr[ring->id] = I915_READ(RING_IPEHR(ring->mmio_base));
 		error->instdone[ring->id] = I915_READ(RING_INSTDONE(ring->mmio_base));
+		error->instps[ring->id] = I915_READ(RING_INSTPS(ring->mmio_base));
 		if (ring->id == RCS) {
-			error->instps = I915_READ(INSTPS);
 			error->instdone1 = I915_READ(INSTDONE1);
 			error->bbaddr = I915_READ64(BB_ADDR);
 		}
@@ -898,8 +901,11 @@ static void i915_record_ring_state(struct drm_device *dev,
 		error->bbaddr = 0;
 	}
 
+	error->instpm[ring->id] = I915_READ(RING_INSTPM(ring->mmio_base));
 	error->seqno[ring->id] = ring->get_seqno(ring);
 	error->acthd[ring->id] = intel_ring_get_active_head(ring);
+	error->head[ring->id] = I915_READ_HEAD(ring);
+	error->tail[ring->id] = I915_READ_TAIL(ring);
 }
 
 /**
@@ -939,7 +945,6 @@ static void i915_capture_error_state(struct drm_device *dev)
 	error->pgtbl_er = I915_READ(PGTBL_ER);
 	for_each_pipe(pipe)
 		error->pipestat[pipe] = I915_READ(PIPESTAT(pipe));
-	error->instpm = I915_READ(INSTPM);
 
 	if (INTEL_INFO(dev)->gen >= 6)
 		error->error = I915_READ(ERROR_GEN6);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 80febc5..d107a1d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -355,6 +355,9 @@
 #define RING_IPEIR(base)	((base)+0x64)
 #define RING_IPEHR(base)	((base)+0x68)
 #define RING_INSTDONE(base)	((base)+0x6c)
+#define RING_INSTPS(base)	((base)+0x70)
+#define RING_DMA_FADD(base)	((base)+0x78)
+#define RING_INSTPM(base)	((base)+0xc0)
 #define INSTPS		0x02070 /* 965+ only */
 #define INSTDONE1	0x0207c /* 965+ only */
 #define ACTHD_I965	0x02074
-- 
cgit v1.1


From 6a233c78878d8795517d716544d045d5675b3061 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:07 +0100
Subject: drm/i915/ringbuffer: kill snb blt workaround

This was just to facilitate product enablement with pre-production hw.
Allows us to kill quite a bit of cruft.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 81 +--------------------------------
 1 file changed, 2 insertions(+), 79 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 48042f3..6e80f83 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1368,79 +1368,13 @@ blt_ring_put_irq(struct intel_ring_buffer *ring)
 			  GEN6_BLITTER_USER_INTERRUPT);
 }
 
-
-/* Workaround for some stepping of SNB,
- * each time when BLT engine ring tail moved,
- * the first command in the ring to be parsed
- * should be MI_BATCH_BUFFER_START
- */
-#define NEED_BLT_WORKAROUND(dev) \
-	(IS_GEN6(dev) && (dev->pdev->revision < 8))
-
-static inline struct drm_i915_gem_object *
-to_blt_workaround(struct intel_ring_buffer *ring)
-{
-	return ring->private;
-}
-
-static int blt_ring_init(struct intel_ring_buffer *ring)
-{
-	if (NEED_BLT_WORKAROUND(ring->dev)) {
-		struct drm_i915_gem_object *obj;
-		u32 *ptr;
-		int ret;
-
-		obj = i915_gem_alloc_object(ring->dev, 4096);
-		if (obj == NULL)
-			return -ENOMEM;
-
-		ret = i915_gem_object_pin(obj, 4096, true);
-		if (ret) {
-			drm_gem_object_unreference(&obj->base);
-			return ret;
-		}
-
-		ptr = kmap(obj->pages[0]);
-		*ptr++ = MI_BATCH_BUFFER_END;
-		*ptr++ = MI_NOOP;
-		kunmap(obj->pages[0]);
-
-		ret = i915_gem_object_set_to_gtt_domain(obj, false);
-		if (ret) {
-			i915_gem_object_unpin(obj);
-			drm_gem_object_unreference(&obj->base);
-			return ret;
-		}
-
-		ring->private = obj;
-	}
-
-	return init_ring_common(ring);
-}
-
-static int blt_ring_begin(struct intel_ring_buffer *ring,
-			  int num_dwords)
-{
-	if (ring->private) {
-		int ret = intel_ring_begin(ring, num_dwords+2);
-		if (ret)
-			return ret;
-
-		intel_ring_emit(ring, MI_BATCH_BUFFER_START);
-		intel_ring_emit(ring, to_blt_workaround(ring)->gtt_offset);
-
-		return 0;
-	} else
-		return intel_ring_begin(ring, 4);
-}
-
 static int blt_ring_flush(struct intel_ring_buffer *ring,
 			  u32 invalidate, u32 flush)
 {
 	uint32_t cmd;
 	int ret;
 
-	ret = blt_ring_begin(ring, 4);
+	ret = intel_ring_begin(ring, 4);
 	if (ret)
 		return ret;
 
@@ -1455,22 +1389,12 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
 	return 0;
 }
 
-static void blt_ring_cleanup(struct intel_ring_buffer *ring)
-{
-	if (!ring->private)
-		return;
-
-	i915_gem_object_unpin(ring->private);
-	drm_gem_object_unreference(ring->private);
-	ring->private = NULL;
-}
-
 static const struct intel_ring_buffer gen6_blt_ring = {
 	.name			= "blt ring",
 	.id			= BCS,
 	.mmio_base		= BLT_RING_BASE,
 	.size			= 32 * PAGE_SIZE,
-	.init			= blt_ring_init,
+	.init			= init_ring_common,
 	.write_tail		= ring_write_tail,
 	.flush			= blt_ring_flush,
 	.add_request		= gen6_add_request,
@@ -1478,7 +1402,6 @@ static const struct intel_ring_buffer gen6_blt_ring = {
 	.irq_get		= blt_ring_get_irq,
 	.irq_put		= blt_ring_put_irq,
 	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
-	.cleanup		= blt_ring_cleanup,
 	.sync_to		= gen6_blt_ring_sync_to,
 	.semaphore_register	= {MI_SEMAPHORE_SYNC_BR,
 				   MI_SEMAPHORE_SYNC_BV,
-- 
cgit v1.1


From 1690e1eb7a9021826853e181baa48dd77090da28 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 14 Dec 2011 13:57:08 +0100
Subject: drm/i915: Separate fence pin counting from normal bind pin counting

In order to correctly account for reserving space in the GTT and fences
for a batch buffer, we need to independently track whether the fence is
pinned due to a fenced GPU access in the batch or whether the buffer is
pinned in the aperture. Currently we count the fenced as pinned if the
buffer has already been seen in the execbuffer. This leads to a false
accounting of available fence registers, causing frequent mass evictions.
Worse, if coupled with the change to make i915_gem_object_get_fence()
report EDADLK upon fence starvation, the batchbuffer can fail with only
one fence required...

Fixes intel-gpu-tools/tests/gem_fenced_exec_thrash

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38735
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Tested-by: Paul Neumann <paul104x@yahoo.de>
[danvet: Resolve the functional conflict with Jesse Barnes sprite
patches, acked by Chris Wilson on irc.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h            |  19 ++++
 drivers/gpu/drm/i915/i915_gem.c            |   7 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 139 +++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_display.c       |  16 +++-
 drivers/gpu/drm/i915/intel_drv.h           |   1 +
 drivers/gpu/drm/i915/intel_sprite.c        |   4 +-
 6 files changed, 129 insertions(+), 57 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 733f5f5..12e8cce 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -135,6 +135,7 @@ struct drm_i915_fence_reg {
 	struct list_head lru_list;
 	struct drm_i915_gem_object *obj;
 	uint32_t setup_seqno;
+	int pin_count;
 };
 
 struct sdvo_device_mapping {
@@ -1159,6 +1160,24 @@ int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 					   struct intel_ring_buffer *pipelined);
 int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 
+static inline void
+i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE) {
+		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+		dev_priv->fence_regs[obj->fence_reg].pin_count++;
+	}
+}
+
+static inline void
+i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE) {
+		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+		dev_priv->fence_regs[obj->fence_reg].pin_count--;
+	}
+}
+
 void i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_reset(struct drm_device *dev);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ff3066c..c78930e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2435,6 +2435,8 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
 
 	if (obj->fence_reg != I915_FENCE_REG_NONE) {
 		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+
+		WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count);
 		i915_gem_clear_fence_reg(obj->base.dev,
 					 &dev_priv->fence_regs[obj->fence_reg]);
 
@@ -2459,7 +2461,7 @@ i915_find_fence_reg(struct drm_device *dev,
 		if (!reg->obj)
 			return reg;
 
-		if (!reg->obj->pin_count)
+		if (!reg->pin_count)
 			avail = reg;
 	}
 
@@ -2469,7 +2471,7 @@ i915_find_fence_reg(struct drm_device *dev,
 	/* None available, try to steal one or wait for a user to finish */
 	avail = first = NULL;
 	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
-		if (reg->obj->pin_count)
+		if (reg->pin_count)
 			continue;
 
 		if (first == NULL)
@@ -2664,6 +2666,7 @@ i915_gem_clear_fence_reg(struct drm_device *dev,
 	list_del_init(&reg->lru_list);
 	reg->obj = NULL;
 	reg->setup_seqno = 0;
+	reg->pin_count = 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 49b3ebc..4a43ef5db 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -461,6 +461,54 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 	return ret;
 }
 
+#define  __EXEC_OBJECT_HAS_FENCE (1<<31)
+
+static int
+pin_and_fence_object(struct drm_i915_gem_object *obj,
+		     struct intel_ring_buffer *ring)
+{
+	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
+	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
+	bool need_fence, need_mappable;
+	int ret;
+
+	need_fence =
+		has_fenced_gpu_access &&
+		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+		obj->tiling_mode != I915_TILING_NONE;
+	need_mappable =
+		entry->relocation_count ? true : need_fence;
+
+	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
+	if (ret)
+		return ret;
+
+	if (has_fenced_gpu_access) {
+		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+			if (obj->tiling_mode) {
+				ret = i915_gem_object_get_fence(obj, ring);
+				if (ret)
+					goto err_unpin;
+
+				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+				i915_gem_object_pin_fence(obj);
+			} else {
+				ret = i915_gem_object_put_fence(obj);
+				if (ret)
+					goto err_unpin;
+			}
+		}
+		obj->pending_fenced_gpu_access = need_fence;
+	}
+
+	entry->offset = obj->gtt_offset;
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin(obj);
+	return ret;
+}
+
 static int
 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct drm_file *file,
@@ -518,6 +566,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		list_for_each_entry(obj, objects, exec_list) {
 			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 			bool need_fence, need_mappable;
+
 			if (!obj->gtt_space)
 				continue;
 
@@ -532,58 +581,47 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    (need_mappable && !obj->map_and_fenceable))
 				ret = i915_gem_object_unbind(obj);
 			else
-				ret = i915_gem_object_pin(obj,
-							  entry->alignment,
-							  need_mappable);
+				ret = pin_and_fence_object(obj, ring);
 			if (ret)
 				goto err;
-
-			entry++;
 		}
 
 		/* Bind fresh objects */
 		list_for_each_entry(obj, objects, exec_list) {
-			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-			bool need_fence;
-
-			need_fence =
-				has_fenced_gpu_access &&
-				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-				obj->tiling_mode != I915_TILING_NONE;
-
-			if (!obj->gtt_space) {
-				bool need_mappable =
-					entry->relocation_count ? true : need_fence;
-
-				ret = i915_gem_object_pin(obj,
-							  entry->alignment,
-							  need_mappable);
-				if (ret)
-					break;
-			}
+			if (obj->gtt_space)
+				continue;
 
-			if (has_fenced_gpu_access) {
-				if (need_fence) {
-					ret = i915_gem_object_get_fence(obj, ring);
-					if (ret)
-						break;
-				} else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-					   obj->tiling_mode == I915_TILING_NONE) {
-					/* XXX pipelined! */
-					ret = i915_gem_object_put_fence(obj);
-					if (ret)
-						break;
-				}
-				obj->pending_fenced_gpu_access = need_fence;
+			ret = pin_and_fence_object(obj, ring);
+			if (ret) {
+				int ret_ignore;
+
+				/* This can potentially raise a harmless
+				 * -EINVAL if we failed to bind in the above
+				 * call. It cannot raise -EINTR since we know
+				 * that the bo is freshly bound and so will
+				 * not need to be flushed or waited upon.
+				 */
+				ret_ignore = i915_gem_object_unbind(obj);
+				(void)ret_ignore;
+				WARN_ON(obj->gtt_space);
+				break;
 			}
-
-			entry->offset = obj->gtt_offset;
 		}
 
 		/* Decrement pin count for bound objects */
 		list_for_each_entry(obj, objects, exec_list) {
-			if (obj->gtt_space)
-				i915_gem_object_unpin(obj);
+			struct drm_i915_gem_exec_object2 *entry;
+
+			if (!obj->gtt_space)
+				continue;
+
+			entry = obj->exec_entry;
+			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+				i915_gem_object_unpin_fence(obj);
+				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
+			}
+
+			i915_gem_object_unpin(obj);
 		}
 
 		if (ret != -ENOSPC || retry > 1)
@@ -600,16 +638,19 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	} while (1);
 
 err:
-	obj = list_entry(obj->exec_list.prev,
-			 struct drm_i915_gem_object,
-			 exec_list);
-	while (objects != &obj->exec_list) {
-		if (obj->gtt_space)
-			i915_gem_object_unpin(obj);
+	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
+		struct drm_i915_gem_exec_object2 *entry;
+
+		if (!obj->gtt_space)
+			continue;
+
+		entry = obj->exec_entry;
+		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+			i915_gem_object_unpin_fence(obj);
+			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
+		}
 
-		obj = list_entry(obj->exec_list.prev,
-				 struct drm_i915_gem_object,
-				 exec_list);
+		i915_gem_object_unpin(obj);
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 0770671..fc9bc19 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2041,6 +2041,8 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
 		ret = i915_gem_object_get_fence(obj, pipelined);
 		if (ret)
 			goto err_unpin;
+
+		i915_gem_object_pin_fence(obj);
 	}
 
 	dev_priv->mm.interruptible = true;
@@ -2053,6 +2055,12 @@ err_interruptible:
 	return ret;
 }
 
+void intel_unpin_fb_obj(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_fence(obj);
+	i915_gem_object_unpin(obj);
+}
+
 static int i9xx_update_plane(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 			     int x, int y)
 {
@@ -2284,7 +2292,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 	ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y,
 					 LEAVE_ATOMIC_MODE_SET);
 	if (ret) {
-		i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
+		intel_unpin_fb_obj(to_intel_framebuffer(crtc->fb)->obj);
 		mutex_unlock(&dev->struct_mutex);
 		DRM_ERROR("failed to update base address\n");
 		return ret;
@@ -2292,7 +2300,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 
 	if (old_fb) {
 		intel_wait_for_vblank(dev, intel_crtc->pipe);
-		i915_gem_object_unpin(to_intel_framebuffer(old_fb)->obj);
+		intel_unpin_fb_obj(to_intel_framebuffer(old_fb)->obj);
 	}
 
 	mutex_unlock(&dev->struct_mutex);
@@ -3355,7 +3363,7 @@ static void intel_crtc_disable(struct drm_crtc *crtc)
 
 	if (crtc->fb) {
 		mutex_lock(&dev->struct_mutex);
-		i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
+		intel_unpin_fb_obj(to_intel_framebuffer(crtc->fb)->obj);
 		mutex_unlock(&dev->struct_mutex);
 	}
 }
@@ -7158,7 +7166,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 		container_of(__work, struct intel_unpin_work, work);
 
 	mutex_lock(&work->dev->struct_mutex);
-	i915_gem_object_unpin(work->old_fb_obj);
+	intel_unpin_fb_obj(work->old_fb_obj);
 	drm_gem_object_unreference(&work->pending_flip_obj->base);
 	drm_gem_object_unreference(&work->old_fb_obj->base);
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 1348705..9cec6c3 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -374,6 +374,7 @@ extern void intel_init_emon(struct drm_device *dev);
 extern int intel_pin_and_fence_fb_obj(struct drm_device *dev,
 				      struct drm_i915_gem_object *obj,
 				      struct intel_ring_buffer *pipelined);
+extern void intel_unpin_fb_obj(struct drm_i915_gem_object *obj);
 
 extern int intel_framebuffer_init(struct drm_device *dev,
 				  struct intel_framebuffer *ifb,
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index d13989f..ad3bd92 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -503,7 +503,7 @@ intel_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 			intel_wait_for_vblank(dev, to_intel_crtc(crtc)->pipe);
 			mutex_lock(&dev->struct_mutex);
 		}
-		i915_gem_object_unpin(old_obj);
+		intel_unpin_fb_obj(old_obj);
 	}
 
 out_unlock:
@@ -530,7 +530,7 @@ intel_disable_plane(struct drm_plane *plane)
 		goto out;
 
 	mutex_lock(&dev->struct_mutex);
-	i915_gem_object_unpin(intel_plane->obj);
+	intel_unpin_fb_obj(intel_plane->obj);
 	intel_plane->obj = NULL;
 	mutex_unlock(&dev->struct_mutex);
 out:
-- 
cgit v1.1


From 39965b376601314ba374b8fa3944f5957d4ff2de Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:09 +0100
Subject: drm/i915: don't trash the gtt when running out of fences

With the fence accounting fixed up in the previous commit not finding
enough fences is a fatal error and userspace bug. Trashing the entire
gtt is not gonna turn up that missing fence, so don't to this by
returning another error thatn ENOSPC.

This has the added benefit that it's easier to distinguish fence
accounting errors from gtt space accounting issues.

TTM serves as precendence for the EDEADLK error code - it returns it
when the reservation code needs resources already blocked by the
current reservation.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c78930e..7219241 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2566,7 +2566,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 
 	reg = i915_find_fence_reg(dev, pipelined);
 	if (reg == NULL)
-		return -ENOSPC;
+		return -EDEADLK;
 
 	ret = i915_gem_object_flush_fence(obj, pipelined);
 	if (ret)
-- 
cgit v1.1


From 08e14e80d0229262063139a95209fa0dc354feaa Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:10 +0100
Subject: drm/i915: refactor debugfs open function

Only forcewake has an open with special semantics, the other r/w
debugfs only assign the file private pointer.

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 26 +++++---------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5aa2568..3774042 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1415,8 +1415,8 @@ static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data)
 }
 
 static int
-i915_wedged_open(struct inode *inode,
-		 struct file *filp)
+i915_debugfs_common_open(struct inode *inode,
+			 struct file *filp)
 {
 	filp->private_data = inode->i_private;
 	return 0;
@@ -1472,20 +1472,12 @@ i915_wedged_write(struct file *filp,
 
 static const struct file_operations i915_wedged_fops = {
 	.owner = THIS_MODULE,
-	.open = i915_wedged_open,
+	.open = i915_debugfs_common_open,
 	.read = i915_wedged_read,
 	.write = i915_wedged_write,
 	.llseek = default_llseek,
 };
 
-static int
-i915_max_freq_open(struct inode *inode,
-		   struct file *filp)
-{
-	filp->private_data = inode->i_private;
-	return 0;
-}
-
 static ssize_t
 i915_max_freq_read(struct file *filp,
 		   char __user *ubuf,
@@ -1542,20 +1534,12 @@ i915_max_freq_write(struct file *filp,
 
 static const struct file_operations i915_max_freq_fops = {
 	.owner = THIS_MODULE,
-	.open = i915_max_freq_open,
+	.open = i915_debugfs_common_open,
 	.read = i915_max_freq_read,
 	.write = i915_max_freq_write,
 	.llseek = default_llseek,
 };
 
-static int
-i915_cache_sharing_open(struct inode *inode,
-		   struct file *filp)
-{
-	filp->private_data = inode->i_private;
-	return 0;
-}
-
 static ssize_t
 i915_cache_sharing_read(struct file *filp,
 		   char __user *ubuf,
@@ -1621,7 +1605,7 @@ i915_cache_sharing_write(struct file *filp,
 
 static const struct file_operations i915_cache_sharing_fops = {
 	.owner = THIS_MODULE,
-	.open = i915_cache_sharing_open,
+	.open = i915_debugfs_common_open,
 	.read = i915_cache_sharing_read,
 	.write = i915_cache_sharing_write,
 	.llseek = default_llseek,
-- 
cgit v1.1


From 6a9c308de0332b873720c629cb88f58d154b4f1d Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:11 +0100
Subject: drm/i915: refactor debugfs create functions

All r/w debugfs files are created equal.

v2: Add some newlines to make the code easier on the eyes as requested
by Ben Widawsky.

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 55 ++++++++++++-------------------------
 1 file changed, 18 insertions(+), 37 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3774042..9a2f3fc 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1637,21 +1637,6 @@ drm_add_fake_info_node(struct drm_minor *minor,
 	return 0;
 }
 
-static int i915_wedged_create(struct dentry *root, struct drm_minor *minor)
-{
-	struct drm_device *dev = minor->dev;
-	struct dentry *ent;
-
-	ent = debugfs_create_file("i915_wedged",
-				  S_IRUGO | S_IWUSR,
-				  root, dev,
-				  &i915_wedged_fops);
-	if (IS_ERR(ent))
-		return PTR_ERR(ent);
-
-	return drm_add_fake_info_node(minor, ent, &i915_wedged_fops);
-}
-
 static int i915_forcewake_open(struct inode *inode, struct file *file)
 {
 	struct drm_device *dev = inode->i_private;
@@ -1713,34 +1698,22 @@ static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor)
 	return drm_add_fake_info_node(minor, ent, &i915_forcewake_fops);
 }
 
-static int i915_max_freq_create(struct dentry *root, struct drm_minor *minor)
-{
-	struct drm_device *dev = minor->dev;
-	struct dentry *ent;
-
-	ent = debugfs_create_file("i915_max_freq",
-				  S_IRUGO | S_IWUSR,
-				  root, dev,
-				  &i915_max_freq_fops);
-	if (IS_ERR(ent))
-		return PTR_ERR(ent);
-
-	return drm_add_fake_info_node(minor, ent, &i915_max_freq_fops);
-}
-
-static int i915_cache_sharing_create(struct dentry *root, struct drm_minor *minor)
+static int i915_debugfs_create(struct dentry *root,
+			       struct drm_minor *minor,
+			       const char *name,
+			       const struct file_operations *fops)
 {
 	struct drm_device *dev = minor->dev;
 	struct dentry *ent;
 
-	ent = debugfs_create_file("i915_cache_sharing",
+	ent = debugfs_create_file(name,
 				  S_IRUGO | S_IWUSR,
 				  root, dev,
-				  &i915_cache_sharing_fops);
+				  fops);
 	if (IS_ERR(ent))
 		return PTR_ERR(ent);
 
-	return drm_add_fake_info_node(minor, ent, &i915_cache_sharing_fops);
+	return drm_add_fake_info_node(minor, ent, fops);
 }
 
 static struct drm_info_list i915_debugfs_list[] = {
@@ -1789,17 +1762,25 @@ int i915_debugfs_init(struct drm_minor *minor)
 {
 	int ret;
 
-	ret = i915_wedged_create(minor->debugfs_root, minor);
+	ret = i915_debugfs_create(minor->debugfs_root, minor,
+				  "i915_wedged",
+				  &i915_wedged_fops);
 	if (ret)
 		return ret;
 
 	ret = i915_forcewake_create(minor->debugfs_root, minor);
 	if (ret)
 		return ret;
-	ret = i915_max_freq_create(minor->debugfs_root, minor);
+
+	ret = i915_debugfs_create(minor->debugfs_root, minor,
+				  "i915_max_freq",
+				  &i915_max_freq_fops);
 	if (ret)
 		return ret;
-	ret = i915_cache_sharing_create(minor->debugfs_root, minor);
+
+	ret = i915_debugfs_create(minor->debugfs_root, minor,
+				  "i915_cache_sharing",
+				  &i915_cache_sharing_fops);
 	if (ret)
 		return ret;
 
-- 
cgit v1.1


From 653d7bed26a0c298dee7d60f6ab4bb442acf8b82 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:21 +0100
Subject: drm/i915: capture error_state also for stuck rings

Since quite a while we also the basic output configuration in the
error_state, so it should contain enough information to diagnose
these MI_WAIT hangs.

Reviewed-and-tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_irq.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1e75c44..4dedb31 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1706,6 +1706,7 @@ void i915_hangcheck_elapsed(unsigned long data)
 	    dev_priv->last_instdone1 == instdone1) {
 		if (dev_priv->hangcheck_count++ > 1) {
 			DRM_ERROR("Hangcheck timer elapsed... GPU hung\n");
+			i915_handle_error(dev, true);
 
 			if (!IS_GEN2(dev)) {
 				/* Is the chip hanging on a WAIT_FOR_EVENT?
@@ -1713,7 +1714,6 @@ void i915_hangcheck_elapsed(unsigned long data)
 				 * and break the hang. This should work on
 				 * all but the second generation chipsets.
 				 */
-
 				if (kick_ring(&dev_priv->ring[RCS]))
 					goto repeat;
 
@@ -1726,7 +1726,6 @@ void i915_hangcheck_elapsed(unsigned long data)
 					goto repeat;
 			}
 
-			i915_handle_error(dev, true);
 			return;
 		}
 	} else {
-- 
cgit v1.1


From ff865f79764ca43bc601ff5a2499738451fc96dd Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:26 +0100
Subject: drm/i915: remove the i915_batchbuffer_info debugfs file

With the error_state facility in place, this has outlived it's
usefulness. It also oopses with the lates llc-reloc patches because
it directly access objects through the gtt without any checks.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 40 -------------------------------------
 1 file changed, 40 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9a2f3fc..f048605 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -564,45 +564,6 @@ static int i915_hws_info(struct seq_file *m, void *data)
 	return 0;
 }
 
-static void i915_dump_object(struct seq_file *m,
-			     struct io_mapping *mapping,
-			     struct drm_i915_gem_object *obj)
-{
-	int page, page_count, i;
-
-	page_count = obj->base.size / PAGE_SIZE;
-	for (page = 0; page < page_count; page++) {
-		u32 *mem = io_mapping_map_wc(mapping,
-					     obj->gtt_offset + page * PAGE_SIZE);
-		for (i = 0; i < PAGE_SIZE; i += 4)
-			seq_printf(m, "%08x :  %08x\n", i, mem[i / 4]);
-		io_mapping_unmap(mem);
-	}
-}
-
-static int i915_batchbuffer_info(struct seq_file *m, void *data)
-{
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *obj;
-	int ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
-
-	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
-		if (obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) {
-		    seq_printf(m, "--- gtt_offset = 0x%08x\n", obj->gtt_offset);
-		    i915_dump_object(m, dev_priv->mm.gtt_mapping, obj);
-		}
-	}
-
-	mutex_unlock(&dev->struct_mutex);
-	return 0;
-}
-
 static int i915_ringbuffer_data(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
@@ -1739,7 +1700,6 @@ static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_bsd_ringbuffer_info", i915_ringbuffer_info, 0, (void *)VCS},
 	{"i915_blt_ringbuffer_data", i915_ringbuffer_data, 0, (void *)BCS},
 	{"i915_blt_ringbuffer_info", i915_ringbuffer_info, 0, (void *)BCS},
-	{"i915_batchbuffers", i915_batchbuffer_info, 0},
 	{"i915_error_state", i915_error_state, 0},
 	{"i915_rstdby_delays", i915_rstdby_delays, 0},
 	{"i915_cur_delayinfo", i915_cur_delayinfo, 0},
-- 
cgit v1.1


From 4ca4a250ac93d5538a2a5c98ee2bcf9195f38be4 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:27 +0100
Subject: drm/i915: reject GTT domain in relocations

This confuses our domain tracking and can (for gtt write domains) lead
to a subsequent oops.

Tested by tests/gem_exec_bad_domains from i-g-t.

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 4a43ef5db..123c514 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -303,8 +303,9 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 			  reloc->write_domain);
 		return ret;
 	}
-	if (unlikely((reloc->write_domain | reloc->read_domains) & I915_GEM_DOMAIN_CPU)) {
-		DRM_ERROR("reloc with read/write CPU domains: "
+	if (unlikely((reloc->write_domain | reloc->read_domains)
+		     & ~I915_GEM_GPU_DOMAINS)) {
+		DRM_ERROR("reloc with read/write non-GPU domains: "
 			  "obj %p target %d offset %d "
 			  "read %08x write %08x",
 			  obj, reloc->target_handle,
-- 
cgit v1.1


From 33f3f518fbb65d86f163083b74823e8bbe561bfc Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:39 +0100
Subject: drm/i915: add per-ring fault reg to error_state

This was pretty handy when figuring out what exactly went wrong with
ppgtt and it might also be useful when we stop filling the entire gart
with scratch page entries.

Also add the gen6+ DONE reg while at it.

v2: Chris Wilson suggested to allocate the error_state with kzalloc
for better paranoia. Also kill existing spurious clears of the
error_state while at it.

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c |  8 ++++++--
 drivers/gpu/drm/i915/i915_drv.h     |  2 ++
 drivers/gpu/drm/i915/i915_irq.c     | 13 +++++++------
 drivers/gpu/drm/i915/i915_reg.h     |  2 ++
 4 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f048605..d410490 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -718,8 +718,10 @@ static void i915_ring_error_state(struct seq_file *m,
 	if (INTEL_INFO(dev)->gen >= 4)
 		seq_printf(m, "  INSTPS: 0x%08x\n", error->instps[ring]);
 	seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm[ring]);
-	if (INTEL_INFO(dev)->gen >= 6)
+	if (INTEL_INFO(dev)->gen >= 6) {
 		seq_printf(m, "  FADDR: 0x%08x\n", error->faddr[ring]);
+		seq_printf(m, "  FAULT_REG: 0x%08x\n", error->fault_reg[ring]);
+	}
 	seq_printf(m, "  seqno: 0x%08x\n", error->seqno[ring]);
 }
 
@@ -749,8 +751,10 @@ static int i915_error_state(struct seq_file *m, void *unused)
 	for (i = 0; i < dev_priv->num_fence_regs; i++)
 		seq_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);
 
-	if (INTEL_INFO(dev)->gen >= 6) 
+	if (INTEL_INFO(dev)->gen >= 6) {
 		seq_printf(m, "ERROR: 0x%08x\n", error->error);
+		seq_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
+	}
 
 	i915_ring_error_state(m, dev, error, RCS);
 	if (HAS_BLT(dev))
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 12e8cce..865de80 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -165,6 +165,8 @@ struct drm_i915_error_state {
 	u32 instdone1;
 	u32 seqno[I915_NUM_RINGS];
 	u64 bbaddr;
+	u32 fault_reg[I915_NUM_RINGS];
+	u32 done_reg;
 	u32 faddr[I915_NUM_RINGS];
 	u64 fence[I915_MAX_NUM_FENCES];
 	struct timeval time;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4dedb31..64bb212 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -882,8 +882,10 @@ static void i915_record_ring_state(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (INTEL_INFO(dev)->gen >= 6)
+	if (INTEL_INFO(dev)->gen >= 6) {
 		error->faddr[ring->id] = I915_READ(RING_DMA_FADD(ring->mmio_base));
+		error->fault_reg[ring->id] = I915_READ(RING_FAULT_REG(ring));
+	}
 
 	if (INTEL_INFO(dev)->gen >= 4) {
 		error->ipeir[ring->id] = I915_READ(RING_IPEIR(ring->mmio_base));
@@ -898,7 +900,6 @@ static void i915_record_ring_state(struct drm_device *dev,
 		error->ipeir[ring->id] = I915_READ(IPEIR);
 		error->ipehr[ring->id] = I915_READ(IPEHR);
 		error->instdone[ring->id] = I915_READ(INSTDONE);
-		error->bbaddr = 0;
 	}
 
 	error->instpm[ring->id] = I915_READ(RING_INSTPM(ring->mmio_base));
@@ -932,7 +933,7 @@ static void i915_capture_error_state(struct drm_device *dev)
 		return;
 
 	/* Account for pipe specific data like PIPE*STAT */
-	error = kmalloc(sizeof(*error), GFP_ATOMIC);
+	error = kzalloc(sizeof(*error), GFP_ATOMIC);
 	if (!error) {
 		DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
 		return;
@@ -946,10 +947,10 @@ static void i915_capture_error_state(struct drm_device *dev)
 	for_each_pipe(pipe)
 		error->pipestat[pipe] = I915_READ(PIPESTAT(pipe));
 
-	if (INTEL_INFO(dev)->gen >= 6)
+	if (INTEL_INFO(dev)->gen >= 6) {
 		error->error = I915_READ(ERROR_GEN6);
-	else
-		error->error = 0;
+		error->done_reg = I915_READ(DONE_REG);
+	}
 
 	i915_record_ring_state(dev, error, &dev_priv->ring[RCS]);
 	if (HAS_BLT(dev))
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index d107a1d..f960738 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -319,6 +319,8 @@
 #define RING_HWS_PGA(base)	((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
 #define RENDER_HWS_PGA_GEN7	(0x04080)
+#define RING_FAULT_REG(ring)	(0x4094 + 0x100*(ring)->id)
+#define DONE_REG		0x40b0
 #define BSD_HWS_PGA_GEN7	(0x04180)
 #define BLT_HWS_PGA_GEN7	(0x04280)
 #define RING_ACTHD(base)	((base)+0x74)
-- 
cgit v1.1


From 068c6ff1cbbfc09eed2b892dd7b0572dd6737785 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sun, 29 Jan 2012 16:52:05 +0000
Subject: drm/i915: Remove the upper limit on the bo size for mapping into the
 CPU domain

The original intention of comparing the bo against the mappable GTT
limits was to prevent a subsequent faulting of the bo into the GTT from
clearing the entire GTT in vain. However, that was clearly a cut'n'paste
mistake as a CPU mapping never binds the bo into the aperture. Whilst
there may be some merit to limiting the maximum size of the bo to
something that can be utilized by the GPU, that limit itself does not
belong as a safeguard to mmapping the bo, so remove the check entirely.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7219241..df23c62 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1141,7 +1141,6 @@ int
 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 		    struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_mmap *args = data;
 	struct drm_gem_object *obj;
 	unsigned long addr;
@@ -1153,11 +1152,6 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 	if (obj == NULL)
 		return -ENOENT;
 
-	if (obj->size > dev_priv->mm.gtt_mappable_end) {
-		drm_gem_object_unreference_unlocked(obj);
-		return -E2BIG;
-	}
-
 	down_write(&current->mm->mmap_sem);
 	addr = do_mmap(obj->filp, 0, args->size,
 		       PROT_READ | PROT_WRITE, MAP_SHARED,
-- 
cgit v1.1


From c9c4b6f6c28354f1df9bd288dc33ba7ae0e66aaa Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:15 +0100
Subject: drm/i915: fix swizzle detection for gen3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It looks like the desktop variants of i915 and i945 also have the DCC
register to control dram channel interleave and cpu side bit6
swizzling.

Unfortunately internal Cspec/ConfigDB documentation for these ancient chips
have already been dropped and there seem to be no archives. Also
somebody thought the swizzling behaviour is surely a worthy secret to
keep and redacted any mention of these fields from the published Intel
datasheets.

I suspect the hw engineers were really proud of the page coloring
they've achieved in their first dual channel dram controller with
bit17 - after all Bspec explains in great length the optimal layout of
page frame numbers modulo 4 for the color and depth buffers, too.
Later on when they've started to work on VT-d they shamefully
discoverd their stupidity and tried to cover the tracks ...

Tested-by: Daniel Vetter <daniel.vetter@ffwll.ch> (i915g)
Tested-by: Pavel Ondračka <pavel.ondracka@email.cz> (i945g)
Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=42625
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem_tiling.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 31d334d..861223b 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -107,10 +107,10 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 		 */
 		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
 		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-	} else if (IS_MOBILE(dev)) {
+	} else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) {
 		uint32_t dcc;
 
-		/* On mobile 9xx chipsets, channel interleave by the CPU is
+		/* On 9xx chipsets, channel interleave by the CPU is
 		 * determined by DCC.  For single-channel, neither the CPU
 		 * nor the GPU do swizzling.  For dual channel interleaved,
 		 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
-- 
cgit v1.1


From ea16a3cdb9218a2389fafc804356c1914c157654 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:16 +0100
Subject: drm/i915: add debugfs file for swizzling information

This will also come handy for the gen6+ swizzling support, where the
driver is supposed to control swizzling depending upon dram
configuration.

v2: CxDRB3 are 16 bit regs! Noticed by Chris Wilson.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 50 +++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d410490..681cbe4 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1379,6 +1379,55 @@ static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static const char *swizzle_string(unsigned swizzle)
+{
+	switch(swizzle) {
+	case I915_BIT_6_SWIZZLE_NONE:
+		return "none";
+	case I915_BIT_6_SWIZZLE_9:
+		return "bit9";
+	case I915_BIT_6_SWIZZLE_9_10:
+		return "bit9/bit10";
+	case I915_BIT_6_SWIZZLE_9_11:
+		return "bit9/bit11";
+	case I915_BIT_6_SWIZZLE_9_10_11:
+		return "bit9/bit10/bit11";
+	case I915_BIT_6_SWIZZLE_9_17:
+		return "bit9/bit17";
+	case I915_BIT_6_SWIZZLE_9_10_17:
+		return "bit9/bit10/bit17";
+	case I915_BIT_6_SWIZZLE_UNKNOWN:
+		return "unkown";
+	}
+
+	return "bug";
+}
+
+static int i915_swizzle_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	mutex_lock(&dev->struct_mutex);
+	seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
+		   swizzle_string(dev_priv->mm.bit_6_swizzle_x));
+	seq_printf(m, "bit6 swizzle for Y-tiling = %s\n",
+		   swizzle_string(dev_priv->mm.bit_6_swizzle_y));
+
+	if (IS_GEN3(dev) || IS_GEN4(dev)) {
+		seq_printf(m, "DDC = 0x%08x\n",
+			   I915_READ(DCC));
+		seq_printf(m, "C0DRB3 = 0x%04x\n",
+			   I915_READ16(C0DRB3));
+		seq_printf(m, "C1DRB3 = 0x%04x\n",
+			   I915_READ16(C1DRB3));
+	}
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
 static int
 i915_debugfs_common_open(struct inode *inode,
 			 struct file *filp)
@@ -1719,6 +1768,7 @@ static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_gem_framebuffer", i915_gem_framebuffer_info, 0},
 	{"i915_context_status", i915_context_status, 0},
 	{"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0},
+	{"i915_swizzle_info", i915_swizzle_info, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
-- 
cgit v1.1


From 5c0480f21f9896c443b0e65d779c8e09a695da7b Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:30 +0100
Subject: drm/i915: fall through pwrite_gtt_slow to the shmem slow path

The gtt_pwrite slowpath grabs the userspace memory with
get_user_pages. This will not work for non-page backed memory, like a
gtt mmapped gem object. Hence fall throuh to the shmem paths if we hit
-EFAULT in the gtt paths.

Now the shmem paths have exactly the same problem, but this way we
only need to rearrange the code in one write path.

v2: v1 accidentaly falls back to shmem pwrite for phys objects. Fixed.

v3: Make the codeflow around phys_pwrite cleara as suggested by Chris
Wilson.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index df23c62..7bb32ec 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -996,10 +996,13 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 * pread/pwrite currently are reading and writing from the CPU
 	 * perspective, requiring manual detiling by the client.
 	 */
-	if (obj->phys_obj)
+	if (obj->phys_obj) {
 		ret = i915_gem_phys_pwrite(dev, obj, args, file);
-	else if (obj->gtt_space &&
-		 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+		goto out;
+	}
+
+	if (obj->gtt_space &&
+	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 		ret = i915_gem_object_pin(obj, 0, true);
 		if (ret)
 			goto out;
@@ -1018,18 +1021,24 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 
 out_unpin:
 		i915_gem_object_unpin(obj);
-	} else {
-		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
-		if (ret)
-			goto out;
 
-		ret = -EFAULT;
-		if (!i915_gem_object_needs_bit17_swizzle(obj))
-			ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
-		if (ret == -EFAULT)
-			ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
+		if (ret != -EFAULT)
+			goto out;
+		/* Fall through to the shmfs paths because the gtt paths might
+		 * fail with non-page-backed user pointers (e.g. gtt mappings
+		 * when moving data between textures). */
 	}
 
+	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+	if (ret)
+		goto out;
+
+	ret = -EFAULT;
+	if (!i915_gem_object_needs_bit17_swizzle(obj))
+		ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
+	if (ret == -EFAULT)
+		ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
+
 out:
 	drm_gem_object_unreference(&obj->base);
 unlock:
-- 
cgit v1.1


From 8c59967c448af5d77fe5189d0e73f32cd3dbe439 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:31 +0100
Subject: drm/i915: rewrite shmem_pwrite_slow to use copy_from_user

... instead of get_user_pages, because that fails on non page-backed
user addresses like e.g. a gtt mapping of a bo.

To get there essentially copy the vfs read path into pagecache. We
can't call that right away because we have to take care of bit17
swizzling. To not deadlock with our own pagefault handler we need
to completely drop struct_mutex, reducing the atomicty-guarantees
of our userspace abi. Implications for racing with other gem ioctl:

- execbuf, pwrite, pread: Due to -EFAULT fallback to slow paths there's
  already the risk of the pwrite call not being atomic, no degration.
- read/write access to mmaps: already fully racy, no degration.
- set_tiling: Calling set_tiling while reading/writing is already
  pretty much undefined, now it just got a bit worse. set_tiling is
  only called by libdrm on unused/new bos, so no problem.
- set_domain: When changing to the gtt domain while copying (without any
  read/write access, e.g. for synchronization), we might leave unflushed
  data in the cpu caches. The clflush_object at the end of pwrite_slow
  takes care of this problem.
- truncating of purgeable objects: the shmem_read_mapping_page call could
  reinstate backing storage for truncated objects. The check at the end
  of pwrite_slow takes care of this.

v2:
- add missing intel_gtt_chipset_flush
- add __ to copy_from_user_swizzled as suggest by Chris Wilson.

v3: Fixup bit17 swizzling, it swizzled the wrong pages.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 127 ++++++++++++++++++++--------------------
 1 file changed, 65 insertions(+), 62 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7bb32ec..3f391aa 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -58,6 +58,7 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);
 
 static int i915_gem_inactive_shrink(struct shrinker *shrinker,
 				    struct shrink_control *sc);
+static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
 
 /* some bookkeeping */
 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
@@ -385,6 +386,32 @@ i915_gem_shmem_pread_fast(struct drm_device *dev,
 	return 0;
 }
 
+static inline int
+__copy_from_user_swizzled(char __user *gpu_vaddr, int gpu_offset,
+			  const char *cpu_vaddr,
+			  int length)
+{
+	int ret, cpu_offset = 0;
+
+	while (length > 0) {
+		int cacheline_end = ALIGN(gpu_offset + 1, 64);
+		int this_length = min(cacheline_end - gpu_offset, length);
+		int swizzled_gpu_offset = gpu_offset ^ 64;
+
+		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
+				       cpu_vaddr + cpu_offset,
+				       this_length);
+		if (ret)
+			return ret + length;
+
+		cpu_offset += this_length;
+		gpu_offset += this_length;
+		length -= this_length;
+	}
+
+	return 0;
+}
+
 /**
  * This is the fallback shmem pread path, which allocates temporary storage
  * in kernel space to copy_to_user into outside of the struct_mutex, so we
@@ -841,71 +868,36 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 			   struct drm_file *file)
 {
 	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
-	struct mm_struct *mm = current->mm;
-	struct page **user_pages;
 	ssize_t remain;
-	loff_t offset, pinned_pages, i;
-	loff_t first_data_page, last_data_page, num_pages;
-	int shmem_page_offset;
-	int data_page_index,  data_page_offset;
-	int page_length;
-	int ret;
-	uint64_t data_ptr = args->data_ptr;
-	int do_bit17_swizzling;
+	loff_t offset;
+	char __user *user_data;
+	int shmem_page_offset, page_length, ret;
+	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 
+	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
 
-	/* Pin the user pages containing the data.  We can't fault while
-	 * holding the struct mutex, and all of the pwrite implementations
-	 * want to hold it while dereferencing the user data.
-	 */
-	first_data_page = data_ptr / PAGE_SIZE;
-	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
-	num_pages = last_data_page - first_data_page + 1;
-
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 0, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
-	if (ret)
-		goto out;
-
-	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 
 	offset = args->offset;
 	obj->dirty = 1;
 
+	mutex_unlock(&dev->struct_mutex);
+
 	while (remain > 0) {
 		struct page *page;
+		char *vaddr;
 
 		/* Operation in this page
 		 *
 		 * shmem_page_offset = offset within page in shmem file
-		 * data_page_index = page number in get_user_pages return
-		 * data_page_offset = offset with data_page_index page.
 		 * page_length = bytes to copy for this page
 		 */
 		shmem_page_offset = offset_in_page(offset);
-		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
-		data_page_offset = offset_in_page(data_ptr);
 
 		page_length = remain;
 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
 			page_length = PAGE_SIZE - shmem_page_offset;
-		if ((data_page_offset + page_length) > PAGE_SIZE)
-			page_length = PAGE_SIZE - data_page_offset;
 
 		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
 		if (IS_ERR(page)) {
@@ -913,34 +905,45 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 			goto out;
 		}
 
-		if (do_bit17_swizzling) {
-			slow_shmem_bit17_copy(page,
-					      shmem_page_offset,
-					      user_pages[data_page_index],
-					      data_page_offset,
-					      page_length,
-					      0);
-		} else {
-			slow_shmem_copy(page,
-					shmem_page_offset,
-					user_pages[data_page_index],
-					data_page_offset,
-					page_length);
-		}
+		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+			(page_to_phys(page) & (1 << 17)) != 0;
+
+		vaddr = kmap(page);
+		if (page_do_bit17_swizzling)
+			ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
+							user_data,
+							page_length);
+		else
+			ret = __copy_from_user(vaddr + shmem_page_offset,
+					       user_data,
+					       page_length);
+		kunmap(page);
 
 		set_page_dirty(page);
 		mark_page_accessed(page);
 		page_cache_release(page);
 
+		if (ret) {
+			ret = -EFAULT;
+			goto out;
+		}
+
 		remain -= page_length;
-		data_ptr += page_length;
+		user_data += page_length;
 		offset += page_length;
 	}
 
 out:
-	for (i = 0; i < pinned_pages; i++)
-		page_cache_release(user_pages[i]);
-	drm_free_large(user_pages);
+	mutex_lock(&dev->struct_mutex);
+	/* Fixup: Kill any reinstated backing storage pages */
+	if (obj->madv == __I915_MADV_PURGED)
+		i915_gem_object_truncate(obj);
+	/* and flush dirty cachelines in case the object isn't in the cpu write
+	 * domain anymore. */
+	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+		i915_gem_clflush_object(obj);
+		intel_gtt_chipset_flush();
+	}
 
 	return ret;
 }
-- 
cgit v1.1


From 8461d2267726dddcb9b5d6ae6b32769b38a326cc Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:57:32 +0100
Subject: drm/i915: rewrite shmem_pread_slow to use copy_to_user

Like for shmem_pwrite_slow. The only difference is that because we
read data, we can leave the fetched cachelines in the cpu: In the case
that the object isn't in the cpu read domain anymore, the clflush for
the next cpu read domain invalidation will simply drop these
cachelines.

slow_shmem_bit17_copy is now ununsed, so kill it.

With this patch tests/gem_mmap_gtt now actually works.

v2: add __ to copy_to_user_swizzled as suggested by Chris Wilson.

v3: Fixup the swizzling logic, it swizzled the wrong pages.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38115
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 191 ++++++++++++----------------------------
 1 file changed, 57 insertions(+), 134 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3f391aa..51a2b0c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -259,73 +259,6 @@ static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
 		obj->tiling_mode != I915_TILING_NONE;
 }
 
-static inline void
-slow_shmem_copy(struct page *dst_page,
-		int dst_offset,
-		struct page *src_page,
-		int src_offset,
-		int length)
-{
-	char *dst_vaddr, *src_vaddr;
-
-	dst_vaddr = kmap(dst_page);
-	src_vaddr = kmap(src_page);
-
-	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
-
-	kunmap(src_page);
-	kunmap(dst_page);
-}
-
-static inline void
-slow_shmem_bit17_copy(struct page *gpu_page,
-		      int gpu_offset,
-		      struct page *cpu_page,
-		      int cpu_offset,
-		      int length,
-		      int is_read)
-{
-	char *gpu_vaddr, *cpu_vaddr;
-
-	/* Use the unswizzled path if this page isn't affected. */
-	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
-		if (is_read)
-			return slow_shmem_copy(cpu_page, cpu_offset,
-					       gpu_page, gpu_offset, length);
-		else
-			return slow_shmem_copy(gpu_page, gpu_offset,
-					       cpu_page, cpu_offset, length);
-	}
-
-	gpu_vaddr = kmap(gpu_page);
-	cpu_vaddr = kmap(cpu_page);
-
-	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
-	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
-	 */
-	while (length > 0) {
-		int cacheline_end = ALIGN(gpu_offset + 1, 64);
-		int this_length = min(cacheline_end - gpu_offset, length);
-		int swizzled_gpu_offset = gpu_offset ^ 64;
-
-		if (is_read) {
-			memcpy(cpu_vaddr + cpu_offset,
-			       gpu_vaddr + swizzled_gpu_offset,
-			       this_length);
-		} else {
-			memcpy(gpu_vaddr + swizzled_gpu_offset,
-			       cpu_vaddr + cpu_offset,
-			       this_length);
-		}
-		cpu_offset += this_length;
-		gpu_offset += this_length;
-		length -= this_length;
-	}
-
-	kunmap(cpu_page);
-	kunmap(gpu_page);
-}
-
 /**
  * This is the fast shmem pread path, which attempts to copy_from_user directly
  * from the backing pages of the object to the user's address space.  On a
@@ -387,6 +320,32 @@ i915_gem_shmem_pread_fast(struct drm_device *dev,
 }
 
 static inline int
+__copy_to_user_swizzled(char __user *cpu_vaddr,
+			const char *gpu_vaddr, int gpu_offset,
+			int length)
+{
+	int ret, cpu_offset = 0;
+
+	while (length > 0) {
+		int cacheline_end = ALIGN(gpu_offset + 1, 64);
+		int this_length = min(cacheline_end - gpu_offset, length);
+		int swizzled_gpu_offset = gpu_offset ^ 64;
+
+		ret = __copy_to_user(cpu_vaddr + cpu_offset,
+				     gpu_vaddr + swizzled_gpu_offset,
+				     this_length);
+		if (ret)
+			return ret + length;
+
+		cpu_offset += this_length;
+		gpu_offset += this_length;
+		length -= this_length;
+	}
+
+	return 0;
+}
+
+static inline int
 __copy_from_user_swizzled(char __user *gpu_vaddr, int gpu_offset,
 			  const char *cpu_vaddr,
 			  int length)
@@ -425,72 +384,34 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
 			  struct drm_file *file)
 {
 	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
-	struct mm_struct *mm = current->mm;
-	struct page **user_pages;
+	char __user *user_data;
 	ssize_t remain;
-	loff_t offset, pinned_pages, i;
-	loff_t first_data_page, last_data_page, num_pages;
-	int shmem_page_offset;
-	int data_page_index, data_page_offset;
-	int page_length;
-	int ret;
-	uint64_t data_ptr = args->data_ptr;
-	int do_bit17_swizzling;
+	loff_t offset;
+	int shmem_page_offset, page_length, ret;
+	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 
+	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
 
-	/* Pin the user pages containing the data.  We can't fault while
-	 * holding the struct mutex, yet we want to hold it while
-	 * dereferencing the user data.
-	 */
-	first_data_page = data_ptr / PAGE_SIZE;
-	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
-	num_pages = last_data_page - first_data_page + 1;
+	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
+	offset = args->offset;
 
 	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 1, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	ret = i915_gem_object_set_cpu_read_domain_range(obj,
-							args->offset,
-							args->size);
-	if (ret)
-		goto out;
-
-	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
-	offset = args->offset;
 
 	while (remain > 0) {
 		struct page *page;
+		char *vaddr;
 
 		/* Operation in this page
 		 *
 		 * shmem_page_offset = offset within page in shmem file
-		 * data_page_index = page number in get_user_pages return
-		 * data_page_offset = offset with data_page_index page.
 		 * page_length = bytes to copy for this page
 		 */
 		shmem_page_offset = offset_in_page(offset);
-		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
-		data_page_offset = offset_in_page(data_ptr);
-
 		page_length = remain;
 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
 			page_length = PAGE_SIZE - shmem_page_offset;
-		if ((data_page_offset + page_length) > PAGE_SIZE)
-			page_length = PAGE_SIZE - data_page_offset;
 
 		page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
 		if (IS_ERR(page)) {
@@ -498,36 +419,38 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
 			goto out;
 		}
 
-		if (do_bit17_swizzling) {
-			slow_shmem_bit17_copy(page,
-					      shmem_page_offset,
-					      user_pages[data_page_index],
-					      data_page_offset,
-					      page_length,
-					      1);
-		} else {
-			slow_shmem_copy(user_pages[data_page_index],
-					data_page_offset,
-					page,
-					shmem_page_offset,
-					page_length);
-		}
+		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
+			(page_to_phys(page) & (1 << 17)) != 0;
+
+		vaddr = kmap(page);
+		if (page_do_bit17_swizzling)
+			ret = __copy_to_user_swizzled(user_data,
+						      vaddr, shmem_page_offset,
+						      page_length);
+		else
+			ret = __copy_to_user(user_data,
+					     vaddr + shmem_page_offset,
+					     page_length);
+		kunmap(page);
 
 		mark_page_accessed(page);
 		page_cache_release(page);
 
+		if (ret) {
+			ret = -EFAULT;
+			goto out;
+		}
+
 		remain -= page_length;
-		data_ptr += page_length;
+		user_data += page_length;
 		offset += page_length;
 	}
 
 out:
-	for (i = 0; i < pinned_pages; i++) {
-		SetPageDirty(user_pages[i]);
-		mark_page_accessed(user_pages[i]);
-		page_cache_release(user_pages[i]);
-	}
-	drm_free_large(user_pages);
+	mutex_lock(&dev->struct_mutex);
+	/* Fixup: Kill any reinstated backing storage pages */
+	if (obj->madv == __I915_MADV_PURGED)
+		i915_gem_object_truncate(obj);
 
 	return ret;
 }
-- 
cgit v1.1


From 172975aa746e155533cb386c7159c2d6510e2bc8 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 14 Dec 2011 13:57:25 +0100
Subject: drm/i915: Handle unmappable buffers during error state capture

As the buffer is not necessarily accessible through the GTT at the time
of a GPU hang, and capturing some of its contents is far more valuable
than skipping it, provide a clflushed fallback read path. We still
prefer to read through the GTT as that is more consistent with the GPU
access of the same buffer. So example it will demonstrate any errorneous
tiling or swizzling of the command buffer as seen by the GPU.

This becomes necessary with use of CPU relocations and lazy GTT binding,
but could potentially happen anyway as a result of a pathological error.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_irq.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 64bb212..8ea1ca4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -720,7 +720,6 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 	reloc_offset = src->gtt_offset;
 	for (page = 0; page < page_count; page++) {
 		unsigned long flags;
-		void __iomem *s;
 		void *d;
 
 		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
@@ -728,10 +727,29 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 			goto unwind;
 
 		local_irq_save(flags);
-		s = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
-					     reloc_offset);
-		memcpy_fromio(d, s, PAGE_SIZE);
-		io_mapping_unmap_atomic(s);
+		if (reloc_offset < dev_priv->mm.gtt_mappable_end) {
+			void __iomem *s;
+
+			/* Simply ignore tiling or any overlapping fence.
+			 * It's part of the error state, and this hopefully
+			 * captures what the GPU read.
+			 */
+
+			s = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
+						     reloc_offset);
+			memcpy_fromio(d, s, PAGE_SIZE);
+			io_mapping_unmap_atomic(s);
+		} else {
+			void *s;
+
+			drm_clflush_pages(&src->pages[page], 1);
+
+			s = kmap_atomic(src->pages[page]);
+			memcpy(d, s, PAGE_SIZE);
+			kunmap_atomic(s);
+
+			drm_clflush_pages(&src->pages[page], 1);
+		}
 		local_irq_restore(flags);
 
 		dst->pages[page] = d;
-- 
cgit v1.1