From d822bb18ce96391245d877d5bada8913b88a15cf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 3 Apr 2017 12:34:25 +0100 Subject: drm/i915: intel_ring.engine is unused Or rather it is used only by intel_ring_pin() to extract the drm_i915_private which we can easily pass in. As this is a relatively rare operation, save the space in the struct, and as such it is even break even in the extra code for passing around the parameter: add/remove: 0/0 grow/shrink: 2/3 up/down: 15/-15 (0) function old new delta intel_init_ring_buffer 906 918 +12 execlists_context_pin 1308 1311 +3 mock_engine 407 403 -4 intel_engine_create_ring 367 363 -4 intel_ring_pin 326 319 -7 Total: Before=1261794, After=1261794, chg +0.00% v2: Reorder intel_init_ring_buffer to keep the ring setup together: add/remove: 0/0 grow/shrink: 2/3 up/down: 9/-15 (-6) function old new delta intel_init_ring_buffer 906 912 +6 execlists_context_pin 1308 1311 +3 mock_engine 407 403 -4 intel_engine_create_ring 367 363 -4 intel_ring_pin 326 319 -7 Total: Before=1261794, After=1261788, chg -0.00% Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170403113426.25707-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 28 +++++++++++++--------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +++--- drivers/gpu/drm/i915/selftests/mock_engine.c | 1 - 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c8f7c63..0dc1cc4 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -771,7 +771,7 @@ static int execlists_context_pin(struct intel_engine_cs *engine, goto unpin_vma; } - ret = intel_ring_pin(ce->ring, ctx->ggtt_offset_bias); + ret = intel_ring_pin(ce->ring, ctx->i915, ctx->ggtt_offset_bias); if (ret) goto unpin_map; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 66a2b8b..5e7634c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1270,17 +1270,18 @@ static int init_phys_status_page(struct intel_engine_cs *engine) return 0; } -int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias) +int intel_ring_pin(struct intel_ring *ring, + struct drm_i915_private *i915, + unsigned int offset_bias) { - unsigned int flags; - enum i915_map_type map; + enum i915_map_type map = HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC; struct i915_vma *vma = ring->vma; + unsigned int flags; void *addr; int ret; GEM_BUG_ON(ring->vaddr); - map = HAS_LLC(ring->engine->i915) ? 
I915_MAP_WB : I915_MAP_WC; flags = PIN_GLOBAL; if (offset_bias) @@ -1369,8 +1370,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) if (!ring) return ERR_PTR(-ENOMEM); - ring->engine = engine; - INIT_LIST_HEAD(&ring->request_list); ring->size = size; @@ -1481,7 +1480,6 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine, static int intel_init_ring_buffer(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; struct intel_ring *ring; int ret; @@ -1493,13 +1491,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (ret) goto error; - ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); - if (IS_ERR(ring)) { - ret = PTR_ERR(ring); - goto error; - } - - if (HWS_NEEDS_PHYSICAL(dev_priv)) { + if (HWS_NEEDS_PHYSICAL(engine->i915)) { WARN_ON(engine->id != RCS); ret = init_phys_status_page(engine); if (ret) @@ -1510,8 +1502,14 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) goto error; } + ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); + goto error; + } + /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - ret = intel_ring_pin(ring, I915_GTT_PAGE_SIZE); + ret = intel_ring_pin(ring, engine->i915, I915_GTT_PAGE_SIZE); if (ret) { intel_ring_free(ring); goto error; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a82a080..cbe61d3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -139,8 +139,6 @@ struct intel_ring { struct i915_vma *vma; void *vaddr; - struct intel_engine_cs *engine; - struct list_head request_list; u32 head; @@ -487,7 +485,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size); -int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias); +int intel_ring_pin(struct intel_ring *ring, + struct drm_i915_private *i915, + unsigned int offset_bias); void intel_ring_unpin(struct intel_ring *ring); void intel_ring_free(struct intel_ring *ring); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 0ad624a..b89050e 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -112,7 +112,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) if (!ring) return NULL; - ring->engine = engine; ring->size = sz; ring->effective_size = sz; ring->vaddr = (void *)(ring + 1); -- cgit v1.1 From 1a5788bf2729c6e98444c7f56f960957694472c2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 3 Apr 2017 12:34:26 +0100 Subject: drm/i915: Onion unwind for intel_init_ring_common() Rather than call intel_engine_cleanup() with a partially constructed engine, unwind the error during intel_init_ring_common(). 
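As a minimal, self-contained illustration of that onion-unwind pattern (not part of the patch; every name here is a placeholder rather than a real i915 symbol), the shape of the error handling is:

#include <stdio.h>

/* Placeholder setup/teardown steps standing in for status-page setup,
 * ring allocation and ring pinning. */
static int acquire_status_page(void) { return 0; }
static void release_status_page(void) { }
static int acquire_ring(void) { return 0; }
static void release_ring(void) { }
static int pin_ring(void) { return -1; /* simulate the failing step */ }

/* Onion unwind: each acquired resource gets its own error label, and
 * the labels undo the steps in reverse order of acquisition, so a
 * failure at any point tears down exactly what was set up so far. */
static int init_example(void)
{
        int err;

        err = acquire_status_page();
        if (err)
                goto err;

        err = acquire_ring();
        if (err)
                goto err_status_page;

        err = pin_ring();
        if (err)
                goto err_ring;

        return 0;

err_ring:
        release_ring();
err_status_page:
        release_status_page();
err:
        return err;
}

int main(void)
{
        printf("init_example() returned %d\n", init_example());
        return 0;
}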
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170403113426.25707-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_ringbuffer.c | 77 +++++++++++++++------------------ 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5e7634c..c98acc2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1259,6 +1259,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + GEM_BUG_ON(engine->id != RCS); + dev_priv->status_page_dmah = drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); if (!dev_priv->status_page_dmah) @@ -1481,76 +1483,69 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine, static int intel_init_ring_buffer(struct intel_engine_cs *engine) { struct intel_ring *ring; - int ret; - - WARN_ON(engine->buffer); + int err; intel_engine_setup_common(engine); - ret = intel_engine_init_common(engine); - if (ret) - goto error; + err = intel_engine_init_common(engine); + if (err) + goto err; - if (HWS_NEEDS_PHYSICAL(engine->i915)) { - WARN_ON(engine->id != RCS); - ret = init_phys_status_page(engine); - if (ret) - goto error; - } else { - ret = init_status_page(engine); - if (ret) - goto error; - } + if (HWS_NEEDS_PHYSICAL(engine->i915)) + err = init_phys_status_page(engine); + else + err = init_status_page(engine); + if (err) + goto err; ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); if (IS_ERR(ring)) { - ret = PTR_ERR(ring); - goto error; + err = PTR_ERR(ring); + goto err_hws; } /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - ret = intel_ring_pin(ring, engine->i915, I915_GTT_PAGE_SIZE); - if (ret) { - intel_ring_free(ring); - goto error; - } + err = intel_ring_pin(ring, engine->i915, I915_GTT_PAGE_SIZE); + if (err) + goto err_ring; + + GEM_BUG_ON(engine->buffer); engine->buffer = ring; return 0; -error: - intel_engine_cleanup(engine); - return ret; +err_ring: + intel_ring_free(ring); +err_hws: + if (HWS_NEEDS_PHYSICAL(engine->i915)) + cleanup_phys_status_page(engine); + else + cleanup_status_page(engine); +err: + intel_engine_cleanup_common(engine); + return err; } void intel_engine_cleanup(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv; - - dev_priv = engine->i915; + struct drm_i915_private *dev_priv = engine->i915; - if (engine->buffer) { - WARN_ON(INTEL_GEN(dev_priv) > 2 && - (I915_READ_MODE(engine) & MODE_IDLE) == 0); + WARN_ON(INTEL_GEN(dev_priv) > 2 && + (I915_READ_MODE(engine) & MODE_IDLE) == 0); - intel_ring_unpin(engine->buffer); - intel_ring_free(engine->buffer); - engine->buffer = NULL; - } + intel_ring_unpin(engine->buffer); + intel_ring_free(engine->buffer); if (engine->cleanup) engine->cleanup(engine); - if (HWS_NEEDS_PHYSICAL(dev_priv)) { - WARN_ON(engine->id != RCS); + if (HWS_NEEDS_PHYSICAL(dev_priv)) cleanup_phys_status_page(engine); - } else { + else cleanup_status_page(engine); - } intel_engine_cleanup_common(engine); - engine->i915 = NULL; dev_priv->engine[engine->id] = NULL; kfree(engine); } -- cgit v1.1 From 7a3ee5deb46e1350b2c290d2b1fe0a2bc4087bff Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 30 Mar 2017 17:31:30 +0100 Subject: drm/i915: Remove user-triggerable WARN from i915_gem_object_create Since this can be triggered by simply attempting a huge object, a WARN_ON is not appropriate. 
Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170330163130.24141-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bbc6f1c..4ca88f2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4195,7 +4195,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) * catch if we ever need to fix it. In the meantime, if you do spot * such a local variable, please consider fixing! */ - if (WARN_ON(size >> PAGE_SHIFT > INT_MAX)) + if (size >> PAGE_SHIFT > INT_MAX) return ERR_PTR(-E2BIG); if (overflows_type(size, obj->base.size)) -- cgit v1.1 From b1becb88268beb72df6495e35d3d76c138d215bb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 3 Apr 2017 11:51:24 +0100 Subject: drm/i915: Park the signaler before sleeping If the signal to park arrives before we sleep, then we need to check kthread_should_park() before sleeping to avoid missing the signal. Otherwise, if the signal arrives whilst we are processing completed requests, we will reset the current->state back to TASK_INTERRUPTIBLE and so miss the wakeup. Fixes: fe3288b5da2c ("drm/i915: Park the breadcrumbs signaler across a GPU reset") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170403105124.8969-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index b6ea192..308c56a 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -629,6 +629,9 @@ static int intel_breadcrumbs_signaler(void *arg) } else { DEFINE_WAIT(exec); + if (kthread_should_park()) + kthread_parkme(); + if (kthread_should_stop()) { GEM_BUG_ON(request); break; @@ -641,9 +644,6 @@ static int intel_breadcrumbs_signaler(void *arg) if (request) remove_wait_queue(&request->execute, &exec); - - if (kthread_should_park()) - kthread_parkme(); } i915_gem_request_put(request); } while (1); -- cgit v1.1 From a7980a640cbd339aa80f406d1786a275a2c320bc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 4 Apr 2017 13:05:31 +0100 Subject: drm/i915: Apply a cond_resched() to the saturated signaler If the engine is continually completing nops, we can saturate the signaler and keep it working indefinitely. This angers the NMI watchdog! A good example is to disable semaphores on snb and run igt/gem_exec_nop - the parallel, multi-engine workloads are more than sufficient to hog the CPU, preventing the system from even processing ICMP echo replies. v2: Tvrtko dug into cond_resched() on x86 and found that it only depended upon preempt_count and not tif_need_resched() - which means that we would always call schedule() at that point. 
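Reduced to its control flow (illustrative only; the two *_stub() helpers are hypothetical stand-ins for the breadcrumb bookkeeping, and the real change is in the diff below), the signaler loop now looks like:

        for (;;) {
                bool do_schedule = true;

                set_current_state(TASK_INTERRUPTIBLE);

                if (signal_complete_stub()) {           /* hypothetical */
                        process_request_stub();         /* hypothetical */

                        /* Saturated engine: keep draining completed
                         * requests, but yield as soon as the scheduler
                         * asks for the CPU back so the NMI watchdog and
                         * other tasks are not starved. */
                        do_schedule = need_resched();
                }

                if (unlikely(do_schedule)) {
                        if (kthread_should_park())
                                kthread_parkme();

                        if (kthread_should_stop())
                                break;

                        schedule();
                }
        }
        __set_current_state(TASK_RUNNING);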
Fixes: c81d46138da6 ("drm/i915: Convert trace-irq to the breadcrumb waiter") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170404120531.10737-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 308c56a..9ccbf26 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -580,6 +580,8 @@ static int intel_breadcrumbs_signaler(void *arg) signaler_set_rtpriority(); do { + bool do_schedule = true; + set_current_state(TASK_INTERRUPTIBLE); /* We are either woken up by the interrupt bottom-half, @@ -626,7 +628,18 @@ static int intel_breadcrumbs_signaler(void *arg) spin_unlock_irq(&b->rb_lock); i915_gem_request_put(request); - } else { + + /* If the engine is saturated we may be continually + * processing completed requests. This angers the + * NMI watchdog if we never let anything else + * have access to the CPU. Let's pretend to be nice + * and relinquish the CPU if we burn through the + * entire RT timeslice! + */ + do_schedule = need_resched(); + } + + if (unlikely(do_schedule)) { DEFINE_WAIT(exec); if (kthread_should_park()) -- cgit v1.1 From 01a9ca0ba87176432f0b91fd9adbfa66e33253ed Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 31 Mar 2017 11:57:09 +0000 Subject: drm/i915/huc: Simplify intel_huc_init_hw() On last guc/huc cleanup series we've simplified guc init hw function but missed the one for the huc. While here, change its signature as we don't care about huc loading status. Signed-off-by: Michal Wajdeczko Cc: Anusha Srivatsa Cc: Arkadiusz Hiler Cc: Tvrtko Ursulin Reviewed-by: Anusha Srivatsa Signed-off-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170331115709.181940-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_huc.c | 48 +++++++--------------------------------- drivers/gpu/drm/i915/intel_uc.h | 2 +- 2 files changed, 9 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 9ee8196..385cacb 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -186,68 +186,36 @@ void intel_huc_select_fw(struct intel_huc *huc) * earlier call to intel_huc_init(), so here we need only check that * is succeeded, and then transfer the image to the h/w. 
* - * Return: non-zero code on error */ -int intel_huc_init_hw(struct intel_huc *huc) +void intel_huc_init_hw(struct intel_huc *huc) { struct drm_i915_private *dev_priv = huc_to_i915(huc); int err; - if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_NONE) - return 0; - DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", huc->fw.path, intel_uc_fw_status_repr(huc->fw.fetch_status), intel_uc_fw_status_repr(huc->fw.load_status)); - if (huc->fw.fetch_status == INTEL_UC_FIRMWARE_SUCCESS && - huc->fw.load_status == INTEL_UC_FIRMWARE_FAIL) - return -ENOEXEC; + if (huc->fw.fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return; huc->fw.load_status = INTEL_UC_FIRMWARE_PENDING; - switch (huc->fw.fetch_status) { - case INTEL_UC_FIRMWARE_FAIL: - /* something went wrong :( */ - err = -EIO; - goto fail; - - case INTEL_UC_FIRMWARE_NONE: - case INTEL_UC_FIRMWARE_PENDING: - default: - /* "can't happen" */ - WARN_ONCE(1, "HuC fw %s invalid fetch_status %s [%d]\n", - huc->fw.path, - intel_uc_fw_status_repr(huc->fw.fetch_status), - huc->fw.fetch_status); - err = -ENXIO; - goto fail; - - case INTEL_UC_FIRMWARE_SUCCESS: - break; - } - err = huc_ucode_xfer(dev_priv); - if (err) - goto fail; - huc->fw.load_status = INTEL_UC_FIRMWARE_SUCCESS; + huc->fw.load_status = err ? + INTEL_UC_FIRMWARE_FAIL : INTEL_UC_FIRMWARE_SUCCESS; DRM_DEBUG_DRIVER("%s fw status: fetch %s, load %s\n", huc->fw.path, intel_uc_fw_status_repr(huc->fw.fetch_status), intel_uc_fw_status_repr(huc->fw.load_status)); - return 0; - -fail: - if (huc->fw.load_status == INTEL_UC_FIRMWARE_PENDING) - huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL; - - DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err); + if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) + DRM_ERROR("Failed to complete HuC uCode load with ret %d\n", err); - return err; + return; } /** diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 4b7f73a..2f0229d 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -266,7 +266,7 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma) /* intel_huc.c */ void intel_huc_select_fw(struct intel_huc *huc); -int intel_huc_init_hw(struct intel_huc *huc); +void intel_huc_init_hw(struct intel_huc *huc); void intel_guc_auth_huc(struct drm_i915_private *dev_priv); #endif -- cgit v1.1 From 895203044067af64400cedbc055898bcec98d102 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Mar 2017 17:21:23 +0300 Subject: drm/i915: Make legacy cursor updates more unsynced MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're clearing the legacy_cursor_update flag before calling drm_atomic_helper_setup_commit() which means the helper will wait for the flip to complete before cleaning up the framebuffers. That's not what we want for the legacy cursor, so let's clear the flag after setting up the commit. Also toss in a FIXME about solving these problems in a nicer way using the fabled vblank workers. v2: Also unsync with legacy page flips Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Uwe Kleine-König Cc: Rafael Ristovski Fixes: a5509abda48e ("drm/i915: Fix legacy cursor vs. 
watermarks for ILK-BDW") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170329142123.5923-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_display.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fe6bd74..26e871a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13024,17 +13024,6 @@ static int intel_atomic_commit(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); int ret = 0; - /* - * The intel_legacy_cursor_update() fast path takes care - * of avoiding the vblank waits for simple cursor - * movement and flips. For cursor on/off and size changes, - * we want to perform the vblank waits so that watermark - * updates happen during the correct frames. Gen9+ have - * double buffered watermarks and so shouldn't need this. - */ - if (INTEL_GEN(dev_priv) < 9) - state->legacy_cursor_update = false; - ret = drm_atomic_helper_setup_commit(state, nonblock); if (ret) return ret; @@ -13050,6 +13039,26 @@ static int intel_atomic_commit(struct drm_device *dev, return ret; } + /* + * The intel_legacy_cursor_update() fast path takes care + * of avoiding the vblank waits for simple cursor + * movement and flips. For cursor on/off and size changes, + * we want to perform the vblank waits so that watermark + * updates happen during the correct frames. Gen9+ have + * double buffered watermarks and so shouldn't need this. + * + * Do this after drm_atomic_helper_setup_commit() and + * intel_atomic_prepare_commit() because we still want + * to skip the flip and fb cleanup waits. Although that + * does risk yanking the mapping from under the display + * engine. + * + * FIXME doing watermarks and fb cleanup from a vblank worker + * (assuming we had any) would solve these problems. 
+ */ + if (INTEL_GEN(dev_priv) < 9) + state->legacy_cursor_update = false; + drm_atomic_helper_swap_state(state, true); dev_priv->wm.distrust_bios_wm = false; intel_shared_dpll_swap_state(state); -- cgit v1.1 From b64b7a605d8e6869106e1101e1fefa5dc2790e65 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Tue, 4 Apr 2017 11:16:05 -0700 Subject: drm/i915: Typo fix - 'pipe bpc' to 'pipe bpp' Noticed this while I was looking at some debug output, [drm:intel_hdmi_compute_config [i915]] picking bpc to 12 for HDMI output [drm:intel_hdmi_compute_config [i915]] forcing pipe bpc to 36 for HDMI I believe the second line should be pipe *bpp* Signed-off-by: Dhinakaran Pandiyan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1491329765-14340-1-git-send-email-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 1d623b5..6efc3cb 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1392,7 +1392,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, } if (!pipe_config->bw_constrained) { - DRM_DEBUG_KMS("forcing pipe bpc to %i for HDMI\n", desired_bpp); + DRM_DEBUG_KMS("forcing pipe bpp to %i for HDMI\n", desired_bpp); pipe_config->pipe_bpp = desired_bpp; } -- cgit v1.1 From b53af8bb18b85188b5c8cacaa6a966774b548f17 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 4 Apr 2017 13:38:36 +0000 Subject: drm/i915/guc: Use GUC prefix for CORE_FAMILY definitions Almost all other GuC firmware definitions already use the GUC|guc prefix. While at it, change the explicit WARN in get_core_family() into MISSING_CASE, which looks more appropriate since GuC support is indicated by the intel_device_info.has_guc flag. 
Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170404133836.125736-1-michal.wajdeczko@intel.com Reviewed-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_guc_fwif.h | 4 ++-- drivers/gpu/drm/i915/intel_guc_loader.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index cb36cbf..6156845 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -23,8 +23,8 @@ #ifndef _INTEL_GUC_FWIF_H #define _INTEL_GUC_FWIF_H -#define GFXCORE_FAMILY_GEN9 12 -#define GFXCORE_FAMILY_UNKNOWN 0x7fffffff +#define GUC_CORE_FAMILY_GEN9 12 +#define GUC_CORE_FAMILY_UNKNOWN 0x7fffffff #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 #define GUC_CLIENT_PRIORITY_HIGH 1 diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 8a1a023..2793c01 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -86,11 +86,11 @@ static u32 get_core_family(struct drm_i915_private *dev_priv) switch (gen) { case 9: - return GFXCORE_FAMILY_GEN9; + return GUC_CORE_FAMILY_GEN9; default: - WARN(1, "GEN%d does not support GuC operation!\n", gen); - return GFXCORE_FAMILY_UNKNOWN; + MISSING_CASE(gen); + return GUC_CORE_FAMILY_UNKNOWN; } } -- cgit v1.1 From 709f3fc92c113c88c96c5def37acb92e2eef610a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 3 Mar 2017 17:19:26 +0200 Subject: drm/i915: Check for id==PLANE_CURSOR instead of type==DRM_PLANE_TYPE_CURSOR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VLV/CHV watermark calculation is really interested in the hardware plane type rather than the plane type (which is more of a software concept). Let's check plane->id rather plane->type. No functional changes. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170303151928.23053-3-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 570bd60..2807054 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1039,7 +1039,7 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, if (WARN_ON(htotal == 0)) htotal = 1; - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) { + if (plane->id == PLANE_CURSOR) { /* * FIXME the formula gives values that are * too big for the cursor FIFO, and hence we -- cgit v1.1 From a07102f1cce6ebf11f8d880665eb6f6353b16177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 3 Mar 2017 17:19:27 +0200 Subject: drm/i915: Use intel_wm_plane_visible() on VLV/CHV as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VLV/CHV don't have double buffered watermarks so they need to consider the cursor visibility as a special case just like ILK-BDW. Let's use the helper we have for that. 
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170303151928.23053-4-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2807054..55e1e88 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1029,7 +1029,7 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, if (dev_priv->wm.pri_latency[level] == 0) return USHRT_MAX; - if (!plane_state->base.visible) + if (!intel_wm_plane_visible(crtc_state, plane_state)) return 0; cpp = plane_state->base.fb->format->cpp[0]; @@ -1203,7 +1203,7 @@ static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, int level; bool dirty = false; - if (!plane_state->base.visible) { + if (!intel_wm_plane_visible(crtc_state, plane_state)) { dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0); goto out; } -- cgit v1.1 From ff4c3b76eefe3c9c52326250c6701149d0985fb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 3 Mar 2017 17:19:28 +0200 Subject: drm/i915: Enable atomic on VLV/CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VLV/CHV watermarks are now able to handle the radiation, so mark these platforms as ready for atomic. Cc: Maarten Lankhorst Suggested-by: Maarten Lankhorst Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170303151928.23053-5-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/i915_drv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 0b38116..0595600 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1213,9 +1213,8 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) struct drm_i915_private *dev_priv; int ret; - /* Enable nuclear pageflip on ILK+, except vlv/chv */ - if (!i915.nuclear_pageflip && - (match_info->gen < 5 || match_info->has_gmch_display)) + /* Enable nuclear pageflip on ILK+ */ + if (!i915.nuclear_pageflip && match_info->gen < 5) driver.driver_features &= ~DRIVER_ATOMIC; ret = -ENOMEM; -- cgit v1.1 From fd08923384385400101c71ac0d21d37d6b23b00d Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Wed, 5 Apr 2017 15:51:50 +0530 Subject: drm/i915: Suspend GuC prior to GPU Reset during GEM suspend i915 is currently doing a full GPU reset at the end of i915_gem_suspend() followed by GuC suspend in i915_drm_suspend(). This GPU reset clobbers the GuC, causing the suspend request to then fail, leaving the GuC in an undefined state. We need to tell the GuC to suspend before we do the direct intel_gpu_reset(). v2: Commit message update. 
(Chris, Daniele) Fixes: 1c777c5d1dcd ("drm/i915/hsw: Fix GPU hang during resume from S3-devices state") Cc: Jeff McGee Cc: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Imre Deak Cc: Mika Kuoppala Signed-off-by: Sagar Arun Kamble Link: http://patchwork.freedesktop.org/patch/msgid/1491387710-20553-1-git-send-email-sagar.a.kamble@intel.com Reviewed-by: Daniele Ceraolo Spurio Acked-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.c | 2 -- drivers/gpu/drm/i915/i915_gem.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 0595600..94c61aa 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1486,8 +1486,6 @@ static int i915_drm_suspend(struct drm_device *dev) goto out; } - intel_guc_suspend(dev_priv); - intel_display_suspend(dev); intel_dp_mst_suspend(dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4ca88f2..28e77d2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4456,6 +4456,8 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) i915_gem_context_lost(dev_priv); mutex_unlock(&dev->struct_mutex); + intel_guc_suspend(dev_priv); + cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); cancel_delayed_work_sync(&dev_priv->gt.retire_work); -- cgit v1.1 From 3194102439f6acb0b43f36cc909c04adf3ad97e3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 28 Mar 2017 17:59:01 +0300 Subject: drm/i915/dp: use known correct array size in rate_to_index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I can't think of a real world bug this could cause now, but this will be required in follow-up work. While at it, change the parameter order to be slightly more sensible. Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/ff5b08f45a72c2247f5326b080027e2f5d8cc4ee.1490712890.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index fd96a6c..88c708b 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1538,12 +1538,12 @@ bool intel_dp_read_desc(struct intel_dp *intel_dp) return true; } -static int rate_to_index(int find, const int *rates) +static int rate_to_index(const int *rates, int len, int rate) { - int i = 0; + int i; - for (i = 0; i < DP_MAX_SUPPORTED_RATES; ++i) - if (find == rates[i]) + for (i = 0; i < len; i++) + if (rate == rates[i]) break; return i; @@ -1564,7 +1564,8 @@ intel_dp_max_link_rate(struct intel_dp *intel_dp) int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) { - return rate_to_index(rate, intel_dp->sink_rates); + return rate_to_index(intel_dp->sink_rates, intel_dp->num_sink_rates, + rate); } void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, -- cgit v1.1 From b5c72b207baaa33184b23a5a69603f981aaef0c1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 28 Mar 2017 17:59:02 +0300 Subject: drm/i915/dp: return errors from rate_to_index() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We shouldn't silently use the first element if we can't find the rate we're looking for. 
Make rate_to_index() more generally useful, and fallback to the first element in the caller, with a big warning. Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/8a6e83b7bf35da0cbbc703ae157944107ff145be.1490712890.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 88c708b..0e200a3 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1544,9 +1544,9 @@ static int rate_to_index(const int *rates, int len, int rate) for (i = 0; i < len; i++) if (rate == rates[i]) - break; + return i; - return i; + return -1; } int @@ -1564,8 +1564,13 @@ intel_dp_max_link_rate(struct intel_dp *intel_dp) int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) { - return rate_to_index(intel_dp->sink_rates, intel_dp->num_sink_rates, - rate); + int i = rate_to_index(intel_dp->sink_rates, intel_dp->num_sink_rates, + rate); + + if (WARN_ON(i < 0)) + i = 0; + + return i; } void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, -- cgit v1.1 From 8001b7541aa7a2f0adc4941bdebf44ea969e7ed5 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 28 Mar 2017 17:59:03 +0300 Subject: drm/i915/dp: rename rate_to_index() to intel_dp_rate_index() and reuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the function, move it at the top, and reuse in intel_dp_link_rate_index(). If there was a reason in the past to use reverse search order here, there isn't now. The names may be slightly confusing now, but intel_dp_link_rate_index() will go away in follow-up patches. 
v2: Use name intel_dp_rate_index (Dhinakaran) Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/c7b6197aaa12e368a0d024dc142fa574fd0443a7.1490712890.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0e200a3..9fc066d 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -266,6 +266,18 @@ static int intersect_rates(const int *source_rates, int source_len, return k; } +/* return index of rate in rates array, or -1 if not found */ +static int intel_dp_rate_index(const int *rates, int len, int rate) +{ + int i; + + for (i = 0; i < len; i++) + if (rate == rates[i]) + return i; + + return -1; +} + static int intel_dp_common_rates(struct intel_dp *intel_dp, int *common_rates) { @@ -284,15 +296,10 @@ static int intel_dp_link_rate_index(struct intel_dp *intel_dp, int *common_rates, int link_rate) { int common_len; - int index; common_len = intel_dp_common_rates(intel_dp, common_rates); - for (index = 0; index < common_len; index++) { - if (link_rate == common_rates[common_len - index - 1]) - return common_len - index - 1; - } - return -1; + return intel_dp_rate_index(common_rates, common_len, link_rate); } int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, @@ -1538,17 +1545,6 @@ bool intel_dp_read_desc(struct intel_dp *intel_dp) return true; } -static int rate_to_index(const int *rates, int len, int rate) -{ - int i; - - for (i = 0; i < len; i++) - if (rate == rates[i]) - return i; - - return -1; -} - int intel_dp_max_link_rate(struct intel_dp *intel_dp) { @@ -1564,8 +1560,8 @@ intel_dp_max_link_rate(struct intel_dp *intel_dp) int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) { - int i = rate_to_index(intel_dp->sink_rates, intel_dp->num_sink_rates, - rate); + int i = intel_dp_rate_index(intel_dp->sink_rates, + intel_dp->num_sink_rates, rate); if (WARN_ON(i < 0)) i = 0; -- cgit v1.1 From 55cfc580809698e952c1df36140eefb8d97ce222 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 28 Mar 2017 17:59:04 +0300 Subject: drm/i915/dp: cache source rates at init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need the source rates array so often that it makes sense to set it once at init. This reduces function calls when we need the rates, making the code easier to follow. 
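The caching itself is nothing more than a set-once pointer-plus-length pair, roughly like the sketch below (illustrative; rate_cache and the table contents are placeholders, and the real work happens in intel_dp_set_source_rates() in the diff that follows):

struct rate_cache {
        const int *rates;       /* points into a static platform table */
        int count;
};

static const int example_rates[] = { 162000, 270000, 540000 };

/* Called exactly once at init; later users just read the two fields. */
static void cache_rates_once(struct rate_cache *c)
{
        WARN_ON(c->rates || c->count);  /* guard against double init */
        c->rates = example_rates;
        c->count = ARRAY_SIZE(example_rates);
}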
Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/aa998882d2b824f671272c60e9d26621ab9d2d17.1490712890.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 35 +++++++++++++++++++++-------------- drivers/gpu/drm/i915/intel_drv.h | 3 +++ 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 9fc066d..e9bd75f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -218,21 +218,25 @@ intel_dp_sink_rates(struct intel_dp *intel_dp, const int **sink_rates) return (intel_dp->max_sink_link_bw >> 3) + 1; } -static int -intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates) +static void +intel_dp_set_source_rates(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + const int *source_rates; int size; + /* This should only be done once */ + WARN_ON(intel_dp->source_rates || intel_dp->num_source_rates); + if (IS_GEN9_LP(dev_priv)) { - *source_rates = bxt_rates; + source_rates = bxt_rates; size = ARRAY_SIZE(bxt_rates); } else if (IS_GEN9_BC(dev_priv)) { - *source_rates = skl_rates; + source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); } else { - *source_rates = default_rates; + source_rates = default_rates; size = ARRAY_SIZE(default_rates); } @@ -240,7 +244,8 @@ intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates) if (!intel_dp_source_supports_hbr2(intel_dp)) size--; - return size; + intel_dp->source_rates = source_rates; + intel_dp->num_source_rates = size; } static int intersect_rates(const int *source_rates, int source_len, @@ -281,13 +286,13 @@ static int intel_dp_rate_index(const int *rates, int len, int rate) static int intel_dp_common_rates(struct intel_dp *intel_dp, int *common_rates) { - const int *source_rates, *sink_rates; - int source_len, sink_len; + const int *sink_rates; + int sink_len; sink_len = intel_dp_sink_rates(intel_dp, &sink_rates); - source_len = intel_dp_source_rates(intel_dp, &source_rates); - return intersect_rates(source_rates, source_len, + return intersect_rates(intel_dp->source_rates, + intel_dp->num_source_rates, sink_rates, sink_len, common_rates); } @@ -1493,16 +1498,16 @@ static void snprintf_int_array(char *str, size_t len, static void intel_dp_print_rates(struct intel_dp *intel_dp) { - const int *source_rates, *sink_rates; - int source_len, sink_len, common_len; + const int *sink_rates; + int sink_len, common_len; int common_rates[DP_MAX_SUPPORTED_RATES]; char str[128]; /* FIXME: too big for stack? 
*/ if ((drm_debug & DRM_UT_KMS) == 0) return; - source_len = intel_dp_source_rates(intel_dp, &source_rates); - snprintf_int_array(str, sizeof(str), source_rates, source_len); + snprintf_int_array(str, sizeof(str), + intel_dp->source_rates, intel_dp->num_source_rates); DRM_DEBUG_KMS("source rates: %s\n", str); sink_len = intel_dp_sink_rates(intel_dp, &sink_rates); @@ -5943,6 +5948,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dig_port->max_lanes, port_name(port))) return false; + intel_dp_set_source_rates(intel_dp); + intel_dp->reset_link_params = true; intel_dp->pps_pipe = INVALID_PIPE; intel_dp->active_pipe = INVALID_PIPE; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 313fad0..c3382eb 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -949,6 +949,9 @@ struct intel_dp { uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE]; uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; uint8_t edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]; + /* source rates */ + int num_source_rates; + const int *source_rates; /* sink rates as reported by DP_SUPPORTED_LINK_RATES */ uint8_t num_sink_rates; int sink_rates[DP_MAX_SUPPORTED_RATES]; -- cgit v1.1 From 68f357cb734738d60a749abb6673e7b63ccf0221 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 28 Mar 2017 17:59:05 +0300 Subject: drm/i915/dp: generate and cache sink rate array for all DP, not just eDP 1.4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is some conflation related to sink rates, making this change more complicated than it would otherwise have to be. There are three changes here that are rather difficult to split up: 1) Use the intel_dp->sink_rates array for all DP, not just eDP 1.4. We initialize it from DPCD on eDP 1.4 like before, but generate it based on DP_MAX_LINK_RATE on others. This reduces code complexity when we need to use the sink rates; they are all always in the sink_rates array. 2) Update the sink rate array whenever we read DPCD, and use the information from there. This increases code readability when we need the sink rates. 3) Disentangle fallback rate limiting from sink rates. In the code, the max rate is a dynamic property of the *link*, not of the *sink*. Do the limiting after intersecting the source and sink rates, which are static properties of the devices. This paves the way for follow-up refactoring that I've refrained from doing here to keep this change as simple as it possibly can. 
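Point 3) above boils down to the following self-contained sketch (illustrative only; the real code uses intersect_rates() and intel_dp_common_rates() as shown in the diff below): intersect the two static, ascending rate tables first, then apply the dynamic per-link cap.

/* Both inputs are sorted ascending; the intersection, capped at
 * max_rate, is written to 'out' and the number of usable entries is
 * returned. */
static int common_rates_capped(const int *src, int nsrc,
                               const int *snk, int nsnk,
                               int max_rate, int *out)
{
        int i = 0, j = 0, n = 0;

        while (i < nsrc && j < nsnk) {
                if (src[i] == snk[j]) {
                        if (src[i] <= max_rate)
                                out[n++] = src[i];
                        i++;
                        j++;
                } else if (src[i] < snk[j]) {
                        i++;
                } else {
                        j++;
                }
        }

        return n;
}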
v2: introduce use_rate_select and handle non-confirming eDP (Ville) v3: don't clobber cached eDP rates on short pulse (Ville) Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/071bad76467f8ab2e73f3f61ad52d5a468004c71.1490712890.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 81 ++++++++++++++++++--------- drivers/gpu/drm/i915/intel_dp_link_training.c | 3 +- drivers/gpu/drm/i915/intel_drv.h | 5 +- 3 files changed, 61 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e9bd75f..b38cba7 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -133,6 +133,34 @@ static void vlv_steal_power_sequencer(struct drm_device *dev, enum pipe pipe); static void intel_dp_unset_edid(struct intel_dp *intel_dp); +static int intel_dp_num_rates(u8 link_bw_code) +{ + switch (link_bw_code) { + default: + WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n", + link_bw_code); + case DP_LINK_BW_1_62: + return 1; + case DP_LINK_BW_2_7: + return 2; + case DP_LINK_BW_5_4: + return 3; + } +} + +/* update sink rates from dpcd */ +static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) +{ + int i, num_rates; + + num_rates = intel_dp_num_rates(intel_dp->dpcd[DP_MAX_LINK_RATE]); + + for (i = 0; i < num_rates; i++) + intel_dp->sink_rates[i] = default_rates[i]; + + intel_dp->num_sink_rates = num_rates; +} + static int intel_dp_max_link_bw(struct intel_dp *intel_dp) { @@ -205,19 +233,6 @@ intel_dp_downstream_max_dotclock(struct intel_dp *intel_dp) return max_dotclk; } -static int -intel_dp_sink_rates(struct intel_dp *intel_dp, const int **sink_rates) -{ - if (intel_dp->num_sink_rates) { - *sink_rates = intel_dp->sink_rates; - return intel_dp->num_sink_rates; - } - - *sink_rates = default_rates; - - return (intel_dp->max_sink_link_bw >> 3) + 1; -} - static void intel_dp_set_source_rates(struct intel_dp *intel_dp) { @@ -286,15 +301,22 @@ static int intel_dp_rate_index(const int *rates, int len, int rate) static int intel_dp_common_rates(struct intel_dp *intel_dp, int *common_rates) { - const int *sink_rates; - int sink_len; + int max_rate = drm_dp_bw_code_to_link_rate(intel_dp->max_sink_link_bw); + int i, common_len; - sink_len = intel_dp_sink_rates(intel_dp, &sink_rates); + common_len = intersect_rates(intel_dp->source_rates, + intel_dp->num_source_rates, + intel_dp->sink_rates, + intel_dp->num_sink_rates, + common_rates); + + /* Limit results by potentially reduced max rate */ + for (i = 0; i < common_len; i++) { + if (common_rates[common_len - i - 1] <= max_rate) + return common_len - i; + } - return intersect_rates(intel_dp->source_rates, - intel_dp->num_source_rates, - sink_rates, sink_len, - common_rates); + return 0; } static int intel_dp_link_rate_index(struct intel_dp *intel_dp, @@ -1498,8 +1520,7 @@ static void snprintf_int_array(char *str, size_t len, static void intel_dp_print_rates(struct intel_dp *intel_dp) { - const int *sink_rates; - int sink_len, common_len; + int common_len; int common_rates[DP_MAX_SUPPORTED_RATES]; char str[128]; /* FIXME: too big for stack? 
*/ @@ -1510,8 +1531,8 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp) intel_dp->source_rates, intel_dp->num_source_rates); DRM_DEBUG_KMS("source rates: %s\n", str); - sink_len = intel_dp_sink_rates(intel_dp, &sink_rates); - snprintf_int_array(str, sizeof(str), sink_rates, sink_len); + snprintf_int_array(str, sizeof(str), + intel_dp->sink_rates, intel_dp->num_sink_rates); DRM_DEBUG_KMS("sink rates: %s\n", str); common_len = intel_dp_common_rates(intel_dp, common_rates); @@ -1577,7 +1598,8 @@ int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, uint8_t *link_bw, uint8_t *rate_select) { - if (intel_dp->num_sink_rates) { + /* eDP 1.4 rate select method. */ + if (intel_dp->use_rate_select) { *link_bw = 0; *rate_select = intel_dp_rate_select(intel_dp, port_clock); @@ -3702,6 +3724,11 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) intel_dp->num_sink_rates = i; } + if (intel_dp->num_sink_rates) + intel_dp->use_rate_select = true; + else + intel_dp_set_sink_rates(intel_dp); + return true; } @@ -3712,6 +3739,10 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) if (!intel_dp_read_dpcd(intel_dp)) return false; + /* Don't clobber cached eDP rates. */ + if (!is_edp(intel_dp)) + intel_dp_set_sink_rates(intel_dp); + if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT, &intel_dp->sink_count, 1) < 0) return false; diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index 0048b52..694ad0f 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -146,7 +146,8 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN; drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2); - if (intel_dp->num_sink_rates) + /* eDP 1.4 rate select method. */ + if (!link_bw) drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET, &rate_select, 1); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index c3382eb..7bc0c25 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -952,9 +952,10 @@ struct intel_dp { /* source rates */ int num_source_rates; const int *source_rates; - /* sink rates as reported by DP_SUPPORTED_LINK_RATES */ - uint8_t num_sink_rates; + /* sink rates as reported by DP_MAX_LINK_RATE/DP_SUPPORTED_LINK_RATES */ + int num_sink_rates; int sink_rates[DP_MAX_SUPPORTED_RATES]; + bool use_rate_select; /* Max lane count for the sink as per DPCD registers */ uint8_t max_sink_lane_count; /* Max link BW for the sink as per DPCD registers */ -- cgit v1.1 From 97f55ca5b6625bb4ddeca7b1272b53ca04ab3cf0 Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Wed, 5 Apr 2017 09:04:23 -0400 Subject: drm/i915/glk: limit pixel clock to 99% of cdclk workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As per BSPEC, valid cdclk values for glk are 79.2, 158.4, 316.8 Mhz. Practically we can achive only 99% of these cdclk values (HW team checking on this). So cdclk should be calculated for the given pixclk as per that otherwise it may lead to screen corruption, explained below: 1. For DSI AUO panel(1920x1200 @60) required pixclk is 157100 KHZ 2. glk_calc_cdclk returns 79200 KHZ for this pixclk, For 2PPC it will be 158400 KHZ 3. 
Practically 100% of the cdclk can’t be achieved, so 99% of 158400 KHZ = 156816 which is less than the desired pixlclk and causes panel corruption. v2: Rebased to new CDLCK code framework v3: Addressed review comments from Ander/Jani - Add comment in code about 99% usage of CDCLK - Calculate max dot clock as well with 99% limit v4 by Jani: - drop superfluous whitespace change - rewrite code comments to clarify v5: Added details of non-working scenario in commit message Cc: Ander Conselvan de Oliveira Cc: Ville Syrjälä Signed-off-by: Madhav Chauhan Signed-off-by: Jani Nikula Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1491397463-13637-1-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index dd3ad52..763010f 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1071,9 +1071,15 @@ static int bxt_calc_cdclk(int max_pixclk) static int glk_calc_cdclk(int max_pixclk) { - if (max_pixclk > 2 * 158400) + /* + * FIXME: Avoid using a pixel clock that is more than 99% of the cdclk + * as a temporary workaround. Use a higher cdclk instead. (Note that + * intel_compute_max_dotclk() limits the max pixel clock to 99% of max + * cdclk.) + */ + if (max_pixclk > DIV_ROUND_UP(2 * 158400 * 99, 100)) return 316800; - else if (max_pixclk > 2 * 79200) + else if (max_pixclk > DIV_ROUND_UP(2 * 79200 * 99, 100)) return 158400; else return 79200; @@ -1664,7 +1670,11 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) int max_cdclk_freq = dev_priv->max_cdclk_freq; if (IS_GEMINILAKE(dev_priv)) - return 2 * max_cdclk_freq; + /* + * FIXME: Limiting to 99% as a temporary workaround. See + * glk_calc_cdclk() for details. + */ + return 2 * max_cdclk_freq * 99 / 100; else if (INTEL_INFO(dev_priv)->gen >= 9 || IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) return max_cdclk_freq; -- cgit v1.1 From b268d9fe0f10544f5f7a1b7015e2b97075e6215d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 5 Apr 2017 23:15:14 +0100 Subject: drm/i915: Use the right mapping_gfp_mask for final shmem allocation Many sightings report the greater prevalence of allocation failures. This is all due to the incorrect use of mapping_gfp_constraint(), so remove it in favour of just querying the mapping_gfp_mask() which are the exact gfp_t we wanted in the first place. We still do expect a higher chance of reporting ENOMEM, as that is the intention of using __GFP_NORETRY -- to fail rather than oom after having reclaimed from our bo caches, and having done a direct|kswapd reclaim pass. Reported-by: Jason Ekstrand Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100594 Fixes: 24f8e00a8a2e ("drm/i915: Prefer to report ENOMEM rather than incur the oom for gfx allocations") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170405221514.23251-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 28e77d2..391aa69 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2340,7 +2340,7 @@ rebuild_st: * defer the oom here by reporting the ENOMEM back * to userspace. 
*/ - reclaim = mapping_gfp_constraint(mapping, 0); + reclaim = mapping_gfp_mask(mapping); reclaim |= __GFP_NORETRY; /* reclaim, but no oom */ page = shmem_read_mapping_page_gfp(mapping, i, reclaim); -- cgit v1.1 From 90f192c8241e431d2c3076e4f2cb99ac25bfb1c5 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 30 Mar 2017 13:24:06 -0700 Subject: drm/i915/GuC/GLK: Load GuC on GLK Load GuC 10.56 on GLK. Work on firmware is still in progress. Testing has not been done yet. This patch addresses the initial need to load the GuC firmware for HuC authentication v2: rebased. Cc: Jeff mcgee Cc: Rodrigo Vivi Cc: John Spotswood Signed-off-by: Anusha Srivatsa Reviewed-by: John Spotswood Signed-off-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1490905447-15815-1-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_guc_loader.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 2793c01..12f80ec 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -61,6 +61,9 @@ #define KBL_FW_MAJOR 9 #define KBL_FW_MINOR 14 +#define GLK_FW_MAJOR 10 +#define GLK_FW_MINOR 56 + #define GUC_FW_PATH(platform, major, minor) \ "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin" @@ -73,6 +76,8 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE); #define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR) MODULE_FIRMWARE(I915_KBL_GUC_UCODE); +#define I915_GLK_GUC_UCODE GUC_FW_PATH(glk, GLK_FW_MAJOR, GLK_FW_MINOR) + static u32 get_gttype(struct drm_i915_private *dev_priv) { @@ -405,6 +410,10 @@ int intel_guc_select_fw(struct intel_guc *guc) guc->fw.path = I915_KBL_GUC_UCODE; guc->fw.major_ver_wanted = KBL_FW_MAJOR; guc->fw.minor_ver_wanted = KBL_FW_MINOR; + } else if (IS_GEMINILAKE(dev_priv)) { + guc->fw.path = I915_GLK_GUC_UCODE; + guc->fw.major_ver_wanted = GLK_FW_MAJOR; + guc->fw.minor_ver_wanted = GLK_FW_MINOR; } else { DRM_ERROR("No GuC firmware known for platform with GuC!\n"); return -ENOENT; -- cgit v1.1 From db5ba0d8931ee0e470805b972a905c869dc793bb Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 30 Mar 2017 13:24:07 -0700 Subject: drm/i915/GLK/HuC: Load HuC on GLK Load HuC version 1.07.1748 on GLK. v2: rebased. v3: Use name of the right platform(John Spotswood) v4: rebased. 
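For reference, the firmware file these version defines resolve to follows from the HUC_FW_PATH macro in the diff below; a small stand-alone check (the __stringify() helpers here are modelled on the kernel's linux/stringify.h):

#include <stdio.h>

#define __stringify_1(x)        #x
#define __stringify(x)          __stringify_1(x)

/* Same construction as the kernel's HUC_FW_PATH. */
#define HUC_FW_PATH(platform, major, minor, bld_num) \
        "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \
        __stringify(minor) "_" __stringify(bld_num) ".bin"

#define GLK_HUC_FW_MAJOR 01
#define GLK_HUC_FW_MINOR 07
#define GLK_BLD_NUM 1748

int main(void)
{
        /* Prints: i915/glk_huc_ver01_07_1748.bin */
        puts(HUC_FW_PATH(glk, GLK_HUC_FW_MAJOR, GLK_HUC_FW_MINOR, GLK_BLD_NUM));
        return 0;
}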
Cc: Jeff Mcgee Cc: John Spotswood Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Reviewed-by: John Spotswood Signed-off-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1490905447-15815-2-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_huc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 385cacb..7a0bf15 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -52,6 +52,10 @@ #define KBL_HUC_FW_MINOR 00 #define KBL_BLD_NUM 1810 +#define GLK_HUC_FW_MAJOR 01 +#define GLK_HUC_FW_MINOR 07 +#define GLK_BLD_NUM 1748 + #define HUC_FW_PATH(platform, major, minor, bld_num) \ "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \ __stringify(minor) "_" __stringify(bld_num) ".bin" @@ -68,6 +72,9 @@ MODULE_FIRMWARE(I915_BXT_HUC_UCODE); KBL_HUC_FW_MINOR, KBL_BLD_NUM) MODULE_FIRMWARE(I915_KBL_HUC_UCODE); +#define I915_GLK_HUC_UCODE HUC_FW_PATH(glk, GLK_HUC_FW_MAJOR, \ + GLK_HUC_FW_MINOR, GLK_BLD_NUM) + /** * huc_ucode_xfer() - DMA's the firmware * @dev_priv: the drm_i915_private device @@ -169,6 +176,10 @@ void intel_huc_select_fw(struct intel_huc *huc) huc->fw.path = I915_KBL_HUC_UCODE; huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR; huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR; + } else if (IS_GEMINILAKE(dev_priv)) { + huc->fw.path = I915_GLK_HUC_UCODE; + huc->fw.major_ver_wanted = GLK_HUC_FW_MAJOR; + huc->fw.minor_ver_wanted = GLK_HUC_FW_MINOR; } else { DRM_ERROR("No HuC firmware known for platform with HuC!\n"); return; -- cgit v1.1 From 2ca9faa551c4c97610bb0209e20c7231a93712ff Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 5 Apr 2017 16:30:54 +0100 Subject: drm/i915: Assert the engine is idle before overwiting the HWS When we update the global seqno (on the engine timeline), we modify HW state (both registers and mapped pages). As we do this, we should be sure that the HW is idle and we are not causing a conflict. The caller is supposed to wait_for_idle before calling us to update the seqno, so let's assert they have and the engine is indeed idle. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170405153055.28123-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_request.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 6348353..2f8c513 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -214,8 +214,8 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) } /* Finally reset hw state */ - tl->seqno = seqno; intel_engine_init_global_seqno(engine, seqno); + tl->seqno = seqno; list_for_each_entry(timeline, &i915->gt.timelines, link) memset(timeline->engine[id].sync_seqno, 0, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 854e8e0..92f871c 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -223,6 +223,8 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) { struct drm_i915_private *dev_priv = engine->i915; + GEM_BUG_ON(!intel_engine_is_idle(engine)); + /* Our semaphore implementation is strictly monotonic (i.e. 
we proceed * so long as the semaphore value in the register/page is greater * than the sync value), so whenever we reset the seqno, @@ -260,6 +262,8 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) * there are any waiters for that seqno. */ intel_engine_wakeup(engine); + + GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); } static void intel_engine_init_timeline(struct intel_engine_cs *engine) -- cgit v1.1 From cbb60b4b987c8a57533dca0f66887ed14a9498e5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 6 Apr 2017 18:00:28 +0100 Subject: drm/i915: Advance ring->head fully when idle When we retire the last request on the ring, before we ever access that ring again we know it will be completely idle and so we can advance the ring->head fully to the end (i.e. ring->tail) and not just to the start of the breadcrumb. This allows us to skip re-emitting the breadcrumb after resetting the GPU if the ring was entirely idle. This prevents us from overwriting a seqno wraparound by re-executing a stale breadcrumb, i.e. submit_request(1) intel_engine_init_global_seqno(0) i915_reset() would then leave 1 in the HWS, but the next request to execute would also be with seqno 1. The sanity checks upon submission detect this as a timewarp and explode. By setting the ring as empty, upon reset the HWS is left as 0, leaving it consistent with the timeline. v2: Fix check for deleting last element of list. We know that this request is always the first element of the ring, so only if next points back to the start will this be the only request in flight. v3: Remove opencoding of list_is_last() v4: Move the block to its own function for some clarity. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100144 Testcase: igt/gem_exec_whisper/hang-* Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170406170028.26871-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_request.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 2f8c513..313cdff 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -271,6 +271,27 @@ void i915_gem_retire_noop(struct i915_gem_active *active, /* Space left intentionally blank */ } +static void advance_ring(struct drm_i915_gem_request *request) +{ + unsigned int tail; + + /* We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + * + * Note this requires that we are always called in request + * completion order. + */ + if (list_is_last(&request->ring_link, &request->ring->request_list)) + tail = request->ring->tail; + else + tail = request->postfix; + list_del(&request->ring_link); + + request->ring->head = tail; +} + static void i915_gem_request_retire(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; @@ -287,16 +308,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) list_del_init(&request->link); spin_unlock_irq(&engine->timeline->lock); - /* We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. 
- * - * Note this requires that we are always called in request - * completion order. - */ - list_del(&request->ring_link); - request->ring->head = request->postfix; if (!--request->i915->gt.active_requests) { GEM_BUG_ON(!request->i915->gt.awake); mod_delayed_work(request->i915->wq, @@ -304,6 +315,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) msecs_to_jiffies(100)); } unreserve_seqno(request->engine); + advance_ring(request); /* Walk through the active list, calling retire on each. This allows * objects to track their GPU activity and mark themselves as idle -- cgit v1.1 From 400c19d9f85c58913956eb5ca9705c9bdd4b8fe6 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 7 Apr 2017 01:23:45 +0200 Subject: i915: initialize the free_list of the fencing atomic_helper Just in case the llist model changes and NULL isn't valid initialization. Signed-off-by: Andrea Arcangeli Link: http://patchwork.freedesktop.org/patch/msgid/20170406232347.988-4-aarcange@redhat.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 26e871a..b60ba16 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14992,6 +14992,7 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = &intel_mode_funcs; + init_llist_head(&dev_priv->atomic_helper.free_list); INIT_WORK(&dev_priv->atomic_helper.free_work, intel_atomic_helper_free_state_worker); -- cgit v1.1 From d0aa301ae5a60cdfddac12888dc373a68985335b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Apr 2017 11:25:49 +0100 Subject: drm/i915: The shrinker already acquires struct_mutex, so call it unlocked The shrinker is prepared to be called unlocked (and at other times with struct_mutex held for DIRECT_RECLAIM) so we can skip acquiring the struct_mutex prior to calling the shrinker during freeze. This improves our ability to shrink as we can be more aggressive when we know the caller isn't holding struct_mutex. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170407102552.5781-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 391aa69..372183e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4864,9 +4864,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) int i915_gem_freeze(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->drm.struct_mutex); + /* Discard all purgeable objects, let userspace recover those as + * required after resuming. + */ i915_gem_shrink_all(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); return 0; } @@ -4891,12 +4892,12 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) * we update that state just before writing out the image. * * To try and reduce the hibernation image, we manually shrink - * the objects as well. 
+ * the objects as well, see i915_gem_freeze() */ - mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND); + mutex_lock(&dev_priv->drm.struct_mutex); for (p = phases; *p; p++) { list_for_each_entry(obj, *p, global_link) { obj->base.read_domains = I915_GEM_DOMAIN_CPU; -- cgit v1.1 From 17b93c40e74868c92f23a3e95c905bc3296580e9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Apr 2017 11:25:50 +0100 Subject: drm/i915: Drain any freed objects prior to hibernation As we call into the shrinker during freeze, we may have freed more objects since we idled during i915_gem_suspend. Make sure we flush the i915_gem_free_objects worker prior to saving the unwanted pages into the hibernation image. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170407102552.5781-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 372183e..38e4e58 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4896,6 +4896,7 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) */ i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND); + i915_gem_drain_freed_objects(dev_priv); mutex_lock(&dev_priv->drm.struct_mutex); for (p = phases; *p; p++) { -- cgit v1.1 From 8f612d055183545070ca1009ac2eb1f2e044cc20 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 7 Apr 2017 13:49:34 +0300 Subject: drm/i915: Don't call synchronize_rcu_expedited under struct_mutex Only call synchronize_rcu_expedited after unlocking struct_mutex to avoid deadlock because the workqueues depend on struct_mutex. >From original patch by Andrea: synchronize_rcu/synchronize_sched/synchronize_rcu_expedited() will hang until its own workqueues are run. The i915 gem workqueues will wait on the struct_mutex to be released. So we cannot wait for a quiescent state using those rcu primitives while holding the struct_mutex or it creates a circular lock dependency resulting in kernel hangs (which is reproducible but goes undetected by lockdep). kswapd0 D 0 700 2 0x00000000 Call Trace: ? __schedule+0x1a5/0x660 ? schedule+0x36/0x80 ? _synchronize_rcu_expedited.constprop.65+0x2ef/0x300 ? wake_up_bit+0x20/0x20 ? rcu_stall_kick_kthreads.part.54+0xc0/0xc0 ? rcu_exp_wait_wake+0x530/0x530 ? i915_gem_shrink+0x34b/0x4b0 ? i915_gem_shrinker_scan+0x7c/0x90 ? i915_gem_shrinker_scan+0x7c/0x90 ? shrink_slab.part.61.constprop.72+0x1c1/0x3a0 ? shrink_zone+0x154/0x160 ? kswapd+0x40a/0x720 ? kthread+0xf4/0x130 ? try_to_free_pages+0x450/0x450 ? kthread_create_on_node+0x40/0x40 ? ret_from_fork+0x23/0x30 plasmashell D 0 4657 4614 0x00000000 Call Trace: ? __schedule+0x1a5/0x660 ? schedule+0x36/0x80 ? schedule_preempt_disabled+0xe/0x10 ? __mutex_lock.isra.4+0x1c9/0x790 ? i915_gem_close_object+0x26/0xc0 ? i915_gem_close_object+0x26/0xc0 ? drm_gem_object_release_handle+0x48/0x90 ? drm_gem_handle_delete+0x50/0x80 ? drm_ioctl+0x1fa/0x420 ? drm_gem_handle_create+0x40/0x40 ? pipe_write+0x391/0x410 ? __vfs_write+0xc6/0x120 ? do_vfs_ioctl+0x8b/0x5d0 ? SyS_ioctl+0x3b/0x70 ? entry_SYSCALL_64_fastpath+0x13/0x94 kworker/0:0 D 0 29186 2 0x00000000 Workqueue: events __i915_gem_free_work Call Trace: ? __schedule+0x1a5/0x660 ? schedule+0x36/0x80 ? schedule_preempt_disabled+0xe/0x10 ? __mutex_lock.isra.4+0x1c9/0x790 ? del_timer_sync+0x44/0x50 ? update_curr+0x57/0x110 ? __i915_gem_free_objects+0x31/0x300 ? 
__i915_gem_free_objects+0x31/0x300 ? __i915_gem_free_work+0x2d/0x40 ? process_one_work+0x13a/0x3b0 ? worker_thread+0x4a/0x460 ? kthread+0xf4/0x130 ? process_one_work+0x3b0/0x3b0 ? kthread_create_on_node+0x40/0x40 ? ret_from_fork+0x23/0x30 Fixes: 3d3d18f086cd ("drm/i915: Avoid rcu_barrier() from reclaim paths (shrinker)") Reported-by: Andrea Arcangeli Signed-off-by: Joonas Lahtinen Cc: Andrea Arcangeli Cc: Chris Wilson Cc: Daniel Vetter Cc: Jani Nikula Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 2978acd..129ed30 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -53,6 +53,17 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) BUG(); } +static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock) +{ + if (!unlock) + return; + + mutex_unlock(&dev->struct_mutex); + + /* expedite the RCU grace period to free some request slabs */ + synchronize_rcu_expedited(); +} + static bool any_vma_pinned(struct drm_i915_gem_object *obj) { struct i915_vma *vma; @@ -232,11 +243,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, intel_runtime_pm_put(dev_priv); i915_gem_retire_requests(dev_priv); - if (unlock) - mutex_unlock(&dev_priv->drm.struct_mutex); - /* expedite the RCU grace period to free some request slabs */ - synchronize_rcu_expedited(); + i915_gem_shrinker_unlock(&dev_priv->drm, unlock); return count; } @@ -296,8 +304,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) count += obj->base.size >> PAGE_SHIFT; } - if (unlock) - mutex_unlock(&dev->struct_mutex); + i915_gem_shrinker_unlock(dev, unlock); return count; } @@ -324,8 +331,8 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) sc->nr_to_scan - freed, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); - if (unlock) - mutex_unlock(&dev->struct_mutex); + + i915_gem_shrinker_unlock(dev, unlock); return freed; } @@ -367,8 +374,7 @@ i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv, struct shrinker_lock_uninterruptible *slu) { dev_priv->mm.interruptible = slu->was_interruptible; - if (slu->unlock) - mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock); } static int -- cgit v1.1 From e92075ff7dc54305bd5709a758ffe825aef35e8f Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 7 Apr 2017 13:49:35 +0300 Subject: drm/i915: Simplify shrinker locking By using the same structure for both interruptible and uninterruptible locking in shrinker code, combined with the information that mm.interruptible is only being written to, the code can be greatly simplified. Also removing the i915_gem_ prefix from the locking functions so that nobody in their wildest dreams considers exporting them. 
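For orientation, a reconstructed sketch (not code introduced by this patch; only the edges of the function are visible in the hunks below) of the helper being renamed, built around mutex_trylock_recursive(). Its three outcomes are what make the *unlock out-parameter necessary:

        static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock)
        {
                switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) {
                case MUTEX_TRYLOCK_FAILED:
                        /* Contended by another task: report nothing shrinkable. */
                        return false;

                case MUTEX_TRYLOCK_RECURSIVE:
                        /* Direct reclaim re-entered us while this task already
                         * holds struct_mutex: shrink, but leave the lock alone.
                         */
                        *unlock = false;
                        return true;

                case MUTEX_TRYLOCK_SUCCESS:
                        /* We took the lock and must release it afterwards. */
                        *unlock = true;
                        return true;
                }

                BUG();
        }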
Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1491562175-27680-1-git-send-email-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 6 ---- drivers/gpu/drm/i915/i915_gem.c | 2 -- drivers/gpu/drm/i915/i915_gem_shrinker.c | 56 +++++++++++--------------------- drivers/gpu/drm/i915/intel_display.c | 3 -- 4 files changed, 19 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c9b0949..f990f0e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1511,12 +1511,6 @@ struct i915_gem_mm { /** LRU list of objects with fence regs on them. */ struct list_head fence_list; - /** - * Are we in a non-interruptible section of code like - * modesetting? - */ - bool interruptible; - /* the indicator for dispatch video commands on two BSD rings */ atomic_t bsd_engine_dispatch_index; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 38e4e58..5cfe63e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4822,8 +4822,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) init_waitqueue_head(&dev_priv->pending_flip_queue); - dev_priv->mm.interruptible = true; - atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); spin_lock_init(&dev_priv->fb_tracking.lock); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 129ed30..0e7352d 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -35,9 +35,9 @@ #include "i915_drv.h" #include "i915_trace.h" -static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) +static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock) { - switch (mutex_trylock_recursive(&dev->struct_mutex)) { + switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) { case MUTEX_TRYLOCK_FAILED: return false; @@ -53,12 +53,12 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) BUG(); } -static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock) +static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock) { if (!unlock) return; - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev_priv->drm.struct_mutex); /* expedite the RCU grace period to free some request slabs */ synchronize_rcu_expedited(); @@ -156,7 +156,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, unsigned long count = 0; bool unlock; - if (!i915_gem_shrinker_lock(&dev_priv->drm, &unlock)) + if (!shrinker_lock(dev_priv, &unlock)) return 0; trace_i915_gem_shrink(dev_priv, target, flags); @@ -244,7 +244,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, i915_gem_retire_requests(dev_priv); - i915_gem_shrinker_unlock(&dev_priv->drm, unlock); + shrinker_unlock(dev_priv, unlock); return count; } @@ -284,12 +284,11 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { struct drm_i915_private *dev_priv = container_of(shrinker, struct drm_i915_private, mm.shrinker); - struct drm_device *dev = &dev_priv->drm; struct drm_i915_gem_object *obj; unsigned long count; bool unlock; - if (!i915_gem_shrinker_lock(dev, &unlock)) + if (!shrinker_lock(dev_priv, &unlock)) return 0; i915_gem_retire_requests(dev_priv); @@ -304,7 +303,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) count += obj->base.size >> PAGE_SHIFT; } - 
i915_gem_shrinker_unlock(dev, unlock); + shrinker_unlock(dev_priv, unlock); return count; } @@ -314,11 +313,10 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { struct drm_i915_private *dev_priv = container_of(shrinker, struct drm_i915_private, mm.shrinker); - struct drm_device *dev = &dev_priv->drm; unsigned long freed; bool unlock; - if (!i915_gem_shrinker_lock(dev, &unlock)) + if (!shrinker_lock(dev_priv, &unlock)) return SHRINK_STOP; freed = i915_gem_shrink(dev_priv, @@ -332,26 +330,20 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); - i915_gem_shrinker_unlock(dev, unlock); + shrinker_unlock(dev_priv, unlock); return freed; } -struct shrinker_lock_uninterruptible { - bool was_interruptible; - bool unlock; -}; - static bool -i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, - struct shrinker_lock_uninterruptible *slu, - int timeout_ms) +shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock, + int timeout_ms) { unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms); do { if (i915_gem_wait_for_idle(dev_priv, 0) == 0 && - i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock)) + shrinker_lock(dev_priv, unlock)) break; schedule_timeout_killable(1); @@ -364,29 +356,19 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, } } while (1); - slu->was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; return true; } -static void -i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv, - struct shrinker_lock_uninterruptible *slu) -{ - dev_priv->mm.interruptible = slu->was_interruptible; - i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock); -} - static int i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) { struct drm_i915_private *dev_priv = container_of(nb, struct drm_i915_private, mm.oom_notifier); - struct shrinker_lock_uninterruptible slu; struct drm_i915_gem_object *obj; unsigned long unevictable, bound, unbound, freed_pages; + bool unlock; - if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000)) + if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000)) return NOTIFY_DONE; freed_pages = i915_gem_shrink_all(dev_priv); @@ -415,7 +397,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) bound += obj->base.size >> PAGE_SHIFT; } - i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu); + shrinker_unlock(dev_priv, unlock); if (freed_pages || unbound || bound) pr_info("Purging GPU memory, %lu pages freed, " @@ -435,12 +417,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr { struct drm_i915_private *dev_priv = container_of(nb, struct drm_i915_private, mm.vmap_notifier); - struct shrinker_lock_uninterruptible slu; struct i915_vma *vma, *next; unsigned long freed_pages = 0; + bool unlock; int ret; - if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000)) + if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000)) return NOTIFY_DONE; /* Force everything onto the inactive lists */ @@ -465,7 +447,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr } out: - i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu); + shrinker_unlock(dev_priv, unlock); *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 
b60ba16..ced8a12 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4872,12 +4872,9 @@ static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc) { if (intel_crtc->overlay) { struct drm_device *dev = intel_crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); mutex_lock(&dev->struct_mutex); - dev_priv->mm.interruptible = false; (void) intel_overlay_switch_off(intel_crtc->overlay); - dev_priv->mm.interruptible = true; mutex_unlock(&dev->struct_mutex); } -- cgit v1.1 From 5ad08be7e34fc6f3c6936ac40063ea839d4ce0d2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Apr 2017 11:25:51 +0100 Subject: drm/i915: Break up long runs of freeing objects Before freeing the next batch of objects from the worker, check if the worker's timeslice has expired and if so, defer the next batch to the next invocation of the worker. Suggested-by: Andrea Arcangeli Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170407102552.5781-3-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5cfe63e..9bc4d89 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4349,8 +4349,11 @@ static void __i915_gem_free_work(struct work_struct *work) * unbound now. */ - while ((freed = llist_del_all(&i915->mm.free_list))) + while ((freed = llist_del_all(&i915->mm.free_list))) { __i915_gem_free_objects(i915, freed); + if (need_resched()) + break; + } } static void __i915_gem_free_object_rcu(struct rcu_head *head) -- cgit v1.1 From f2be9d68334dbb4ab8a3aa40b1633e3f408c616b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Apr 2017 11:25:52 +0100 Subject: drm/i915: Insert cond_resched() into i915_gem_free_objects As we may have very many objects to free, check to see if the task needs to be rescheduled whilst freeing them. Suggested-by: Andrea Arcangeli Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170407102552.5781-4-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9bc4d89..b210acc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4302,6 +4302,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, intel_runtime_pm_put(i915); mutex_unlock(&i915->drm.struct_mutex); + cond_resched(); + llist_for_each_entry_safe(obj, on, freed, freed) { GEM_BUG_ON(obj->bind_count); GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); -- cgit v1.1 From d769ab182771df6d7ad6c3ecb0449d94043b7921 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 7 Apr 2017 13:32:10 +0000 Subject: drm/i915: Fix type of timeout_ms parameter in intel_wait_for_register_fw() There is no need to specify timeout as unsigned long since this parameter will be consumed by usecs_to_jiffies() which expects unsigned int only. 
Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170407133212.174608-1-michal.wajdeczko@intel.com Reviewed-by: Tvrtko Ursulin Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_uncore.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f990f0e..59433df 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3092,7 +3092,7 @@ int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, i915_reg_t reg, const u32 mask, const u32 value, - const unsigned long timeout_ms); + const unsigned int timeout_ms); static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) { diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6d1ea26..bcabf54 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1610,7 +1610,7 @@ int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, i915_reg_t reg, const u32 mask, const u32 value, - const unsigned long timeout_ms) + const unsigned int timeout_ms) { #define done ((I915_READ_FW(reg) & mask) == value) int ret = wait_for_us(done, 2); -- cgit v1.1 From 1d1a9774e40414148ecebbdb713746bfb6f9a561 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 7 Apr 2017 16:01:44 +0000 Subject: drm/i915: Extend intel_wait_for_register_fw() with fast timeout In some cases we may want to spend more time in atomic wait than hardcoded 2us. Let's add additional fast timeout parameter to allow flexible configuration of atomic timeout before switching into heavy wait. Add also possibility to return registry value to avoid extra read. 
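As a usage sketch of the widened interface (this mirrors how a later patch in this series converts the GuC MMIO wait): spin in atomic context for up to fast_timeout_us, then, if permitted, sleep-wait for up to slow_timeout_ms, and hand the last-sampled register value back through out_value so the caller need not re-read it:

        u32 status;
        int err;

        err = __intel_wait_for_register_fw(dev_priv,
                                           SOFT_SCRATCH(0),
                                           INTEL_GUC_RECV_MASK,
                                           INTEL_GUC_RECV_MASK,
                                           10,          /* fast_timeout_us */
                                           10,          /* slow_timeout_ms */
                                           &status);
        if (err)
                DRM_DEBUG_DRIVER("GuC did not respond, status 0x%08x\n", status);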
v2: use explicit fast timeout (Tvrtko/Chris) allow returning register value (Chris) Signed-off-by: Michal Wajdeczko Suggested-by: Tvrtko Ursulin Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170407160145.181328-1-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 14 +++++++++++++- drivers/gpu/drm/i915/intel_uncore.c | 36 ++++++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 59433df..bb6fc1e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3088,11 +3088,23 @@ int intel_wait_for_register(struct drm_i915_private *dev_priv, const u32 mask, const u32 value, const unsigned long timeout_ms); +int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, + i915_reg_t reg, + const u32 mask, + const u32 value, + const unsigned int fast_timeout_us, + const unsigned int slow_timeout_ms, + u32 *out_value); +static inline int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, i915_reg_t reg, const u32 mask, const u32 value, - const unsigned int timeout_ms); + const unsigned int timeout_ms) +{ + return __intel_wait_for_register_fw(dev_priv, reg, mask, value, + 2, timeout_ms, NULL); +} static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) { diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index bcabf54..ace0993 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1585,19 +1585,21 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, } /** - * intel_wait_for_register_fw - wait until register matches expected state + * __intel_wait_for_register_fw - wait until register matches expected state * @dev_priv: the i915 device * @reg: the register to read * @mask: mask to apply to register value * @value: expected value - * @timeout_ms: timeout in millisecond + * @fast_timeout_us: fast timeout in microsecond for atomic/tight wait + * @slow_timeout_ms: slow timeout in millisecond + * @out_value: optional placeholder to hold registry value * * This routine waits until the target register @reg contains the expected * @value after applying the @mask, i.e. it waits until :: * * (I915_READ_FW(reg) & mask) == value * - * Otherwise, the wait will timeout after @timeout_ms milliseconds. + * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds. * * Note that this routine assumes the caller holds forcewake asserted, it is * not suitable for very long waits. See intel_wait_for_register() if you @@ -1606,16 +1608,26 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, * * Returns 0 if the register matches the desired condition, or -ETIMEOUT. 
*/ -int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, - i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned int timeout_ms) -{ -#define done ((I915_READ_FW(reg) & mask) == value) - int ret = wait_for_us(done, 2); +int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, + i915_reg_t reg, + const u32 mask, + const u32 value, + const unsigned int fast_timeout_us, + const unsigned int slow_timeout_ms, + u32 *out_value) +{ + u32 reg_value; +#define done (((reg_value = I915_READ_FW(reg)) & mask) == value) + int ret; + + if (fast_timeout_us > 10) + ret = _wait_for(done, fast_timeout_us, 10); + else + ret = _wait_for_atomic(done, fast_timeout_us, 0); if (ret) - ret = wait_for(done, timeout_ms); + ret = wait_for(done, slow_timeout_ms); + if (out_value) + *out_value = reg_value; return ret; #undef done } -- cgit v1.1 From bea4e4a4f831df1c104be60b3caa7205ba1bb4f9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 7 Apr 2017 16:01:45 +0000 Subject: drm/i915/guc: Use wait_for_register_fw() while waiting for MMIO response Waiting for the response status in scratch register can be done using our generic function. Let's use it. v2: rebased Signed-off-by: Michal Wajdeczko Suggested-by: Tvrtko Ursulin Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170407160145.181328-2-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uc.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index c117424..4364b1a 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -360,19 +360,6 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) } /* - * Read GuC command/status register (SOFT_SCRATCH_0) - * Return true if it contains a response rather than a command - */ -static bool guc_recv(struct intel_guc *guc, u32 *status) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - u32 val = I915_READ(SOFT_SCRATCH(0)); - *status = val; - return INTEL_GUC_RECV_IS_RESPONSE(val); -} - -/* * This function implements the MMIO based host to GuC interface. */ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) @@ -399,13 +386,14 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); /* - * Fast commands should complete in less than 10us, so sample quickly - * up to that length of time, then switch to a slower sleep-wait loop. - * No inte_guc_send command should ever take longer than 10ms. + * No GuC command should ever take longer than 10ms. + * Fast commands should still complete in 10us. */ - ret = wait_for_us(guc_recv(guc, &status), 10); - if (ret) - ret = wait_for(guc_recv(guc, &status), 10); + ret = __intel_wait_for_register_fw(dev_priv, + SOFT_SCRATCH(0), + INTEL_GUC_RECV_MASK, + INTEL_GUC_RECV_MASK, + 10, 10, &status); if (status != INTEL_GUC_STATUS_SUCCESS) { /* * Either the GuC explicitly returned an error (which -- cgit v1.1 From bcc36d8a4f1eb91a05655471f0c8cf2b5be07b16 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Apr 2017 20:42:20 +0100 Subject: drm/i915: Use drm_i915_private directly from debugfs The void *data passed to debugfs callbacks is actually the drm_i915_private pointer, so use it thusly and avoid the to_i915(dev) indirection. 
Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170407194220.821-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_debugfs.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d689e51..870c470 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2568,8 +2568,7 @@ static int i915_guc_log_dump(struct seq_file *m, void *data) static int i915_guc_log_control_get(void *data, u64 *val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; if (!dev_priv->guc.log.vma) return -EINVAL; @@ -2581,14 +2580,13 @@ static int i915_guc_log_control_get(void *data, u64 *val) static int i915_guc_log_control_set(void *data, u64 val) { - struct drm_device *dev = data; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = data; int ret; if (!dev_priv->guc.log.vma) return -EINVAL; - ret = mutex_lock_interruptible(&dev->struct_mutex); + ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); if (ret) return ret; @@ -2596,7 +2594,7 @@ static int i915_guc_log_control_set(void *data, u64 val) ret = i915_guc_log_control(dev_priv, val); intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev_priv->drm.struct_mutex); return ret; } -- cgit v1.1 From 3fc7d86b3268af92181b517ca832258809b2d771 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 10 Apr 2017 09:38:17 +0000 Subject: drm/i915: Drop const qualifiers from params in wait_for_register() These params are passed by value, const qualifiers are ignored any way. While around, unify timeout_ms type from long to int. 
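A minimal illustration of the C rule being relied on (the function name is made up): top-level const on a by-value parameter only constrains the callee's local copy and is not part of the function's type, so dropping it changes nothing for callers and the two forms below remain compatible:

        void set_timeout(unsigned int timeout_ms);        /* declaration */

        void set_timeout(const unsigned int timeout_ms)   /* definition  */
        {
                /* 'const' here merely stops this local copy being modified. */
        }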
Signed-off-by: Michal Wajdeczko Suggested-by: Joonas Lahtinen Cc: Joonas Lahtinen Cc: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170410093817.151280-1-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 20 ++++++++++---------- drivers/gpu/drm/i915/intel_uncore.c | 14 +++++++------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bb6fc1e..ed079c2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3085,22 +3085,22 @@ void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); int intel_wait_for_register(struct drm_i915_private *dev_priv, i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned long timeout_ms); + u32 mask, + u32 value, + unsigned int timeout_ms); int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned int fast_timeout_us, - const unsigned int slow_timeout_ms, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, u32 *out_value); static inline int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned int timeout_ms) + u32 mask, + u32 value, + unsigned int timeout_ms) { return __intel_wait_for_register_fw(dev_priv, reg, mask, value, 2, timeout_ms, NULL); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index ace0993..1deb1a4 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1610,10 +1610,10 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, */ int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned int fast_timeout_us, - const unsigned int slow_timeout_ms, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, u32 *out_value) { u32 reg_value; @@ -1651,9 +1651,9 @@ int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, */ int intel_wait_for_register(struct drm_i915_private *dev_priv, i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned long timeout_ms) + u32 mask, + u32 value, + unsigned int timeout_ms) { unsigned fw = -- cgit v1.1 From 6976e74b5fa12430b36c3b0f8afd7e8e695be0ab Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 10 Apr 2017 12:17:47 +0000 Subject: drm/i915: Don't allow overuse of __intel_wait_for_register_fw() This function should not be called with long timeouts in atomic context. Annotate it as might_sleep if timeout is longer than 10us. v2: fix comment (Michal) Signed-off-by: Michal Wajdeczko Suggested-by: Chris Wilson Cc: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170410121747.209200-1-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uncore.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 1deb1a4..eb38392 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1600,6 +1600,8 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, * (I915_READ_FW(reg) & mask) == value * * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds. 
+ * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us + * must be not larger than 10 microseconds. * * Note that this routine assumes the caller holds forcewake asserted, it is * not suitable for very long waits. See intel_wait_for_register() if you @@ -1620,6 +1622,9 @@ int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, #define done (((reg_value = I915_READ_FW(reg)) & mask) == value) int ret; + /* Catch any overuse of this function */ + might_sleep_if(fast_timeout_us > 10 || slow_timeout_ms); + if (fast_timeout_us > 10) ret = _wait_for(done, fast_timeout_us, 10); else -- cgit v1.1 From 84d84cb7e20d3d2c6413c5f75634e88eb68a3f97 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Apr 2017 12:27:05 +0100 Subject: drm/i915: Stop second guessing the caller for intel_uncore_wait_for_register() Allow the caller to use the fast_timeout_us to specify how long to wait within the atomic section, rather than transparently switching to a sleeping loop for larger values. This is required as some callsites may need a long wait and are in an atomic section. v2: Reinforce kerneldoc fast_timeout_us limit with a GEM_BUG_ON Signed-off-by: Chris Wilson Cc: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170411112705.12656-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_uncore.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index eb38392..dded42d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1601,7 +1601,7 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, * * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds. * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us - * must be not larger than 10 microseconds. + * must be not larger than 20,0000 microseconds. * * Note that this routine assumes the caller holds forcewake asserted, it is * not suitable for very long waits. See intel_wait_for_register() if you @@ -1623,16 +1623,18 @@ int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, int ret; /* Catch any overuse of this function */ - might_sleep_if(fast_timeout_us > 10 || slow_timeout_ms); + might_sleep_if(slow_timeout_ms); + GEM_BUG_ON(fast_timeout_us > 20000); - if (fast_timeout_us > 10) - ret = _wait_for(done, fast_timeout_us, 10); - else + ret = -ETIMEDOUT; + if (fast_timeout_us && fast_timeout_us <= 20000) ret = _wait_for_atomic(done, fast_timeout_us, 0); if (ret) ret = wait_for(done, slow_timeout_ms); + if (out_value) *out_value = reg_value; + return ret; #undef done } -- cgit v1.1 From 02b312d05d1b51008e37f72f5a30c88e11c51ea5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Apr 2017 11:13:37 +0100 Subject: drm/i915: Stop sleeping from inside gen6_bsd_submit_request() submit_request() is called from an atomic context, it's not allowed to sleep. We have to be careful in our parameters to intel_uncore_wait_for_register() to limit ourselves to the atomic wait loop and not incur the wrath of our warnings. 
Fixes: 6976e74b5fa1 ("drm/i915: Don't allow overuse of __intel_wait_for_register_fw()") Signed-off-by: Chris Wilson Cc: Michal Wajdeczko Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170410143807.22725-1-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/20170411101340.31994-2-chris@chris-wilson.co.uk Reviewed-by: Michal Wajdeczko Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c98acc2..331da59 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1729,11 +1729,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0); /* Wait for the ring not to be idle, i.e. for it to wake up. */ - if (intel_wait_for_register_fw(dev_priv, - GEN6_BSD_SLEEP_PSMI_CONTROL, - GEN6_BSD_SLEEP_INDICATOR, - 0, - 50)) + if (__intel_wait_for_register_fw(dev_priv, + GEN6_BSD_SLEEP_PSMI_CONTROL, + GEN6_BSD_SLEEP_INDICATOR, + 0, + 1000, 0, NULL)) DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); /* Now that the ring is fully powered up, update the tail */ -- cgit v1.1 From 0564654340e2776843ade007c1aaa8e8f30dd147 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Apr 2017 11:13:38 +0100 Subject: drm/i915: Acquire uncore.lock over intel_uncore_wait_for_register() We acquire the forcewake and use I915_READ_FW instead for the atomic wait within intel_uncore_wait_for_register. However, this still leaves us vulnerable to concurrent mmio access to the register, which can cause system hangs on gen7. The protection is to acquire uncore.lock around each register, so lets add it back. v2: Wrap __intel_wait_for_register_fw() to re-use its atomic wait_for loop and spare adding another for ourselves. 
v3: Add might_sleep() annotation Signed-off-by: Chris Wilson Cc: Michal Wajdeczko Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170411101340.31994-3-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_uncore.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index dded42d..fb38c76 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1662,14 +1662,22 @@ int intel_wait_for_register(struct drm_i915_private *dev_priv, u32 value, unsigned int timeout_ms) { - unsigned fw = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ); int ret; - intel_uncore_forcewake_get(dev_priv, fw); - ret = wait_for_us((I915_READ_FW(reg) & mask) == value, 2); - intel_uncore_forcewake_put(dev_priv, fw); + might_sleep(); + + spin_lock_irq(&dev_priv->uncore.lock); + intel_uncore_forcewake_get__locked(dev_priv, fw); + + ret = __intel_wait_for_register_fw(dev_priv, + reg, mask, value, + 2, 0, NULL); + + intel_uncore_forcewake_put__locked(dev_priv, fw); + spin_unlock_irq(&dev_priv->uncore.lock); + if (ret) ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value, timeout_ms); -- cgit v1.1 From e09a3036412a959689bacf017bf2cbc226c9fea4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Apr 2017 11:13:39 +0100 Subject: drm/i915: Use __intel_uncore_wait_for_register_fw for sandybride_pcode_read Since the sandybridge_pcode_read() may be called from skl_pcode_request() inside an atomic context (with preempt disabled), we should avoid hitting any sleeping paths. Currently is being called with a 500ms timeout, irrespective of being inside an atomic context or not. This is reduced down to 500us to play nice with the atomic context, and that appears to be sufficient to keep BAT happy (we have a DRM_ERROR should it timeout), i.e. we do not see any 500us pcode timeouts for normal use. So leave it as a pure spin without having to introduce new code paths to separate atomic/normal contexts. 
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170411101340.31994-4-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_pm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 55e1e88..cacb65f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8135,9 +8135,9 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val I915_WRITE_FW(GEN6_PCODE_DATA1, 0); I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); - if (intel_wait_for_register_fw(dev_priv, - GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, - 500)) { + if (__intel_wait_for_register_fw(dev_priv, + GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, + 500, 0, NULL)) { DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox); return -ETIMEDOUT; } @@ -8180,9 +8180,9 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, I915_WRITE_FW(GEN6_PCODE_DATA1, 0); I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); - if (intel_wait_for_register_fw(dev_priv, - GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, - 500)) { + if (__intel_wait_for_register_fw(dev_priv, + GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, + 500, 0, NULL)) { DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox); return -ETIMEDOUT; } -- cgit v1.1 From f42bb651d1e796ad6d86fc66d578abdee445696b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Apr 2017 11:13:40 +0100 Subject: drm/i915: Use safer intel_uncore_wait_for_register in ring-init While we do hold the forcewake for legacy ringbuffer initialisation, we don't guard our access with the uncore.lock spinlock. In theory, we only initialise when no others should be accessing the same mmio cachelines, but in practice be safe as this is an infrequently used path and not worth risky micro-optimisations. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170411101340.31994-5-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 331da59..97d5fcc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -538,9 +538,9 @@ static int init_ring_common(struct intel_engine_cs *engine) I915_WRITE_CTL(engine, RING_CTL_SIZE(ring->size) | RING_VALID); /* If the head is still not zero, the ring is dead */ - if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base), - RING_VALID, RING_VALID, - 50)) { + if (intel_wait_for_register(dev_priv, RING_CTL(engine->mmio_base), + RING_VALID, RING_VALID, + 50)) { DRM_ERROR("%s initialization failed " "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n", engine->name, -- cgit v1.1 From 0908180b9a1cf8c5410d0c1151894fae710744dc Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 10 Apr 2017 07:34:29 -0700 Subject: drm/i915: Classify the engines in class + instance In such a way that vcs and vcs2 are just two different instances (0 and 1) of the same engine class (VIDEO_DECODE_CLASS). 
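A hypothetical sketch (not part of the patch) of the kind of test the new fields allow, with the second BSD engine identified by (class, instance) rather than by its flat engine id:

        static bool is_second_bsd(const struct intel_engine_cs *engine)
        {
                /* vcs2 is simply instance 1 of the video decode class. */
                return engine->class == VIDEO_DECODE_CLASS &&
                       engine->instance == 1;
        }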
v2: Align the instance types (Tvrtko) v3: Don't use enums for bspec-defined stuff (Michal) Cc: Paulo Zanoni Cc: Rodrigo Vivi Cc: Chris Wilson Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Tvrtko Ursulin Reviewed-by: Michal Wajdeczko Signed-off-by: Oscar Mateo Link: http://patchwork.freedesktop.org/patch/msgid/1491834873-9345-2-git-send-email-oscar.mateo@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_reg.h | 8 ++++++++ drivers/gpu/drm/i915/intel_engine_cs.c | 14 ++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++++ 3 files changed, 26 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 11b12f4..4c72ada 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -85,6 +85,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define VECS_HW 3 #define VCS2_HW 4 +/* Engine class */ + +#define RENDER_CLASS 0 +#define VIDEO_DECODE_CLASS 1 +#define VIDEO_ENHANCEMENT_CLASS 2 +#define COPY_ENGINE_CLASS 3 +#define OTHER_CLASS 4 + /* PCI config space */ #define MCHBAR_I915 0x44 diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 92f871c..bb22927 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -30,6 +30,8 @@ static const struct engine_info { const char *name; unsigned int exec_id; unsigned int hw_id; + u8 class; + u8 instance; u32 mmio_base; unsigned irq_shift; int (*init_legacy)(struct intel_engine_cs *engine); @@ -39,6 +41,8 @@ static const struct engine_info { .name = "rcs", .hw_id = RCS_HW, .exec_id = I915_EXEC_RENDER, + .class = RENDER_CLASS, + .instance = 0, .mmio_base = RENDER_RING_BASE, .irq_shift = GEN8_RCS_IRQ_SHIFT, .init_execlists = logical_render_ring_init, @@ -48,6 +52,8 @@ static const struct engine_info { .name = "bcs", .hw_id = BCS_HW, .exec_id = I915_EXEC_BLT, + .class = COPY_ENGINE_CLASS, + .instance = 0, .mmio_base = BLT_RING_BASE, .irq_shift = GEN8_BCS_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, @@ -57,6 +63,8 @@ static const struct engine_info { .name = "vcs", .hw_id = VCS_HW, .exec_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 0, .mmio_base = GEN6_BSD_RING_BASE, .irq_shift = GEN8_VCS1_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, @@ -66,6 +74,8 @@ static const struct engine_info { .name = "vcs2", .hw_id = VCS2_HW, .exec_id = I915_EXEC_BSD, + .class = VIDEO_DECODE_CLASS, + .instance = 1, .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, @@ -75,6 +85,8 @@ static const struct engine_info { .name = "vecs", .hw_id = VECS_HW, .exec_id = I915_EXEC_VEBOX, + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 0, .mmio_base = VEBOX_RING_BASE, .irq_shift = GEN8_VECS_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, @@ -101,6 +113,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->hw_id = engine->guc_id = info->hw_id; engine->mmio_base = info->mmio_base; engine->irq_shift = info->irq_shift; + engine->class = info->class; + engine->instance = info->instance; /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index cbe61d3..f54fe7d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -193,6 +193,10 @@ struct intel_engine_cs { enum intel_engine_id id; unsigned int exec_id; 
unsigned int hw_id; + + u8 class; + u8 instance; + unsigned int guc_id; u32 mmio_base; unsigned int irq_shift; -- cgit v1.1 From 5ff36d36a36d55c8bd4bc937b6a9450b72853c5f Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Mon, 10 Apr 2017 07:34:30 -0700 Subject: drm/i915: Use the same vfunc for BSD2 ring init If we needed to do something different for the init functions, we could always look at the engine instance to make the distinction. But, in any case, the two functions are virtually identical already (please notice that BSD2_RING is only used from gen8 onwards). With this, the init functions depends excusively on the engine class (a fact that we will use soon). v2: Commit message Cc: Paulo Zanoni Cc: Rodrigo Vivi Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Signed-off-by: Oscar Mateo Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1491834873-9345-3-git-send-email-oscar.mateo@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 14 -------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index bb22927..a7ffa4c 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -79,7 +79,7 @@ static const struct engine_info { .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_bsd2_ring_buffer, + .init_legacy = intel_init_bsd_ring_buffer, }, [VECS] = { .name = "vecs", diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 97d5fcc..833740b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2175,20 +2175,6 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) return intel_init_ring_buffer(engine); } -/** - * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3) - */ -int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - intel_ring_default_vfuncs(dev_priv, engine); - - engine->emit_flush = gen6_bsd_ring_flush; - - return intel_init_ring_buffer(engine); -} - int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index f54fe7d..8b53ddb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -555,7 +555,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine); int intel_init_render_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); -int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine); int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); -- cgit v1.1 From 6e516148f35401029ac920e323cce34266c22e6d Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Mon, 10 Apr 2017 07:34:31 -0700 Subject: drm/i915: Generate the engine name based on the instance number Not really needed, but makes the next change a little bit more compact. v2: - Use zero-based numbering for engine names: xcs0, xcs1.. 
xcsN (Tvrtko, Chris) - Make sure the mock engine name is null-terminated (Tvrtko, Chris) v3: Because I'm stupid (Chris) v4: Verify engine name wasn't truncated (Michal) v5: - Kill the warning in mock engine (Chris) Cc: Paulo Zanoni Cc: Rodrigo Vivi Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Reviewed-by: Tvrtko Ursulin Reviewed-by: Michal Wajdeczko Signed-off-by: Oscar Mateo Link: http://patchwork.freedesktop.org/patch/msgid/1491834873-9345-4-git-send-email-oscar.mateo@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_engine_cs.c | 5 +++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +++- drivers/gpu/drm/i915/selftests/mock_engine.c | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a7ffa4c..5e5cda0 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -71,7 +71,7 @@ static const struct engine_info { .init_legacy = intel_init_bsd_ring_buffer, }, [VCS2] = { - .name = "vcs2", + .name = "vcs", .hw_id = VCS2_HW, .exec_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, @@ -108,7 +108,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->id = id; engine->i915 = dev_priv; - engine->name = info->name; + WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u", + info->name, info->instance) >= sizeof(engine->name)); engine->exec_id = info->exec_id; engine->hw_id = engine->guc_id = info->hw_id; engine->mmio_base = info->mmio_base; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8b53ddb..fd8994b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -187,9 +187,11 @@ enum intel_engine_id { VECS }; +#define INTEL_ENGINE_CS_MAX_NAME 8 + struct intel_engine_cs { struct drm_i915_private *i915; - const char *name; + char name[INTEL_ENGINE_CS_MAX_NAME]; enum intel_engine_id id; unsigned int exec_id; unsigned int hw_id; diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index b89050e..b8e53bd 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -140,7 +140,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* minimal engine setup for requests */ engine->base.i915 = i915; - engine->base.name = name; + snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id++; engine->base.status_page.page_addr = (void *)(engine + 1); -- cgit v1.1 From b8400f01fcbbacec274d89a1c41340d0409529bb Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Mon, 10 Apr 2017 07:34:32 -0700 Subject: drm/i915: Split the engine info table in two levels, using class + instance There are some properties that logically belong to the engine class, and some that belong to the engine instance. Make it explicit. 
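A simplified sketch of how the two tables are intended to be stitched together at engine setup time (the real hook-up lives in intel_engine_setup(); the ARRAY_SIZE check reflects the "Bug on oob access to the class array" note in the changelog below):

        const struct engine_info *info = &intel_engines[id];
        const struct engine_class_info *class_info;

        GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
        class_info = &intel_engine_classes[info->class];

        /* The per-instance name now takes its stem from the class table. */
        snprintf(engine->name, sizeof(engine->name), "%s%u",
                 class_info->name, info->instance);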
v2: Commit message (Tvrtko) v3: - Rebased - Exec/uabi id should be per instance (Chris) v4: - Rebased - Avoid re-ordering fields for smaller diff (Tvrtko) - Bug on oob access to the class array (Michal) v5: Bug on the right thing (Michal) v6: Rebased Cc: Tvrtko Ursulin Cc: Paulo Zanoni Cc: Rodrigo Vivi Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Reviewed-by: Michal Wajdeczko Signed-off-by: Oscar Mateo Link: http://patchwork.freedesktop.org/patch/msgid/1491834873-9345-5-git-send-email-oscar.mateo@intel.com Reviewed-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_engine_cs.c | 66 ++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 5e5cda0..058ecc0 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -26,71 +26,84 @@ #include "intel_ringbuffer.h" #include "intel_lrc.h" -static const struct engine_info { +struct engine_class_info { const char *name; - unsigned int exec_id; + int (*init_legacy)(struct intel_engine_cs *engine); + int (*init_execlists)(struct intel_engine_cs *engine); +}; + +static const struct engine_class_info intel_engine_classes[] = { + [RENDER_CLASS] = { + .name = "rcs", + .init_execlists = logical_render_ring_init, + .init_legacy = intel_init_render_ring_buffer, + }, + [COPY_ENGINE_CLASS] = { + .name = "bcs", + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_blt_ring_buffer, + }, + [VIDEO_DECODE_CLASS] = { + .name = "vcs", + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_bsd_ring_buffer, + }, + [VIDEO_ENHANCEMENT_CLASS] = { + .name = "vecs", + .init_execlists = logical_xcs_ring_init, + .init_legacy = intel_init_vebox_ring_buffer, + }, +}; + +struct engine_info { unsigned int hw_id; + unsigned int exec_id; u8 class; u8 instance; u32 mmio_base; unsigned irq_shift; - int (*init_legacy)(struct intel_engine_cs *engine); - int (*init_execlists)(struct intel_engine_cs *engine); -} intel_engines[] = { +}; + +static const struct engine_info intel_engines[] = { [RCS] = { - .name = "rcs", .hw_id = RCS_HW, .exec_id = I915_EXEC_RENDER, .class = RENDER_CLASS, .instance = 0, .mmio_base = RENDER_RING_BASE, .irq_shift = GEN8_RCS_IRQ_SHIFT, - .init_execlists = logical_render_ring_init, - .init_legacy = intel_init_render_ring_buffer, }, [BCS] = { - .name = "bcs", .hw_id = BCS_HW, .exec_id = I915_EXEC_BLT, .class = COPY_ENGINE_CLASS, .instance = 0, .mmio_base = BLT_RING_BASE, .irq_shift = GEN8_BCS_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_blt_ring_buffer, }, [VCS] = { - .name = "vcs", .hw_id = VCS_HW, .exec_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 0, .mmio_base = GEN6_BSD_RING_BASE, .irq_shift = GEN8_VCS1_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_bsd_ring_buffer, }, [VCS2] = { - .name = "vcs", .hw_id = VCS2_HW, .exec_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 1, .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_bsd_ring_buffer, }, [VECS] = { - .name = "vecs", .hw_id = VECS_HW, .exec_id = I915_EXEC_VEBOX, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, .mmio_base = VEBOX_RING_BASE, .irq_shift = GEN8_VECS_IRQ_SHIFT, - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_vebox_ring_buffer, }, }; @@ -99,8 
+112,12 @@ intel_engine_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) { const struct engine_info *info = &intel_engines[id]; + const struct engine_class_info *class_info; struct intel_engine_cs *engine; + GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); + class_info = &intel_engine_classes[info->class]; + GEM_BUG_ON(dev_priv->engine[id]); engine = kzalloc(sizeof(*engine), GFP_KERNEL); if (!engine) @@ -109,7 +126,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->id = id; engine->i915 = dev_priv; WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u", - info->name, info->instance) >= sizeof(engine->name)); + class_info->name, info->instance) >= + sizeof(engine->name)); engine->exec_id = info->exec_id; engine->hw_id = engine->guc_id = info->hw_id; engine->mmio_base = info->mmio_base; @@ -190,12 +208,14 @@ int intel_engines_init(struct drm_i915_private *dev_priv) int err = 0; for_each_engine(engine, dev_priv, id) { + const struct engine_class_info *class_info = + &intel_engine_classes[engine->class]; int (*init)(struct intel_engine_cs *engine); if (i915.enable_execlists) - init = intel_engines[id].init_execlists; + init = class_info->init_execlists; else - init = intel_engines[id].init_legacy; + init = class_info->init_legacy; if (!init) { kfree(engine); dev_priv->engine[id] = NULL; -- cgit v1.1 From 1d39f28170cb95e8b9eb9833d1c17528f400f9c4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Apr 2017 13:43:06 +0100 Subject: drm/i915: Rename intel_engine_cs.exec_id to uabi_id We want to refer to the index of the engine consistently throughout the userspace ABI. We already have such an index through the execbuffer engine specifier, which needs to be able to refer to each engine specifically, so rename the index to uabi_id to reflect its generality beyond execbuf.
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170411124306.15448-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_cmd_parser.c | 8 ++++---- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 14 +++++++------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 7af100f..2a1a334 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1166,8 +1166,8 @@ static bool check_cmd(const struct intel_engine_cs *engine, find_reg(engine, is_master, reg_addr); if (!reg) { - DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n", - reg_addr, *cmd, engine->exec_id); + DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", + reg_addr, *cmd, engine->name); return false; } @@ -1222,11 +1222,11 @@ static bool check_cmd(const struct intel_engine_cs *engine, desc->bits[i].mask; if (dword != desc->bits[i].expected) { - DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (exec_id=%d)\n", + DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n", *cmd, desc->bits[i].mask, desc->bits[i].expected, - dword, engine->exec_id); + dword, engine->name); return false; } } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b210acc..cb8c6a9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3996,7 +3996,7 @@ __busy_set_if_active(const struct dma_fence *fence, if (i915_gem_request_completed(rq)) return 0; - return flag(rq->engine->exec_id); + return flag(rq->engine->uabi_id); } static __always_inline unsigned int diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 058ecc0..71e89a9 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -57,7 +57,7 @@ static const struct engine_class_info intel_engine_classes[] = { struct engine_info { unsigned int hw_id; - unsigned int exec_id; + unsigned int uabi_id; u8 class; u8 instance; u32 mmio_base; @@ -67,7 +67,7 @@ struct engine_info { static const struct engine_info intel_engines[] = { [RCS] = { .hw_id = RCS_HW, - .exec_id = I915_EXEC_RENDER, + .uabi_id = I915_EXEC_RENDER, .class = RENDER_CLASS, .instance = 0, .mmio_base = RENDER_RING_BASE, @@ -75,7 +75,7 @@ static const struct engine_info intel_engines[] = { }, [BCS] = { .hw_id = BCS_HW, - .exec_id = I915_EXEC_BLT, + .uabi_id = I915_EXEC_BLT, .class = COPY_ENGINE_CLASS, .instance = 0, .mmio_base = BLT_RING_BASE, @@ -83,7 +83,7 @@ static const struct engine_info intel_engines[] = { }, [VCS] = { .hw_id = VCS_HW, - .exec_id = I915_EXEC_BSD, + .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 0, .mmio_base = GEN6_BSD_RING_BASE, @@ -91,7 +91,7 @@ static const struct engine_info intel_engines[] = { }, [VCS2] = { .hw_id = VCS2_HW, - .exec_id = I915_EXEC_BSD, + .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 1, .mmio_base = GEN8_BSD2_RING_BASE, @@ -99,7 +99,7 @@ static const struct engine_info intel_engines[] = { }, [VECS] = { .hw_id = VECS_HW, - .exec_id = I915_EXEC_VEBOX, + .uabi_id = I915_EXEC_VEBOX, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, .mmio_base = VEBOX_RING_BASE, @@ -128,7 +128,7 @@ intel_engine_setup(struct 
drm_i915_private *dev_priv, WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u", class_info->name, info->instance) >= sizeof(engine->name)); - engine->exec_id = info->exec_id; + engine->uabi_id = info->uabi_id; engine->hw_id = engine->guc_id = info->hw_id; engine->mmio_base = info->mmio_base; engine->irq_shift = info->irq_shift; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index fd8994b..00d36aa4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -193,7 +193,7 @@ struct intel_engine_cs { struct drm_i915_private *i915; char name[INTEL_ENGINE_CS_MAX_NAME]; enum intel_engine_id id; - unsigned int exec_id; + unsigned int uabi_id; unsigned int hw_id; u8 class; -- cgit v1.1 From a079d10812a3ed84a73d152522e658fe9f76526e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 6 Apr 2017 16:44:09 +0300 Subject: drm/i915/dp: use the sink rates array for max sink rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looking at DPCD DP_MAX_LINK_RATE may be completely bogus for eDP 1.4 which is allowed to use link rate select method and have 0 in max link rate. With this change, it makes sense to store the max rate as the actual rate rather than as a bw code. Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/3e8baadb406d59f414cab36fed9f0b35d207fde5.1491485983.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 28 +++++++--------------------- drivers/gpu/drm/i915/intel_drv.h | 2 +- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index b38cba7..e4650f1 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -161,23 +161,9 @@ static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) intel_dp->num_sink_rates = num_rates; } -static int -intel_dp_max_link_bw(struct intel_dp *intel_dp) +static int intel_dp_max_sink_rate(struct intel_dp *intel_dp) { - int max_link_bw = intel_dp->dpcd[DP_MAX_LINK_RATE]; - - switch (max_link_bw) { - case DP_LINK_BW_1_62: - case DP_LINK_BW_2_7: - case DP_LINK_BW_5_4: - break; - default: - WARN(1, "invalid max DP link bw val %x, using 1.62Gbps\n", - max_link_bw); - max_link_bw = DP_LINK_BW_1_62; - break; - } - return max_link_bw; + return intel_dp->sink_rates[intel_dp->num_sink_rates - 1]; } static u8 intel_dp_max_lane_count(struct intel_dp *intel_dp) @@ -301,7 +287,7 @@ static int intel_dp_rate_index(const int *rates, int len, int rate) static int intel_dp_common_rates(struct intel_dp *intel_dp, int *common_rates) { - int max_rate = drm_dp_bw_code_to_link_rate(intel_dp->max_sink_link_bw); + int max_rate = intel_dp->max_sink_link_rate; int i, common_len; common_len = intersect_rates(intel_dp->source_rates, @@ -339,10 +325,10 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, common_rates, link_rate); if (link_rate_index > 0) { - intel_dp->max_sink_link_bw = drm_dp_link_rate_to_bw_code(common_rates[link_rate_index - 1]); + intel_dp->max_sink_link_rate = common_rates[link_rate_index - 1]; intel_dp->max_sink_lane_count = lane_count; } else if (lane_count > 1) { - intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); + intel_dp->max_sink_link_rate = intel_dp_max_sink_rate(intel_dp); intel_dp->max_sink_lane_count = lane_count >> 1; } else { DRM_ERROR("Link Training Unsuccessful\n"); @@ -4652,8 
+4638,8 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) /* Set the max lane count for sink */ intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); - /* Set the max link BW for sink */ - intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); + /* Set the max link rate for sink */ + intel_dp->max_sink_link_rate = intel_dp_max_sink_rate(intel_dp); intel_dp->reset_link_params = false; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7bc0c25..92e353d 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -959,7 +959,7 @@ struct intel_dp { /* Max lane count for the sink as per DPCD registers */ uint8_t max_sink_lane_count; /* Max link BW for the sink as per DPCD registers */ - int max_sink_link_bw; + int max_sink_link_rate; /* sink or branch descriptor */ struct intel_dp_desc desc; struct drm_dp_aux aux; -- cgit v1.1 From 975ee5fca10b713aff92cee87e1789e5e2e6c1da Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 6 Apr 2017 16:44:10 +0300 Subject: drm/i915/dp: cache common rates with sink rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that source rates are static and sink rates are updated whenever DPCD is updated, we can do and cache the intersection of them whenever sink rates are updated. This reduces code complexity, as we don't have to keep calling the functions to intersect. We also get rid of several common rates arrays on stack. Limiting the common rates by a max link rate can be done by picking the first N elements of the cached common rates. v2: get rid of the local common_rates variable (Manasi) v3: don't clobber cached eDP rates on short pulse (Ville) Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/e3b287e8cb6559b1f8fd4e80b78a8d22f1802eb7.1491485983.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 75 ++++++++++++++++++++++------------------ drivers/gpu/drm/i915/intel_drv.h | 3 ++ 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e4650f1..1808af6 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -284,17 +284,29 @@ static int intel_dp_rate_index(const int *rates, int len, int rate) return -1; } -static int intel_dp_common_rates(struct intel_dp *intel_dp, - int *common_rates) +static void intel_dp_set_common_rates(struct intel_dp *intel_dp) { - int max_rate = intel_dp->max_sink_link_rate; - int i, common_len; + WARN_ON(!intel_dp->num_source_rates || !intel_dp->num_sink_rates); - common_len = intersect_rates(intel_dp->source_rates, - intel_dp->num_source_rates, - intel_dp->sink_rates, - intel_dp->num_sink_rates, - common_rates); + intel_dp->num_common_rates = intersect_rates(intel_dp->source_rates, + intel_dp->num_source_rates, + intel_dp->sink_rates, + intel_dp->num_sink_rates, + intel_dp->common_rates); + + /* Paranoia, there should always be something in common. 
*/ + if (WARN_ON(intel_dp->num_common_rates == 0)) { + intel_dp->common_rates[0] = default_rates[0]; + intel_dp->num_common_rates = 1; + } +} + +/* get length of common rates potentially limited by max_rate */ +static int intel_dp_common_len_rate_limit(struct intel_dp *intel_dp, + int max_rate) +{ + const int *common_rates = intel_dp->common_rates; + int i, common_len = intel_dp->num_common_rates; /* Limit results by potentially reduced max rate */ for (i = 0; i < common_len; i++) { @@ -305,25 +317,23 @@ static int intel_dp_common_rates(struct intel_dp *intel_dp, return 0; } -static int intel_dp_link_rate_index(struct intel_dp *intel_dp, - int *common_rates, int link_rate) +static int intel_dp_link_rate_index(struct intel_dp *intel_dp, int link_rate) { int common_len; - common_len = intel_dp_common_rates(intel_dp, common_rates); + common_len = intel_dp_common_len_rate_limit(intel_dp, + intel_dp->max_sink_link_rate); - return intel_dp_rate_index(common_rates, common_len, link_rate); + return intel_dp_rate_index(intel_dp->common_rates, common_len, link_rate); } int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, int link_rate, uint8_t lane_count) { - int common_rates[DP_MAX_SUPPORTED_RATES]; + const int *common_rates = intel_dp->common_rates; int link_rate_index; - link_rate_index = intel_dp_link_rate_index(intel_dp, - common_rates, - link_rate); + link_rate_index = intel_dp_link_rate_index(intel_dp, link_rate); if (link_rate_index > 0) { intel_dp->max_sink_link_rate = common_rates[link_rate_index - 1]; intel_dp->max_sink_lane_count = lane_count; @@ -1506,8 +1516,6 @@ static void snprintf_int_array(char *str, size_t len, static void intel_dp_print_rates(struct intel_dp *intel_dp) { - int common_len; - int common_rates[DP_MAX_SUPPORTED_RATES]; char str[128]; /* FIXME: too big for stack? 
*/ if ((drm_debug & DRM_UT_KMS) == 0) @@ -1521,8 +1529,8 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp) intel_dp->sink_rates, intel_dp->num_sink_rates); DRM_DEBUG_KMS("sink rates: %s\n", str); - common_len = intel_dp_common_rates(intel_dp, common_rates); - snprintf_int_array(str, sizeof(str), common_rates, common_len); + snprintf_int_array(str, sizeof(str), + intel_dp->common_rates, intel_dp->num_common_rates); DRM_DEBUG_KMS("common rates: %s\n", str); } @@ -1560,14 +1568,14 @@ bool intel_dp_read_desc(struct intel_dp *intel_dp) int intel_dp_max_link_rate(struct intel_dp *intel_dp) { - int rates[DP_MAX_SUPPORTED_RATES] = {}; int len; - len = intel_dp_common_rates(intel_dp, rates); + len = intel_dp_common_len_rate_limit(intel_dp, + intel_dp->max_sink_link_rate); if (WARN_ON(len <= 0)) return 162000; - return rates[len - 1]; + return intel_dp->common_rates[len - 1]; } int intel_dp_rate_select(struct intel_dp *intel_dp, int rate) @@ -1636,11 +1644,11 @@ intel_dp_compute_config(struct intel_encoder *encoder, int link_rate_index; int bpp, mode_rate; int link_avail, link_clock; - int common_rates[DP_MAX_SUPPORTED_RATES] = {}; int common_len; uint8_t link_bw, rate_select; - common_len = intel_dp_common_rates(intel_dp, common_rates); + common_len = intel_dp_common_len_rate_limit(intel_dp, + intel_dp->max_sink_link_rate); /* No common link rates between source and sink */ WARN_ON(common_len <= 0); @@ -1678,7 +1686,6 @@ intel_dp_compute_config(struct intel_encoder *encoder, /* Use values requested by Compliance Test Request */ if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { link_rate_index = intel_dp_link_rate_index(intel_dp, - common_rates, intel_dp->compliance.test_link_rate); if (link_rate_index >= 0) min_clock = max_clock = link_rate_index; @@ -1686,7 +1693,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, } DRM_DEBUG_KMS("DP link computation with max lane count %i " "max bw %d pixel clock %iKHz\n", - max_lane_count, common_rates[max_clock], + max_lane_count, intel_dp->common_rates[max_clock], adjusted_mode->crtc_clock); /* Walk through all bpp values. Luckily they're all nicely spaced with 2 @@ -1722,7 +1729,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, lane_count <= max_lane_count; lane_count <<= 1) { - link_clock = common_rates[clock]; + link_clock = intel_dp->common_rates[clock]; link_avail = intel_dp_max_data_rate(link_clock, lane_count); @@ -1754,7 +1761,7 @@ found: pipe_config->lane_count = lane_count; pipe_config->pipe_bpp = bpp; - pipe_config->port_clock = common_rates[clock]; + pipe_config->port_clock = intel_dp->common_rates[clock]; intel_dp_compute_rate(intel_dp, pipe_config->port_clock, &link_bw, &rate_select); @@ -3715,6 +3722,8 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) else intel_dp_set_sink_rates(intel_dp); + intel_dp_set_common_rates(intel_dp); + return true; } @@ -3726,8 +3735,10 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) return false; /* Don't clobber cached eDP rates. 
*/ - if (!is_edp(intel_dp)) + if (!is_edp(intel_dp)) { intel_dp_set_sink_rates(intel_dp); + intel_dp_set_common_rates(intel_dp); + } if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT, &intel_dp->sink_count, 1) < 0) @@ -3950,7 +3961,6 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) { int status = 0; int min_lane_count = 1; - int common_rates[DP_MAX_SUPPORTED_RATES] = {}; int link_rate_index, test_link_rate; uint8_t test_lane_count, test_link_bw; /* (DP CTS 1.2) @@ -3979,7 +3989,6 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) /* Validate the requested link rate */ test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw); link_rate_index = intel_dp_link_rate_index(intel_dp, - common_rates, test_link_rate); if (link_rate_index < 0) return DP_TEST_NAK; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 92e353d..4a4bf9c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -956,6 +956,9 @@ struct intel_dp { int num_sink_rates; int sink_rates[DP_MAX_SUPPORTED_RATES]; bool use_rate_select; + /* intersection of source and sink rates */ + int num_common_rates; + int common_rates[DP_MAX_SUPPORTED_RATES]; /* Max lane count for the sink as per DPCD registers */ uint8_t max_sink_lane_count; /* Max link BW for the sink as per DPCD registers */ -- cgit v1.1 From b1810a74a0513993e02ba13e60a29c5f01ea3bf0 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 6 Apr 2017 16:44:11 +0300 Subject: drm/i915/dp: do not limit rate seek when not needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In link training fallback, we're trying to find a rate that we know is in a sorted array of common link rates. We don't need to limit the array using the max rate. For test request, the DP CTS doesn't say we should limit the rate based on earlier fallback. This lets us get rid of intel_dp_link_rate_index() and use intel_dp_rate_index() instead. 
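Concretely, the fallback path now reduces to a plain lookup of the failed rate in the full, sorted common_rates[] array and stepping down one entry. A simplified sketch of the logic in the diff below (not the verbatim patch, which also updates the lane count fields):

	index = intel_dp_rate_index(intel_dp->common_rates,
				    intel_dp->num_common_rates,
				    link_rate);
	if (index > 0) {
		/* Retry one step down in the common rate list. */
		intel_dp->max_sink_link_rate = intel_dp->common_rates[index - 1];
	} else if (lane_count > 1) {
		/* The lowest rate already failed: halve the lane count instead. */
		intel_dp->max_sink_lane_count = lane_count >> 1;
	}
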
Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/33cab481a3228f31e938b5891a6285d892dcf272.1491485983.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 1808af6..8c061c5 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -317,25 +317,16 @@ static int intel_dp_common_len_rate_limit(struct intel_dp *intel_dp, return 0; } -static int intel_dp_link_rate_index(struct intel_dp *intel_dp, int link_rate) -{ - int common_len; - - common_len = intel_dp_common_len_rate_limit(intel_dp, - intel_dp->max_sink_link_rate); - - return intel_dp_rate_index(intel_dp->common_rates, common_len, link_rate); -} - int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, int link_rate, uint8_t lane_count) { - const int *common_rates = intel_dp->common_rates; - int link_rate_index; + int index; - link_rate_index = intel_dp_link_rate_index(intel_dp, link_rate); - if (link_rate_index > 0) { - intel_dp->max_sink_link_rate = common_rates[link_rate_index - 1]; + index = intel_dp_rate_index(intel_dp->common_rates, + intel_dp->num_common_rates, + link_rate); + if (index > 0) { + intel_dp->max_sink_link_rate = intel_dp->common_rates[index - 1]; intel_dp->max_sink_lane_count = lane_count; } else if (lane_count > 1) { intel_dp->max_sink_link_rate = intel_dp_max_sink_rate(intel_dp); @@ -1685,8 +1676,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, /* Use values requested by Compliance Test Request */ if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { - link_rate_index = intel_dp_link_rate_index(intel_dp, - intel_dp->compliance.test_link_rate); + link_rate_index = intel_dp_rate_index(intel_dp->common_rates, + intel_dp->num_common_rates, + intel_dp->compliance.test_link_rate); if (link_rate_index >= 0) min_clock = max_clock = link_rate_index; min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; @@ -3988,8 +3980,9 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) } /* Validate the requested link rate */ test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw); - link_rate_index = intel_dp_link_rate_index(intel_dp, - test_link_rate); + link_rate_index = intel_dp_rate_index(intel_dp->common_rates, + intel_dp->num_common_rates, + test_link_rate); if (link_rate_index < 0) return DP_TEST_NAK; -- cgit v1.1 From e6c0c64a291e20e34668b8878b34af78389f9da3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 6 Apr 2017 16:44:12 +0300 Subject: drm/i915/dp: don't call the link parameters sink parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we modify these on the fly depending on the link conditions, don't pretend they are sink properties. Some link vs. sink confusion still remains, but we'll take care of them in follow-up patches. 
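The distinction being drawn is between what the sink advertises in DPCD (a fixed capability) and what the current link is allowed to use (which may have been lowered by link-training fallback). Only as an illustration of the naming, with sink_max/link_max as made-up local variables:

	/* Sink capability, read from DPCD and effectively static: */
	sink_max = drm_dp_max_lane_count(intel_dp->dpcd);
	/* Current link limit, possibly reduced after a failed link training: */
	link_max = intel_dp->max_link_lane_count;
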
Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/3739b4fac502ebd1c6e075a62c1a195e4094eb16.1491485983.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 25 ++++++++++++------------- drivers/gpu/drm/i915/intel_drv.h | 8 ++++---- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 8c061c5..a0082a3 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -172,7 +172,7 @@ static u8 intel_dp_max_lane_count(struct intel_dp *intel_dp) u8 source_max, sink_max; source_max = intel_dig_port->max_lanes; - sink_max = intel_dp->max_sink_lane_count; + sink_max = intel_dp->max_link_lane_count; return min(source_max, sink_max); } @@ -326,11 +326,11 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, intel_dp->num_common_rates, link_rate); if (index > 0) { - intel_dp->max_sink_link_rate = intel_dp->common_rates[index - 1]; - intel_dp->max_sink_lane_count = lane_count; + intel_dp->max_link_rate = intel_dp->common_rates[index - 1]; + intel_dp->max_link_lane_count = lane_count; } else if (lane_count > 1) { - intel_dp->max_sink_link_rate = intel_dp_max_sink_rate(intel_dp); - intel_dp->max_sink_lane_count = lane_count >> 1; + intel_dp->max_link_rate = intel_dp_max_sink_rate(intel_dp); + intel_dp->max_link_lane_count = lane_count >> 1; } else { DRM_ERROR("Link Training Unsuccessful\n"); return -1; @@ -1561,8 +1561,7 @@ intel_dp_max_link_rate(struct intel_dp *intel_dp) { int len; - len = intel_dp_common_len_rate_limit(intel_dp, - intel_dp->max_sink_link_rate); + len = intel_dp_common_len_rate_limit(intel_dp, intel_dp->max_link_rate); if (WARN_ON(len <= 0)) return 162000; @@ -1639,7 +1638,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, uint8_t link_bw, rate_select; common_len = intel_dp_common_len_rate_limit(intel_dp, - intel_dp->max_sink_link_rate); + intel_dp->max_link_rate); /* No common link rates between source and sink */ WARN_ON(common_len <= 0); @@ -3969,7 +3968,7 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) test_lane_count &= DP_MAX_LANE_COUNT_MASK; /* Validate the requested lane count */ if (test_lane_count < min_lane_count || - test_lane_count > intel_dp->max_sink_lane_count) + test_lane_count > intel_dp->max_link_lane_count) return DP_TEST_NAK; status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LINK_RATE, @@ -4637,11 +4636,11 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) yesno(drm_dp_tps3_supported(intel_dp->dpcd))); if (intel_dp->reset_link_params) { - /* Set the max lane count for sink */ - intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + /* Set the max lane count for link */ + intel_dp->max_link_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); - /* Set the max link rate for sink */ - intel_dp->max_sink_link_rate = intel_dp_max_sink_rate(intel_dp); + /* Set the max link rate for link */ + intel_dp->max_link_rate = intel_dp_max_sink_rate(intel_dp); intel_dp->reset_link_params = false; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 4a4bf9c..f97603b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -959,10 +959,10 @@ struct intel_dp { /* intersection of source and sink rates */ int num_common_rates; int common_rates[DP_MAX_SUPPORTED_RATES]; - /* Max lane count for the sink as per DPCD registers */ - 
uint8_t max_sink_lane_count; - /* Max link BW for the sink as per DPCD registers */ - int max_sink_link_rate; + /* Max lane count for the current link */ + int max_link_lane_count; + /* Max rate for the current link */ + int max_link_rate; /* sink or branch descriptor */ struct intel_dp_desc desc; struct drm_dp_aux aux; -- cgit v1.1 From 540b0b7fe915858870be6cfe0fecd1fa85ccb4d6 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 6 Apr 2017 16:44:13 +0300 Subject: drm/i915/dp: add functions for max common link rate and lane count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are the theoretical maximums common for source and sink. These are the maximums we should start with. They may be degraded in case of link training failures, and the dynamic link values are stored separately. Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/5088aca253c47dfa18251e1adb976aca1718f083.1491485983.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index a0082a3..b3df208 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -161,22 +161,27 @@ static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) intel_dp->num_sink_rates = num_rates; } -static int intel_dp_max_sink_rate(struct intel_dp *intel_dp) +/* Theoretical max between source and sink */ +static int intel_dp_max_common_rate(struct intel_dp *intel_dp) { - return intel_dp->sink_rates[intel_dp->num_sink_rates - 1]; + return intel_dp->common_rates[intel_dp->num_common_rates - 1]; } -static u8 intel_dp_max_lane_count(struct intel_dp *intel_dp) +/* Theoretical max between source and sink */ +static int intel_dp_max_common_lane_count(struct intel_dp *intel_dp) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - u8 source_max, sink_max; - - source_max = intel_dig_port->max_lanes; - sink_max = intel_dp->max_link_lane_count; + int source_max = intel_dig_port->max_lanes; + int sink_max = drm_dp_max_lane_count(intel_dp->dpcd); return min(source_max, sink_max); } +static int intel_dp_max_lane_count(struct intel_dp *intel_dp) +{ + return intel_dp->max_link_lane_count; +} + int intel_dp_link_required(int pixel_clock, int bpp) { @@ -329,7 +334,7 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, intel_dp->max_link_rate = intel_dp->common_rates[index - 1]; intel_dp->max_link_lane_count = lane_count; } else if (lane_count > 1) { - intel_dp->max_link_rate = intel_dp_max_sink_rate(intel_dp); + intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp); intel_dp->max_link_lane_count = lane_count >> 1; } else { DRM_ERROR("Link Training Unsuccessful\n"); @@ -4636,11 +4641,11 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) yesno(drm_dp_tps3_supported(intel_dp->dpcd))); if (intel_dp->reset_link_params) { - /* Set the max lane count for link */ - intel_dp->max_link_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + /* Initial max link lane count */ + intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp); - /* Set the max link rate for link */ - intel_dp->max_link_rate = intel_dp_max_sink_rate(intel_dp); + /* Initial max link rate */ + intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp); intel_dp->reset_link_params = false; } -- cgit 
v1.1 From 3d65a735d8341830ef8ec57e290ed785b01085a1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 6 Apr 2017 16:44:14 +0300 Subject: drm/i915/mst: use max link not sink lane count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The source might not support as many lanes as the sink, or the link training might have failed at higher lane counts. Take these into account. Cc: Dhinakaran Pandiyan Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/cf59530acafaf9258fb643d321ad251b44f34e29.1491485983.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 2 +- drivers/gpu/drm/i915/intel_dp_mst.c | 4 ++-- drivers/gpu/drm/i915/intel_drv.h | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index b3df208..95f2278 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -177,7 +177,7 @@ static int intel_dp_max_common_lane_count(struct intel_dp *intel_dp) return min(source_max, sink_max); } -static int intel_dp_max_lane_count(struct intel_dp *intel_dp) +int intel_dp_max_lane_count(struct intel_dp *intel_dp) { return intel_dp->max_link_lane_count; } diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 094cbdc..40608101 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -56,7 +56,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, * for MST we always configure max link bw - the spec doesn't * seem to suggest we should do otherwise. */ - lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + lane_count = intel_dp_max_lane_count(intel_dp); pipe_config->lane_count = lane_count; @@ -343,7 +343,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector, int max_rate, mode_rate, max_lanes, max_link_clock; max_link_clock = intel_dp_max_link_rate(intel_dp); - max_lanes = drm_dp_max_lane_count(intel_dp->dpcd); + max_lanes = intel_dp_max_lane_count(intel_dp); max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes); mode_rate = intel_dp_link_required(mode->clock, bpp); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f97603b..5f6e1aa 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1503,6 +1503,7 @@ void intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *co void intel_dp_mst_suspend(struct drm_device *dev); void intel_dp_mst_resume(struct drm_device *dev); int intel_dp_max_link_rate(struct intel_dp *intel_dp); +int intel_dp_max_lane_count(struct intel_dp *intel_dp); int intel_dp_rate_select(struct intel_dp *intel_dp, int rate); void intel_dp_hot_plug(struct intel_encoder *intel_encoder); void intel_power_sequencer_reset(struct drm_i915_private *dev_priv); -- cgit v1.1 From ec990e21d7668e08f6f4bd9b73e9d40d04b04198 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 6 Apr 2017 16:44:15 +0300 Subject: drm/i915/dp: localize link rate index variable more MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Localize link_rate_index to the if block, and rename to just index to reduce indent. 
Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/d348d990c96705427b93c1cac8c3e4447d06eebf.1491485983.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 95f2278..6f74349 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1636,7 +1636,6 @@ intel_dp_compute_config(struct intel_encoder *encoder, /* Conveniently, the link BW constants become indices with a shift...*/ int min_clock = 0; int max_clock; - int link_rate_index; int bpp, mode_rate; int link_avail, link_clock; int common_len; @@ -1680,11 +1679,13 @@ intel_dp_compute_config(struct intel_encoder *encoder, /* Use values requested by Compliance Test Request */ if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { - link_rate_index = intel_dp_rate_index(intel_dp->common_rates, - intel_dp->num_common_rates, - intel_dp->compliance.test_link_rate); - if (link_rate_index >= 0) - min_clock = max_clock = link_rate_index; + int index; + + index = intel_dp_rate_index(intel_dp->common_rates, + intel_dp->num_common_rates, + intel_dp->compliance.test_link_rate); + if (index >= 0) + min_clock = max_clock = index; min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; } DRM_DEBUG_KMS("DP link computation with max lane count %i " -- cgit v1.1 From 010b9b397b32e1fa2d2bd15ec521350a99ac4dc4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 6 Apr 2017 16:44:16 +0300 Subject: drm/i915/dp: use readb and writeb calls for single byte DPCD access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is what we have the readb and writeb variants for. Do some minor return value and variable cleanup while at it. Cc: Manasi Navare Cc: Ville Syrjälä Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/fd8a8f110bcfdc73a8c9241e5f9d61f7dd7c9677.1491485983.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 6f74349..81682fd 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3661,9 +3661,9 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) uint8_t frame_sync_cap; dev_priv->psr.sink_support = true; - drm_dp_dpcd_read(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, - &frame_sync_cap, 1); + drm_dp_dpcd_readb(&intel_dp->aux, + DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, + &frame_sync_cap); dev_priv->psr.aux_frame_sync = frame_sync_cap ? 
true : false; /* PSR2 needs frame sync as well */ dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; @@ -3737,8 +3737,8 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) intel_dp_set_common_rates(intel_dp); } - if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_COUNT, - &intel_dp->sink_count, 1) < 0) + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_SINK_COUNT, + &intel_dp->sink_count) <= 0) return false; /* @@ -3775,7 +3775,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) static bool intel_dp_can_mst(struct intel_dp *intel_dp) { - u8 buf[1]; + u8 mstm_cap; if (!i915.enable_dp_mst) return false; @@ -3786,10 +3786,10 @@ intel_dp_can_mst(struct intel_dp *intel_dp) if (intel_dp->dpcd[DP_DPCD_REV] < 0x12) return false; - if (drm_dp_dpcd_read(&intel_dp->aux, DP_MSTM_CAP, buf, 1) != 1) + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_MSTM_CAP, &mstm_cap) != 1) return false; - return buf[0] & DP_MST_CAP; + return mstm_cap & DP_MST_CAP; } static void @@ -3935,9 +3935,8 @@ stop: static bool intel_dp_get_sink_irq(struct intel_dp *intel_dp, u8 *sink_irq_vector) { - return drm_dp_dpcd_read(&intel_dp->aux, - DP_DEVICE_SERVICE_IRQ_VECTOR, - sink_irq_vector, 1) == 1; + return drm_dp_dpcd_readb(&intel_dp->aux, DP_DEVICE_SERVICE_IRQ_VECTOR, + sink_irq_vector) == 1; } static bool @@ -4000,13 +3999,13 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) { uint8_t test_pattern; - uint16_t test_misc; + uint8_t test_misc; __be16 h_width, v_height; int status = 0; /* Read the TEST_PATTERN (DP CTS 3.1.5) */ - status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_PATTERN, - &test_pattern, 1); + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_PATTERN, + &test_pattern); if (status <= 0) { DRM_DEBUG_KMS("Test pattern read failed\n"); return DP_TEST_NAK; @@ -4028,8 +4027,8 @@ static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) return DP_TEST_NAK; } - status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_MISC0, - &test_misc, 1); + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_MISC0, + &test_misc); if (status <= 0) { DRM_DEBUG_KMS("TEST MISC read failed\n"); return DP_TEST_NAK; @@ -4088,10 +4087,8 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) */ block += intel_connector->detect_edid->extensions; - if (!drm_dp_dpcd_write(&intel_dp->aux, - DP_TEST_EDID_CHECKSUM, - &block->checksum, - 1)) + if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_EDID_CHECKSUM, + block->checksum) <= 0) DRM_DEBUG_KMS("Failed to write EDID checksum\n"); test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE; -- cgit v1.1 From 27dbefb911f2650f473eb98b5de79e88f6559c64 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 6 Apr 2017 16:44:17 +0300 Subject: drm/i915/dp: read sink count to a temporary variable first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't clobber intel_dp->sink_count with the raw value. 
Suggested-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/37d3222115172922fcd5ab038238359935bd561f.1491485983.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 81682fd..3c5c80d 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3728,6 +3728,8 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) static bool intel_dp_get_dpcd(struct intel_dp *intel_dp) { + u8 sink_count; + if (!intel_dp_read_dpcd(intel_dp)) return false; @@ -3737,8 +3739,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) intel_dp_set_common_rates(intel_dp); } - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_SINK_COUNT, - &intel_dp->sink_count) <= 0) + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_SINK_COUNT, &sink_count) <= 0) return false; /* @@ -3746,7 +3747,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) * a member variable in intel_dp will track any changes * between short pulse interrupts. */ - intel_dp->sink_count = DP_GET_SINK_COUNT(intel_dp->sink_count); + intel_dp->sink_count = DP_GET_SINK_COUNT(sink_count); /* * SINK_COUNT == 0 and DOWNSTREAM_PORT_PRESENT == 1 implies that -- cgit v1.1 From 5f9be05432cb4c323967f6d71ce0ecc024a775c7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Apr 2017 17:56:58 +0100 Subject: drm/i915: Bail if we do not setup the RCS engine In places, we assume that RCS exists. This has been true forever, but let us catch this failure during bringup by adding an explicit check that we do have an RCS engine. v2: Make use of HAS_ENGINE (Tvrtko) Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170411165658.23828-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 71e89a9..15970f1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -153,10 +153,10 @@ intel_engine_setup(struct drm_i915_private *dev_priv, int intel_engines_init_early(struct drm_i915_private *dev_priv) { struct intel_device_info *device_info = mkwrite_device_info(dev_priv); - unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; - unsigned int mask = 0; + const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; struct intel_engine_cs *engine; enum intel_engine_id id; + unsigned int mask = 0; unsigned int i; int err; @@ -183,6 +183,12 @@ int intel_engines_init_early(struct drm_i915_private *dev_priv) if (WARN_ON(mask != ring_mask)) device_info->ring_mask = mask; + /* We always presume we have at least RCS available for later probing */ + if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) { + err = -ENODEV; + goto cleanup; + } + device_info->num_rings = hweight32(mask); return 0; -- cgit v1.1 From ddfb570c205446fc6dcfaff1efb5c0e5ca1b30d7 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 11 Apr 2017 03:11:12 -0700 Subject: drm/i915: Use the engine class to get the context size Technically speaking, the context size is per engine class, not per instance. 
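The shape of the change is roughly as follows (a condensed sketch of the switch in the diff below; the real patch also adds MISSING_CASE() warnings for unhandled generations and classes):

	switch (engine->class) {
	case RENDER_CLASS:
		ret = INTEL_GEN(dev_priv) >= 9 ? GEN9_LR_CONTEXT_RENDER_SIZE :
						 GEN8_LR_CONTEXT_RENDER_SIZE;
		break;
	default:
		/* All non-render classes (VCS, VCS2, BCS, VECS) share one size. */
		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
		break;
	}
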
v2: Add MISSING_CASE (Tvrtko) v3: Rebased v4: Restore the interface back to hiding the class lookup (Chris) Cc: Paulo Zanoni Cc: Rodrigo Vivi Cc: Chris Wilson Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Tvrtko Ursulin Signed-off-by: Oscar Mateo Link: http://patchwork.freedesktop.org/patch/msgid/1491905472-16189-1-git-send-email-oscar.mateo@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_lrc.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0dc1cc4..711125a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1923,21 +1923,30 @@ populate_lr_context(struct i915_gem_context *ctx, */ uint32_t intel_lr_context_size(struct intel_engine_cs *engine) { - int ret = 0; + struct drm_i915_private *dev_priv = engine->i915; + int ret; - WARN_ON(INTEL_GEN(engine->i915) < 8); + WARN_ON(INTEL_GEN(dev_priv) < 8); - switch (engine->id) { - case RCS: - if (INTEL_GEN(engine->i915) >= 9) + switch (engine->class) { + case RENDER_CLASS: + switch (INTEL_GEN(dev_priv)) { + default: + MISSING_CASE(INTEL_GEN(dev_priv)); + case 9: ret = GEN9_LR_CONTEXT_RENDER_SIZE; - else + break; + case 8: ret = GEN8_LR_CONTEXT_RENDER_SIZE; + break; + } break; - case VCS: - case BCS: - case VECS: - case VCS2: + + default: + MISSING_CASE(engine->class); + case VIDEO_DECODE_CLASS: + case VIDEO_ENHANCEMENT_CLASS: + case COPY_ENGINE_CLASS: ret = GEN8_LR_CONTEXT_OTHER_SIZE; break; } -- cgit v1.1 From a8e9a419c337a655e23b4bab422e85e47ee86c92 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Apr 2017 20:00:42 +0100 Subject: drm/i915: Lie and treat all engines as idle if wedged Similar to commit 8490ae207f1d ("drm/i915: Suppress busy status for engines if wedged") we also want to report intel_engine_is_idle() as true as well as the main intel_engines_are_idle(), as we now check that the engines are idle when overwriting the HWS page. This is not true whilst we are setting the device as wedged, at least according to our bookkeeping, so we have to lie to ourselves! [ 383.588601] [drm:i915_reset [i915]] *ERROR* Failed to reset chip: -110 [ 383.588685] ------------[ cut here ]------------ [ 383.588755] WARNING: CPU: 0 PID: 12 at drivers/gpu/drm/i915/intel_engine_cs.c:226 intel_engine_init_global_seqno+0x222/0x290 [i915] [ 383.588757] WARN_ON(!intel_engine_is_idle(engine)) [ 383.588759] Modules linked in: ctr ccm snd_hda_codec_hdmi snd_hda_codec_conexant snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core arc4 iwldvm mac80211 snd_pcm snd_hwdep snd_seq_midi snd_seq_midi_event rfcomm bnep snd_rawmidi intel_powerclamp coretemp dm_multipath iwlwifi crct10dif_pclmul snd_seq crc32_pclmul ghash_clmulni_intel btusb aesni_intel btrtl btbcm aes_x86_64 crypto_simd cryptd btintel snd_timer glue_helper bluetooth intel_ips snd_seq_device cfg80211 snd soundcore binfmt_misc mei_me mei dm_mirror dm_region_hash dm_log i915 intel_gtt i2c_algo_bit drm_kms_helper cfbfillrect syscopyarea cfbimgblt sysfillrect sysimgblt fb_sys_fops cfbcopyarea prime_numbers ahci libahci drm e1000e [ 383.588851] CPU: 0 PID: 12 Comm: migration/0 Not tainted 4.11.0-rc5+ #207 [ 383.588853] Hardware name: LENOVO 514328U/514328U, BIOS 6QET44WW (1.14 ) 04/20/2010 [ 383.588855] Call Trace: [ 383.588866] dump_stack+0x63/0x90 [ 383.588871] __warn+0xc7/0xf0 [ 383.588876] warn_slowpath_fmt+0x4a/0x50 [ 383.588883] ? 
set_next_entity+0x821/0x910 [ 383.588943] intel_engine_init_global_seqno+0x222/0x290 [i915] [ 383.588998] __i915_gem_set_wedged_BKL+0xa4/0x190 [i915] [ 383.589003] ? __switch_to+0x215/0x390 [ 383.589008] multi_cpu_stop+0xbb/0xe0 [ 383.589012] ? cpu_stop_queue_work+0x90/0x90 [ 383.589016] cpu_stopper_thread+0x82/0x110 [ 383.589021] smpboot_thread_fn+0x137/0x190 [ 383.589026] kthread+0xf7/0x130 [ 383.589030] ? sort_range+0x20/0x20 [ 383.589034] ? kthread_park+0x90/0x90 [ 383.589040] ret_from_fork+0x2c/0x40 Fixes: 2ca9faa551c4 ("drm/i915: Assert the engine is idle before overwiting the HWS") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170411190042.25662-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 15970f1..ee87ca7 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1131,6 +1131,10 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + /* More white lies, if wedged, hw state is inconsistent */ + if (i915_terminally_wedged(&dev_priv->gpu_error)) + return true; + /* Any inflight/incomplete requests? */ if (!i915_seqno_passed(intel_engine_get_seqno(engine), intel_engine_last_submit(engine))) -- cgit v1.1 From 48921260b7d030ec6a2e11a435eae28ddb88ea56 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Apr 2017 18:58:50 +0100 Subject: drm/i915/execlists: Document runtime pm for intel_lrc_irq_handler() We indirectly hold the runtime-pm for the intel_lrc_irq_handler() by virtue of dev_priv->gt.awake keeping a wakeref whilst the requests are busy. As this is not obvious from the code, add a comment. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170411175850.2470-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_lrc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 711125a..7df278f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -515,6 +515,15 @@ static void intel_lrc_irq_handler(unsigned long data) struct execlist_port *port = engine->execlist_port; struct drm_i915_private *dev_priv = engine->i915; + /* We can skip acquiring intel_runtime_pm_get() here as it was taken + * on our behalf by the request (see i915_gem_mark_busy()) and it will + * not be relinquished until the device is idle (see + * i915_gem_idle_work_handler()). As a precaution, we make sure + * that all ELSP are drained i.e. we have processed the CSB, + * before allowing ourselves to idle and calling intel_runtime_pm_put(). + */ + GEM_BUG_ON(!dev_priv->gt.awake); + intel_uncore_forcewake_get(dev_priv, engine->fw_domains); /* Prefer doing test_and_clear_bit() as a two stage operation to avoid -- cgit v1.1 From 735a9876cf7f5ddda1027942344f31d3a1a4488a Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 10 Apr 2017 11:07:07 +0200 Subject: drm/i915: Remove unused members from intel_tv.c They have been unused since 2010, after the code for intel_tv_save/restore was removed in the below commit: commit 6443170f6d862a1cc89e61e4bb2410b714b875f4 Author: Eric Anholt Date: Fri Apr 2 15:24:27 2010 -0700 drm/i915: Remove dead KMS encoder save/restore code. 
This was brought over from UMS, and used for a while until we decided that drm_helper_resume_force_mode was easier and more reliable, since it didn't require duplicating all the code deleted here. We just forgot to delete all that junk for a while. Reviewed-by: Chris Wilson Reviewed-by: Daniel Vetter Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1491815239-10685-2-git-send-email-maarten.lankhorst@linux.intel.com [mlankhorst: Add commit blurb based on danvet's feedback.] --- drivers/gpu/drm/i915/intel_tv.c | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index e077c2a..3af857a 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -50,39 +50,6 @@ struct intel_tv { int type; const char *tv_format; int margin[4]; - u32 save_TV_H_CTL_1; - u32 save_TV_H_CTL_2; - u32 save_TV_H_CTL_3; - u32 save_TV_V_CTL_1; - u32 save_TV_V_CTL_2; - u32 save_TV_V_CTL_3; - u32 save_TV_V_CTL_4; - u32 save_TV_V_CTL_5; - u32 save_TV_V_CTL_6; - u32 save_TV_V_CTL_7; - u32 save_TV_SC_CTL_1, save_TV_SC_CTL_2, save_TV_SC_CTL_3; - - u32 save_TV_CSC_Y; - u32 save_TV_CSC_Y2; - u32 save_TV_CSC_U; - u32 save_TV_CSC_U2; - u32 save_TV_CSC_V; - u32 save_TV_CSC_V2; - u32 save_TV_CLR_KNOBS; - u32 save_TV_CLR_LEVEL; - u32 save_TV_WIN_POS; - u32 save_TV_WIN_SIZE; - u32 save_TV_FILTER_CTL_1; - u32 save_TV_FILTER_CTL_2; - u32 save_TV_FILTER_CTL_3; - - u32 save_TV_H_LUMA[60]; - u32 save_TV_H_CHROMA[60]; - u32 save_TV_V_LUMA[43]; - u32 save_TV_V_CHROMA[43]; - - u32 save_TV_DAC; - u32 save_TV_CTL; }; struct video_levels { -- cgit v1.1 From 0e891b3f447f4d8af3010761fa0c358b057ae2e8 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 10 Apr 2017 11:07:08 +0200 Subject: drm/i915: Convert intel_tv connector properties to atomic, v5. intel_tv has properties that are handled in the atomic core, but needs a modeset to update the properties inside the connector. The detect(), get_mode() and mode_valid() probe callbacks also depend on the connector state, which made this a good connector to convert first. It helped find all the issues when converting connectors to atomic. Because of these requirements, connector atomic_check() was added and connection_mutex is held during probing. The diffstat looks more favorable now. :) Changes since v1: - Add intel_encoder->swap_state to allow updating connector state. - Add intel_tv->format for detect_mode and mode_valid, updated on atomic commit. Changes since v2: - Fix typo in tv_choose_preferred modes function name. - Assignment of tv properties is done in core, so intel_tv only needs a atomic_check function. Thanks Ville! Changes since v3: - connection_mutex is now held in mode_valid() and get_modes(), this removes the need for caching parts of the connector_state. Changes since v4: - Use the new atomic connector check function. 
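The net effect is that intel_tv no longer mirrors the TV mode and margins in its own struct; they are read straight from the atomic connector state, along these lines (an illustrative sketch drawn from the diff below):

	const struct tv_mode *tv_mode = &tv_modes[conn_state->tv.mode];

	xpos += conn_state->tv.margins.left;
	ypos += conn_state->tv.margins.top;
	xsize -= conn_state->tv.margins.left + conn_state->tv.margins.right;
	ysize -= conn_state->tv.margins.top + conn_state->tv.margins.bottom;
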
Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1491815239-10685-3-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_tv.c | 175 +++++++++++++++------------------------- 1 file changed, 63 insertions(+), 112 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 3af857a..784df02 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -48,8 +48,6 @@ struct intel_tv { struct intel_encoder base; int type; - const char *tv_format; - int margin[4]; }; struct video_levels { @@ -840,32 +838,18 @@ intel_disable_tv(struct intel_encoder *encoder, I915_WRITE(TV_CTL, I915_READ(TV_CTL) & ~TV_ENC_ENABLE); } -static const struct tv_mode * -intel_tv_mode_lookup(const char *tv_format) +static const struct tv_mode *intel_tv_mode_find(struct drm_connector_state *conn_state) { - int i; - - for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { - const struct tv_mode *tv_mode = &tv_modes[i]; + int format = conn_state->tv.mode; - if (!strcmp(tv_format, tv_mode->name)) - return tv_mode; - } - return NULL; -} - -static const struct tv_mode * -intel_tv_mode_find(struct intel_tv *intel_tv) -{ - return intel_tv_mode_lookup(intel_tv->tv_format); + return &tv_modes[format]; } static enum drm_mode_status intel_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - struct intel_tv *intel_tv = intel_attached_tv(connector); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; if (mode->clock > max_dotclk) @@ -892,8 +876,7 @@ intel_tv_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct intel_tv *intel_tv = enc_to_tv(encoder); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); if (!tv_mode) return false; @@ -999,7 +982,7 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); u32 tv_ctl; u32 scctl1, scctl2, scctl3; int i, j; @@ -1102,12 +1085,12 @@ static void intel_tv_pre_enable(struct intel_encoder *encoder, else ysize = 2*tv_mode->nbr_end + 1; - xpos += intel_tv->margin[TV_MARGIN_LEFT]; - ypos += intel_tv->margin[TV_MARGIN_TOP]; - xsize -= (intel_tv->margin[TV_MARGIN_LEFT] + - intel_tv->margin[TV_MARGIN_RIGHT]); - ysize -= (intel_tv->margin[TV_MARGIN_TOP] + - intel_tv->margin[TV_MARGIN_BOTTOM]); + xpos += conn_state->tv.margins.left; + ypos += conn_state->tv.margins.top; + xsize -= (conn_state->tv.margins.left + + conn_state->tv.margins.right); + ysize -= (conn_state->tv.margins.top + + conn_state->tv.margins.bottom); I915_WRITE(TV_WIN_POS, (xpos<<16)|ypos); I915_WRITE(TV_WIN_SIZE, (xsize<<16)|ysize); @@ -1255,7 +1238,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, static void intel_tv_find_better_format(struct drm_connector *connector) { struct intel_tv *intel_tv = intel_attached_tv(connector); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = 
intel_tv_mode_find(connector->state); int i; if ((intel_tv->type == DRM_MODE_CONNECTOR_Component) == @@ -1271,9 +1254,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector) break; } - intel_tv->tv_format = tv_mode->name; - drm_object_property_set_value(&connector->base, - connector->dev->mode_config.tv_mode_property, i); + connector->state->tv.mode = i; } /** @@ -1314,16 +1295,15 @@ intel_tv_detect(struct drm_connector *connector, connector_status_connected; } else status = connector_status_unknown; - } else - return connector->status; - if (status != connector_status_connected) - return status; - - intel_tv->type = type; - intel_tv_find_better_format(connector); + if (status == connector_status_connected) { + intel_tv->type = type; + intel_tv_find_better_format(connector); + } - return connector_status_connected; + return status; + } else + return connector->status; } static const struct input_res { @@ -1343,12 +1323,9 @@ static const struct input_res { * Chose preferred mode according to line number of TV format */ static void -intel_tv_chose_preferred_modes(struct drm_connector *connector, +intel_tv_choose_preferred_modes(const struct tv_mode *tv_mode, struct drm_display_mode *mode_ptr) { - struct intel_tv *intel_tv = intel_attached_tv(connector); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); - if (tv_mode->nbr_end < 480 && mode_ptr->vdisplay == 480) mode_ptr->type |= DRM_MODE_TYPE_PREFERRED; else if (tv_mode->nbr_end > 480) { @@ -1371,8 +1348,7 @@ static int intel_tv_get_modes(struct drm_connector *connector) { struct drm_display_mode *mode_ptr; - struct intel_tv *intel_tv = intel_attached_tv(connector); - const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv); + const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int j, count = 0; u64 tmp; @@ -1415,7 +1391,7 @@ intel_tv_get_modes(struct drm_connector *connector) mode_ptr->clock = (int) tmp; mode_ptr->type = DRM_MODE_TYPE_DRIVER; - intel_tv_chose_preferred_modes(connector, mode_ptr); + intel_tv_choose_preferred_modes(tv_mode, mode_ptr); drm_mode_probed_add(connector, mode_ptr); count++; } @@ -1430,74 +1406,47 @@ intel_tv_destroy(struct drm_connector *connector) kfree(connector); } - -static int -intel_tv_set_property(struct drm_connector *connector, struct drm_property *property, - uint64_t val) -{ - struct drm_device *dev = connector->dev; - struct intel_tv *intel_tv = intel_attached_tv(connector); - struct drm_crtc *crtc = intel_tv->base.base.crtc; - int ret = 0; - bool changed = false; - - ret = drm_object_property_set_value(&connector->base, property, val); - if (ret < 0) - goto out; - - if (property == dev->mode_config.tv_left_margin_property && - intel_tv->margin[TV_MARGIN_LEFT] != val) { - intel_tv->margin[TV_MARGIN_LEFT] = val; - changed = true; - } else if (property == dev->mode_config.tv_right_margin_property && - intel_tv->margin[TV_MARGIN_RIGHT] != val) { - intel_tv->margin[TV_MARGIN_RIGHT] = val; - changed = true; - } else if (property == dev->mode_config.tv_top_margin_property && - intel_tv->margin[TV_MARGIN_TOP] != val) { - intel_tv->margin[TV_MARGIN_TOP] = val; - changed = true; - } else if (property == dev->mode_config.tv_bottom_margin_property && - intel_tv->margin[TV_MARGIN_BOTTOM] != val) { - intel_tv->margin[TV_MARGIN_BOTTOM] = val; - changed = true; - } else if (property == dev->mode_config.tv_mode_property) { - if (val >= ARRAY_SIZE(tv_modes)) { - ret = -EINVAL; - goto out; - } - if (!strcmp(intel_tv->tv_format, tv_modes[val].name)) - goto out; - - 
intel_tv->tv_format = tv_modes[val].name; - changed = true; - } else { - ret = -EINVAL; - goto out; - } - - if (changed && crtc) - intel_crtc_restore_mode(crtc); -out: - return ret; -} - static const struct drm_connector_funcs intel_tv_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_tv_destroy, - .set_property = intel_tv_set_property, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, .fill_modes = drm_helper_probe_single_connector_modes, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, }; +static int intel_tv_atomic_check(struct drm_connector *connector, + struct drm_connector_state *new_state) +{ + struct drm_crtc_state *new_crtc_state; + struct drm_connector_state *old_state; + + if (!new_state->crtc) + return 0; + + old_state = drm_atomic_get_old_connector_state(new_state->state, connector); + new_crtc_state = drm_atomic_get_new_crtc_state(new_state->state, new_state->crtc); + + if (old_state->tv.mode != new_state->tv.mode || + old_state->tv.margins.left != new_state->tv.margins.left || + old_state->tv.margins.right != new_state->tv.margins.right || + old_state->tv.margins.top != new_state->tv.margins.top || + old_state->tv.margins.bottom != new_state->tv.margins.bottom) { + /* Force a modeset. */ + + new_crtc_state->connectors_changed = true; + } + + return 0; +} + static const struct drm_connector_helper_funcs intel_tv_connector_helper_funcs = { .detect_ctx = intel_tv_detect, .mode_valid = intel_tv_mode_valid, .get_modes = intel_tv_get_modes, + .atomic_check = intel_tv_atomic_check, }; static const struct drm_encoder_funcs intel_tv_enc_funcs = { @@ -1515,6 +1464,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) u32 tv_dac_on, tv_dac_off, save_tv_dac; const char *tv_format_names[ARRAY_SIZE(tv_modes)]; int i, initial_mode = 0; + struct drm_connector_state *state; if ((I915_READ(TV_CTL) & TV_FUSE_STATE_MASK) == TV_FUSE_STATE_DISABLED) return; @@ -1560,6 +1510,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_encoder = &intel_tv->base; connector = &intel_connector->base; + state = connector->state; /* The documentation, for the older chipsets at least, recommend * using a polling method rather than hotplug detection for TVs. 
@@ -1597,12 +1548,12 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_tv->type = DRM_MODE_CONNECTOR_Unknown; /* BIOS margin values */ - intel_tv->margin[TV_MARGIN_LEFT] = 54; - intel_tv->margin[TV_MARGIN_TOP] = 36; - intel_tv->margin[TV_MARGIN_RIGHT] = 46; - intel_tv->margin[TV_MARGIN_BOTTOM] = 37; + state->tv.margins.left = 54; + state->tv.margins.top = 36; + state->tv.margins.right = 46; + state->tv.margins.bottom = 37; - intel_tv->tv_format = tv_modes[initial_mode].name; + state->tv.mode = initial_mode; drm_connector_helper_add(connector, &intel_tv_connector_helper_funcs); connector->interlace_allowed = false; @@ -1616,17 +1567,17 @@ intel_tv_init(struct drm_i915_private *dev_priv) tv_format_names); drm_object_attach_property(&connector->base, dev->mode_config.tv_mode_property, - initial_mode); + state->tv.mode); drm_object_attach_property(&connector->base, dev->mode_config.tv_left_margin_property, - intel_tv->margin[TV_MARGIN_LEFT]); + state->tv.margins.left); drm_object_attach_property(&connector->base, dev->mode_config.tv_top_margin_property, - intel_tv->margin[TV_MARGIN_TOP]); + state->tv.margins.top); drm_object_attach_property(&connector->base, dev->mode_config.tv_right_margin_property, - intel_tv->margin[TV_MARGIN_RIGHT]); + state->tv.margins.right); drm_object_attach_property(&connector->base, dev->mode_config.tv_bottom_margin_property, - intel_tv->margin[TV_MARGIN_BOTTOM]); + state->tv.margins.bottom); } -- cgit v1.1 From 200819ab5b4d2d35fde5aedf7f0ee7582a9c7263 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 10 Apr 2017 12:51:10 +0200 Subject: drm/i915: Remove unused dp properties for dp-mst. Those properties are not hooked up on MST and were ignored. Best not expose them at all. Without this the next patch fails to start on X.org, because the DP-MST properties could not be read. 
Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/751b85a0-81cd-09e2-9e60-6d4ddbf1c6ac@linux.intel.com Testcase: kms_properties Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 2 +- drivers/gpu/drm/i915/intel_dp_mst.c | 1 - drivers/gpu/drm/i915/intel_drv.h | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 8389ed1..944b144 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5145,7 +5145,7 @@ bool intel_dp_is_edp(struct drm_i915_private *dev_priv, enum port port) return intel_bios_is_port_edp(dev_priv, port); } -void +static void intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector) { struct intel_connector *intel_connector = to_intel_connector(connector); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 3451e2a..b2f5de5 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -459,7 +459,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo drm_mode_connector_attach_encoder(&intel_connector->base, &intel_dp->mst_encoders[i]->base.base); } - intel_dp_add_properties(intel_dp, connector); drm_object_attach_property(&connector->base, dev->mode_config.path_property, 0); drm_object_attach_property(&connector->base, dev->mode_config.tile_property, 0); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7036054..f78d7c5 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1499,7 +1499,6 @@ void intel_edp_backlight_off(struct intel_dp *intel_dp); void intel_edp_panel_vdd_on(struct intel_dp *intel_dp); void intel_edp_panel_on(struct intel_dp *intel_dp); void intel_edp_panel_off(struct intel_dp *intel_dp); -void intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connector); void intel_dp_mst_suspend(struct drm_device *dev); void intel_dp_mst_resume(struct drm_device *dev); int intel_dp_max_link_rate(struct intel_dp *intel_dp); -- cgit v1.1 From 2e222edabfebf8139b681fb17a9cd80bc9ca2896 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 10 Apr 2017 11:07:09 +0200 Subject: drm/i915: Convert intel_dp_mst connector properties to atomic. MST doesn't support setting any properties, but it should still use the atomic helper for setting properties. Only path and tile properties are supported (read-only). Those are immutable, and handled by drm core. 
Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1491815239-10685-4-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp_mst.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index b2f5de5..5af22a7 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -294,14 +294,6 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force) return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, intel_connector->port); } -static int -intel_dp_mst_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t val) -{ - return 0; -} - static void intel_dp_mst_connector_destroy(struct drm_connector *connector) { @@ -318,8 +310,7 @@ static const struct drm_connector_funcs intel_dp_mst_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .detect = intel_dp_mst_detect, .fill_modes = drm_helper_probe_single_connector_modes, - .set_property = intel_dp_mst_set_property, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_dp_mst_connector_destroy, -- cgit v1.1 From eadc1db859c83324653ea640d5e06d4e2bba9e86 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 10 Apr 2017 11:07:10 +0200 Subject: drm/i915: Convert intel_crt connector properties to atomic. No properties are supported, so just use the helper and reject everything. Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1491815239-10685-5-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_crt.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 2797bf3..84a1f5e 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -777,13 +777,6 @@ out: return ret; } -static int intel_crt_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t value) -{ - return 0; -} - void intel_crt_reset(struct drm_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->dev); @@ -814,10 +807,9 @@ static const struct drm_connector_funcs intel_crt_connector_funcs = { .late_register = intel_connector_register, .early_unregister = intel_connector_unregister, .destroy = intel_crt_destroy, - .set_property = intel_crt_set_property, + .set_property = drm_atomic_helper_connector_set_property, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_get_property = intel_connector_atomic_get_property, }; static const struct drm_connector_helper_funcs intel_crt_connector_helper_funcs = { -- cgit v1.1 From 9cb9be037c2e5eb21a759b15c1e428ba6d544c6d Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 10 Apr 2017 11:07:11 +0200 Subject: drm/i915: Convert intel DVO connector to atomic No properties are supported, so just use the helper and reject everything. 
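The MST, CRT and DVO conversions all reduce to the same wiring; a condensed sketch of the resulting connector funcs (assumed field subset only, the real tables also carry detect, register and destroy hooks):

static const struct drm_connector_funcs sketch_connector_funcs = {
	.dpms = drm_atomic_helper_connector_dpms,
	.fill_modes = drm_helper_probe_single_connector_modes,
	/* the atomic helper stores known properties and rejects the rest */
	.set_property = drm_atomic_helper_connector_set_property,
	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
};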
Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1491815239-10685-6-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dvo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 6025839..c1544a5 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -350,7 +350,7 @@ static const struct drm_connector_funcs intel_dvo_connector_funcs = { .early_unregister = intel_connector_unregister, .destroy = intel_dvo_destroy, .fill_modes = drm_helper_probe_single_connector_modes, - .atomic_get_property = intel_connector_atomic_get_property, + .set_property = drm_atomic_helper_connector_set_property, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, }; -- cgit v1.1 From c50b4bf6e25a3ce2b058a132f49335ac9fa67062 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 4 Apr 2017 15:22:48 +0200 Subject: Revert "drm/i915: Lock mode_config.mutex in intel_display_resume." This reverts commit ea49c9acf2db7082f0406bb3a570cc6bad37082b. mode_config.mutex was originally added to fix WARNs in connector functions, but now that atomic nonblocking modeset support is included, we will likely never hold any any lock at all. The WARN mentioned in commit bbf35e9defb9a6d1 ("drm/i915: Pass atomic state to intel_audio_codec_enable, v2."), so it's safe to revert this now. Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1491312168-18147-1-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b6b40cd..48a5462 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15559,13 +15559,6 @@ void intel_display_resume(struct drm_device *dev) if (state) state->acquire_ctx = &ctx; - /* - * This is a cludge because with real atomic modeset mode_config.mutex - * won't be taken. Unfortunately some probed state like - * audio_codec_enable is still protected by mode_config.mutex, so lock - * it here for now. - */ - mutex_lock(&dev->mode_config.mutex); drm_modeset_acquire_init(&ctx, 0); while (1) { @@ -15581,7 +15574,6 @@ void intel_display_resume(struct drm_device *dev) drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); - mutex_unlock(&dev->mode_config.mutex); if (ret) DRM_ERROR("Restoring old state failed with %i\n", ret); -- cgit v1.1 From aab9094b032528da1b32cc5203964075cd04d5be Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 4 Apr 2017 15:24:57 +0200 Subject: drm/i915: Do not use lock all in hsw_trans_edp_pipe_A_crc_wa There is no need to acquire all locks here, doing a commit after forcing a modeset on the affected crtc is enough. Any other locks needed will be acquired as needed. 
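The core of the change is the standard acquire-context/backoff idiom. A minimal sketch of that idiom, assuming a hypothetical rebuild_state() helper that re-adds the needed crtc state after every backoff (the real code does this via intel_atomic_get_crtc_state in the diff below):

static int sketch_commit_with_backoff(struct drm_atomic_state *state)
{
	struct drm_modeset_acquire_ctx ctx;
	int ret;

	drm_modeset_acquire_init(&ctx, 0);
	state->acquire_ctx = &ctx;

retry:
	ret = rebuild_state(state);	/* hypothetical: re-adds crtc/plane state */
	if (!ret)
		ret = drm_atomic_commit(state);
	if (ret == -EDEADLK) {
		/* drop the locks we hold, wait for the contended one, retry */
		drm_atomic_state_clear(state);
		drm_modeset_backoff(&ctx);
		goto retry;
	}

	drm_atomic_state_put(state);
	drm_modeset_drop_locks(&ctx);
	drm_modeset_acquire_fini(&ctx);
	return ret;
}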
Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1491312297-18673-1-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pipe_crc.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 206ee4f..647426c 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -513,16 +513,20 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv, struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A); struct intel_crtc_state *pipe_config; struct drm_atomic_state *state; + struct drm_modeset_acquire_ctx ctx; int ret = 0; - drm_modeset_lock_all(dev); + drm_modeset_acquire_init(&ctx, 0); + state = drm_atomic_state_alloc(dev); if (!state) { ret = -ENOMEM; goto unlock; } - state->acquire_ctx = crtc->base.dev->mode_config.acquire_ctx; + state->acquire_ctx = &ctx; + +retry: pipe_config = intel_atomic_get_crtc_state(state, crtc); if (IS_ERR(pipe_config)) { ret = PTR_ERR(pipe_config); @@ -537,10 +541,17 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv, ret = drm_atomic_commit(state); put_state: + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + drm_modeset_backoff(&ctx); + goto retry; + } + drm_atomic_state_put(state); unlock: WARN(ret, "Toggling workaround to %i returns %i\n", enable, ret); - drm_modeset_unlock_all(dev); + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); } static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, -- cgit v1.1 From ef74921bc679232c6590afa881d3ea605ebdddd8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 12 Apr 2017 12:01:10 +0100 Subject: drm/i915: Combine write_domain flushes to a single function In the next patch, we will introduce a new cache domain for differentiating between GTT access and direct WC access. This will require us to include WC in our write_domain flushes. Rather than duplicate a third function, combine the existing two into one and flushing WC writes will then be automatically handled as well. v2: Be smarter and clearer by passing in the write domains to flush (Joonas) v3: One missed ~ in v2 conversion Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170412110111.26626-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 125 ++++++++++----------- .../gpu/drm/i915/selftests/i915_gem_coherency.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 2 +- 3 files changed, 64 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cb8c6a9..f1c2866 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,8 +46,6 @@ #include static void i915_gem_flush_free_objects(struct drm_i915_private *i915); -static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); -static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { @@ -705,6 +703,61 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, args->size, &args->handle); } +static inline enum fb_op_origin +fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) +{ + return (domain == I915_GEM_DOMAIN_GTT ? 
+ obj->frontbuffer_ggtt_origin : ORIGIN_CPU); +} + +static void +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + + if (!(obj->base.write_domain & flush_domains)) + return; + + /* No actual flushing is required for the GTT write domain. Writes + * to it "immediately" go to main memory as far as we know, so there's + * no chipset flush. It also doesn't land in render cache. + * + * However, we do have to enforce the order so that all writes through + * the GTT land before any writes to the device, such as updates to + * the GATT itself. + * + * We also have to wait a bit for the writes to land from the GTT. + * An uncached read (i.e. mmio) seems to be ideal for the round-trip + * timing. This issue has only been observed when switching quickly + * between GTT writes and CPU reads from inside the kernel on recent hw, + * and it appears to only affect discrete GTT blocks (i.e. on LLC + * system agents we cannot reproduce this behaviour). + */ + wmb(); + + switch (obj->base.write_domain) { + case I915_GEM_DOMAIN_GTT: + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { + if (intel_runtime_pm_get_if_in_use(dev_priv)) { + spin_lock_irq(&dev_priv->uncore.lock); + POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); + spin_unlock_irq(&dev_priv->uncore.lock); + intel_runtime_pm_put(dev_priv); + } + } + + intel_fb_obj_flush(obj, + fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); + break; + + case I915_GEM_DOMAIN_CPU: + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); + break; + } + + obj->base.write_domain = 0; +} + static inline int __copy_to_user_swizzled(char __user *cpu_vaddr, const char *gpu_vaddr, int gpu_offset, @@ -794,7 +847,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, goto out; } - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This @@ -846,7 +899,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, goto out; } - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* If we're not in the cpu write domain, set ourself into the * gtt write domain and manually flush cachelines (as required). @@ -1501,13 +1554,6 @@ err: return ret; } -static inline enum fb_op_origin -write_origin(struct drm_i915_gem_object *obj, unsigned domain) -{ - return (domain == I915_GEM_DOMAIN_GTT ? - obj->frontbuffer_ggtt_origin : ORIGIN_CPU); -} - static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915; @@ -1602,7 +1648,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, mutex_unlock(&dev->struct_mutex); if (write_domain != 0) - intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); + intel_fb_obj_invalidate(obj, + fb_write_origin(obj, write_domain)); out_unpin: i915_gem_object_unpin_pages(obj); @@ -3320,56 +3367,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) return ret; } -/** Flushes the GTT write domain for the object if it's dirty. */ -static void -i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - - if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) - return; - - /* No actual flushing is required for the GTT write domain. 
Writes - * to it "immediately" go to main memory as far as we know, so there's - * no chipset flush. It also doesn't land in render cache. - * - * However, we do have to enforce the order so that all writes through - * the GTT land before any writes to the device, such as updates to - * the GATT itself. - * - * We also have to wait a bit for the writes to land from the GTT. - * An uncached read (i.e. mmio) seems to be ideal for the round-trip - * timing. This issue has only been observed when switching quickly - * between GTT writes and CPU reads from inside the kernel on recent hw, - * and it appears to only affect discrete GTT blocks (i.e. on LLC - * system agents we cannot reproduce this behaviour). - */ - wmb(); - if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) { - if (intel_runtime_pm_get_if_in_use(dev_priv)) { - spin_lock_irq(&dev_priv->uncore.lock); - POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); - spin_unlock_irq(&dev_priv->uncore.lock); - intel_runtime_pm_put(dev_priv); - } - } - - intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); - - obj->base.write_domain = 0; -} - -/** Flushes the CPU write domain for the object if it's dirty. */ -static void -i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) -{ - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) - return; - - i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); - obj->base.write_domain = 0; -} - static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) { if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) @@ -3428,7 +3425,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_cpu_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -3802,7 +3799,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) return 0; - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); /* Flush the CPU cache if it's still invalid. 
*/ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index f08d017..c61d0ef 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -139,7 +139,7 @@ static int wc_set(struct drm_i915_gem_object *obj, int err; /* XXX GTT write followed by WC write go missing */ - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~0); err = i915_gem_object_set_to_gtt_domain(obj, true); if (err) @@ -163,7 +163,7 @@ static int wc_get(struct drm_i915_gem_object *obj, int err; /* XXX WC write followed by GTT write go missing */ - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~0); err = i915_gem_object_set_to_gtt_domain(obj, false); if (err) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 67d82bf..1634247 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -266,7 +266,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (offset >= obj->base.size) continue; - i915_gem_object_flush_gtt_write_domain(obj); + flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); cpu = kmap(p) + offset_in_page(offset); -- cgit v1.1 From e22d8e3c69a9f432b40baaaf3f894a128fdc2222 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 12 Apr 2017 12:01:11 +0100 Subject: drm/i915: Treat WC a separate cache domain When discussing a new WC mmap, we based the interface upon the assumption that GTT was fully coherent. How naive! Commits 3b5724d702ef ("drm/i915: Wait for writes through the GTT to land before reading back") and ed4596ea992d ("drm/i915/guc: WA to address the Ringbuffer coherency issue") demonstrate that writes through the GTT are indeed delayed and may be overtaken by direct WC access. To be safe, if userspace is mixing WC mmaps with other potential GTT access (pwrite, GTT mmaps) it should use set_domain(WC). 
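For userspace that rule of thumb translates to a set-domain call before touching the WC mmap again. A hedged sketch, with fd and handle assumed to come from the usual GEM open/create/mmap setup and I915_GEM_DOMAIN_WC taken from the uapi addition below:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int set_wc_domain(int fd, uint32_t handle, int will_write)
{
	struct drm_i915_gem_set_domain sd;

	memset(&sd, 0, sizeof(sd));
	sd.handle = handle;
	sd.read_domains = I915_GEM_DOMAIN_WC;
	sd.write_domain = will_write ? I915_GEM_DOMAIN_WC : 0;

	/* flushes any delayed GTT writes before direct WC access */
	return ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
}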
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96563 Testcase: igt/gem_pwrite/small-gtt* Testcase: igt/drv_selftest/coherency Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170412110111.26626-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 5 +- drivers/gpu/drm/i915/i915_gem.c | 76 ++++++++++++++++++++-- drivers/gpu/drm/i915/intel_guc_log.c | 6 +- .../gpu/drm/i915/selftests/i915_gem_coherency.c | 10 +-- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 2 +- include/uapi/drm/i915_drm.h | 2 + 6 files changed, 85 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ed079c2..1af4e6f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3453,8 +3453,9 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX int __must_check -i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, - bool write); +i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write); +int __must_check +i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write); int __must_check i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); struct i915_vma * __must_check diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f1c2866..33fb11c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1637,10 +1637,12 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (err) goto out_unpin; - if (read_domains & I915_GEM_DOMAIN_GTT) - err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); + if (read_domains & I915_GEM_DOMAIN_WC) + err = i915_gem_object_set_to_wc_domain(obj, write_domain); + else if (read_domains & I915_GEM_DOMAIN_GTT) + err = i915_gem_object_set_to_gtt_domain(obj, write_domain); else - err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); + err = i915_gem_object_set_to_cpu_domain(obj, write_domain); /* And bump the LRU for this access */ i915_gem_object_bump_inactive_ggtt(obj); @@ -1784,6 +1786,9 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) * into userspace. (This view is aligned and sized appropriately for * fenced access.) * + * 2 - Recognise WC as a separate cache domain so that we can flush the + * delayed writes via GTT before performing direct access via WC. + * * Restrictions: * * * snoopable objects cannot be accessed via the GTT. It can cause machine @@ -1811,7 +1816,7 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) */ int i915_gem_mmap_gtt_version(void) { - return 1; + return 2; } static inline struct i915_ggtt_view @@ -3387,6 +3392,69 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) } /** + * Moves a single object to the WC read, and possibly write domain. + * @obj: object to act on + * @write: ask for write access or read only + * + * This function returns when the move is complete, including waiting on + * flushes to occur. + */ +int +i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) +{ + int ret; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + (write ? 
I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT, + NULL); + if (ret) + return ret; + + if (obj->base.write_domain == I915_GEM_DOMAIN_WC) + return 0; + + /* Flush and acquire obj->pages so that we are coherent through + * direct access in memory with previous cached writes through + * shmemfs and that our cache domain tracking remains valid. + * For example, if the obj->filp was moved to swap without us + * being notified and releasing the pages, we would mistakenly + * continue to assume that the obj remained out of the CPU cached + * domain. + */ + ret = i915_gem_object_pin_pages(obj); + if (ret) + return ret; + + flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); + + /* Serialise direct access to this object with the barriers for + * coherent writes from the GPU, by effectively invalidating the + * WC domain upon first access. + */ + if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0) + mb(); + + /* It should now be out of any other write domains, and we can update + * the domain values for our changes. + */ + GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0); + obj->base.read_domains |= I915_GEM_DOMAIN_WC; + if (write) { + obj->base.read_domains = I915_GEM_DOMAIN_WC; + obj->base.write_domain = I915_GEM_DOMAIN_WC; + obj->mm.dirty = true; + } + + i915_gem_object_unpin_pages(obj); + return 0; +} + +/** * Moves a single object to the GTT read, and possibly write domain. * @obj: object to act on * @write: ask for write access or read only diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 6fb63a3..16d3b87 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -359,12 +359,16 @@ static int guc_log_runtime_create(struct intel_guc *guc) void *vaddr; struct rchan *guc_log_relay_chan; size_t n_subbufs, subbuf_size; - int ret = 0; + int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); GEM_BUG_ON(guc_log_has_runtime(guc)); + ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true); + if (ret) + return ret; + /* Create a WC (Uncached for read) vmalloc mapping of log * buffer pages, so that we can directly get the data * (up-to-date) from memory. 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index c61d0ef..95d4aeb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -138,10 +138,7 @@ static int wc_set(struct drm_i915_gem_object *obj, typeof(v) *map; int err; - /* XXX GTT write followed by WC write go missing */ - flush_write_domain(obj, ~0); - - err = i915_gem_object_set_to_gtt_domain(obj, true); + err = i915_gem_object_set_to_wc_domain(obj, true); if (err) return err; @@ -162,10 +159,7 @@ static int wc_get(struct drm_i915_gem_object *obj, typeof(v) map; int err; - /* XXX WC write followed by GTT write go missing */ - flush_write_domain(obj, ~0); - - err = i915_gem_object_set_to_gtt_domain(obj, false); + err = i915_gem_object_set_to_wc_domain(obj, false); if (err) return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 98b7aac..6664cb2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -580,7 +580,7 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) if (err) goto err; - err = i915_gem_object_set_to_gtt_domain(obj, true); + err = i915_gem_object_set_to_wc_domain(obj, true); if (err) goto err; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 3554495..9ee06ec 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -666,6 +666,8 @@ struct drm_i915_gem_relocation_entry { #define I915_GEM_DOMAIN_VERTEX 0x00000020 /** GTT domain - aperture and scanout */ #define I915_GEM_DOMAIN_GTT 0x00000040 +/** WC domain - uncached access */ +#define I915_GEM_DOMAIN_WC 0x00000080 /** @} */ struct drm_i915_gem_exec_object { -- cgit v1.1 From 0757ac8fc7c1dac32be080e6746324ff42b7a9b7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 12 Apr 2017 10:21:43 +0100 Subject: drm/i915: Add stub mmio read/write routines to mock device Provide dummy function pointers for the mock device in case we do hit mmio during testing. 
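The stubs themselves are trivial; roughly, simplified to the 32-bit accessors with the signatures used by the new mock_uncore.c below:

/* writes are swallowed ... */
static void nop_write32(struct drm_i915_private *dev_priv,
			i915_reg_t reg, u32 val, bool trace)
{
}

/* ... and reads report an all-zero register */
static u32 nop_read32(struct drm_i915_private *dev_priv,
		      i915_reg_t reg, bool trace)
{
	return 0;
}

mock_uncore_init() then assigns these for every access width through the now-parametrised ASSIGN_READ/WRITE_MMIO_VFUNCS macros.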
v2: Use ASSIGN_READ/WRITE_MMIO_FUNCS macros Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170412092143.3822-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_uncore.c | 47 ++++++++++++------------ drivers/gpu/drm/i915/selftests/mock_gem_device.c | 2 + drivers/gpu/drm/i915/selftests/mock_uncore.c | 46 +++++++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_uncore.h | 30 +++++++++++++++ 4 files changed, 101 insertions(+), 24 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/mock_uncore.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_uncore.h diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index fb38c76..0cd56bf 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1108,19 +1108,19 @@ __gen6_write(32) #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER -#define ASSIGN_WRITE_MMIO_VFUNCS(x) \ +#define ASSIGN_WRITE_MMIO_VFUNCS(i915, x) \ do { \ - dev_priv->uncore.funcs.mmio_writeb = x##_write8; \ - dev_priv->uncore.funcs.mmio_writew = x##_write16; \ - dev_priv->uncore.funcs.mmio_writel = x##_write32; \ + (i915)->uncore.funcs.mmio_writeb = x##_write8; \ + (i915)->uncore.funcs.mmio_writew = x##_write16; \ + (i915)->uncore.funcs.mmio_writel = x##_write32; \ } while (0) -#define ASSIGN_READ_MMIO_VFUNCS(x) \ +#define ASSIGN_READ_MMIO_VFUNCS(i915, x) \ do { \ - dev_priv->uncore.funcs.mmio_readb = x##_read8; \ - dev_priv->uncore.funcs.mmio_readw = x##_read16; \ - dev_priv->uncore.funcs.mmio_readl = x##_read32; \ - dev_priv->uncore.funcs.mmio_readq = x##_read64; \ + (i915)->uncore.funcs.mmio_readb = x##_read8; \ + (i915)->uncore.funcs.mmio_readw = x##_read16; \ + (i915)->uncore.funcs.mmio_readl = x##_read32; \ + (i915)->uncore.funcs.mmio_readq = x##_read64; \ } while (0) @@ -1310,34 +1310,34 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) i915_pmic_bus_access_notifier; if (IS_GEN(dev_priv, 2, 4) || intel_vgpu_active(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(gen2); - ASSIGN_READ_MMIO_VFUNCS(gen2); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen2); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen2); } else if (IS_GEN5(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(gen5); - ASSIGN_READ_MMIO_VFUNCS(gen5); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen5); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen5); } else if (IS_GEN(dev_priv, 6, 7)) { - ASSIGN_WRITE_MMIO_VFUNCS(gen6); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen6); if (IS_VALLEYVIEW(dev_priv)) { ASSIGN_FW_DOMAINS_TABLE(__vlv_fw_ranges); - ASSIGN_READ_MMIO_VFUNCS(fwtable); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); } else { - ASSIGN_READ_MMIO_VFUNCS(gen6); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6); } } else if (IS_GEN8(dev_priv)) { if (IS_CHERRYVIEW(dev_priv)) { ASSIGN_FW_DOMAINS_TABLE(__chv_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(fwtable); - ASSIGN_READ_MMIO_VFUNCS(fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); } else { - ASSIGN_WRITE_MMIO_VFUNCS(gen8); - ASSIGN_READ_MMIO_VFUNCS(gen6); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, gen8); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, gen6); } } else { ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(fwtable); - ASSIGN_READ_MMIO_VFUNCS(fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(dev_priv, fwtable); + ASSIGN_READ_MMIO_VFUNCS(dev_priv, fwtable); if (HAS_DECOUPLED_MMIO(dev_priv)) { dev_priv->uncore.funcs.mmio_readl = gen9_decoupled_read32; @@ -1353,8 +1353,6 @@ void intel_uncore_init(struct drm_i915_private 
*dev_priv) i915_check_and_clear_faults(dev_priv); } -#undef ASSIGN_WRITE_MMIO_VFUNCS -#undef ASSIGN_READ_MMIO_VFUNCS void intel_uncore_fini(struct drm_i915_private *dev_priv) { @@ -1900,5 +1898,6 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_uncore.c" #include "selftests/intel_uncore.c" #endif diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 6a8258e..f321bdf 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -30,6 +30,7 @@ #include "mock_gem_device.h" #include "mock_gem_object.h" #include "mock_gtt.h" +#include "mock_uncore.h" void mock_device_flush(struct drm_i915_private *i915) { @@ -143,6 +144,7 @@ struct drm_i915_private *mock_gem_device(void) mkwrite_device_info(i915)->gen = -1; spin_lock_init(&i915->mm.object_stat_lock); + mock_uncore_init(i915); init_waitqueue_head(&i915->gpu_error.wait_queue); init_waitqueue_head(&i915->gpu_error.reset_queue); diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c b/drivers/gpu/drm/i915/selftests/mock_uncore.c new file mode 100644 index 0000000..8ef14c7 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c @@ -0,0 +1,46 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "mock_uncore.h" + +#define __nop_write(x) \ +static void \ +nop_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { } +__nop_write(8) +__nop_write(16) +__nop_write(32) + +#define __nop_read(x) \ +static u##x \ +nop_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { return 0; } +__nop_read(8) +__nop_read(16) +__nop_read(32) +__nop_read(64) + +void mock_uncore_init(struct drm_i915_private *i915) +{ + ASSIGN_WRITE_MMIO_VFUNCS(i915, nop); + ASSIGN_READ_MMIO_VFUNCS(i915, nop); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.h b/drivers/gpu/drm/i915/selftests/mock_uncore.h new file mode 100644 index 0000000..d79aa3c --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_UNCORE_H +#define __MOCK_UNCORE_H + +void mock_uncore_init(struct drm_i915_private *i915); + +#endif /* !__MOCK_UNCORE_H */ -- cgit v1.1 From 8968a3649a7c09a0c7f92001b0a599b93a4d9685 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 12 Apr 2017 00:44:26 +0100 Subject: drm/i915: Pretend the engine is always idle when mocking If we have a mock engine and it has no more requests in flight, report it as idle as there is no hardware to contradict us! Otherwise we attempt to query the hw that doesn't exist and find that the hw hasn't set its idle bit and we get upset. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170411234427.14841-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_engine_cs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ee87ca7..402769d 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1140,6 +1140,9 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) intel_engine_last_submit(engine))) return false; + if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock)) + return true; + /* Interrupt/tasklet pending? 
*/ if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) return false; -- cgit v1.1 From 10e9bd9ab0322a3929db0bf84ff1ca3f28ed0ee4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 12 Apr 2017 00:44:27 +0100 Subject: drm/i915: Wake device for emitting request during selftest igt_mmap_offset_exhaustion() selftest was using live requests to make an object busy, but we did not hold a runtime pm wakeref for submitting the requests. Acquire it to avoid triggering "RPM wakelock ref not held during HW access" warnings. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170411234427.14841-3-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 1634247..8f011c4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -545,7 +545,9 @@ static int igt_mmap_offset_exhaustion(void *arg) } mutex_lock(&i915->drm.struct_mutex); + intel_runtime_pm_get(i915); err = make_obj_busy(obj); + intel_runtime_pm_put(i915); mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); -- cgit v1.1 From 14c562c0cde36e87e69918244cdf51a544580d19 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Thu, 6 Apr 2017 14:00:12 -0700 Subject: drm/i915/dp: Validate cached link rate and lane count before retraining MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently intel_dp_check_link_status() tries to retrain the link if Clock recovery or Channel EQ for any of the lanes indicated by intel_dp->lane_count is not set. However these values cached in intel_dp structure can be stale if link training has failed for these values during previous modeset. Or these values can get stale since we have now re read the DPCD registers or it can be 0 in case of connected boot case. This patch validates these values against the max link rate and max lane count values. This is absolutely required incase the common_rates or max lane count are now different due to link fallback. v2: * Include the FIXME commnet inside the function (Ville Syrjala) * Remove the redundant parenthesis (Ville Syrjala) v3 by Jani: * rebase on the DP refactoring series * rename intel_dp_link_params_is_valid to intel_dp_link_params_valid * minor stylistic changes v4: * Compare the link rate against max link rate not the common_rates since common_rates does not account for the lowered fallback link rate value. (Ville Syrjala) v5: * Fixed a warning for unused variable (Manasi) Cc: Ville Syrjala Cc: Jani Nikula Reviewed-by: Ville Syrjälä Signed-off-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1491512412-30016-1-git-send-email-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 944b144..16b7bf7 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -322,6 +322,24 @@ static int intel_dp_common_len_rate_limit(struct intel_dp *intel_dp, return 0; } +static bool intel_dp_link_params_valid(struct intel_dp *intel_dp) +{ + /* + * FIXME: we need to synchronize the current link parameters with + * hardware readout. 
Currently fast link training doesn't work on + * boot-up. + */ + if (intel_dp->link_rate == 0 || + intel_dp->link_rate > intel_dp->max_link_rate) + return false; + + if (intel_dp->lane_count == 0 || + intel_dp->lane_count > intel_dp_max_lane_count(intel_dp)) + return false; + + return true; +} + int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, int link_rate, uint8_t lane_count) { @@ -4253,9 +4271,11 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) if (!to_intel_crtc(intel_encoder->base.crtc)->active) return; - /* FIXME: we need to synchronize this sort of stuff with hardware - * readout. Currently fast link training doesn't work on boot-up. */ - if (!intel_dp->lane_count) + /* + * Validate the cached values of intel_dp->link_rate and + * intel_dp->lane_count before attempting to retrain. + */ + if (!intel_dp_link_params_valid(intel_dp)) return; /* Retrain if Channel EQ or CR not ok */ -- cgit v1.1 From 48ae80741da4b8a26b6df0f765713912bc7cc480 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 12 Apr 2017 09:02:51 +0100 Subject: drm/i915: Fix use after free in lpe_audio_platdev_destroy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [31908.547136] BUG: KASAN: use-after-free in intel_lpe_audio_teardown+0x78/0xb0 [i915] at addr ffff8801f7788358 [31908.547297] Read of size 8 by task drv_selftest/3781 [31908.547405] CPU: 0 PID: 3781 Comm: drv_selftest Tainted: G BU W 4.10.0+ #451 [31908.547553] Hardware name: / , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [31908.547682] Call Trace: [31908.547772] dump_stack+0x68/0x9f [31908.547857] kasan_object_err+0x1c/0x70 [31908.547947] kasan_report_error+0x1f1/0x4f0 [31908.548038] ? kfree+0xaa/0x170 [31908.548121] kasan_report+0x34/0x40 [31908.548211] ? klist_children_get+0x20/0x30 [31908.548472] ? intel_lpe_audio_teardown+0x78/0xb0 [i915] [31908.548567] __asan_load8+0x5e/0x70 [31908.548824] intel_lpe_audio_teardown+0x78/0xb0 [i915] [31908.549080] intel_audio_deinit+0x28/0x80 [i915] [31908.549315] i915_driver_unload+0xe4/0x360 [i915] [31908.549551] ? i915_driver_load+0x1d70/0x1d70 [i915] [31908.549651] ? trace_hardirqs_on+0xd/0x10 [31908.549885] i915_pci_remove+0x23/0x30 [i915] [31908.549978] pci_device_remove+0x5c/0x100 [31908.550069] device_release_driver_internal+0x1db/0x2e0 [31908.550165] driver_detach+0x68/0xc0 [31908.550256] bus_remove_driver+0x8b/0x150 [31908.550346] driver_unregister+0x3e/0x60 [31908.550439] pci_unregister_driver+0x1d/0x110 [31908.550531] ? find_module_all+0x7a/0xa0 [31908.550791] i915_exit+0x1a/0x87 [i915] [31908.550881] SyS_delete_module+0x264/0x2c0 [31908.550971] ? free_module+0x430/0x430 [31908.551064] ? trace_hardirqs_off_caller+0x16/0x110 [31908.551159] ? trace_hardirqs_on_caller+0x16/0x280 [31908.551256] ? trace_hardirqs_on_thunk+0x1a/0x1c [31908.551350] entry_SYSCALL_64_fastpath+0x1c/0xb1 [31908.551440] RIP: 0033:0x7f1d67312ec7 [31908.551520] RSP: 002b:00007ffebe34e888 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [31908.551650] RAX: ffffffffffffffda RBX: ffffffff811123f6 RCX: 00007f1d67312ec7 [31908.551743] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 0000560d0af476b8 [31908.551837] RBP: ffff880233d87f98 R08: 0000000000000000 R09: 00007ffebe34e8b8 [31908.551930] R10: 00007f1d68adf8c0 R11: 0000000000000206 R12: 0000000000000000 [31908.552023] R13: 0000560d0af46440 R14: 0000000000000034 R15: 00007ffebe34d860 [31908.552121] ? 
trace_hardirqs_off_caller+0x16/0x110 [31908.552217] Object at ffff8801f7788000, in cache kmalloc-2048 size: 2048 [31908.552306] Allocated: [31908.552377] PID = 3781 [31908.552456] save_stack_trace+0x16/0x20 [31908.552539] kasan_kmalloc+0xee/0x190 [31908.552627] __kmalloc+0xdb/0x1b0 [31908.552713] platform_device_alloc+0x27/0x90 [31908.552804] platform_device_register_full+0x36/0x220 [31908.553066] intel_lpe_audio_init+0x41e/0x570 [i915] [31908.553320] intel_audio_init+0xd/0x40 [i915] [31908.553552] i915_driver_load+0x13f5/0x1d70 [i915] [31908.553788] i915_pci_probe+0x65/0xe0 [i915] [31908.553881] pci_device_probe+0xda/0x140 [31908.553969] driver_probe_device+0x400/0x660 [31908.554058] __driver_attach+0x11c/0x120 [31908.554147] bus_for_each_dev+0xe6/0x150 [31908.554237] driver_attach+0x26/0x30 [31908.554325] bus_add_driver+0x26b/0x3b0 [31908.554412] driver_register+0xce/0x190 [31908.554502] __pci_register_driver+0xaf/0xc0 [31908.554589] 0xffffffffa0550063 [31908.554675] do_one_initcall+0x8b/0x1e0 [31908.554764] do_init_module+0x102/0x325 [31908.554852] load_module+0x3aad/0x45e0 [31908.554944] SyS_finit_module+0x169/0x1a0 [31908.555033] entry_SYSCALL_64_fastpath+0x1c/0xb1 [31908.555119] Freed: [31908.555188] PID = 3781 [31908.555266] save_stack_trace+0x16/0x20 [31908.555349] kasan_slab_free+0xb0/0x180 [31908.555436] kfree+0xaa/0x170 [31908.555520] platform_device_release+0x76/0x80 [31908.555610] device_release+0x45/0xe0 [31908.555698] kobject_put+0x11f/0x260 [31908.555785] put_device+0x12/0x20 [31908.555871] platform_device_unregister+0x1b/0x20 [31908.556135] intel_lpe_audio_teardown+0x5c/0xb0 [i915] [31908.556390] intel_audio_deinit+0x28/0x80 [i915] [31908.556622] i915_driver_unload+0xe4/0x360 [i915] [31908.556858] i915_pci_remove+0x23/0x30 [i915] [31908.556948] pci_device_remove+0x5c/0x100 [31908.557037] device_release_driver_internal+0x1db/0x2e0 [31908.557129] driver_detach+0x68/0xc0 [31908.557217] bus_remove_driver+0x8b/0x150 [31908.557304] driver_unregister+0x3e/0x60 [31908.557394] pci_unregister_driver+0x1d/0x110 [31908.557653] i915_exit+0x1a/0x87 [i915] [31908.557741] SyS_delete_module+0x264/0x2c0 [31908.557834] entry_SYSCALL_64_fastpath+0x1c/0xb1 [31908.557919] Memory state around the buggy address: [31908.558005] ffff8801f7788200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [31908.558127] ffff8801f7788280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [31908.558255] >ffff8801f7788300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [31908.558374] ^ [31908.558467] ffff8801f7788380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [31908.558595] ffff8801f7788400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb v2: Just leak the memory (8 bytes) as freeing it ourselves is not safe, and we need to coordinate a proper fix in platform_device itself. 
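Condensed, the teardown path that KASAN is complaining about looked like this (sketch of the removed code, not the full function):

static void sketch_buggy_teardown(struct drm_i915_private *dev_priv)
{
	/* may drop the last reference and free the platform_device ... */
	platform_device_unregister(dev_priv->lpe_audio.platdev);
	/* ... so this dereference and kfree() are a use-after-free */
	kfree(dev_priv->lpe_audio.platdev->dev.dma_mask);
}

The fix below simply drops the kfree() and accepts the small dma_mask leak until platform_device itself owns that allocation.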
Fixes: eef57324d926 ("drm/i915: setup bridge for HDMI LPE audio driver") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99952 Signed-off-by: Chris Wilson Cc: Pierre-Louis Bossart Cc: Jerome Anand Cc: Jani Nikula Cc: Takashi Iwai Link: http://patchwork.freedesktop.org/patch/msgid/20170412080251.30648-1-chris@chris-wilson.co.uk Reviewed-by: Takashi Iwai Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_lpe_audio.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index d8ca187..25d8e76 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -131,8 +131,15 @@ err: static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv) { + /* XXX Note that platform_device_register_full() allocates a dma_mask + * and never frees it. We can't free it here as we cannot guarantee + * this is the last reference (i.e. that the dma_mask will not be + * used after our unregister). So ee choose to leak the sizeof(u64) + * allocation here - it should be fixed in the platform_device rather + * than us fiddle with its internals. + */ + platform_device_unregister(dev_priv->lpe_audio.platdev); - kfree(dev_priv->lpe_audio.platdev->dev.dma_mask); } static void lpe_audio_irq_unmask(struct irq_data *d) -- cgit v1.1 From 13f6c719eb88f2a0c8981d7a0c45404194aa2ef5 Mon Sep 17 00:00:00 2001 From: "daniele.ceraolospurio@intel.com" Date: Thu, 6 Apr 2017 17:18:52 -0700 Subject: drm/i915/guc: write wopcm related register once during uc init The wopcm registers are write-once, so any write after the first one will just be ignored. The registers survive a GPU reset but not always a suspend/resume cycle, so to keep things simple keep the writes in the intel_uc_init_hw function instead of moving it earlier to make sure we attempt them every time we try to load GuC. Cc: Jeff McGee Cc: Anusha Srivatsa Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1491524332-23860-1-git-send-email-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/intel_guc_loader.c | 4 ---- drivers/gpu/drm/i915/intel_huc.c | 5 ----- drivers/gpu/drm/i915/intel_uc.c | 5 +++++ 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 12f80ec..d9045b6 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -285,10 +285,6 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* init WOPCM */ - I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); - I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE); - /* Enable MIA caching. GuC clock gating is disabled. 
*/ I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE); diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 7a0bf15..88b4cf3 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -106,11 +106,6 @@ static int huc_ucode_xfer(struct drm_i915_private *dev_priv) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* init WOPCM */ - I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); - I915_WRITE(DMA_GUC_WOPCM_OFFSET, GUC_WOPCM_OFFSET_VALUE | - HUC_LOADING_AGENT_GUC); - /* Set the source address for the uCode */ offset = guc_ggtt_offset(vma) + huc_fw->header_offset; I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 4364b1a..900e376 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -274,6 +274,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) goto err_guc; } + /* init WOPCM */ + I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); + I915_WRITE(DMA_GUC_WOPCM_OFFSET, + GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC); + /* WaEnableuKernelHeaderValidFix:skl */ /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ if (IS_GEN9(dev_priv)) -- cgit v1.1 From 1a36147bb93921651f7fbd7a6e522da6c349081b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 12 Apr 2017 22:30:17 +0300 Subject: drm/i915: Perform link quality check unconditionally during long pulse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently some DP sinks are a little nuts and cause HPD to drop intermittently during modesets. This happens eg. on an ASUS PB287Q. In order to recover from this we can't really use the previous connector status to determine if the link needs retraining, so let's just ignore that piece of information and do the retrain unconditionally. We do of course still check whether the link is supposed to be running or not. To actually get the EDID read out and update things properly we also need to nuke the goto out added by commit 7d23e3c37bb3 ("drm/i915: Cleaning up intel_dp_hpd_pulse"). I'm actually not sure why that was there. Perhaps to avoid an EDID read if the connector status didn't appear to change, but that sort of thing is quite racy and would have failed anyway if we failed to keep up with the hotplugs (if we missed the HPD down in between two HPD ups). And now that we take this codepath unconditionally we definitely need to drop the goto as otherwise we would never do the EDID read. v2: Drop the goto that made us skip EDID reads entirely. Doh! 
v3: Rebase due to locking changes s/apparely/apparently/ in the comment (Chris) Cc: stable@vger.kernel.org Cc: Manasi Navare Cc: Palmer Dabbelt Reported-by: Palmer Dabbelt Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99766 References: https://lists.freedesktop.org/archives/intel-gfx/2017-February/119779.html Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170412193017.21029-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_dp.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 16b7bf7..09601a2 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4685,9 +4685,20 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) */ status = connector_status_disconnected; goto out; - } else if (connector->status == connector_status_connected) { + } else { + /* + * If display is now connected check links status, + * there has been known issues of link loss triggerring + * long pulse. + * + * Some sinks (eg. ASUS PB287Q) seem to perform some + * weird HPD ping pong during modesets. So we can apparently + * end up with HPD going low during a modeset, and then + * going back up soon after. And once that happens we must + * retrain the link to get a picture. That's in case no + * userspace component reacted to intermittent HPD dip. + */ intel_dp_check_link_status(intel_dp); - goto out; } /* -- cgit v1.1 From 9301397a63b3bf1090dffe846c6f1c8efa032236 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Thu, 6 Apr 2017 16:44:19 +0300 Subject: drm/i915: Implement Link Rate fallback on Link training failure If link training at a link rate optimal for a particular mode fails during modeset's atomic commit phase, then we let the modeset complete and then retry. We save the link rate value at which link training failed, update the link status property to "BAD" and use a lower link rate to prune the modes. It will redo the modeset on the current mode at lower link rate or if the current mode gets pruned due to lower link constraints then, it will send a hotplug uevent for userspace to handle it. This is also required to pass DP CTS tests 4.3.1.3, 4.3.1.4, 4.3.1.6. This patch is a resend of the original commit id (233ce881dd91fb "drm/i915: Implement Link Rate fallback on Link training failure") which got reverted in this commit id (afc1ebf4562a14 Revert "drm/i915: Implement Link Rate fallback on Link training failure") due to CI failures. After investigating the CI failures it was found that these were essentially the failures which were always there but hidden because they used to be DRM_DEBUG_KMS messages for link failures so never got caught by CI. But now this patch actually throws DRM_ERROR if the link training fails at RBR and 1 lane. So it caught these link train failures. There were two failures: 1. On SKL 6700k this was because the machine in CI lab is a SKL desktop without eDP on Port A. But our VBT initialization code in the driver writes VBT defaults in a way that it always sets DP flag on Port A and this does not get cleared after parsing the VBT outputs. This has been fixed in commit id (bb1d132935c2f8 "drm/i915/vbt: split out defaults that are set when there is no VBT) and (665788572c6410b "drm/i915/vbt: don't propagate errors from intel_bios_init()) 2. On ILK-650 desktop - This was happening because of a bad monitor desktop combination. 
I switched the monitor in the CI lab and that helped get rid of the link failures on ILK system. v10: * Rebase on drm-tip and resend after revert v9: * Use the trimmed max values of link rate/lane count based on link train fallback (Daniel Vetter) v8: * Set link_status to BAD first and then call mode_valid (Jani Nikula) v7: Remove the redundant variable in previous patch itself v6: * Obtain link rate index from fallback_link_rate using the helper intel_dp_link_rate_index (Jani Nikula) * Include fallback within intel_dp_start_link_train (Jani Nikula) v5: * Move set link status to drm core (Daniel Vetter, Jani Nikula) v4: * Add fallback support for non DDI platforms too * Set connector->link status inside set_link_status function (Jani Nikula) v3: * Set link status property to BAd unconditionally (Jani Nikula) * Dont use two separate variables link_train_failed and link_status to indicate same thing (Jani Nikula) v2: * Squashed a few patches (Jani Nikula) Acked-by: Tony Cheng Acked-by: Harry Wentland Cc: Jani Nikula Cc: Daniel Vetter Cc: Ville Syrjala Signed-off-by: Manasi Navare Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/16ca48b1e74c618929245e9a085b9e3483c3a16d.1491485983.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 27 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_dp_link_training.c | 22 ++++++++++++++++++++-- drivers/gpu/drm/i915/intel_drv.h | 3 +++ 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 09601a2..08834f7 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5981,6 +5981,29 @@ intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) } } +static void intel_dp_modeset_retry_work_fn(struct work_struct *work) +{ + struct intel_connector *intel_connector; + struct drm_connector *connector; + + intel_connector = container_of(work, typeof(*intel_connector), + modeset_retry_work); + connector = &intel_connector->base; + DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, + connector->name); + + /* Grab the locks before changing connector property*/ + mutex_lock(&connector->dev->mode_config.mutex); + /* Set connector link status to BAD and send a Uevent to notify + * userspace to do a modeset. 
+ */ + drm_mode_connector_set_link_status_property(connector, + DRM_MODE_LINK_STATUS_BAD); + mutex_unlock(&connector->dev->mode_config.mutex); + /* Send Hotplug uevent so userspace can reprobe */ + drm_kms_helper_hotplug_event(connector->dev); +} + bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector) @@ -5993,6 +6016,10 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, enum port port = intel_dig_port->port; int type; + /* Initialize the work for modeset in case of link train failure */ + INIT_WORK(&intel_connector->modeset_retry_work, + intel_dp_modeset_retry_work_fn); + if (WARN(intel_dig_port->max_lanes < 1, "Not enough lanes (%d) for DP on port %c\n", intel_dig_port->max_lanes, port_name(port))) diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index 694ad0f..b79c1c0 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -314,6 +314,24 @@ void intel_dp_stop_link_train(struct intel_dp *intel_dp) void intel_dp_start_link_train(struct intel_dp *intel_dp) { - intel_dp_link_training_clock_recovery(intel_dp); - intel_dp_link_training_channel_equalization(intel_dp); + struct intel_connector *intel_connector = intel_dp->attached_connector; + + if (!intel_dp_link_training_clock_recovery(intel_dp)) + goto failure_handling; + if (!intel_dp_link_training_channel_equalization(intel_dp)) + goto failure_handling; + + DRM_DEBUG_KMS("Link Training Passed at Link Rate = %d, Lane count = %d", + intel_dp->link_rate, intel_dp->lane_count); + return; + + failure_handling: + DRM_DEBUG_KMS("Link Training failed at link rate = %d, lane count = %d", + intel_dp->link_rate, intel_dp->lane_count); + if (!intel_dp_get_link_train_fallback_values(intel_dp, + intel_dp->link_rate, + intel_dp->lane_count)) + /* Schedule a Hotplug Uevent to userspace to start modeset */ + schedule_work(&intel_connector->modeset_retry_work); + return; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f78d7c5..100750c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -321,6 +321,9 @@ struct intel_connector { void *port; /* store this opaque as its illegal to dereference it */ struct intel_dp *mst_port; + + /* Work struct to schedule a uevent on link train failure */ + struct work_struct modeset_retry_work; }; struct dpll { -- cgit v1.1 From be02f7556447a0dee672acb5e462f03377b98ae8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Apr 2017 22:52:17 +0300 Subject: drm/i915: checking for NULL instead of IS_ERR() in mock selftests i915_gem_request_alloc() uses error pointers. It never returns NULLs. 
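For illustration only, a minimal sketch of the ERR_PTR idiom the fix below relies on (hypothetical caller, not code from this series):

#include <linux/err.h>

/* i915_gem_request_alloc() reports failure as ERR_PTR(-errno), which is
 * non-NULL, so a bare NULL check lets error pointers straight through:
 */
static struct drm_i915_gem_request *
sketch_get_request(struct intel_engine_cs *engine,
		   struct i915_gem_context *ctx)
{
	struct drm_i915_gem_request *rq;

	rq = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(rq))		/* correct: catches ERR_PTR(-ENOMEM) etc. */
		return NULL;	/* the mock helper maps failure to NULL */

	return rq;
}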
Fixes: 0daf0113cff6 ("drm/i915: Mock infrastructure for request emission") Signed-off-by: Dan Carpenter Link: http://patchwork.freedesktop.org/patch/msgid/20170413195217.GA26108@mwanda Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/selftests/mock_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index 0e8d2e7..8097e36 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -35,7 +35,7 @@ mock_request(struct intel_engine_cs *engine, /* NB the i915->requests slab cache is enlarged to fit mock_request */ request = i915_gem_request_alloc(engine, context); - if (!request) + if (IS_ERR(request)) return NULL; mock = container_of(request, typeof(*mock), base); -- cgit v1.1 From f4bf77b49516e12e89c7de89a343accd6cbe87f2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Apr 2017 22:54:25 +0300 Subject: drm/i915: set "ret" correctly on error paths If "crtc" is NULL, then my static checker complains that "ret" isn't initialized on that path. It doesn't really cause a problem unless "ret" is somehow set to -EDEADLK which is not likely. Chris Wilson also noticed another error path where "ret" isn't set correctly. Signed-off-by: Dan Carpenter Link: http://patchwork.freedesktop.org/patch/msgid/20170414195425.GA8144@mwanda Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 48a5462..85b9e2f5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9563,6 +9563,7 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, */ if (!crtc) { DRM_DEBUG_KMS("no pipe available for load-detect\n"); + ret = -ENODEV; goto fail; } @@ -9619,6 +9620,7 @@ found: DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n"); if (IS_ERR(fb)) { DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n"); + ret = PTR_ERR(fb); goto fail; } -- cgit v1.1 From b0fd47adc6233e4c198a97e1ddb05accd31eeacb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 15 Apr 2017 10:39:02 +0100 Subject: drm/i915: Copy user requested buffers into the error state Introduce a new execobject.flag (EXEC_OBJECT_CAPTURE) that userspace may use to indicate that it wants the contents of this buffer preserved in the error state (/sys/class/drm/cardN/error) following a GPU hang involving this batch. Use this at your discretion, the contents of the error state, although compressed, are allocated with GFP_ATOMIC (i.e. limited) and kept for all eternity (until the error state is destroyed). 
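As a rough illustration (not part of the patch), userspace consumption of the new flag could look like the following sketch, assuming the exported uapi header from this series is available via libdrm:

#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Returns non-zero if the running kernel understands EXEC_OBJECT_CAPTURE. */
static int has_exec_capture(int drm_fd)
{
	int value = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_EXEC_CAPTURE,
		.value = &value,
	};

	return ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 && value;
}

/* Ask for this execobject's contents to be kept in the error state on a hang. */
static void mark_for_capture(struct drm_i915_gem_exec_object2 *obj)
{
	obj->flags |= EXEC_OBJECT_CAPTURE;
}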
Based on an earlier patch by Ben Widawsky Testcase: igt/gem_exec_capture Signed-off-by: Chris Wilson Cc: Ben Widawsky Cc: Matt Turner Acked-by: Ben Widawsky Acked-by: Matt Turner Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170415093902.22581-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 +++++++++ drivers/gpu/drm/i915/i915_gem_request.c | 16 ++++++++++++ drivers/gpu/drm/i915/i915_gem_request.h | 11 ++++++++ drivers/gpu/drm/i915/i915_gpu_error.c | 40 +++++++++++++++++++++++++++++- include/uapi/drm/i915_drm.h | 15 ++++++++++- 7 files changed, 96 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index bd85e38..cc7393e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_SOFTPIN: case I915_PARAM_HAS_EXEC_ASYNC: case I915_PARAM_HAS_EXEC_FENCE: + case I915_PARAM_HAS_EXEC_CAPTURE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1af4e6f..ed21f0a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1025,6 +1025,9 @@ struct i915_gpu_state { u32 *pages[0]; } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; + struct drm_i915_error_object **user_bo; + long user_bo_count; + struct drm_i915_error_object *wa_ctx; struct drm_i915_error_request { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3e59c8..af19657 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1114,6 +1114,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; + if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) { + struct i915_gem_capture_list *capture; + + capture = kmalloc(sizeof(*capture), GFP_KERNEL); + if (unlikely(!capture)) + return -ENOMEM; + + capture->next = req->capture_list; + capture->vma = vma; + req->capture_list = capture; + } + if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) continue; diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 313cdff..095cccc 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -292,6 +292,19 @@ static void advance_ring(struct drm_i915_gem_request *request) request->ring->head = tail; } +static void free_capture_list(struct drm_i915_gem_request *request) +{ + struct i915_gem_capture_list *capture; + + capture = request->capture_list; + while (capture) { + struct i915_gem_capture_list *next = capture->next; + + kfree(capture); + capture = next; + } +} + static void i915_gem_request_retire(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; @@ -317,6 +330,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) unreserve_seqno(request->engine); advance_ring(request); + free_capture_list(request); + /* Walk through the active list, calling retire on each. 
This allows * objects to track their GPU activity and mark themselves as idle * when their *last* active request is completed (updating state @@ -615,6 +630,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->global_seqno = 0; req->file_priv = NULL; req->batch = NULL; + req->capture_list = NULL; /* * Reserve space in the ring buffer for all the commands required to diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index a211c53..4ccab5a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -73,6 +73,11 @@ struct i915_priotree { #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX) }; +struct i915_gem_capture_list { + struct i915_gem_capture_list *next; + struct i915_vma *vma; +}; + /** * Request queue structure. * @@ -167,6 +172,12 @@ struct drm_i915_gem_request { * error state dump only). */ struct i915_vma *batch; + /** Additional buffers requested by userspace to be captured upon + * a GPU hang. The vma/obj on this list are protected by their + * active reference - all objects on this list must also be + * on the active_list (of their final request). + */ + struct i915_gem_capture_list *capture_list; struct list_head active_list; /** Time at which this request was emitted, in jiffies. */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8effc59..4b247b0 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -712,6 +712,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, print_error_obj(m, dev_priv->engine[i], NULL, obj); } + for (j = 0; j < ee->user_bo_count; j++) + print_error_obj(m, dev_priv->engine[i], + "user", ee->user_bo[j]); + if (ee->num_requests) { err_printf(m, "%s --- %d requests\n", dev_priv->engine[i]->name, @@ -825,11 +829,15 @@ void __i915_gpu_state_free(struct kref *error_ref) { struct i915_gpu_state *error = container_of(error_ref, typeof(*error), ref); - int i; + long i, j; for (i = 0; i < ARRAY_SIZE(error->engine); i++) { struct drm_i915_error_engine *ee = &error->engine[i]; + for (j = 0; j < ee->user_bo_count; j++) + i915_error_object_free(ee->user_bo[j]); + kfree(ee->user_bo); + i915_error_object_free(ee->batchbuffer); i915_error_object_free(ee->wa_batchbuffer); i915_error_object_free(ee->ringbuffer); @@ -1346,6 +1354,35 @@ static void record_context(struct drm_i915_error_context *e, e->active = ctx->active_count; } +static void request_record_user_bo(struct drm_i915_gem_request *request, + struct drm_i915_error_engine *ee) +{ + struct i915_gem_capture_list *c; + struct drm_i915_error_object **bo; + long count; + + count = 0; + for (c = request->capture_list; c; c = c->next) + count++; + + bo = NULL; + if (count) + bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC); + if (!bo) + return; + + count = 0; + for (c = request->capture_list; c; c = c->next) { + bo[count] = i915_error_object_create(request->i915, c->vma); + if (!bo[count]) + break; + count++; + } + + ee->user_bo = bo; + ee->user_bo_count = count; +} + static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { @@ -1392,6 +1429,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, ee->wa_batchbuffer = i915_error_object_create(dev_priv, engine->scratch); + request_record_user_bo(request, ee); ee->ctx = i915_error_object_create(dev_priv, diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 9ee06ec..f24a80d 100644 --- 
a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -412,6 +412,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_FENCE 44 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture + * user specified buffers for post-mortem debugging of GPU hangs. See + * EXEC_OBJECT_CAPTURE. + */ +#define I915_PARAM_HAS_EXEC_CAPTURE 45 + typedef struct drm_i915_getparam { __s32 param; /* @@ -775,8 +781,15 @@ struct drm_i915_gem_exec_object2 { * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously. */ #define EXEC_OBJECT_ASYNC (1<<6) +/* Request that the contents of this execobject be copied into the error + * state upon a GPU hang involving this batch for post-mortem debugging. + * These buffers are recorded in no particular order as "user" in + * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see + * if the kernel supports this flag. + */ +#define EXEC_OBJECT_CAPTURE (1<<7) /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1) __u64 flags; union { -- cgit v1.1 From a0242b0d598a4654841065888bfa1135b6a6760b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 18 Apr 2017 11:18:16 +0200 Subject: drm/i915: Update DRIVER_DATE to 20170418 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ed21f0a..357b6c6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,8 +79,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170403" -#define DRIVER_TIMESTAMP 1491198738 +#define DRIVER_DATE "20170418" +#define DRIVER_TIMESTAMP 1492507096 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions -- cgit v1.1 From 3396a273851c14634b98bb27be37508b06df94f4 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 13 Apr 2017 14:15:27 +0300 Subject: drm/i915: Fix system hang with EI UP masked on Haswell Previously with commit a9c1f90c8e17 ("drm/i915: Don't mask EI UP interrupt on IVB|SNB") a certain, seemingly unrelated bit (GEN6_PM_RP_UP_EI_EXPIRED) needed to be unmasked for IVB and SNB in order to prevent system hang with chained batchbuffers. Our CI was seeing incomplete results with tests that used chained batches and it was found out that HSW needs to have this same bit unmasked to reliably survive chained batches. Always unmask GEN6_PM_RP_UP_EI_EXPIRED on Haswell to prevent system hang with batch chaining. 
Testcase: igt/gem_exec_fence/nb-await-default Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100672 Cc: Chris Wilson Cc: stable@vger.kernel.org Signed-off-by: Mika Kuoppala Acked-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1492082127-29007-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d9d1969..fd97fe0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4252,12 +4252,12 @@ void intel_irq_init(struct drm_i915_private *dev_priv) dev_priv->rps.pm_intrmsk_mbz = 0; /* - * SNB,IVB can while VLV,CHV may hard hang on looping batchbuffer + * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer * if GEN6_PM_UP_EI_EXPIRED is masked. * * TODO: verify if this can be reproduced on VLV,CHV. */ - if (INTEL_INFO(dev_priv)->gen <= 7 && !IS_HASWELL(dev_priv)) + if (INTEL_INFO(dev_priv)->gen <= 7) dev_priv->rps.pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED; if (INTEL_INFO(dev_priv)->gen >= 8) -- cgit v1.1 From 939cf46c3a32c969392c66a6218593a38e1c7bf1 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 18 Apr 2017 11:52:11 +0100 Subject: drm/i915: Fix GCC 4.4 build issue with __intel_wait_for_register_fw Move the BUILD_BUG_ONs for busy-wait duration outside the _wait_for_atomic macro as discussed on the mailing list. v2: Simplify the macro by omitting the ret__ local. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Suggested-by: Michal Wajdeczko Fixes: 1d1a9774e404 ("drm/i915: Extend intel_wait_for_register_fw() with fast timeout") Cc: Michal Wajdeczko Cc: Chris Wilson Reviewed-by: Michal Wajdeczko Link: http://patchwork.freedesktop.org/patch/msgid/20170418105211.7089-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 100750c..54f3ff8 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -88,7 +88,6 @@ int cpu, ret, timeout = (US) * 1000; \ u64 base; \ _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \ - BUILD_BUG_ON((US) > 50000); \ if (!(ATOMIC)) { \ preempt_disable(); \ cpu = smp_processor_id(); \ @@ -130,8 +129,14 @@ ret__; \ }) -#define wait_for_atomic(COND, MS) _wait_for_atomic((COND), (MS) * 1000, 1) -#define wait_for_atomic_us(COND, US) _wait_for_atomic((COND), (US), 1) +#define wait_for_atomic_us(COND, US) \ +({ \ + BUILD_BUG_ON(!__builtin_constant_p(US)); \ + BUILD_BUG_ON((US) > 50000); \ + _wait_for_atomic((COND), (US), 1); \ +}) + +#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000) #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x)) -- cgit v1.1 From 916a491d11a866218ad92ff4f3124baa97d68ba0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Apr 2017 21:56:30 -0700 Subject: drm/i915: Avoid format string expansion from engine names While highly unlikely, this makes sure that the string built from engine names won't be processed as a format string. 
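The hazard being closed is the classic printf-style one; a tiny illustrative sketch in plain C (not driver code):

#include <stdio.h>

static void report(const char *engine_names)
{
	printf(engine_names);		/* risky: data interpreted as a format string */
	printf("%s", engine_names);	/* safe: any '%' in the names stays inert */
}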
Signed-off-by: Kees Cook Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170411045630.GA6612@beast --- drivers/gpu/drm/i915/intel_hangcheck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index dce7422..9b0ece4 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -407,7 +407,7 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915, "%s, ", engine->name); msg[len-2] = '\0'; - return i915_handle_error(i915, hung, msg); + return i915_handle_error(i915, hung, "%s", msg); } /* -- cgit v1.1 From 2310b3c952c5dc56c2e08f71b907b8e23ab3270d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 19 Apr 2017 10:41:17 +0100 Subject: drm/i915/selftests: Allocate inode/file dynamically Avoid having too large a stack by creating the fake struct inode/file on the heap instead. drivers/gpu/drm/i915/selftests/mock_drm.c: In function 'mock_file': drivers/gpu/drm/i915/selftests/mock_drm.c:46:1: error: the frame size of 1328 bytes is larger than 1280 bytes [-Werror=frame-larger-than=] drivers/gpu/drm/i915/selftests/mock_drm.c: In function 'mock_file_free': drivers/gpu/drm/i915/selftests/mock_drm.c:54:1: error: the frame size of 1312 bytes is larger than 1280 bytes [-Werror=frame-larger-than=] Reported-by: Arnd Bergmann Fixes: 66d9cb5d805a ("drm/i915: Mock the GEM device for self-testing") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Matthew Auld Cc: Arnd Bergmann Acked-by: Arnd Bergmann Link: http://patchwork.freedesktop.org/patch/msgid/20170419094143.16922-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/selftests/mock_drm.c | 45 ++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.c b/drivers/gpu/drm/i915/selftests/mock_drm.c index 113dec0..09c7041 100644 --- a/drivers/gpu/drm/i915/selftests/mock_drm.c +++ b/drivers/gpu/drm/i915/selftests/mock_drm.c @@ -24,31 +24,50 @@ #include "mock_drm.h" -static inline struct inode fake_inode(struct drm_i915_private *i915) -{ - return (struct inode){ .i_rdev = i915->drm.primary->index }; -} - struct drm_file *mock_file(struct drm_i915_private *i915) { - struct inode inode = fake_inode(i915); - struct file filp = {}; + struct file *filp; + struct inode *inode; struct drm_file *file; int err; - err = drm_open(&inode, &filp); - if (unlikely(err)) - return ERR_PTR(err); + inode = kzalloc(sizeof(*inode), GFP_KERNEL); + if (!inode) { + err = -ENOMEM; + goto err; + } + + inode->i_rdev = i915->drm.primary->index; - file = filp.private_data; + filp = kzalloc(sizeof(*filp), GFP_KERNEL); + if (!filp) { + err = -ENOMEM; + goto err_inode; + } + + err = drm_open(inode, filp); + if (err) + goto err_filp; + + file = filp->private_data; + memset(&file->filp, POISON_INUSE, sizeof(file->filp)); file->authenticated = true; + + kfree(filp); + kfree(inode); return file; + +err_filp: + kfree(filp); +err_inode: + kfree(inode); +err: + return ERR_PTR(err); } void mock_file_free(struct drm_i915_private *i915, struct drm_file *file) { - struct inode inode = fake_inode(i915); struct file filp = { .private_data = file }; - drm_release(&inode, &filp); + drm_release(NULL, &filp); } -- cgit v1.1 From 2d6c4c84230df9ff99b626b52b11049e449632c9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 20 Apr 2017 11:17:09 +0100 Subject: drm/i915: Use discardable buffers for rings The 
contents of a ring are only valid between HEAD and TAIL, when the ring is idle (HEAD == TAIL) we can simply let the pages go under memory pressure if they are not pinned by an active context. Any new content will be written after HEAD and so the ring will again be valid between HEAD and TAIL, everything outside can be discarded. Note that we take care of ensuring that we do not reset the HEAD backwards following a GPU hang on an idle ring. The same precautions are what enable us to use stolen memory for rings. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170420101709.27250-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 833740b..32afac6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1341,7 +1341,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) obj = i915_gem_object_create_stolen(dev_priv, size); if (!obj) - obj = i915_gem_object_create(dev_priv, size); + obj = i915_gem_object_create_internal(dev_priv, size); if (IS_ERR(obj)) return ERR_CAST(obj); -- cgit v1.1 From 546cdbc75b6a1cba6445896141c736b0a3070afc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 21 Apr 2017 09:31:13 +0100 Subject: drm/i915: Stop touching hangcheck.seqno from intel_engine_init_global_seqno() The hangcheck runs independently to the main flow of seqno through the driver. However, we have an odd coupling of the seqno reset that is unwelcome, and if poked at just the right rate can cause spurious hangs (e.g. gem_exec_whisper) on an apparently idle engine. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170421083113.21321-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 402769d..82a274b 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -265,6 +265,7 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) struct drm_i915_private *dev_priv = engine->i915; GEM_BUG_ON(!intel_engine_is_idle(engine)); + GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); /* Our semaphore implementation is strictly monotonic (i.e. we proceed * so long as the semaphore value in the register/page is greater @@ -296,9 +297,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); - engine->hangcheck.seqno = seqno; - /* After manually advancing the seqno, fake the interrupt in case * there are any waiters for that seqno. 
*/ -- cgit v1.1 From 3dd14c04d77d7d702de5aa7157df4cc9417329f3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 21 Apr 2017 14:58:15 +0100 Subject: drm/i915: Avoid busy-spinning on VLV_GLTC_PW_STATUS mmio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The busy-spin, as the first stage of intel_wait_for_register(), is currently under suspicion for causing: [ 62.034926] NMI watchdog: Watchdog detected hard LOCKUP on cpu 1 [ 62.034928] Modules linked in: i2c_dev i915 intel_gtt drm_kms_helper prime_numbers [ 62.034932] CPU: 1 PID: 183 Comm: kworker/1:2 Not tainted 4.11.0-rc7+ #471 [ 62.034933] Hardware name: / , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 62.034934] Workqueue: pm pm_runtime_work [ 62.034936] task: ffff880275a04ec0 task.stack: ffffc900002d8000 [ 62.034936] RIP: 0010:__intel_wait_for_register_fw+0x77/0x1a0 [i915] [ 62.034937] RSP: 0018:ffffc900002dbc38 EFLAGS: 00000082 [ 62.034939] RAX: ffffc90003530094 RBX: 0000000000130094 RCX: 0000000000000001 [ 62.034940] RDX: 00000000000000a1 RSI: ffff88027fd15e58 RDI: 0000000000000000 [ 62.034941] RBP: ffffc900002dbc78 R08: 0000000000000002 R09: 0000000000000000 [ 62.034942] R10: ffffc900002dbc18 R11: ffff880276429dd0 R12: ffff8802707c0000 [ 62.034943] R13: 00000000000000a0 R14: 0000000000000000 R15: 00000000fffefc10 [ 62.034945] FS: 0000000000000000(0000) GS:ffff88027fd00000(0000) knlGS:0000000000000000 [ 62.034945] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 62.034947] CR2: 00007ffd3cd98ff8 CR3: 0000000274c19000 CR4: 00000000001006e0 [ 62.034947] Call Trace: [ 62.034948] intel_wait_for_register+0x77/0x140 [i915] [ 62.034949] vlv_suspend_complete+0x23/0x5b0 [i915] [ 62.034950] intel_runtime_suspend+0x16c/0x2a0 [i915] [ 62.034950] pci_pm_runtime_suspend+0x50/0x180 [ 62.034951] ? pci_pm_runtime_resume+0xa0/0xa0 [ 62.034952] __rpm_callback+0xc5/0x210 [ 62.034953] rpm_callback+0x1f/0x80 [ 62.034953] ? pci_pm_runtime_resume+0xa0/0xa0 [ 62.034954] rpm_suspend+0x118/0x580 [ 62.034955] pm_runtime_work+0x64/0x90 [ 62.034956] process_one_work+0x1bb/0x3e0 [ 62.034956] worker_thread+0x46/0x4f0 [ 62.034957] ? __schedule+0x18b/0x610 [ 62.034958] kthread+0xff/0x140 [ 62.034958] ? process_one_work+0x3e0/0x3e0 [ 62.034959] ? kthread_create_on_node+ and related hard lockups in CI for byt and bsw. 
Note this effectively reverts commits 41ce405e6894 and b27366958869 ("drm/i915: Convert wait_for(I915_READ(reg)) to intel_wait_for_register()") v2: Convert bool allow into a u32 mask for clarity and repeat the comment on vlv rc6 timing to justify the 3ms timeout used for the wait (Ville) Fixes: 41ce405e6894 ("drm/i915: Convert wait_for(I915_READ(reg)) to intel_wait_for_register()") Fixes: b27366958869 ("drm/i915: Convert wait_for(I915_READ(reg)) to intel_wait_for_register()") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100718 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Ville Syrjälä Cc: Tomi Sarvela Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421135815.11897-1-chris@chris-wilson.co.uk Tested-by: Tomi Sarvela --- drivers/gpu/drm/i915/i915_drv.c | 46 +++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index cc7393e..c7d68e7 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2175,6 +2175,20 @@ static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv) I915_WRITE(VLV_GUNIT_CLOCK_GATE2, s->clock_gate_dis2); } +static int vlv_wait_for_pw_status(struct drm_i915_private *dev_priv, + u32 mask, u32 val) +{ + /* The HW does not like us polling for PW_STATUS frequently, so + * use the sleeping loop rather than risk the busy spin within + * intel_wait_for_register(). + * + * Transitioning between RC6 states should be at most 2ms (see + * valleyview_enable_rps) so use a 3ms timeout. + */ + return wait_for((I915_READ_NOTRACE(VLV_GTLC_PW_STATUS) & mask) == val, + 3); +} + int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on) { u32 val; @@ -2203,8 +2217,9 @@ int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on) static int vlv_allow_gt_wake(struct drm_i915_private *dev_priv, bool allow) { + u32 mask; u32 val; - int err = 0; + int err; val = I915_READ(VLV_GTLC_WAKE_CTRL); val &= ~VLV_GTLC_ALLOWWAKEREQ; @@ -2213,45 +2228,32 @@ static int vlv_allow_gt_wake(struct drm_i915_private *dev_priv, bool allow) I915_WRITE(VLV_GTLC_WAKE_CTRL, val); POSTING_READ(VLV_GTLC_WAKE_CTRL); - err = intel_wait_for_register(dev_priv, - VLV_GTLC_PW_STATUS, - VLV_GTLC_ALLOWWAKEACK, - allow, - 1); + mask = VLV_GTLC_ALLOWWAKEACK; + val = allow ? mask : 0; + + err = vlv_wait_for_pw_status(dev_priv, mask, val); if (err) DRM_ERROR("timeout disabling GT waking\n"); return err; } -static int vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv, - bool wait_for_on) +static void vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv, + bool wait_for_on) { u32 mask; u32 val; - int err; mask = VLV_GTLC_PW_MEDIA_STATUS_MASK | VLV_GTLC_PW_RENDER_STATUS_MASK; val = wait_for_on ? mask : 0; - if ((I915_READ(VLV_GTLC_PW_STATUS) & mask) == val) - return 0; - - DRM_DEBUG_KMS("waiting for GT wells to go %s (%08x)\n", - onoff(wait_for_on), - I915_READ(VLV_GTLC_PW_STATUS)); /* * RC6 transitioning can be delayed up to 2 msec (see * valleyview_enable_rps), use 3 msec for safety. 
*/ - err = intel_wait_for_register(dev_priv, - VLV_GTLC_PW_STATUS, mask, val, - 3); - if (err) + if (vlv_wait_for_pw_status(dev_priv, mask, val)) DRM_ERROR("timeout waiting for GT wells to go %s\n", onoff(wait_for_on)); - - return err; } static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv) @@ -2272,7 +2274,7 @@ static int vlv_suspend_complete(struct drm_i915_private *dev_priv) * Bspec defines the following GT well on flags as debug only, so * don't treat them as hard failures. */ - (void)vlv_wait_for_gt_wells(dev_priv, false); + vlv_wait_for_gt_wells(dev_priv, false); mask = VLV_GTLC_RENDER_CTX_EXISTS | VLV_GTLC_MEDIA_CTX_EXISTS; WARN_ON((I915_READ(VLV_GTLC_WAKE_CTRL) & mask) != mask); -- cgit v1.1 From ade0b0c965f59176daddbef9c4717354034f9bce Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 22 Apr 2017 09:15:37 +0100 Subject: drm/i915: Confirm the request is still active before adding it to the await MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although we do check the completion-status of the request before actually adding a wait on it (either to its submit fence or its completion dma-fence), we currently do not check before adding it to the dependency lists. In fact, without checking for a completed request we may try to use the signaler after it has been retired and its dependency tree freed: [ 60.044057] BUG: KASAN: use-after-free in __list_add_valid+0x1d/0xd0 at addr ffff880348c9e6a0 [ 60.044118] Read of size 8 by task gem_exec_fence/530 [ 60.044164] CPU: 1 PID: 530 Comm: gem_exec_fence Tainted: G E 4.11.0-rc7+ #46 [ 60.044226] Hardware name: ��������������������������������� ���������������������������������/���������������������������������, BIOS RYBDWi35.86A.0246.2 [ 60.044290] Call Trace: [ 60.044337] dump_stack+0x4d/0x6a [ 60.044383] kasan_object_err+0x21/0x70 [ 60.044435] kasan_report+0x225/0x4e0 [ 60.044488] ? __list_add_valid+0x1d/0xd0 [ 60.044534] ? kasan_kmalloc+0xad/0xe0 [ 60.044587] __asan_load8+0x5e/0x70 [ 60.044639] __list_add_valid+0x1d/0xd0 [ 60.044788] __i915_priotree_add_dependency+0x67/0x130 [i915] [ 60.044895] i915_gem_request_await_request+0xa8/0x370 [i915] [ 60.044974] i915_gem_request_await_dma_fence+0x129/0x140 [i915] [ 60.045049] i915_gem_do_execbuffer.isra.37+0xb0a/0x26b0 [i915] [ 60.045077] ? save_stack+0xb1/0xd0 [ 60.045105] ? save_stack_trace+0x1b/0x20 [ 60.045132] ? save_stack+0x46/0xd0 [ 60.045158] ? kasan_kmalloc+0xad/0xe0 [ 60.045184] ? __kmalloc+0xd8/0x670 [ 60.045229] ? drm_ioctl+0x359/0x640 [drm] [ 60.045256] ? SyS_ioctl+0x41/0x70 [ 60.045330] ? i915_vma_move_to_active+0x540/0x540 [i915] [ 60.045360] ? tty_insert_flip_string_flags+0xa1/0xf0 [ 60.045387] ? tty_flip_buffer_push+0x63/0x70 [ 60.045414] ? remove_wait_queue+0xa9/0xc0 [ 60.045441] ? kasan_unpoison_shadow+0x35/0x50 [ 60.045467] ? kasan_kmalloc+0xad/0xe0 [ 60.045494] ? kasan_check_write+0x14/0x20 [ 60.045568] i915_gem_execbuffer2+0xdb/0x2a0 [i915] [ 60.045616] drm_ioctl+0x359/0x640 [drm] [ 60.045705] ? i915_gem_execbuffer+0x5a0/0x5a0 [i915] [ 60.045751] ? drm_version+0x150/0x150 [drm] [ 60.045778] ? compat_start_thread+0x60/0x60 [ 60.045805] ? plist_del+0xda/0x1a0 [ 60.045833] do_vfs_ioctl+0x12e/0x910 [ 60.045860] ? ioctl_preallocate+0x130/0x130 [ 60.045886] ? pci_mmcfg_check_reserved+0xc0/0xc0 [ 60.045913] ? vfs_write+0x196/0x240 [ 60.045939] ? 
__fget_light+0xa7/0xc0 [ 60.045965] SyS_ioctl+0x41/0x70 [ 60.045991] entry_SYSCALL_64_fastpath+0x17/0x98 [ 60.046017] RIP: 0033:0x7feb2baefc47 [ 60.046042] RSP: 002b:00007fff56d28e58 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 60.046075] RAX: ffffffffffffffda RBX: 00007fff56d290a8 RCX: 00007feb2baefc47 [ 60.046102] RDX: 00007fff56d29050 RSI: 00000000c0406469 RDI: 0000000000000003 [ 60.046129] RBP: 00007fff56d29050 R08: 000055ecc4cd27d0 R09: 00007feb2bda8600 [ 60.046154] R10: 0000000000000073 R11: 0000000000000246 R12: 00000000c0406469 [ 60.046177] R13: 0000000000000003 R14: 000000000000000f R15: 0000000000000099 [ 60.046203] Object at ffff880348c9e680, in cache i915_dependency size: 64 [ 60.046225] Allocated: [ 60.046246] PID = 530 [ 60.046269] save_stack_trace+0x1b/0x20 [ 60.046292] save_stack+0x46/0xd0 [ 60.046318] kasan_kmalloc+0xad/0xe0 [ 60.046343] kasan_slab_alloc+0x12/0x20 [ 60.046368] kmem_cache_alloc+0xab/0x650 [ 60.046445] i915_gem_request_await_request+0x88/0x370 [i915] [ 60.046559] i915_gem_request_await_dma_fence+0x129/0x140 [i915] [ 60.046705] i915_gem_do_execbuffer.isra.37+0xb0a/0x26b0 [i915] [ 60.046849] i915_gem_execbuffer2+0xdb/0x2a0 [i915] [ 60.046936] drm_ioctl+0x359/0x640 [drm] [ 60.046987] do_vfs_ioctl+0x12e/0x910 [ 60.047038] SyS_ioctl+0x41/0x70 [ 60.047090] entry_SYSCALL_64_fastpath+0x17/0x98 [ 60.047139] Freed: [ 60.047179] PID = 530 [ 60.047223] save_stack_trace+0x1b/0x20 [ 60.047269] save_stack+0x46/0xd0 [ 60.047317] kasan_slab_free+0x72/0xc0 [ 60.047366] kmem_cache_free+0x39/0x160 [ 60.047512] i915_gem_request_retire+0x83f/0x930 [i915] [ 60.047657] i915_gem_request_alloc+0x166/0x600 [i915] [ 60.047799] i915_gem_do_execbuffer.isra.37+0xad8/0x26b0 [i915] [ 60.047897] i915_gem_execbuffer2+0xdb/0x2a0 [i915] [ 60.047942] drm_ioctl+0x359/0x640 [drm] [ 60.047968] do_vfs_ioctl+0x12e/0x910 [ 60.047993] SyS_ioctl+0x41/0x70 [ 60.048019] entry_SYSCALL_64_fastpath+0x17/0x98 [ 60.048044] Memory state around the buggy address: [ 60.048066] ffff880348c9e580: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc [ 60.048105] ffff880348c9e600: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc [ 60.048138] >ffff880348c9e680: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 60.048170] ^ [ 60.048191] ffff880348c9e700: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc [ 60.048225] ffff880348c9e780: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc Note to hit the use-after-free requires us to be passed back a request via a fence-array, that is from explicit fencing accumulated into a sync-file fence-array. 
Fixes: 52e542090701 ("drm/i915/scheduler: Record all dependencies upon request construction") Testcase: igt/gem_exec_fence/expired-history Signed-off-by: Chris Wilson Reviewed-by: Michał Winiarski Reviewed-by: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170422081537.6468-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 095cccc..e2ec42b 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -680,6 +680,9 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, GEM_BUG_ON(to == from); + if (i915_gem_request_completed(from)) + return 0; + if (to->engine->schedule) { ret = i915_priotree_add_dependency(to->i915, &to->priotree, -- cgit v1.1 From 695eaa3b60a0a9a2baf41241e60b589efb6479ee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 23 Apr 2017 18:06:19 +0100 Subject: drm/i915: Include interesting seqno in the missed breadcrumb debug Knowing the neighbouring seqno (current on hw, last submitted to hw) provide some useful breadcrumbs to the debug log. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170423170619.7156-4-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 9ccbf26..35da592 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -64,10 +64,12 @@ static unsigned long wait_timeout(void) static noinline void missed_breadcrumb(struct intel_engine_cs *engine) { - DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s\n", + DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s, current seqno=%x, last=%x\n", engine->name, __builtin_return_address(0), yesno(test_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted))); + &engine->irq_posted)), + intel_engine_get_seqno(engine), + intel_engine_last_submit(engine)); set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } -- cgit v1.1 From 6b764a594fc4e9f1caa537728a5a8e2192e18fc6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 25 Apr 2017 11:38:35 +0100 Subject: drm/i915: Report request restarts for both execlists/guc As we now share the execlist_port[] tracking for both execlists/guc, we can reset the inflight count on both and report which requests are being restarted. Suggested-by: Michel Thierry Signed-off-by: Chris Wilson Cc: Michel Thierry Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170425103835.31871-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_lrc.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7df278f..7a5f164 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1148,14 +1148,11 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } -static u32 port_seqno(struct execlist_port *port) -{ - return port->request ? 
port->request->global_seqno : 0; -} - static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + struct execlist_port *port = engine->execlist_port; + unsigned int n; int ret; ret = intel_mocs_init_engine(engine); @@ -1176,16 +1173,22 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) /* After a GPU reset, we may have requests to replay */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) { - DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n", - engine->name, - port_seqno(&engine->execlist_port[0]), - port_seqno(&engine->execlist_port[1])); - engine->execlist_port[0].count = 0; - engine->execlist_port[1].count = 0; - execlists_submit_ports(engine); + + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { + if (!port[n].request) + break; + + DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n", + engine->name, n, + port[n].request->global_seqno); + + /* Discard the current inflight count */ + port[n].count = 0; } + if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) + execlists_submit_ports(engine); + return 0; } -- cgit v1.1 From e6ba9992de6c63fe86c028b4876338e1cb7dac34 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 25 Apr 2017 14:00:49 +0100 Subject: drm/i915: Differentiate between sw write location into ring and last hw read We need to keep track of the last location we ask the hw to read up to (RING_TAIL) separately from our last write location into the ring, so that in the event of a GPU reset we do not tell the HW to proceed into a partially written request (which can happen if that request is waiting for an external signal before being executed). v2: Refactor intel_ring_reset() (Mika) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100144 Testcase: igt/gem_exec_fence/await-hang Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests") Fixes: d55ac5bf97c6 ("drm/i915: Defer transfer onto execution timeline to actual hw submission") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170425130049.26147-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_request.c | 16 +++++++++--- drivers/gpu/drm/i915/i915_guc_submission.c | 4 +-- drivers/gpu/drm/i915/intel_lrc.c | 6 ++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 41 ++++++++++++++++++++---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 19 ++++++++++++-- 5 files changed, 59 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index e2ec42b..126cd13 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -283,10 +283,18 @@ static void advance_ring(struct drm_i915_gem_request *request) * Note this requires that we are always called in request * completion order. */ - if (list_is_last(&request->ring_link, &request->ring->request_list)) - tail = request->ring->tail; - else + if (list_is_last(&request->ring_link, &request->ring->request_list)) { + /* We may race here with execlists resubmitting this request + * as we retire it. The resubmission will move the ring->tail + * forwards (to request->wa_tail). We either read the + * current value that was written to hw, or the value that + * is just about to be. Either works, if we miss the last two + * noops - they are safe to be replayed on a reset. 
+ */ + tail = READ_ONCE(request->ring->tail); + } else { tail = request->postfix; + } list_del(&request->ring_link); request->ring->head = tail; @@ -651,7 +659,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * GPU processing the request, we never over-estimate the * position of the head. */ - req->head = req->ring->tail; + req->head = req->ring->emit; /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 1642fff..ab5140b 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client, GEM_BUG_ON(freespace < wqi_size); /* The GuC firmware wants the tail index in QWords, not bytes */ - tail = rq->tail; - assert_ring_tail_valid(rq->ring, rq->tail); - tail >>= 3; + tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3; GEM_BUG_ON(tail > WQ_RING_TAIL_MAX); /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7a5f164..5ec064a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; - assert_ring_tail_valid(rq->ring, rq->tail); - reg_state[CTX_RING_TAIL+1] = rq->tail; + reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail); /* True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. @@ -2057,8 +2056,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) ce->state->obj->mm.dirty = true; i915_gem_object_unpin_map(ce->state->obj); - ce->ring->head = ce->ring->tail = 0; - intel_ring_update_space(ce->ring); + intel_ring_reset(ce->ring, 0); } } } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 32afac6..227dfcf 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size) void intel_ring_update_space(struct intel_ring *ring) { - ring->space = __intel_ring_space(ring->head, ring->tail, ring->size); + ring->space = __intel_ring_space(ring->head, ring->emit, ring->size); } static int @@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) i915_gem_request_submit(request); - assert_ring_tail_valid(request->ring, request->tail); - I915_WRITE_TAIL(request->engine, request->tail); + I915_WRITE_TAIL(request->engine, + intel_ring_set_tail(request->ring, request->tail)); } static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) @@ -1319,11 +1319,23 @@ err: return PTR_ERR(addr); } +void intel_ring_reset(struct intel_ring *ring, u32 tail) +{ + GEM_BUG_ON(!list_empty(&ring->request_list)); + ring->tail = tail; + ring->head = tail; + ring->emit = tail; + intel_ring_update_space(ring); +} + void intel_ring_unpin(struct intel_ring *ring) { GEM_BUG_ON(!ring->vma); GEM_BUG_ON(!ring->vaddr); + /* Discard any unused bytes beyond that submitted to hw. 
*/ + intel_ring_reset(ring, ring->tail); + if (i915_vma_is_map_and_fenceable(ring->vma)) i915_vma_unpin_iomap(ring->vma); else @@ -1555,8 +1567,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; enum intel_engine_id id; + /* Restart from the beginning of the rings for convenience */ for_each_engine(engine, dev_priv, id) - engine->buffer->head = engine->buffer->tail; + intel_ring_reset(engine->buffer, 0); } static int ring_request_alloc(struct drm_i915_gem_request *request) @@ -1609,7 +1622,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) unsigned space; /* Would completion of this request free enough space? */ - space = __intel_ring_space(target->postfix, ring->tail, + space = __intel_ring_space(target->postfix, ring->emit, ring->size); if (space >= bytes) break; @@ -1634,8 +1647,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { struct intel_ring *ring = req->ring; - int remain_actual = ring->size - ring->tail; - int remain_usable = ring->effective_size - ring->tail; + int remain_actual = ring->size - ring->emit; + int remain_usable = ring->effective_size - ring->emit; int bytes = num_dwords * sizeof(u32); int total_bytes, wait_bytes; bool need_wrap = false; @@ -1671,17 +1684,17 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) if (unlikely(need_wrap)) { GEM_BUG_ON(remain_actual > ring->space); - GEM_BUG_ON(ring->tail + remain_actual > ring->size); + GEM_BUG_ON(ring->emit + remain_actual > ring->size); /* Fill the tail with MI_NOOP */ - memset(ring->vaddr + ring->tail, 0, remain_actual); - ring->tail = 0; + memset(ring->vaddr + ring->emit, 0, remain_actual); + ring->emit = 0; ring->space -= remain_actual; } - GEM_BUG_ON(ring->tail > ring->size - bytes); - cs = ring->vaddr + ring->tail; - ring->tail += bytes; + GEM_BUG_ON(ring->emit > ring->size - bytes); + cs = ring->vaddr + ring->emit; + ring->emit += bytes; ring->space -= bytes; GEM_BUG_ON(ring->space < 0); @@ -1692,7 +1705,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { int num_dwords = - (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); u32 *cs; if (num_dwords == 0) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 00d36aa4..96710b6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -143,6 +143,7 @@ struct intel_ring { u32 head; u32 tail; + u32 emit; int space; int size; @@ -494,6 +495,8 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size); int intel_ring_pin(struct intel_ring *ring, struct drm_i915_private *i915, unsigned int offset_bias); +void intel_ring_reset(struct intel_ring *ring, u32 tail); +void intel_ring_update_space(struct intel_ring *ring); void intel_ring_unpin(struct intel_ring *ring); void intel_ring_free(struct intel_ring *ring); @@ -517,7 +520,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). 
 */ - GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs); + GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs); } static inline u32 @@ -546,7 +549,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) GEM_BUG_ON(tail >= ring->size); } -void intel_ring_update_space(struct intel_ring *ring); +static inline unsigned int +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) +{ + /* Whilst writes to the tail are strictly ordered, there is no + * serialisation between readers and the writers. The tail may be + * read by i915_gem_request_retire() just as it is being updated + * by execlists, as although the breadcrumb is complete, the context + * switch hasn't been seen. + */ + assert_ring_tail_valid(ring, tail); + ring->tail = tail; + return tail; +} void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); -- cgit v1.1 From 010018638659fbe8332c470e94750daf6ef4ab14 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 23 Apr 2017 18:06:17 +0100 Subject: drm/i915: Poison the request before emitting commands If we poison the request before we emit commands, it should be easier to spot when we execute an uninitialised request. References: https://bugs.freedesktop.org/show_bug.cgi?id=100144 Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170423170619.7156-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 227dfcf..6836efb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1694,6 +1694,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) GEM_BUG_ON(ring->emit > ring->size - bytes); cs = ring->vaddr + ring->emit; + GEM_DEBUG_EXEC(memset(cs, POISON_INUSE, bytes)); ring->emit += bytes; ring->space -= bytes; GEM_BUG_ON(ring->space < 0); -- cgit v1.1 From f7b02a529a81761b44d7fa164bd84333f3232989 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 26 Apr 2017 09:06:59 +0100 Subject: drm/i915: Skip waking the signaler when enabling before request submission If we are enabling the breadcrumbs signaling prior to submitting the request, we know that we cannot have missed the interrupt and can therefore skip immediately waking the signaler to check. This reduces a significant chunk of the __i915_gem_request_submit() overhead for inter-engine synchronisation, for example in gem_exec_whisper.
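To see the shape of the change outside the driver, a small self-contained C sketch of the wakeup hint (invented names, not the i915 API): the caller says whether the breadcrumb interrupt could already have fired, and the signaler thread is only kicked when that hint and the oldest-waiter bookkeeping agree.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the waiter bookkeeping: returns true when this waiter became
 * the oldest one and the breadcrumb therefore needs double-checking. */
static bool add_wait(bool became_oldest_waiter)
{
	return became_oldest_waiter;
}

static void enable_signaling(bool wakeup, bool became_oldest_waiter)
{
	/* Only wake the signaler if both the caller's hint and the
	 * waiter bookkeeping say it is needed. */
	wakeup &= add_wait(became_oldest_waiter);

	if (wakeup)
		printf("wake signaler to double-check the seqno\n");
	else
		printf("skip the wakeup, no interrupt can have been missed\n");
}

int main(void)
{
	enable_signaling(true, true);	/* enabled after submission */
	enable_signaling(false, true);	/* enabled before submission */
	return 0;
}

In the patch below the submission path simply passes false for the hint, so the common inter-engine synchronisation case never wakes the thread.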
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170426080659.28771-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_request.c | 4 ++-- drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 7 ++++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 126cd13..6198f69 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -61,7 +61,7 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence) if (i915_fence_signaled(fence)) return false; - intel_engine_enable_signaling(to_request(fence)); + intel_engine_enable_signaling(to_request(fence), true); return true; } @@ -437,7 +437,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); request->global_seqno = seqno; if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_enable_signaling(request); + intel_engine_enable_signaling(request, false); spin_unlock(&request->lock); engine->emit_breadcrumb(request, diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index ab5140b..4cc97bf 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -649,7 +649,7 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq) trace_dma_fence_enable_signal(&rq->fence); spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING); - intel_engine_enable_signaling(rq); + intel_engine_enable_signaling(rq, true); spin_unlock(&rq->lock); } diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 35da592..183afcb 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -667,12 +667,13 @@ static int intel_breadcrumbs_signaler(void *arg) return 0; } -void intel_engine_enable_signaling(struct drm_i915_gem_request *request) +void intel_engine_enable_signaling(struct drm_i915_gem_request *request, + bool wakeup) { struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node *parent, **p; - bool first, wakeup; + bool first; u32 seqno; /* Note that we may be called from an interrupt handler on another @@ -705,7 +706,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) * If we are the oldest waiter, enable the irq (after which we * must double check that the seqno did not complete). */ - wakeup = __intel_engine_add_wait(engine, &request->signaling.wait); + wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait); /* Now insert ourselves into the retirement ordered list of signals * on this engine. 
We track the oldest seqno as that will be the diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 96710b6..2506bbe 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -672,7 +672,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); -void intel_engine_enable_signaling(struct drm_i915_gem_request *request); +void intel_engine_enable_signaling(struct drm_i915_gem_request *request, + bool wakeup); void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) -- cgit v1.1 From 46649d8b6cb876e4f823e741d39959cf6e231e85 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Mon, 24 Apr 2017 13:47:18 +0300 Subject: drm/i915/glk: Don't allow 12 bpc when htotal is too big Display workaround #1139 for Geminilake instructs us to restrict HDMI to 8 bpc when htotal is greater than 5460. Otherwise, the pipe is unable to generate a proper signal and is left in a state where corruption is seen with other modes. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100440 Cc: Shashank Sharma Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Shashank Sharma Link: http://patchwork.freedesktop.org/patch/msgid/20170424104718.26448-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_hdmi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 6efc3cb..52f0b2d 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1327,6 +1327,11 @@ static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state) return false; } + /* Display Wa #1139 */ + if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1) && + crtc_state->base.adjusted_mode.htotal > 5460) + return false; + return true; } -- cgit v1.1 From 56306c6e932ab390530fc93936b249656301d827 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Tue, 18 Apr 2017 13:23:16 -0700 Subject: drm/i915: Fix stale comment about I915_RESET_IN_PROGRESS flag It has been replaced by I915_RESET_BACKOFF / I915_RESET_HANDOFF. Signed-off-by: Michel Thierry Link: http://patchwork.freedesktop.org/patch/msgid/20170418202335.35232-2-michel.thierry@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 357b6c6..d1f7c48 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1563,7 +1563,7 @@ struct i915_gpu_error { * * This is a counter which gets incremented when reset is triggered, * - * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set + * Before the reset commences, the I915_RESET_BACKOFF bit is set * meaning that any waiters holding onto the struct_mutex should * relinquish the lock immediately in order for the reset to start. * -- cgit v1.1 From e3895af8bb12612972efb1a07f0bb02b2853afda Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Tue, 18 Apr 2017 13:23:17 -0700 Subject: drm/i915: Rename gen8_(un)request_engine_reset to gen8_reset_engine_start/cancel As all other functions related to resetting engines are using reset_engine. 
v2: remove _request_ and use start/cancel instead (Chris) Cc: Chris Wilson Signed-off-by: Michel Thierry Link: http://patchwork.freedesktop.org/patch/msgid/20170418202335.35232-3-michel.thierry@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uncore.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 0cd56bf..0d479c0 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1683,7 +1683,7 @@ int intel_wait_for_register(struct drm_i915_private *dev_priv, return ret; } -static int gen8_request_engine_reset(struct intel_engine_cs *engine) +static int gen8_reset_engine_start(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; int ret; @@ -1702,7 +1702,7 @@ static int gen8_request_engine_reset(struct intel_engine_cs *engine) return ret; } -static void gen8_unrequest_engine_reset(struct intel_engine_cs *engine) +static void gen8_reset_engine_cancel(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1717,14 +1717,14 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv, unsigned int tmp; for_each_engine_masked(engine, dev_priv, engine_mask, tmp) - if (gen8_request_engine_reset(engine)) + if (gen8_reset_engine_start(engine)) goto not_ready; return gen6_reset_engines(dev_priv, engine_mask); not_ready: for_each_engine_masked(engine, dev_priv, engine_mask, tmp) - gen8_unrequest_engine_reset(engine); + gen8_reset_engine_cancel(engine); return -EIO; } -- cgit v1.1 From 3204c343bb691c42a5e568cbd2a9ec9b2b5703c0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 27 Apr 2017 11:46:51 +0100 Subject: drm/i915: Defer context state allocation for legacy ring submission Almost from the outset for execlists, we used deferred allocation of the logical context and rings. Then we ported the infrastructure for pinning contexts back to legacy, and so now we are able to also implement deferred allocation for context objects prior to first use on the legacy submission. v2: We still need to differentiate between legacy engines, Joonas is fixing that but I want this first ;) (Joonas) Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170427104651.22394-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_context.c | 59 --------------------------------- drivers/gpu/drm/i915/intel_ringbuffer.c | 50 ++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8bd0c49..d46a69d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -151,45 +151,6 @@ void i915_gem_context_free(struct kref *ctx_ref) kfree(ctx); } -static struct drm_i915_gem_object * -alloc_context_obj(struct drm_i915_private *dev_priv, u64 size) -{ - struct drm_i915_gem_object *obj; - int ret; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - obj = i915_gem_object_create(dev_priv, size); - if (IS_ERR(obj)) - return obj; - - /* - * Try to make the context utilize L3 as well as LLC. - * - * On VLV we don't have L3 controls in the PTEs so we - * shouldn't touch the cache level, especially as that - * would make the object snooped which might have a - * negative performance impact. 
- * - * Snooping is required on non-llc platforms in execlist - * mode, but since all GGTT accesses use PAT entry 0 we - * get snooping anyway regardless of cache_level. - * - * This is only applicable for Ivy Bridge devices since - * later platforms don't have L3 control bits in the PTE. - */ - if (IS_IVYBRIDGE(dev_priv)) { - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); - /* Failure shouldn't ever happen this early */ - if (WARN_ON(ret)) { - i915_gem_object_put(obj); - return ERR_PTR(ret); - } - } - - return obj; -} - static void context_close(struct i915_gem_context *ctx) { i915_gem_context_set_closed(ctx); @@ -266,26 +227,6 @@ __create_hw_context(struct drm_i915_private *dev_priv, list_add_tail(&ctx->link, &dev_priv->context_list); ctx->i915 = dev_priv; - if (dev_priv->hw_context_size) { - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - - obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size); - if (IS_ERR(obj)) { - ret = PTR_ERR(obj); - goto err_out; - } - - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); - if (IS_ERR(vma)) { - i915_gem_object_put(obj); - ret = PTR_ERR(vma); - goto err_out; - } - - ctx->engine[RCS].state = vma; - } - /* Default context will never have a file_priv */ ret = DEFAULT_CONTEXT_HANDLE; if (file_priv) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 6836efb..61f6124 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1437,6 +1437,44 @@ static int context_pin(struct i915_gem_context *ctx) PIN_GLOBAL | PIN_HIGH); } +static struct i915_vma * +alloc_context_vma(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create(i915, i915->hw_context_size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + /* + * Try to make the context utilize L3 as well as LLC. + * + * On VLV we don't have L3 controls in the PTEs so we + * shouldn't touch the cache level, especially as that + * would make the object snooped which might have a + * negative performance impact. + * + * Snooping is required on non-llc platforms in execlist + * mode, but since all GGTT accesses use PAT entry 0 we + * get snooping anyway regardless of cache_level. + * + * This is only applicable for Ivy Bridge devices since + * later platforms don't have L3 control bits in the PTE. + */ + if (IS_IVYBRIDGE(i915)) { + /* Ignore any error, regard it as a simple optimisation */ + i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); + } + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + i915_gem_object_put(obj); + + return vma; +} + static int intel_ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { @@ -1449,6 +1487,18 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, return 0; GEM_BUG_ON(!ce->pin_count); /* no overflow please! 
*/ + if (engine->id == RCS && !ce->state && engine->i915->hw_context_size) { + struct i915_vma *vma; + + vma = alloc_context_vma(engine); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto error; + } + + ce->state = vma; + } + if (ce->state) { ret = context_pin(ctx); if (ret) -- cgit v1.1 From a3662830e1e120e9950072a48d75a61ed921ad4a Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 12 Apr 2017 16:48:42 +0100 Subject: drm/i915/guc: Fix sleep under spinlock during reset Looks like intel_guc_reset had the ability to sleep under the uncore spinlock since forever but it wasn't detected until the recent changes annotated the wait for register with might_sleep. I have fixed it by removing holding of the uncore spinlock over the call to gen6_hw_domain_reset, since I do not see that is really needed. But there is always a possibility I am missing some nasty detail so please double check. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Cc: Michal Wajdeczko Cc: Arkadiusz Hiler Cc: Joonas Lahtinen Cc: Oscar Mateo Acked-by: Michel Thierry Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_uncore.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 0d479c0..07a722f 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1532,7 +1532,7 @@ static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv, */ __raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask); - /* Spin waiting for the device to ack the reset requests */ + /* Wait for the device to ack the reset requests */ return intel_wait_for_register_fw(dev_priv, GEN6_GDRST, hw_domain_mask, 0, 500); @@ -1779,17 +1779,12 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv) int intel_guc_reset(struct drm_i915_private *dev_priv) { int ret; - unsigned long irqflags; if (!HAS_GUC(dev_priv)) return -EINVAL; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC); - - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; -- cgit v1.1 From 63ffbcdadcf2b5dde2cd6db6715fc94e77cd43b6 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 28 Apr 2017 10:53:36 +0300 Subject: drm/i915: Sanitize engine context sizes Pre-calculate engine context size based on engine class and device generation and store it in the engine instance. 
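A loose illustration of the approach in plain C, with made-up sizes and types rather than the real per-generation tables: the per-class size is computed once while the engine is set up and simply read back wherever a context image is needed.

#include <stdio.h>

enum engine_class { RENDER_CLASS, COPY_ENGINE_CLASS, VIDEO_DECODE_CLASS };

struct engine {
	enum engine_class class;
	unsigned int gen;
	unsigned int context_size;	/* cached at setup time */
};

/* Example numbers only; the real values come from the hardware documentation. */
static unsigned int context_size(enum engine_class class, unsigned int gen)
{
	if (class == RENDER_CLASS)
		return (gen >= 9 ? 22 : 20) * 4096;

	return gen >= 8 ? 2 * 4096 : 0;
}

static void engine_setup(struct engine *engine)
{
	engine->context_size = context_size(engine->class, engine->gen);
}

int main(void)
{
	struct engine rcs = { .class = RENDER_CLASS, .gen = 9 };

	engine_setup(&rcs);
	printf("render context image: %u bytes\n", rcs.context_size);
	return 0;
}

This is what lets the legacy, execlists and GuC paths all read engine->context_size instead of each recomputing the size.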
v2: - Squash and get rid of hw_context_size (Chris) v3: - Move after MMIO init for probing on Gen7 and 8 (Chris) - Retained rounding (Tvrtko) v4: - Rebase for deferred legacy context allocation Signed-off-by: Joonas Lahtinen Cc: Paulo Zanoni Cc: Rodrigo Vivi Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Cc: Tvrtko Ursulin Cc: Oscar Mateo Cc: Zhenyu Wang Cc: intel-gvt-dev@lists.freedesktop.org Acked-by: Tvrtko Ursulin Cc: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/gvt/scheduler.c | 6 +- drivers/gpu/drm/i915/i915_drv.c | 15 +++-- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem_context.c | 56 ++----------------- drivers/gpu/drm/i915/i915_guc_submission.c | 3 +- drivers/gpu/drm/i915/i915_reg.h | 10 ---- drivers/gpu/drm/i915/intel_engine_cs.c | 90 +++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_lrc.c | 54 +----------------- drivers/gpu/drm/i915/intel_lrc.h | 2 - drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 ++- 11 files changed, 112 insertions(+), 138 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index a77db23..ac538dc 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d workload lrca %x", ring_id, workload->ctx_desc.lrca); - context_page_num = intel_lr_context_size( - gvt->dev_priv->engine[ring_id]); + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; context_page_num = context_page_num >> PAGE_SHIFT; @@ -333,8 +332,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id, workload->ctx_desc.lrca); - context_page_num = intel_lr_context_size( - gvt->dev_priv->engine[ring_id]); + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; context_page_num = context_page_num >> PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c7d68e7..2d3c4264 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -835,10 +835,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_uc_init_early(dev_priv); i915_memcpy_init_early(dev_priv); - ret = intel_engines_init_early(dev_priv); - if (ret) - return ret; - ret = i915_workqueues_init(dev_priv); if (ret < 0) goto err_engines; @@ -948,14 +944,21 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) ret = i915_mmio_setup(dev_priv); if (ret < 0) - goto put_bridge; + goto err_bridge; intel_uncore_init(dev_priv); + + ret = intel_engines_init_mmio(dev_priv); + if (ret) + goto err_uncore; + i915_gem_init_mmio(dev_priv); return 0; -put_bridge: +err_uncore: + intel_uncore_fini(dev_priv); +err_bridge: pci_dev_put(dev_priv->bridge_dev); return ret; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d1f7c48..e68edf1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2359,7 +2359,6 @@ struct drm_i915_private { */ struct mutex av_mutex; - uint32_t hw_context_size; struct list_head context_list; u32 fdi_rx_config; @@ -3023,7 +3022,7 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); -int intel_engines_init_early(struct drm_i915_private 
*dev_priv); +int intel_engines_init_mmio(struct drm_i915_private *dev_priv); int intel_engines_init(struct drm_i915_private *dev_priv); /* intel_hotplug.c */ diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d46a69d..31a73c3 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -92,33 +92,6 @@ #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -static int get_context_size(struct drm_i915_private *dev_priv) -{ - int ret; - u32 reg; - - switch (INTEL_GEN(dev_priv)) { - case 6: - reg = I915_READ(CXT_SIZE); - ret = GEN6_CXT_TOTAL_SIZE(reg) * 64; - break; - case 7: - reg = I915_READ(GEN7_CXT_SIZE); - if (IS_HASWELL(dev_priv)) - ret = HSW_CXT_TOTAL_SIZE; - else - ret = GEN7_CXT_TOTAL_SIZE(reg) * 64; - break; - case 8: - ret = GEN8_CXT_TOTAL_SIZE; - break; - default: - BUG(); - } - - return ret; -} - void i915_gem_context_free(struct kref *ctx_ref) { struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref); @@ -384,21 +357,6 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->context_hw_ida); - if (i915.enable_execlists) { - /* NB: intentionally left blank. We will allocate our own - * backing objects as we need them, thank you very much */ - dev_priv->hw_context_size = 0; - } else if (HAS_HW_CONTEXTS(dev_priv)) { - dev_priv->hw_context_size = - round_up(get_context_size(dev_priv), - I915_GTT_PAGE_SIZE); - if (dev_priv->hw_context_size > (1<<20)) { - DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n", - dev_priv->hw_context_size); - dev_priv->hw_context_size = 0; - } - } - ctx = i915_gem_create_context(dev_priv, NULL); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context (error %ld)\n", @@ -418,8 +376,8 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); DRM_DEBUG_DRIVER("%s context support initialized\n", - i915.enable_execlists ? "LR" : - dev_priv->hw_context_size ? "HW" : "fake"); + dev_priv->engine[RCS]->context_size ? 
"logical" : + "fake"); return 0; } @@ -882,11 +840,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) return 0; } -static bool contexts_enabled(struct drm_device *dev) -{ - return i915.enable_execlists || to_i915(dev)->hw_context_size; -} - static bool client_is_banned(struct drm_i915_file_private *file_priv) { return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS; @@ -895,12 +848,13 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv) int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_context_create *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; struct i915_gem_context *ctx; int ret; - if (!contexts_enabled(dev)) + if (!dev_priv->engine[RCS]->context_size) return -ENODEV; if (args->pad != 0) @@ -918,7 +872,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - ctx = i915_gem_create_context(to_i915(dev), file_priv); + ctx = i915_gem_create_context(dev_priv, file_priv); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 4cc97bf..7e85b5a 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1051,8 +1051,7 @@ static int guc_ads_create(struct intel_guc *guc) dev_priv->engine[RCS]->status_page.ggtt_offset; for_each_engine(engine, dev_priv, id) - blob->ads.eng_state_size[engine->guc_id] = - intel_lr_context_size(engine); + blob->ads.eng_state_size[engine->guc_id] = engine->context_size; base = guc_ggtt_offset(vma); blob->ads.scheduler_policies = base + ptr_offset(blob, policies); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4c72ada..ee8170c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3370,16 +3370,6 @@ enum skl_disp_power_wells { #define GEN7_CXT_VFSTATE_SIZE(ctx_reg) (((ctx_reg) >> 0) & 0x3f) #define GEN7_CXT_TOTAL_SIZE(ctx_reg) (GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ GEN7_CXT_VFSTATE_SIZE(ctx_reg)) -/* Haswell does have the CXT_SIZE register however it does not appear to be - * valid. Now, docs explain in dwords what is in the context object. The full - * size is 70720 bytes, however, the power context and execlist context will - * never be saved (power context is stored elsewhere, and execlists don't work - * on HSW) - so the final size, including the extra state required for the - * Resource Streamer, is 66944 bytes, which rounds to 17 pages. - */ -#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) -/* Same as Haswell, but 72064 bytes now. */ -#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) enum { INTEL_ADVANCED_CONTEXT = 0, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 82a274b..6d3d838 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -26,6 +26,22 @@ #include "intel_ringbuffer.h" #include "intel_lrc.h" +/* Haswell does have the CXT_SIZE register however it does not appear to be + * valid. Now, docs explain in dwords what is in the context object. 
The full + * size is 70720 bytes, however, the power context and execlist context will + * never be saved (power context is stored elsewhere, and execlists don't work + * on HSW) - so the final size, including the extra state required for the + * Resource Streamer, is 66944 bytes, which rounds to 17 pages. + */ +#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) +/* Same as Haswell, but 72064 bytes now. */ +#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) + struct engine_class_info { const char *name; int (*init_legacy)(struct intel_engine_cs *engine); @@ -107,6 +123,69 @@ static const struct engine_info intel_engines[] = { }, }; +/** + * ___intel_engine_context_size() - return the size of the context for an engine + * @dev_priv: i915 device private + * @class: engine class + * + * Each engine class may require a different amount of space for a context + * image. + * + * Return: size (in bytes) of an engine class specific context image + * + * Note: this size includes the HWSP, which is part of the context image + * in LRC mode, but does not include the "shared data page" used with + * GuC submission. The caller should account for this if using the GuC. + */ +static u32 +__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) +{ + u32 cxt_size; + + BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE); + + switch (class) { + case RENDER_CLASS: + switch (INTEL_GEN(dev_priv)) { + default: + MISSING_CASE(INTEL_GEN(dev_priv)); + case 9: + return GEN9_LR_CONTEXT_RENDER_SIZE; + case 8: + return i915.enable_execlists ? + GEN8_LR_CONTEXT_RENDER_SIZE : + GEN8_CXT_TOTAL_SIZE; + case 7: + if (IS_HASWELL(dev_priv)) + return HSW_CXT_TOTAL_SIZE; + + cxt_size = I915_READ(GEN7_CXT_SIZE); + return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64, + PAGE_SIZE); + case 6: + cxt_size = I915_READ(CXT_SIZE); + return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64, + PAGE_SIZE); + case 5: + case 4: + case 3: + case 2: + /* For the special day when i810 gets merged. */ + case 1: + return 0; + } + break; + default: + MISSING_CASE(class); + case VIDEO_DECODE_CLASS: + case VIDEO_ENHANCEMENT_CLASS: + case COPY_ENGINE_CLASS: + if (INTEL_GEN(dev_priv) < 8) + return 0; + return GEN8_LR_CONTEXT_OTHER_SIZE; + } +} + static int intel_engine_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) @@ -135,6 +214,11 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->class = info->class; engine->instance = info->instance; + engine->context_size = __intel_engine_context_size(dev_priv, + engine->class); + if (WARN_ON(engine->context_size > BIT(20))) + engine->context_size = 0; + /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; @@ -145,12 +229,12 @@ intel_engine_setup(struct drm_i915_private *dev_priv, } /** - * intel_engines_init_early() - allocate the Engine Command Streamers + * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers * @dev_priv: i915 device private * * Return: non-zero if the initialization failed. 
*/ -int intel_engines_init_early(struct drm_i915_private *dev_priv) +int intel_engines_init_mmio(struct drm_i915_private *dev_priv) { struct intel_device_info *device_info = mkwrite_device_info(dev_priv); const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; @@ -200,7 +284,7 @@ cleanup: } /** - * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * intel_engines_init() - init the Engine Command Streamers * @dev_priv: i915 device private * * Return: non-zero if the initialization failed. diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5ec064a..0909549 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -138,10 +138,6 @@ #include "i915_drv.h" #include "intel_mocs.h" -#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) - #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) #define RING_EXECLIST0_VALID (1 << 0x4) @@ -1918,53 +1914,6 @@ populate_lr_context(struct i915_gem_context *ctx, return 0; } -/** - * intel_lr_context_size() - return the size of the context for an engine - * @engine: which engine to find the context size for - * - * Each engine may require a different amount of space for a context image, - * so when allocating (or copying) an image, this function can be used to - * find the right size for the specific engine. - * - * Return: size (in bytes) of an engine-specific context image - * - * Note: this size includes the HWSP, which is part of the context image - * in LRC mode, but does not include the "shared data page" used with - * GuC submission. The caller should account for this if using the GuC. 
- */ -uint32_t intel_lr_context_size(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - WARN_ON(INTEL_GEN(dev_priv) < 8); - - switch (engine->class) { - case RENDER_CLASS: - switch (INTEL_GEN(dev_priv)) { - default: - MISSING_CASE(INTEL_GEN(dev_priv)); - case 9: - ret = GEN9_LR_CONTEXT_RENDER_SIZE; - break; - case 8: - ret = GEN8_LR_CONTEXT_RENDER_SIZE; - break; - } - break; - - default: - MISSING_CASE(engine->class); - case VIDEO_DECODE_CLASS: - case VIDEO_ENHANCEMENT_CLASS: - case COPY_ENGINE_CLASS: - ret = GEN8_LR_CONTEXT_OTHER_SIZE; - break; - } - - return ret; -} - static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -1977,8 +1926,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, WARN_ON(ce->state); - context_size = round_up(intel_lr_context_size(engine), - I915_GTT_PAGE_SIZE); + context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); /* One extra page as the sharing data between driver and GuC */ context_size += PAGE_SIZE * LRC_PPHWSP_PN; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index e8015e7..52b3a1f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -78,8 +78,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); struct drm_i915_private; struct i915_gem_context; -uint32_t intel_lr_context_size(struct intel_engine_cs *engine); - void intel_lr_context_resume(struct drm_i915_private *dev_priv); uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 61f6124..29b5afa 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1444,7 +1444,7 @@ alloc_context_vma(struct intel_engine_cs *engine) struct drm_i915_gem_object *obj; struct i915_vma *vma; - obj = i915_gem_object_create(i915, i915->hw_context_size); + obj = i915_gem_object_create(i915, engine->context_size); if (IS_ERR(obj)) return ERR_CAST(obj); @@ -1487,7 +1487,7 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, return 0; GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ - if (engine->id == RCS && !ce->state && engine->i915->hw_context_size) { + if (!ce->state && engine->context_size) { struct i915_vma *vma; vma = alloc_context_vma(engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2506bbe..02d741e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -196,13 +196,14 @@ struct intel_engine_cs { enum intel_engine_id id; unsigned int uabi_id; unsigned int hw_id; + unsigned int guc_id; u8 class; u8 instance; - - unsigned int guc_id; - u32 mmio_base; + u32 context_size; + u32 mmio_base; unsigned int irq_shift; + struct intel_ring *buffer; struct intel_timeline *timeline; -- cgit v1.1 From f2e4d76ec2b16e8edcbcf6703f4a64c0ce8178b8 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 28 Apr 2017 10:53:37 +0300 Subject: drm/i915: Eliminate HAS_HW_CONTEXTS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HAS_HW_CONTEXTS is misleading condition for GPU reset and CCID, replace it with Gen specific (to be updated in next patches). HAS_HW_CONTEXTS in i915_l3_write is bogus because each HAS_L3_DPF match also has .has_hw_contexts = 1 set. 
This leads to us being able to get rid of the property completely. v2: - Keep the checks at Gen6 for no functional change (Ville) Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Chris Wilson Cc: Ville Syrjälä --- drivers/gpu/drm/i915/i915_drv.h | 2 -- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 6 +++--- drivers/gpu/drm/i915/i915_pci.c | 5 ----- drivers/gpu/drm/i915/i915_sysfs.c | 3 --- 5 files changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e68edf1..cfa5689 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -822,7 +822,6 @@ struct intel_csr { func(has_gmch_display); \ func(has_guc); \ func(has_hotplug); \ - func(has_hw_contexts); \ func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \ @@ -2866,7 +2865,6 @@ intel_info(const struct drm_i915_private *dev_priv) #define HWS_NEEDS_PHYSICAL(dev_priv) ((dev_priv)->info.hws_needs_physical) -#define HAS_HW_CONTEXTS(dev_priv) ((dev_priv)->info.has_hw_contexts) #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ ((dev_priv)->info.has_logical_ring_contexts) #define USES_PPGTT(dev_priv) (i915.enable_ppgtt) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 33fb11c..ea34a32 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4488,7 +4488,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915) * of the reset, so we only reset recent machines with logical * context support (that must be reset to remove any stray contexts). */ - if (HAS_HW_CONTEXTS(i915)) { + if (INTEL_GEN(i915) >= 6) { int reset = intel_gpu_reset(i915, ALL_ENGINES); WARN_ON(reset && reset != -ENODEV); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4b247b0..fbdd06c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1598,6 +1598,9 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, error->done_reg = I915_READ(DONE_REG); } + if (INTEL_GEN(dev_priv) >= 6) + error->ccid = I915_READ(CCID); + /* 3: Feature specific registers */ if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { error->gam_ecochk = I915_READ(GAM_ECOCHK); @@ -1605,9 +1608,6 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, } /* 4: Everything else */ - if (HAS_HW_CONTEXTS(dev_priv)) - error->ccid = I915_READ(CCID); - if (INTEL_GEN(dev_priv) >= 8) { error->ier = I915_READ(GEN8_DE_MISC_IER); for (i = 0; i < 4; i++) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f87b0c4..f80db2c 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -220,7 +220,6 @@ static const struct intel_device_info intel_ironlake_m_info = { .has_rc6 = 1, \ .has_rc6p = 1, \ .has_gmbus_irq = 1, \ - .has_hw_contexts = 1, \ .has_aliasing_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ CURSOR_OFFSETS @@ -245,7 +244,6 @@ static const struct intel_device_info intel_sandybridge_m_info = { .has_rc6 = 1, \ .has_rc6p = 1, \ .has_gmbus_irq = 1, \ - .has_hw_contexts = 1, \ .has_aliasing_ppgtt = 1, \ .has_full_ppgtt = 1, \ GEN_DEFAULT_PIPEOFFSETS, \ @@ -280,7 +278,6 @@ static const struct intel_device_info intel_valleyview_info = { .has_runtime_pm = 1, .has_rc6 = 1, .has_gmbus_irq = 1, - .has_hw_contexts = 1, .has_gmch_display = 1, .has_hotplug = 1, .has_aliasing_ppgtt = 1, @@ -340,7 +337,6 @@ static 
const struct intel_device_info intel_cherryview_info = { .has_resource_streamer = 1, .has_rc6 = 1, .has_gmbus_irq = 1, - .has_hw_contexts = 1, .has_logical_ring_contexts = 1, .has_gmch_display = 1, .has_aliasing_ppgtt = 1, @@ -387,7 +383,6 @@ static const struct intel_device_info intel_skylake_gt3_info = { .has_rc6 = 1, \ .has_dp_mst = 1, \ .has_gmbus_irq = 1, \ - .has_hw_contexts = 1, \ .has_logical_ring_contexts = 1, \ .has_guc = 1, \ .has_decoupled_mmio = 1, \ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index f3fdfda..a6ad1c2 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -185,9 +185,6 @@ i915_l3_write(struct file *filp, struct kobject *kobj, int slice = (int)(uintptr_t)attr->private; int ret; - if (!HAS_HW_CONTEXTS(dev_priv)) - return -ENXIO; - ret = l3_access_valid(dev_priv, offset); if (ret) return ret; -- cgit v1.1 From ea117b8ddc9fd36f7aa96df920e9ff9ff5232671 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 28 Apr 2017 10:53:38 +0300 Subject: drm/i915: Reset ILK during GEM sanitization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ILK should survive a reset without display corruption. Suggested-by: Chris Wilson Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ea34a32..f5f6057 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4485,10 +4485,9 @@ void i915_gem_sanitize(struct drm_i915_private *i915) * try to take over. The only way to remove the earlier state * is by resetting. However, resetting on earlier gen is tricky as * it may impact the display and we are uncertain about the stability - * of the reset, so we only reset recent machines with logical - * context support (that must be reset to remove any stray contexts). + * of the reset, so this could be applied to even earlier gen. */ - if (INTEL_GEN(i915) >= 6) { + if (INTEL_GEN(i915) >= 5) { int reset = intel_gpu_reset(i915, ALL_ENGINES); WARN_ON(reset && reset != -ENODEV); } -- cgit v1.1 From 5de92320409c1f33cb3119551c25bdfcd8318186 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 28 Apr 2017 10:53:39 +0300 Subject: drm/i915: Capture CCID on ILK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CCID register existed already on ILK according to the PRM (Chris verified the address to match too). 
Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1493366019-18380-1-git-send-email-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index fbdd06c..ec526d9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1598,7 +1598,7 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, error->done_reg = I915_READ(DONE_REG); } - if (INTEL_GEN(dev_priv) >= 6) + if (INTEL_GEN(dev_priv) >= 5) error->ccid = I915_READ(CCID); /* 3: Feature specific registers */ -- cgit v1.1 From 3acbec03b3c51559d01c879e9564d9c9610fe8ce Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 28 Apr 2017 11:02:22 +0300 Subject: drm/i915/glk: Fix DSI "*ERROR* ULPS is still active" messages The sequence in glk_dsi_device_ready() enters ULPS then waits until it is *not* active to then disable it. The correct sequence according to the spec is to enter ULPS then wait until the GLK_ULPS_NOT_ACTIVE bit is zero, i.e., ULPS is active, and then disable ULPS. Fixing the condition gets rid of the following spurious error messages: [drm:glk_dsi_device_ready [i915]] *ERROR* ULPS is still active Fixes: 4644848369c0 ("drm/i915/glk: Add MIPIIO Enable/disable sequence") Cc: Deepak M Cc: Madhav Chauhan Cc: Jani Nikula Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Madhav Chauhan Link: http://patchwork.freedesktop.org/patch/msgid/20170428080222.6147-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 3ffe8b1..fc0ef49 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -410,11 +410,10 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) val |= (ULPS_STATE_ENTER | DEVICE_READY); I915_WRITE(MIPI_DEVICE_READY(port), val); - /* Wait for ULPS Not active */ + /* Wait for ULPS active */ if (intel_wait_for_register(dev_priv, - MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, - GLK_ULPS_NOT_ACTIVE, 20)) - DRM_ERROR("ULPS is still active\n"); + MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, 0, 20)) + DRM_ERROR("ULPS not active\n"); /* Exit ULPS */ val = I915_READ(MIPI_DEVICE_READY(port)); -- cgit v1.1 From a021880f78e6ab3dd6149c92725b84833002af1b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 2 May 2017 09:29:47 +0200 Subject: drm/i915: Update DRIVER_DATE to 20170502 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cfa5689..720fa3e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,8 +79,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170418" -#define DRIVER_TIMESTAMP 1492507096 +#define DRIVER_DATE "20170502" +#define DRIVER_TIMESTAMP 1493710187 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions -- cgit v1.1 From cefcff8f93c3306bfd331a2eaa363263196fdf73 Mon Sep 17 00:00:00 2001 From: Joonas 
Lahtinen Date: Fri, 28 Apr 2017 10:58:39 +0300 Subject: drm/i915: Do not leak dev_priv->l3_parity.remap_info[] Add intel_irq_fini() for placing the deinitialization code, starting with freeing dev_priv->l3_parity.remap_info[]. Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1493366319-18515-1-git-send-email-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.c | 6 ++++-- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_irq.c | 20 +++++++++++++++++++- drivers/gpu/drm/i915/i915_sysfs.c | 23 ++++++++++++----------- 4 files changed, 36 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2d3c4264..452c265 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -852,7 +852,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_init_audio_hooks(dev_priv); ret = i915_gem_load_init(dev_priv); if (ret < 0) - goto err_workqueues; + goto err_irq; intel_display_crc_init(dev_priv); @@ -864,7 +864,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, return 0; -err_workqueues: +err_irq: + intel_irq_fini(dev_priv); i915_workqueues_cleanup(dev_priv); err_engines: i915_engines_cleanup(dev_priv); @@ -879,6 +880,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) { i915_perf_fini(dev_priv); i915_gem_load_cleanup(dev_priv); + intel_irq_fini(dev_priv); i915_workqueues_cleanup(dev_priv); i915_engines_cleanup(dev_priv); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 720fa3e..4588b3e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3057,6 +3057,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, const char *fmt, ...); extern void intel_irq_init(struct drm_i915_private *dev_priv); +extern void intel_irq_fini(struct drm_i915_private *dev_priv); int intel_irq_install(struct drm_i915_private *dev_priv); void intel_irq_uninstall(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index fd97fe0..0e4dcbeb 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1236,7 +1236,7 @@ out: static void ivybridge_parity_work(struct work_struct *work) { struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, l3_parity.error_work); + container_of(work, typeof(*dev_priv), l3_parity.error_work); u32 error_status, row, bank, subbank; char *parity_event[6]; uint32_t misccpctl; @@ -4233,11 +4233,15 @@ static void i965_irq_uninstall(struct drm_device * dev) void intel_irq_init(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; + int i; intel_hpd_init_work(dev_priv); INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); + INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); + for (i = 0; i < MAX_L3_SLICES; ++i) + dev_priv->l3_parity.remap_info[i] = NULL; if (HAS_GUC_SCHED(dev_priv)) dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT; @@ -4363,6 +4367,20 @@ void intel_irq_init(struct drm_i915_private *dev_priv) } /** + * intel_irq_fini - deinitializes IRQ support + * @i915: i915 device instance + * + * This function deinitializes all the IRQ support. 
+ */ +void intel_irq_fini(struct drm_i915_private *i915) +{ + int i; + + for (i = 0; i < MAX_L3_SLICES; ++i) + kfree(i915->l3_parity.remap_info[i]); +} + +/** * intel_irq_install - enables the hardware interrupt * @dev_priv: i915 device instance * diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index a6ad1c2..1eef3fa 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -181,8 +181,8 @@ i915_l3_write(struct file *filp, struct kobject *kobj, struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct drm_device *dev = &dev_priv->drm; struct i915_gem_context *ctx; - u32 *temp = NULL; /* Just here to make handling failures easy */ int slice = (int)(uintptr_t)attr->private; + u32 **remap_info; int ret; ret = l3_access_valid(dev_priv, offset); @@ -193,11 +193,12 @@ i915_l3_write(struct file *filp, struct kobject *kobj, if (ret) return ret; - if (!dev_priv->l3_parity.remap_info[slice]) { - temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); - if (!temp) { - mutex_unlock(&dev->struct_mutex); - return -ENOMEM; + remap_info = &dev_priv->l3_parity.remap_info[slice]; + if (!*remap_info) { + *remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); + if (!*remap_info) { + ret = -ENOMEM; + goto out; } } @@ -205,18 +206,18 @@ i915_l3_write(struct file *filp, struct kobject *kobj, * aren't propagated. Since I cannot find a stable way to reset the GPU * at this point it is left as a TODO. */ - if (temp) - dev_priv->l3_parity.remap_info[slice] = temp; - - memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count); + memcpy(*remap_info + (offset/4), buf, count); /* NB: We defer the remapping until we switch to the context */ list_for_each_entry(ctx, &dev_priv->context_list, link) ctx->remap_slice |= (1<<slice); + ret = count; + +out: mutex_unlock(&dev->struct_mutex); - return count; + return ret; } static struct bin_attribute dpf_attrs = { -- cgit v1.1 From 789a625158b0c0c2bc94a5dc404e7608d6100e5e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 2 May 2017 10:32:42 +0000 Subject: drm/i915/guc: Enable send function only after successful init It is safer to set up a valid send function only after successful GuC hardware initialization. In addition, we prepare a placeholder where we can set up any alternate GuC communication mechanism.
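The pattern can be sketched in ordinary C (simplified, invented names, not the driver's real types): the send hook starts out as a nop that rejects traffic and is only pointed at the real transport once hardware initialisation succeeds; teardown or failure points it back at the nop.

#include <stdio.h>

struct guc {
	int (*send)(struct guc *guc, const unsigned int *action, unsigned int len);
};

static int send_nop(struct guc *guc, const unsigned int *action, unsigned int len)
{
	fprintf(stderr, "unexpected send before init: action=%#x\n", action[0]);
	return -1;
}

static int send_mmio(struct guc *guc, const unsigned int *action, unsigned int len)
{
	/* the real register-based transport would live here */
	return 0;
}

static void enable_communication(struct guc *guc)  { guc->send = send_mmio; }
static void disable_communication(struct guc *guc) { guc->send = send_nop; }

int main(void)
{
	struct guc guc = { .send = send_nop };
	const unsigned int action[] = { 0x10 };

	guc.send(&guc, action, 1);	/* rejected: hardware not ready yet */
	enable_communication(&guc);	/* after successful hardware init */
	return guc.send(&guc, action, 1);
}

Centralising the swap in enable/disable helpers also leaves an obvious place to plug in an alternate transport later.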
Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Link: http://patchwork.freedesktop.org/patch/msgid/20170502103243.54940-1-michal.wajdeczko@intel.com [ickle: Fixup ENODEV for an impossible error path] Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uc.c | 27 ++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_uc.h | 1 + 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 900e376..7fd75ca 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -99,7 +99,7 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv) struct intel_guc *guc = &dev_priv->guc; mutex_init(&guc->send_mutex); - guc->send = intel_guc_send_mmio; + guc->send = intel_guc_send_nop; } static void fetch_uc_fw(struct drm_i915_private *dev_priv, @@ -252,13 +252,27 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv) __intel_uc_fw_fini(&dev_priv->huc.fw); } +static int guc_enable_communication(struct intel_guc *guc) +{ + /* XXX: placeholder for alternate setup */ + guc->send = intel_guc_send_mmio; + return 0; +} + +static void guc_disable_communication(struct intel_guc *guc) +{ + guc->send = intel_guc_send_nop; +} + int intel_uc_init_hw(struct drm_i915_private *dev_priv) { + struct intel_guc *guc = &dev_priv->guc; int ret, attempts; if (!i915.enable_guc_loading) return 0; + guc_disable_communication(guc); gen9_reset_guc_interrupts(dev_priv); /* We need to notify the guc whenever we change the GGTT */ @@ -308,6 +322,10 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) if (ret) goto err_submission; + ret = guc_enable_communication(guc); + if (ret) + goto err_submission; + intel_guc_auth_huc(dev_priv); if (i915.enable_guc_submission) { if (i915.guc_log_level >= 0) @@ -330,6 +348,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) * marks the GPU as wedged until reset). */ err_interrupts: + guc_disable_communication(guc); gen9_disable_guc_interrupts(dev_priv); err_submission: if (i915.enable_guc_submission) @@ -364,6 +383,12 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) i915_ggtt_disable_guc(dev_priv); } +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) +{ + WARN(1, "Unexpected send: action=%#x\n", *action); + return -ENODEV; +} + /* * This function implements the MMIO based host to GuC interface. */ diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 2f0229d..1e0eecd 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -227,6 +227,7 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); int intel_guc_sample_forcewake(struct intel_guc *guc); +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { -- cgit v1.1 From a338908c11528afb9331b33909096ead17811491 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 6 Apr 2017 18:39:42 +0300 Subject: drm/i915: Move the GTFIFODBG to the common mmio dbg framework Remove the per-mmio checking of the FIFO debug register into the common conditional mmio debug handling. Based on patch from Chris Wilson. 
v2: postpone warn on fifodbg for unclaimed reg debugs Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uncore.c | 76 +++++++++++++++---------------------- 1 file changed, 30 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 07a722f..ba7e9e8 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -172,22 +172,6 @@ static void fw_domains_get_with_thread_status(struct drm_i915_private *dev_priv, __gen6_gt_wait_for_thread_c0(dev_priv); } -static void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv) -{ - u32 gtfifodbg; - - gtfifodbg = __raw_i915_read32(dev_priv, GTFIFODBG); - if (WARN(gtfifodbg, "GT wake FIFO error 0x%x\n", gtfifodbg)) - __raw_i915_write32(dev_priv, GTFIFODBG, gtfifodbg); -} - -static void fw_domains_put_with_fifo(struct drm_i915_private *dev_priv, - enum forcewake_domains fw_domains) -{ - fw_domains_put(dev_priv, fw_domains); - gen6_gt_check_fifodbg(dev_priv); -} - static inline u32 fifo_free_entries(struct drm_i915_private *dev_priv) { u32 count = __raw_i915_read32(dev_priv, GTFIFOCTL); @@ -384,15 +368,35 @@ vlv_check_for_unclaimed_mmio(struct drm_i915_private *dev_priv) } static bool +gen6_check_for_fifo_debug(struct drm_i915_private *dev_priv) +{ + u32 fifodbg; + + fifodbg = __raw_i915_read32(dev_priv, GTFIFODBG); + + if (unlikely(fifodbg)) { + DRM_DEBUG_DRIVER("GTFIFODBG = 0x08%x\n", fifodbg); + __raw_i915_write32(dev_priv, GTFIFODBG, fifodbg); + } + + return fifodbg; +} + +static bool check_for_unclaimed_mmio(struct drm_i915_private *dev_priv) { + bool ret = false; + if (HAS_FPGA_DBG_UNCLAIMED(dev_priv)) - return fpga_check_for_unclaimed_mmio(dev_priv); + ret |= fpga_check_for_unclaimed_mmio(dev_priv); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - return vlv_check_for_unclaimed_mmio(dev_priv); + ret |= vlv_check_for_unclaimed_mmio(dev_priv); - return false; + if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) + ret |= gen6_check_for_fifo_debug(dev_priv); + + return ret; } static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, @@ -404,11 +408,6 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, if (check_for_unclaimed_mmio(dev_priv)) DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n"); - /* clear out old GT FIFO errors */ - if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) - __raw_i915_write32(dev_priv, GTFIFODBG, - __raw_i915_read32(dev_priv, GTFIFODBG)); - /* WaDisableShadowRegForCpd:chv */ if (IS_CHERRYVIEW(dev_priv)) { __raw_i915_write32(dev_priv, GTFIFOCTL, @@ -1047,15 +1046,10 @@ __gen2_write(32) #define __gen6_write(x) \ static void \ gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ - u32 __fifo_ret = 0; \ GEN6_WRITE_HEADER; \ - if (NEEDS_FORCE_WAKE(offset)) { \ - __fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \ - } \ + if (NEEDS_FORCE_WAKE(offset)) \ + __gen6_gt_wait_for_fifo(dev_priv); \ __raw_i915_write##x(dev_priv, reg, val); \ - if (unlikely(__fifo_ret)) { \ - gen6_gt_check_fifodbg(dev_priv); \ - } \ GEN6_WRITE_FOOTER; \ } @@ -1190,11 +1184,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) FORCEWAKE_MEDIA_GEN9, FORCEWAKE_ACK_MEDIA_GEN9); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get; - if (!IS_CHERRYVIEW(dev_priv)) - dev_priv->uncore.funcs.force_wake_put = - fw_domains_put_with_fifo; - else - 
dev_priv->uncore.funcs.force_wake_put = fw_domains_put; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_VLV, FORCEWAKE_ACK_VLV); fw_domain_init(dev_priv, FW_DOMAIN_ID_MEDIA, @@ -1202,11 +1192,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_thread_status; - if (IS_HASWELL(dev_priv)) - dev_priv->uncore.funcs.force_wake_put = - fw_domains_put_with_fifo; - else - dev_priv->uncore.funcs.force_wake_put = fw_domains_put; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_MT, FORCEWAKE_ACK_HSW); } else if (IS_IVYBRIDGE(dev_priv)) { @@ -1223,8 +1209,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) */ dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_thread_status; - dev_priv->uncore.funcs.force_wake_put = - fw_domains_put_with_fifo; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; /* We need to init first for ECOBUS access and then * determine later if we want to reinit, in case of MT access is @@ -1242,7 +1227,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) spin_lock_irq(&dev_priv->uncore.lock); fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_RENDER); ecobus = __raw_i915_read32(dev_priv, ECOBUS); - fw_domains_put_with_fifo(dev_priv, FORCEWAKE_RENDER); + fw_domains_put(dev_priv, FORCEWAKE_RENDER); spin_unlock_irq(&dev_priv->uncore.lock); if (!(ecobus & FORCEWAKE_MT_ENABLE)) { @@ -1254,8 +1239,7 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) } else if (IS_GEN6(dev_priv)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_thread_status; - dev_priv->uncore.funcs.force_wake_put = - fw_domains_put_with_fifo; + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE, FORCEWAKE_ACK); } -- cgit v1.1
From 6b07b6d2bd66a70dae0f9e63b88c11da2108cf06 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 2 May 2017 17:03:44 +0300 Subject: drm/i915: Use wait_for_atomic_us when waiting for gt fifo
Replace the handcrafted loop that checks for free fifo slots with wait_for_atomic(). This brings the wait in line with the other waits on register access. We also get a readable timeout constraint, so make it fail after 10ms. Chris suggested that we should fail silently, as the fifo debug handler, now attached to unclaimed mmio handling, will take care of any errors at a later stage. Note that the decision to wait was changed so that we avoid allocating the first reserved entry. Nor do we reduce the count if we fail the wait, removing the possibility of wrapping the count if the hw fifo returned zero.
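As a rough model of the reworked wait described above, the following self-contained sketch polls for free entries above the reserved watermark with a millisecond deadline, fails silently on timeout without touching the cached count, and only consumes a slot on success. wait_for_atomic() and the hardware register read are replaced by invented stand-ins (now_ms(), read_free_entries()); this is an illustration under those assumptions, not the driver code.

/* Hedged user-space sketch of the FIFO wait behaviour. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define GT_FIFO_NUM_RESERVED_ENTRIES 20
#define GT_FIFO_TIMEOUT_MS 10

static unsigned int hw_free = 64; /* pretend hardware state */

static unsigned int read_free_entries(void)
{
	return hw_free; /* stand-in for the GTFIFOCTL read */
}

static uint64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

static void wait_for_fifo(unsigned int *cached_count, bool shared_fifo)
{
	unsigned int n = shared_fifo ? read_free_entries() : *cached_count;

	/* Never dip into the reserved entries; poll until above them. */
	if (n <= GT_FIFO_NUM_RESERVED_ENTRIES) {
		uint64_t deadline = now_ms() + GT_FIFO_TIMEOUT_MS;

		while ((n = read_free_entries()) <= GT_FIFO_NUM_RESERVED_ENTRIES) {
			if (now_ms() > deadline) {
				/* Fail silently; leave the count untouched. */
				fprintf(stderr, "GT_FIFO timeout, entries: %u\n", n);
				return;
			}
		}
	}

	*cached_count = n - 1; /* consume one slot only on success */
}

int main(void)
{
	unsigned int count = 0;

	wait_for_fifo(&count, true);
	printf("cached fifo count: %u\n", count);
	return 0;
}

The behavioural points mirrored here are that the reserved entries are never allocated and that a failed wait leaves the cached count untouched, so it cannot wrap.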
v2: remove unclaimed check on timeout (Chris) v3: use void return (Chris) References: https://bugs.freedesktop.org/show_bug.cgi?id=100247 Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1491493182-31540-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index ba7e9e8..aa9d306 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -29,6 +29,7 @@ #include #define FORCEWAKE_ACK_TIMEOUT_MS 50 +#define GT_FIFO_TIMEOUT_MS 10 #define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32((dev_priv__), (reg__)) @@ -179,30 +180,27 @@ static inline u32 fifo_free_entries(struct drm_i915_private *dev_priv) return count & GT_FIFO_FREE_ENTRIES_MASK; } -static int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) +static void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) { - int ret = 0; + u32 n; /* On VLV, FIFO will be shared by both SW and HW. * So, we need to read the FREE_ENTRIES everytime */ if (IS_VALLEYVIEW(dev_priv)) - dev_priv->uncore.fifo_count = fifo_free_entries(dev_priv); - - if (dev_priv->uncore.fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) { - int loop = 500; - u32 fifo = fifo_free_entries(dev_priv); - - while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) { - udelay(10); - fifo = fifo_free_entries(dev_priv); + n = fifo_free_entries(dev_priv); + else + n = dev_priv->uncore.fifo_count; + + if (n <= GT_FIFO_NUM_RESERVED_ENTRIES) { + if (wait_for_atomic((n = fifo_free_entries(dev_priv)) > + GT_FIFO_NUM_RESERVED_ENTRIES, + GT_FIFO_TIMEOUT_MS)) { + DRM_DEBUG("GT_FIFO timeout, entries: %u\n", n); + return; } - if (WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES)) - ++ret; - dev_priv->uncore.fifo_count = fifo; } - dev_priv->uncore.fifo_count--; - return ret; + dev_priv->uncore.fifo_count = n - 1; } static enum hrtimer_restart -- cgit v1.1 From 9431282832e145237196a19f2464531eb9790240 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 May 2017 10:39:18 +0100 Subject: drm/i915: Mark up clflushes as belonging to an unordered timeline 2 clflushes on two different objects are not ordered, and so do not belong to the same timeline (context). Either we use a unique context for each, or we reserve a special global context to mean unordered. Ideally, we would reserve 0 to mean unordered (DMA_FENCE_NO_CONTEXT) to have the same semantics everywhere. Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170503093924.5320-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_clflush.c | 8 +------- drivers/gpu/drm/i915/i915_gem_clflush.h | 1 - 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4588b3e..271a04c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1513,6 +1513,8 @@ struct i915_gem_mm { /** LRU list of objects with fence regs on them. 
*/ struct list_head fence_list; + u64 unordered_timeline; + /* the indicator for dispatch video commands on two BSD rings */ atomic_t bsd_engine_dispatch_index; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f5f6057..e91590d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4745,7 +4745,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_clflush_init(dev_priv); + dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); if (!i915.enable_execlists) { dev_priv->gt.resume = intel_legacy_submission_resume; diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index ffd01e0..ffac7a1 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -27,7 +27,6 @@ #include "i915_gem_clflush.h" static DEFINE_SPINLOCK(clflush_lock); -static u64 clflush_context; struct clflush { struct dma_fence dma; /* Must be first for dma_fence_free() */ @@ -157,7 +156,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, dma_fence_init(&clflush->dma, &i915_clflush_ops, &clflush_lock, - clflush_context, + to_i915(obj->base.dev)->mm.unordered_timeline, 0); i915_sw_fence_init(&clflush->wait, i915_clflush_notify); @@ -182,8 +181,3 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); } } - -void i915_gem_clflush_init(struct drm_i915_private *i915) -{ - clflush_context = dma_fence_context_alloc(1); -} diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h index b62d61a..2455a78 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.h +++ b/drivers/gpu/drm/i915/i915_gem_clflush.h @@ -28,7 +28,6 @@ struct drm_i915_private; struct drm_i915_gem_object; -void i915_gem_clflush_init(struct drm_i915_private *i915); void i915_gem_clflush_object(struct drm_i915_gem_object *obj, unsigned int flags); #define I915_CLFLUSH_FORCE BIT(0) -- cgit v1.1 From 29ef3fa987edb9768e19a6325030e1d2d58e29de Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 May 2017 10:39:19 +0100 Subject: drm/i915: Unwrap top level fence-array By first unwrapping an incoming fence-array into its child fences, we can simplify the internal branching, and so avoid triggering a potential bug in the next patch when not squashing the child fences on the same timeline. It will also have the advantage of keeping the (top-level) fence arrays out of any fence/timeline caching since these are unordered timelines but with a random context id. 
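The shape of that unwrap can be sketched in isolation: treat a lone fence as an array of one by pointing a child cursor at it, swap the cursor to the array's children when handed a composite, and then run a single loop over the cursor. The types and await_one() below are invented stand-ins for illustration, not the dma_fence API.

/* Hedged sketch of the "unwrap, then iterate uniformly" pattern. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct fence {
	bool is_array;
	bool signaled;
	struct fence **children;     /* valid when is_array */
	unsigned int num_children;
};

static int await_one(const struct fence *f)
{
	printf("await fence %p\n", (const void *)f);
	return 0;
}

static int await_fence(struct fence *fence)
{
	struct fence **child = &fence; /* a lone fence becomes an array of one */
	unsigned int nchild = 1;

	if (fence->is_array) {
		child = fence->children;
		nchild = fence->num_children; /* assumed non-zero, as in the patch */
	}

	do {
		struct fence *f = *child++;

		if (f->signaled) /* already completed, nothing to wait for */
			continue;

		int err = await_one(f);
		if (err)
			return err;
	} while (--nchild);

	return 0;
}

int main(void)
{
	struct fence a = { .signaled = true };
	struct fence b = { 0 };
	struct fence *kids[] = { &a, &b };
	struct fence array = { .is_array = true, .children = kids, .num_children = 2 };

	return await_fence(&array);
}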
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170503093924.5320-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 41 +++++++++++++++------------------ 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 6198f69..b68935d 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -743,22 +743,9 @@ int i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, struct dma_fence *fence) { - struct dma_fence_array *array; + struct dma_fence **child = &fence; + unsigned int nchild = 1; int ret; - int i; - - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return 0; - - if (dma_fence_is_i915(fence)) - return i915_gem_request_await_request(req, to_request(fence)); - - if (!dma_fence_is_array(fence)) { - ret = i915_sw_fence_await_dma_fence(&req->submit, - fence, I915_FENCE_TIMEOUT, - GFP_KERNEL); - return ret < 0 ? ret : 0; - } /* Note that if the fence-array was created in signal-on-any mode, * we should *not* decompose it into its individual fences. However, @@ -767,21 +754,29 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, * amdgpu and we should not see any incoming fence-array from * sync-file being in signal-on-any mode. */ + if (dma_fence_is_array(fence)) { + struct dma_fence_array *array = to_dma_fence_array(fence); + + child = array->fences; + nchild = array->num_fences; + GEM_BUG_ON(!nchild); + } - array = to_dma_fence_array(fence); - for (i = 0; i < array->num_fences; i++) { - struct dma_fence *child = array->fences[i]; + do { + fence = *child++; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + continue; - if (dma_fence_is_i915(child)) + if (dma_fence_is_i915(fence)) ret = i915_gem_request_await_request(req, - to_request(child)); + to_request(fence)); else - ret = i915_sw_fence_await_dma_fence(&req->submit, - child, I915_FENCE_TIMEOUT, + ret = i915_sw_fence_await_dma_fence(&req->submit, fence, + I915_FENCE_TIMEOUT, GFP_KERNEL); if (ret < 0) return ret; - } + } while (--nchild); return 0; } -- cgit v1.1 From ceae14bd4cc4333b9a3b0b6b9457bb16e7ca410a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 May 2017 10:39:20 +0100 Subject: drm/i915: Lift timeline ordering to await_dma_fence Currently we filter out repeated use of the same timeline in the low level i915_gem_request_await_request(), after having added the dependency on the old request. However, we can lift this to i915_gem_request_await_dma_fence() (before the dependency is added) using the observation that requests along the same timeline are explicitly ordered via i915_add_request (along with the dependencies). 
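A minimal model of that filter, using invented stand-in types rather than the i915 structures: a fence is skipped if it is already signaled or if it shares the request's own context (timeline), since submission order already serialises requests on the same timeline.

/* Hedged sketch of skipping awaits on the request's own timeline. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fence {
	uint64_t context; /* timeline id */
	uint32_t seqno;
	bool signaled;
};

struct request {
	struct fence fence; /* the request's own fence */
	unsigned int num_awaits;
};

static void add_await(struct request *rq, const struct fence *f)
{
	rq->num_awaits++; /* stand-in for wiring up the real dependency */
	printf("await ctx=%llu seqno=%u\n",
	       (unsigned long long)f->context, f->seqno);
}

static void maybe_await(struct request *rq, const struct fence *f)
{
	if (f->signaled)
		return; /* already completed */

	if (f->context == rq->fence.context)
		return; /* same timeline: ordered by submission already */

	add_await(rq, f);
}

int main(void)
{
	struct request rq = { .fence = { .context = 1, .seqno = 5 } };
	struct fence same = { .context = 1, .seqno = 4 };
	struct fence other = { .context = 2, .seqno = 7 };

	maybe_await(&rq, &same);  /* skipped */
	maybe_await(&rq, &other); /* awaited */

	printf("explicit awaits: %u\n", rq.num_awaits);
	return 0;
}

Doing this check before the dependency is added means every await path benefits, which is the point of lifting it out of the low-level helper.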
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170503093924.5320-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index b68935d..022f558 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -687,6 +687,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, int ret; GEM_BUG_ON(to == from); + GEM_BUG_ON(to->timeline == from->timeline); if (i915_gem_request_completed(from)) return 0; @@ -699,9 +700,6 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret; } - if (to->timeline == from->timeline) - return 0; - if (to->engine == from->engine) { ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, &from->submit, @@ -767,6 +765,14 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; + /* + * Requests on the same timeline are explicitly ordered, along + * with their dependencies, by i915_add_request() which ensures + * that requests are submitted in-order through each ring. + */ + if (fence->context == req->fence.context) + continue; + if (dma_fence_is_i915(fence)) ret = i915_gem_request_await_request(req, to_request(fence)); -- cgit v1.1 From 4797948071f607c5b43753cb8f1b7ddcf22e146d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 May 2017 10:39:21 +0100 Subject: drm/i915: Squash repeated awaits on the same fence Track the latest fence waited upon on each context, and only add a new asynchronous wait if the new fence is more recent than the recorded fence for that context. This requires us to filter out unordered timelines, which are noted by DMA_FENCE_NO_CONTEXT. However, in the absence of a universal identifier, we have to use our own i915->mm.unordered_timeline token. v2: Throw around the debug crutches v3: Inline the likely case of the pre-allocation cache being full. v4: Drop the pre-allocation support, we can lose the most recent fence in case of allocation failure -- it just means we may emit more awaits than strictly necessary but will not break. v5: Trim allocation size for leaf nodes, they only need an array of u32 not pointers. v6: Create mock_timeline to tidy selftest writing v7: s/intel_timeline_sync_get/intel_timeline_sync_is_later/ (Tvrtko) v8: Prune the stale sync points when we idle. v9: Include a small benchmark in the kselftests v10: Separate the idr implementation into its own compartment. 
(Tvrkto) v11: Refactor igt_sync kselftests to avoid deep nesting (Tvrkto) v12: __sync_leaf_idx() to assert that p->height is 0 when checking leaves v13: kselftests to investigate struct i915_syncmap itself (Tvrtko) v14: Foray into ascii art graphs v15: Take into account that the random lookup/insert does 2 prng calls, not 1, when benchmarking, and use for_each_set_bit() (Tvrtko) v16: Improved ascii art Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170503093924.5320-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_gem.h | 2 + drivers/gpu/drm/i915/i915_gem_request.c | 9 + drivers/gpu/drm/i915/i915_gem_timeline.c | 95 +++- drivers/gpu/drm/i915/i915_gem_timeline.h | 38 ++ drivers/gpu/drm/i915/i915_syncmap.c | 412 ++++++++++++++ drivers/gpu/drm/i915/i915_syncmap.h | 38 ++ drivers/gpu/drm/i915/selftests/i915_gem_timeline.c | 301 ++++++++++ .../gpu/drm/i915/selftests/i915_mock_selftests.h | 2 + drivers/gpu/drm/i915/selftests/i915_random.c | 11 + drivers/gpu/drm/i915/selftests/i915_random.h | 2 + drivers/gpu/drm/i915/selftests/i915_syncmap.c | 616 +++++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_timeline.c | 45 ++ drivers/gpu/drm/i915/selftests/mock_timeline.h | 33 ++ 15 files changed, 1588 insertions(+), 18 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_syncmap.c create mode 100644 drivers/gpu/drm/i915/i915_syncmap.h create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_timeline.c create mode 100644 drivers/gpu/drm/i915/selftests/i915_syncmap.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_timeline.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_timeline.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 2cf0450..7b05fb8 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -16,6 +16,7 @@ i915-y := i915_drv.o \ i915_params.o \ i915_pci.o \ i915_suspend.o \ + i915_syncmap.o \ i915_sw_fence.o \ i915_sysfs.o \ intel_csr.o \ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e91590d..f9c6b9b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3196,6 +3196,7 @@ i915_gem_idle_work_handler(struct work_struct *work) intel_engine_disarm_breadcrumbs(engine); i915_gem_batch_pool_fini(&engine->batch_pool); } + i915_gem_timelines_mark_idle(dev_priv); GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 5a49487..ee54597 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -25,6 +25,8 @@ #ifndef __I915_GEM_H__ #define __I915_GEM_H__ +#include + #ifdef CONFIG_DRM_I915_DEBUG_GEM #define GEM_BUG_ON(expr) BUG_ON(expr) #define GEM_WARN_ON(expr) WARN_ON(expr) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 022f558..637b8cd 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -773,6 +773,11 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, if (fence->context == req->fence.context) continue; + /* Squash repeated waits to the same timelines */ + if (fence->context != req->i915->mm.unordered_timeline && + intel_timeline_sync_is_later(req->timeline, fence)) + continue; + if (dma_fence_is_i915(fence)) ret = 
i915_gem_request_await_request(req, to_request(fence)); @@ -782,6 +787,10 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, GFP_KERNEL); if (ret < 0) return ret; + + /* Record the latest fence used against each timeline */ + if (fence->context != req->i915->mm.unordered_timeline) + intel_timeline_sync_set(req->timeline, fence); } while (--nchild); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c index b596ca7..c597ce2 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c @@ -23,6 +23,32 @@ */ #include "i915_drv.h" +#include "i915_syncmap.h" + +static void __intel_timeline_init(struct intel_timeline *tl, + struct i915_gem_timeline *parent, + u64 context, + struct lock_class_key *lockclass, + const char *lockname) +{ + tl->fence_context = context; + tl->common = parent; +#ifdef CONFIG_DEBUG_SPINLOCK + __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass); +#else + spin_lock_init(&tl->lock); +#endif + init_request_active(&tl->last_request, NULL); + INIT_LIST_HEAD(&tl->requests); + i915_syncmap_init(&tl->sync); +} + +static void __intel_timeline_fini(struct intel_timeline *tl) +{ + GEM_BUG_ON(!list_empty(&tl->requests)); + + i915_syncmap_free(&tl->sync); +} static int __i915_gem_timeline_init(struct drm_i915_private *i915, struct i915_gem_timeline *timeline, @@ -35,6 +61,14 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915, lockdep_assert_held(&i915->drm.struct_mutex); + /* + * Ideally we want a set of engines on a single leaf as we expect + * to mostly be tracking synchronisation between engines. It is not + * a huge issue if this is not the case, but we may want to mitigate + * any page crossing penalties if they become an issue. + */ + BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + timeline->i915 = i915; timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL); if (!timeline->name) @@ -44,19 +78,10 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915, /* Called during early_init before we know how many engines there are */ fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine)); - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { - struct intel_timeline *tl = &timeline->engine[i]; - - tl->fence_context = fences++; - tl->common = timeline; -#ifdef CONFIG_DEBUG_SPINLOCK - __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass); -#else - spin_lock_init(&tl->lock); -#endif - init_request_active(&tl->last_request, NULL); - INIT_LIST_HEAD(&tl->requests); - } + for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) + __intel_timeline_init(&timeline->engine[i], + timeline, fences++, + lockclass, lockname); return 0; } @@ -81,18 +106,52 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915) &class, "&global_timeline->lock"); } +/** + * i915_gem_timelines_mark_idle -- called when the driver idles + * @i915 - the drm_i915_private device + * + * When the driver is completely idle, we know that all of our sync points + * have been signaled and our tracking is then entirely redundant. Any request + * to wait upon an older sync point will be completed instantly as we know + * the fence is signaled and therefore we will not even look them up in the + * sync point map. 
+ */ +void i915_gem_timelines_mark_idle(struct drm_i915_private *i915) +{ + struct i915_gem_timeline *timeline; + int i; + + lockdep_assert_held(&i915->drm.struct_mutex); + + list_for_each_entry(timeline, &i915->gt.timelines, link) { + for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { + struct intel_timeline *tl = &timeline->engine[i]; + + /* + * All known fences are completed so we can scrap + * the current sync point tracking and start afresh, + * any attempt to wait upon a previous sync point + * will be skipped as the fence was signaled. + */ + i915_syncmap_free(&tl->sync); + } + } +} + void i915_gem_timeline_fini(struct i915_gem_timeline *timeline) { int i; lockdep_assert_held(&timeline->i915->drm.struct_mutex); - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { - struct intel_timeline *tl = &timeline->engine[i]; - - GEM_BUG_ON(!list_empty(&tl->requests)); - } + for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) + __intel_timeline_fini(&timeline->engine[i]); list_del(&timeline->link); kfree(timeline->name); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_timeline.c" +#include "selftests/i915_gem_timeline.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index 6c53e14..ff65c64 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -27,7 +27,9 @@ #include +#include "i915_utils.h" #include "i915_gem_request.h" +#include "i915_syncmap.h" struct i915_gem_timeline; @@ -55,6 +57,17 @@ struct intel_timeline { * struct_mutex. */ struct i915_gem_active last_request; + + /** + * We track the most recent seqno that we wait on in every context so + * that we only have to emit a new await and dependency on a more + * recent sync point. As the contexts may be executed out-of-order, we + * have to track each individually and can not rely on an absolute + * global_seqno. When we know that all tracked fences are completed + * (i.e. when the driver is idle), we know that the syncmap is + * redundant and we can discard it without loss of generality. 
+ */ + struct i915_syncmap *sync; u32 sync_seqno[I915_NUM_ENGINES]; struct i915_gem_timeline *common; @@ -73,6 +86,31 @@ int i915_gem_timeline_init(struct drm_i915_private *i915, struct i915_gem_timeline *tl, const char *name); int i915_gem_timeline_init__global(struct drm_i915_private *i915); +void i915_gem_timelines_mark_idle(struct drm_i915_private *i915); void i915_gem_timeline_fini(struct i915_gem_timeline *tl); +static inline int __intel_timeline_sync_set(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_set(&tl->sync, context, seqno); +} + +static inline int intel_timeline_sync_set(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno); +} + +static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_is_later(&tl->sync, context, seqno); +} + +static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_syncmap.c b/drivers/gpu/drm/i915/i915_syncmap.c new file mode 100644 index 0000000..0087acf --- /dev/null +++ b/drivers/gpu/drm/i915/i915_syncmap.c @@ -0,0 +1,412 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "i915_syncmap.h" + +#include "i915_gem.h" /* GEM_BUG_ON() */ +#include "i915_selftest.h" + +#define SHIFT ilog2(KSYNCMAP) +#define MASK (KSYNCMAP - 1) + +/* + * struct i915_syncmap is a layer of a radixtree that maps a u64 fence + * context id to the last u32 fence seqno waited upon from that context. + * Unlike lib/radixtree it uses a parent pointer that allows traversal back to + * the root. This allows us to access the whole tree via a single pointer + * to the most recently used layer. We expect fence contexts to be dense + * and most reuse to be on the same i915_gem_context but on neighbouring + * engines (i.e. on adjacent contexts) and reuse the same leaf, a very + * effective lookup cache. If the new lookup is not on the same leaf, we + * expect it to be on the neighbouring branch. + * + * A leaf holds an array of u32 seqno, and has height 0. The bitmap field + * allows us to store whether a particular seqno is valid (i.e. allows us + * to distinguish unset from 0). 
+ * + * A branch holds an array of layer pointers, and has height > 0, and always + * has at least 2 layers (either branches or leaves) below it. + * + * For example, + * for x in + * 0 1 2 0x10 0x11 0x200 0x201 + * 0x500000 0x500001 0x503000 0x503001 + * 0xE<<60: + * i915_syncmap_set(&sync, x, lower_32_bits(x)); + * will build a tree like: + * 0xXXXXXXXXXXXXXXXX + * 0-> 0x0000000000XXXXXX + * | 0-> 0x0000000000000XXX + * | | 0-> 0x00000000000000XX + * | | | 0-> 0x000000000000000X 0:0, 1:1, 2:2 + * | | | 1-> 0x000000000000001X 0:10, 1:11 + * | | 2-> 0x000000000000020X 0:200, 1:201 + * | 5-> 0x000000000050XXXX + * | 0-> 0x000000000050000X 0:500000, 1:500001 + * | 3-> 0x000000000050300X 0:503000, 1:503001 + * e-> 0xe00000000000000X e:e + */ + +struct i915_syncmap { + u64 prefix; + unsigned int height; + unsigned int bitmap; + struct i915_syncmap *parent; + /* + * Following this header is an array of either seqno or child pointers: + * union { + * u32 seqno[KSYNCMAP]; + * struct i915_syncmap *child[KSYNCMAP]; + * }; + */ +}; + +/** + * i915_syncmap_init -- initialise the #i915_syncmap + * @root - pointer to the #i915_syncmap + */ +void i915_syncmap_init(struct i915_syncmap **root) +{ + BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP); + BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT); + BUILD_BUG_ON(KSYNCMAP > BITS_PER_BYTE * sizeof((*root)->bitmap)); + *root = NULL; +} + +static inline u32 *__sync_seqno(struct i915_syncmap *p) +{ + GEM_BUG_ON(p->height); + return (u32 *)(p + 1); +} + +static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p) +{ + GEM_BUG_ON(!p->height); + return (struct i915_syncmap **)(p + 1); +} + +static inline unsigned int +__sync_branch_idx(const struct i915_syncmap *p, u64 id) +{ + return (id >> p->height) & MASK; +} + +static inline unsigned int +__sync_leaf_idx(const struct i915_syncmap *p, u64 id) +{ + GEM_BUG_ON(p->height); + return id & MASK; +} + +static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id) +{ + return id >> p->height >> SHIFT; +} + +static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id) +{ + GEM_BUG_ON(p->height); + return id >> SHIFT; +} + +static inline bool seqno_later(u32 a, u32 b) +{ + return (s32)(a - b) >= 0; +} + +/** + * i915_syncmap_is_later -- compare against the last know sync point + * @root - pointer to the #i915_syncmap + * @id - the context id (other timeline) we are synchronising to + * @seqno - the sequence number along the other timeline + * + * If we have already synchronised this @root timeline with another (@id) then + * we can omit any repeated or earlier synchronisation requests. If the two + * timelines are already coupled, we can also omit the dependency between the + * two as that is already known via the timeline. + * + * Returns true if the two timelines are already synchronised wrt to @seqno, + * false if not and the synchronisation must be emitted. 
+ */ +bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno) +{ + struct i915_syncmap *p; + unsigned int idx; + + p = *root; + if (!p) + return false; + + if (likely(__sync_leaf_prefix(p, id) == p->prefix)) + goto found; + + /* First climb the tree back to a parent branch */ + do { + p = p->parent; + if (!p) + return false; + + if (__sync_branch_prefix(p, id) == p->prefix) + break; + } while (1); + + /* And then descend again until we find our leaf */ + do { + if (!p->height) + break; + + p = __sync_child(p)[__sync_branch_idx(p, id)]; + if (!p) + return false; + + if (__sync_branch_prefix(p, id) != p->prefix) + return false; + } while (1); + + *root = p; +found: + idx = __sync_leaf_idx(p, id); + if (!(p->bitmap & BIT(idx))) + return false; + + return seqno_later(__sync_seqno(p)[idx], seqno); +} + +static struct i915_syncmap * +__sync_alloc_leaf(struct i915_syncmap *parent, u64 id) +{ + struct i915_syncmap *p; + + p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL); + if (unlikely(!p)) + return NULL; + + p->parent = parent; + p->height = 0; + p->bitmap = 0; + p->prefix = __sync_leaf_prefix(p, id); + return p; +} + +static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno) +{ + unsigned int idx = __sync_leaf_idx(p, id); + + p->bitmap |= BIT(idx); + __sync_seqno(p)[idx] = seqno; +} + +static inline void __sync_set_child(struct i915_syncmap *p, + unsigned int idx, + struct i915_syncmap *child) +{ + p->bitmap |= BIT(idx); + __sync_child(p)[idx] = child; +} + +static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno) +{ + struct i915_syncmap *p = *root; + unsigned int idx; + + if (!p) { + p = __sync_alloc_leaf(NULL, id); + if (unlikely(!p)) + return -ENOMEM; + + goto found; + } + + /* Caller handled the likely cached case */ + GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix); + + /* Climb back up the tree until we find a common prefix */ + do { + if (!p->parent) + break; + + p = p->parent; + + if (__sync_branch_prefix(p, id) == p->prefix) + break; + } while (1); + + /* + * No shortcut, we have to descend the tree to find the right layer + * containing this fence. + * + * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences + * or lower layers. Leaf nodes (height = 0) contain the fences, all + * other nodes (height > 0) are internal layers that point to a lower + * node. Each internal layer has at least 2 descendents. + * + * Starting at the top, we check whether the current prefix matches. If + * it doesn't, we have gone past our target and need to insert a join + * into the tree, and a new leaf node for the target as a descendent + * of the join, as well as the original layer. + * + * The matching prefix means we are still following the right branch + * of the tree. If it has height 0, we have found our leaf and just + * need to replace the fence slot with ourselves. If the height is + * not zero, our slot contains the next layer in the tree (unless + * it is empty, in which case we can add ourselves as a new leaf). + * As descend the tree the prefix grows (and height decreases). 
+ */ + do { + struct i915_syncmap *next; + + if (__sync_branch_prefix(p, id) != p->prefix) { + unsigned int above; + + /* Insert a join above the current layer */ + next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next), + GFP_KERNEL); + if (unlikely(!next)) + return -ENOMEM; + + /* Compute the height at which these two diverge */ + above = fls64(__sync_branch_prefix(p, id) ^ p->prefix); + above = round_up(above, SHIFT); + next->height = above + p->height; + next->prefix = __sync_branch_prefix(next, id); + + /* Insert the join into the parent */ + if (p->parent) { + idx = __sync_branch_idx(p->parent, id); + __sync_child(p->parent)[idx] = next; + GEM_BUG_ON(!(p->parent->bitmap & BIT(idx))); + } + next->parent = p->parent; + + /* Compute the idx of the other branch, not our id! */ + idx = p->prefix >> (above - SHIFT) & MASK; + __sync_set_child(next, idx, p); + p->parent = next; + + /* Ascend to the join */ + p = next; + } else { + if (!p->height) + break; + } + + /* Descend into the next layer */ + GEM_BUG_ON(!p->height); + idx = __sync_branch_idx(p, id); + next = __sync_child(p)[idx]; + if (!next) { + next = __sync_alloc_leaf(p, id); + if (unlikely(!next)) + return -ENOMEM; + + __sync_set_child(p, idx, next); + p = next; + break; + } + + p = next; + } while (1); + +found: + GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id)); + __sync_set_seqno(p, id, seqno); + *root = p; + return 0; +} + +/** + * i915_syncmap_set -- mark the most recent syncpoint between contexts + * @root - pointer to the #i915_syncmap + * @id - the context id (other timeline) we have synchronised to + * @seqno - the sequence number along the other timeline + * + * When we synchronise this @root timeline with another (@id), we also know + * that we have synchronized with all previous seqno along that timeline. If + * we then have a request to synchronise with the same seqno or older, we can + * omit it, see i915_syncmap_is_later() + * + * Returns 0 on success, or a negative error code. + */ +int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno) +{ + struct i915_syncmap *p = *root; + + /* + * We expect to be called in sequence following is_later(id), which + * should have preloaded the root for us. + */ + if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) { + __sync_set_seqno(p, id, seqno); + return 0; + } + + return __sync_set(root, id, seqno); +} + +static void __sync_free(struct i915_syncmap *p) +{ + if (p->height) { + unsigned int i; + + while ((i = ffs(p->bitmap))) { + p->bitmap &= ~0u << i; + __sync_free(__sync_child(p)[i - 1]); + } + } + + kfree(p); +} + +/** + * i915_syncmap_free -- free all memory associated with the syncmap + * @root - pointer to the #i915_syncmap + * + * Either when the timeline is to be freed and we no longer need the sync + * point tracking, or when the fences are all known to be signaled and the + * sync point tracking is redundant, we can free the #i915_syncmap to recover + * its allocations. + * + * Will reinitialise the @root pointer so that the #i915_syncmap is ready for + * reuse. 
+ */ +void i915_syncmap_free(struct i915_syncmap **root) +{ + struct i915_syncmap *p; + + p = *root; + if (!p) + return; + + while (p->parent) + p = p->parent; + + __sync_free(p); + *root = NULL; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_syncmap.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_syncmap.h b/drivers/gpu/drm/i915/i915_syncmap.h new file mode 100644 index 0000000..0653f70 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_syncmap.h @@ -0,0 +1,38 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_SYNCMAP_H__ +#define __I915_SYNCMAP_H__ + +#include + +struct i915_syncmap; +#define KSYNCMAP 16 /* radix of the tree, how many slots in each layer */ + +void i915_syncmap_init(struct i915_syncmap **root); +int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno); +bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno); +void i915_syncmap_free(struct i915_syncmap **root); + +#endif /* __I915_SYNCMAP_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c new file mode 100644 index 0000000..6df00cc --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c @@ -0,0 +1,301 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "../i915_selftest.h" +#include "i915_random.h" + +#include "mock_gem_device.h" +#include "mock_timeline.h" + +struct __igt_sync { + const char *name; + u32 seqno; + bool expected; + bool set; +}; + +static int __igt_sync(struct intel_timeline *tl, + u64 ctx, + const struct __igt_sync *p, + const char *name) +{ + int ret; + + if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { + pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", + name, p->name, ctx, p->seqno, yesno(p->expected)); + return -EINVAL; + } + + if (p->set) { + ret = __intel_timeline_sync_set(tl, ctx, p->seqno); + if (ret) + return ret; + } + + return 0; +} + +static int igt_sync(void *arg) +{ + const struct __igt_sync pass[] = { + { "unset", 0, false, false }, + { "new", 0, false, true }, + { "0a", 0, true, true }, + { "1a", 1, false, true }, + { "1b", 1, true, true }, + { "0b", 0, true, false }, + { "2a", 2, false, true }, + { "4", 4, false, true }, + { "INT_MAX", INT_MAX, false, true }, + { "INT_MAX-1", INT_MAX-1, true, false }, + { "INT_MAX+1", (u32)INT_MAX+1, false, true }, + { "INT_MAX", INT_MAX, true, false }, + { "UINT_MAX", UINT_MAX, false, true }, + { "wrap", 0, false, true }, + { "unwrap", UINT_MAX, true, false }, + {}, + }, *p; + struct intel_timeline *tl; + int order, offset; + int ret; + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + for (p = pass; p->name; p++) { + for (order = 1; order < 64; order++) { + for (offset = -1; offset <= (order > 1); offset++) { + u64 ctx = BIT_ULL(order) + offset; + + ret = __igt_sync(tl, ctx, p, "1"); + if (ret) + goto out; + } + } + } + mock_timeline_destroy(tl); + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + for (order = 1; order < 64; order++) { + for (offset = -1; offset <= (order > 1); offset++) { + u64 ctx = BIT_ULL(order) + offset; + + for (p = pass; p->name; p++) { + ret = __igt_sync(tl, ctx, p, "2"); + if (ret) + goto out; + } + } + } + +out: + mock_timeline_destroy(tl); + return ret; +} + +static unsigned int random_engine(struct rnd_state *rnd) +{ + return ((u64)prandom_u32_state(rnd) * I915_NUM_ENGINES) >> 32; +} + +static int bench_sync(void *arg) +{ +#define M (1 << 20) + struct rnd_state prng; + struct intel_timeline *tl; + unsigned long end_time, count; + u64 prng32_1M; + ktime_t kt; + int order, last_order; + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + /* Lookups from cache are very fast and so the random number generation + * and the loop itself becomes a significant factor in the per-iteration + * timings. We try to compensate the results by measuring the overhead + * of the prng and subtract it from the reported results. 
+ */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 x; + + /* Make sure the compiler doesn't optimise away the prng call */ + WRITE_ONCE(x, prandom_u32_state(&prng)); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_debug("%s: %lu random evaluations, %lluns/prng\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + prng32_1M = ktime_to_ns(kt) * M / count; + + /* Benchmark (only) setting random context ids */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u64 id = i915_prandom_u64_state(&prng); + + __intel_timeline_sync_set(tl, id, 0); + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M); + pr_info("%s: %lu random insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + prandom_seed_state(&prng, i915_selftest.random_seed); + end_time = count; + kt = ktime_get(); + while (end_time--) { + u64 id = i915_prandom_u64_state(&prng); + + if (!__intel_timeline_sync_is_later(tl, id, 0)) { + mock_timeline_destroy(tl); + pr_err("Lookup of %llu failed\n", id); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M); + pr_info("%s: %lu random lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_destroy(tl); + cond_resched(); + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + /* Benchmark setting the first N (in order) contexts */ + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + __intel_timeline_sync_set(tl, count++, 0); + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + end_time = count; + kt = ktime_get(); + while (end_time--) { + if (!__intel_timeline_sync_is_later(tl, end_time, 0)) { + pr_err("Lookup of %lu failed\n", end_time); + mock_timeline_destroy(tl); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_destroy(tl); + cond_resched(); + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + /* Benchmark searching for a random context id and maybe changing it */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 id = random_engine(&prng); + u32 seqno = prandom_u32_state(&prng); + + if (!__intel_timeline_sync_is_later(tl, id, seqno)) + __intel_timeline_sync_set(tl, id, seqno); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M); + pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + mock_timeline_destroy(tl); + cond_resched(); + + /* Benchmark searching for a known context id and changing the seqno */ + for (last_order = 1, order = 1; order < 32; + ({ int tmp = last_order; last_order = order; order += tmp; })) { + unsigned int 
mask = BIT(order) - 1; + + tl = mock_timeline(0); + if (!tl) + return -ENOMEM; + + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + /* Without assuming too many details of the underlying + * implementation, try to identify its phase-changes + * (if any)! + */ + u64 id = (u64)(count & mask) << order; + + __intel_timeline_sync_is_later(tl, id, 0); + __intel_timeline_sync_set(tl, id, 0); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", + __func__, count, order, + (long long)div64_ul(ktime_to_ns(kt), count)); + mock_timeline_destroy(tl); + cond_resched(); + } + + return 0; +#undef M +} + +int i915_gem_timeline_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_sync), + SUBTEST(bench_sync), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index be9a9eb..76c1f14 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -10,8 +10,10 @@ */ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ selftest(scatterlist, scatterlist_mock_selftests) +selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) +selftest(timelines, i915_gem_timeline_mock_selftests) selftest(requests, i915_gem_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index c17c83c..d044bf9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -30,6 +30,17 @@ #include "i915_random.h" +u64 i915_prandom_u64_state(struct rnd_state *rnd) +{ + u64 x; + + x = prandom_u32_state(rnd); + x <<= 32; + x |= prandom_u32_state(rnd); + + return x; +} + static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) { return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index b9c334c..6c93798 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -41,6 +41,8 @@ #define I915_RND_SUBSTATE(name__, parent__) \ struct rnd_state name__ = I915_RND_STATE_INITIALIZER(prandom_u32_state(&(parent__))) +u64 i915_prandom_u64_state(struct rnd_state *rnd); + unsigned int *i915_random_order(unsigned int count, struct rnd_state *state); void i915_random_reorder(unsigned int *order, diff --git a/drivers/gpu/drm/i915/selftests/i915_syncmap.c b/drivers/gpu/drm/i915/selftests/i915_syncmap.c new file mode 100644 index 0000000..bcab3d0 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_syncmap.c @@ -0,0 +1,616 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" +#include "i915_random.h" + +static char * +__sync_print(struct i915_syncmap *p, + char *buf, unsigned long *sz, + unsigned int depth, + unsigned int last, + unsigned int idx) +{ + unsigned long len; + unsigned int i, X; + + if (depth) { + unsigned int d; + + for (d = 0; d < depth - 1; d++) { + if (last & BIT(depth - d - 1)) + len = scnprintf(buf, *sz, "| "); + else + len = scnprintf(buf, *sz, " "); + buf += len; + *sz -= len; + } + len = scnprintf(buf, *sz, "%x-> ", idx); + buf += len; + *sz -= len; + } + + /* We mark bits after the prefix as "X" */ + len = scnprintf(buf, *sz, "0x%016llx", p->prefix << p->height << SHIFT); + buf += len; + *sz -= len; + X = (p->height + SHIFT) / 4; + scnprintf(buf - X, *sz + X, "%*s", X, "XXXXXXXXXXXXXXXXX"); + + if (!p->height) { + for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) { + len = scnprintf(buf, *sz, " %x:%x,", + i, __sync_seqno(p)[i]); + buf += len; + *sz -= len; + } + buf -= 1; + *sz += 1; + } + + len = scnprintf(buf, *sz, "\n"); + buf += len; + *sz -= len; + + if (p->height) { + for_each_set_bit(i, (unsigned long *)&p->bitmap, KSYNCMAP) { + buf = __sync_print(__sync_child(p)[i], buf, sz, + depth + 1, + last << 1 | !!(p->bitmap >> (i + 1)), + i); + } + } + + return buf; +} + +static bool +i915_syncmap_print_to_buf(struct i915_syncmap *p, char *buf, unsigned long sz) +{ + if (!p) + return false; + + while (p->parent) + p = p->parent; + + __sync_print(p, buf, &sz, 0, 1, 0); + return true; +} + +static int check_syncmap_free(struct i915_syncmap **sync) +{ + i915_syncmap_free(sync); + if (*sync) { + pr_err("sync not cleared after free\n"); + return -EINVAL; + } + + return 0; +} + +static int dump_syncmap(struct i915_syncmap *sync, int err) +{ + char *buf; + + if (!err) + return check_syncmap_free(&sync); + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + goto skip; + + if (i915_syncmap_print_to_buf(sync, buf, PAGE_SIZE)) + pr_err("%s", buf); + + kfree(buf); + +skip: + i915_syncmap_free(&sync); + return err; +} + +static int igt_syncmap_init(void *arg) +{ + struct i915_syncmap *sync = (void *)~0ul; + + /* + * Cursory check that we can initialise a random pointer and transform + * it into the root pointer of a syncmap. 
+ */ + + i915_syncmap_init(&sync); + return check_syncmap_free(&sync); +} + +static int check_seqno(struct i915_syncmap *leaf, unsigned int idx, u32 seqno) +{ + if (leaf->height) { + pr_err("%s: not a leaf, height is %d\n", + __func__, leaf->height); + return -EINVAL; + } + + if (__sync_seqno(leaf)[idx] != seqno) { + pr_err("%s: seqno[%d], found %x, expected %x\n", + __func__, idx, __sync_seqno(leaf)[idx], seqno); + return -EINVAL; + } + + return 0; +} + +static int check_one(struct i915_syncmap **sync, u64 context, u32 seqno) +{ + int err; + + err = i915_syncmap_set(sync, context, seqno); + if (err) + return err; + + if ((*sync)->height) { + pr_err("Inserting first context=%llx did not return leaf (height=%d, prefix=%llx\n", + context, (*sync)->height, (*sync)->prefix); + return -EINVAL; + } + + if ((*sync)->parent) { + pr_err("Inserting first context=%llx created branches!\n", + context); + return -EINVAL; + } + + if (hweight32((*sync)->bitmap) != 1) { + pr_err("First bitmap does not contain a single entry, found %x (count=%d)!\n", + (*sync)->bitmap, hweight32((*sync)->bitmap)); + return -EINVAL; + } + + err = check_seqno((*sync), ilog2((*sync)->bitmap), seqno); + if (err) + return err; + + if (!i915_syncmap_is_later(sync, context, seqno)) { + pr_err("Lookup of first context=%llx/seqno=%x failed!\n", + context, seqno); + return -EINVAL; + } + + return 0; +} + +static int igt_syncmap_one(void *arg) +{ + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + struct i915_syncmap *sync; + unsigned long max = 1; + int err; + + /* + * Check that inserting a new id, creates a leaf and only that leaf. + */ + + i915_syncmap_init(&sync); + + do { + u64 context = i915_prandom_u64_state(&prng); + unsigned long loop; + + err = check_syncmap_free(&sync); + if (err) + goto out; + + for (loop = 0; loop <= max; loop++) { + err = check_one(&sync, context, + prandom_u32_state(&prng)); + if (err) + goto out; + } + max++; + } while (!__igt_timeout(end_time, NULL)); + pr_debug("%s: Completed %lu single insertions\n", + __func__, max * (max - 1) / 2); +out: + return dump_syncmap(sync, err); +} + +static int check_leaf(struct i915_syncmap **sync, u64 context, u32 seqno) +{ + int err; + + err = i915_syncmap_set(sync, context, seqno); + if (err) + return err; + + if ((*sync)->height) { + pr_err("Inserting context=%llx did not return leaf (height=%d, prefix=%llx\n", + context, (*sync)->height, (*sync)->prefix); + return -EINVAL; + } + + if (hweight32((*sync)->bitmap) != 1) { + pr_err("First entry into leaf (context=%llx) does not contain a single entry, found %x (count=%d)!\n", + context, (*sync)->bitmap, hweight32((*sync)->bitmap)); + return -EINVAL; + } + + err = check_seqno((*sync), ilog2((*sync)->bitmap), seqno); + if (err) + return err; + + if (!i915_syncmap_is_later(sync, context, seqno)) { + pr_err("Lookup of first entry context=%llx/seqno=%x failed!\n", + context, seqno); + return -EINVAL; + } + + return 0; +} + +static int igt_syncmap_join_above(void *arg) +{ + struct i915_syncmap *sync; + unsigned int pass, order; + int err; + + i915_syncmap_init(&sync); + + /* + * When we have a new id that doesn't fit inside the existing tree, + * we need to add a new layer above. + * + * 1: 0x00000001 + * 2: 0x00000010 + * 3: 0x00000100 + * 4: 0x00001000 + * ... + * Each pass the common prefix shrinks and we have to insert a join. + * Each join will only contain two branches, the latest of which + * is always a leaf. + * + * If we then reuse the same set of contexts, we expect to build an + * identical tree. 
+ */ + for (pass = 0; pass < 3; pass++) { + for (order = 0; order < 64; order += SHIFT) { + u64 context = BIT_ULL(order); + struct i915_syncmap *join; + + err = check_leaf(&sync, context, 0); + if (err) + goto out; + + join = sync->parent; + if (!join) /* very first insert will have no parents */ + continue; + + if (!join->height) { + pr_err("Parent with no height!\n"); + err = -EINVAL; + goto out; + } + + if (hweight32(join->bitmap) != 2) { + pr_err("Join does not have 2 children: %x (%d)\n", + join->bitmap, hweight32(join->bitmap)); + err = -EINVAL; + goto out; + } + + if (__sync_child(join)[__sync_branch_idx(join, context)] != sync) { + pr_err("Leaf misplaced in parent!\n"); + err = -EINVAL; + goto out; + } + } + } +out: + return dump_syncmap(sync, err); +} + +static int igt_syncmap_join_below(void *arg) +{ + struct i915_syncmap *sync; + unsigned int step, order, idx; + int err; + + i915_syncmap_init(&sync); + + /* + * Check that we can split a compacted branch by replacing it with + * a join. + */ + for (step = 0; step < KSYNCMAP; step++) { + for (order = 64 - SHIFT; order > 0; order -= SHIFT) { + u64 context = step * BIT_ULL(order); + + err = i915_syncmap_set(&sync, context, 0); + if (err) + goto out; + + if (sync->height) { + pr_err("Inserting context=%llx (order=%d, step=%d) did not return leaf (height=%d, prefix=%llx\n", + context, order, step, sync->height, sync->prefix); + err = -EINVAL; + goto out; + } + } + } + + for (step = 0; step < KSYNCMAP; step++) { + for (order = SHIFT; order < 64; order += SHIFT) { + u64 context = step * BIT_ULL(order); + + if (!i915_syncmap_is_later(&sync, context, 0)) { + pr_err("1: context %llx (order=%d, step=%d) not found\n", + context, order, step); + err = -EINVAL; + goto out; + } + + for (idx = 1; idx < KSYNCMAP; idx++) { + if (i915_syncmap_is_later(&sync, context + idx, 0)) { + pr_err("1: context %llx (order=%d, step=%d) should not exist\n", + context + idx, order, step); + err = -EINVAL; + goto out; + } + } + } + } + + for (order = SHIFT; order < 64; order += SHIFT) { + for (step = 0; step < KSYNCMAP; step++) { + u64 context = step * BIT_ULL(order); + + if (!i915_syncmap_is_later(&sync, context, 0)) { + pr_err("2: context %llx (order=%d, step=%d) not found\n", + context, order, step); + err = -EINVAL; + goto out; + } + } + } + +out: + return dump_syncmap(sync, err); +} + +static int igt_syncmap_neighbours(void *arg) +{ + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + struct i915_syncmap *sync; + int err; + + /* + * Each leaf holds KSYNCMAP seqno. Check that when we create KSYNCMAP + * neighbouring ids, they all fit into the same leaf. 
+ */ + + i915_syncmap_init(&sync); + do { + u64 context = i915_prandom_u64_state(&prng) & ~MASK; + unsigned int idx; + + if (i915_syncmap_is_later(&sync, context, 0)) /* Skip repeats */ + continue; + + for (idx = 0; idx < KSYNCMAP; idx++) { + err = i915_syncmap_set(&sync, context + idx, 0); + if (err) + goto out; + + if (sync->height) { + pr_err("Inserting context=%llx did not return leaf (height=%d, prefix=%llx\n", + context, sync->height, sync->prefix); + err = -EINVAL; + goto out; + } + + if (sync->bitmap != BIT(idx + 1) - 1) { + pr_err("Inserting neighbouring context=0x%llx+%d, did not fit into the same leaf bitmap=%x (%d), expected %lx (%d)\n", + context, idx, + sync->bitmap, hweight32(sync->bitmap), + BIT(idx + 1) - 1, idx + 1); + err = -EINVAL; + goto out; + } + } + } while (!__igt_timeout(end_time, NULL)); +out: + return dump_syncmap(sync, err); +} + +static int igt_syncmap_compact(void *arg) +{ + struct i915_syncmap *sync; + unsigned int idx, order; + int err; + + i915_syncmap_init(&sync); + + /* + * The syncmap are "space efficient" compressed radix trees - any + * branch with only one child is skipped and replaced by the child. + * + * If we construct a tree with ids that are neighbouring at a non-zero + * height, we form a join but each child of that join is directly a + * leaf holding the single id. + */ + for (order = SHIFT; order < 64; order += SHIFT) { + err = check_syncmap_free(&sync); + if (err) + goto out; + + /* Create neighbours in the parent */ + for (idx = 0; idx < KSYNCMAP; idx++) { + u64 context = idx * BIT_ULL(order) + idx; + + err = i915_syncmap_set(&sync, context, 0); + if (err) + goto out; + + if (sync->height) { + pr_err("Inserting context=%llx (order=%d, idx=%d) did not return leaf (height=%d, prefix=%llx\n", + context, order, idx, + sync->height, sync->prefix); + err = -EINVAL; + goto out; + } + } + + sync = sync->parent; + if (sync->parent) { + pr_err("Parent (join) of last leaf was not the sync!\n"); + err = -EINVAL; + goto out; + } + + if (sync->height != order) { + pr_err("Join does not have the expected height, found %d, expected %d\n", + sync->height, order); + err = -EINVAL; + goto out; + } + + if (sync->bitmap != BIT(KSYNCMAP) - 1) { + pr_err("Join is not full!, found %x (%d) expected %lx (%d)\n", + sync->bitmap, hweight32(sync->bitmap), + BIT(KSYNCMAP) - 1, KSYNCMAP); + err = -EINVAL; + goto out; + } + + /* Each of our children should be a leaf */ + for (idx = 0; idx < KSYNCMAP; idx++) { + struct i915_syncmap *leaf = __sync_child(sync)[idx]; + + if (leaf->height) { + pr_err("Child %d is a not leaf!\n", idx); + err = -EINVAL; + goto out; + } + + if (leaf->parent != sync) { + pr_err("Child %d is not attached to us!\n", + idx); + err = -EINVAL; + goto out; + } + + if (!is_power_of_2(leaf->bitmap)) { + pr_err("Child %d holds more than one id, found %x (%d)\n", + idx, leaf->bitmap, hweight32(leaf->bitmap)); + err = -EINVAL; + goto out; + } + + if (leaf->bitmap != BIT(idx)) { + pr_err("Child %d has wrong seqno idx, found %d, expected %d\n", + idx, ilog2(leaf->bitmap), idx); + err = -EINVAL; + goto out; + } + } + } +out: + return dump_syncmap(sync, err); +} + +static int igt_syncmap_random(void *arg) +{ + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + struct i915_syncmap *sync; + unsigned long count, phase, i; + u32 seqno; + int err; + + i915_syncmap_init(&sync); + + /* + * Having tried to test the individual operations within i915_syncmap, + * run a smoketest exploring the entire u64 space with random + * insertions. 
+ */ + + count = 0; + phase = jiffies + HZ/100 + 1; + do { + u64 context = i915_prandom_u64_state(&prng); + + err = i915_syncmap_set(&sync, context, 0); + if (err) + goto out; + + count++; + } while (!time_after(jiffies, phase)); + seqno = 0; + + phase = 0; + do { + I915_RND_STATE(ctx); + u32 last_seqno = seqno; + bool expect; + + seqno = prandom_u32_state(&prng); + expect = seqno_later(last_seqno, seqno); + + for (i = 0; i < count; i++) { + u64 context = i915_prandom_u64_state(&ctx); + + if (i915_syncmap_is_later(&sync, context, seqno) != expect) { + pr_err("context=%llu, last=%u this=%u did not match expectation (%d)\n", + context, last_seqno, seqno, expect); + err = -EINVAL; + goto out; + } + + err = i915_syncmap_set(&sync, context, seqno); + if (err) + goto out; + } + + phase++; + } while (!__igt_timeout(end_time, NULL)); + pr_debug("Completed %lu passes, each of %lu contexts\n", phase, count); +out: + return dump_syncmap(sync, err); +} + +int i915_syncmap_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_syncmap_init), + SUBTEST(igt_syncmap_one), + SUBTEST(igt_syncmap_join_above), + SUBTEST(igt_syncmap_join_below), + SUBTEST(igt_syncmap_neighbours), + SUBTEST(igt_syncmap_compact), + SUBTEST(igt_syncmap_random), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c new file mode 100644 index 0000000..47b1f47 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -0,0 +1,45 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include "mock_timeline.h" + +struct intel_timeline *mock_timeline(u64 context) +{ + static struct lock_class_key class; + struct intel_timeline *tl; + + tl = kzalloc(sizeof(*tl), GFP_KERNEL); + if (!tl) + return NULL; + + __intel_timeline_init(tl, NULL, context, &class, "mock"); + + return tl; +} + +void mock_timeline_destroy(struct intel_timeline *tl) +{ + __intel_timeline_fini(tl); + kfree(tl); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h new file mode 100644 index 0000000..c27ff46 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.h @@ -0,0 +1,33 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_TIMELINE__ +#define __MOCK_TIMELINE__ + +#include "../i915_gem_timeline.h" + +struct intel_timeline *mock_timeline(u64 context); +void mock_timeline_destroy(struct intel_timeline *tl); + +#endif /* !__MOCK_TIMELINE__ */ -- cgit v1.1 From 7e8894e97dc4999270a5713034d8bd265e3ac3da Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 May 2017 10:39:22 +0100 Subject: drm/i915: Rename intel_timeline.sync_seqno[] to .global_sync[] With the addition of the inter-context intel_time.sync map, having a very similar sync_seqno[] is confusing. Aide the reader by denoting that this is a pre-allocated array for storing semaphore sync points wrt to the global seqno. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170503093924.5320-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 8 ++++---- drivers/gpu/drm/i915/i915_gem_timeline.h | 9 ++++++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 637b8cd..b6246b5 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -218,8 +218,8 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) tl->seqno = seqno; list_for_each_entry(timeline, &i915->gt.timelines, link) - memset(timeline->engine[id].sync_seqno, 0, - sizeof(timeline->engine[id].sync_seqno)); + memset(timeline->engine[id].global_sync, 0, + sizeof(timeline->engine[id].global_sync)); } return 0; @@ -715,7 +715,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret < 0 ? 
ret : 0; } - if (seqno <= to->timeline->sync_seqno[from->engine->id]) + if (seqno <= to->timeline->global_sync[from->engine->id]) return 0; trace_i915_gem_ring_sync_to(to, from); @@ -733,7 +733,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret; } - to->timeline->sync_seqno[from->engine->id] = seqno; + to->timeline->global_sync[from->engine->id] = seqno; return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index ff65c64..bfb5eb9 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -68,7 +68,14 @@ struct intel_timeline { * redundant and we can discard it without loss of generality. */ struct i915_syncmap *sync; - u32 sync_seqno[I915_NUM_ENGINES]; + /** + * Separately to the inter-context seqno map above, we track the last + * barrier (e.g. semaphore wait) to the global engine timelines. Note + * that this tracks global_seqno rather than the context.seqno, and + * so it is subject to the limitations of hw wraparound and that we + * may need to revoke global_seqno (on pre-emption). + */ + u32 global_sync[I915_NUM_ENGINES]; struct i915_gem_timeline *common; }; -- cgit v1.1 From fc9d4d2b6fd5a21d7718137807d8587f6fd9e03c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 May 2017 10:39:23 +0100 Subject: drm/i915: Do not record a successful syncpoint for a dma-await MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we may unwind the requests, even though the request we are awaiting has a global_seqno that seqno may be revoked during the await and so we can not reliably use it as a barrier for all future awaits on the same timeline. Signed-off-by: Chris Wilson Cc: Michał Winiarski Reviewed-by: Michał Winiarski Link: http://patchwork.freedesktop.org/patch/msgid/20170503093924.5320-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 37 +++++++++++++++++---------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index b6246b5..b30d432 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -708,33 +708,34 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, } seqno = i915_gem_request_global_seqno(from); - if (!seqno) { - ret = i915_sw_fence_await_dma_fence(&to->submit, - &from->fence, 0, - GFP_KERNEL); - return ret < 0 ? ret : 0; - } + if (!seqno) + goto await_dma_fence; - if (seqno <= to->timeline->global_sync[from->engine->id]) - return 0; - - trace_i915_gem_ring_sync_to(to, from); if (!i915.semaphores) { - if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) { - ret = i915_sw_fence_await_dma_fence(&to->submit, - &from->fence, 0, - GFP_KERNEL); - if (ret < 0) - return ret; - } + if (!__i915_gem_request_started(from, seqno)) + goto await_dma_fence; + + if (!__i915_spin_request(from, seqno, TASK_INTERRUPTIBLE, 2)) + goto await_dma_fence; } else { + if (seqno <= to->timeline->global_sync[from->engine->id]) + return 0; + + trace_i915_gem_ring_sync_to(to, from); ret = to->engine->semaphore.sync_to(to, from); if (ret) return ret; + + to->timeline->global_sync[from->engine->id] = seqno; } - to->timeline->global_sync[from->engine->id] = seqno; return 0; + +await_dma_fence: + ret = i915_sw_fence_await_dma_fence(&to->submit, + &from->fence, 0, + GFP_KERNEL); + return ret < 0 ? 
ret : 0; } int -- cgit v1.1 From 49f08598bf7a52eadebda851a5e8e6fa1dc2e15e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 May 2017 10:39:24 +0100 Subject: drm/i915: Switch the global i915.semaphores check to a local predicate Rather than use a global modparam, we can just check to see if the engine has semaphores configured upon it. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170503093924.5320-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index b30d432..9074303 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -711,13 +711,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, if (!seqno) goto await_dma_fence; - if (!i915.semaphores) { + if (!to->engine->semaphore.sync_to) { if (!__i915_gem_request_started(from, seqno)) goto await_dma_fence; if (!__i915_spin_request(from, seqno, TASK_INTERRUPTIBLE, 2)) goto await_dma_fence; } else { + GEM_BUG_ON(!from->engine->semaphore.signal); + if (seqno <= to->timeline->global_sync[from->engine->id]) return 0; -- cgit v1.1 From 183c00350ccda86781f6695840e6c5f5b22efbd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Apr 2017 19:02:20 +0300 Subject: drm/i915: Fix runtime PM for LPE audio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not calling pm_runtime_enable() means that runtime PM can't be enabled at all via sysfs. So we definitely need to call it from somewhere. Calling it from the driver seems like a bad idea because it would have to be paired with a pm_runtime_disable() at driver unload time, otherwise the core gets upset. Also if there's no LPE audio driver loaded then we couldn't runtime suspend i915 either. So it looks like a better plan is to call it from i915 when we register the platform device. That seems to match how pci generally does things. I cargo culted the pm_runtime_forbid() and pm_runtime_set_active() calls from pci as well. The exposed runtime PM API is massive an thorougly misleading, so I don't actually know if this is how you're supposed to use the API or not. But it seems to work. I can now runtime suspend i915 again with or without the LPE audio driver loaded, and reloading the LPE audio driver also seems to work. Note that powertop won't auto-tune runtime PM for platform devices, which is a little annoying. So I'm not sure that leaving runtime PM in "on" mode by default is the best choice here. But I've left it like that for now at least. Also remove the comment about there not being much benefit from LPE audio runtime PM. Not allowing runtime PM blocks i915 runtime PM, which will also block s0ix, and that could have a measurable impact on power consumption. 
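
For reference, the priming sequence described above (register the child
platform device, then call pm_runtime_forbid(), pm_runtime_set_active() and
pm_runtime_enable() on it from the parent) is a generic parent-side pattern.
A minimal sketch of that pattern, with a hypothetical device name and helper,
not the actual patch, which follows below:

    #include <linux/err.h>
    #include <linux/platform_device.h>
    #include <linux/pm_runtime.h>

    static struct platform_device *register_child(struct device *parent)
    {
            struct platform_device_info pinfo = {
                    .parent = parent,
                    .name   = "example-child",      /* hypothetical name */
                    .id     = -1,
            };
            struct platform_device *pdev;

            pdev = platform_device_register_full(&pinfo);
            if (IS_ERR(pdev))
                    return pdev;

            /* keep the device in "on" mode until userspace opts in via sysfs */
            pm_runtime_forbid(&pdev->dev);
            /* tell the PM core the device starts out powered up */
            pm_runtime_set_active(&pdev->dev);
            /* from here on the runtime PM framework manages the device */
            pm_runtime_enable(&pdev->dev);

            return pdev;
    }

With this arrangement the child driver only has to provide its runtime
suspend/resume callbacks; whether suspend is actually allowed becomes a
policy decision made through power/control in sysfs, as discussed above.
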
Cc: stable@vger.kernel.org Cc: Takashi Iwai Cc: Pierre-Louis Bossart Fixes: 0b6b524f3915 ("ALSA: x86: Don't enable runtime PM as default") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-2-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai --- drivers/gpu/drm/i915/intel_lpe_audio.c | 5 +++++ sound/x86/intel_hdmi_audio.c | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 25d8e76..668f004 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "i915_drv.h" #include @@ -121,6 +122,10 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) kfree(rsc); + pm_runtime_forbid(&platdev->dev); + pm_runtime_set_active(&platdev->dev); + pm_runtime_enable(&platdev->dev); + return platdev; err: diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index c505b01..bfac6f2 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1809,10 +1809,6 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) pdata->notify_pending = false; spin_unlock_irq(&pdata->lpe_audio_slock); - /* runtime PM isn't enabled as default, since it won't save much on - * BYT/CHT devices; user who want the runtime PM should adjust the - * power/ontrol and power/autosuspend_delay_ms sysfs entries instead - */ pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_set_active(&pdev->dev); -- cgit v1.1 From 8d5c30308d7c5a17db96fa5452c0232f633377c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Apr 2017 19:02:21 +0300 Subject: ALSA: x86: Clear the pdata.notify_lpe_audio pointer before teardown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clear the notify function pointer in the platform data before we tear down the driver. Otherwise i915 would end up calling a stale function pointer and possibly explode. Cc: stable@vger.kernel.org Cc: Takashi Iwai Cc: Pierre-Louis Bossart Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-3-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai --- sound/x86/intel_hdmi_audio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index bfac6f2..5b89662 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1665,6 +1665,11 @@ static int __maybe_unused hdmi_lpe_audio_resume(struct device *dev) static void hdmi_lpe_audio_free(struct snd_card *card) { struct snd_intelhad *ctx = card->private_data; + struct intel_hdmi_lpe_audio_pdata *pdata = ctx->dev->platform_data; + + spin_lock_irq(&pdata->lpe_audio_slock); + pdata->notify_audio_lpe = NULL; + spin_unlock_irq(&pdata->lpe_audio_slock); cancel_work_sync(&ctx->hdmi_audio_wq); -- cgit v1.1 From ebf5f921478b9b55ed4e634b994571dd23a8fca3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Apr 2017 19:02:22 +0300 Subject: drm/i915: Stop pretending to mask/unmask LPE audio interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vlv_display_irq_postinstall() enables the LPE audio interrupts regardless of whether the LPE audio irq chip has masked/unmasked them. 
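
In isolation, the shape this change ends up with is an irq_chip whose
mask/unmask hooks deliberately do nothing, because the parent interrupt
controller owns the real hardware mask. A stripped-down sketch of that shape,
with illustrative names rather than the driver's:

    #include <linux/irq.h>

    static void child_irq_mask(struct irq_data *d)
    {
            /* mask state is owned by the parent controller */
    }

    static void child_irq_unmask(struct irq_data *d)
    {
            /* likewise a deliberate no-op */
    }

    static struct irq_chip child_irqchip = {
            .name       = "child-irqchip",  /* hypothetical */
            .irq_mask   = child_irq_mask,
            .irq_unmask = child_irq_unmask,
    };
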
Also the irqchip masking/unmasking doesn't consider the state of the display power well or the device, and hence just leads to dmesg spew when it tries to access the hardware while it's powered down. If the current way works, then we don't need to do anything in the mask/unmask hooks. If it doesn't work, well, then we'd need to properly track whether the irqchip has masked/unmasked the interrupts when we enable display interrupts. And the mask/unmask hooks would need to check whether display interrupts are even enabled before frobbing with he registers. So let's just assume the current way works and neuter the mask/unmask hooks. Also clean up vlv_display_irq_postinstall() a bit and stop it from trying to unmask/enable the LPE C interrupt on VLV since it doesn't exist. Cc: Takashi Iwai Cc: Pierre-Louis Bossart Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-4-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai --- drivers/gpu/drm/i915/i915_irq.c | 15 ++++++-------- drivers/gpu/drm/i915/intel_lpe_audio.c | 36 ---------------------------------- 2 files changed, 6 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0e4dcbeb..c99f51c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2953,7 +2953,6 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) u32 pipestat_mask; u32 enable_mask; enum pipe pipe; - u32 val; pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV | PIPE_CRC_DONE_INTERRUPT_STATUS; @@ -2964,18 +2963,16 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) enable_mask = I915_DISPLAY_PORT_INTERRUPT | I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT; + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + I915_LPE_PIPE_A_INTERRUPT | + I915_LPE_PIPE_B_INTERRUPT; + if (IS_CHERRYVIEW(dev_priv)) - enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT; + enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT | + I915_LPE_PIPE_C_INTERRUPT; WARN_ON(dev_priv->irq_mask != ~0); - val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT | - I915_LPE_PIPE_C_INTERRUPT); - - enable_mask |= val; - dev_priv->irq_mask = ~enable_mask; GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask); diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 668f004..292fedf 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -149,44 +149,10 @@ static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv) static void lpe_audio_irq_unmask(struct irq_data *d) { - struct drm_i915_private *dev_priv = d->chip_data; - unsigned long irqflags; - u32 val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT); - - if (IS_CHERRYVIEW(dev_priv)) - val |= I915_LPE_PIPE_C_INTERRUPT; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - - dev_priv->irq_mask &= ~val; - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IMR, dev_priv->irq_mask); - POSTING_READ(VLV_IMR); - - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } static void lpe_audio_irq_mask(struct irq_data *d) { - struct drm_i915_private *dev_priv = d->chip_data; - unsigned long irqflags; - u32 val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT); - - if (IS_CHERRYVIEW(dev_priv)) - val |= I915_LPE_PIPE_C_INTERRUPT; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - - dev_priv->irq_mask |= val; - 
I915_WRITE(VLV_IMR, dev_priv->irq_mask); - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IIR, val); - POSTING_READ(VLV_IIR); - - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } static struct irq_chip lpe_audio_irqchip = { @@ -330,8 +296,6 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) desc = irq_to_desc(dev_priv->lpe_audio.irq); - lpe_audio_irq_mask(&desc->irq_data); - lpe_audio_platdev_destroy(dev_priv); irq_free_desc(dev_priv->lpe_audio.irq); -- cgit v1.1 From d2205595800dbd53eba06318e399a1cba1c0fc67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Apr 2017 19:02:23 +0300 Subject: drm/i915: Remove the unused pending_notify from LPE platform data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pending_notify flag in the LPE audio platform data is pointless, actually unused. So let's kill it off. v2: Fix typo in patch subject Cc: Takashi Iwai Cc: Pierre-Louis Bossart Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-5-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai --- drivers/gpu/drm/i915/intel_lpe_audio.c | 2 -- include/drm/intel_lpe_audio.h | 1 - sound/x86/intel_hdmi_audio.c | 1 - 3 files changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 292fedf..79b9dca 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -361,8 +361,6 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, if (pdata->notify_audio_lpe) pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev); - else - pdata->notify_pending = true; spin_unlock_irqrestore(&pdata->lpe_audio_slock, irq_flags); diff --git a/include/drm/intel_lpe_audio.h b/include/drm/intel_lpe_audio.h index e9892b4..c201d39 100644 --- a/include/drm/intel_lpe_audio.h +++ b/include/drm/intel_lpe_audio.h @@ -38,7 +38,6 @@ struct intel_hdmi_lpe_audio_eld { }; struct intel_hdmi_lpe_audio_pdata { - bool notify_pending; int tmds_clock_speed; bool hdmi_connected; bool dp_output; diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 5b89662..cbba4a7 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1811,7 +1811,6 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) spin_lock_irq(&pdata->lpe_audio_slock); pdata->notify_audio_lpe = notify_audio_lpe; - pdata->notify_pending = false; spin_unlock_irq(&pdata->lpe_audio_slock); pm_runtime_use_autosuspend(&pdev->dev); -- cgit v1.1 From c98ec5ba6cbbaaa98cb4c00888aecf58f7d470c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Apr 2017 19:02:24 +0300 Subject: drm/i915: Replace tmds_clock_speed and link_rate with just ls_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no need to distinguish between the DP link rate and HDMI TMDS clock for the purposes of the LPE audio. Both are actually the same thing more or less, which is the link symbol clock. So let's just call the thing ls_clock and simplify the code. 
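
To make the unification concrete (illustrative numbers, not taken from the
patch): a 1080p60 HDMI mode runs the TMDS clock at 148.5 MHz, so ls_clock
would be 148500 kHz, while a DP link trained at HBR2 has a 540 MHz link
symbol clock, so ls_clock would be 540000 kHz. The audio side can then
recover whichever of the two old fields it needs from the dp_output flag
alone, roughly as in this sketch (names are placeholders):

    #include <linux/types.h>

    static void example_split_clock(int ls_clock, bool dp_output,
                                    int *tmds_clock_speed, int *link_rate)
    {
            if (dp_output) {
                    *tmds_clock_speed = 0;
                    *link_rate = ls_clock;          /* e.g. 540000 for HBR2 */
            } else {
                    *tmds_clock_speed = ls_clock;   /* e.g. 148500 for 1080p60 */
                    *link_rate = 0;
            }
    }
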
Cc: Takashi Iwai Cc: Pierre-Louis Bossart Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-6-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/intel_audio.c | 19 ++++--------------- drivers/gpu/drm/i915/intel_lpe_audio.c | 14 ++++++-------- include/drm/intel_lpe_audio.h | 3 +-- sound/x86/intel_hdmi_audio.c | 11 ++++++++--- 5 files changed, 21 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 271a04c..38b6bf9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3721,8 +3721,8 @@ int intel_lpe_audio_init(struct drm_i915_private *dev_priv); void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv); void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv); void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, - void *eld, int port, int pipe, int tmds_clk_speed, - bool dp_output, int link_rate); + void *eld, int port, int pipe, int ls_clock, + bool dp_output); /* intel_i2c.c */ extern int intel_setup_gmbus(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 52c207e..79eeef2 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -632,20 +632,9 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder, (int) port, (int) pipe); } - switch (intel_encoder->type) { - case INTEL_OUTPUT_HDMI: - intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe, - crtc_state->port_clock, - false, 0); - break; - case INTEL_OUTPUT_DP: - intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe, - adjusted_mode->crtc_clock, - true, crtc_state->port_clock); - break; - default: - break; - } + intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe, + crtc_state->port_clock, + intel_encoder->type == INTEL_OUTPUT_DP); } /** @@ -680,7 +669,7 @@ void intel_audio_codec_disable(struct intel_encoder *intel_encoder) (int) port, (int) pipe); } - intel_lpe_audio_notify(dev_priv, NULL, port, pipe, 0, false, 0); + intel_lpe_audio_notify(dev_priv, NULL, port, pipe, 0, false); } /** diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 79b9dca..5a1a37e 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -309,13 +309,14 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) * @eld : ELD data * @pipe: pipe id * @port: port id - * @tmds_clk_speed: tmds clock frequency in Hz + * @ls_clock: Link symbol clock in kHz + * @dp_output: Driving a DP output? * * Notify lpe audio driver of eld change. 
*/ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, - void *eld, int port, int pipe, int tmds_clk_speed, - bool dp_output, int link_rate) + void *eld, int port, int pipe, int ls_clock, + bool dp_output) { unsigned long irq_flags; struct intel_hdmi_lpe_audio_pdata *pdata = NULL; @@ -337,12 +338,8 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, pdata->eld.port_id = port; pdata->eld.pipe_id = pipe; pdata->hdmi_connected = true; - + pdata->ls_clock = ls_clock; pdata->dp_output = dp_output; - if (tmds_clk_speed) - pdata->tmds_clock_speed = tmds_clk_speed; - if (link_rate) - pdata->link_rate = link_rate; /* Unmute the amp for both DP and HDMI */ I915_WRITE(VLV_AUD_PORT_EN_DBG(port), @@ -352,6 +349,7 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, memset(pdata->eld.eld_data, 0, HDMI_MAX_ELD_BYTES); pdata->hdmi_connected = false; + pdata->ls_clock = 0; pdata->dp_output = false; /* Mute the amp for both DP and HDMI */ diff --git a/include/drm/intel_lpe_audio.h b/include/drm/intel_lpe_audio.h index c201d39..8bf804c 100644 --- a/include/drm/intel_lpe_audio.h +++ b/include/drm/intel_lpe_audio.h @@ -38,10 +38,9 @@ struct intel_hdmi_lpe_audio_eld { }; struct intel_hdmi_lpe_audio_pdata { - int tmds_clock_speed; + int ls_clock; bool hdmi_connected; bool dp_output; - int link_rate; struct intel_hdmi_lpe_audio_eld eld; void (*notify_audio_lpe)(struct platform_device *pdev); spinlock_t lpe_audio_slock; diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index cbba4a7..4eaf5de 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1568,7 +1568,7 @@ static void had_audio_wq(struct work_struct *work) struct intel_hdmi_lpe_audio_eld *eld = &pdata->eld; dev_dbg(ctx->dev, "%s: HAD_NOTIFY_ELD : port = %d, tmds = %d\n", - __func__, eld->port_id, pdata->tmds_clock_speed); + __func__, eld->port_id, pdata->ls_clock); switch (eld->pipe_id) { case 0: @@ -1589,8 +1589,13 @@ static void had_audio_wq(struct work_struct *work) memcpy(ctx->eld, eld->eld_data, sizeof(ctx->eld)); ctx->dp_output = pdata->dp_output; - ctx->tmds_clock_speed = pdata->tmds_clock_speed; - ctx->link_rate = pdata->link_rate; + if (ctx->dp_output) { + ctx->tmds_clock_speed = 0; + ctx->link_rate = pdata->ls_clock; + } else { + ctx->tmds_clock_speed = pdata->ls_clock; + ctx->link_rate = 0; + } had_process_hot_plug(ctx); -- cgit v1.1 From 265fa2e18f50cc55c0d0517b29bd5cdd5b4f776f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Apr 2017 19:02:25 +0300 Subject: drm/i915: Remove hdmi_connected from LPE audio pdata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can determine that the pipe was shut down from pipe<0, so there's no point in duplicating that information as 'hdmi_connected'. 
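
The idiom is simply a sentinel value standing in for the removed boolean; as
a standalone sketch, with names that are illustrative rather than the
driver's:

    #include <linux/types.h>

    /* pipe doubles as the connection state: -1 means "no pipe, disconnected" */
    struct example_port_state {
            int pipe;
    };

    static bool example_port_connected(const struct example_port_state *s)
    {
            return s->pipe >= 0;
    }

    static void example_port_disconnect(struct example_port_state *s)
    {
            s->pipe = -1;
    }
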
v2: Use pipe<0 instead of port<0 as we'll want to do per-port PCM devices later Initialize pipe to -1 to inidicate inactive initial state Cc: Takashi Iwai Cc: Pierre-Louis Bossart Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-7-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai --- drivers/gpu/drm/i915/intel_lpe_audio.c | 9 +++++---- include/drm/intel_lpe_audio.h | 3 +-- sound/x86/intel_hdmi_audio.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 5a1a37e..7fd9573 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -111,6 +111,7 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) pinfo.size_data = sizeof(*pdata); pinfo.dma_mask = DMA_BIT_MASK(32); + pdata->pipe = -1; spin_lock_init(&pdata->lpe_audio_slock); platdev = platform_device_register_full(&pinfo); @@ -332,12 +333,12 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, audio_enable = I915_READ(VLV_AUD_PORT_EN_DBG(port)); + pdata->eld.port_id = port; + if (eld != NULL) { memcpy(pdata->eld.eld_data, eld, HDMI_MAX_ELD_BYTES); - pdata->eld.port_id = port; - pdata->eld.pipe_id = pipe; - pdata->hdmi_connected = true; + pdata->pipe = pipe; pdata->ls_clock = ls_clock; pdata->dp_output = dp_output; @@ -348,7 +349,7 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, } else { memset(pdata->eld.eld_data, 0, HDMI_MAX_ELD_BYTES); - pdata->hdmi_connected = false; + pdata->pipe = -1; pdata->ls_clock = 0; pdata->dp_output = false; diff --git a/include/drm/intel_lpe_audio.h b/include/drm/intel_lpe_audio.h index 8bf804c..9a5bdf5 100644 --- a/include/drm/intel_lpe_audio.h +++ b/include/drm/intel_lpe_audio.h @@ -33,13 +33,12 @@ struct platform_device; struct intel_hdmi_lpe_audio_eld { int port_id; - int pipe_id; unsigned char eld_data[HDMI_MAX_ELD_BYTES]; }; struct intel_hdmi_lpe_audio_pdata { + int pipe; int ls_clock; - bool hdmi_connected; bool dp_output; struct intel_hdmi_lpe_audio_eld eld; void (*notify_audio_lpe)(struct platform_device *pdev); diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 4eaf5de..1a09518 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1559,7 +1559,7 @@ static void had_audio_wq(struct work_struct *work) pm_runtime_get_sync(ctx->dev); mutex_lock(&ctx->mutex); - if (!pdata->hdmi_connected) { + if (pdata->pipe < 0) { dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG\n", __func__); memset(ctx->eld, 0, sizeof(ctx->eld)); /* clear the old ELD */ @@ -1568,9 +1568,9 @@ static void had_audio_wq(struct work_struct *work) struct intel_hdmi_lpe_audio_eld *eld = &pdata->eld; dev_dbg(ctx->dev, "%s: HAD_NOTIFY_ELD : port = %d, tmds = %d\n", - __func__, eld->port_id, pdata->ls_clock); + __func__, eld->port_id, pdata->ls_clock); - switch (eld->pipe_id) { + switch (pdata->pipe) { case 0: ctx->had_config_offset = AUDIO_HDMI_CONFIG_A; break; @@ -1582,7 +1582,7 @@ static void had_audio_wq(struct work_struct *work) break; default: dev_dbg(ctx->dev, "Invalid pipe %d\n", - eld->pipe_id); + pdata->pipe); break; } -- cgit v1.1 From 20be551e6d0a59d56a593dee0998922cd80f5eb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Apr 2017 19:02:26 +0300 Subject: drm/i915: Reorganize intel_lpe_audio_notify() arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Shuffle the arguments to intel_lpe_audio_notify() around a bit. Pipe and port being the most important things, so let's put the first, and thre rest can come in as is. Also constify the eld argument. Cc: Takashi Iwai Cc: Pierre-Louis Bossart Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-8-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/intel_audio.c | 4 ++-- drivers/gpu/drm/i915/intel_lpe_audio.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 38b6bf9..b20ed16 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3721,8 +3721,8 @@ int intel_lpe_audio_init(struct drm_i915_private *dev_priv); void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv); void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv); void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, - void *eld, int port, int pipe, int ls_clock, - bool dp_output); + enum pipe pipe, enum port port, + const void *eld, int ls_clock, bool dp_output); /* intel_i2c.c */ extern int intel_setup_gmbus(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 79eeef2..d805b6e 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -632,7 +632,7 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder, (int) port, (int) pipe); } - intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe, + intel_lpe_audio_notify(dev_priv, pipe, port, connector->eld, crtc_state->port_clock, intel_encoder->type == INTEL_OUTPUT_DP); } @@ -669,7 +669,7 @@ void intel_audio_codec_disable(struct intel_encoder *intel_encoder) (int) port, (int) pipe); } - intel_lpe_audio_notify(dev_priv, NULL, port, pipe, 0, false); + intel_lpe_audio_notify(dev_priv, pipe, port, NULL, 0, false); } /** diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 7fd9573..4c770d0 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -307,17 +307,17 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) * intel_lpe_audio_notify() - notify lpe audio event * audio driver and i915 * @dev_priv: the i915 drm device private data + * @pipe: pipe + * @port: port * @eld : ELD data - * @pipe: pipe id - * @port: port id * @ls_clock: Link symbol clock in kHz * @dp_output: Driving a DP output? * * Notify lpe audio driver of eld change. */ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, - void *eld, int port, int pipe, int ls_clock, - bool dp_output) + enum pipe pipe, enum port port, + const void *eld, int ls_clock, bool dp_output) { unsigned long irq_flags; struct intel_hdmi_lpe_audio_pdata *pdata = NULL; -- cgit v1.1 From a8562e4dec9c835e3c3b77e2ebfcef89dff9efb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Apr 2017 19:02:27 +0300 Subject: drm/i915: Clean up the LPE audio platform data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the LPE audio platform data into a port specific chunk and device specific chunk. Eventually we'll have a port specific chunk for each port, but for now we'll stick to just one. 
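
In outline, the split gives each output port its own chunk of platform data
sitting next to the device-wide part. A rough sketch of that shape (the
struct and field names here are placeholders; the real definitions are in
the include/drm/intel_lpe_audio.h hunk below):

    #include <linux/platform_device.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct example_port_pdata {             /* per output port */
            u8 eld[128];
            int port;
            int pipe;
            int ls_clock;
            bool dp_output;
    };

    struct example_pdata {                  /* per platform device */
            struct example_port_pdata port; /* later: one entry per port */
            void (*notify)(struct platform_device *pdev);
            spinlock_t lock;
    };
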
We'll also get rid of the intel_hdmi_lpe_audio_eld structure which doesn't seem to have any real reason to exist. v2: Organize per port instead of per pipe Cc: Takashi Iwai Cc: Pierre-Louis Bossart Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-9-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai --- drivers/gpu/drm/i915/intel_lpe_audio.c | 30 ++++++++++++++---------------- include/drm/intel_lpe_audio.h | 15 ++++++++------- sound/x86/intel_hdmi_audio.c | 19 +++++++++---------- 3 files changed, 31 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 4c770d0..bdbc235 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -111,7 +111,7 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) pinfo.size_data = sizeof(*pdata); pinfo.dma_mask = DMA_BIT_MASK(32); - pdata->pipe = -1; + pdata->port.pipe = -1; spin_lock_init(&pdata->lpe_audio_slock); platdev = platform_device_register_full(&pinfo); @@ -320,38 +320,36 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, const void *eld, int ls_clock, bool dp_output) { unsigned long irq_flags; - struct intel_hdmi_lpe_audio_pdata *pdata = NULL; + struct intel_hdmi_lpe_audio_pdata *pdata; + struct intel_hdmi_lpe_audio_port_pdata *ppdata; u32 audio_enable; if (!HAS_LPE_AUDIO(dev_priv)) return; - pdata = dev_get_platdata( - &(dev_priv->lpe_audio.platdev->dev)); + pdata = dev_get_platdata(&dev_priv->lpe_audio.platdev->dev); + ppdata = &pdata->port; spin_lock_irqsave(&pdata->lpe_audio_slock, irq_flags); audio_enable = I915_READ(VLV_AUD_PORT_EN_DBG(port)); - pdata->eld.port_id = port; + ppdata->port = port; if (eld != NULL) { - memcpy(pdata->eld.eld_data, eld, - HDMI_MAX_ELD_BYTES); - pdata->pipe = pipe; - pdata->ls_clock = ls_clock; - pdata->dp_output = dp_output; + memcpy(ppdata->eld, eld, HDMI_MAX_ELD_BYTES); + ppdata->pipe = pipe; + ppdata->ls_clock = ls_clock; + ppdata->dp_output = dp_output; /* Unmute the amp for both DP and HDMI */ I915_WRITE(VLV_AUD_PORT_EN_DBG(port), audio_enable & ~VLV_AMP_MUTE); - } else { - memset(pdata->eld.eld_data, 0, - HDMI_MAX_ELD_BYTES); - pdata->pipe = -1; - pdata->ls_clock = 0; - pdata->dp_output = false; + memset(ppdata->eld, 0, HDMI_MAX_ELD_BYTES); + ppdata->pipe = -1; + ppdata->ls_clock = 0; + ppdata->dp_output = false; /* Mute the amp for both DP and HDMI */ I915_WRITE(VLV_AUD_PORT_EN_DBG(port), diff --git a/include/drm/intel_lpe_audio.h b/include/drm/intel_lpe_audio.h index 9a5bdf5..211f1cd 100644 --- a/include/drm/intel_lpe_audio.h +++ b/include/drm/intel_lpe_audio.h @@ -31,16 +31,17 @@ struct platform_device; #define HDMI_MAX_ELD_BYTES 128 -struct intel_hdmi_lpe_audio_eld { - int port_id; - unsigned char eld_data[HDMI_MAX_ELD_BYTES]; -}; - -struct intel_hdmi_lpe_audio_pdata { +struct intel_hdmi_lpe_audio_port_pdata { + u8 eld[HDMI_MAX_ELD_BYTES]; + int port; int pipe; int ls_clock; bool dp_output; - struct intel_hdmi_lpe_audio_eld eld; +}; + +struct intel_hdmi_lpe_audio_pdata { + struct intel_hdmi_lpe_audio_port_pdata port; + void (*notify_audio_lpe)(struct platform_device *pdev); spinlock_t lpe_audio_slock; }; diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 1a09518..c2b7862 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1556,21 +1556,20 @@ static void had_audio_wq(struct work_struct *work) struct snd_intelhad *ctx = container_of(work, struct 
snd_intelhad, hdmi_audio_wq); struct intel_hdmi_lpe_audio_pdata *pdata = ctx->dev->platform_data; + struct intel_hdmi_lpe_audio_port_pdata *ppdata = &pdata->port; pm_runtime_get_sync(ctx->dev); mutex_lock(&ctx->mutex); - if (pdata->pipe < 0) { + if (ppdata->pipe < 0) { dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG\n", __func__); memset(ctx->eld, 0, sizeof(ctx->eld)); /* clear the old ELD */ had_process_hot_unplug(ctx); } else { - struct intel_hdmi_lpe_audio_eld *eld = &pdata->eld; - dev_dbg(ctx->dev, "%s: HAD_NOTIFY_ELD : port = %d, tmds = %d\n", - __func__, eld->port_id, pdata->ls_clock); + __func__, ppdata->port, ppdata->ls_clock); - switch (pdata->pipe) { + switch (ppdata->pipe) { case 0: ctx->had_config_offset = AUDIO_HDMI_CONFIG_A; break; @@ -1582,18 +1581,18 @@ static void had_audio_wq(struct work_struct *work) break; default: dev_dbg(ctx->dev, "Invalid pipe %d\n", - pdata->pipe); + ppdata->pipe); break; } - memcpy(ctx->eld, eld->eld_data, sizeof(ctx->eld)); + memcpy(ctx->eld, ppdata->eld, sizeof(ctx->eld)); - ctx->dp_output = pdata->dp_output; + ctx->dp_output = ppdata->dp_output; if (ctx->dp_output) { ctx->tmds_clock_speed = 0; - ctx->link_rate = pdata->ls_clock; + ctx->link_rate = ppdata->ls_clock; } else { - ctx->tmds_clock_speed = pdata->ls_clock; + ctx->tmds_clock_speed = ppdata->ls_clock; ctx->link_rate = 0; } -- cgit v1.1 From bb4ac5a0ecaeeef0229c69f6c935be2ee70abfec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Apr 2017 19:02:28 +0300 Subject: ALSA: x86: Prepare LPE audio ctls for multiple PCMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for register a PCM device for each pipe adjust link up the ctl elements with the corresponding PCM device. 
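
The linkage itself is small: a control is tied to a PCM device through its
id.device field, and a jack control advertises the association in its name
("...,pcm=N") so userspace can match jack and PCM. A simplified sketch of
that convention, using a made-up control template and helper name rather
than the driver's:

    #include <linux/errno.h>
    #include <linux/kernel.h>
    #include <sound/control.h>
    #include <sound/core.h>
    #include <sound/jack.h>
    #include <sound/pcm.h>

    static int example_attach_ctl_and_jack(struct snd_card *card,
                                           struct snd_pcm *pcm,
                                           const struct snd_kcontrol_new *tmpl,
                                           void *private,
                                           struct snd_jack **jack)
    {
            struct snd_kcontrol *kctl;
            char name[32];
            int err;

            kctl = snd_ctl_new1(tmpl, private);
            if (!kctl)
                    return -ENOMEM;
            kctl->id.device = pcm->device;  /* bind the control to this PCM */
            err = snd_ctl_add(card, kctl);
            if (err < 0)
                    return err;

            snprintf(name, sizeof(name), "HDMI/DP,pcm=%d", pcm->device);
            return snd_jack_new(card, name, SND_JACK_AVOUT, jack, true, false);
    }
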
Cc: Takashi Iwai Cc: Pierre-Louis Bossart Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-10-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai --- sound/x86/intel_hdmi_audio.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index c2b7862..69e1084 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1609,11 +1609,16 @@ static void had_audio_wq(struct work_struct *work) /* * Jack interface */ -static int had_create_jack(struct snd_intelhad *ctx) +static int had_create_jack(struct snd_intelhad *ctx, + struct snd_pcm *pcm) { + char hdmi_str[32]; int err; - err = snd_jack_new(ctx->card, "HDMI/DP", SND_JACK_AVOUT, &ctx->jack, + snprintf(hdmi_str, sizeof(hdmi_str), + "HDMI/DP,pcm=%d", pcm->device); + + err = snd_jack_new(ctx->card, hdmi_str, SND_JACK_AVOUT, &ctx->jack, true, false); if (err < 0) return err; @@ -1793,7 +1798,17 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) /* create controls */ for (i = 0; i < ARRAY_SIZE(had_controls); i++) { - ret = snd_ctl_add(card, snd_ctl_new1(&had_controls[i], ctx)); + struct snd_kcontrol *kctl; + + kctl = snd_ctl_new1(&had_controls[i], ctx); + if (!kctl) { + ret = -ENOMEM; + goto err; + } + + kctl->id.device = pcm->device; + + ret = snd_ctl_add(card, kctl); if (ret < 0) goto err; } @@ -1805,7 +1820,7 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) if (ret < 0) goto err; - ret = had_create_jack(ctx); + ret = had_create_jack(ctx, pcm); if (ret < 0) goto err; -- cgit v1.1 From b4eb0d522fcba0ee819f955fd3279ff4682b8b33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Apr 2017 19:02:29 +0300 Subject: ALSA: x86: Split snd_intelhad into card and PCM specific structures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To allow multiple PCM devices to be registered for the LPE audio card, split the private data into card and PCM specific chunks. For now we'll stick to just one PCM device as before. v2: Rework to do a pcm device per port instead of per pipe Cc: Takashi Iwai Cc: Pierre-Louis Bossart Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-11-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai --- sound/x86/intel_hdmi_audio.c | 227 +++++++++++++++++++++++++------------------ sound/x86/intel_hdmi_audio.h | 15 ++- 2 files changed, 142 insertions(+), 100 deletions(-) diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 69e1084..12fae26 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -42,6 +42,9 @@ #include #include "intel_hdmi_audio.h" +#define for_each_port(card_ctx, port) \ + for ((port) = 0; (port) < (card_ctx)->num_ports; (port)++) + /*standard module options for ALSA. 
This module supports only one card*/ static int hdmi_card_index = SNDRV_DEFAULT_IDX1; static char *hdmi_card_id = SNDRV_DEFAULT_STR1; @@ -192,12 +195,12 @@ static void had_substream_put(struct snd_intelhad *intelhaddata) /* Register access functions */ static u32 had_read_register_raw(struct snd_intelhad *ctx, u32 reg) { - return ioread32(ctx->mmio_start + ctx->had_config_offset + reg); + return ioread32(ctx->card_ctx->mmio_start + ctx->had_config_offset + reg); } static void had_write_register_raw(struct snd_intelhad *ctx, u32 reg, u32 val) { - iowrite32(val, ctx->mmio_start + ctx->had_config_offset + reg); + iowrite32(val, ctx->card_ctx->mmio_start + ctx->had_config_offset + reg); } static void had_read_register(struct snd_intelhad *ctx, u32 reg, u32 *val) @@ -1519,22 +1522,27 @@ static const struct snd_kcontrol_new had_controls[] = { */ static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id) { - struct snd_intelhad *ctx = dev_id; - u32 audio_stat; + struct snd_intelhad_card *card_ctx = dev_id; + int port; - /* use raw register access to ack IRQs even while disconnected */ - audio_stat = had_read_register_raw(ctx, AUD_HDMI_STATUS); + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + u32 audio_stat; - if (audio_stat & HDMI_AUDIO_UNDERRUN) { - had_write_register_raw(ctx, AUD_HDMI_STATUS, - HDMI_AUDIO_UNDERRUN); - had_process_buffer_underrun(ctx); - } + /* use raw register access to ack IRQs even while disconnected */ + audio_stat = had_read_register_raw(ctx, AUD_HDMI_STATUS); + + if (audio_stat & HDMI_AUDIO_UNDERRUN) { + had_write_register_raw(ctx, AUD_HDMI_STATUS, + HDMI_AUDIO_UNDERRUN); + had_process_buffer_underrun(ctx); + } - if (audio_stat & HDMI_AUDIO_BUFFER_DONE) { - had_write_register_raw(ctx, AUD_HDMI_STATUS, - HDMI_AUDIO_BUFFER_DONE); - had_process_buffer_done(ctx); + if (audio_stat & HDMI_AUDIO_BUFFER_DONE) { + had_write_register_raw(ctx, AUD_HDMI_STATUS, + HDMI_AUDIO_BUFFER_DONE); + had_process_buffer_done(ctx); + } } return IRQ_HANDLED; @@ -1545,9 +1553,14 @@ static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id) */ static void notify_audio_lpe(struct platform_device *pdev) { - struct snd_intelhad *ctx = platform_get_drvdata(pdev); + struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev); + int port; + + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; - schedule_work(&ctx->hdmi_audio_wq); + schedule_work(&ctx->hdmi_audio_wq); + } } /* the work to handle monitor hot plug/unplug */ @@ -1618,7 +1631,8 @@ static int had_create_jack(struct snd_intelhad *ctx, snprintf(hdmi_str, sizeof(hdmi_str), "HDMI/DP,pcm=%d", pcm->device); - err = snd_jack_new(ctx->card, hdmi_str, SND_JACK_AVOUT, &ctx->jack, + err = snd_jack_new(ctx->card_ctx->card, hdmi_str, + SND_JACK_AVOUT, &ctx->jack, true, false); if (err < 0) return err; @@ -1632,13 +1646,18 @@ static int had_create_jack(struct snd_intelhad *ctx, static int hdmi_lpe_audio_runtime_suspend(struct device *dev) { - struct snd_intelhad *ctx = dev_get_drvdata(dev); - struct snd_pcm_substream *substream; + struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev); + int port; - substream = had_substream_get(ctx); - if (substream) { - snd_pcm_suspend(substream); - had_substream_put(ctx); + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + struct snd_pcm_substream *substream; + + substream = had_substream_get(ctx); + if (substream) { + snd_pcm_suspend(substream); + had_substream_put(ctx); + 
} } return 0; @@ -1646,12 +1665,12 @@ static int hdmi_lpe_audio_runtime_suspend(struct device *dev) static int __maybe_unused hdmi_lpe_audio_suspend(struct device *dev) { - struct snd_intelhad *ctx = dev_get_drvdata(dev); + struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev); int err; err = hdmi_lpe_audio_runtime_suspend(dev); if (!err) - snd_power_change_state(ctx->card, SNDRV_CTL_POWER_D3hot); + snd_power_change_state(card_ctx->card, SNDRV_CTL_POWER_D3hot); return err; } @@ -1663,29 +1682,34 @@ static int hdmi_lpe_audio_runtime_resume(struct device *dev) static int __maybe_unused hdmi_lpe_audio_resume(struct device *dev) { - struct snd_intelhad *ctx = dev_get_drvdata(dev); + struct snd_intelhad_card *card_ctx = dev_get_drvdata(dev); hdmi_lpe_audio_runtime_resume(dev); - snd_power_change_state(ctx->card, SNDRV_CTL_POWER_D0); + snd_power_change_state(card_ctx->card, SNDRV_CTL_POWER_D0); return 0; } /* release resources */ static void hdmi_lpe_audio_free(struct snd_card *card) { - struct snd_intelhad *ctx = card->private_data; - struct intel_hdmi_lpe_audio_pdata *pdata = ctx->dev->platform_data; + struct snd_intelhad_card *card_ctx = card->private_data; + struct intel_hdmi_lpe_audio_pdata *pdata = card_ctx->dev->platform_data; + int port; spin_lock_irq(&pdata->lpe_audio_slock); pdata->notify_audio_lpe = NULL; spin_unlock_irq(&pdata->lpe_audio_slock); - cancel_work_sync(&ctx->hdmi_audio_wq); + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + + cancel_work_sync(&ctx->hdmi_audio_wq); + } - if (ctx->mmio_start) - iounmap(ctx->mmio_start); - if (ctx->irq >= 0) - free_irq(ctx->irq, ctx); + if (card_ctx->mmio_start) + iounmap(card_ctx->mmio_start); + if (card_ctx->irq >= 0) + free_irq(card_ctx->irq, card_ctx); } /* @@ -1697,12 +1721,12 @@ static void hdmi_lpe_audio_free(struct snd_card *card) static int hdmi_lpe_audio_probe(struct platform_device *pdev) { struct snd_card *card; - struct snd_intelhad *ctx; + struct snd_intelhad_card *card_ctx; struct snd_pcm *pcm; struct intel_hdmi_lpe_audio_pdata *pdata; int irq; struct resource *res_mmio; - int i, ret; + int port, ret; pdata = pdev->dev.platform_data; if (!pdata) { @@ -1725,39 +1749,30 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) /* create a card instance with ALSA framework */ ret = snd_card_new(&pdev->dev, hdmi_card_index, hdmi_card_id, - THIS_MODULE, sizeof(*ctx), &card); + THIS_MODULE, sizeof(*card_ctx), &card); if (ret) return ret; - ctx = card->private_data; - spin_lock_init(&ctx->had_spinlock); - mutex_init(&ctx->mutex); - ctx->connected = false; - ctx->dev = &pdev->dev; - ctx->card = card; - ctx->aes_bits = SNDRV_PCM_DEFAULT_CON_SPDIF; + card_ctx = card->private_data; + card_ctx->dev = &pdev->dev; + card_ctx->card = card; strcpy(card->driver, INTEL_HAD); strcpy(card->shortname, "Intel HDMI/DP LPE Audio"); strcpy(card->longname, "Intel HDMI/DP LPE Audio"); - ctx->irq = -1; - ctx->tmds_clock_speed = DIS_SAMPLE_RATE_148_5; - INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq); + card_ctx->irq = -1; card->private_free = hdmi_lpe_audio_free; - /* assume pipe A as default */ - ctx->had_config_offset = AUDIO_HDMI_CONFIG_A; - - platform_set_drvdata(pdev, ctx); + platform_set_drvdata(pdev, card_ctx); dev_dbg(&pdev->dev, "%s: mmio_start = 0x%x, mmio_end = 0x%x\n", __func__, (unsigned int)res_mmio->start, (unsigned int)res_mmio->end); - ctx->mmio_start = ioremap_nocache(res_mmio->start, - (size_t)(resource_size(res_mmio))); - if (!ctx->mmio_start) { + card_ctx->mmio_start = 
ioremap_nocache(res_mmio->start, + (size_t)(resource_size(res_mmio))); + if (!card_ctx->mmio_start) { dev_err(&pdev->dev, "Could not get ioremap\n"); ret = -EACCES; goto err; @@ -1765,65 +1780,79 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) /* setup interrupt handler */ ret = request_irq(irq, display_pipe_interrupt_handler, 0, - pdev->name, ctx); + pdev->name, card_ctx); if (ret < 0) { dev_err(&pdev->dev, "request_irq failed\n"); goto err; } - ctx->irq = irq; - - ret = snd_pcm_new(card, INTEL_HAD, PCM_INDEX, MAX_PB_STREAMS, - MAX_CAP_STREAMS, &pcm); - if (ret) - goto err; - - /* setup private data which can be retrieved when required */ - pcm->private_data = ctx; - pcm->info_flags = 0; - strncpy(pcm->name, card->shortname, strlen(card->shortname)); - /* setup the ops for playabck */ - snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops); + card_ctx->irq = irq; /* only 32bit addressable */ dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); - /* allocate dma pages; - * try to allocate 600k buffer as default which is large enough - */ - snd_pcm_lib_preallocate_pages_for_all(pcm, - SNDRV_DMA_TYPE_DEV, NULL, - HAD_DEFAULT_BUFFER, HAD_MAX_BUFFER); + init_channel_allocations(); - /* create controls */ - for (i = 0; i < ARRAY_SIZE(had_controls); i++) { - struct snd_kcontrol *kctl; + card_ctx->num_ports = 1; - kctl = snd_ctl_new1(&had_controls[i], ctx); - if (!kctl) { - ret = -ENOMEM; + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + int i; + + ctx->card_ctx = card_ctx; + ctx->dev = card_ctx->dev; + + INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq); + + ctx->had_config_offset = AUDIO_HDMI_CONFIG_A; + + ret = snd_pcm_new(card, INTEL_HAD, PCM_INDEX, MAX_PB_STREAMS, + MAX_CAP_STREAMS, &pcm); + if (ret) goto err; + + /* setup private data which can be retrieved when required */ + pcm->private_data = ctx; + pcm->info_flags = 0; + strncpy(pcm->name, card->shortname, strlen(card->shortname)); + /* setup the ops for playabck */ + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &had_pcm_ops); + + /* allocate dma pages; + * try to allocate 600k buffer as default which is large enough + */ + snd_pcm_lib_preallocate_pages_for_all(pcm, + SNDRV_DMA_TYPE_DEV, NULL, + HAD_DEFAULT_BUFFER, HAD_MAX_BUFFER); + + /* create controls */ + for (i = 0; i < ARRAY_SIZE(had_controls); i++) { + struct snd_kcontrol *kctl; + + kctl = snd_ctl_new1(&had_controls[i], ctx); + if (!kctl) { + ret = -ENOMEM; + goto err; + } + + kctl->id.device = pcm->device; + + ret = snd_ctl_add(card, kctl); + if (ret < 0) + goto err; } - kctl->id.device = pcm->device; + /* Register channel map controls */ + ret = had_register_chmap_ctls(ctx, pcm); + if (ret < 0) + goto err; - ret = snd_ctl_add(card, kctl); + ret = had_create_jack(ctx, pcm); if (ret < 0) goto err; } - init_channel_allocations(); - - /* Register channel map controls */ - ret = had_register_chmap_ctls(ctx, pcm); - if (ret < 0) - goto err; - - ret = had_create_jack(ctx, pcm); - if (ret < 0) - goto err; - ret = snd_card_register(card); if (ret) goto err; @@ -1837,7 +1866,11 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) pm_runtime_set_active(&pdev->dev); dev_dbg(&pdev->dev, "%s: handle pending notification\n", __func__); - schedule_work(&ctx->hdmi_audio_wq); + for_each_port(card_ctx, port) { + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; + + schedule_work(&ctx->hdmi_audio_wq); + } return 0; @@ -1853,9 +1886,9 @@ err: */ static int 
hdmi_lpe_audio_remove(struct platform_device *pdev) { - struct snd_intelhad *ctx = platform_get_drvdata(pdev); + struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev); - snd_card_free(ctx->card); + snd_card_free(card_ctx->card); return 0; } diff --git a/sound/x86/intel_hdmi_audio.h b/sound/x86/intel_hdmi_audio.h index 2d3e389..2725964 100644 --- a/sound/x86/intel_hdmi_audio.h +++ b/sound/x86/intel_hdmi_audio.h @@ -101,7 +101,7 @@ struct pcm_stream_info { * @chmap: holds channel map info */ struct snd_intelhad { - struct snd_card *card; + struct snd_intelhad_card *card_ctx; bool connected; struct pcm_stream_info stream_info; unsigned char eld[HDMI_MAX_ELD_BYTES]; @@ -123,8 +123,6 @@ struct snd_intelhad { unsigned int period_bytes; /* PCM period size in bytes */ /* internal stuff */ - int irq; - void __iomem *mmio_start; unsigned int had_config_offset; union aud_cfg aud_config; /* AUD_CONFIG reg value cache */ struct work_struct hdmi_audio_wq; @@ -133,4 +131,15 @@ struct snd_intelhad { struct snd_jack *jack; }; +struct snd_intelhad_card { + struct snd_card *card; + struct device *dev; + + /* internal stuff */ + int irq; + void __iomem *mmio_start; + int num_ports; + struct snd_intelhad pcm_ctx[3]; +}; + #endif /* _INTEL_HDMI_AUDIO_ */ -- cgit v1.1 From 8a2d6ae1f737fd22eaeadd0dc32b85c92f239025 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Apr 2017 19:02:30 +0300 Subject: ALSA: x86: Register multiple PCM devices for the LPE audio card MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that everything is in place let's register a PCM device for each port of the display engine. This will make it possible to actually output audio to multiple displays at the same time. And it avoids modesets on unrelated displays from clobbering up the ELD and whatnot for the display currently doing the playback. v2: Add a PCM per port instead of per pipe v3: Fix off by one error with port numbers (Pierre-Louis) Fix .notify_audio_lpe() prototype (Pierre-Louis) Cc: Takashi Iwai Cc: Pierre-Louis Bossart Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-12-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai --- drivers/gpu/drm/i915/intel_lpe_audio.c | 19 ++--- include/drm/intel_lpe_audio.h | 6 +- sound/x86/intel_hdmi_audio.c | 126 +++++++++++++++++++-------------- sound/x86/intel_hdmi_audio.h | 7 +- 4 files changed, 92 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index bdbc235..3bf6528 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -111,7 +111,11 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) pinfo.size_data = sizeof(*pdata); pinfo.dma_mask = DMA_BIT_MASK(32); - pdata->port.pipe = -1; + pdata->num_pipes = INTEL_INFO(dev_priv)->num_pipes; + pdata->num_ports = IS_CHERRYVIEW(dev_priv) ? 
3 : 2; /* B,C,D or B,C */ + pdata->port[0].pipe = -1; + pdata->port[1].pipe = -1; + pdata->port[2].pipe = -1; spin_lock_init(&pdata->lpe_audio_slock); platdev = platform_device_register_full(&pinfo); @@ -319,7 +323,7 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, enum pipe pipe, enum port port, const void *eld, int ls_clock, bool dp_output) { - unsigned long irq_flags; + unsigned long irqflags; struct intel_hdmi_lpe_audio_pdata *pdata; struct intel_hdmi_lpe_audio_port_pdata *ppdata; u32 audio_enable; @@ -328,14 +332,12 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, return; pdata = dev_get_platdata(&dev_priv->lpe_audio.platdev->dev); - ppdata = &pdata->port; + ppdata = &pdata->port[port - PORT_B]; - spin_lock_irqsave(&pdata->lpe_audio_slock, irq_flags); + spin_lock_irqsave(&pdata->lpe_audio_slock, irqflags); audio_enable = I915_READ(VLV_AUD_PORT_EN_DBG(port)); - ppdata->port = port; - if (eld != NULL) { memcpy(ppdata->eld, eld, HDMI_MAX_ELD_BYTES); ppdata->pipe = pipe; @@ -357,8 +359,7 @@ void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, } if (pdata->notify_audio_lpe) - pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev); + pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev, port - PORT_B); - spin_unlock_irqrestore(&pdata->lpe_audio_slock, - irq_flags); + spin_unlock_irqrestore(&pdata->lpe_audio_slock, irqflags); } diff --git a/include/drm/intel_lpe_audio.h b/include/drm/intel_lpe_audio.h index 211f1cd..b6121c8 100644 --- a/include/drm/intel_lpe_audio.h +++ b/include/drm/intel_lpe_audio.h @@ -40,9 +40,11 @@ struct intel_hdmi_lpe_audio_port_pdata { }; struct intel_hdmi_lpe_audio_pdata { - struct intel_hdmi_lpe_audio_port_pdata port; + struct intel_hdmi_lpe_audio_port_pdata port[3]; /* for ports B,C,D */ + int num_ports; + int num_pipes; - void (*notify_audio_lpe)(struct platform_device *pdev); + void (*notify_audio_lpe)(struct platform_device *pdev, int port); /* port: 0==B,1==C,2==D */ spinlock_t lpe_audio_slock; }; diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 12fae26..909391d 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -42,6 +42,8 @@ #include #include "intel_hdmi_audio.h" +#define for_each_pipe(card_ctx, pipe) \ + for ((pipe) = 0; (pipe) < (card_ctx)->num_pipes; (pipe)++) #define for_each_port(card_ctx, port) \ for ((port) = 0; (port) < (card_ctx)->num_ports; (port)++) @@ -192,15 +194,30 @@ static void had_substream_put(struct snd_intelhad *intelhaddata) spin_unlock_irqrestore(&intelhaddata->had_spinlock, flags); } +static u32 had_config_offset(int pipe) +{ + switch (pipe) { + default: + case 0: + return AUDIO_HDMI_CONFIG_A; + case 1: + return AUDIO_HDMI_CONFIG_B; + case 2: + return AUDIO_HDMI_CONFIG_C; + } +} + /* Register access functions */ -static u32 had_read_register_raw(struct snd_intelhad *ctx, u32 reg) +static u32 had_read_register_raw(struct snd_intelhad_card *card_ctx, + int pipe, u32 reg) { - return ioread32(ctx->card_ctx->mmio_start + ctx->had_config_offset + reg); + return ioread32(card_ctx->mmio_start + had_config_offset(pipe) + reg); } -static void had_write_register_raw(struct snd_intelhad *ctx, u32 reg, u32 val) +static void had_write_register_raw(struct snd_intelhad_card *card_ctx, + int pipe, u32 reg, u32 val) { - iowrite32(val, ctx->card_ctx->mmio_start + ctx->had_config_offset + reg); + iowrite32(val, card_ctx->mmio_start + had_config_offset(pipe) + reg); } static void had_read_register(struct snd_intelhad *ctx, u32 reg, u32 *val) @@ -208,13 
+225,13 @@ static void had_read_register(struct snd_intelhad *ctx, u32 reg, u32 *val) if (!ctx->connected) *val = 0; else - *val = had_read_register_raw(ctx, reg); + *val = had_read_register_raw(ctx->card_ctx, ctx->pipe, reg); } static void had_write_register(struct snd_intelhad *ctx, u32 reg, u32 val) { if (ctx->connected) - had_write_register_raw(ctx, reg, val); + had_write_register_raw(ctx->card_ctx, ctx->pipe, reg, val); } /* @@ -1361,6 +1378,9 @@ static void had_process_hot_plug(struct snd_intelhad *intelhaddata) return; } + /* Disable Audio */ + had_enable_audio(intelhaddata, false); + intelhaddata->connected = true; dev_dbg(intelhaddata->dev, "%s @ %d:DEBUG PLUG/UNPLUG : HAD_DRV_CONNECTED\n", @@ -1523,26 +1543,31 @@ static const struct snd_kcontrol_new had_controls[] = { static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id) { struct snd_intelhad_card *card_ctx = dev_id; - int port; + u32 audio_stat[3] = {}; + int pipe, port; + + for_each_pipe(card_ctx, pipe) { + /* use raw register access to ack IRQs even while disconnected */ + audio_stat[pipe] = had_read_register_raw(card_ctx, pipe, + AUD_HDMI_STATUS) & + (HDMI_AUDIO_UNDERRUN | HDMI_AUDIO_BUFFER_DONE); + + if (audio_stat[pipe]) + had_write_register_raw(card_ctx, pipe, + AUD_HDMI_STATUS, audio_stat[pipe]); + } for_each_port(card_ctx, port) { struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; - u32 audio_stat; + int pipe = ctx->pipe; - /* use raw register access to ack IRQs even while disconnected */ - audio_stat = had_read_register_raw(ctx, AUD_HDMI_STATUS); - - if (audio_stat & HDMI_AUDIO_UNDERRUN) { - had_write_register_raw(ctx, AUD_HDMI_STATUS, - HDMI_AUDIO_UNDERRUN); - had_process_buffer_underrun(ctx); - } + if (pipe < 0) + continue; - if (audio_stat & HDMI_AUDIO_BUFFER_DONE) { - had_write_register_raw(ctx, AUD_HDMI_STATUS, - HDMI_AUDIO_BUFFER_DONE); + if (audio_stat[pipe] & HDMI_AUDIO_BUFFER_DONE) had_process_buffer_done(ctx); - } + if (audio_stat[pipe] & HDMI_AUDIO_UNDERRUN) + had_process_buffer_underrun(ctx); } return IRQ_HANDLED; @@ -1551,16 +1576,12 @@ static irqreturn_t display_pipe_interrupt_handler(int irq, void *dev_id) /* * monitor plug/unplug notification from i915; just kick off the work */ -static void notify_audio_lpe(struct platform_device *pdev) +static void notify_audio_lpe(struct platform_device *pdev, int port) { struct snd_intelhad_card *card_ctx = platform_get_drvdata(pdev); - int port; + struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; - for_each_port(card_ctx, port) { - struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; - - schedule_work(&ctx->hdmi_audio_wq); - } + schedule_work(&ctx->hdmi_audio_wq); } /* the work to handle monitor hot plug/unplug */ @@ -1569,34 +1590,27 @@ static void had_audio_wq(struct work_struct *work) struct snd_intelhad *ctx = container_of(work, struct snd_intelhad, hdmi_audio_wq); struct intel_hdmi_lpe_audio_pdata *pdata = ctx->dev->platform_data; - struct intel_hdmi_lpe_audio_port_pdata *ppdata = &pdata->port; + struct intel_hdmi_lpe_audio_port_pdata *ppdata = &pdata->port[ctx->port]; pm_runtime_get_sync(ctx->dev); mutex_lock(&ctx->mutex); if (ppdata->pipe < 0) { - dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG\n", - __func__); + dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG : port = %d\n", + __func__, ctx->port); + memset(ctx->eld, 0, sizeof(ctx->eld)); /* clear the old ELD */ + + ctx->dp_output = false; + ctx->tmds_clock_speed = 0; + ctx->link_rate = 0; + + /* Shut down the stream */ had_process_hot_unplug(ctx); + + ctx->pipe = -1; } else { 
dev_dbg(ctx->dev, "%s: HAD_NOTIFY_ELD : port = %d, tmds = %d\n", - __func__, ppdata->port, ppdata->ls_clock); - - switch (ppdata->pipe) { - case 0: - ctx->had_config_offset = AUDIO_HDMI_CONFIG_A; - break; - case 1: - ctx->had_config_offset = AUDIO_HDMI_CONFIG_B; - break; - case 2: - ctx->had_config_offset = AUDIO_HDMI_CONFIG_C; - break; - default: - dev_dbg(ctx->dev, "Invalid pipe %d\n", - ppdata->pipe); - break; - } + __func__, ctx->port, ppdata->ls_clock); memcpy(ctx->eld, ppdata->eld, sizeof(ctx->eld)); @@ -1609,11 +1623,18 @@ static void had_audio_wq(struct work_struct *work) ctx->link_rate = 0; } + /* + * Shut down the stream before we change + * the pipe assignment for this pcm device + */ had_process_hot_plug(ctx); - /* Process mode change if stream is active */ + ctx->pipe = ppdata->pipe; + + /* Restart the stream if necessary */ had_process_mode_change(ctx); } + mutex_unlock(&ctx->mutex); pm_runtime_mark_last_busy(ctx->dev); pm_runtime_put_autosuspend(ctx->dev); @@ -1794,7 +1815,8 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) init_channel_allocations(); - card_ctx->num_ports = 1; + card_ctx->num_pipes = pdata->num_pipes; + card_ctx->num_ports = pdata->num_ports; for_each_port(card_ctx, port) { struct snd_intelhad *ctx = &card_ctx->pcm_ctx[port]; @@ -1802,12 +1824,12 @@ static int hdmi_lpe_audio_probe(struct platform_device *pdev) ctx->card_ctx = card_ctx; ctx->dev = card_ctx->dev; + ctx->port = port; + ctx->pipe = -1; INIT_WORK(&ctx->hdmi_audio_wq, had_audio_wq); - ctx->had_config_offset = AUDIO_HDMI_CONFIG_A; - - ret = snd_pcm_new(card, INTEL_HAD, PCM_INDEX, MAX_PB_STREAMS, + ret = snd_pcm_new(card, INTEL_HAD, port, MAX_PB_STREAMS, MAX_CAP_STREAMS, &pcm); if (ret) goto err; diff --git a/sound/x86/intel_hdmi_audio.h b/sound/x86/intel_hdmi_audio.h index 2725964..0d91bb5 100644 --- a/sound/x86/intel_hdmi_audio.h +++ b/sound/x86/intel_hdmi_audio.h @@ -32,7 +32,6 @@ #include "intel_hdmi_lpe_audio.h" -#define PCM_INDEX 0 #define MAX_PB_STREAMS 1 #define MAX_CAP_STREAMS 0 #define BYTES_PER_WORD 0x4 @@ -112,6 +111,8 @@ struct snd_intelhad { struct snd_pcm_chmap *chmap; int tmds_clock_speed; int link_rate; + int port; /* fixed */ + int pipe; /* can change dynamically */ /* ring buffer (BD) position index */ unsigned int bd_head; @@ -123,7 +124,6 @@ struct snd_intelhad { unsigned int period_bytes; /* PCM period size in bytes */ /* internal stuff */ - unsigned int had_config_offset; union aud_cfg aud_config; /* AUD_CONFIG reg value cache */ struct work_struct hdmi_audio_wq; struct mutex mutex; /* for protecting chmap and eld */ @@ -138,8 +138,9 @@ struct snd_intelhad_card { /* internal stuff */ int irq; void __iomem *mmio_start; + int num_pipes; int num_ports; - struct snd_intelhad pcm_ctx[3]; + struct snd_intelhad pcm_ctx[3]; /* one for each port */ }; #endif /* _INTEL_HDMI_AUDIO_ */ -- cgit v1.1 From c944a308a95ab456c9cee4df4b4fa01763b94621 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 3 May 2017 21:25:17 +0100 Subject: drm/i915: Implement dma_buf_ops->kmap Since kmap allows us to block we can pin the pages and use our normal page lookup routine making the implementation simple, or as some might say quick and dirty. 
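For illustration, an importer is expected to use the interface roughly as below (hypothetical caller, function name invented; synchronisation is left to dma_buf_begin/end_cpu_access() as noted in the patch):

  #include <linux/dma-buf.h>
  #include <linux/err.h>

  static int example_peek_first_page(struct dma_buf *dmabuf)
  {
      void *vaddr;
      int ret;

      ret = dma_buf_begin_cpu_access(dmabuf, DMA_FROM_DEVICE);
      if (ret)
          return ret;

      vaddr = dma_buf_kmap(dmabuf, 0); /* kmap page 0 of the buffer */
      if (vaddr) {
          /* inspect or modify the page contents here */
          dma_buf_kunmap(dmabuf, 0, vaddr);
      }

      return dma_buf_end_cpu_access(dmabuf, DMA_FROM_DEVICE);
  }
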
Testcase: igt/drv_selftest/dmabuf Testcase: igt/prime_rw Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170503202517.16797-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 24 ++++++ drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c | 100 +++++++++++++++++++++++ 2 files changed, 124 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index f225bf6..6176e58 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -122,12 +122,36 @@ static void i915_gem_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, unsigned long } static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) { + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + struct page *page; + + if (page_num >= obj->base.size >> PAGE_SHIFT) + return NULL; + + if (!i915_gem_object_has_struct_page(obj)) + return NULL; + + if (i915_gem_object_pin_pages(obj)) + return NULL; + + /* Synchronisation is left to the caller (via .begin_cpu_access()) */ + page = i915_gem_object_get_page(obj, page_num); + if (IS_ERR(page)) + goto err_unpin; + + return kmap(page); + +err_unpin: + i915_gem_object_unpin_pages(obj); return NULL; } static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) { + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + kunmap(virt_to_page(addr)); + i915_gem_object_unpin_pages(obj); } static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c index 817bef7..d15cc9d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c @@ -271,6 +271,105 @@ err_obj: return err; } +static int igt_dmabuf_export_kmap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + obj = i915_gem_object_create(i915, 2*PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + i915_gem_object_put(obj); + if (IS_ERR(dmabuf)) { + err = PTR_ERR(dmabuf); + pr_err("i915_gem_prime_export failed with err=%d\n", err); + return err; + } + + ptr = dma_buf_kmap(dmabuf, 0); + if (!ptr) { + pr_err("dma_buf_kmap failed\n"); + err = -ENOMEM; + goto err; + } + + if (memchr_inv(ptr, 0, PAGE_SIZE)) { + dma_buf_kunmap(dmabuf, 0, ptr); + pr_err("Exported page[0] not initialiased to zero!\n"); + err = -EINVAL; + goto err; + } + + memset(ptr, 0xc5, PAGE_SIZE); + dma_buf_kunmap(dmabuf, 0, ptr); + + ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + pr_err("i915_gem_object_pin_map failed with err=%d\n", err); + goto err; + } + memset(ptr + PAGE_SIZE, 0xaa, PAGE_SIZE); + i915_gem_object_unpin_map(obj); + + ptr = dma_buf_kmap(dmabuf, 1); + if (!ptr) { + pr_err("dma_buf_kmap failed\n"); + err = -ENOMEM; + goto err; + } + + if (memchr_inv(ptr, 0xaa, PAGE_SIZE)) { + dma_buf_kunmap(dmabuf, 1, ptr); + pr_err("Exported page[1] not set to 0xaa!\n"); + err = -EINVAL; + goto err; + } + + memset(ptr, 0xc5, PAGE_SIZE); + dma_buf_kunmap(dmabuf, 1, ptr); + + ptr = dma_buf_kmap(dmabuf, 0); + if (!ptr) { + pr_err("dma_buf_kmap failed\n"); + err = -ENOMEM; + goto err; + } + if (memchr_inv(ptr, 0xc5, PAGE_SIZE)) { + dma_buf_kunmap(dmabuf, 0, ptr); + 
pr_err("Exported page[0] did not retain 0xc5!\n"); + err = -EINVAL; + goto err; + } + dma_buf_kunmap(dmabuf, 0, ptr); + + ptr = dma_buf_kmap(dmabuf, 2); + if (ptr) { + pr_err("Erroneously kmapped beyond the end of the object!\n"); + dma_buf_kunmap(dmabuf, 2, ptr); + err = -EINVAL; + goto err; + } + + ptr = dma_buf_kmap(dmabuf, -1); + if (ptr) { + pr_err("Erroneously kmapped before the start of the object!\n"); + dma_buf_kunmap(dmabuf, -1, ptr); + err = -EINVAL; + goto err; + } + + err = 0; +err: + dma_buf_put(dmabuf); + return err; +} + int i915_gem_dmabuf_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -279,6 +378,7 @@ int i915_gem_dmabuf_mock_selftests(void) SUBTEST(igt_dmabuf_import), SUBTEST(igt_dmabuf_import_ownership), SUBTEST(igt_dmabuf_export_vmap), + SUBTEST(igt_dmabuf_export_kmap), }; struct drm_i915_private *i915; int err; -- cgit v1.1 From 266a240bf0abf1e00e72e571f3724ec753a35f19 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 May 2017 10:33:08 +0100 Subject: drm/i915: Use engine->context_pin() to report the intel_ring Since unifying ringbuffer/execlist submission to use engine->pin_context, we ensure that the intel_ring is available before we start constructing the request. We can therefore move the assignment of the request->ring to the central i915_gem_request_alloc() and not require it in every engine->request_alloc() callback. Another small step towards simplification (of the core, but at a cost of handling error pointers in less important callers of engine->pin_context). v2: Rearrange a few branches to reduce impact of PTR_ERR() on gcc's code generation. Signed-off-by: Chris Wilson Cc: Oscar Mateo Cc: Joonas Lahtinen Reviewed-by: Oscar Mateo Link: http://patchwork.freedesktop.org/patch/msgid/20170504093308.4137-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/scheduler.c | 6 ++++-- drivers/gpu/drm/i915/i915_gem_request.c | 9 ++++++--- drivers/gpu/drm/i915/i915_perf.c | 13 ++++++------- drivers/gpu/drm/i915/intel_engine_cs.c | 7 ++++--- drivers/gpu/drm/i915/intel_lrc.c | 17 ++++++++--------- drivers/gpu/drm/i915/intel_ringbuffer.c | 25 +++++++++++++------------ drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++-- drivers/gpu/drm/i915/selftests/mock_engine.c | 8 ++++---- 8 files changed, 47 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 1256fe2..6ae286c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -180,6 +180,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) struct intel_engine_cs *engine = dev_priv->engine[ring_id]; struct drm_i915_gem_request *rq; struct intel_vgpu *vgpu = workload->vgpu; + struct intel_ring *ring; int ret; gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n", @@ -198,8 +199,9 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) * shadow_ctx pages invalid. So gvt need to pin itself. After update * the guest context, gvt can unpin the shadow_ctx safely. 
*/ - ret = engine->context_pin(engine, shadow_ctx); - if (ret) { + ring = engine->context_pin(engine, shadow_ctx); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); gvt_vgpu_err("fail to pin shadow context\n"); workload->status = ret; mutex_unlock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 9074303..10361c7 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -551,6 +551,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, { struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_gem_request *req; + struct intel_ring *ring; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -565,9 +566,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - ret = engine->context_pin(engine, ctx); - if (ret) - return ERR_PTR(ret); + ring = engine->context_pin(engine, ctx); + if (IS_ERR(ring)) + return ERR_CAST(ring); + GEM_BUG_ON(!ring); ret = reserve_seqno(engine); if (ret) @@ -633,6 +635,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->i915 = dev_priv; req->engine = engine; req->ctx = ctx; + req->ring = ring; /* No zalloc, must clear what we need by hand */ req->global_seqno = 0; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 060b171..cdac685 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -744,6 +744,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[RCS]; + struct intel_ring *ring; int ret; ret = i915_mutex_lock_interruptible(&dev_priv->drm); @@ -755,9 +756,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * * NB: implied RCS engine... */ - ret = engine->context_pin(engine, stream->ctx); - if (ret) - goto unlock; + ring = engine->context_pin(engine, stream->ctx); + mutex_unlock(&dev_priv->drm.struct_mutex); + if (IS_ERR(ring)) + return PTR_ERR(ring); /* Explicitly track the ID (instead of calling i915_ggtt_offset() * on the fly) considering the difference with gen8+ and @@ -766,10 +768,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(stream->ctx->engine[engine->id].state); -unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); - - return ret; + return 0; } /** diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 6d3d838..483ed76 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -469,6 +469,7 @@ static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) */ int intel_engine_init_common(struct intel_engine_cs *engine) { + struct intel_ring *ring; int ret; engine->set_default_submission(engine); @@ -480,9 +481,9 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * be available. To avoid this we always pin the default * context. 
*/ - ret = engine->context_pin(engine, engine->i915->kernel_context); - if (ret) - return ret; + ring = engine->context_pin(engine, engine->i915->kernel_context); + if (IS_ERR(ring)) + return PTR_ERR(ring); ret = intel_engine_init_breadcrumbs(engine); if (ret) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0909549..319d9a8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -740,8 +740,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) /* XXX Do we need to preempt to make room for us and our deps? */ } -static int execlists_context_pin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static struct intel_ring * +execlists_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; unsigned int flags; @@ -750,8 +751,8 @@ static int execlists_context_pin(struct intel_engine_cs *engine, lockdep_assert_held(&ctx->i915->drm.struct_mutex); - if (ce->pin_count++) - return 0; + if (likely(ce->pin_count++)) + goto out; GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ if (!ce->state) { @@ -788,7 +789,8 @@ static int execlists_context_pin(struct intel_engine_cs *engine, ce->state->obj->mm.dirty = true; i915_gem_context_get(ctx); - return 0; +out: + return ce->ring; unpin_map: i915_gem_object_unpin_map(ce->state->obj); @@ -796,7 +798,7 @@ unpin_vma: __i915_vma_unpin(ce->state); err: ce->pin_count = 0; - return ret; + return ERR_PTR(ret); } static void execlists_context_unpin(struct intel_engine_cs *engine, @@ -833,9 +835,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) */ request->reserved_space += EXECLISTS_REQUEST_SIZE; - GEM_BUG_ON(!ce->ring); - request->ring = ce->ring; - if (i915.enable_guc_submission) { /* * Check that the GuC has space for the request before diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 29b5afa..3ce1c87 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1475,16 +1475,17 @@ alloc_context_vma(struct intel_engine_cs *engine) return vma; } -static int intel_ring_context_pin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static struct intel_ring * +intel_ring_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; int ret; lockdep_assert_held(&ctx->i915->drm.struct_mutex); - if (ce->pin_count++) - return 0; + if (likely(ce->pin_count++)) + goto out; GEM_BUG_ON(!ce->pin_count); /* no overflow please! 
*/ if (!ce->state && engine->context_size) { @@ -1493,7 +1494,7 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, vma = alloc_context_vma(engine); if (IS_ERR(vma)) { ret = PTR_ERR(vma); - goto error; + goto err; } ce->state = vma; @@ -1502,7 +1503,7 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, if (ce->state) { ret = context_pin(ctx); if (ret) - goto error; + goto err; ce->state->obj->mm.dirty = true; } @@ -1518,11 +1519,14 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, ce->initialised = true; i915_gem_context_get(ctx); - return 0; -error: +out: + /* One ringbuffer to rule them all */ + return engine->buffer; + +err: ce->pin_count = 0; - return ret; + return ERR_PTR(ret); } static void intel_ring_context_unpin(struct intel_engine_cs *engine, @@ -1634,9 +1638,6 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) */ request->reserved_space += LEGACY_REQUEST_SIZE; - GEM_BUG_ON(!request->engine->buffer); - request->ring = request->engine->buffer; - cs = intel_ring_begin(request, 0); if (IS_ERR(cs)) return PTR_ERR(cs); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 02d741e..600713b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -271,8 +271,8 @@ struct intel_engine_cs { void (*set_default_submission)(struct intel_engine_cs *engine); - int (*context_pin)(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); + struct intel_ring *(*context_pin)(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); void (*context_unpin)(struct intel_engine_cs *engine, struct i915_gem_context *ctx); int (*request_alloc)(struct drm_i915_gem_request *req); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index b8e53bd..5b18a2d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -52,11 +52,12 @@ static void hw_delay_complete(unsigned long data) spin_unlock(&engine->hw_lock); } -static int mock_context_pin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static struct intel_ring * +mock_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { i915_gem_context_get(ctx); - return 0; + return engine->buffer; } static void mock_context_unpin(struct intel_engine_cs *engine, @@ -72,7 +73,6 @@ static int mock_request_alloc(struct drm_i915_gem_request *request) INIT_LIST_HEAD(&mock->link); mock->delay = 0; - request->ring = request->engine->buffer; return 0; } -- cgit v1.1 From 605d5b3297687cce9d3c4298c699188e61486a4c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 May 2017 14:08:44 +0100 Subject: drm/i915: Avoid the branch in computing intel_ring_space() Exploit the power-of-two ring size to compute the space across the wraparound using a mask rather than a if. Convert to unsigned integers so the operation is well defined. 
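As a standalone illustration (userspace sketch with assumed head/tail values, not kernel code), the masked form gives the same result as the old branch-based computation that subtracted the cacheline of free space:

  #include <stdio.h>

  #define CACHELINE_BYTES 64

  static unsigned int ring_space(unsigned int head, unsigned int tail,
                                 unsigned int size)
  {
      /* size must be a power of two for the mask to model the wraparound */
      return (head - tail - CACHELINE_BYTES) & (size - 1);
  }

  int main(void)
  {
      printf("%u\n", ring_space(256, 1024, 4096)); /* wrapped case: 3264 */
      printf("%u\n", ring_space(1024, 256, 4096)); /* plain case:    704 */
      return 0;
  }
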
References: https://bugs.freedesktop.org/show_bug.cgi?id=99671 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170504130846.4807-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 24 ++++++++++++---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 36 ++++++++++++++++++++------------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3ce1c87..46f2696 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -39,12 +39,17 @@ */ #define LEGACY_REQUEST_SIZE 200 -static int __intel_ring_space(int head, int tail, int size) +static unsigned int __intel_ring_space(unsigned int head, + unsigned int tail, + unsigned int size) { - int space = head - tail; - if (space <= 0) - space += size; - return space - I915_RING_FREE_SPACE; + /* + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + */ + GEM_BUG_ON(!is_power_of_2(size)); + return (head - tail - CACHELINE_BYTES) & (size - 1); } void intel_ring_update_space(struct intel_ring *ring) @@ -1670,12 +1675,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) GEM_BUG_ON(!req->reserved_space); list_for_each_entry(target, &ring->request_list, ring_link) { - unsigned space; - /* Would completion of this request free enough space? */ - space = __intel_ring_space(target->postfix, ring->emit, - ring->size); - if (space >= bytes) + if (bytes <= __intel_ring_space(target->postfix, + ring->emit, ring->size)) break; } @@ -1744,11 +1746,11 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) } GEM_BUG_ON(ring->emit > ring->size - bytes); + GEM_BUG_ON(ring->space < bytes); cs = ring->vaddr + ring->emit; GEM_DEBUG_EXEC(memset(cs, POISON_INUSE, bytes)); ring->emit += bytes; ring->space -= bytes; - GEM_BUG_ON(ring->space < 0); return cs; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 600713b..650ab88 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -17,17 +17,6 @@ #define CACHELINE_BYTES 64 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t)) -/* - * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use" - * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use" - * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use" - * - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same - * cacheline, the Head Pointer must not be greater than the Tail - * Pointer." - */ -#define I915_RING_FREE_SPACE 64 - struct intel_hw_status_page { struct i915_vma *vma; u32 *page_addr; @@ -145,9 +134,9 @@ struct intel_ring { u32 tail; u32 emit; - int space; - int size; - int effective_size; + u32 space; + u32 size; + u32 effective_size; }; struct i915_gem_context; @@ -548,6 +537,25 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) */ GEM_BUG_ON(!IS_ALIGNED(tail, 8)); GEM_BUG_ON(tail >= ring->size); + + /* + * "Ring Buffer Use" + * Gen2 BSpec "1. 
Programming Environment" / 1.4.4.6 + * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 + * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the + * same cacheline, the Head Pointer must not be greater than the Tail + * Pointer." + * + * We use ring->head as the last known location of the actual RING_HEAD, + * it may have advanced but in the worst case it is equally the same + * as ring->head and so we should never program RING_TAIL to advance + * into the same cacheline as ring->head. + */ +#define cacheline(a) round_down(a, CACHELINE_BYTES) + GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && + tail < ring->head); +#undef cacheline } static inline unsigned int -- cgit v1.1 From 95aebcb2da73079f9ecb7f4e353af71ff1f04c05 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 May 2017 14:08:45 +0100 Subject: drm/i915: Report the ring->space from intel_ring_update_space() Some callers immediately want to know the current ring->space after calling intel_ring_update_space(), which we can freely provide via the return parameter. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170504130846.4807-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 12 ++++++++---- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 46f2696..b308e73 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -52,9 +52,14 @@ static unsigned int __intel_ring_space(unsigned int head, return (head - tail - CACHELINE_BYTES) & (size - 1); } -void intel_ring_update_space(struct intel_ring *ring) +unsigned int intel_ring_update_space(struct intel_ring *ring) { - ring->space = __intel_ring_space(ring->head, ring->emit, ring->size); + unsigned int space; + + space = __intel_ring_space(ring->head, ring->emit, ring->size); + + ring->space = space; + return space; } static int @@ -1659,8 +1664,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) lockdep_assert_held(&req->i915->drm.struct_mutex); - intel_ring_update_space(ring); - if (ring->space >= bytes) + if (intel_ring_update_space(ring) >= bytes) return 0; /* diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 650ab88..3e343b0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -486,7 +486,7 @@ int intel_ring_pin(struct intel_ring *ring, struct drm_i915_private *i915, unsigned int offset_bias); void intel_ring_reset(struct intel_ring *ring, u32 tail); -void intel_ring_update_space(struct intel_ring *ring); +unsigned int intel_ring_update_space(struct intel_ring *ring); void intel_ring_unpin(struct intel_ring *ring); void intel_ring_free(struct intel_ring *ring); -- cgit v1.1 From 5e5655c32de83a0151de0c4993d7783c22b6f9b4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 May 2017 14:08:46 +0100 Subject: drm/i915: Micro-optimise hotpath through intel_ring_begin() Typically, there is space available within the ring and if not we have to wait (by definition a slow path). Rearrange the code to reduce the number of branches and stack size for the hotpath, accomodating a slight growth for the wait. v2: Fix the new assert that packets are not larger than the actual ring. 
v3: Make the parameters unsigned as well to make usage. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170504130846.4807-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_ringbuffer.c | 67 ++++++++++++++++++--------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +- 2 files changed, 38 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b308e73..acd1da9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1656,7 +1656,8 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) return 0; } -static int wait_for_space(struct drm_i915_gem_request *req, int bytes) +static noinline int wait_for_space(struct drm_i915_gem_request *req, + unsigned int bytes) { struct intel_ring *ring = req->ring; struct drm_i915_gem_request *target; @@ -1701,52 +1702,56 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) return 0; } -u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) +u32 *intel_ring_begin(struct drm_i915_gem_request *req, + unsigned int num_dwords) { struct intel_ring *ring = req->ring; - int remain_actual = ring->size - ring->emit; - int remain_usable = ring->effective_size - ring->emit; - int bytes = num_dwords * sizeof(u32); - int total_bytes, wait_bytes; - bool need_wrap = false; + const unsigned int remain_usable = ring->effective_size - ring->emit; + const unsigned int bytes = num_dwords * sizeof(u32); + unsigned int need_wrap = 0; + unsigned int total_bytes; u32 *cs; total_bytes = bytes + req->reserved_space; + GEM_BUG_ON(total_bytes > ring->effective_size); - if (unlikely(bytes > remain_usable)) { - /* - * Not enough space for the basic request. So need to flush - * out the remainder and then wait for base + reserved. - */ - wait_bytes = remain_actual + total_bytes; - need_wrap = true; - } else if (unlikely(total_bytes > remain_usable)) { - /* - * The base request will fit but the reserved space - * falls off the end. So we don't need an immediate wrap - * and only need to effectively wait for the reserved - * size space from the start of ringbuffer. - */ - wait_bytes = remain_actual + req->reserved_space; - } else { - /* No wrapping required, just waiting. */ - wait_bytes = total_bytes; + if (unlikely(total_bytes > remain_usable)) { + const int remain_actual = ring->size - ring->emit; + + if (bytes > remain_usable) { + /* + * Not enough space for the basic request. So need to + * flush out the remainder and then wait for + * base + reserved. + */ + total_bytes += remain_actual; + need_wrap = remain_actual | 1; + } else { + /* + * The base request will fit but the reserved space + * falls off the end. So we don't need an immediate + * wrap and only need to effectively wait for the + * reserved size from the start of ringbuffer. 
+ */ + total_bytes = req->reserved_space + remain_actual; + } } - if (wait_bytes > ring->space) { - int ret = wait_for_space(req, wait_bytes); + if (unlikely(total_bytes > ring->space)) { + int ret = wait_for_space(req, total_bytes); if (unlikely(ret)) return ERR_PTR(ret); } if (unlikely(need_wrap)) { - GEM_BUG_ON(remain_actual > ring->space); - GEM_BUG_ON(ring->emit + remain_actual > ring->size); + need_wrap &= ~1; + GEM_BUG_ON(need_wrap > ring->space); + GEM_BUG_ON(ring->emit + need_wrap > ring->size); /* Fill the tail with MI_NOOP */ - memset(ring->vaddr + ring->emit, 0, remain_actual); + memset(ring->vaddr + ring->emit, 0, need_wrap); ring->emit = 0; - ring->space -= remain_actual; + ring->space -= need_wrap; } GEM_BUG_ON(ring->emit > ring->size - bytes); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 3e343b0..ec16fb6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -497,7 +497,8 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, int n); +u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, + unsigned int n); static inline void intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) -- cgit v1.1 From 6f38123ecaac446312a63523b68df84ceb5a06ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 4 May 2017 21:15:30 +0300 Subject: drm/i915: Fix rawclk readout for g4x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns out our skills in decoding the CLKCFG register weren't good enough. On this particular elk the answer we got was 400 MHz when in reality the clock was running at 266 MHz, which then caused us to program a bogus AUX clock divider that caused all AUX communication to fail. Sadly the docs are now in bit heaven, so the fix will have to be based on empirical evidence. Using another elk machine I was able to frob the FSB frequency from the BIOS and see how it affects the CLKCFG register. The machine seesm to use a frequency of 266 MHz by default, and fortunately it still boot even with the 50% CPU overclock that we get when we bump the FSB up to 400 MHz. It turns out the actual FSB frequency and the register have no real link whatsoever. The register value is based on some straps or something, but fortunately those too can be configured from the BIOS on this board, although it doesn't seem to respect the settings 100%. In the end I was able to derive the following relationship: BIOS FSB / strap | CLKCFG ------------------------- 200 | 0x2 266 | 0x0 333 | 0x4 400 | 0x4 So only the 200 and 400 MHz cases actually match how we're currently decoding that register. But as the comment next to some of the defines says, we have been just guessing anyway. So let's fix things up so that at least the 266 MHz case will work correctly as that is actually the setting used by both the buggy machine and my test machine. The fact that 333 and 400 MHz BIOS settings result in the same register value is a little disappointing, as that means we can't tell them apart. However, according to the gmch datasheet for both elk and ctg 400 Mhz is not even a supported FSB frequency, so I'm going to make the assumption that we should decode it as 333 MHz instead. 
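To put a rough number on the failure mode: the AUX clock divider is derived from hrawclk, aiming for a clock in the neighbourhood of 2 MHz (the formula below is only an approximation for illustration, not the exact driver code):

  /* rawclk_khz / 2000 ~= divider for a ~2 MHz AUX reference clock */
  unsigned int div_for_266 = DIV_ROUND_CLOSEST(266667, 2000); /* 133 */
  unsigned int div_for_400 = DIV_ROUND_CLOSEST(400000, 2000); /* 200 */

  /*
   * Misreading a 266 MHz hrawclk as 400 MHz programs a divider of 200,
   * so the resulting AUX clock lands near 266667 / 200 ~= 1.3 MHz, well
   * off its target, and AUX communication fails as seen on the affected
   * machine.
   */
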
Cc: stable@vger.kernel.org Cc: Tomi Sarvela Reported-by: Tomi Sarvela Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100926 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170504181530.6908-1-ville.syrjala@linux.intel.com Acked-by: Jani Nikula Tested-by: Tomi Sarvela --- drivers/gpu/drm/i915/i915_reg.h | 10 +++++++--- drivers/gpu/drm/i915/intel_cdclk.c | 6 ++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ee8170c..524fdfd 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3059,10 +3059,14 @@ enum skl_disp_power_wells { #define CLKCFG_FSB_667 (3 << 0) /* hrawclk 166 */ #define CLKCFG_FSB_800 (2 << 0) /* hrawclk 200 */ #define CLKCFG_FSB_1067 (6 << 0) /* hrawclk 266 */ +#define CLKCFG_FSB_1067_ALT (0 << 0) /* hrawclk 266 */ #define CLKCFG_FSB_1333 (7 << 0) /* hrawclk 333 */ -/* Note, below two are guess */ -#define CLKCFG_FSB_1600 (4 << 0) /* hrawclk 400 */ -#define CLKCFG_FSB_1600_ALT (0 << 0) /* hrawclk 400 */ +/* + * Note that on at least on ELK the below value is reported for both + * 333 and 400 MHz BIOS FSB setting, but given that the gmch datasheet + * lists only 200/266/333 MHz FSB as supported let's decode it as 333 MHz. + */ +#define CLKCFG_FSB_1333_ALT (4 << 0) /* hrawclk 333 */ #define CLKCFG_FSB_MASK (7 << 0) #define CLKCFG_MEM_533 (1 << 4) #define CLKCFG_MEM_667 (2 << 4) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 763010f..2979297 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1808,13 +1808,11 @@ static int g4x_hrawclk(struct drm_i915_private *dev_priv) case CLKCFG_FSB_800: return 200000; case CLKCFG_FSB_1067: + case CLKCFG_FSB_1067_ALT: return 266667; case CLKCFG_FSB_1333: + case CLKCFG_FSB_1333_ALT: return 333333; - /* these two are just a guess; one of them might be right */ - case CLKCFG_FSB_1600: - case CLKCFG_FSB_1600_ALT: - return 400000; default: return 133333; } -- cgit v1.1 From 261aeba834f384cd9cf85be72078291d30fca4e4 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sat, 6 May 2017 23:40:17 +0800 Subject: drm/i915: use memdup_user_nul Use memdup_user_nul() helper instead of open-coding to simplify the code. 
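The transformation is mechanical; as a sketch (buffer and length names are placeholders, a real caller needs <linux/slab.h>, <linux/string.h> and <linux/uaccess.h>):

  /* before: open-coded copy with manual NUL termination */
  buf = kmalloc(len + 1, GFP_KERNEL);
  if (!buf)
      return -ENOMEM;
  if (copy_from_user(buf, ubuf, len)) {
      kfree(buf);
      return -EFAULT;
  }
  buf[len] = '\0';

  /* after: single helper, returns ERR_PTR() on failure */
  buf = memdup_user_nul(ubuf, len);
  if (IS_ERR(buf))
      return PTR_ERR(buf);
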
Signed-off-by: Geliang Tang Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/6baf3aa45d0b5e0fd016b508bac905ebf8443aac.1493779294.git.geliangtang@gmail.com --- drivers/gpu/drm/i915/i915_debugfs.c | 13 +++---------- drivers/gpu/drm/i915/intel_pipe_crc.c | 13 +++---------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 870c470..1003511 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3702,16 +3702,10 @@ static ssize_t i915_displayport_test_active_write(struct file *file, if (len == 0) return 0; - input_buffer = kmalloc(len + 1, GFP_KERNEL); - if (!input_buffer) - return -ENOMEM; + input_buffer = memdup_user_nul(ubuf, len); + if (IS_ERR(input_buffer)) + return PTR_ERR(input_buffer); - if (copy_from_user(input_buffer, ubuf, len)) { - status = -EFAULT; - goto out; - } - - input_buffer[len] = '\0'; DRM_DEBUG_DRIVER("Copied %d bytes from user\n", (unsigned int)len); drm_connector_list_iter_begin(dev, &conn_iter); @@ -3737,7 +3731,6 @@ static ssize_t i915_displayport_test_active_write(struct file *file, } } drm_connector_list_iter_end(&conn_iter); -out: kfree(input_buffer); if (status < 0) return status; diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 647426c..8fbd2bd 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -853,19 +853,12 @@ static ssize_t display_crc_ctl_write(struct file *file, const char __user *ubuf, return -E2BIG; } - tmpbuf = kmalloc(len + 1, GFP_KERNEL); - if (!tmpbuf) - return -ENOMEM; - - if (copy_from_user(tmpbuf, ubuf, len)) { - ret = -EFAULT; - goto out; - } - tmpbuf[len] = '\0'; + tmpbuf = memdup_user_nul(ubuf, len); + if (IS_ERR(tmpbuf)) + return PTR_ERR(tmpbuf); ret = display_crc_ctl_parse(dev_priv, tmpbuf, len); -out: kfree(tmpbuf); if (ret < 0) return ret; -- cgit v1.1 From 16586fcd86c4a498bbfc2643490a5a38648e1d5e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 9 May 2017 09:20:21 +0000 Subject: drm/i915: Move uncore definitions into a separate header In order to allow use of e.g. forcewake_domains in a other feature headers included from the top of i915_drv.h, move all uncore related definitions into their own header. 
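For example, a feature header can now pick up the uncore types directly without pulling in all of i915_drv.h (file and struct names below are invented purely for illustration):

  /* intel_example_feature.h */
  #include "intel_uncore.h"

  struct intel_example_feature {
      /* usable now that the enum lives in intel_uncore.h */
      enum forcewake_domains fw_domains;
  };
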
v2: move __mask_next_bit macro to utils header (Mika) Signed-off-by: Michal Wajdeczko Suggested-by: Joonas Lahtinen Cc: Joonas Lahtinen Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Signed-off-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 157 +-------------------------------- drivers/gpu/drm/i915/i915_utils.h | 6 ++ drivers/gpu/drm/i915/intel_uncore.c | 12 +++ drivers/gpu/drm/i915/intel_uncore.h | 169 ++++++++++++++++++++++++++++++++++++ 4 files changed, 188 insertions(+), 156 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_uncore.h diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b20ed16..29a6966 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -55,6 +55,7 @@ #include "i915_reg.h" #include "i915_utils.h" +#include "intel_uncore.h" #include "intel_bios.h" #include "intel_dpll_mgr.h" #include "intel_uc.h" @@ -676,116 +677,6 @@ struct drm_i915_display_funcs { void (*load_luts)(struct drm_crtc_state *crtc_state); }; -enum forcewake_domain_id { - FW_DOMAIN_ID_RENDER = 0, - FW_DOMAIN_ID_BLITTER, - FW_DOMAIN_ID_MEDIA, - - FW_DOMAIN_ID_COUNT -}; - -enum forcewake_domains { - FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), - FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), - FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), - FORCEWAKE_ALL = (FORCEWAKE_RENDER | - FORCEWAKE_BLITTER | - FORCEWAKE_MEDIA) -}; - -#define FW_REG_READ (1) -#define FW_REG_WRITE (2) - -enum decoupled_power_domain { - GEN9_DECOUPLED_PD_BLITTER = 0, - GEN9_DECOUPLED_PD_RENDER, - GEN9_DECOUPLED_PD_MEDIA, - GEN9_DECOUPLED_PD_ALL -}; - -enum decoupled_ops { - GEN9_DECOUPLED_OP_WRITE = 0, - GEN9_DECOUPLED_OP_READ -}; - -enum forcewake_domains -intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, - i915_reg_t reg, unsigned int op); - -struct intel_uncore_funcs { - void (*force_wake_get)(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); - void (*force_wake_put)(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); - - uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv, - i915_reg_t r, bool trace); - uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, - i915_reg_t r, bool trace); - uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, - i915_reg_t r, bool trace); - uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, - i915_reg_t r, bool trace); - - void (*mmio_writeb)(struct drm_i915_private *dev_priv, - i915_reg_t r, uint8_t val, bool trace); - void (*mmio_writew)(struct drm_i915_private *dev_priv, - i915_reg_t r, uint16_t val, bool trace); - void (*mmio_writel)(struct drm_i915_private *dev_priv, - i915_reg_t r, uint32_t val, bool trace); -}; - -struct intel_forcewake_range { - u32 start; - u32 end; - - enum forcewake_domains domains; -}; - -struct intel_uncore { - spinlock_t lock; /** lock is also taken in irq contexts. 
*/ - - const struct intel_forcewake_range *fw_domains_table; - unsigned int fw_domains_table_entries; - - struct notifier_block pmic_bus_access_nb; - struct intel_uncore_funcs funcs; - - unsigned fifo_count; - - enum forcewake_domains fw_domains; - enum forcewake_domains fw_domains_active; - - u32 fw_set; - u32 fw_clear; - u32 fw_reset; - - struct intel_uncore_forcewake_domain { - enum forcewake_domain_id id; - enum forcewake_domains mask; - unsigned wake_count; - struct hrtimer timer; - i915_reg_t reg_set; - i915_reg_t reg_ack; - } fw_domain[FW_DOMAIN_ID_COUNT]; - - int unclaimed_mmio_check; -}; - -#define __mask_next_bit(mask) ({ \ - int __idx = ffs(mask) - 1; \ - mask &= ~BIT(__idx); \ - __idx; \ -}) - -/* Iterate over initialised fw domains */ -#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \ - for (tmp__ = (mask__); \ - tmp__ ? (domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;) - -#define for_each_fw_domain(domain__, dev_priv__, tmp__) \ - for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__) - #define CSR_VERSION(major, minor) ((major) << 16 | (minor)) #define CSR_VERSION_MAJOR(version) ((version) >> 16) #define CSR_VERSION_MINOR(version) ((version) & 0xffff) @@ -3063,52 +2954,6 @@ extern void intel_irq_fini(struct drm_i915_private *dev_priv); int intel_irq_install(struct drm_i915_private *dev_priv); void intel_irq_uninstall(struct drm_i915_private *dev_priv); -extern void intel_uncore_sanitize(struct drm_i915_private *dev_priv); -extern void intel_uncore_init(struct drm_i915_private *dev_priv); -extern bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv); -extern bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv); -extern void intel_uncore_fini(struct drm_i915_private *dev_priv); -extern void intel_uncore_suspend(struct drm_i915_private *dev_priv); -extern void intel_uncore_resume_early(struct drm_i915_private *dev_priv); -const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id); -void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); -void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); -/* Like above but the caller must manage the uncore.lock itself. - * Must be used with I915_READ_FW and friends. 
- */ -void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); -void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv, - enum forcewake_domains domains); -u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv); - -void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); - -int intel_wait_for_register(struct drm_i915_private *dev_priv, - i915_reg_t reg, - u32 mask, - u32 value, - unsigned int timeout_ms); -int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, - i915_reg_t reg, - u32 mask, - u32 value, - unsigned int fast_timeout_us, - unsigned int slow_timeout_ms, - u32 *out_value); -static inline -int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, - i915_reg_t reg, - u32 mask, - u32 value, - unsigned int timeout_ms) -{ - return __intel_wait_for_register_fw(dev_priv, reg, mask, value, - 2, timeout_ms, NULL); -} - static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) { return dev_priv->gvt; diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index c5455d3..f9d6607 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -92,4 +92,10 @@ __T; \ }) +#define __mask_next_bit(mask) ({ \ + int __idx = ffs(mask) - 1; \ + mask &= ~BIT(__idx); \ + __idx; \ +}) + #endif /* !__I915_UTILS_H */ diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index aa9d306..2c628df 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -801,6 +801,18 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv, __unclaimed_reg_debug(dev_priv, reg, read, before); } +enum decoupled_power_domain { + GEN9_DECOUPLED_PD_BLITTER = 0, + GEN9_DECOUPLED_PD_RENDER, + GEN9_DECOUPLED_PD_MEDIA, + GEN9_DECOUPLED_PD_ALL +}; + +enum decoupled_ops { + GEN9_DECOUPLED_OP_WRITE = 0, + GEN9_DECOUPLED_OP_READ +}; + static const enum decoupled_power_domain fw2dpd_domain[] = { GEN9_DECOUPLED_PD_RENDER, GEN9_DECOUPLED_PD_BLITTER, diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h new file mode 100644 index 0000000..ff6fe2b --- /dev/null +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -0,0 +1,169 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef __INTEL_UNCORE_H__ +#define __INTEL_UNCORE_H__ + +struct drm_i915_private; + +enum forcewake_domain_id { + FW_DOMAIN_ID_RENDER = 0, + FW_DOMAIN_ID_BLITTER, + FW_DOMAIN_ID_MEDIA, + + FW_DOMAIN_ID_COUNT +}; + +enum forcewake_domains { + FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER), + FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER), + FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA), + FORCEWAKE_ALL = (FORCEWAKE_RENDER | + FORCEWAKE_BLITTER | + FORCEWAKE_MEDIA) +}; + +struct intel_uncore_funcs { + void (*force_wake_get)(struct drm_i915_private *dev_priv, + enum forcewake_domains domains); + void (*force_wake_put)(struct drm_i915_private *dev_priv, + enum forcewake_domains domains); + + uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, + i915_reg_t r, bool trace); + + void (*mmio_writeb)(struct drm_i915_private *dev_priv, + i915_reg_t r, uint8_t val, bool trace); + void (*mmio_writew)(struct drm_i915_private *dev_priv, + i915_reg_t r, uint16_t val, bool trace); + void (*mmio_writel)(struct drm_i915_private *dev_priv, + i915_reg_t r, uint32_t val, bool trace); +}; + +struct intel_forcewake_range { + u32 start; + u32 end; + + enum forcewake_domains domains; +}; + +struct intel_uncore { + spinlock_t lock; /** lock is also taken in irq contexts. */ + + const struct intel_forcewake_range *fw_domains_table; + unsigned int fw_domains_table_entries; + + struct notifier_block pmic_bus_access_nb; + struct intel_uncore_funcs funcs; + + unsigned int fifo_count; + + enum forcewake_domains fw_domains; + enum forcewake_domains fw_domains_active; + + u32 fw_set; + u32 fw_clear; + u32 fw_reset; + + struct intel_uncore_forcewake_domain { + enum forcewake_domain_id id; + enum forcewake_domains mask; + unsigned int wake_count; + struct hrtimer timer; + i915_reg_t reg_set; + i915_reg_t reg_ack; + } fw_domain[FW_DOMAIN_ID_COUNT]; + + int unclaimed_mmio_check; +}; + +/* Iterate over initialised fw domains */ +#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \ + for (tmp__ = (mask__); \ + tmp__ ? 
(domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;) + +#define for_each_fw_domain(domain__, dev_priv__, tmp__) \ + for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__) + + +void intel_uncore_sanitize(struct drm_i915_private *dev_priv); +void intel_uncore_init(struct drm_i915_private *dev_priv); +bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv); +bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv); +void intel_uncore_fini(struct drm_i915_private *dev_priv); +void intel_uncore_suspend(struct drm_i915_private *dev_priv); +void intel_uncore_resume_early(struct drm_i915_private *dev_priv); + +u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv); +void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); +const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id); + +enum forcewake_domains +intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, + i915_reg_t reg, unsigned int op); +#define FW_REG_READ (1) +#define FW_REG_WRITE (2) + +void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, + enum forcewake_domains domains); +void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv, + enum forcewake_domains domains); +/* Like above but the caller must manage the uncore.lock itself. + * Must be used with I915_READ_FW and friends. + */ +void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv, + enum forcewake_domains domains); +void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv, + enum forcewake_domains domains); + +int intel_wait_for_register(struct drm_i915_private *dev_priv, + i915_reg_t reg, + u32 mask, + u32 value, + unsigned int timeout_ms); +int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, + i915_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, + u32 *out_value); +static inline +int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, + i915_reg_t reg, + u32 mask, + u32 value, + unsigned int timeout_ms) +{ + return __intel_wait_for_register_fw(dev_priv, reg, mask, value, + 2, timeout_ms, NULL); +} + +#endif /* !__INTEL_UNCORE_H__ */ -- cgit v1.1 From 48de568c644c5b5a9307c92b13c53811c5a93999 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 9 May 2017 13:05:22 +0300 Subject: drm/i915: Show dmc debug registers on Kabylake The assumption is that the register offsets are identical to those on skl. Also, all the published kbl firmwares support the debug registers. So let kbl show the debug counts.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100975 Cc: Imre Deak Signed-off-by: Mika Kuoppala Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/1494324322-28193-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1003511..34785fb 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2853,7 +2853,8 @@ static int i915_dmc_info(struct seq_file *m, void *unused) seq_printf(m, "version: %d.%d\n", CSR_VERSION_MAJOR(csr->version), CSR_VERSION_MINOR(csr->version)); - if (IS_SKYLAKE(dev_priv) && csr->version >= CSR_VERSION(1, 6)) { + if (IS_KABYLAKE(dev_priv) || + (IS_SKYLAKE(dev_priv) && csr->version >= CSR_VERSION(1, 6))) { seq_printf(m, "DC3 -> DC5 count: %d\n", I915_READ(SKL_CSR_DC3_DC5_COUNT)); seq_printf(m, "DC5 -> DC6 count: %d\n", -- cgit v1.1 From ed58570f55e2c57553161413881c7947f0952b5c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 10 May 2017 12:21:47 +0300 Subject: drm/i915/vlv: Fix port B PLL opamp initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current code looks like a typo: the specification calls for setting bits 31:24 to 0x8C, while preserving bits 23:0. Fix things accordingly. I'm not aware of the typo causing a real problem, so the fix is only for consistency. Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1494408113-379-1-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 85b9e2f5..19a7a1e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6369,8 +6369,8 @@ static void vlv_pllb_recal_opamp(struct drm_i915_private *dev_priv, enum pipe vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW9(1), reg_val); reg_val = vlv_dpio_read(dev_priv, pipe, VLV_REF_DW13); - reg_val &= 0x8cffffff; - reg_val = 0x8c000000; + reg_val &= 0x00ffffff; + reg_val |= 0x8c000000; vlv_dpio_write(dev_priv, pipe, VLV_REF_DW13, reg_val); reg_val = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW9(1)); -- cgit v1.1 From 9bacd4b1f8553428c5723e4c8f2ca491b400e429 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 10 May 2017 12:21:48 +0300 Subject: drm/i915/dp: Check error return during DPCD capability queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The assumption of these users of drm_dp_dpcd_readb() is that the passed-in output buffer won't change in case of error, but this isn't guaranteed. Fix this by treating any error as the lack of the given capability. In case of DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP an error would leave the buffer uninitialized even with the above assumption.
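A minimal sketch of the pattern being applied here, with a made-up helper name rather than anything taken from the patch itself: treat a failed or short single-byte DPCD read as the capability simply being absent, relying on drm_dp_dpcd_readb() returning 1 (the number of bytes read) on success.

	/* Sketch only: query a single-bit capability from a DPCD register. */
	static bool example_dpcd_cap(struct intel_dp *intel_dp,
				     unsigned int offset, u8 cap_bit)
	{
		u8 val = 0;

		/* Any error or short read: assume the capability is absent. */
		if (drm_dp_dpcd_readb(&intel_dp->aux, offset, &val) != 1)
			return false;

		return val & cap_bit;
	}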
Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1494408113-379-2-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 08834f7..4a6feb6 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3088,7 +3088,8 @@ static bool intel_dp_get_y_cord_status(struct intel_dp *intel_dp) { uint8_t psr_caps = 0; - drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps); + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_PSR_CAPS, &psr_caps) != 1) + return false; return psr_caps & DP_PSR2_SU_Y_COORDINATE_REQUIRED; } @@ -3096,9 +3097,9 @@ static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) { uint8_t dprx = 0; - drm_dp_dpcd_readb(&intel_dp->aux, - DP_DPRX_FEATURE_ENUMERATION_LIST, - &dprx); + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST, + &dprx) != 1) + return false; return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED; } @@ -3106,7 +3107,9 @@ static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp) { uint8_t alpm_caps = 0; - drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, &alpm_caps); + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_RECEIVER_ALPM_CAP, + &alpm_caps) != 1) + return false; return alpm_caps & DP_ALPM_CAP; } @@ -3679,9 +3682,10 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) uint8_t frame_sync_cap; dev_priv->psr.sink_support = true; - drm_dp_dpcd_readb(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, - &frame_sync_cap); + if (drm_dp_dpcd_readb(&intel_dp->aux, + DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, + &frame_sync_cap) != 1) + frame_sync_cap = 0; dev_priv->psr.aux_frame_sync = frame_sync_cap ? true : false; /* PSR2 needs frame sync as well */ dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; -- cgit v1.1 From 99016646608148be3cf3df157a60f83447dc45aa Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 10 May 2017 12:21:49 +0300 Subject: drm/i915/sdvo: Check error return from intel_sdvo_get_value() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current code assumes that 'enhancements' won't change in case of an error, but this isn't guaranteed. Fix things by treating any error as a lack of the given capability. v2: - Remove the now redundant init of enhancements. 
(Ville) Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1494408113-379-3-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_sdvo.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 816a6f5..496b24c 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2892,11 +2892,10 @@ static bool intel_sdvo_create_enhance_property(struct intel_sdvo *intel_sdvo, BUILD_BUG_ON(sizeof(enhancements) != 2); - enhancements.response = 0; - intel_sdvo_get_value(intel_sdvo, - SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS, - &enhancements, sizeof(enhancements)); - if (enhancements.response == 0) { + if (!intel_sdvo_get_value(intel_sdvo, + SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS, + &enhancements, sizeof(enhancements)) || + enhancements.response == 0) { DRM_DEBUG_KMS("No enhancement is supported\n"); return true; } -- cgit v1.1 From 4519290aec0e93e2832f77db3437c332bba6506c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 10 May 2017 12:21:50 +0300 Subject: drm/i915: Check error return when setting DMA mask Even though an error from these functions isn't fatal we still want to have a diagnostic message about it. v2: - Don't do assignments in if statements. (Jani) Cc: Jani Nikula Signed-off-by: Imre Deak Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1494408113-379-4-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8bab4ae..0178c9e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2741,13 +2741,17 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) struct pci_dev *pdev = dev_priv->drm.pdev; unsigned int size; u16 snb_gmch_ctl; + int err; /* TODO: We're not aware of mappable constraints on gen8 yet */ ggtt->mappable_base = pci_resource_start(pdev, 2); ggtt->mappable_end = pci_resource_len(pdev, 2); - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); + if (!err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); + if (err) + DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); @@ -2790,6 +2794,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) struct pci_dev *pdev = dev_priv->drm.pdev; unsigned int size; u16 snb_gmch_ctl; + int err; ggtt->mappable_base = pci_resource_start(pdev, 2); ggtt->mappable_end = pci_resource_len(pdev, 2); @@ -2802,8 +2807,11 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) return -ENXIO; } - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) - pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); + if (!err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); + if (err) + DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); -- cgit v1.1 From 0290cf3f9f77b7d31691b21cfe505e4804aa4bbb Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 10 May 2017 12:21:51 +0300 Subject: drm/i915: Check error return when converting pipe to connector MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An error from intel_get_pipe_from_connector() would mean a bug somewhere else, but we still should check for it to prevent some other more obscure bug later. v2: - Fall back to a reasonable default instead of bailing out in case of error. (Jani) v3: - Fix s/PIPE_INVALID/INVALID_PIPE/ typo. (Jani) v4: - Fix bogus bracing around WARN() condition. (Ville) Cc: Jani Nikula Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1494408113-379-5-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_panel.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index cb50c52..c8103f8 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -888,10 +888,14 @@ static void pch_enable_backlight(struct intel_connector *connector) struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_panel *panel = &connector->panel; enum pipe pipe = intel_get_pipe_from_connector(connector); - enum transcoder cpu_transcoder = - intel_pipe_to_cpu_transcoder(dev_priv, pipe); + enum transcoder cpu_transcoder; u32 cpu_ctl2, pch_ctl1, pch_ctl2; + if (!WARN_ON_ONCE(pipe == INVALID_PIPE)) + cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, pipe); + else + cpu_transcoder = TRANSCODER_EDP; + cpu_ctl2 = I915_READ(BLC_PWM_CPU_CTL2); if (cpu_ctl2 & BLM_PWM_ENABLE) { DRM_DEBUG_KMS("cpu backlight already enabled\n"); @@ -973,6 +977,9 @@ static void i965_enable_backlight(struct intel_connector *connector) enum pipe pipe = intel_get_pipe_from_connector(connector); u32 ctl, ctl2, freq; + if (WARN_ON_ONCE(pipe == INVALID_PIPE)) + pipe = PIPE_A; + ctl2 = I915_READ(BLC_PWM_CTL2); if (ctl2 & BLM_PWM_ENABLE) { DRM_DEBUG_KMS("backlight already enabled\n"); @@ -1037,6 +1044,9 @@ static void bxt_enable_backlight(struct intel_connector *connector) enum pipe pipe = intel_get_pipe_from_connector(connector); u32 pwm_ctl, val; + if (WARN_ON_ONCE(pipe == INVALID_PIPE)) + pipe = PIPE_A; + /* Controller 1 uses the utility pin. */ if (panel->backlight.controller == 1) { val = I915_READ(UTIL_PIN_CTL); @@ -1093,7 +1103,8 @@ void intel_panel_enable_backlight(struct intel_connector *connector) if (!panel->backlight.present) return; - DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe)); + if (!WARN_ON_ONCE(pipe == INVALID_PIPE)) + DRM_DEBUG_KMS("pipe %c\n", pipe_name(pipe)); mutex_lock(&dev_priv->backlight_lock); -- cgit v1.1 From a92d1a91cf729c585b978cecb75b046ced0bc164 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 10 May 2017 12:21:52 +0300 Subject: drm/i915: Sanitize stolen memory size calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On GEN8+ (not counting CHV) the calculation can in theory result in an incorrect sign extension with all upper bits set. In practice this is unlikely to happen since it would require 4GB of stolen memory set aside. For consistency still prevent the sign extension explicitly everywhere. 
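To make the hazard concrete, a small standalone sketch (illustrative only, not taken from the patch) of how the u16 control value is promoted to a 32-bit int, shifted into bit 31, and then sign-extended when widened to size_t on a 64-bit build, and how the explicit cast avoids it:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint16_t gmch_ctl = 0x40;             /* promoted to 32-bit int before the shift */
		size_t bad = gmch_ctl << 25;          /* bit 31 set -> negative int, widened to
						       * 0xffffffff80000000 by sign extension */
		size_t good = (size_t)gmch_ctl << 25; /* widened first: 0x80000000, i.e. 2 GiB */

		printf("bad=%#zx good=%#zx\n", bad, good);
		return 0;
	}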
Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1494408113-379-6-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0178c9e..ac2b8f6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2577,14 +2577,14 @@ static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) { snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; snb_gmch_ctl &= SNB_GMCH_GMS_MASK; - return snb_gmch_ctl << 25; /* 32 MB units */ + return (size_t)snb_gmch_ctl << 25; /* 32 MB units */ } static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) { bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; - return bdw_gmch_ctl << 25; /* 32 MB units */ + return (size_t)bdw_gmch_ctl << 25; /* 32 MB units */ } static size_t chv_get_stolen_size(u16 gmch_ctrl) @@ -2598,11 +2598,11 @@ static size_t chv_get_stolen_size(u16 gmch_ctrl) * 0x17 to 0x1d: 4MB increments start at 36MB */ if (gmch_ctrl < 0x11) - return gmch_ctrl << 25; + return (size_t)gmch_ctrl << 25; else if (gmch_ctrl < 0x17) - return (gmch_ctrl - 0x11 + 2) << 22; + return (size_t)(gmch_ctrl - 0x11 + 2) << 22; else - return (gmch_ctrl - 0x17 + 9) << 22; + return (size_t)(gmch_ctrl - 0x17 + 9) << 22; } static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) @@ -2611,10 +2611,10 @@ static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; if (gen9_gmch_ctl < 0xf0) - return gen9_gmch_ctl << 25; /* 32 MB units */ + return (size_t)gen9_gmch_ctl << 25; /* 32 MB units */ else /* 4MB increments starting at 0xf0 for 4MB */ - return (gen9_gmch_ctl - 0xf0 + 1) << 22; + return (size_t)(gen9_gmch_ctl - 0xf0 + 1) << 22; } static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) -- cgit v1.1 From 52eb92c6fe8f89c3099dd28386f178999abd5438 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 10 May 2017 12:21:53 +0300 Subject: drm/i915/lvds: Remove magic from PLL programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This looks like a left-over from enabling work. The spec defines CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED as reserved set, so follow this in the programming. v2: - Follow the spec to set CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED. (Ville) Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1494408113-379-7-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/dvo_ch7017.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/dvo_ch7017.c b/drivers/gpu/drm/i915/dvo_ch7017.c index b3c7c19..80b3e16 100644 --- a/drivers/gpu/drm/i915/dvo_ch7017.c +++ b/drivers/gpu/drm/i915/dvo_ch7017.c @@ -280,10 +280,10 @@ static void ch7017_mode_set(struct intel_dvo_device *dvo, (0 << CH7017_PHASE_DETECTOR_SHIFT); } else { outputs_enable = CH7017_LVDS_CHANNEL_A | CH7017_CHARGE_PUMP_HIGH; - lvds_pll_feedback_div = CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED | + lvds_pll_feedback_div = + CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED | (2 << CH7017_LVDS_PLL_FEED_BACK_DIVIDER_SHIFT) | (3 << CH7017_LVDS_PLL_FEED_FORWARD_DIVIDER_SHIFT); - lvds_pll_feedback_div = 35; lvds_control_2 = (3 << CH7017_LOOP_FILTER_SHIFT) | (0 << CH7017_PHASE_DETECTOR_SHIFT); if (1) { /* XXX: dual channel panel detection. Assume yes for now. 
*/ -- cgit v1.1 From 77d14ee415b8aef427822ddf4c7f9dd43f1c83b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:18 +0300 Subject: drm/i915: s/vlv_plane_wm_compute/vlv_raw_plane_wm_compute/ etc. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename some of the vlv wm functions to reflect the fact that they operate on the "raw" watermarks. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-2-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_pm.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cacb65f..23fff91 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1194,8 +1194,8 @@ static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, return dirty; } -static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, - const struct intel_plane_state *plane_state) +static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->base.plane); enum plane_id plane_id = plane->id; @@ -1234,8 +1234,8 @@ out: return dirty; } -static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, - enum plane_id plane_id, int level) +static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, + enum plane_id plane_id, int level) { const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; @@ -1245,12 +1245,12 @@ static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, return raw->plane[plane_id] <= fifo_state->plane[plane_id]; } -static bool vlv_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level) +static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level) { - return vlv_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) && - vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) && - vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) && - vlv_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); + return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) && + vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) && + vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) && + vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); } static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) @@ -1279,7 +1279,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) old_plane_state->base.crtc != &crtc->base) continue; - if (vlv_plane_wm_compute(crtc_state, plane_state)) + if (vlv_raw_plane_wm_compute(crtc_state, plane_state)) dirty |= BIT(plane->id); } @@ -1325,7 +1325,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; - if (!vlv_crtc_wm_is_valid(crtc_state, level)) + if (!vlv_raw_crtc_wm_is_valid(crtc_state, level)) break; for_each_plane_id_on_crtc(crtc, plane_id) { -- cgit v1.1 From 868b0c08b2c461f4a1856305f13974d33c465cab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:19 +0300 Subject: drm/i915: Drop the debug message from vlv_get_fifo_size() MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Seeing the display FIFO sizes at driver load time doesn't really provide anything useful for us, so let's just drop the debug message. One can always use eg. intel_watermarks to dump out the hardware settings prior to loading the driver. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-3-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_pm.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 23fff91..2c63abe 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -454,13 +454,6 @@ static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state) fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start; fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start; fifo_state->plane[PLANE_CURSOR] = 63; - - DRM_DEBUG_KMS("Pipe %c FIFO size: %d/%d/%d/%d\n", - pipe_name(pipe), - fifo_state->plane[PLANE_PRIMARY], - fifo_state->plane[PLANE_SPRITE0], - fifo_state->plane[PLANE_SPRITE1], - fifo_state->plane[PLANE_CURSOR]); } static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane) -- cgit v1.1 From 6d5019b681976c3487f43141f1f64bd1ac770ac5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:20 +0300 Subject: drm/i915: s/vlv_num_wm_levels/intel_wm_num_levels/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the VLV/CHV max_level->num_levels helper to have an intel_ prefix since it's not VLV/CHV specific and I'll want to use it on other platforms as well. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-4-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_pm.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2c63abe..ee045be 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -648,6 +648,11 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz, return wm_size; } +static int intel_wm_num_levels(struct drm_i915_private *dev_priv) +{ + return dev_priv->wm.max_level + 1; +} + static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -1136,18 +1141,13 @@ static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) return 0; } -static int vlv_num_wm_levels(struct drm_i915_private *dev_priv) -{ - return dev_priv->wm.max_level + 1; -} - /* mark all levels starting from 'level' as invalid */ static void vlv_invalidate_wms(struct intel_crtc *crtc, struct vlv_wm_state *wm_state, int level) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - for (; level < vlv_num_wm_levels(dev_priv); level++) { + for (; level < intel_wm_num_levels(dev_priv); level++) { enum plane_id plane_id; for_each_plane_id_on_crtc(crtc, plane_id) @@ -1174,7 +1174,7 @@ static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, int level, enum plane_id plane_id, u16 value) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - int num_levels = vlv_num_wm_levels(dev_priv); + int num_levels = intel_wm_num_levels(dev_priv); bool dirty = false; for (; level < num_levels; level++) { @@ -1192,7 +1192,7 @@ static bool 
vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, { struct intel_plane *plane = to_intel_plane(plane_state->base.plane); enum plane_id plane_id = plane->id; - int num_levels = vlv_num_wm_levels(to_i915(plane->base.dev)); + int num_levels = intel_wm_num_levels(to_i915(plane->base.dev)); int level; bool dirty = false; @@ -1306,7 +1306,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) } /* initially allow all levels */ - wm_state->num_levels = vlv_num_wm_levels(dev_priv); + wm_state->num_levels = intel_wm_num_levels(dev_priv); /* * Note that enabling cxsr with no primary/sprite planes * enabled can wedge the pipe. Hence we only allow cxsr -- cgit v1.1 From 114d7dc0dd733d3ed5b726c8b9e028a792d71617 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:21 +0300 Subject: drm/i915: Rename bunch of vlv_ watermark structures to g4x_ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'll be wanting to share some of these watermark structures on g4x, so let's rename them to have a g4x_ prefix instead of vlv_. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-5-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/i915_drv.h | 8 ++++---- drivers/gpu/drm/i915/intel_drv.h | 6 +++--- drivers/gpu/drm/i915/intel_pm.c | 14 +++++++------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 29a6966..7e37fb9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1652,11 +1652,11 @@ struct ilk_wm_values { enum intel_ddb_partitioning partitioning; }; -struct vlv_pipe_wm { +struct g4x_pipe_wm { uint16_t plane[I915_MAX_PLANES]; }; -struct vlv_sr_wm { +struct g4x_sr_wm { uint16_t plane; uint16_t cursor; }; @@ -1666,8 +1666,8 @@ struct vlv_wm_ddl_values { }; struct vlv_wm_values { - struct vlv_pipe_wm pipe[3]; - struct vlv_sr_wm sr; + struct g4x_pipe_wm pipe[3]; + struct g4x_sr_wm sr; struct vlv_wm_ddl_values ddl[3]; uint8_t level; bool cxsr; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 54f3ff8..d1cdd10 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -512,8 +512,8 @@ enum vlv_wm_level { }; struct vlv_wm_state { - struct vlv_pipe_wm wm[NUM_VLV_WM_LEVELS]; - struct vlv_sr_wm sr[NUM_VLV_WM_LEVELS]; + struct g4x_pipe_wm wm[NUM_VLV_WM_LEVELS]; + struct g4x_sr_wm sr[NUM_VLV_WM_LEVELS]; uint8_t num_levels; bool cxsr; }; @@ -549,7 +549,7 @@ struct intel_crtc_wm_state { struct { /* "raw" watermarks (not inverted) */ - struct vlv_pipe_wm raw[NUM_VLV_WM_LEVELS]; + struct g4x_pipe_wm raw[NUM_VLV_WM_LEVELS]; /* intermediate watermarks (inverted) */ struct vlv_wm_state intermediate; /* optimal watermarks (inverted) */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ee045be..1cc13cc 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1062,7 +1062,7 @@ static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes) static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - const struct vlv_pipe_wm *raw = + const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2]; struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; unsigned int active_planes = 
crtc_state->active_planes & ~BIT(PLANE_CURSOR); @@ -1178,7 +1178,7 @@ static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, bool dirty = false; for (; level < num_levels; level++) { - struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; dirty |= raw->plane[plane_id] != value; raw->plane[plane_id] = value; @@ -1202,7 +1202,7 @@ static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, } for (level = 0; level < num_levels; level++) { - struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; int wm = vlv_compute_wm_level(crtc_state, plane_state, level); int max_wm = plane_id == PLANE_CURSOR ? 63 : 511; @@ -1230,7 +1230,7 @@ out: static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, enum plane_id plane_id, int level) { - const struct vlv_pipe_wm *raw = + const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; const struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; @@ -1315,7 +1315,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1; for (level = 0; level < wm_state->num_levels; level++) { - const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; if (!vlv_raw_crtc_wm_is_valid(crtc_state, level)) @@ -4785,7 +4785,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) active->cxsr = wm->cxsr; for (level = 0; level < active->num_levels; level++) { - struct vlv_pipe_wm *raw = + struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; active->sr[level].plane = wm->sr.plane; @@ -4845,7 +4845,7 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv) continue; for (level = 0; level < wm_state->num_levels; level++) { - struct vlv_pipe_wm *raw = + struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; raw->plane[plane_id] = 0; -- cgit v1.1 From 57a6528a528f75307b4e5ec4185d26e8277c140a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:22 +0300 Subject: drm/i915: Make vlv/chv watermark debug print less cryptic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The magic numbers 0,1,2 aren't all that interesting for users perhaps. Since we know what these watermark levels mean for VLV/CHV let's print their names. 
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-6-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1cc13cc..da20727 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1218,7 +1218,7 @@ static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, out: if (dirty) - DRM_DEBUG_KMS("%s wms: [0]=%d,[1]=%d,[2]=%d\n", + DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n", plane->base.name, crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], -- cgit v1.1 From 62571fc365a0674fd57f70c0268ea88060877339 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:23 +0300 Subject: drm/i915: Document CxSR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add some documentation explaining what CxSR actually is. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-7-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_pm.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index da20727..7bd4c86 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -386,6 +386,43 @@ static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enabl return was_enabled; } +/** + * intel_set_memory_cxsr - Configure CxSR state + * @dev_priv: i915 device + * @enable: Allow vs. disallow CxSR + * + * Allow or disallow the system to enter a special CxSR + * (C-state self refresh) state. What typically happens in CxSR mode + * is that several display FIFOs may get combined into a single larger + * FIFO for a particular plane (so called max FIFO mode) to allow the + * system to defer memory fetches longer, and the memory will enter + * self refresh. + * + * Note that enabling CxSR does not guarantee that the system enter + * this special mode, nor does it guarantee that the system stays + * in that mode once entered. So this just allows/disallows the system + * to autonomously utilize the CxSR mode. Other factors such as core + * C-states will affect when/if the system actually enters/exits the + * CxSR mode. + * + * Note that on VLV/CHV this actually only controls the max FIFO mode, + * and the system is free to enter/exit memory self refresh at any time + * even when the use of CxSR has been disallowed. + * + * While the system is actually in the CxSR/max FIFO mode, some plane + * control registers will not get latched on vblank. Thus in order to + * guarantee the system will respond to changes in the plane registers + * we must always disallow CxSR prior to making changes to those registers. + * Unfortunately the system will re-evaluate the CxSR conditions at + * frame start which happens after vblank start (which is when the plane + * registers would get latched), so we can't proceed with the plane update + * during the same frame where we disallowed CxSR. + * + * Certain platforms also have a deeper HPLL SR mode. Fortunately the + * HPLL SR mode depends on CxSR itself, so we don't have to hand hold + * the hardware w.r.t. HPLL SR when writing to plane registers. 
+ * Disallowing just CxSR is sufficient. + */ bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable) { bool ret; -- cgit v1.1 From 99834b1487c265363fd33ee9a2138b6bb4c7d281 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:24 +0300 Subject: drm/i915: Fix cursor 'cpp' in watermark calculations for old platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The watermark code for the old platforms (g4x and older) uses the primary plane cpp when computing cursor watermarks. To keep the fix simple let's just hardcode cpp=4 for the cursor on those platforms since that's all we support. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-8-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_pm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7bd4c86..7d0d1a0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -768,7 +768,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) /* cursor SR */ wm = intel_calculate_wm(clock, &pineview_cursor_wm, pineview_display_wm.fifo_size, - cpp, latency->cursor_sr); + 4, latency->cursor_sr); reg = I915_READ(DSPFW3); reg &= ~DSPFW_CURSOR_SR_MASK; reg |= FW_WM(wm, CURSOR_SR); @@ -786,7 +786,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) /* cursor HPLL off SR */ wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm, pineview_display_hplloff_wm.fifo_size, - cpp, latency->cursor_hpll_disable); + 4, latency->cursor_hpll_disable); reg = I915_READ(DSPFW3); reg &= ~DSPFW_HPLL_CURSOR_MASK; reg |= FW_WM(wm, HPLL_CURSOR); @@ -842,7 +842,7 @@ static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, /* Use the large buffer method to calculate cursor watermark */ line_time_us = max(htotal * 1000 / clock, 1); line_count = (cursor_latency_ns / line_time_us + 1000) / 1000; - entries = line_count * crtc->base.cursor->state->crtc_w * cpp; + entries = line_count * crtc->base.cursor->state->crtc_w * 4; tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8; if (tlb_miss > 0) entries += tlb_miss; @@ -930,7 +930,7 @@ static bool g4x_compute_srwm(struct drm_i915_private *dev_priv, *display_wm = entries + display->guard_size; /* calculate the self-refresh watermark for display cursor */ - entries = line_count * cpp * crtc->base.cursor->state->crtc_w; + entries = line_count * 4 * crtc->base.cursor->state->crtc_w; entries = DIV_ROUND_UP(entries, cursor->cacheline_size); *cursor_wm = entries + cursor->guard_size; @@ -1736,7 +1736,7 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) entries, srwm); entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * - cpp * crtc->base.cursor->state->crtc_w; + 4 * crtc->base.cursor->state->crtc_w; entries = DIV_ROUND_UP(entries, i965_cursor_wm_info.cacheline_size); cursor_sr = i965_cursor_wm_info.fifo_size - -- cgit v1.1 From 624a0ac32d88418a9206d78bddaba26755e213fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:25 +0300 Subject: drm/i915: Fix the g4x watermark TLB miss workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The g4x watermark TLB miss workaround requires that we bump up the watermark by the difference between 8 full lines and the FIFO size.
Unfortunately the way we compute it at the moment ignores the size of the pixels. The code also used the primary plane width as the cursor width when computing the TLB miss w/a for the cursor. Let's fix both problems. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-9-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_pm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7d0d1a0..09d4676 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -811,7 +811,7 @@ static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, struct intel_crtc *crtc; const struct drm_display_mode *adjusted_mode; const struct drm_framebuffer *fb; - int htotal, hdisplay, clock, cpp; + int htotal, plane_width, cursor_width, clock, cpp; int line_time_us, line_count; int entries, tlb_miss; @@ -826,12 +826,13 @@ static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, fb = crtc->base.primary->state->fb; clock = adjusted_mode->crtc_clock; htotal = adjusted_mode->crtc_htotal; - hdisplay = crtc->config->pipe_src_w; + plane_width = crtc->config->pipe_src_w; + cursor_width = crtc->base.cursor->state->crtc_w; cpp = fb->format->cpp[0]; /* Use the small buffer method to calculate plane watermark */ entries = ((clock * cpp / 1000) * display_latency_ns) / 1000; - tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8; + tlb_miss = display->fifo_size*display->cacheline_size - plane_width * cpp * 8; if (tlb_miss > 0) entries += tlb_miss; entries = DIV_ROUND_UP(entries, display->cacheline_size); @@ -842,8 +843,8 @@ static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, /* Use the large buffer method to calculate cursor watermark */ line_time_us = max(htotal * 1000 / clock, 1); line_count = (cursor_latency_ns / line_time_us + 1000) / 1000; - entries = line_count * crtc->base.cursor->state->crtc_w * 4; - tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8; + entries = line_count * cursor_width * 4; + tlb_miss = cursor->fifo_size*cursor->cacheline_size - cursor_width * 4 * 8; if (tlb_miss > 0) entries += tlb_miss; entries = DIV_ROUND_UP(entries, cursor->cacheline_size); -- cgit v1.1 From 0f95ff8505b25aa29530818afcf12ff08399ccf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:26 +0300 Subject: drm/i915: Refactor the g4x TLB miss w/a to a helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the g4x TLB miss w/a calculation into a small helper. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-10-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_pm.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 09d4676..c43fcd5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -799,6 +799,23 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) } } +/* + * Documentation says: + * "If the line size is small, the TLB fetches can get in the way of the + * data fetches, causing some lag in the pixel data return which is not + * accounted for in the above formulas. 
The following adjustment only + * needs to be applied if eight whole lines fit in the buffer at once. + * The WM is adjusted upwards by the difference between the FIFO size + * and the size of 8 whole lines. This adjustment is always performed + * in the actual pixel depth regardless of whether FBC is enabled or not." + */ +static int g4x_tlb_miss_wa(int fifo_size, int width, int cpp) +{ + int tlb_miss = fifo_size * 64 - width * cpp * 8; + + return max(0, tlb_miss); +} + static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, int plane, const struct intel_watermark_params *display, @@ -813,7 +830,7 @@ static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, const struct drm_framebuffer *fb; int htotal, plane_width, cursor_width, clock, cpp; int line_time_us, line_count; - int entries, tlb_miss; + int entries; crtc = intel_get_crtc_for_plane(dev_priv, plane); if (!intel_crtc_active(crtc)) { @@ -832,9 +849,7 @@ static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, /* Use the small buffer method to calculate plane watermark */ entries = ((clock * cpp / 1000) * display_latency_ns) / 1000; - tlb_miss = display->fifo_size*display->cacheline_size - plane_width * cpp * 8; - if (tlb_miss > 0) - entries += tlb_miss; + entries += g4x_tlb_miss_wa(display->fifo_size, plane_width, cpp); entries = DIV_ROUND_UP(entries, display->cacheline_size); *plane_wm = entries + display->guard_size; @@ -844,9 +859,7 @@ static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, line_time_us = max(htotal * 1000 / clock, 1); line_count = (cursor_latency_ns / line_time_us + 1000) / 1000; entries = line_count * cursor_width * 4; - tlb_miss = cursor->fifo_size*cursor->cacheline_size - cursor_width * 4 * 8; - if (tlb_miss > 0) - entries += tlb_miss; + entries += g4x_tlb_miss_wa(cursor->fifo_size, cursor_width, 4); entries = DIV_ROUND_UP(entries, cursor->cacheline_size); *cursor_wm = entries + cursor->guard_size; if (*cursor_wm > (int)cursor->max_wm) -- cgit v1.1 From baf69ca8a51c8a5ef6ef869f684764459a9b6fa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:27 +0300 Subject: drm/i915: Refactor wm calculations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All platforms until SKL compute their watermarks essentially using the same method1/small buffer and method2/large buffer formulas. Most just open code it in slightly different ways. Let's pull it all into common helpers. This makes it a little easier to spot the actual differences. While at it try to add some docs explaining what the formulas are trying to do. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-11-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_pm.c | 221 +++++++++++++++++++++++++++------------- 1 file changed, 149 insertions(+), 72 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c43fcd5..c07f3b2 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -626,8 +626,104 @@ static const struct intel_watermark_params i845_wm_info = { }; /** + * intel_wm_method1 - Method 1 / "small buffer" watermark formula + * @pixel_rate: Pipe pixel rate in kHz + * @cpp: Plane bytes per pixel + * @latency: Memory wakeup latency in 0.1us units + * + * Compute the watermark using the method 1 or "small buffer" + * formula.
The caller may additonally add extra cachelines + * to account for TLB misses and clock crossings. + * + * This method is concerned with the short term drain rate + * of the FIFO, ie. it does not account for blanking periods + * which would effectively reduce the average drain rate across + * a longer period. The name "small" refers to the fact the + * FIFO is relatively small compared to the amount of data + * fetched. + * + * The FIFO level vs. time graph might look something like: + * + * |\ |\ + * | \ | \ + * __---__---__ (- plane active, _ blanking) + * -> time + * + * or perhaps like this: + * + * |\|\ |\|\ + * __----__----__ (- plane active, _ blanking) + * -> time + * + * Returns: + * The watermark in bytes + */ +static unsigned int intel_wm_method1(unsigned int pixel_rate, + unsigned int cpp, + unsigned int latency) +{ + uint64_t ret; + + ret = (uint64_t) pixel_rate * cpp * latency; + ret = DIV_ROUND_UP_ULL(ret, 10000); + + return ret; +} + +/** + * intel_wm_method2 - Method 2 / "large buffer" watermark formula + * @pixel_rate: Pipe pixel rate in kHz + * @htotal: Pipe horizontal total + * @width: Plane width in pixels + * @cpp: Plane bytes per pixel + * @latency: Memory wakeup latency in 0.1us units + * + * Compute the watermark using the method 2 or "large buffer" + * formula. The caller may additonally add extra cachelines + * to account for TLB misses and clock crossings. + * + * This method is concerned with the long term drain rate + * of the FIFO, ie. it does account for blanking periods + * which effectively reduce the average drain rate across + * a longer period. The name "large" refers to the fact the + * FIFO is relatively large compared to the amount of data + * fetched. + * + * The FIFO level vs. time graph might look something like: + * + * |\___ |\___ + * | \___ | \___ + * | \ | \ + * __ --__--__--__--__--__--__ (- plane active, _ blanking) + * -> time + * + * Returns: + * The watermark in bytes + */ +static unsigned int intel_wm_method2(unsigned int pixel_rate, + unsigned int htotal, + unsigned int width, + unsigned int cpp, + unsigned int latency) +{ + unsigned int ret; + + /* + * FIXME remove once all users are computing + * watermarks in the correct place. + */ + if (WARN_ON_ONCE(htotal == 0)) + htotal = 1; + + ret = (latency * pixel_rate) / (htotal * 10000); + ret = (ret + 1) * width * cpp; + + return ret; +} + +/** * intel_calculate_wm - calculate watermark level - * @clock_in_khz: pixel clock + * @pixel_rate: pixel clock * @wm: chip FIFO params * @cpp: bytes per pixel * @latency_ns: memory latency for the platform @@ -643,12 +739,12 @@ static const struct intel_watermark_params i845_wm_info = { * past the watermark point. If the FIFO drains completely, a FIFO underrun * will occur, and a display engine hang could result. */ -static unsigned long intel_calculate_wm(unsigned long clock_in_khz, - const struct intel_watermark_params *wm, - int fifo_size, int cpp, - unsigned long latency_ns) +static unsigned int intel_calculate_wm(int pixel_rate, + const struct intel_watermark_params *wm, + int fifo_size, int cpp, + unsigned int latency_ns) { - long entries_required, wm_size; + int entries, wm_size; /* * Note: we need to make sure we don't overflow for various clock & @@ -656,18 +752,17 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz, * clocks go from a few thousand to several hundred thousand. 
* latency is usually a few thousand */ - entries_required = ((clock_in_khz / 1000) * cpp * latency_ns) / - 1000; - entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size); - - DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required); + entries = intel_wm_method1(pixel_rate, cpp, + latency_ns / 100); + entries = DIV_ROUND_UP(entries, wm->cacheline_size) + + wm->guard_size; + DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries); - wm_size = fifo_size - (entries_required + wm->guard_size); - - DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size); + wm_size = fifo_size - entries; + DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size); /* Don't promote wm_size to unsigned... */ - if (wm_size > (long)wm->max_wm) + if (wm_size > wm->max_wm) wm_size = wm->max_wm; if (wm_size <= 0) wm_size = wm->default_wm; @@ -734,7 +829,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) struct intel_crtc *crtc; const struct cxsr_latency *latency; u32 reg; - unsigned long wm; + unsigned int wm; latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), dev_priv->is_ddr3, @@ -829,7 +924,6 @@ static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, const struct drm_display_mode *adjusted_mode; const struct drm_framebuffer *fb; int htotal, plane_width, cursor_width, clock, cpp; - int line_time_us, line_count; int entries; crtc = intel_get_crtc_for_plane(dev_priv, plane); @@ -848,7 +942,7 @@ static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, cpp = fb->format->cpp[0]; /* Use the small buffer method to calculate plane watermark */ - entries = ((clock * cpp / 1000) * display_latency_ns) / 1000; + entries = intel_wm_method1(clock, cpp, display_latency_ns / 100); entries += g4x_tlb_miss_wa(display->fifo_size, plane_width, cpp); entries = DIV_ROUND_UP(entries, display->cacheline_size); *plane_wm = entries + display->guard_size; @@ -856,9 +950,8 @@ static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, *plane_wm = display->max_wm; /* Use the large buffer method to calculate cursor watermark */ - line_time_us = max(htotal * 1000 / clock, 1); - line_count = (cursor_latency_ns / line_time_us + 1000) / 1000; - entries = line_count * cursor_width * 4; + entries = intel_wm_method2(clock, htotal, cursor_width, 4, + cursor_latency_ns / 100); entries += g4x_tlb_miss_wa(cursor->fifo_size, cursor_width, 4); entries = DIV_ROUND_UP(entries, cursor->cacheline_size); *cursor_wm = entries + cursor->guard_size; @@ -914,8 +1007,6 @@ static bool g4x_compute_srwm(struct drm_i915_private *dev_priv, const struct drm_display_mode *adjusted_mode; const struct drm_framebuffer *fb; int hdisplay, htotal, cpp, clock; - unsigned long line_time_us; - int line_count, line_size; int small, large; int entries; @@ -932,19 +1023,17 @@ static bool g4x_compute_srwm(struct drm_i915_private *dev_priv, hdisplay = crtc->config->pipe_src_w; cpp = fb->format->cpp[0]; - line_time_us = max(htotal * 1000 / clock, 1); - line_count = (latency_ns / line_time_us + 1000) / 1000; - line_size = hdisplay * cpp; - /* Use the minimum of the small and large buffer method for primary */ - small = ((clock * cpp / 1000) * latency_ns) / 1000; - large = line_count * line_size; - + small = intel_wm_method1(clock, cpp, latency_ns / 100); + large = intel_wm_method2(clock, htotal, hdisplay, cpp, + latency_ns / 100); entries = DIV_ROUND_UP(min(small, large), display->cacheline_size); *display_wm = entries + display->guard_size; /* calculate the self-refresh watermark for display cursor */ - entries = 
line_count * 4 * crtc->base.cursor->state->crtc_w; + entries = intel_wm_method2(clock, htotal, + crtc->base.cursor->state->crtc_w, 4, + latency_ns / 100); entries = DIV_ROUND_UP(entries, cursor->cacheline_size); *cursor_wm = entries + cursor->guard_size; @@ -1036,15 +1125,15 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv, /* latency must be in 0.1us units. */ static unsigned int vlv_wm_method2(unsigned int pixel_rate, - unsigned int pipe_htotal, - unsigned int horiz_pixels, + unsigned int htotal, + unsigned int width, unsigned int cpp, unsigned int latency) { unsigned int ret; - ret = (latency * pixel_rate) / (pipe_htotal * 10000); - ret = (ret + 1) * horiz_pixels * cpp; + ret = intel_wm_method2(pixel_rate, htotal, + width, cpp, latency); ret = DIV_ROUND_UP(ret, 64); return ret; @@ -1085,8 +1174,6 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, clock = adjusted_mode->crtc_clock; htotal = adjusted_mode->crtc_htotal; width = crtc_state->pipe_src_w; - if (WARN_ON(htotal == 0)) - htotal = 1; if (plane->id == PLANE_CURSOR) { /* @@ -1733,14 +1820,10 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) int htotal = adjusted_mode->crtc_htotal; int hdisplay = crtc->config->pipe_src_w; int cpp = fb->format->cpp[0]; - unsigned long line_time_us; int entries; - line_time_us = max(htotal * 1000 / clock, 1); - - /* Use ns/us then divide to preserve precision */ - entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * - cpp * hdisplay; + entries = intel_wm_method2(clock, htotal, + hdisplay, cpp, sr_latency_ns / 100); entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE); srwm = I965_FIFO_SIZE - entries; if (srwm < 0) @@ -1749,13 +1832,14 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n", entries, srwm); - entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * - 4 * crtc->base.cursor->state->crtc_w; + entries = intel_wm_method2(clock, htotal, + crtc->base.cursor->state->crtc_w, 4, + sr_latency_ns / 100); entries = DIV_ROUND_UP(entries, - i965_cursor_wm_info.cacheline_size); - cursor_sr = i965_cursor_wm_info.fifo_size - - (entries + i965_cursor_wm_info.guard_size); + i965_cursor_wm_info.cacheline_size) + + i965_cursor_wm_info.guard_size; + cursor_sr = i965_cursor_wm_info.fifo_size - entries; if (cursor_sr > i965_cursor_wm_info.max_wm) cursor_sr = i965_cursor_wm_info.max_wm; @@ -1892,7 +1976,6 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) int htotal = adjusted_mode->crtc_htotal; int hdisplay = enabled->config->pipe_src_w; int cpp; - unsigned long line_time_us; int entries; if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv)) @@ -1900,11 +1983,8 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) else cpp = fb->format->cpp[0]; - line_time_us = max(htotal * 1000 / clock, 1); - - /* Use ns/us then divide to preserve precision */ - entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * - cpp * hdisplay; + entries = intel_wm_method2(clock, htotal, hdisplay, cpp, + sr_latency_ns / 100); entries = DIV_ROUND_UP(entries, wm_info->cacheline_size); DRM_DEBUG_KMS("self-refresh entries: %d\n", entries); srwm = wm_info->fifo_size - entries; @@ -1961,34 +2041,31 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) } /* latency must be in 0.1us units. 
*/ -static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency) +static unsigned int ilk_wm_method1(unsigned int pixel_rate, + unsigned int cpp, + unsigned int latency) { - uint64_t ret; - - if (WARN(latency == 0, "Latency value missing\n")) - return UINT_MAX; + unsigned int ret; - ret = (uint64_t) pixel_rate * cpp * latency; - ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2; + ret = intel_wm_method1(pixel_rate, cpp, latency); + ret = DIV_ROUND_UP(ret, 64) + 2; return ret; } /* latency must be in 0.1us units. */ -static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, - uint32_t horiz_pixels, uint8_t cpp, - uint32_t latency) +static unsigned int ilk_wm_method2(unsigned int pixel_rate, + unsigned int htotal, + unsigned int width, + unsigned int cpp, + unsigned int latency) { - uint32_t ret; - - if (WARN(latency == 0, "Latency value missing\n")) - return UINT_MAX; - if (WARN_ON(!pipe_htotal)) - return UINT_MAX; + unsigned int ret; - ret = (latency * pixel_rate) / (pipe_htotal * 10000); - ret = (ret + 1) * horiz_pixels * cpp; + ret = intel_wm_method2(pixel_rate, htotal, + width, cpp, latency); ret = DIV_ROUND_UP(ret, 64) + 2; + return ret; } -- cgit v1.1 From 42f4ac66c5352d3b84aa5119b3419750ec57e008 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:28 +0300 Subject: drm/i915: Apply the g4x TLB miss w/a to SR watermarks as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The documentation I've seen doesn't actually specify which watermarks need the TLB miss w/a. Currently we only apply the w/a to the normal watermarks for both primary and cursor planes. Since the documentation doesn't explicitly say anything I'm going to assume that the w/a should equally apply to the SR/HPLL watermarks. So let's do that. 
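For a rough sense of scale of the adjustment that is now also applied to the SR/HPLL watermarks (numbers picked purely for illustration, not taken from the patch): g4x_tlb_miss_wa() bumps the watermark by max(0, fifo_size * 64 - width * cpp * 8) bytes, so with a 127-cacheline FIFO (8128 bytes) a 960 pixel wide, 4 bpp plane needs 960 * 4 * 8 = 30720 bytes for eight whole lines and gets no adjustment, while a 240 pixel wide, 4 bpp plane needs only 7680 bytes and gets bumped by 8128 - 7680 = 448 bytes.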
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-12-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_pm.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c07f3b2..61b6799 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1006,7 +1006,7 @@ static bool g4x_compute_srwm(struct drm_i915_private *dev_priv, struct intel_crtc *crtc; const struct drm_display_mode *adjusted_mode; const struct drm_framebuffer *fb; - int hdisplay, htotal, cpp, clock; + int plane_width, cursor_width, htotal, cpp, clock; int small, large; int entries; @@ -1020,20 +1020,23 @@ static bool g4x_compute_srwm(struct drm_i915_private *dev_priv, fb = crtc->base.primary->state->fb; clock = adjusted_mode->crtc_clock; htotal = adjusted_mode->crtc_htotal; - hdisplay = crtc->config->pipe_src_w; + plane_width = crtc->config->pipe_src_w; + cursor_width = crtc->base.cursor->state->crtc_w; cpp = fb->format->cpp[0]; /* Use the minimum of the small and large buffer method for primary */ small = intel_wm_method1(clock, cpp, latency_ns / 100); - large = intel_wm_method2(clock, htotal, hdisplay, cpp, + large = intel_wm_method2(clock, htotal, plane_width, cpp, latency_ns / 100); - entries = DIV_ROUND_UP(min(small, large), display->cacheline_size); + entries = min(small, large); + entries += g4x_tlb_miss_wa(display->fifo_size, plane_width, cpp); + entries = DIV_ROUND_UP(entries, display->cacheline_size); *display_wm = entries + display->guard_size; /* calculate the self-refresh watermark for display cursor */ - entries = intel_wm_method2(clock, htotal, - crtc->base.cursor->state->crtc_w, 4, + entries = intel_wm_method2(clock, htotal, cursor_width, 4, latency_ns / 100); + entries += g4x_tlb_miss_wa(cursor->fifo_size, cursor_width, 4); entries = DIV_ROUND_UP(entries, cursor->cacheline_size); *cursor_wm = entries + cursor->guard_size; -- cgit v1.1 From 04548cbada77c662b2af149d742a1d93aa3bc568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:29 +0300 Subject: drm/i915: Two stage watermarks for g4x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement proper two stage watermark programming for g4x. As with other pre-SKL platforms, the watermark registers aren't double buffered on g4x. Hence we must sequence the watermark update carefully around plane updates. The code is quite heavily modelled on the VLV/CHV code, with some fairly significant differences due to the different hardware architecture: * g4x doesn't use inverted watermark values * CxSR actually affects the watermarks since it controls memory self refresh in addition to the max FIFO mode * A further HPLL SR mode is possible with higher memory wakeup latency * g4x has FBC2 and so it also has FBC watermarks * max FIFO mode for primary plane only (cursor is allowed, sprite is not) * g4x has no manual FIFO repartitioning * some TLB miss related workarounds are needed for the watermarks Actually the hardware is quite similar to ILK+ in many ways. The most visible differences are in the actual watermakr register layout. ILK revamped that part quite heavily whereas g4x is still using the layout inherited from earlier platforms. Note that we didn't previously enable the HPLL SR on g4x. 
So in order to not introduce too many functional changes in this patch I've not actually enabled it here either, even though the code is now fully ready for it. We'll enable it separately later on. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-13-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/i915_debugfs.c | 12 +- drivers/gpu/drm/i915/i915_drv.h | 12 + drivers/gpu/drm/i915/intel_display.c | 25 +- drivers/gpu/drm/i915/intel_drv.h | 28 ++ drivers/gpu/drm/i915/intel_pm.c | 940 +++++++++++++++++++++++++++-------- 5 files changed, 791 insertions(+), 226 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 34785fb..b302c7b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3892,6 +3892,8 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8]) num_levels = 3; else if (IS_VALLEYVIEW(dev_priv)) num_levels = 1; + else if (IS_G4X(dev_priv)) + num_levels = 3; else num_levels = ilk_wm_max_level(dev_priv) + 1; @@ -3904,8 +3906,10 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8]) * - WM1+ latency values in 0.5us units * - latencies are in us on gen9/vlv/chv */ - if (INTEL_GEN(dev_priv) >= 9 || IS_VALLEYVIEW(dev_priv) || - IS_CHERRYVIEW(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9 || + IS_VALLEYVIEW(dev_priv) || + IS_CHERRYVIEW(dev_priv) || + IS_G4X(dev_priv)) latency *= 10; else if (level > 0) latency *= 5; @@ -3966,7 +3970,7 @@ static int pri_wm_latency_open(struct inode *inode, struct file *file) { struct drm_i915_private *dev_priv = inode->i_private; - if (INTEL_GEN(dev_priv) < 5) + if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) return -ENODEV; return single_open(file, pri_wm_latency_show, dev_priv); @@ -4008,6 +4012,8 @@ static ssize_t wm_latency_write(struct file *file, const char __user *ubuf, num_levels = 3; else if (IS_VALLEYVIEW(dev_priv)) num_levels = 1; + else if (IS_G4X(dev_priv)) + num_levels = 3; else num_levels = ilk_wm_max_level(dev_priv) + 1; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7e37fb9..74dffbe 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1654,11 +1654,13 @@ struct ilk_wm_values { struct g4x_pipe_wm { uint16_t plane[I915_MAX_PLANES]; + uint16_t fbc; }; struct g4x_sr_wm { uint16_t plane; uint16_t cursor; + uint16_t fbc; }; struct vlv_wm_ddl_values { @@ -1673,6 +1675,15 @@ struct vlv_wm_values { bool cxsr; }; +struct g4x_wm_values { + struct g4x_pipe_wm pipe[2]; + struct g4x_sr_wm sr; + struct g4x_sr_wm hpll; + bool cxsr; + bool hpll_en; + bool fbc_en; +}; + struct skl_ddb_entry { uint16_t start, end; /* in number of blocks, 'end' is exclusive */ }; @@ -2301,6 +2312,7 @@ struct drm_i915_private { struct ilk_wm_values hw; struct skl_wm_values skl_hw; struct vlv_wm_values vlv; + struct g4x_wm_values g4x; }; uint8_t max_level; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 19a7a1e..99f8dc8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5719,6 +5719,8 @@ static void i9xx_set_pll_dividers(struct intel_crtc *crtc) static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_atomic_state *old_state) { + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); struct drm_crtc *crtc = pipe_config->base.crtc; struct drm_device *dev = crtc->dev; 
struct drm_i915_private *dev_priv = to_i915(dev); @@ -5751,7 +5753,11 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, intel_color_load_luts(&pipe_config->base); - intel_update_watermarks(intel_crtc); + if (dev_priv->display.initial_watermarks != NULL) + dev_priv->display.initial_watermarks(old_intel_state, + intel_crtc->config); + else + intel_update_watermarks(intel_crtc); intel_enable_pipe(intel_crtc); assert_vblank_disabled(crtc); @@ -10852,21 +10858,21 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, turn_off, turn_on, mode_changed); if (turn_on) { - if (INTEL_GEN(dev_priv) < 5) + if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) pipe_config->update_wm_pre = true; /* must disable cxsr around plane enable/disable */ if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; } else if (turn_off) { - if (INTEL_GEN(dev_priv) < 5) + if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) pipe_config->update_wm_post = true; /* must disable cxsr around plane enable/disable */ if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; } else if (intel_wm_need_update(&plane->base, plane_state)) { - if (INTEL_GEN(dev_priv) < 5) { + if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) { /* FIXME bollocks */ pipe_config->update_wm_pre = true; pipe_config->update_wm_post = true; @@ -11290,7 +11296,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) shared_dpll = crtc_state->shared_dpll; dpll_hw_state = crtc_state->dpll_hw_state; force_thru = crtc_state->pch_pfit.force_thru; - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + if (IS_G4X(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) wm_state = crtc_state->wm; /* Keep base drm_crtc_state intact, only clear our extended struct */ @@ -11302,7 +11309,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) crtc_state->shared_dpll = shared_dpll; crtc_state->dpll_hw_state = dpll_hw_state; crtc_state->pch_pfit.force_thru = force_thru; - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + if (IS_G4X(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) crtc_state->wm = wm_state; } @@ -15527,7 +15535,10 @@ intel_modeset_setup_hw_state(struct drm_device *dev) pll->on = false; } - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + if (IS_G4X(dev_priv)) { + g4x_wm_get_hw_state(dev); + g4x_wm_sanitize(dev_priv); + } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_wm_get_hw_state(dev); vlv_wm_sanitize(dev_priv); } else if (IS_GEN9(dev_priv)) { diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index d1cdd10..f530df7 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -522,6 +522,22 @@ struct vlv_fifo_state { u16 plane[I915_MAX_PLANES]; }; +enum g4x_wm_level { + G4X_WM_LEVEL_NORMAL, + G4X_WM_LEVEL_SR, + G4X_WM_LEVEL_HPLL, + NUM_G4X_WM_LEVELS, +}; + +struct g4x_wm_state { + struct g4x_pipe_wm wm; + struct g4x_sr_wm sr; + struct g4x_sr_wm hpll; + bool cxsr; + bool hpll_en; + bool fbc_en; +}; + struct intel_crtc_wm_state { union { struct { @@ -557,6 +573,15 @@ struct intel_crtc_wm_state { /* display FIFO split */ struct vlv_fifo_state fifo_state; } vlv; + + struct { + /* "raw" watermarks */ + struct g4x_pipe_wm raw[NUM_G4X_WM_LEVELS]; + /* intermediate watermarks */ + struct g4x_wm_state intermediate; + /* optimal watermarks */ + struct g4x_wm_state optimal; + } g4x; }; /* @@ -794,6 +819,7 @@ struct intel_crtc { union { struct intel_pipe_wm ilk; struct 
vlv_wm_state vlv; + struct g4x_wm_state g4x; } active; } wm; @@ -1841,6 +1867,7 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, struct intel_rps_client *rps, unsigned long submitted); void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req); +void g4x_wm_get_hw_state(struct drm_device *dev); void vlv_wm_get_hw_state(struct drm_device *dev); void ilk_wm_get_hw_state(struct drm_device *dev); void skl_wm_get_hw_state(struct drm_device *dev); @@ -1848,6 +1875,7 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb /* out */); void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc, struct skl_pipe_wm *out); +void g4x_wm_sanitize(struct drm_i915_private *dev_priv); void vlv_wm_sanitize(struct drm_i915_private *dev_priv); bool intel_can_enable_sagv(struct drm_atomic_state *state); int intel_enable_sagv(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 61b6799..9b0a6a4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -429,7 +429,10 @@ bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable) mutex_lock(&dev_priv->wm.wm_mutex); ret = _intel_set_memory_cxsr(dev_priv, enable); - dev_priv->wm.vlv.cxsr = enable; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->wm.vlv.cxsr = enable; + else if (IS_G4X(dev_priv)) + dev_priv->wm.g4x.cxsr = enable; mutex_unlock(&dev_priv->wm.wm_mutex); return ret; @@ -568,20 +571,6 @@ static const struct intel_watermark_params pineview_cursor_hplloff_wm = { .guard_size = PINEVIEW_CURSOR_GUARD_WM, .cacheline_size = PINEVIEW_FIFO_LINE_SIZE, }; -static const struct intel_watermark_params g4x_wm_info = { - .fifo_size = G4X_FIFO_SIZE, - .max_wm = G4X_MAX_WM, - .default_wm = G4X_MAX_WM, - .guard_size = 2, - .cacheline_size = G4X_FIFO_LINE_SIZE, -}; -static const struct intel_watermark_params g4x_cursor_wm_info = { - .fifo_size = I965_CURSOR_FIFO, - .max_wm = I965_CURSOR_MAX_WM, - .default_wm = I965_CURSOR_DFT_WM, - .guard_size = 2, - .cacheline_size = G4X_FIFO_LINE_SIZE, -}; static const struct intel_watermark_params i965_cursor_wm_info = { .fifo_size = I965_CURSOR_FIFO, .max_wm = I965_CURSOR_MAX_WM, @@ -780,6 +769,16 @@ static unsigned int intel_calculate_wm(int pixel_rate, return wm_size; } +static bool is_disabling(int old, int new, int threshold) +{ + return old >= threshold && new < threshold; +} + +static bool is_enabling(int old, int new, int threshold) +{ + return old < threshold && new >= threshold; +} + static int intel_wm_num_levels(struct drm_i915_private *dev_priv) { return dev_priv->wm.max_level + 1; @@ -911,138 +910,28 @@ static int g4x_tlb_miss_wa(int fifo_size, int width, int cpp) return max(0, tlb_miss); } -static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, - int plane, - const struct intel_watermark_params *display, - int display_latency_ns, - const struct intel_watermark_params *cursor, - int cursor_latency_ns, - int *plane_wm, - int *cursor_wm) -{ - struct intel_crtc *crtc; - const struct drm_display_mode *adjusted_mode; - const struct drm_framebuffer *fb; - int htotal, plane_width, cursor_width, clock, cpp; - int entries; - - crtc = intel_get_crtc_for_plane(dev_priv, plane); - if (!intel_crtc_active(crtc)) { - *cursor_wm = cursor->guard_size; - *plane_wm = display->guard_size; - return false; - } - - adjusted_mode = &crtc->config->base.adjusted_mode; - fb = crtc->base.primary->state->fb; - clock = adjusted_mode->crtc_clock; - htotal = 
adjusted_mode->crtc_htotal; - plane_width = crtc->config->pipe_src_w; - cursor_width = crtc->base.cursor->state->crtc_w; - cpp = fb->format->cpp[0]; - - /* Use the small buffer method to calculate plane watermark */ - entries = intel_wm_method1(clock, cpp, display_latency_ns / 100); - entries += g4x_tlb_miss_wa(display->fifo_size, plane_width, cpp); - entries = DIV_ROUND_UP(entries, display->cacheline_size); - *plane_wm = entries + display->guard_size; - if (*plane_wm > (int)display->max_wm) - *plane_wm = display->max_wm; - - /* Use the large buffer method to calculate cursor watermark */ - entries = intel_wm_method2(clock, htotal, cursor_width, 4, - cursor_latency_ns / 100); - entries += g4x_tlb_miss_wa(cursor->fifo_size, cursor_width, 4); - entries = DIV_ROUND_UP(entries, cursor->cacheline_size); - *cursor_wm = entries + cursor->guard_size; - if (*cursor_wm > (int)cursor->max_wm) - *cursor_wm = (int)cursor->max_wm; - - return true; -} - -/* - * Check the wm result. - * - * If any calculated watermark values is larger than the maximum value that - * can be programmed into the associated watermark register, that watermark - * must be disabled. - */ -static bool g4x_check_srwm(struct drm_i915_private *dev_priv, - int display_wm, int cursor_wm, - const struct intel_watermark_params *display, - const struct intel_watermark_params *cursor) -{ - DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n", - display_wm, cursor_wm); - - if (display_wm > display->max_wm) { - DRM_DEBUG_KMS("display watermark is too large(%d/%u), disabling\n", - display_wm, display->max_wm); - return false; - } - - if (cursor_wm > cursor->max_wm) { - DRM_DEBUG_KMS("cursor watermark is too large(%d/%u), disabling\n", - cursor_wm, cursor->max_wm); - return false; - } - - if (!(display_wm || cursor_wm)) { - DRM_DEBUG_KMS("SR latency is 0, disabling\n"); - return false; - } - - return true; -} - -static bool g4x_compute_srwm(struct drm_i915_private *dev_priv, - int plane, - int latency_ns, - const struct intel_watermark_params *display, - const struct intel_watermark_params *cursor, - int *display_wm, int *cursor_wm) +static void g4x_write_wm_values(struct drm_i915_private *dev_priv, + const struct g4x_wm_values *wm) { - struct intel_crtc *crtc; - const struct drm_display_mode *adjusted_mode; - const struct drm_framebuffer *fb; - int plane_width, cursor_width, htotal, cpp, clock; - int small, large; - int entries; - - if (!latency_ns) { - *display_wm = *cursor_wm = 0; - return false; - } - - crtc = intel_get_crtc_for_plane(dev_priv, plane); - adjusted_mode = &crtc->config->base.adjusted_mode; - fb = crtc->base.primary->state->fb; - clock = adjusted_mode->crtc_clock; - htotal = adjusted_mode->crtc_htotal; - plane_width = crtc->config->pipe_src_w; - cursor_width = crtc->base.cursor->state->crtc_w; - cpp = fb->format->cpp[0]; - - /* Use the minimum of the small and large buffer method for primary */ - small = intel_wm_method1(clock, cpp, latency_ns / 100); - large = intel_wm_method2(clock, htotal, plane_width, cpp, - latency_ns / 100); - entries = min(small, large); - entries += g4x_tlb_miss_wa(display->fifo_size, plane_width, cpp); - entries = DIV_ROUND_UP(entries, display->cacheline_size); - *display_wm = entries + display->guard_size; - - /* calculate the self-refresh watermark for display cursor */ - entries = intel_wm_method2(clock, htotal, cursor_width, 4, - latency_ns / 100); - entries += g4x_tlb_miss_wa(cursor->fifo_size, cursor_width, 4); - entries = DIV_ROUND_UP(entries, cursor->cacheline_size); - *cursor_wm = 
entries + cursor->guard_size; + I915_WRITE(DSPFW1, + FW_WM(wm->sr.plane, SR) | + FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) | + FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) | + FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA)); + I915_WRITE(DSPFW2, + (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) | + FW_WM(wm->sr.fbc, FBC_SR) | + FW_WM(wm->hpll.fbc, FBC_HPLL_SR) | + FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) | + FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) | + FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA)); + I915_WRITE(DSPFW3, + (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) | + FW_WM(wm->sr.cursor, CURSOR_SR) | + FW_WM(wm->hpll.cursor, HPLL_CURSOR) | + FW_WM(wm->hpll.plane, HPLL_SR)); - return g4x_check_srwm(dev_priv, - *display_wm, *cursor_wm, - display, cursor); + POSTING_READ(DSPFW1); } #define FW_WM_VLV(value, plane) \ @@ -1126,6 +1015,523 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv, #undef FW_WM_VLV +static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv) +{ + /* all latencies in usec */ + dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5; + dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12; + + dev_priv->wm.max_level = G4X_WM_LEVEL_SR; +} + +static int g4x_plane_fifo_size(enum plane_id plane_id, int level) +{ + /* + * DSPCNTR[13] supposedly controls whether the + * primary plane can use the FIFO space otherwise + * reserved for the sprite plane. It's not 100% clear + * what the actual FIFO size is, but it looks like we + * can happily set both primary and sprite watermarks + * up to 127 cachelines. So that would seem to mean + * that either DSPCNTR[13] doesn't do anything, or that + * the total FIFO is >= 256 cachelines in size. Either + * way, we don't seem to have to worry about this + * repartitioning as the maximum watermark value the + * register can hold for each plane is lower than the + * minimum FIFO size. + */ + switch (plane_id) { + case PLANE_CURSOR: + return 63; + case PLANE_PRIMARY: + return level == G4X_WM_LEVEL_NORMAL ? 127 : 511; + case PLANE_SPRITE0: + return level == G4X_WM_LEVEL_NORMAL ? 127 : 0; + default: + MISSING_CASE(plane_id); + return 0; + } +} + +static int g4x_fbc_fifo_size(int level) +{ + switch (level) { + case G4X_WM_LEVEL_SR: + return 7; + case G4X_WM_LEVEL_HPLL: + return 15; + default: + MISSING_CASE(level); + return 0; + } +} + +static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state, + int level) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; + int clock, htotal, cpp, width, wm; + int latency = dev_priv->wm.pri_latency[level] * 10; + + if (latency == 0) + return USHRT_MAX; + + if (!intel_wm_plane_visible(crtc_state, plane_state)) + return 0; + + /* + * Not 100% sure which way ELK should go here as the + * spec only says CL/CTG should assume 32bpp and BW + * doesn't need to. But as these things followed the + * mobile vs. desktop lines on gen3 as well, let's + * assume ELK doesn't need this. + * + * The spec also fails to list such a restriction for + * the HPLL watermark, which seems a little strange. + * Let's use 32bpp for the HPLL watermark as well. 
+ */ + if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY && + level != G4X_WM_LEVEL_NORMAL) + cpp = 4; + else + cpp = plane_state->base.fb->format->cpp[0]; + + clock = adjusted_mode->crtc_clock; + htotal = adjusted_mode->crtc_htotal; + + if (plane->id == PLANE_CURSOR) + width = plane_state->base.crtc_w; + else + width = drm_rect_width(&plane_state->base.dst); + + if (plane->id == PLANE_CURSOR) { + wm = intel_wm_method2(clock, htotal, width, cpp, latency); + } else if (plane->id == PLANE_PRIMARY && + level == G4X_WM_LEVEL_NORMAL) { + wm = intel_wm_method1(clock, cpp, latency); + } else { + int small, large; + + small = intel_wm_method1(clock, cpp, latency); + large = intel_wm_method2(clock, htotal, width, cpp, latency); + + wm = min(small, large); + } + + wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level), + width, cpp); + + wm = DIV_ROUND_UP(wm, 64) + 2; + + return min_t(int, wm, USHRT_MAX); +} + +static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state, + int level, enum plane_id plane_id, u16 value) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + bool dirty = false; + + for (; level < intel_wm_num_levels(dev_priv); level++) { + struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level]; + + dirty |= raw->plane[plane_id] != value; + raw->plane[plane_id] = value; + } + + return dirty; +} + +static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state, + int level, u16 value) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + bool dirty = false; + + /* NORMAL level doesn't have an FBC watermark */ + level = max(level, G4X_WM_LEVEL_SR); + + for (; level < intel_wm_num_levels(dev_priv); level++) { + struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level]; + + dirty |= raw->fbc != value; + raw->fbc = value; + } + + return dirty; +} + +static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate, + uint32_t pri_val); + +static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + int num_levels = intel_wm_num_levels(to_i915(plane->base.dev)); + enum plane_id plane_id = plane->id; + bool dirty = false; + int level; + + if (!intel_wm_plane_visible(crtc_state, plane_state)) { + dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0); + if (plane_id == PLANE_PRIMARY) + dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0); + goto out; + } + + for (level = 0; level < num_levels; level++) { + struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level]; + int wm, max_wm; + + wm = g4x_compute_wm(crtc_state, plane_state, level); + max_wm = g4x_plane_fifo_size(plane_id, level); + + if (wm > max_wm) + break; + + dirty |= raw->plane[plane_id] != wm; + raw->plane[plane_id] = wm; + + if (plane_id != PLANE_PRIMARY || + level == G4X_WM_LEVEL_NORMAL) + continue; + + wm = ilk_compute_fbc_wm(crtc_state, plane_state, + raw->plane[plane_id]); + max_wm = g4x_fbc_fifo_size(level); + + /* + * FBC wm is not mandatory as we + * can always just disable its use. 
+ */ + if (wm > max_wm) + wm = USHRT_MAX; + + dirty |= raw->fbc != wm; + raw->fbc = wm; + } + + /* mark watermarks as invalid */ + dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX); + + if (plane_id == PLANE_PRIMARY) + dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX); + + out: + if (dirty) { + DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n", + plane->base.name, + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id], + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id], + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]); + + if (plane_id == PLANE_PRIMARY) + DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n", + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc, + crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc); + } + + return dirty; +} + +static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, + enum plane_id plane_id, int level) +{ + const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level]; + + return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level); +} + +static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, + int level) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + if (level > dev_priv->wm.max_level) + return false; + + return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) && + g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) && + g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); +} + +/* mark all levels starting from 'level' as invalid */ +static void g4x_invalidate_wms(struct intel_crtc *crtc, + struct g4x_wm_state *wm_state, int level) +{ + if (level <= G4X_WM_LEVEL_NORMAL) { + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) + wm_state->wm.plane[plane_id] = USHRT_MAX; + } + + if (level <= G4X_WM_LEVEL_SR) { + wm_state->cxsr = false; + wm_state->sr.cursor = USHRT_MAX; + wm_state->sr.plane = USHRT_MAX; + wm_state->sr.fbc = USHRT_MAX; + } + + if (level <= G4X_WM_LEVEL_HPLL) { + wm_state->hpll_en = false; + wm_state->hpll.cursor = USHRT_MAX; + wm_state->hpll.plane = USHRT_MAX; + wm_state->hpll.fbc = USHRT_MAX; + } +} + +static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(crtc_state->base.state); + struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal; + int num_active_planes = hweight32(crtc_state->active_planes & + ~BIT(PLANE_CURSOR)); + const struct g4x_pipe_wm *raw; + struct intel_plane_state *plane_state; + struct intel_plane *plane; + enum plane_id plane_id; + int i, level; + unsigned int dirty = 0; + + for_each_intel_plane_in_state(state, plane, plane_state, i) { + const struct intel_plane_state *old_plane_state = + to_intel_plane_state(plane->base.state); + + if (plane_state->base.crtc != &crtc->base && + old_plane_state->base.crtc != &crtc->base) + continue; + + if (g4x_raw_plane_wm_compute(crtc_state, plane_state)) + dirty |= BIT(plane->id); + } + + if (!dirty) + return 0; + + level = G4X_WM_LEVEL_NORMAL; + if (!g4x_raw_crtc_wm_is_valid(crtc_state, level)) + goto out; + + raw = &crtc_state->wm.g4x.raw[level]; + for_each_plane_id_on_crtc(crtc, plane_id) + wm_state->wm.plane[plane_id] = raw->plane[plane_id]; + + level = G4X_WM_LEVEL_SR; + + if (!g4x_raw_crtc_wm_is_valid(crtc_state, level)) + goto out; + + raw = &crtc_state->wm.g4x.raw[level]; + wm_state->sr.plane = raw->plane[PLANE_PRIMARY]; + wm_state->sr.cursor = 
raw->plane[PLANE_CURSOR]; + wm_state->sr.fbc = raw->fbc; + + wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY); + + level = G4X_WM_LEVEL_HPLL; + + if (!g4x_raw_crtc_wm_is_valid(crtc_state, level)) + goto out; + + raw = &crtc_state->wm.g4x.raw[level]; + wm_state->hpll.plane = raw->plane[PLANE_PRIMARY]; + wm_state->hpll.cursor = raw->plane[PLANE_CURSOR]; + wm_state->hpll.fbc = raw->fbc; + + wm_state->hpll_en = wm_state->cxsr; + + level++; + + out: + if (level == G4X_WM_LEVEL_NORMAL) + return -EINVAL; + + /* invalidate the higher levels */ + g4x_invalidate_wms(crtc, wm_state, level); + + /* + * Determine if the FBC watermark(s) can be used. IF + * this isn't the case we prefer to disable the FBC + ( watermark(s) rather than disable the SR/HPLL + * level(s) entirely. + */ + wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL; + + if (level >= G4X_WM_LEVEL_SR && + wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR)) + wm_state->fbc_en = false; + else if (level >= G4X_WM_LEVEL_HPLL && + wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL)) + wm_state->fbc_en = false; + + return 0; +} + +static int g4x_compute_intermediate_wm(struct drm_device *dev, + struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) +{ + struct g4x_wm_state *intermediate = &crtc_state->wm.g4x.intermediate; + const struct g4x_wm_state *optimal = &crtc_state->wm.g4x.optimal; + const struct g4x_wm_state *active = &crtc->wm.active.g4x; + enum plane_id plane_id; + + intermediate->cxsr = optimal->cxsr && active->cxsr && + !crtc_state->disable_cxsr; + intermediate->hpll_en = optimal->hpll_en && active->hpll_en && + !crtc_state->disable_cxsr; + intermediate->fbc_en = optimal->fbc_en && active->fbc_en; + + for_each_plane_id_on_crtc(crtc, plane_id) { + intermediate->wm.plane[plane_id] = + max(optimal->wm.plane[plane_id], + active->wm.plane[plane_id]); + + WARN_ON(intermediate->wm.plane[plane_id] > + g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL)); + } + + intermediate->sr.plane = max(optimal->sr.plane, + active->sr.plane); + intermediate->sr.cursor = max(optimal->sr.cursor, + active->sr.cursor); + intermediate->sr.fbc = max(optimal->sr.fbc, + active->sr.fbc); + + intermediate->hpll.plane = max(optimal->hpll.plane, + active->hpll.plane); + intermediate->hpll.cursor = max(optimal->hpll.cursor, + active->hpll.cursor); + intermediate->hpll.fbc = max(optimal->hpll.fbc, + active->hpll.fbc); + + WARN_ON((intermediate->sr.plane > + g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) || + intermediate->sr.cursor > + g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) && + intermediate->cxsr); + WARN_ON((intermediate->sr.plane > + g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) || + intermediate->sr.cursor > + g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) && + intermediate->hpll_en); + + WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(1) && + intermediate->fbc_en && intermediate->cxsr); + WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) && + intermediate->fbc_en && intermediate->hpll_en); + + /* + * If our intermediate WM are identical to the final WM, then we can + * omit the post-vblank programming; only update if it's different. 
+ */ + if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0) + crtc_state->wm.need_postvbl_update = true; + + return 0; +} + +static void g4x_merge_wm(struct drm_i915_private *dev_priv, + struct g4x_wm_values *wm) +{ + struct intel_crtc *crtc; + int num_active_crtcs = 0; + + wm->cxsr = true; + wm->hpll_en = true; + wm->fbc_en = true; + + for_each_intel_crtc(&dev_priv->drm, crtc) { + const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x; + + if (!crtc->active) + continue; + + if (!wm_state->cxsr) + wm->cxsr = false; + if (!wm_state->hpll_en) + wm->hpll_en = false; + if (!wm_state->fbc_en) + wm->fbc_en = false; + + num_active_crtcs++; + } + + if (num_active_crtcs != 1) { + wm->cxsr = false; + wm->hpll_en = false; + wm->fbc_en = false; + } + + for_each_intel_crtc(&dev_priv->drm, crtc) { + const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x; + enum pipe pipe = crtc->pipe; + + wm->pipe[pipe] = wm_state->wm; + if (crtc->active && wm->cxsr) + wm->sr = wm_state->sr; + if (crtc->active && wm->hpll_en) + wm->hpll = wm_state->hpll; + } +} + +static void g4x_program_watermarks(struct drm_i915_private *dev_priv) +{ + struct g4x_wm_values *old_wm = &dev_priv->wm.g4x; + struct g4x_wm_values new_wm = {}; + + g4x_merge_wm(dev_priv, &new_wm); + + if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) + return; + + if (is_disabling(old_wm->cxsr, new_wm.cxsr, true)) + _intel_set_memory_cxsr(dev_priv, false); + + g4x_write_wm_values(dev_priv, &new_wm); + + if (is_enabling(old_wm->cxsr, new_wm.cxsr, true)) + _intel_set_memory_cxsr(dev_priv, true); + + *old_wm = new_wm; +} + +static void g4x_initial_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + + mutex_lock(&dev_priv->wm.wm_mutex); + crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate; + g4x_program_watermarks(dev_priv); + mutex_unlock(&dev_priv->wm.wm_mutex); +} + +static void g4x_optimize_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + + if (!crtc_state->wm.need_postvbl_update) + return; + + mutex_lock(&dev_priv->wm.wm_mutex); + intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal; + g4x_program_watermarks(dev_priv); + mutex_unlock(&dev_priv->wm.wm_mutex); +} + /* latency must be in 0.1us units. 
*/ static unsigned int vlv_wm_method2(unsigned int pixel_rate, unsigned int htotal, @@ -1673,16 +2079,6 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, } } -static bool is_disabling(int old, int new, int threshold) -{ - return old >= threshold && new < threshold; -} - -static bool is_enabling(int old, int new, int threshold) -{ - return old < threshold && new >= threshold; -} - static void vlv_program_watermarks(struct drm_i915_private *dev_priv) { struct vlv_wm_values *old_wm = &dev_priv->wm.vlv; @@ -1743,65 +2139,6 @@ static void vlv_optimize_watermarks(struct intel_atomic_state *state, mutex_unlock(&dev_priv->wm.wm_mutex); } -#define single_plane_enabled(mask) is_power_of_2(mask) - -static void g4x_update_wm(struct intel_crtc *crtc) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - static const int sr_latency_ns = 12000; - int planea_wm, planeb_wm, cursora_wm, cursorb_wm; - int plane_sr, cursor_sr; - unsigned int enabled = 0; - bool cxsr_enabled; - - if (g4x_compute_wm0(dev_priv, PIPE_A, - &g4x_wm_info, pessimal_latency_ns, - &g4x_cursor_wm_info, pessimal_latency_ns, - &planea_wm, &cursora_wm)) - enabled |= 1 << PIPE_A; - - if (g4x_compute_wm0(dev_priv, PIPE_B, - &g4x_wm_info, pessimal_latency_ns, - &g4x_cursor_wm_info, pessimal_latency_ns, - &planeb_wm, &cursorb_wm)) - enabled |= 1 << PIPE_B; - - if (single_plane_enabled(enabled) && - g4x_compute_srwm(dev_priv, ffs(enabled) - 1, - sr_latency_ns, - &g4x_wm_info, - &g4x_cursor_wm_info, - &plane_sr, &cursor_sr)) { - cxsr_enabled = true; - } else { - cxsr_enabled = false; - intel_set_memory_cxsr(dev_priv, false); - plane_sr = cursor_sr = 0; - } - - DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, " - "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", - planea_wm, cursora_wm, - planeb_wm, cursorb_wm, - plane_sr, cursor_sr); - - I915_WRITE(DSPFW1, - FW_WM(plane_sr, SR) | - FW_WM(cursorb_wm, CURSORB) | - FW_WM(planeb_wm, PLANEB) | - FW_WM(planea_wm, PLANEA)); - I915_WRITE(DSPFW2, - (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | - FW_WM(cursora_wm, CURSORA)); - /* HPLL off in SR has some issues on G4x... 
disable it */ - I915_WRITE(DSPFW3, - (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) | - FW_WM(cursor_sr, CURSOR_SR)); - - if (cxsr_enabled) - intel_set_memory_cxsr(dev_priv, true); -} - static void i965_update_wm(struct intel_crtc *unused_crtc) { struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); @@ -4778,6 +5115,32 @@ static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) #define _FW_WM_VLV(value, plane) \ (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT) +static void g4x_read_wm_values(struct drm_i915_private *dev_priv, + struct g4x_wm_values *wm) +{ + uint32_t tmp; + + tmp = I915_READ(DSPFW1); + wm->sr.plane = _FW_WM(tmp, SR); + wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB); + wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB); + wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA); + + tmp = I915_READ(DSPFW2); + wm->fbc_en = tmp & DSPFW_FBC_SR_EN; + wm->sr.fbc = _FW_WM(tmp, FBC_SR); + wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR); + wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB); + wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA); + wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA); + + tmp = I915_READ(DSPFW3); + wm->hpll_en = tmp & DSPFW_HPLL_SR_EN; + wm->sr.cursor = _FW_WM(tmp, CURSOR_SR); + wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR); + wm->hpll.plane = _FW_WM(tmp, HPLL_SR); +} + static void vlv_read_wm_values(struct drm_i915_private *dev_priv, struct vlv_wm_values *wm) { @@ -4854,6 +5217,147 @@ static void vlv_read_wm_values(struct drm_i915_private *dev_priv, #undef _FW_WM #undef _FW_WM_VLV +void g4x_wm_get_hw_state(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct g4x_wm_values *wm = &dev_priv->wm.g4x; + struct intel_crtc *crtc; + + g4x_read_wm_values(dev_priv, wm); + + wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN; + + for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct g4x_wm_state *active = &crtc->wm.active.g4x; + struct g4x_pipe_wm *raw; + enum pipe pipe = crtc->pipe; + enum plane_id plane_id; + int level, max_level; + + active->cxsr = wm->cxsr; + active->hpll_en = wm->hpll_en; + active->fbc_en = wm->fbc_en; + + active->sr = wm->sr; + active->hpll = wm->hpll; + + for_each_plane_id_on_crtc(crtc, plane_id) { + active->wm.plane[plane_id] = + wm->pipe[pipe].plane[plane_id]; + } + + if (wm->cxsr && wm->hpll_en) + max_level = G4X_WM_LEVEL_HPLL; + else if (wm->cxsr) + max_level = G4X_WM_LEVEL_SR; + else + max_level = G4X_WM_LEVEL_NORMAL; + + level = G4X_WM_LEVEL_NORMAL; + raw = &crtc_state->wm.g4x.raw[level]; + for_each_plane_id_on_crtc(crtc, plane_id) + raw->plane[plane_id] = active->wm.plane[plane_id]; + + if (++level > max_level) + goto out; + + raw = &crtc_state->wm.g4x.raw[level]; + raw->plane[PLANE_PRIMARY] = active->sr.plane; + raw->plane[PLANE_CURSOR] = active->sr.cursor; + raw->plane[PLANE_SPRITE0] = 0; + raw->fbc = active->sr.fbc; + + if (++level > max_level) + goto out; + + raw = &crtc_state->wm.g4x.raw[level]; + raw->plane[PLANE_PRIMARY] = active->hpll.plane; + raw->plane[PLANE_CURSOR] = active->hpll.cursor; + raw->plane[PLANE_SPRITE0] = 0; + raw->fbc = active->hpll.fbc; + + out: + for_each_plane_id_on_crtc(crtc, plane_id) + g4x_raw_plane_wm_set(crtc_state, level, + plane_id, USHRT_MAX); + g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX); + + crtc_state->wm.g4x.optimal = *active; + crtc_state->wm.g4x.intermediate = *active; + + 
DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n", + pipe_name(pipe), + wm->pipe[pipe].plane[PLANE_PRIMARY], + wm->pipe[pipe].plane[PLANE_CURSOR], + wm->pipe[pipe].plane[PLANE_SPRITE0]); + } + + DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n", + wm->sr.plane, wm->sr.cursor, wm->sr.fbc); + DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n", + wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc); + DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n", + yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en)); +} + +void g4x_wm_sanitize(struct drm_i915_private *dev_priv) +{ + struct intel_plane *plane; + struct intel_crtc *crtc; + + mutex_lock(&dev_priv->wm.wm_mutex); + + for_each_intel_plane(&dev_priv->drm, plane) { + struct intel_crtc *crtc = + intel_get_crtc_for_pipe(dev_priv, plane->pipe); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal; + enum plane_id plane_id = plane->id; + int level; + + if (plane_state->base.visible) + continue; + + for (level = 0; level < 3; level++) { + struct g4x_pipe_wm *raw = + &crtc_state->wm.g4x.raw[level]; + + raw->plane[plane_id] = 0; + wm_state->wm.plane[plane_id] = 0; + } + + if (plane_id == PLANE_PRIMARY) { + for (level = 0; level < 3; level++) { + struct g4x_pipe_wm *raw = + &crtc_state->wm.g4x.raw[level]; + raw->fbc = 0; + } + + wm_state->sr.fbc = 0; + wm_state->hpll.fbc = 0; + wm_state->fbc_en = false; + } + } + + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + crtc_state->wm.g4x.intermediate = + crtc_state->wm.g4x.optimal; + crtc->wm.active.g4x = crtc_state->wm.g4x.optimal; + } + + g4x_program_watermarks(dev_priv); + + mutex_unlock(&dev_priv->wm.wm_mutex); +} + void vlv_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -8160,6 +8664,12 @@ void intel_init_pm(struct drm_i915_private *dev_priv) dev_priv->display.initial_watermarks = vlv_initial_watermarks; dev_priv->display.optimize_watermarks = vlv_optimize_watermarks; dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo; + } else if (IS_G4X(dev_priv)) { + g4x_setup_wm_latency(dev_priv); + dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm; + dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm; + dev_priv->display.initial_watermarks = g4x_initial_watermarks; + dev_priv->display.optimize_watermarks = g4x_optimize_watermarks; } else if (IS_PINEVIEW(dev_priv)) { if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), dev_priv->is_ddr3, @@ -8175,8 +8685,6 @@ void intel_init_pm(struct drm_i915_private *dev_priv) dev_priv->display.update_wm = NULL; } else dev_priv->display.update_wm = pineview_update_wm; - } else if (IS_G4X(dev_priv)) { - dev_priv->display.update_wm = g4x_update_wm; } else if (IS_GEN4(dev_priv)) { dev_priv->display.update_wm = i965_update_wm; } else if (IS_GEN3(dev_priv)) { -- cgit v1.1 From 79d94306ea7a07189222cfa7a454bc04480e5c50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:30 +0300 Subject: drm/i915: Enable HPLL watermarks on g4x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I don't see why we couldn't use the HPLL watermarks on g4x. So let's enable them. 
Let's assume a 35 usec memory latency for the HPLL mode. That's roughly what PNV uses. Based on the behaviour of the ELK box I have 35 usec is probably overkill. Actually all the current latency values used seem overkill as I can reduce them pretty drastically before I start to see underruns. But let's play things a bit safe for now. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-14-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9b0a6a4..957ef10 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1020,8 +1020,9 @@ static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv) /* all latencies in usec */ dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5; dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12; + dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35; - dev_priv->wm.max_level = G4X_WM_LEVEL_SR; + dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL; } static int g4x_plane_fifo_size(enum plane_id plane_id, int level) -- cgit v1.1 From e93329a5c0830812f3c7bddcbf728aa83e78f42d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:31 +0300 Subject: drm/i915: Add g4x watermark tracepoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a tracepoint for watermark programming on g4x, similar to what we have on vlv/chv. Should help in debugging watermark programming sequence issues. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-15-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/i915_trace.h | 49 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 5 ++++ 2 files changed, 54 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 66404c5..b24a83d 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -89,6 +89,55 @@ TRACE_EVENT(intel_memory_cxsr, __entry->frame[PIPE_C], __entry->scanline[PIPE_C]) ); +TRACE_EVENT(g4x_wm, + TP_PROTO(struct intel_crtc *crtc, const struct g4x_wm_values *wm), + TP_ARGS(crtc, wm), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u16, primary) + __field(u16, sprite) + __field(u16, cursor) + __field(u16, sr_plane) + __field(u16, sr_cursor) + __field(u16, sr_fbc) + __field(u16, hpll_plane) + __field(u16, hpll_cursor) + __field(u16, hpll_fbc) + __field(bool, cxsr) + __field(bool, hpll) + __field(bool, fbc) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY]; + __entry->sprite = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0]; + __entry->cursor = wm->pipe[crtc->pipe].plane[PLANE_CURSOR]; + __entry->sr_plane = wm->sr.plane; + __entry->sr_cursor = wm->sr.cursor; + __entry->sr_fbc = wm->sr.fbc; + __entry->hpll_plane = wm->hpll.plane; + __entry->hpll_cursor = wm->hpll.cursor; + __entry->hpll_fbc = wm->hpll.fbc; + __entry->cxsr = wm->cxsr; + __entry->hpll = wm->hpll_en; + __entry->fbc = wm->fbc_en; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr 
%s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s", + pipe_name(__entry->pipe), __entry->frame, __entry->scanline, + __entry->primary, __entry->sprite, __entry->cursor, + yesno(__entry->cxsr), __entry->sr_plane, __entry->sr_cursor, __entry->sr_fbc, + yesno(__entry->hpll), __entry->hpll_plane, __entry->hpll_cursor, __entry->hpll_fbc, + yesno(__entry->fbc)) +); + TRACE_EVENT(vlv_wm, TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm), TP_ARGS(crtc, wm), diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 957ef10..ef0e9f8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -913,6 +913,11 @@ static int g4x_tlb_miss_wa(int fifo_size, int width, int cpp) static void g4x_write_wm_values(struct drm_i915_private *dev_priv, const struct g4x_wm_values *wm) { + enum pipe pipe; + + for_each_pipe(dev_priv, pipe) + trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm); + I915_WRITE(DSPFW1, FW_WM(wm->sr.plane, SR) | FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) | -- cgit v1.1 From ab33081a1880ae79eb3d1a0abd596ef1ab05d923 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Apr 2017 21:14:32 +0300 Subject: drm/i915: Add support for sprites on g4x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the watermarks are in order, it should be safe to enable sprite planes on g4x. We alreday have the code in fact, we just call it ilk_. Let's rename to g4x_ and let it loose. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170421181432.15216-16-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_device_info.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 4 ++-- drivers/gpu/drm/i915/intel_sprite.c | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 7d01dfe..3718341 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -337,7 +337,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { for_each_pipe(dev_priv, pipe) info->num_sprites[pipe] = 2; - } else if (INTEL_GEN(dev_priv) >= 5) { + } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { for_each_pipe(dev_priv, pipe) info->num_sprites[pipe] = 1; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 99f8dc8..a04bf68 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1277,7 +1277,7 @@ static void assert_sprites_disabled(struct drm_i915_private *dev_priv, I915_STATE_WARN(val & SPRITE_ENABLE, "sprite %c assertion failure, should be off on pipe %c but is still active\n", plane_name(pipe), pipe_name(pipe)); - } else if (INTEL_GEN(dev_priv) >= 5) { + } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { u32 val = I915_READ(DVSCNTR(pipe)); I915_STATE_WARN(val & DVS_ENABLE, "sprite %c assertion failure, should be off on pipe %c but is still active\n", @@ -14429,7 +14429,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, case DRM_FORMAT_UYVY: case DRM_FORMAT_YVYU: case DRM_FORMAT_VYUY: - if (INTEL_GEN(dev_priv) < 5) { + if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) { DRM_DEBUG_KMS("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, 
&format_name)); goto err; diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index f7d4314..511f0e9 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -629,7 +629,7 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state, +static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = @@ -683,7 +683,7 @@ static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state, } static void -ilk_update_plane(struct drm_plane *plane, +g4x_update_plane(struct drm_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -744,7 +744,7 @@ ilk_update_plane(struct drm_plane *plane, } static void -ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) +g4x_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) { struct drm_device *dev = plane->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -964,7 +964,7 @@ intel_check_sprite_plane(struct drm_plane *plane, if (ret) return ret; - state->ctl = ilk_sprite_ctl(crtc_state, state); + state->ctl = g4x_sprite_ctl(crtc_state, state); } return 0; @@ -1024,7 +1024,7 @@ out: return ret; } -static const uint32_t ilk_plane_formats[] = { +static const uint32_t g4x_plane_formats[] = { DRM_FORMAT_XRGB8888, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, @@ -1128,15 +1128,15 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->can_scale = true; intel_plane->max_downscale = 16; - intel_plane->update_plane = ilk_update_plane; - intel_plane->disable_plane = ilk_disable_plane; + intel_plane->update_plane = g4x_update_plane; + intel_plane->disable_plane = g4x_disable_plane; if (IS_GEN6(dev_priv)) { plane_formats = snb_plane_formats; num_plane_formats = ARRAY_SIZE(snb_plane_formats); } else { - plane_formats = ilk_plane_formats; - num_plane_formats = ARRAY_SIZE(ilk_plane_formats); + plane_formats = g4x_plane_formats; + num_plane_formats = ARRAY_SIZE(g4x_plane_formats); } } -- cgit v1.1 From d509e28b70e45ea0699128764d05893bcf347007 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:32 +0300 Subject: drm/i915: Parametrize cursor/primary pipe select bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-2-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_reg.h | 7 ++----- drivers/gpu/drm/i915/intel_display.c | 9 +++------ 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 524fdfd..2b151dc 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5439,9 +5439,7 @@ enum { #define CURSOR_MODE_128_ARGB_AX ((1 << 5) | CURSOR_MODE_128_32B_AX) #define CURSOR_MODE_256_ARGB_AX ((1 << 5) | CURSOR_MODE_256_32B_AX) #define CURSOR_MODE_64_ARGB_AX ((1 << 5) | CURSOR_MODE_64_32B_AX) -#define MCURSOR_PIPE_SELECT (1 << 28) -#define MCURSOR_PIPE_A 0x00 -#define MCURSOR_PIPE_B (1 << 28) +#define MCURSOR_PIPE_SELECT(pipe) ((pipe) << 28) #define MCURSOR_GAMMA_ENABLE (1 << 26) #define CURSOR_ROTATE_180 (1<<15) #define CURSOR_TRICKLE_FEED_DISABLE (1 << 14) @@ -5499,8 +5497,7 @@ 
enum { #define DISPPLANE_PIPE_CSC_ENABLE (1<<24) #define DISPPLANE_SEL_PIPE_SHIFT 24 #define DISPPLANE_SEL_PIPE_MASK (3<pipe == PIPE_B) - dspcntr |= DISPPLANE_SEL_PIPE_B; - } + if (INTEL_GEN(dev_priv) < 4) + dspcntr |= DISPPLANE_SEL_PIPE(crtc->pipe); switch (fb->format->format) { case DRM_FORMAT_C8: @@ -9217,7 +9215,6 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - enum pipe pipe = crtc->pipe; u32 cntl; cntl = MCURSOR_GAMMA_ENABLE; @@ -9225,7 +9222,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, if (HAS_DDI(dev_priv)) cntl |= CURSOR_PIPE_CSC_ENABLE; - cntl |= pipe << 28; /* Connect to correct pipe */ + cntl |= MCURSOR_PIPE_SELECT(crtc->pipe); switch (plane_state->base.crtc_w) { case 64: -- cgit v1.1 From 282dbf9b017bc6d5fdaeadf14e534c2fe22fee2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:33 +0300 Subject: drm/i915: Pass intel_plane and intel_crtc to plane hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Streamline things by passing intel_plane and intel_crtc instead of the drm types to our plane hooks. v2: s/ilk/g4x/ in sprite code Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_atomic_plane.c | 6 +- drivers/gpu/drm/i915/intel_display.c | 109 ++++++++++++++--------------- drivers/gpu/drm/i915/intel_drv.h | 8 +-- drivers/gpu/drm/i915/intel_sprite.c | 110 +++++++++++++----------------- 4 files changed, 104 insertions(+), 129 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index cfb4729..182dc2a 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -185,7 +185,7 @@ int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state, } intel_state->base.visible = false; - ret = intel_plane->check_plane(plane, crtc_state, intel_state); + ret = intel_plane->check_plane(intel_plane, crtc_state, intel_state); if (ret) return ret; @@ -235,14 +235,14 @@ static void intel_plane_atomic_update(struct drm_plane *plane, trace_intel_update_plane(plane, to_intel_crtc(crtc)); - intel_plane->update_plane(plane, + intel_plane->update_plane(intel_plane, to_intel_crtc_state(crtc->state), intel_state); } else { trace_intel_disable_plane(plane, to_intel_crtc(crtc)); - intel_plane->disable_plane(plane, crtc); + intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc)); } } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8e54ef0..4c51fc6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2750,7 +2750,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, false); intel_pre_disable_primary_noatomic(&intel_crtc->base); trace_intel_disable_plane(primary, intel_crtc); - intel_plane->disable_plane(primary, &intel_crtc->base); + intel_plane->disable_plane(intel_plane, intel_crtc); return; @@ -3061,14 +3061,14 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) return 0; } -static void i9xx_update_primary_plane(struct drm_plane *primary, +static void i9xx_update_primary_plane(struct intel_plane *primary, const struct intel_crtc_state *crtc_state, const 
struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = to_i915(primary->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_framebuffer *fb = plane_state->base.fb; - int plane = intel_crtc->plane; + struct drm_i915_private *dev_priv = to_i915(primary->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + const struct drm_framebuffer *fb = plane_state->base.fb; + enum plane plane = primary->plane; u32 linear_offset; u32 dspcntr = plane_state->ctl; i915_reg_t reg = DSPCNTR(plane); @@ -3079,12 +3079,12 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); if (INTEL_GEN(dev_priv) >= 4) - intel_crtc->dspaddr_offset = plane_state->main.offset; + crtc->dspaddr_offset = plane_state->main.offset; else - intel_crtc->dspaddr_offset = linear_offset; + crtc->dspaddr_offset = linear_offset; - intel_crtc->adjusted_x = x; - intel_crtc->adjusted_y = y; + crtc->adjusted_x = x; + crtc->adjusted_y = y; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -3110,31 +3110,29 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { I915_WRITE_FW(DSPSURF(plane), intel_plane_ggtt_offset(plane_state) + - intel_crtc->dspaddr_offset); + crtc->dspaddr_offset); I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x); } else if (INTEL_GEN(dev_priv) >= 4) { I915_WRITE_FW(DSPSURF(plane), intel_plane_ggtt_offset(plane_state) + - intel_crtc->dspaddr_offset); + crtc->dspaddr_offset); I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x); I915_WRITE_FW(DSPLINOFF(plane), linear_offset); } else { I915_WRITE_FW(DSPADDR(plane), intel_plane_ggtt_offset(plane_state) + - intel_crtc->dspaddr_offset); + crtc->dspaddr_offset); } POSTING_READ_FW(reg); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void i9xx_disable_primary_plane(struct drm_plane *primary, - struct drm_crtc *crtc) +static void i9xx_disable_primary_plane(struct intel_plane *primary, + struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int plane = intel_crtc->plane; + struct drm_i915_private *dev_priv = to_i915(primary->base.dev); + enum plane plane = primary->plane; unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -3319,16 +3317,15 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, return plane_ctl; } -static void skylake_update_primary_plane(struct drm_plane *plane, +static void skylake_update_primary_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_framebuffer *fb = plane_state->base.fb; - enum plane_id plane_id = to_intel_plane(plane)->id; - enum pipe pipe = to_intel_plane(plane)->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + const struct drm_framebuffer *fb = plane_state->base.fb; + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; u32 plane_ctl = plane_state->ctl; unsigned int rotation = plane_state->base.rotation; u32 stride = skl_plane_stride(fb, 0, rotation); @@ -3350,10 +3347,10 @@ static void 
skylake_update_primary_plane(struct drm_plane *plane, dst_w--; dst_h--; - intel_crtc->dspaddr_offset = surf_addr; + crtc->dspaddr_offset = surf_addr; - intel_crtc->adjusted_x = src_x; - intel_crtc->adjusted_y = src_y; + crtc->adjusted_x = src_x; + crtc->adjusted_y = src_y; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -3392,13 +3389,12 @@ static void skylake_update_primary_plane(struct drm_plane *plane, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } -static void skylake_disable_primary_plane(struct drm_plane *primary, - struct drm_crtc *crtc) +static void skylake_disable_primary_plane(struct intel_plane *primary, + struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum plane_id plane_id = to_intel_plane(primary)->id; - enum pipe pipe = to_intel_plane(primary)->pipe; + struct drm_i915_private *dev_priv = to_i915(primary->base.dev); + enum plane_id plane_id = primary->id; + enum pipe pipe = primary->pipe; unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -3431,7 +3427,7 @@ static void intel_update_primary_planes(struct drm_device *dev) trace_intel_update_plane(&plane->base, to_intel_crtc(crtc)); - plane->update_plane(&plane->base, + plane->update_plane(plane, to_intel_crtc_state(crtc->state), plane_state); } @@ -5081,7 +5077,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc, unsigned plane_mask intel_crtc_dpms_overlay_disable(intel_crtc); drm_for_each_plane_mask(p, dev, plane_mask) - to_intel_plane(p)->disable_plane(p, crtc); + to_intel_plane(p)->disable_plane(to_intel_plane(p), intel_crtc); /* * FIXME: Once we grow proper nuclear flip support out of this we need @@ -13279,11 +13275,11 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state } static int -intel_check_primary_plane(struct drm_plane *plane, +intel_check_primary_plane(struct intel_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state) { - struct drm_i915_private *dev_priv = to_i915(plane->dev); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); struct drm_crtc *crtc = state->base.crtc; int min_scale = DRM_PLANE_HELPER_NO_SCALING; int max_scale = DRM_PLANE_HELPER_NO_SCALING; @@ -13498,12 +13494,12 @@ intel_legacy_cursor_update(struct drm_plane *plane, if (plane->state->visible) { trace_intel_update_plane(plane, to_intel_crtc(crtc)); - intel_plane->update_plane(plane, + intel_plane->update_plane(intel_plane, to_intel_crtc_state(crtc->state), to_intel_plane_state(plane->state)); } else { trace_intel_disable_plane(plane, to_intel_crtc(crtc)); - intel_plane->disable_plane(plane, crtc); + intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc)); } intel_cleanup_plane_fb(plane, new_plane_state); @@ -13647,14 +13643,14 @@ fail: } static int -intel_check_cursor_plane(struct drm_plane *plane, +intel_check_cursor_plane(struct intel_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state) { - struct drm_i915_private *dev_priv = to_i915(plane->dev); - struct drm_framebuffer *fb = state->base.fb; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = state->base.fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); - enum pipe pipe = to_intel_plane(plane)->pipe; + enum pipe pipe = plane->pipe; unsigned stride; int ret; @@ -13714,23 +13710,20 @@ intel_check_cursor_plane(struct drm_plane *plane, } static void -intel_disable_cursor_plane(struct drm_plane *plane, - 
struct drm_crtc *crtc) +intel_disable_cursor_plane(struct intel_plane *plane, + struct intel_crtc *crtc) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - - intel_crtc->cursor_addr = 0; - intel_crtc_update_cursor(crtc, NULL); + crtc->cursor_addr = 0; + intel_crtc_update_cursor(&crtc->base, NULL); } static void -intel_update_cursor_plane(struct drm_plane *plane, +intel_update_cursor_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *state) { - struct drm_crtc *crtc = crtc_state->base.crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_i915_private *dev_priv = to_i915(plane->dev); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_gem_object *obj = intel_fb_obj(state->base.fb); uint32_t addr; @@ -13741,8 +13734,8 @@ intel_update_cursor_plane(struct drm_plane *plane, else addr = obj->phys_handle->busaddr; - intel_crtc->cursor_addr = addr; - intel_crtc_update_cursor(crtc, state); + crtc->cursor_addr = addr; + intel_crtc_update_cursor(&crtc->base, state); } static struct intel_plane * @@ -15160,7 +15153,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) continue; trace_intel_disable_plane(&plane->base, crtc); - plane->disable_plane(&plane->base, &crtc->base); + plane->disable_plane(plane, crtc); } } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f530df7..f922e04 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -851,12 +851,12 @@ struct intel_plane { * the intel_plane_state structure and accessed via plane_state. */ - void (*update_plane)(struct drm_plane *plane, + void (*update_plane)(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); - void (*disable_plane)(struct drm_plane *plane, - struct drm_crtc *crtc); - int (*check_plane)(struct drm_plane *plane, + void (*disable_plane)(struct intel_plane *plane, + struct intel_crtc *crtc); + int (*check_plane)(struct intel_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state); }; diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 511f0e9..9dfd5b3 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -207,16 +207,14 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work } static void -skl_update_plane(struct drm_plane *drm_plane, +skl_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_device *dev = drm_plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(drm_plane); - struct drm_framebuffer *fb = plane_state->base.fb; - enum plane_id plane_id = intel_plane->id; - enum pipe pipe = intel_plane->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; u32 plane_ctl = plane_state->ctl; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 surf_addr = plane_state->main.offset; @@ -285,13 +283,11 @@ skl_update_plane(struct drm_plane *drm_plane, } static void -skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc) +skl_disable_plane(struct intel_plane *plane, struct 
intel_crtc *crtc) { - struct drm_device *dev = dplane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(dplane); - enum plane_id plane_id = intel_plane->id; - enum pipe pipe = intel_plane->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -305,10 +301,10 @@ skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc) } static void -chv_update_csc(struct intel_plane *intel_plane, uint32_t format) +chv_update_csc(struct intel_plane *plane, uint32_t format) { - struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev); - enum plane_id plane_id = intel_plane->id; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum plane_id plane_id = plane->id; /* Seems RGB data bypasses the CSC always */ if (!format_is_yuv(format)) @@ -408,16 +404,14 @@ static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state, } static void -vlv_update_plane(struct drm_plane *dplane, +vlv_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_device *dev = dplane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(dplane); - struct drm_framebuffer *fb = plane_state->base.fb; - enum pipe pipe = intel_plane->pipe; - enum plane_id plane_id = intel_plane->id; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + enum pipe pipe = plane->pipe; + enum plane_id plane_id = plane->id; u32 sprctl = plane_state->ctl; u32 sprsurf_offset = plane_state->main.offset; u32 linear_offset; @@ -439,7 +433,7 @@ vlv_update_plane(struct drm_plane *dplane, spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) - chv_update_csc(intel_plane, fb->format->format); + chv_update_csc(plane, fb->format->format); if (key->flags) { I915_WRITE_FW(SPKEYMINVAL(pipe, plane_id), key->min_value); @@ -466,13 +460,11 @@ vlv_update_plane(struct drm_plane *dplane, } static void -vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc) +vlv_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) { - struct drm_device *dev = dplane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(dplane); - enum pipe pipe = intel_plane->pipe; - enum plane_id plane_id = intel_plane->id; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; + enum plane_id plane_id = plane->id; unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -542,15 +534,13 @@ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, } static void -ivb_update_plane(struct drm_plane *plane, +ivb_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(plane); - struct drm_framebuffer *fb = plane_state->base.fb; - enum pipe pipe = intel_plane->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + enum pipe pipe = plane->pipe; u32 sprctl = 
plane_state->ctl, sprscale = 0; u32 sprsurf_offset = plane_state->main.offset; u32 linear_offset; @@ -597,7 +587,7 @@ ivb_update_plane(struct drm_plane *plane, I915_WRITE_FW(SPRLINOFF(pipe), linear_offset); I915_WRITE_FW(SPRSIZE(pipe), (crtc_h << 16) | crtc_w); - if (intel_plane->can_scale) + if (plane->can_scale) I915_WRITE_FW(SPRSCALE(pipe), sprscale); I915_WRITE_FW(SPRCTL(pipe), sprctl); I915_WRITE_FW(SPRSURF(pipe), @@ -608,19 +598,17 @@ ivb_update_plane(struct drm_plane *plane, } static void -ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) +ivb_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) { - struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(plane); - int pipe = intel_plane->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); I915_WRITE_FW(SPRCTL(pipe), 0); /* Can't leave the scaler enabled... */ - if (intel_plane->can_scale) + if (plane->can_scale) I915_WRITE_FW(SPRSCALE(pipe), 0); I915_WRITE_FW(SPRSURF(pipe), 0); @@ -683,15 +671,13 @@ static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, } static void -g4x_update_plane(struct drm_plane *plane, +g4x_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(plane); - struct drm_framebuffer *fb = plane_state->base.fb; - int pipe = intel_plane->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + enum pipe pipe = plane->pipe; u32 dvscntr = plane_state->ctl, dvsscale = 0; u32 dvssurf_offset = plane_state->main.offset; u32 linear_offset; @@ -744,12 +730,10 @@ g4x_update_plane(struct drm_plane *plane, } static void -g4x_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) +g4x_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) { - struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane = to_intel_plane(plane); - int pipe = intel_plane->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -765,14 +749,12 @@ g4x_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc) } static int -intel_check_sprite_plane(struct drm_plane *plane, +intel_check_sprite_plane(struct intel_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state) { - struct drm_i915_private *dev_priv = to_i915(plane->dev); - struct drm_crtc *crtc = state->base.crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_plane *intel_plane = to_intel_plane(plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_framebuffer *fb = state->base.fb; int crtc_x, crtc_y; unsigned int crtc_w, crtc_h; @@ -794,7 +776,7 @@ intel_check_sprite_plane(struct drm_plane *plane, } /* Don't modify another pipe's plane */ - if (intel_plane->pipe != intel_crtc->pipe) { + if (plane->pipe != crtc->pipe) { DRM_DEBUG_KMS("Wrong plane <-> crtc mapping\n"); return -EINVAL; } @@ -811,16 +793,16 @@ 
intel_check_sprite_plane(struct drm_plane *plane, if (state->ckey.flags == I915_SET_COLORKEY_NONE) { can_scale = 1; min_scale = 1; - max_scale = skl_max_scale(intel_crtc, crtc_state); + max_scale = skl_max_scale(crtc, crtc_state); } else { can_scale = 0; min_scale = DRM_PLANE_HELPER_NO_SCALING; max_scale = DRM_PLANE_HELPER_NO_SCALING; } } else { - can_scale = intel_plane->can_scale; - max_scale = intel_plane->max_downscale << 16; - min_scale = intel_plane->can_scale ? 1 : (1 << 16); + can_scale = plane->can_scale; + max_scale = plane->max_downscale << 16; + min_scale = plane->can_scale ? 1 : (1 << 16); } /* -- cgit v1.1 From 1cecc830e6b662a765d60860112cf69182b56b8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:34 +0300 Subject: drm/i915: Refactor CURBASE calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The remaining cursor base address calculations are spread around into several different locations. Just pull it all into one place. v2: Don't pass intel_plane as we don't really need it Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 49 +++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4c51fc6..6e0a579 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9135,6 +9135,31 @@ out: return active; } +static u32 intel_cursor_base(struct intel_crtc *crtc, + const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + const struct drm_i915_gem_object *obj = intel_fb_obj(fb); + u32 base; + + if (INTEL_INFO(dev_priv)->cursor_needs_physical) + base = obj->phys_handle->busaddr; + else + base = intel_plane_ggtt_offset(plane_state); + + crtc->cursor_addr = base; + + /* ILK+ do this automagically */ + if (HAS_GMCH_DISPLAY(dev_priv) && + plane_state->base.rotation & DRM_ROTATE_180) + base += (plane_state->base.crtc_h * + plane_state->base.crtc_w - 1) * fb->format->cpp[0]; + + return base; +} + static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -9274,9 +9299,8 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; - u32 base = intel_crtc->cursor_addr; + u32 pos = 0, base = 0; unsigned long irqflags; - u32 pos = 0; if (plane_state) { int x = plane_state->base.crtc_x; @@ -9294,12 +9318,9 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, } pos |= y << CURSOR_Y_SHIFT; - /* ILK+ do this automagically */ - if (HAS_GMCH_DISPLAY(dev_priv) && - plane_state->base.rotation & DRM_ROTATE_180) { - base += (plane_state->base.crtc_h * - plane_state->base.crtc_w - 1) * 4; - } + base = intel_cursor_base(intel_crtc, plane_state); + } else { + intel_crtc->cursor_addr = 0; } spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -13713,7 +13734,6 @@ static void intel_disable_cursor_plane(struct intel_plane *plane, struct intel_crtc *crtc) { - crtc->cursor_addr = 0; intel_crtc_update_cursor(&crtc->base, NULL); } @@ -13722,19 +13742,8 @@ 
intel_update_cursor_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *state) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_gem_object *obj = intel_fb_obj(state->base.fb); - uint32_t addr; - - if (!obj) - addr = 0; - else if (!INTEL_INFO(dev_priv)->cursor_needs_physical) - addr = intel_plane_ggtt_offset(state); - else - addr = obj->phys_handle->busaddr; - crtc->cursor_addr = addr; intel_crtc_update_cursor(&crtc->base, state); } -- cgit v1.1 From cd5dcbf1b26c60dfa8fd8628fd2fcf3d33877c63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:35 +0300 Subject: drm/i915: Clean up cursor junk from intel_crtc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move cursor_base, cursor_cntl, and cursor_size from intel_crtc into intel_plane so that we don't need the crtc for cursor stuff so much. Also entirely nuke cursor_addr which IMO doesn't provide any benefit since it's not actually used by the cursor code itself. I'm not 100% sure what the SKL+ DDB is code is after by looking at cursor_addr so I just make it do its checks unconditionally. If that's not correct then we should likely replace it with somehting like plane_state->visible. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-5-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/i915_debugfs.c | 48 +++++----------------- drivers/gpu/drm/i915/intel_display.c | 79 +++++++++++++++--------------------- drivers/gpu/drm/i915/intel_drv.h | 9 ++-- 3 files changed, 47 insertions(+), 89 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b302c7b..7830337 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3042,36 +3042,6 @@ static void intel_connector_info(struct seq_file *m, intel_seq_print_mode(m, 2, mode); } -static bool cursor_active(struct drm_i915_private *dev_priv, int pipe) -{ - u32 state; - - if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) - state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; - else - state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; - - return state; -} - -static bool cursor_position(struct drm_i915_private *dev_priv, - int pipe, int *x, int *y) -{ - u32 pos; - - pos = I915_READ(CURPOS(pipe)); - - *x = (pos >> CURSOR_X_SHIFT) & CURSOR_POS_MASK; - if (pos & (CURSOR_POS_SIGN << CURSOR_X_SHIFT)) - *x = -*x; - - *y = (pos >> CURSOR_Y_SHIFT) & CURSOR_POS_MASK; - if (pos & (CURSOR_POS_SIGN << CURSOR_Y_SHIFT)) - *y = -*y; - - return cursor_active(dev_priv, pipe); -} - static const char *plane_type(enum drm_plane_type type) { switch (type) { @@ -3193,9 +3163,7 @@ static int i915_display_info(struct seq_file *m, void *unused) seq_printf(m, "CRTC info\n"); seq_printf(m, "---------\n"); for_each_intel_crtc(dev, crtc) { - bool active; struct intel_crtc_state *pipe_config; - int x, y; drm_modeset_lock(&crtc->base.mutex, NULL); pipe_config = to_intel_crtc_state(crtc->base.state); @@ -3207,14 +3175,18 @@ static int i915_display_info(struct seq_file *m, void *unused) yesno(pipe_config->dither), pipe_config->pipe_bpp); if (pipe_config->base.active) { + struct intel_plane *cursor = + to_intel_plane(crtc->base.cursor); + intel_crtc_info(m, crtc); - active = cursor_position(dev_priv, crtc->pipe, &x, &y); - seq_printf(m, "\tcursor 
visible? %s, position (%d, %d), size %dx%d, addr 0x%08x, active? %s\n", - yesno(crtc->cursor_base), - x, y, crtc->base.cursor->state->crtc_w, - crtc->base.cursor->state->crtc_h, - crtc->cursor_addr, yesno(active)); + seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x\n", + yesno(cursor->base.state->visible), + cursor->base.state->crtc_x, + cursor->base.state->crtc_y, + cursor->base.state->crtc_w, + cursor->base.state->crtc_h, + cursor->cursor.base); intel_scaler_info(m, crtc); intel_plane_info(m, crtc); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6e0a579..bbc2022 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9135,8 +9135,7 @@ out: return active; } -static u32 intel_cursor_base(struct intel_crtc *crtc, - const struct intel_plane_state *plane_state) +static u32 intel_cursor_base(const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); @@ -9149,8 +9148,6 @@ static u32 intel_cursor_base(struct intel_crtc *crtc, else base = intel_plane_ggtt_offset(plane_state); - crtc->cursor_addr = base; - /* ILK+ do this automagically */ if (HAS_GMCH_DISPLAY(dev_priv) && plane_state->base.rotation & DRM_ROTATE_180) @@ -9185,12 +9182,10 @@ static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, CURSOR_STRIDE(stride); } -static void i845_update_cursor(struct drm_crtc *crtc, u32 base, +static void i845_update_cursor(struct intel_plane *plane, u32 base, const struct intel_plane_state *plane_state) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); uint32_t cntl = 0, size = 0; if (plane_state && plane_state->base.visible) { @@ -9201,32 +9196,32 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base, size = (height << 12) | width; } - if (intel_crtc->cursor_cntl != 0 && - (intel_crtc->cursor_base != base || - intel_crtc->cursor_size != size || - intel_crtc->cursor_cntl != cntl)) { + if (plane->cursor.cntl != 0 && + (plane->cursor.base != base || + plane->cursor.size != size || + plane->cursor.cntl != cntl)) { /* On these chipsets we can only modify the base/size/stride * whilst the cursor is disabled. 
*/ I915_WRITE_FW(CURCNTR(PIPE_A), 0); POSTING_READ_FW(CURCNTR(PIPE_A)); - intel_crtc->cursor_cntl = 0; + plane->cursor.cntl = 0; } - if (intel_crtc->cursor_base != base) { + if (plane->cursor.base != base) { I915_WRITE_FW(CURBASE(PIPE_A), base); - intel_crtc->cursor_base = base; + plane->cursor.base = base; } - if (intel_crtc->cursor_size != size) { + if (plane->cursor.size != size) { I915_WRITE_FW(CURSIZE, size); - intel_crtc->cursor_size = size; + plane->cursor.size = size; } - if (intel_crtc->cursor_cntl != cntl) { + if (plane->cursor.cntl != cntl) { I915_WRITE_FW(CURCNTR(PIPE_A), cntl); POSTING_READ_FW(CURCNTR(PIPE_A)); - intel_crtc->cursor_cntl = cntl; + plane->cursor.cntl = cntl; } } @@ -9266,39 +9261,35 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, return cntl; } -static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, +static void i9xx_update_cursor(struct intel_plane *plane, u32 base, const struct intel_plane_state *plane_state) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; uint32_t cntl = 0; if (plane_state && plane_state->base.visible) cntl = plane_state->ctl; - if (intel_crtc->cursor_cntl != cntl) { + if (plane->cursor.cntl != cntl) { I915_WRITE_FW(CURCNTR(pipe), cntl); POSTING_READ_FW(CURCNTR(pipe)); - intel_crtc->cursor_cntl = cntl; + plane->cursor.cntl = cntl; } /* and commit changes on next vblank */ I915_WRITE_FW(CURBASE(pipe), base); POSTING_READ_FW(CURBASE(pipe)); - intel_crtc->cursor_base = base; + plane->cursor.base = base; } /* If no-part of the cursor is visible on the framebuffer, then the GPU may hang... */ -static void intel_crtc_update_cursor(struct drm_crtc *crtc, +static void intel_crtc_update_cursor(struct intel_plane *plane, const struct intel_plane_state *plane_state) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum pipe pipe = plane->pipe; u32 pos = 0, base = 0; unsigned long irqflags; @@ -9318,9 +9309,7 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, } pos |= y << CURSOR_Y_SHIFT; - base = intel_cursor_base(intel_crtc, plane_state); - } else { - intel_crtc->cursor_addr = 0; + base = intel_cursor_base(plane_state); } spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -9328,9 +9317,9 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, I915_WRITE_FW(CURPOS(pipe), pos); if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) - i845_update_cursor(crtc, base, plane_state); + i845_update_cursor(plane, base, plane_state); else - i9xx_update_cursor(crtc, base, plane_state); + i9xx_update_cursor(plane, base, plane_state); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } @@ -11892,7 +11881,7 @@ static void verify_wm_state(struct drm_crtc *crtc, * allocation. 
In that case since the ddb allocation will be updated * once the plane becomes visible, we can skip this check */ - if (intel_crtc->cursor_addr) { + if (1) { hw_plane_wm = &hw_wm.planes[PLANE_CURSOR]; sw_plane_wm = &sw_wm->planes[PLANE_CURSOR]; @@ -13734,7 +13723,7 @@ static void intel_disable_cursor_plane(struct intel_plane *plane, struct intel_crtc *crtc) { - intel_crtc_update_cursor(&crtc->base, NULL); + intel_crtc_update_cursor(plane, NULL); } static void @@ -13742,9 +13731,7 @@ intel_update_cursor_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - - intel_crtc_update_cursor(&crtc->base, state); + intel_crtc_update_cursor(plane, state); } static struct intel_plane * @@ -13778,6 +13765,10 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) cursor->update_plane = intel_update_cursor_plane; cursor->disable_plane = intel_disable_cursor_plane; + cursor->cursor.base = ~0; + cursor->cursor.cntl = ~0; + cursor->cursor.size = ~0; + ret = drm_universal_plane_init(&dev_priv->drm, &cursor->base, 0, &intel_cursor_plane_funcs, intel_cursor_formats, @@ -13885,10 +13876,6 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) intel_crtc->pipe = pipe; intel_crtc->plane = primary->plane; - intel_crtc->cursor_base = ~0; - intel_crtc->cursor_cntl = ~0; - intel_crtc->cursor_size = ~0; - /* initialize shared scalers */ intel_crtc_init_scalers(intel_crtc, crtc_state); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f922e04..cc13706 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -799,11 +799,6 @@ struct intel_crtc { int adjusted_x; int adjusted_y; - uint32_t cursor_addr; - uint32_t cursor_cntl; - uint32_t cursor_size; - uint32_t cursor_base; - struct intel_crtc_state *config; /* global reset count when the last flip was submitted */ @@ -845,6 +840,10 @@ struct intel_plane { int max_downscale; uint32_t frontbuffer_bit; + struct { + u32 base, cntl, size; + } cursor; + /* * NOTE: Do not place new plane state fields here (e.g., when adding * new plane properties). New runtime state should now be placed in -- cgit v1.1 From ed270223759c06525ecf18d5d4aade54c827e7ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:36 +0300 Subject: drm/i915: Refactor CURPOS calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the CURPOS calculations to seprate function. This will allow sharing the code between the 845/865 vs. others codepaths when we otherwise split them apart. 
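For readers not familiar with the hardware layout: CURPOS stores each coordinate as sign-magnitude rather than two's complement, which is why the new helper sets a per-axis sign bit and then negates the value instead of writing the signed number directly. A small worked example, illustrative only, using the CURSOR_POS_SIGN (0x8000), CURSOR_X_SHIFT (0) and CURSOR_Y_SHIFT (16) definitions from i915_reg.h:

	/* cursor placed at crtc_x = -16, crtc_y = 32 */
	u32 pos = 0;
	pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT;	/* x < 0: set the X sign bit */
	pos |= 16 << CURSOR_X_SHIFT;			/* ...then the magnitude */
	pos |= 32 << CURSOR_Y_SHIFT;			/* y >= 0: magnitude only */
	/* result: pos == 0x00208010 */

The same encoding is used by both the 845/865 and the i9xx-style cursor registers, which is what makes sharing this helper across the two codepaths possible once they are otherwise split apart.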
v2: Don't pass intel_plane as it's not needed Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-6-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 37 +++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bbc2022..3c310d9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9157,6 +9157,27 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) return base; } +static u32 intel_cursor_position(const struct intel_plane_state *plane_state) +{ + int x = plane_state->base.crtc_x; + int y = plane_state->base.crtc_y; + u32 pos = 0; + + if (x < 0) { + pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT; + x = -x; + } + pos |= x << CURSOR_X_SHIFT; + + if (y < 0) { + pos |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT; + y = -y; + } + pos |= y << CURSOR_Y_SHIFT; + + return pos; +} + static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -9294,22 +9315,8 @@ static void intel_crtc_update_cursor(struct intel_plane *plane, unsigned long irqflags; if (plane_state) { - int x = plane_state->base.crtc_x; - int y = plane_state->base.crtc_y; - - if (x < 0) { - pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT; - x = -x; - } - pos |= x << CURSOR_X_SHIFT; - - if (y < 0) { - pos |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT; - y = -y; - } - pos |= y << CURSOR_Y_SHIFT; - base = intel_cursor_base(plane_state); + pos = intel_cursor_position(plane_state); } spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); -- cgit v1.1 From b2d03b0ddfc9310be1f35320597487157f829ede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:37 +0300 Subject: drm/i915: Move cursor position and base handling into the platform specific functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Supposedly on some platforms we can get extra atomicity guarantees for CURPOS if we write it between the CURCNTR and CURBASE. Let's move the CURPOS handling into the platform specific hooks to make the possible without having to pass the calculated CURPOS around. And while at it, do the same for the CURBASE to avoid passing that either. 
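The property preserved by this reordering is that CURBASE remains the last register written, since its write is what commits the double-buffered cursor update on the next vblank (per the existing "commit changes on next vblank" comment). A condensed sketch of the i9xx write sequence after this patch, with the cached-value comparisons and the intermediate posting read left out for brevity (the full hunk follows below):

	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);

	I915_WRITE_FW(CURCNTR(pipe), cntl);		/* cursor mode/format first */
	if (cntl)
		I915_WRITE_FW(CURPOS(pipe), pos);	/* between CNTR and BASE for the extra atomicity */
	I915_WRITE_FW(CURBASE(pipe), base);		/* written last: commits on next vblank */
	POSTING_READ_FW(CURBASE(pipe));

	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);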
v2: Use I915_WRITE_FW() and grab uncore.lock Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson #v1 Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-7-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 95 +++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3c310d9..2372b2a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9203,11 +9203,13 @@ static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, CURSOR_STRIDE(stride); } -static void i845_update_cursor(struct intel_plane *plane, u32 base, +static void i845_update_cursor(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - uint32_t cntl = 0, size = 0; + u32 cntl = 0, base = 0, pos = 0, size = 0; + unsigned long irqflags; if (plane_state && plane_state->base.visible) { unsigned int width = plane_state->base.crtc_w; @@ -9215,8 +9217,13 @@ static void i845_update_cursor(struct intel_plane *plane, u32 base, cntl = plane_state->ctl; size = (height << 12) | width; + + base = intel_cursor_base(plane_state); + pos = intel_cursor_position(plane_state); } + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + if (plane->cursor.cntl != 0 && (plane->cursor.base != base || plane->cursor.size != size || @@ -9239,11 +9246,22 @@ static void i845_update_cursor(struct intel_plane *plane, u32 base, plane->cursor.size = size; } + if (cntl) + I915_WRITE_FW(CURPOS(PIPE_A), pos); + if (plane->cursor.cntl != cntl) { I915_WRITE_FW(CURCNTR(PIPE_A), cntl); POSTING_READ_FW(CURCNTR(PIPE_A)); plane->cursor.cntl = cntl; } + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); +} + +static void i845_disable_cursor(struct intel_plane *plane, + struct intel_crtc *crtc) +{ + i845_update_cursor(plane, NULL, NULL); } static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, @@ -9282,53 +9300,46 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, return cntl; } -static void i9xx_update_cursor(struct intel_plane *plane, u32 base, +static void i9xx_update_cursor(struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - uint32_t cntl = 0; + u32 cntl = 0, base = 0, pos = 0; + unsigned long irqflags; - if (plane_state && plane_state->base.visible) + if (plane_state && plane_state->base.visible) { cntl = plane_state->ctl; + base = intel_cursor_base(plane_state); + pos = intel_cursor_position(plane_state); + } + + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + if (plane->cursor.cntl != cntl) { I915_WRITE_FW(CURCNTR(pipe), cntl); POSTING_READ_FW(CURCNTR(pipe)); plane->cursor.cntl = cntl; } + if (cntl) + I915_WRITE_FW(CURPOS(pipe), pos); + /* and commit changes on next vblank */ I915_WRITE_FW(CURBASE(pipe), base); POSTING_READ_FW(CURBASE(pipe)); + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + plane->cursor.base = base; } -/* If no-part of the cursor is visible on the framebuffer, then the GPU may hang... 
*/ -static void intel_crtc_update_cursor(struct intel_plane *plane, - const struct intel_plane_state *plane_state) +static void i9xx_disable_cursor(struct intel_plane *plane, + struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - enum pipe pipe = plane->pipe; - u32 pos = 0, base = 0; - unsigned long irqflags; - - if (plane_state) { - base = intel_cursor_base(plane_state); - pos = intel_cursor_position(plane_state); - } - - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - - I915_WRITE_FW(CURPOS(pipe), pos); - - if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) - i845_update_cursor(plane, base, plane_state); - else - i9xx_update_cursor(plane, base, plane_state); - - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + i9xx_update_cursor(plane, NULL, NULL); } static bool cursor_size_ok(struct drm_i915_private *dev_priv, @@ -13726,23 +13737,9 @@ intel_check_cursor_plane(struct intel_plane *plane, return 0; } -static void -intel_disable_cursor_plane(struct intel_plane *plane, - struct intel_crtc *crtc) -{ - intel_crtc_update_cursor(plane, NULL); -} - -static void -intel_update_cursor_plane(struct intel_plane *plane, - const struct intel_crtc_state *crtc_state, - const struct intel_plane_state *state) -{ - intel_crtc_update_cursor(plane, state); -} - static struct intel_plane * -intel_cursor_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) +intel_cursor_plane_create(struct drm_i915_private *dev_priv, + enum pipe pipe) { struct intel_plane *cursor = NULL; struct intel_plane_state *state = NULL; @@ -13769,8 +13766,14 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) cursor->id = PLANE_CURSOR; cursor->frontbuffer_bit = INTEL_FRONTBUFFER_CURSOR(pipe); cursor->check_plane = intel_check_cursor_plane; - cursor->update_plane = intel_update_cursor_plane; - cursor->disable_plane = intel_disable_cursor_plane; + + if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { + cursor->update_plane = i845_update_cursor; + cursor->disable_plane = i845_disable_cursor; + } else { + cursor->update_plane = i9xx_update_cursor; + cursor->disable_plane = i9xx_disable_cursor; + } cursor->cursor.base = ~0; cursor->cursor.cntl = ~0; -- cgit v1.1 From 75343a44c901b681328b22ae50bcf5efdf6c988f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:38 +0300 Subject: drm/i915: Drop useless posting reads from cursor commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There should be no need to do posting reads between all the cursor register accessess. Let's just drop them. v2: Rebase due to I915_WRITE_FW() and uncore.lock Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson #v1 Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-8-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2372b2a..cb20de0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9232,30 +9232,28 @@ static void i845_update_cursor(struct intel_plane *plane, * whilst the cursor is disabled. 
*/ I915_WRITE_FW(CURCNTR(PIPE_A), 0); - POSTING_READ_FW(CURCNTR(PIPE_A)); plane->cursor.cntl = 0; } - if (plane->cursor.base != base) { + if (plane->cursor.base != base) I915_WRITE_FW(CURBASE(PIPE_A), base); - plane->cursor.base = base; - } - if (plane->cursor.size != size) { + if (plane->cursor.size != size) I915_WRITE_FW(CURSIZE, size); - plane->cursor.size = size; - } if (cntl) I915_WRITE_FW(CURPOS(PIPE_A), pos); - if (plane->cursor.cntl != cntl) { + if (plane->cursor.cntl != cntl) I915_WRITE_FW(CURCNTR(PIPE_A), cntl); - POSTING_READ_FW(CURCNTR(PIPE_A)); - plane->cursor.cntl = cntl; - } + + POSTING_READ_FW(CURCNTR(PIPE_A)); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + + plane->cursor.cntl = cntl; + plane->cursor.base = base; + plane->cursor.size = size; } static void i845_disable_cursor(struct intel_plane *plane, @@ -9318,21 +9316,21 @@ static void i9xx_update_cursor(struct intel_plane *plane, spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - if (plane->cursor.cntl != cntl) { + if (plane->cursor.cntl != cntl) I915_WRITE_FW(CURCNTR(pipe), cntl); - POSTING_READ_FW(CURCNTR(pipe)); - plane->cursor.cntl = cntl; - } if (cntl) I915_WRITE_FW(CURPOS(pipe), pos); - /* and commit changes on next vblank */ - I915_WRITE_FW(CURBASE(pipe), base); + if (plane->cursor.cntl != cntl || + plane->cursor.base != base) + I915_WRITE_FW(CURBASE(pipe), base); + POSTING_READ_FW(CURBASE(pipe)); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + plane->cursor.cntl = cntl; plane->cursor.base = base; } -- cgit v1.1 From 659056f257e01fbc81e7d0887af7551d4f145130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:39 +0300 Subject: drm/i915: Split cursor check_plane into i845 and i9xx variants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 845/865 and 830/855/9xx+ style cursor don't have that much in common with each other, so let's just split the .check_plane() hook into two variants as well. 
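Together with the earlier update_plane/disable_plane split, every cursor plane hook is now chosen per platform once, at plane creation time, rather than branching on the platform inside a common hook on every update. The net effect, mirroring the intel_cursor_plane_create() hunk further down in this patch:

	if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) {
		cursor->update_plane = i845_update_cursor;
		cursor->disable_plane = i845_disable_cursor;
		cursor->check_plane = i845_check_cursor;
	} else {
		cursor->update_plane = i9xx_update_cursor;
		cursor->disable_plane = i9xx_disable_cursor;
		cursor->check_plane = i9xx_check_cursor;
	}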
v2: Keep the common stuff in one place (Chris) v3: s/DRM_FORMAT_MOD_NONE/DRM_FORMAT_MOD_LINEAR/ Cc: Chris Wilson Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson #v1 Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-9-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 275 ++++++++++++++++++++++------------- 1 file changed, 171 insertions(+), 104 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index cb20de0..ed016320 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9178,6 +9178,31 @@ static u32 intel_cursor_position(const struct intel_plane_state *plane_state) return pos; } +static int intel_check_cursor(struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state) +{ + const struct drm_framebuffer *fb = plane_state->base.fb; + int ret; + + ret = drm_plane_helper_check_state(&plane_state->base, + &plane_state->clip, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, true); + if (ret) + return ret; + + if (!fb) + return 0; + + if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { + DRM_DEBUG_KMS("cursor cannot be tiled\n"); + return -EINVAL; + } + + return 0; +} + static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -9203,6 +9228,68 @@ static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, CURSOR_STRIDE(stride); } +static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + int width = plane_state->base.crtc_w; + int height = plane_state->base.crtc_h; + + if (width == 0 || height == 0) + return false; + + /* + * 845g/865g are only limited by the width of their cursors, + * the height is arbitrary up to the precision of the register. + */ + if (!IS_ALIGNED(width, 64)) + return false; + + if (width > (IS_I845G(dev_priv) ? 
64 : 512)) + return false; + + if (height > 1023) + return false; + + return true; +} + +static int i845_check_cursor(struct intel_plane *plane, + struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state) +{ + const struct drm_framebuffer *fb = plane_state->base.fb; + const struct drm_i915_gem_object *obj = intel_fb_obj(fb); + unsigned int stride; + int ret; + + ret = intel_check_cursor(crtc_state, plane_state); + if (ret) + return ret; + + /* if we want to turn off the cursor ignore width and height */ + if (!obj) + return 0; + + /* Check for which cursor types we support */ + if (!i845_cursor_size_ok(plane_state)) { + DRM_DEBUG("Cursor dimension %dx%d not supported\n", + plane_state->base.crtc_w, + plane_state->base.crtc_h); + return -EINVAL; + } + + stride = roundup_pow_of_two(plane_state->base.crtc_w) * 4; + if (obj->base.size < stride * plane_state->base.crtc_h) { + DRM_DEBUG_KMS("buffer is too small\n"); + return -ENOMEM; + } + + plane_state->ctl = i845_cursor_ctl(crtc_state, plane_state); + + return 0; +} + static void i845_update_cursor(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) @@ -9298,6 +9385,88 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, return cntl; } +static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + int width = plane_state->base.crtc_w; + int height = plane_state->base.crtc_h; + + if (width == 0 || height == 0) + return false; + + /* + * Cursors are limited to a few power-of-two + * sizes, and they must be square. + */ + switch (width | height) { + case 256: + case 128: + if (IS_GEN2(dev_priv)) + return false; + case 64: + break; + default: + return false; + } + + return true; +} + +static int i9xx_check_cursor(struct intel_plane *plane, + struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + const struct drm_i915_gem_object *obj = intel_fb_obj(fb); + enum pipe pipe = plane->pipe; + unsigned int stride; + int ret; + + ret = intel_check_cursor(crtc_state, plane_state); + if (ret) + return ret; + + /* if we want to turn off the cursor ignore width and height */ + if (!obj) + return 0; + + /* Check for which cursor types we support */ + if (!i9xx_cursor_size_ok(plane_state)) { + DRM_DEBUG("Cursor dimension %dx%d not supported\n", + plane_state->base.crtc_w, + plane_state->base.crtc_h); + return -EINVAL; + } + + stride = roundup_pow_of_two(plane_state->base.crtc_w) * 4; + if (obj->base.size < stride * plane_state->base.crtc_h) { + DRM_DEBUG_KMS("buffer is too small\n"); + return -ENOMEM; + } + + /* + * There's something wrong with the cursor on CHV pipe C. + * If it straddles the left edge of the screen then + * moving it away from the edge or disabling it often + * results in a pipe underrun, and often that can lead to + * dead pipe (constant underrun reported, and it scans + * out just a solid color). To recover from that, the + * display power well must be turned off and on again. + * Refuse the put the cursor into that compromised position. 
+ */ + if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C && + plane_state->base.visible && plane_state->base.crtc_x < 0) { + DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n"); + return -EINVAL; + } + + plane_state->ctl = i9xx_cursor_ctl(crtc_state, plane_state); + + return 0; +} + static void i9xx_update_cursor(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) @@ -9340,42 +9509,6 @@ static void i9xx_disable_cursor(struct intel_plane *plane, i9xx_update_cursor(plane, NULL, NULL); } -static bool cursor_size_ok(struct drm_i915_private *dev_priv, - uint32_t width, uint32_t height) -{ - if (width == 0 || height == 0) - return false; - - /* - * 845g/865g are special in that they are only limited by - * the width of their cursors, the height is arbitrary up to - * the precision of the register. Everything else requires - * square cursors, limited to a few power-of-two sizes. - */ - if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { - if ((width & 63) != 0) - return false; - - if (width > (IS_I845G(dev_priv) ? 64 : 512)) - return false; - - if (height > 1023) - return false; - } else { - switch (width | height) { - case 256: - case 128: - if (IS_GEN2(dev_priv)) - return false; - case 64: - break; - default: - return false; - } - } - - return true; -} /* VESA 640x480x72Hz mode to set on the pipe */ static struct drm_display_mode load_detect_mode = { @@ -13668,73 +13801,6 @@ fail: return ERR_PTR(ret); } -static int -intel_check_cursor_plane(struct intel_plane *plane, - struct intel_crtc_state *crtc_state, - struct intel_plane_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - const struct drm_framebuffer *fb = state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); - enum pipe pipe = plane->pipe; - unsigned stride; - int ret; - - ret = drm_plane_helper_check_state(&state->base, - &state->clip, - DRM_PLANE_HELPER_NO_SCALING, - DRM_PLANE_HELPER_NO_SCALING, - true, true); - if (ret) - return ret; - - /* if we want to turn off the cursor ignore width and height */ - if (!obj) - return 0; - - /* Check for which cursor types we support */ - if (!cursor_size_ok(dev_priv, state->base.crtc_w, - state->base.crtc_h)) { - DRM_DEBUG("Cursor dimension %dx%d not supported\n", - state->base.crtc_w, state->base.crtc_h); - return -EINVAL; - } - - stride = roundup_pow_of_two(state->base.crtc_w) * 4; - if (obj->base.size < stride * state->base.crtc_h) { - DRM_DEBUG_KMS("buffer is too small\n"); - return -ENOMEM; - } - - if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { - DRM_DEBUG_KMS("cursor cannot be tiled\n"); - return -EINVAL; - } - - /* - * There's something wrong with the cursor on CHV pipe C. - * If it straddles the left edge of the screen then - * moving it away from the edge or disabling it often - * results in a pipe underrun, and often that can lead to - * dead pipe (constant underrun reported, and it scans - * out just a solid color). To recover from that, the - * display power well must be turned off and on again. - * Refuse the put the cursor into that compromised position. 
- */ - if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C && - state->base.visible && state->base.crtc_x < 0) { - DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n"); - return -EINVAL; - } - - if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) - state->ctl = i845_cursor_ctl(crtc_state, state); - else - state->ctl = i9xx_cursor_ctl(crtc_state, state); - - return 0; -} - static struct intel_plane * intel_cursor_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) @@ -13763,14 +13829,15 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, cursor->plane = pipe; cursor->id = PLANE_CURSOR; cursor->frontbuffer_bit = INTEL_FRONTBUFFER_CURSOR(pipe); - cursor->check_plane = intel_check_cursor_plane; if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { cursor->update_plane = i845_update_cursor; cursor->disable_plane = i845_disable_cursor; + cursor->check_plane = i845_check_cursor; } else { cursor->update_plane = i9xx_update_cursor; cursor->disable_plane = i9xx_disable_cursor; + cursor->check_plane = i9xx_check_cursor; } cursor->cursor.base = ~0; -- cgit v1.1 From 3637ecf095c80f3d19947d8af41869289f71ef1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:40 +0300 Subject: drm/i915: Generalize cursor size checks a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have the maximum cursor dimensions stored in the mode_config, so let's just consult that information instead of hardcoding the same information in multiple places. We still need to keep some per-platform checks as the limitations are quite diverse. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-10-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_display.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ed016320..de5f933 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9178,6 +9178,17 @@ static u32 intel_cursor_position(const struct intel_plane_state *plane_state) return pos; } +static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state) +{ + const struct drm_mode_config *config = + &plane_state->base.plane->dev->mode_config; + int width = plane_state->base.crtc_w; + int height = plane_state->base.crtc_h; + + return width > 0 && width <= config->cursor_width && + height > 0 && height <= config->cursor_height; +} + static int intel_check_cursor(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { @@ -9230,28 +9241,13 @@ static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); int width = plane_state->base.crtc_w; - int height = plane_state->base.crtc_h; - - if (width == 0 || height == 0) - return false; /* * 845g/865g are only limited by the width of their cursors, * the height is arbitrary up to the precision of the register. */ - if (!IS_ALIGNED(width, 64)) - return false; - - if (width > (IS_I845G(dev_priv) ? 
64 : 512)) - return false; - - if (height > 1023) - return false; - - return true; + return intel_cursor_size_ok(plane_state) && IS_ALIGNED(width, 64); } static int i845_check_cursor(struct intel_plane *plane, @@ -9387,12 +9383,10 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state) { - struct drm_i915_private *dev_priv = - to_i915(plane_state->base.plane->dev); int width = plane_state->base.crtc_w; int height = plane_state->base.crtc_h; - if (width == 0 || height == 0) + if (!intel_cursor_size_ok(plane_state)) return false; /* @@ -9402,8 +9396,6 @@ static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state) switch (width | height) { case 256: case 128: - if (IS_GEN2(dev_priv)) - return false; case 64: break; default: -- cgit v1.1 From 1e1bb8710e6018e932d18f9064d815634e3179f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:41 +0300 Subject: drm/i915: Use fb->pitches[0] in cursor code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cursor code currently ignores fb->pitches[0] (except when creating the fb itself), and just uses the cursor_width*4 as the stride. Let's make sure fb->pitches[0] actually matches what we expect it to be. We can also relax the stride vs. cursor width relationship on 845/865 since the stride is programmed separately. The only constraint is that width*cpp doesn't exceed the stride, and that's already been checked by the core since it makes sure the entire plane fits within the fb. We can also drop the bo size check as that's already checked when we create the fb. That is the fb is guaranteed to fit within the bo. v2: Rebase due to i845_cursor_ctl() and i9xx_cursor_ctl() Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson #v1 Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-11-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 48 ++++++++++++++---------------------- 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index de5f933..1c99944 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9217,26 +9217,12 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state, static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - unsigned int width = plane_state->base.crtc_w; - unsigned int stride = roundup_pow_of_two(width) * 4; - - switch (stride) { - default: - WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n", - width, stride); - stride = 256; - /* fallthrough */ - case 256: - case 512: - case 1024: - case 2048: - break; - } + const struct drm_framebuffer *fb = plane_state->base.fb; return CURSOR_ENABLE | CURSOR_GAMMA_ENABLE | CURSOR_FORMAT_ARGB | - CURSOR_STRIDE(stride); + CURSOR_STRIDE(fb->pitches[0]); } static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state) @@ -9255,8 +9241,6 @@ static int i845_check_cursor(struct intel_plane *plane, struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->base.fb; - const struct drm_i915_gem_object *obj = intel_fb_obj(fb); - unsigned int stride; int ret; ret = intel_check_cursor(crtc_state, plane_state); @@ -9264,7 +9248,7 @@ static int i845_check_cursor(struct intel_plane *plane, return ret; /* if we want to 
turn off the cursor ignore width and height */ - if (!obj) + if (!fb) return 0; /* Check for which cursor types we support */ @@ -9275,10 +9259,16 @@ static int i845_check_cursor(struct intel_plane *plane, return -EINVAL; } - stride = roundup_pow_of_two(plane_state->base.crtc_w) * 4; - if (obj->base.size < stride * plane_state->base.crtc_h) { - DRM_DEBUG_KMS("buffer is too small\n"); - return -ENOMEM; + switch (fb->pitches[0]) { + case 256: + case 512: + case 1024: + case 2048: + break; + default: + DRM_DEBUG_KMS("Invalid cursor stride (%u)\n", + fb->pitches[0]); + return -EINVAL; } plane_state->ctl = i845_cursor_ctl(crtc_state, plane_state); @@ -9411,9 +9401,7 @@ static int i9xx_check_cursor(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); const struct drm_framebuffer *fb = plane_state->base.fb; - const struct drm_i915_gem_object *obj = intel_fb_obj(fb); enum pipe pipe = plane->pipe; - unsigned int stride; int ret; ret = intel_check_cursor(crtc_state, plane_state); @@ -9421,7 +9409,7 @@ static int i9xx_check_cursor(struct intel_plane *plane, return ret; /* if we want to turn off the cursor ignore width and height */ - if (!obj) + if (!fb) return 0; /* Check for which cursor types we support */ @@ -9432,10 +9420,10 @@ static int i9xx_check_cursor(struct intel_plane *plane, return -EINVAL; } - stride = roundup_pow_of_two(plane_state->base.crtc_w) * 4; - if (obj->base.size < stride * plane_state->base.crtc_h) { - DRM_DEBUG_KMS("buffer is too small\n"); - return -ENOMEM; + if (fb->pitches[0] != plane_state->base.crtc_w * fb->format->cpp[0]) { + DRM_DEBUG_KMS("Invalid cursor stride (%u) (cursor width %d)\n", + fb->pitches[0], plane_state->base.crtc_w); + return -EINVAL; } /* -- cgit v1.1 From 024faac7d59b97be439cc645ac998dbd184bfeb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:42 +0300 Subject: drm/i915: Support variable cursor height on ivb+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IVB introduced the CUR_FBC_CTL register which allows reducing the cursor height down to 8 lines from the otherwise square cursor dimensions. Implement support for it. CUR_FBC_CTL can't be used when the cursor is rotated. Commandeer the otherwise unused cursor->cursor.size to track the current value of CUR_FBC_CTL to optimize away redundant CUR_FBC_CTL writes, and to notice when we need to arm the update via CURBASE if just CUR_FBC_CTL changes. v2: Reverse the gen check to make it sane v3: Only enable CUR_FBC_CTL when cursor is enabled, adapt to earlier code changes which means we now actually turn off the cursor when we're supposed to unlike v2 v4: Add a comment about rotation vs. 
CUR_FBC_CTL, rebase due to 'dirty' (Chris) v5: Rebase to the atomic world Handle 180 degree rotation Add HAS_CUR_FBC() v6: Rebase v7: Rebase due to I915_WRITE_FW/uncore.lock s/size/fbc_ctl/ Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-12-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_reg.h | 5 ++++- drivers/gpu/drm/i915/intel_display.c | 38 +++++++++++++++++++++++++++++------- 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 74dffbe..ff3574a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2808,6 +2808,7 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) #define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr) #define HAS_FBC(dev_priv) ((dev_priv)->info.has_fbc) +#define HAS_CUR_FBC(dev_priv) (!HAS_GMCH_DISPLAY(dev_priv) && INTEL_INFO(dev_priv)->gen >= 7) #define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2b151dc..ee144ec 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5449,7 +5449,9 @@ enum { #define CURSOR_POS_SIGN 0x8000 #define CURSOR_X_SHIFT 0 #define CURSOR_Y_SHIFT 16 -#define CURSIZE _MMIO(0x700a0) +#define CURSIZE _MMIO(0x700a0) /* 845/865 */ +#define _CUR_FBC_CTL_A 0x700a0 /* ivb+ */ +#define CUR_FBC_CTL_EN (1 << 31) #define _CURBCNTR 0x700c0 #define _CURBBASE 0x700c4 #define _CURBPOS 0x700c8 @@ -5465,6 +5467,7 @@ enum { #define CURCNTR(pipe) _CURSOR2(pipe, _CURACNTR) #define CURBASE(pipe) _CURSOR2(pipe, _CURABASE) #define CURPOS(pipe) _CURSOR2(pipe, _CURAPOS) +#define CUR_FBC_CTL(pipe) _CURSOR2(pipe, _CUR_FBC_CTL_A) #define CURSOR_A_OFFSET 0x70080 #define CURSOR_B_OFFSET 0x700c0 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1c99944..f0f38c1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9373,17 +9373,16 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); int width = plane_state->base.crtc_w; int height = plane_state->base.crtc_h; if (!intel_cursor_size_ok(plane_state)) return false; - /* - * Cursors are limited to a few power-of-two - * sizes, and they must be square. - */ - switch (width | height) { + /* Cursor width is limited to a few power-of-two sizes */ + switch (width) { case 256: case 128: case 64: @@ -9392,6 +9391,21 @@ static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state) return false; } + /* + * IVB+ have CUR_FBC_CTL which allows an arbitrary cursor + * height from 8 lines up to the cursor width, when the + * cursor is not rotated. Everything else requires square + * cursors. 
+ */ + if (HAS_CUR_FBC(dev_priv) && + plane_state->base.rotation & DRM_ROTATE_0) { + if (height < 8 || height > width) + return false; + } else { + if (height != width) + return false; + } + return true; } @@ -9453,12 +9467,15 @@ static void i9xx_update_cursor(struct intel_plane *plane, { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; - u32 cntl = 0, base = 0, pos = 0; + u32 cntl = 0, base = 0, pos = 0, fbc_ctl = 0; unsigned long irqflags; if (plane_state && plane_state->base.visible) { cntl = plane_state->ctl; + if (plane_state->base.crtc_h != plane_state->base.crtc_w) + fbc_ctl = CUR_FBC_CTL_EN | (plane_state->base.crtc_h - 1); + base = intel_cursor_base(plane_state); pos = intel_cursor_position(plane_state); } @@ -9468,10 +9485,14 @@ static void i9xx_update_cursor(struct intel_plane *plane, if (plane->cursor.cntl != cntl) I915_WRITE_FW(CURCNTR(pipe), cntl); + if (plane->cursor.size != fbc_ctl) + I915_WRITE_FW(CUR_FBC_CTL(pipe), fbc_ctl); + if (cntl) I915_WRITE_FW(CURPOS(pipe), pos); if (plane->cursor.cntl != cntl || + plane->cursor.size != fbc_ctl || plane->cursor.base != base) I915_WRITE_FW(CURBASE(pipe), base); @@ -9481,6 +9502,7 @@ static void i9xx_update_cursor(struct intel_plane *plane, plane->cursor.cntl = cntl; plane->cursor.base = base; + plane->cursor.size = fbc_ctl; } static void i9xx_disable_cursor(struct intel_plane *plane, @@ -13822,7 +13844,9 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, cursor->cursor.base = ~0; cursor->cursor.cntl = ~0; - cursor->cursor.size = ~0; + + if (IS_I845G(dev_priv) || IS_I865G(dev_priv) || HAS_CUR_FBC(dev_priv)) + cursor->cursor.size = ~0; ret = drm_universal_plane_init(&dev_priv->drm, &cursor->base, 0, &intel_cursor_plane_funcs, -- cgit v1.1 From fabac4840169217de95c8b6d64db2ddd27c192f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:43 +0300 Subject: drm/i915: Fix gen3 physical cursor alignment requirements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bspec tells us that gen3 platforms need 4KiB alignment for CURBASE rather than the 256 byte alignment required by i85x. Let's fix that and pull the code to determine the correct alignment to a helper function. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-13-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_display.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f0f38c1..d275b72 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2084,6 +2084,16 @@ intel_fill_fb_ggtt_view(struct i915_ggtt_view *view, } } +static unsigned int intel_cursor_alignment(const struct drm_i915_private *dev_priv) +{ + if (IS_I830(dev_priv)) + return 16 * 1024; + else if (IS_I85X(dev_priv)) + return 256; + else + return 4 * 1024; +} + static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv) { if (INTEL_INFO(dev_priv)->gen >= 9) @@ -13306,7 +13316,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (obj) { if (plane->type == DRM_PLANE_TYPE_CURSOR && INTEL_INFO(dev_priv)->cursor_needs_physical) { - const int align = IS_I830(dev_priv) ? 
16 * 1024 : 256; + const int align = intel_cursor_alignment(dev_priv); ret = i915_gem_object_attach_phys(obj, align); if (ret) { @@ -13619,7 +13629,7 @@ intel_legacy_cursor_update(struct drm_plane *plane, goto out_free; if (INTEL_INFO(dev_priv)->cursor_needs_physical) { - int align = IS_I830(dev_priv) ? 16 * 1024 : 256; + int align = intel_cursor_alignment(dev_priv); ret = i915_gem_object_attach_phys(intel_fb_obj(fb), align); if (ret) { -- cgit v1.1 From 1e7b4fd8944e013a803af9bd44403e90b645ef5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:44 +0300 Subject: drm/i915: Handle fb offset and src coordinates for cursors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cursor plane doesn't have any kind of source offset register, so the only form of panning possible is via a the base address register. The alignment required by CURBASE ranges from 32B to 16KiB depending on the platform. Let's make sure the user didn't ask for something we can't do. Obviously this is impossible to hit via the legacy cursor ioctl since the src offsets are always 0, but via the plane/atomic ioctls the user can ask for pretty much anything so we have to deal with this. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-14-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_display.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d275b72..0313642 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2396,11 +2396,17 @@ u32 intel_compute_tile_offset(int *x, int *y, const struct intel_plane_state *state, int plane) { - const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev); + struct intel_plane *intel_plane = to_intel_plane(state->base.plane); + struct drm_i915_private *dev_priv = to_i915(intel_plane->base.dev); const struct drm_framebuffer *fb = state->base.fb; unsigned int rotation = state->base.rotation; int pitch = intel_fb_pitch(fb, plane, rotation); - u32 alignment = intel_surf_alignment(fb, plane); + u32 alignment; + + if (intel_plane->id == PLANE_CURSOR) + alignment = intel_cursor_alignment(dev_priv); + else + alignment = intel_surf_alignment(fb, plane); return _intel_compute_tile_offset(dev_priv, x, y, fb, plane, pitch, rotation, alignment); @@ -9158,6 +9164,8 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) else base = intel_plane_ggtt_offset(plane_state); + base += plane_state->main.offset; + /* ILK+ do this automagically */ if (HAS_GMCH_DISPLAY(dev_priv) && plane_state->base.rotation & DRM_ROTATE_180) @@ -9203,6 +9211,8 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->base.fb; + int src_x, src_y; + u32 offset; int ret; ret = drm_plane_helper_check_state(&plane_state->base, @@ -9221,6 +9231,19 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state, return -EINVAL; } + src_x = plane_state->base.src_x >> 16; + src_y = plane_state->base.src_y >> 16; + + intel_add_fb_offsets(&src_x, &src_y, plane_state, 0); + offset = intel_compute_tile_offset(&src_x, &src_y, plane_state, 0); + + if (src_x != 0 || src_y != 0) { + DRM_DEBUG_KMS("Arbitrary cursor panning not supported\n"); + return -EINVAL; + } + + 
plane_state->main.offset = offset; + return 0; } -- cgit v1.1 From d9e1551ec1391dee9db8b1121007f616454e1ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:45 +0300 Subject: drm/i915: Relax 845/865 CURBASE alignemnt requirement to 32 bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Supposedly 845/865 require only 32 byte alignment for CURBASE. Let's relax the checks to allow that instead of demanding 4KiB alignment. This will allow cursor panning in 8 pixel units. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-15-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0313642..fa95170 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2090,6 +2090,8 @@ static unsigned int intel_cursor_alignment(const struct drm_i915_private *dev_pr return 16 * 1024; else if (IS_I85X(dev_priv)) return 256; + else if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) + return 32; else return 4 * 1024; } -- cgit v1.1 From e11ffddba1f363f9713442500f779693b0d7cc19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 27 Mar 2017 21:55:46 +0300 Subject: drm/i915: Simplify cursor register write sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It looks like simply writing all the cursor register every single time might be slightly faster than checking to see of each of them need to be written. So if any other register apart from CURPOS needs to be written let's just write all the registers. CURPOS is left as a special case mainly for 845/865 where we have to disable the cursor to change many of the cursor parameters. This introduces a slight chance of the cursor flickering when things get updated (since we're not currently doing the vblank evade for cursor updates). If we write CURPOS alone then that obviously can't happen. And let's follow the same pattern in the i9xx code just for symmetry. I wasn't able to see a singificant performance difference between this and just writing all the registers unconditionally. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170327185546.2977-16-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_display.c | 69 ++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fa95170..2256cf0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9332,36 +9332,28 @@ static void i845_update_cursor(struct intel_plane *plane, spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - if (plane->cursor.cntl != 0 && - (plane->cursor.base != base || - plane->cursor.size != size || - plane->cursor.cntl != cntl)) { - /* On these chipsets we can only modify the base/size/stride - * whilst the cursor is disabled. - */ + /* On these chipsets we can only modify the base/size/stride + * whilst the cursor is disabled. 
+ */ + if (plane->cursor.base != base || + plane->cursor.size != size || + plane->cursor.cntl != cntl) { I915_WRITE_FW(CURCNTR(PIPE_A), 0); - plane->cursor.cntl = 0; - } - - if (plane->cursor.base != base) I915_WRITE_FW(CURBASE(PIPE_A), base); - - if (plane->cursor.size != size) I915_WRITE_FW(CURSIZE, size); - - if (cntl) I915_WRITE_FW(CURPOS(PIPE_A), pos); - - if (plane->cursor.cntl != cntl) I915_WRITE_FW(CURCNTR(PIPE_A), cntl); + plane->cursor.base = base; + plane->cursor.size = size; + plane->cursor.cntl = cntl; + } else { + I915_WRITE_FW(CURPOS(PIPE_A), pos); + } + POSTING_READ_FW(CURCNTR(PIPE_A)); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); - - plane->cursor.cntl = cntl; - plane->cursor.base = base; - plane->cursor.size = size; } static void i845_disable_cursor(struct intel_plane *plane, @@ -9517,27 +9509,34 @@ static void i9xx_update_cursor(struct intel_plane *plane, spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - if (plane->cursor.cntl != cntl) + /* + * On some platforms writing CURCNTR first will also + * cause CURPOS to be armed by the CURBASE write. + * Without the CURCNTR write the CURPOS write would + * arm itself. + * + * CURCNTR and CUR_FBC_CTL are always + * armed by the CURBASE write only. + */ + if (plane->cursor.base != base || + plane->cursor.size != fbc_ctl || + plane->cursor.cntl != cntl) { I915_WRITE_FW(CURCNTR(pipe), cntl); - - if (plane->cursor.size != fbc_ctl) - I915_WRITE_FW(CUR_FBC_CTL(pipe), fbc_ctl); - - if (cntl) + if (HAS_CUR_FBC(dev_priv)) + I915_WRITE_FW(CUR_FBC_CTL(pipe), fbc_ctl); I915_WRITE_FW(CURPOS(pipe), pos); - - if (plane->cursor.cntl != cntl || - plane->cursor.size != fbc_ctl || - plane->cursor.base != base) I915_WRITE_FW(CURBASE(pipe), base); + plane->cursor.base = base; + plane->cursor.size = fbc_ctl; + plane->cursor.cntl = cntl; + } else { + I915_WRITE_FW(CURPOS(pipe), pos); + } + POSTING_READ_FW(CURBASE(pipe)); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); - - plane->cursor.cntl = cntl; - plane->cursor.base = base; - plane->cursor.size = fbc_ctl; } static void i9xx_disable_cursor(struct intel_plane *plane, -- cgit v1.1 From ff26ffa8ee267dcbd27a5d81cb21d1c41bfdb44a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 10 May 2017 17:19:32 +0200 Subject: drm/i915: Fix __intel_wait_for_register_fw to not sleep in atomic The unconditionally fallback to the blocking wait_for resulted in impressive fireworks at boot-up on my snb here. Make sure if we set the slow timeout to 0 that we never ever sleep. The tail of the callchain was intel_wait_for_register -> __intel_wait_for_register_fw -> usleep_range -> BOOM It blew up in intel_crt_detect load detection code on the ADPA_CRT_HOTPLUG_FORCE_TRIGGER in the ADPA register. v2: Shut up gcc. v3: Use uninitialized_var() (Chris). 
Fixes: 0564654340e2 ("drm/i915: Acquire uncore.lock over intel_uncore_wait_for_register()") Cc: Chris Wilson Cc: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Daniel Vetter Cc: Jani Nikula Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1494429572-15118-1-git-send-email-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/intel_uncore.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 2c628df..08d7d08 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1610,7 +1610,7 @@ int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, unsigned int slow_timeout_ms, u32 *out_value) { - u32 reg_value; + u32 uninitialized_var(reg_value); #define done (((reg_value = I915_READ_FW(reg)) & mask) == value) int ret; @@ -1621,7 +1621,7 @@ int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, ret = -ETIMEDOUT; if (fast_timeout_us && fast_timeout_us <= 20000) ret = _wait_for_atomic(done, fast_timeout_us, 0); - if (ret) + if (ret && slow_timeout_ms) ret = wait_for(done, slow_timeout_ms); if (out_value) -- cgit v1.1 From a8b9370fc79c1ec043194452a455b0b590be609a Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Wed, 10 May 2017 15:04:51 +0000 Subject: drm/i915/guc: Dump the GuC stage descriptor pool in debugfs We are missing pieces of information that could be useful for GuC debugging. v2: Reuse some code (Joonas) Cc: Daniele Ceraolo Spurio Reviewed-by: Joonas Lahtinen Signed-off-by: Oscar Mateo [Joonas: Removed extra newline and s/uint32_t/u32/ for checkpatch.pl] Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1494428691-20672-1-git-send-email-oscar.mateo@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 76 ++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 7830337..bd9abef 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2494,22 +2494,33 @@ static void i915_guc_client_info(struct seq_file *m, seq_printf(m, "\tTotal: %llu\n", tot); } -static int i915_guc_info(struct seq_file *m, void *data) +static bool check_guc_submission(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_guc *guc = &dev_priv->guc; - struct intel_engine_cs *engine; - enum intel_engine_id id; - u64 total; if (!guc->execbuf_client) { seq_printf(m, "GuC submission %s\n", HAS_GUC_SCHED(dev_priv) ? 
"disabled" : "not supported"); - return 0; + return false; } + return true; +} + +static int i915_guc_info(struct seq_file *m, void *data) +{ + struct drm_i915_private *dev_priv = node_to_i915(m->private); + const struct intel_guc *guc = &dev_priv->guc; + struct intel_engine_cs *engine; + enum intel_engine_id id; + u64 total; + + if (!check_guc_submission(m)) + return 0; + seq_printf(m, "Doorbell map:\n"); seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap); seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline); @@ -2540,6 +2551,60 @@ static int i915_guc_info(struct seq_file *m, void *data) return 0; } +static int i915_guc_stage_pool(struct seq_file *m, void *data) +{ + struct drm_i915_private *dev_priv = node_to_i915(m->private); + const struct intel_guc *guc = &dev_priv->guc; + struct guc_stage_desc *desc = guc->stage_desc_pool_vaddr; + struct i915_guc_client *client = guc->execbuf_client; + unsigned int tmp; + int index; + + if (!check_guc_submission(m)) + return 0; + + for (index = 0; index < GUC_MAX_STAGE_DESCRIPTORS; index++, desc++) { + struct intel_engine_cs *engine; + + if (!(desc->attribute & GUC_STAGE_DESC_ATTR_ACTIVE)) + continue; + + seq_printf(m, "GuC stage descriptor %u:\n", index); + seq_printf(m, "\tIndex: %u\n", desc->stage_id); + seq_printf(m, "\tAttribute: 0x%x\n", desc->attribute); + seq_printf(m, "\tPriority: %d\n", desc->priority); + seq_printf(m, "\tDoorbell id: %d\n", desc->db_id); + seq_printf(m, "\tEngines used: 0x%x\n", + desc->engines_used); + seq_printf(m, "\tDoorbell trigger phy: 0x%llx, cpu: 0x%llx, uK: 0x%x\n", + desc->db_trigger_phy, + desc->db_trigger_cpu, + desc->db_trigger_uk); + seq_printf(m, "\tProcess descriptor: 0x%x\n", + desc->process_desc); + seq_printf(m, "\tWorkqueue adddress: 0x%x, size: 0x%x\n", + desc->wq_addr, desc->wq_size); + seq_putc(m, '\n'); + + for_each_engine_masked(engine, dev_priv, client->engines, tmp) { + u32 guc_engine_id = engine->guc_id; + struct guc_execlist_context *lrc = + &desc->lrc[guc_engine_id]; + + seq_printf(m, "\t%s LRC:\n", engine->name); + seq_printf(m, "\t\tContext desc: 0x%x\n", + lrc->context_desc); + seq_printf(m, "\t\tContext id: 0x%x\n", lrc->context_id); + seq_printf(m, "\t\tLRCA: 0x%x\n", lrc->ring_lrca); + seq_printf(m, "\t\tRing begin: 0x%x\n", lrc->ring_begin); + seq_printf(m, "\t\tRing end: 0x%x\n", lrc->ring_end); + seq_putc(m, '\n'); + } + } + + return 0; +} + static int i915_guc_log_dump(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -4746,6 +4811,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_guc_info", i915_guc_info, 0}, {"i915_guc_load_status", i915_guc_load_status_info, 0}, {"i915_guc_log_dump", i915_guc_log_dump, 0}, + {"i915_guc_stage_pool", i915_guc_stage_pool, 0}, {"i915_huc_load_status", i915_huc_load_status_info, 0}, {"i915_frequency_info", i915_frequency_info, 0}, {"i915_hangcheck_info", i915_hangcheck_info, 0}, -- cgit v1.1 From 749d98b80b9e3743fa0297afb42441c551bf9c52 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 11 May 2017 10:28:43 +0200 Subject: drm/i915: Fix hw state verifier access to crtc->state. We shouldn't inspect crtc->state, instead grab the crtc state. At this point the hw state verifier should be able to run even if crtc->state has been updated (which cannot currently happen). 
Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170511082844.13965-1-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2256cf0..849e854 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5935,9 +5935,10 @@ void intel_encoder_destroy(struct drm_encoder *encoder) /* Cross check the actual hw state with our own modeset state tracking (and it's * internal consistency). */ -static void intel_connector_verify_state(struct intel_connector *connector) +static void intel_connector_verify_state(struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) { - struct drm_crtc *crtc = connector->base.state->crtc; + struct intel_connector *connector = to_intel_connector(conn_state->connector); DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.base.id, @@ -5945,15 +5946,14 @@ static void intel_connector_verify_state(struct intel_connector *connector) if (connector->get_hw_state(connector)) { struct intel_encoder *encoder = connector->encoder; - struct drm_connector_state *conn_state = connector->base.state; - I915_STATE_WARN(!crtc, + I915_STATE_WARN(!crtc_state, "connector enabled without attached crtc\n"); - if (!crtc) + if (!crtc_state) return; - I915_STATE_WARN(!crtc->state->active, + I915_STATE_WARN(!crtc_state->active, "connector is active, but attached crtc isn't\n"); if (!encoder || encoder->type == INTEL_OUTPUT_DP_MST) @@ -5965,9 +5965,9 @@ static void intel_connector_verify_state(struct intel_connector *connector) I915_STATE_WARN(conn_state->crtc != encoder->base.crtc, "attached encoder crtc differs from connector crtc\n"); } else { - I915_STATE_WARN(crtc && crtc->state->active, + I915_STATE_WARN(crtc_state && crtc_state->active, "attached crtc is active, but connector isn't\n"); - I915_STATE_WARN(!crtc && connector->base.state->best_encoder, + I915_STATE_WARN(!crtc_state && conn_state->best_encoder, "best encoder set without crtc!\n"); } } @@ -12122,11 +12122,15 @@ verify_connector_state(struct drm_device *dev, for_each_new_connector_in_state(state, connector, new_conn_state, i) { struct drm_encoder *encoder = connector->encoder; + struct drm_crtc_state *crtc_state = NULL; if (new_conn_state->crtc != crtc) continue; - intel_connector_verify_state(to_intel_connector(connector)); + if (crtc) + crtc_state = drm_atomic_get_new_crtc_state(state, new_conn_state->crtc); + + intel_connector_verify_state(crtc_state, new_conn_state); I915_STATE_WARN(new_conn_state->best_encoder != encoder, "connector's atomic encoder doesn't match legacy encoder\n"); @@ -12244,7 +12248,7 @@ verify_crtc_state(struct drm_crtc *crtc, intel_pipe_config_sanity_check(dev_priv, pipe_config); - sw_config = to_intel_crtc_state(crtc->state); + sw_config = to_intel_crtc_state(new_crtc_state); if (!intel_pipe_config_compare(dev_priv, sw_config, pipe_config, false)) { I915_STATE_WARN(1, "pipe state doesn't match!\n"); -- cgit v1.1 From eb8bc8dcc5354a1f85d4ea79b1f466dc6273f832 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 11 May 2017 10:28:44 +0200 Subject: drm/i915: Remove vma unpin in intel_plane_destroy commit a667fb402c1e856209bf9e77ba41fc1cf356b867 Author: Maarten Lankhorst Date: Thu Dec 15 15:29:44 2016 +0100 drm/i915: Disable all crtcs during driver unload, v2. 
made sure that all crtc's are disabled on driver unload, but only the following commit made sure all fb's are cleaned up correctly: commit 9b2104f423de5c148749a07e8197dbab4c449877 Author: Maarten Lankhorst Date: Tue Feb 21 14:51:40 2017 +0100 drm/atomic: Make disable_all helper fully disable the crtc. Finally remove this and add a WARN_ON when vma is set. It should have been removed by intel_cleanup_plane_fb(). Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170511082844.13965-2-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_atomic_plane.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 182dc2a..4a0ed02 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -102,23 +102,7 @@ void intel_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { - struct i915_vma *vma; - - vma = fetch_and_zero(&to_intel_plane_state(state)->vma); - - /* - * FIXME: Normally intel_cleanup_plane_fb handles destruction of vma. - * We currently don't clear all planes during driver unload, so we have - * to be able to unpin vma here for now. - * - * Normally this can only happen during unload when kmscon is disabled - * and userspace doesn't attempt to set a framebuffer at all. - */ - if (vma) { - mutex_lock(&plane->dev->struct_mutex); - intel_unpin_fb_vma(vma); - mutex_unlock(&plane->dev->struct_mutex); - } + WARN_ON(to_intel_plane_state(state)->vma); drm_atomic_helper_plane_destroy_state(plane, state); } -- cgit v1.1 From a03aac442d6f4ca4525e526047d831efb77b436a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 10 May 2017 12:59:26 +0000 Subject: drm/i915/guc: Move notification code into virtual function Prepare for alternate GuC notification mechanism. Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio [Joonas: Added newlines] Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_uc.c | 10 +++++++++- drivers/gpu/drm/i915/intel_uc.h | 9 +++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 7fd75ca..72f49e6 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -94,12 +94,20 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) i915.enable_guc_submission = HAS_GUC_SCHED(dev_priv); } +static void guc_write_irq_trigger(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); +} + void intel_uc_init_early(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; mutex_init(&guc->send_mutex); guc->send = intel_guc_send_nop; + guc->notify = guc_write_irq_trigger; } static void fetch_uc_fw(struct drm_i915_private *dev_priv, @@ -413,7 +421,7 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) POSTING_READ(SOFT_SCRATCH(i - 1)); - I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER); + intel_guc_notify(guc); /* * No GuC command should ever take longer than 10ms. 
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 1e0eecd..2af5633 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -210,6 +210,9 @@ struct intel_guc { /* GuC's FW specific send function */ int (*send)(struct intel_guc *guc, const u32 *data, u32 len); + + /* GuC's FW specific notify function */ + void (*notify)(struct intel_guc *guc); }; struct intel_huc { @@ -229,11 +232,17 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv); int intel_guc_sample_forcewake(struct intel_guc *guc); int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); + static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { return guc->send(guc, action, len); } +static inline void intel_guc_notify(struct intel_guc *guc) +{ + guc->notify(guc); +} + /* intel_guc_loader.c */ int intel_guc_select_fw(struct intel_guc *guc); int intel_guc_init_hw(struct intel_guc *guc); -- cgit v1.1 From a0c1fe219080d6b23270d5c7f7d773e7d753177a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 10 May 2017 12:59:27 +0000 Subject: drm/i915/guc: Make scratch register base and count flexible We are using some scratch registers in MMIO based send function. Make their base and count flexible in preparation of upcoming GuC firmware/hardware changes. While around, change cmd len parameter verification from WARN_ON to GEM_BUG_ON as we don't need this all the time. v2: call out WARN/GEM_BUG change in the commit msg (Daniele) v3: don't overqualify the ints (Chris) v4: rebase and use proper enum Signed-off-by: Michal Wajdeczko Suggested-by: Daniele Ceraolo Spurio Cc: Daniele Ceraolo Spurio Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Jani Nikula Reviewed-by: Daniele Ceraolo Spurio Reviewed-by: Joonas Lahtinen Acked-by: Jani Nikula Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_uc.c | 41 ++++++++++++++++++++++++++++++++++------- drivers/gpu/drm/i915/intel_uc.h | 7 +++++++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 72f49e6..07c5658 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -260,9 +260,36 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv) __intel_uc_fw_fini(&dev_priv->huc.fw); } +static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i) +{ + GEM_BUG_ON(!guc->send_regs.base); + GEM_BUG_ON(!guc->send_regs.count); + GEM_BUG_ON(i >= guc->send_regs.count); + + return _MMIO(guc->send_regs.base + 4 * i); +} + +static void guc_init_send_regs(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + enum forcewake_domains fw_domains = 0; + unsigned int i; + + guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0)); + guc->send_regs.count = SOFT_SCRATCH_COUNT - 1; + + for (i = 0; i < guc->send_regs.count; i++) { + fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, + guc_send_reg(guc, i), + FW_REG_READ | FW_REG_WRITE); + } + guc->send_regs.fw_domains = fw_domains; +} + static int guc_enable_communication(struct intel_guc *guc) { /* XXX: placeholder for alternate setup */ + guc_init_send_regs(guc); guc->send = intel_guc_send_mmio; return 0; } @@ -407,19 +434,19 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) int i; int ret; - if (WARN_ON(len < 1 || len > 15)) - return -EINVAL; + GEM_BUG_ON(!len); + GEM_BUG_ON(len > 
guc->send_regs.count); mutex_lock(&guc->send_mutex); - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_BLITTER); + intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains); dev_priv->guc.action_count += 1; dev_priv->guc.action_cmd = action[0]; for (i = 0; i < len; i++) - I915_WRITE(SOFT_SCRATCH(i), action[i]); + I915_WRITE(guc_send_reg(guc, i), action[i]); - POSTING_READ(SOFT_SCRATCH(i - 1)); + POSTING_READ(guc_send_reg(guc, i - 1)); intel_guc_notify(guc); @@ -428,7 +455,7 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) * Fast commands should still complete in 10us. */ ret = __intel_wait_for_register_fw(dev_priv, - SOFT_SCRATCH(0), + guc_send_reg(guc, 0), INTEL_GUC_RECV_MASK, INTEL_GUC_RECV_MASK, 10, 10, &status); @@ -450,7 +477,7 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) } dev_priv->guc.action_status = status; - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER); + intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); mutex_unlock(&guc->send_mutex); return ret; diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 2af5633..7618b71 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -205,6 +205,13 @@ struct intel_guc { uint64_t submissions[I915_NUM_ENGINES]; uint32_t last_seqno[I915_NUM_ENGINES]; + /* GuC's FW specific registers used in MMIO send */ + struct { + u32 base; + unsigned int count; + enum forcewake_domains fw_domains; + } send_regs; + /* To serialize the intel_guc_send actions */ struct mutex send_mutex; -- cgit v1.1 From 73cc0b9aa9afa5ba65d92e46ded61d29430d72a4 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Wed, 10 May 2017 14:00:40 +0300 Subject: drm/i915: Do not sync RCU during shrinking Due to the complex dependencies between workqueues and RCU, which are not easily detected by lockdep, do not synchronize RCU during shrinking. On low-on-memory systems (mem=1G for example), the RCU sync leads to all system workqueus freezing and unrelated lockdep splats are displayed according to reports. GIT bisecting done by J. R. Okajima points to the commit where RCU syncing was extended. RCU sync gains us very little benefit in real life scenarios where the amount of memory used by object backing storage is dominant over the metadata under RCU, so drop it altogether. " Yeeeaah, if core could just, go ahead and reclaim RCU queues, that'd be great. " - Chris Wilson, 2016 (0eafec6d3244) v2: More information to commit message. v3: Remove "grep _rcu_" escapee from i915_gem_shrink_all (Andrea) Fixes: c053b5a506d3 ("drm/i915: Don't call synchronize_rcu_expedited under struct_mutex") Suggested-by: Chris Wilson Reported-by: J. R. Okajima Signed-off-by: Joonas Lahtinen Reviewed-by: Chris Wilson Tested-by: Hugh Dickins Tested-by: Andrea Arcangeli Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: J. R. 
Okajima Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jani Nikula Cc: # v4.11+ Link: http://patchwork.freedesktop.org/patch/msgid/1494414040-11160-1-git-send-email-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 0e7352d..b409e67 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -59,9 +59,6 @@ static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock) return; mutex_unlock(&dev_priv->drm.struct_mutex); - - /* expedite the RCU grace period to free some request slabs */ - synchronize_rcu_expedited(); } static bool any_vma_pinned(struct drm_i915_gem_object *obj) @@ -274,8 +271,6 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) I915_SHRINK_ACTIVE); intel_runtime_pm_put(dev_priv); - synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */ - return freed; } -- cgit v1.1 From 0d402a24df8c8160727af934d83293f3d44d31a3 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Thu, 11 May 2017 18:07:42 +0800 Subject: drm/i915: set initialised only when init_context callback is NULL During execlist_context_deferred_alloc() we presumed that the context is uninitialised (we only just allocated the state object for it!) and chose to optimise away the later call to engine->init_context() if engine->init_context were NULL. This breaks with GVT's contexts that are marked as pre-initialised to avoid us annoyingly calling engine->init_context(). The fix is to not override ce->initialised if it is already true. Cc: Chris Wilson Signed-off-by: Chuanxiao Dong Link: http://patchwork.freedesktop.org/patch/msgid/1494497262-24855-1-git-send-email-chuanxiao.dong@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 319d9a8..9a1192d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1956,7 +1956,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ce->ring = ring; ce->state = vma; - ce->initialised = engine->init_context == NULL; + ce->initialised |= engine->init_context == NULL; return 0; -- cgit v1.1 From 1f23475c893a85c934143cd64865ebb9b6af383f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 12 May 2017 10:14:23 +0100 Subject: drm/i915: don't do allocate_va_range again on PIN_UPDATE If a vma is already bound to a ppgtt, we incorrectly call allocate_va_range again when doing a PIN_UPDATE, which will result in over accounting within our paging structures, such that when we do unbind something we don't actually destroy the structures and end up inadvertently recycling them. In reality this probably isn't too bad, but once we start touching PDEs and PDPEs for 64K/2M/1G pages this apparent recycling will manifest into lots of really, really subtle bugs. 
v2: Fix the testing of vma->flags for aliasing_ppgtt_bind_vma Fixes: ff685975d97f ("drm/i915: Move allocate_va_range to GTT") Signed-off-by: Matthew Auld Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170512091423.26085-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ac2b8f6..fa51903 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -193,9 +193,12 @@ static int ppgtt_bind_vma(struct i915_vma *vma, u32 pte_flags; int ret; - ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); - if (ret) - return ret; + if (!(vma->flags & I915_VMA_LOCAL_BIND)) { + ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, + vma->size); + if (ret) + return ret; + } vma->pages = vma->obj->mm.pages; @@ -2304,7 +2307,8 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, if (flags & I915_VMA_LOCAL_BIND) { struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; - if (appgtt->base.allocate_va_range) { + if (!(vma->flags & I915_VMA_LOCAL_BIND) && + appgtt->base.allocate_va_range) { ret = appgtt->base.allocate_va_range(&appgtt->base, vma->node.start, vma->node.size); -- cgit v1.1 From a644ca9bcdac3b9e9f0fd2726c9198e85cecfbee Mon Sep 17 00:00:00 2001 From: Puthikorn Voravootivat Date: Thu, 11 May 2017 16:02:17 -0700 Subject: drm/i915: Fix cap check for intel_dp_aux_backlight driver intel_dp_aux_backlight driver should check for the DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP before enable the driver. Signed-off-by: Puthikorn Voravootivat Reviewed-by: Dhinakaran Pandiyan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170511230225.142870-2-puthik@chromium.org --- drivers/gpu/drm/i915/intel_dp_aux_backlight.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c index 6532e22..341bf2c 100644 --- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c @@ -144,6 +144,7 @@ intel_dp_aux_display_control_capable(struct intel_connector *connector) */ if (intel_dp->edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP && (intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) && + (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP) && !((intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_PIN_ENABLE_CAP) || (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP))) { DRM_DEBUG_KMS("AUX Backlight Control Supported!\n"); -- cgit v1.1 From 73ab484c90f5a696a6552c82f5d64b13303c0813 Mon Sep 17 00:00:00 2001 From: Puthikorn Voravootivat Date: Thu, 11 May 2017 16:02:18 -0700 Subject: drm/i915: Correctly enable backlight brightness adjustment via DPCD intel_dp_aux_enable_backlight() assumed that the register BACKLIGHT_BRIGHTNESS_CONTROL_MODE can only has value 01 (DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET) when initialize. This patch fixed that by handling all cases of that register. 
Signed-off-by: Puthikorn Voravootivat Reviewed-by: Dhinakaran Pandiyan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170511230225.142870-3-puthik@chromium.org --- drivers/gpu/drm/i915/intel_dp_aux_backlight.c | 33 ++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c index 341bf2c..870c03f 100644 --- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c @@ -97,15 +97,36 @@ static void intel_dp_aux_enable_backlight(struct intel_connector *connector) { struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base); uint8_t dpcd_buf = 0; + uint8_t edp_backlight_mode = 0; set_aux_backlight_enable(intel_dp, true); - if ((drm_dp_dpcd_readb(&intel_dp->aux, - DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) == 1) && - ((dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) == - DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET)) - drm_dp_dpcd_writeb(&intel_dp->aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, - (dpcd_buf | DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD)); + if (drm_dp_dpcd_readb(&intel_dp->aux, + DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) != 1) { + DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n", + DP_EDP_BACKLIGHT_MODE_SET_REGISTER); + return; + } + + edp_backlight_mode = dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK; + + switch (edp_backlight_mode) { + case DP_EDP_BACKLIGHT_CONTROL_MODE_PWM: + case DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET: + case DP_EDP_BACKLIGHT_CONTROL_MODE_PRODUCT: + dpcd_buf &= ~DP_EDP_BACKLIGHT_CONTROL_MODE_MASK; + dpcd_buf |= DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD; + if (drm_dp_dpcd_writeb(&intel_dp->aux, + DP_EDP_BACKLIGHT_MODE_SET_REGISTER, dpcd_buf) < 0) { + DRM_DEBUG_KMS("Failed to write aux backlight mode\n"); + } + break; + + /* Do nothing when it is already DPCD mode */ + case DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD: + default: + break; + } } static void intel_dp_aux_disable_backlight(struct intel_connector *connector) -- cgit v1.1 From e9c9e5ae8b57a6f29aecc303b36173e0be7fdd60 Mon Sep 17 00:00:00 2001 From: Puthikorn Voravootivat Date: Thu, 11 May 2017 16:02:21 -0700 Subject: drm/i915: Set backlight mode before enable backlight We should set backlight mode register before set register to enable the backlight. 
Signed-off-by: Puthikorn Voravootivat Reviewed-by: Dhinakaran Pandiyan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170511230225.142870-6-puthik@chromium.org --- drivers/gpu/drm/i915/intel_dp_aux_backlight.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c index 870c03f..5f945e0 100644 --- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c @@ -99,8 +99,6 @@ static void intel_dp_aux_enable_backlight(struct intel_connector *connector) uint8_t dpcd_buf = 0; uint8_t edp_backlight_mode = 0; - set_aux_backlight_enable(intel_dp, true); - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) != 1) { DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n", @@ -127,6 +125,8 @@ static void intel_dp_aux_enable_backlight(struct intel_connector *connector) default: break; } + + set_aux_backlight_enable(intel_dp, true); } static void intel_dp_aux_disable_backlight(struct intel_connector *connector) -- cgit v1.1 From 2630addf6b1e7beb21a5e8ec04562cae2e5d9567 Mon Sep 17 00:00:00 2001 From: Puthikorn Voravootivat Date: Thu, 11 May 2017 16:02:23 -0700 Subject: drm/i915: Restore brightness level in aux backlight driver Some panel will default to zero brightness when turning the panel off and on again. This patch restores last brightness level back when panel is turning back on. Signed-off-by: Puthikorn Voravootivat Reviewed-by: Dhinakaran Pandiyan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170511230225.142870-8-puthik@chromium.org --- drivers/gpu/drm/i915/intel_dp_aux_backlight.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c index 5f945e0..b87c5a3 100644 --- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c @@ -127,6 +127,7 @@ static void intel_dp_aux_enable_backlight(struct intel_connector *connector) } set_aux_backlight_enable(intel_dp, true); + intel_dp_aux_set_backlight(connector, connector->panel.backlight.level); } static void intel_dp_aux_disable_backlight(struct intel_connector *connector) -- cgit v1.1 From e81b3a555f27cae5381ab148df3fa543e1b93ea2 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Thu, 11 May 2017 16:43:24 +0100 Subject: drm/i915/perf: fix gen7_append_oa_reports comment If I'm going to complain about a back-to-front convention then the least I can do is not muddle the comment up too. Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170511154345.962-2-lionel.g.landwerlin@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index cdac685..6227e48 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -431,7 +431,7 @@ static int append_oa_sample(struct i915_perf_stream *stream, * userspace. * * Note: reports are consumed from the head, and appended to the - * tail, so the head chases the tail?... If you think that's mad + * tail, so the tail chases the head?... If you think that's mad * and back-to-front you're not alone, but this follows the * Gen PRM naming convention. 
* -- cgit v1.1 From 26ebd9c734d5b807055c333b29504b05fe971185 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Thu, 11 May 2017 16:43:25 +0100 Subject: drm/i915/perf: avoid poll, read, EAGAIN busy loops If the function for checking whether there is OA buffer data available (during a poll or blocking read) has false positives then we want to avoid a situation where the subsequent read() returns EAGAIN (after a more accurate check) followed by a poll() immediately reporting the same false positive POLLIN event and effectively maintaining a busy loop until there really is data. This makes sure that we clear the .pollin event status whenever we return EAGAIN to userspace which will throttle subsequent POLLIN events and repeated attempts to read to the 5ms intervals of the hrtimer callback we have. Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170511154345.962-3-lionel.g.landwerlin@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_perf.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 6227e48..e115889 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1351,7 +1351,15 @@ static ssize_t i915_perf_read(struct file *file, mutex_unlock(&dev_priv->perf.lock); } - if (ret >= 0) { + /* We allow the poll checking to sometimes report false positive POLLIN + * events where we might actually report EAGAIN on read() if there's + * not really any data available. In this situation though we don't + * want to enter a busy loop between poll() reporting a POLLIN event + * and read() returning -EAGAIN. Clearing the oa.pollin state here + * effectively ensures we back off until the next hrtimer callback + * before reporting another POLLIN event. + */ + if (ret >= 0 || ret == -EAGAIN) { /* Maybe make ->pollin per-stream state if we support multiple * concurrent streams in the future. */ -- cgit v1.1 From f279020a02c187d352d40e0dc33d7439f0fc2cce Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Thu, 11 May 2017 16:43:26 +0100 Subject: drm/i915/perf: avoid read back of head register There's no need for the driver to keep reading back the head pointer from hardware since the hardware doesn't update it automatically. This way we can treat any invalid head pointer value as a software/driver bug instead of spurious hardware behaviour. This change is also a small stepping stone towards re-working how the head and tail state is managed as part of an improved workaround for the tail register race condition. 
Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170511154345.962-4-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 11 ++++++++++ drivers/gpu/drm/i915/i915_perf.c | 46 ++++++++++++++++++---------------------- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ff3574a..080dcb0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2372,6 +2372,17 @@ struct drm_i915_private { u8 *vaddr; int format; int format_size; + + /** + * Although we can always read back the head + * pointer register, we prefer to avoid + * trusting the HW state, just to avoid any + * risk that some hardware condition could + * somehow bump the head pointer unpredictably + * and cause us to forward the wrong OA buffer + * data to userspace. + */ + u32 head; } oa_buffer; u32 gen7_latched_oastatus1; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index e115889..838ebc0 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -322,9 +322,8 @@ struct perf_open_properties { static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv) { int report_size = dev_priv->perf.oa.oa_buffer.format_size; - u32 oastatus2 = I915_READ(GEN7_OASTATUS2); u32 oastatus1 = I915_READ(GEN7_OASTATUS1); - u32 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK; + u32 head = dev_priv->perf.oa.oa_buffer.head; u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; return OA_TAKEN(tail, head) < @@ -458,16 +457,24 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, return -EIO; head = *head_ptr - gtt_offset; + + /* An out of bounds or misaligned head pointer implies a driver bug + * since we are in full control of head pointer which should only + * be incremented by multiples of the report size (notably also + * all a power of two). + */ + if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size, + "Inconsistent OA buffer head pointer = %u\n", head)) + return -EIO; + tail -= gtt_offset; /* The OA unit is expected to wrap the tail pointer according to the OA - * buffer size and since we should never write a misaligned head - * pointer we don't expect to read one back either... 
+ * buffer size */ - if (tail > OA_BUFFER_SIZE || head > OA_BUFFER_SIZE || - head % report_size) { - DRM_ERROR("Inconsistent OA buffer pointer (head = %u, tail = %u): force restart\n", - head, tail); + if (tail > OA_BUFFER_SIZE) { + DRM_ERROR("Inconsistent OA buffer tail pointer = %u: force restart\n", + tail); dev_priv->perf.oa.ops.oa_disable(dev_priv); dev_priv->perf.oa.ops.oa_enable(dev_priv); *head_ptr = I915_READ(GEN7_OASTATUS2) & @@ -562,8 +569,6 @@ static int gen7_oa_read(struct i915_perf_stream *stream, size_t *offset) { struct drm_i915_private *dev_priv = stream->dev_priv; - int report_size = dev_priv->perf.oa.oa_buffer.format_size; - u32 oastatus2; u32 oastatus1; u32 head; u32 tail; @@ -572,10 +577,9 @@ static int gen7_oa_read(struct i915_perf_stream *stream, if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) return -EIO; - oastatus2 = I915_READ(GEN7_OASTATUS2); oastatus1 = I915_READ(GEN7_OASTATUS1); - head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK; + head = dev_priv->perf.oa.oa_buffer.head; tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; /* XXX: On Haswell we don't have a safe way to clear oastatus1 @@ -616,10 +620,9 @@ static int gen7_oa_read(struct i915_perf_stream *stream, dev_priv->perf.oa.ops.oa_disable(dev_priv); dev_priv->perf.oa.ops.oa_enable(dev_priv); - oastatus2 = I915_READ(GEN7_OASTATUS2); oastatus1 = I915_READ(GEN7_OASTATUS1); - head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK; + head = dev_priv->perf.oa.oa_buffer.head; tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; } @@ -635,17 +638,6 @@ static int gen7_oa_read(struct i915_perf_stream *stream, ret = gen7_append_oa_reports(stream, buf, count, offset, &head, tail); - /* All the report sizes are a power of two and the - * head should always be incremented by some multiple - * of the report size. - * - * A warning here, but notably if we later read back a - * misaligned pointer we will treat that as a bug since - * it could lead to a buffer overrun. - */ - WARN_ONCE(head & (report_size - 1), - "i915: Writing misaligned OA head pointer"); - /* Note: we update the head pointer here even if an error * was returned since the error may represent a short read * where some some reports were successfully copied. @@ -653,6 +645,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream, I915_WRITE(GEN7_OASTATUS2, ((head & GEN7_OASTATUS2_HEAD_MASK) | OA_MEM_SELECT_GGTT)); + dev_priv->perf.oa.oa_buffer.head = head; return ret; } @@ -833,7 +826,10 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) * before OASTATUS1, but after OASTATUS2 */ I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */ + dev_priv->perf.oa.oa_buffer.head = gtt_offset; + I915_WRITE(GEN7_OABUFFER, gtt_offset); + I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ /* On Haswell we have to track which OASTATUS1 flags we've -- cgit v1.1 From 3bb335c1e73a709c3c1ff0015faa87e60e03d17d Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Thu, 11 May 2017 16:43:27 +0100 Subject: drm/i915/perf: no head/tail ref in gen7_oa_read This avoids redundantly passing an (inout) head and tail pointer to gen7_append_oa_reports() from gen7_oa_read which doesn't need to reference either itself. Moving the head/tail reads and writes into gen7_append_oa_reports should have no functional effect except to avoid some redundant head pointer writes in cases where nothing was copied to userspace. This is a stepping stone towards updating how the head and tail pointer state is managed to improve the workaround for the OA unit's tail pointer race. 
It reduces the number of places we need to read/write the head and tail pointers. Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170511154345.962-5-lionel.g.landwerlin@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_perf.c | 50 +++++++++++++++------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 838ebc0..29cad6b 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -420,8 +420,6 @@ static int append_oa_sample(struct i915_perf_stream *stream, * @buf: destination buffer given by userspace * @count: the number of bytes userspace wants to read * @offset: (inout): the current position for writing into @buf - * @head_ptr: (inout): the current oa buffer cpu read position - * @tail: the current oa buffer gpu write position * * Notably any error condition resulting in a short read (-%ENOSPC or * -%EFAULT) will be returned even though one or more records may @@ -439,9 +437,7 @@ static int append_oa_sample(struct i915_perf_stream *stream, static int gen7_append_oa_reports(struct i915_perf_stream *stream, char __user *buf, size_t count, - size_t *offset, - u32 *head_ptr, - u32 tail) + size_t *offset) { struct drm_i915_private *dev_priv = stream->dev_priv; int report_size = dev_priv->perf.oa.oa_buffer.format_size; @@ -449,14 +445,15 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, int tail_margin = dev_priv->perf.oa.tail_margin; u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); u32 mask = (OA_BUFFER_SIZE - 1); - u32 head; + size_t start_offset = *offset; + u32 head, oastatus1, tail; u32 taken; int ret = 0; if (WARN_ON(!stream->enabled)) return -EIO; - head = *head_ptr - gtt_offset; + head = dev_priv->perf.oa.oa_buffer.head - gtt_offset; /* An out of bounds or misaligned head pointer implies a driver bug * since we are in full control of head pointer which should only @@ -467,7 +464,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, "Inconsistent OA buffer head pointer = %u\n", head)) return -EIO; - tail -= gtt_offset; + oastatus1 = I915_READ(GEN7_OASTATUS1); + tail = (oastatus1 & GEN7_OASTATUS1_TAIL_MASK) - gtt_offset; /* The OA unit is expected to wrap the tail pointer according to the OA * buffer size @@ -477,8 +475,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, tail); dev_priv->perf.oa.ops.oa_disable(dev_priv); dev_priv->perf.oa.ops.oa_enable(dev_priv); - *head_ptr = I915_READ(GEN7_OASTATUS2) & - GEN7_OASTATUS2_HEAD_MASK; return -EIO; } @@ -542,7 +538,17 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, report32[0] = 0; } - *head_ptr = gtt_offset + head; + if (start_offset != *offset) { + /* We removed the gtt_offset for the copy loop above, indexing + * relative to oa_buf_base so put back here... 
+ */ + head += gtt_offset; + + I915_WRITE(GEN7_OASTATUS2, + ((head & GEN7_OASTATUS2_HEAD_MASK) | + OA_MEM_SELECT_GGTT)); + dev_priv->perf.oa.oa_buffer.head = head; + } return ret; } @@ -570,8 +576,6 @@ static int gen7_oa_read(struct i915_perf_stream *stream, { struct drm_i915_private *dev_priv = stream->dev_priv; u32 oastatus1; - u32 head; - u32 tail; int ret; if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr)) @@ -579,9 +583,6 @@ static int gen7_oa_read(struct i915_perf_stream *stream, oastatus1 = I915_READ(GEN7_OASTATUS1); - head = dev_priv->perf.oa.oa_buffer.head; - tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; - /* XXX: On Haswell we don't have a safe way to clear oastatus1 * bits while the OA unit is enabled (while the tail pointer * may be updated asynchronously) so we ignore status bits @@ -621,9 +622,6 @@ static int gen7_oa_read(struct i915_perf_stream *stream, dev_priv->perf.oa.ops.oa_enable(dev_priv); oastatus1 = I915_READ(GEN7_OASTATUS1); - - head = dev_priv->perf.oa.oa_buffer.head; - tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; } if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) { @@ -635,19 +633,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream, GEN7_OASTATUS1_REPORT_LOST; } - ret = gen7_append_oa_reports(stream, buf, count, offset, - &head, tail); - - /* Note: we update the head pointer here even if an error - * was returned since the error may represent a short read - * where some some reports were successfully copied. - */ - I915_WRITE(GEN7_OASTATUS2, - ((head & GEN7_OASTATUS2_HEAD_MASK) | - OA_MEM_SELECT_GGTT)); - dev_priv->perf.oa.oa_buffer.head = head; - - return ret; + return gen7_append_oa_reports(stream, buf, count, offset); } /** -- cgit v1.1 From 0dd860cf73186e8bf0e90824ff7e0763bb70805e Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Thu, 11 May 2017 16:43:28 +0100 Subject: drm/i915/perf: improve tail race workaround There's a HW race condition between OA unit tail pointer register updates and writes to memory whereby the tail pointer can sometimes get ahead of what's been written out to the OA buffer so far (in terms of what's visible to the CPU). Although this can be observed explicitly while copying reports to userspace by checking for a zeroed report-id field in tail reports, we want to account for this earlier, as part of the _oa_buffer_check to avoid lots of redundant read() attempts. Previously the driver used to define an effective tail pointer that lagged the real pointer by a 'tail margin' measured in bytes derived from OA_TAIL_MARGIN_NSEC and the configured sampling frequency. Unfortunately this was flawed considering that the OA unit may also automatically generate non-periodic reports (such as on context switch) or the OA unit may be enabled without any periodic sampling. This improves how we define a tail pointer for reading that lags the real tail pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough time for the corresponding reports to become visible to the CPU. The driver now maintains two tail pointers: 1) An 'aging' tail with an associated timestamp that is tracked until we can trust the corresponding data is visible to the CPU; at which point it is considered 'aged'. 2) An 'aged' tail that can be used for read()ing. The two separate pointers let us decouple read()s from tail pointer aging. 
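In outline, the resulting check then works roughly as follows. This is
only a simplified sketch of gen7_oa_buffer_check_unlocked() from the
patch below; the ptr_lock locking, the GGTT offset handling and the
bounds checking of the hardware tail are omitted here:

    hw_tail = I915_READ(GEN7_OASTATUS1) & GEN7_OASTATUS1_TAIL_MASK;
    hw_tail &= ~(report_size - 1); /* round down to whole reports */
    now = ktime_get_mono_fast_ns();

    /* Start aging a new tail once the HW tail is at least one report
     * ahead of what is already available for reading...
     */
    if (aging_tail == INVALID_TAIL_PTR &&
        (aged_tail == INVALID_TAIL_PTR ||
         OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
            aging_tail = hw_tail;
            aging_timestamp = now;
    }

    /* ...and promote the aging tail to the aged (readable) tail once
     * it has had OA_TAIL_MARGIN_NSEC to become visible to the CPU.
     */
    if (aging_tail != INVALID_TAIL_PTR &&
        (now - aging_timestamp) > OA_TAIL_MARGIN_NSEC) {
            aged_tail = aging_tail;
            aging_tail = INVALID_TAIL_PTR;
    }

    return aged_tail != INVALID_TAIL_PTR &&
           OA_TAKEN(aged_tail, head) >= report_size;

read() then only ever copies up to the aged tail, so by construction it
never races with reports that are still landing in memory.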
The tail pointers are checked and updated at a limited rate within a hrtimer callback (the same callback that is used for delivering POLLIN events) and since we're now measuring the wall clock time elapsed since a given tail pointer was read the mechanism no longer cares about the OA unit's periodic sampling frequency. The natural place to handle the tail pointer updates was in gen7_oa_buffer_is_empty() which is called as part of blocking reads and the hrtimer callback used for polling, and so this was renamed to oa_buffer_check() considering the added side effect while checking whether the buffer contains data. Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170511154345.962-6-lionel.g.landwerlin@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 60 ++++++++- drivers/gpu/drm/i915/i915_perf.c | 277 ++++++++++++++++++++++++++------------- 2 files changed, 241 insertions(+), 96 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 080dcb0..22b2ea3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2000,7 +2000,7 @@ struct i915_oa_ops { size_t *offset); /** - * @oa_buffer_is_empty: Check if OA buffer empty (false positives OK) + * @oa_buffer_check: Check for OA buffer data + update tail * * This is either called via fops or the poll check hrtimer (atomic * ctx) without any locks taken. @@ -2013,7 +2013,7 @@ struct i915_oa_ops { * here, which will be handled gracefully - likely resulting in an * %EAGAIN error for userspace. */ - bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv); + bool (*oa_buffer_check)(struct drm_i915_private *dev_priv); }; struct intel_cdclk_state { @@ -2356,9 +2356,6 @@ struct drm_i915_private { bool periodic; int period_exponent; - int timestamp_frequency; - - int tail_margin; int metrics_set; @@ -2374,6 +2371,59 @@ struct drm_i915_private { int format_size; /** + * Locks reads and writes to all head/tail state + * + * Consider: the head and tail pointer state + * needs to be read consistently from a hrtimer + * callback (atomic context) and read() fop + * (user context) with tail pointer updates + * happening in atomic context and head updates + * in user context and the (unlikely) + * possibility of read() errors needing to + * reset all head/tail state. + * + * Note: Contention or performance aren't + * currently a significant concern here + * considering the relatively low frequency of + * hrtimer callbacks (5ms period) and that + * reads typically only happen in response to a + * hrtimer event and likely complete before the + * next callback. + * + * Note: This lock is not held *while* reading + * and copying data to userspace so the value + * of head observed in htrimer callbacks won't + * represent any partial consumption of data. + */ + spinlock_t ptr_lock; + + /** + * One 'aging' tail pointer and one 'aged' + * tail pointer ready to used for reading. + * + * Initial values of 0xffffffff are invalid + * and imply that an update is required + * (and should be ignored by an attempted + * read) + */ + struct { + u32 offset; + } tails[2]; + + /** + * Index for the aged tail ready to read() + * data up to. + */ + unsigned int aged_tail_idx; + + /** + * A monotonic timestamp for when the current + * aging tail pointer was read; used to + * determine when it is old enough to trust. 
+ */ + u64 aging_timestamp; + + /** * Although we can always read back the head * pointer register, we prefer to avoid * trusting the HW state, just to avoid any diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 29cad6b..cc6a17d 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -205,25 +205,49 @@ #define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1)) -/* There's a HW race condition between OA unit tail pointer register updates and +/** + * DOC: OA Tail Pointer Race + * + * There's a HW race condition between OA unit tail pointer register updates and * writes to memory whereby the tail pointer can sometimes get ahead of what's - * been written out to the OA buffer so far. + * been written out to the OA buffer so far (in terms of what's visible to the + * CPU). + * + * Although this can be observed explicitly while copying reports to userspace + * by checking for a zeroed report-id field in tail reports, we want to account + * for this earlier, as part of the _oa_buffer_check to avoid lots of redundant + * read() attempts. + * + * In effect we define a tail pointer for reading that lags the real tail + * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough + * time for the corresponding reports to become visible to the CPU. + * + * To manage this we actually track two tail pointers: + * 1) An 'aging' tail with an associated timestamp that is tracked until we + * can trust the corresponding data is visible to the CPU; at which point + * it is considered 'aged'. + * 2) An 'aged' tail that can be used for read()ing. * - * Although this can be observed explicitly by checking for a zeroed report-id - * field in tail reports, it seems preferable to account for this earlier e.g. - * as part of the _oa_buffer_is_empty checks to minimize -EAGAIN polling cycles - * in this situation. + * The two separate pointers let us decouple read()s from tail pointer aging. * - * To give time for the most recent reports to land before they may be copied to - * userspace, the driver operates as if the tail pointer effectively lags behind - * the HW tail pointer by 'tail_margin' bytes. The margin in bytes is calculated - * based on this constant in nanoseconds, the current OA sampling exponent - * and current report size. + * The tail pointers are checked and updated at a limited rate within a hrtimer + * callback (the same callback that is used for delivering POLLIN events) * - * There is also a fallback check while reading to simply skip over reports with - * a zeroed report-id. + * Initially the tails are marked invalid with %INVALID_TAIL_PTR which + * indicates that an updated tail pointer is needed. + * + * Most of the implementation details for this workaround are in + * gen7_oa_buffer_check_unlocked() and gen7_appand_oa_reports() + * + * Note for posterity: previously the driver used to define an effective tail + * pointer that lagged the real pointer by a 'tail margin' measured in bytes + * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency. + * This was flawed considering that the OA unit may also automatically generate + * non-periodic reports (such as on context switch) or the OA unit may be + * enabled without any periodic sampling. */ #define OA_TAIL_MARGIN_NSEC 100000ULL +#define INVALID_TAIL_PTR 0xffffffff /* frequency for checking whether the OA unit has written new reports to the * circular OA buffer... 
@@ -308,26 +332,116 @@ struct perf_open_properties { int oa_period_exponent; }; -/* NB: This is either called via fops or the poll check hrtimer (atomic ctx) +/** + * gen7_oa_buffer_check_unlocked - check for data and update tail ptr state + * @dev_priv: i915 device instance * - * It's safe to read OA config state here unlocked, assuming that this is only - * called while the stream is enabled, while the global OA configuration can't - * be modified. + * This is either called via fops (for blocking reads in user ctx) or the poll + * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check + * if there is data available for userspace to read. * - * Note: we don't lock around the head/tail reads even though there's the slim - * possibility of read() fop errors forcing a re-init of the OA buffer - * pointers. A race here could result in a false positive !empty status which - * is acceptable. + * This function is central to providing a workaround for the OA unit tail + * pointer having a race with respect to what data is visible to the CPU. + * It is responsible for reading tail pointers from the hardware and giving + * the pointers time to 'age' before they are made available for reading. + * (See description of OA_TAIL_MARGIN_NSEC above for further details.) + * + * Besides returning true when there is data available to read() this function + * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp + * and .aged_tail_idx state used for reading. + * + * Note: It's safe to read OA config state here unlocked, assuming that this is + * only called while the stream is enabled, while the global OA configuration + * can't be modified. + * + * Returns: %true if the OA buffer contains data, else %false */ -static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv) +static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) { int report_size = dev_priv->perf.oa.oa_buffer.format_size; - u32 oastatus1 = I915_READ(GEN7_OASTATUS1); - u32 head = dev_priv->perf.oa.oa_buffer.head; - u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; + unsigned long flags; + unsigned int aged_idx; + u32 oastatus1; + u32 head, hw_tail, aged_tail, aging_tail; + u64 now; + + /* We have to consider the (unlikely) possibility that read() errors + * could result in an OA buffer reset which might reset the head, + * tails[] and aged_tail state. + */ + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + /* NB: The head we observe here might effectively be a little out of + * date (between head and tails[aged_idx].offset if there is currently + * a read() in progress. + */ + head = dev_priv->perf.oa.oa_buffer.head; + + aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx; + aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset; + aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset; + + oastatus1 = I915_READ(GEN7_OASTATUS1); + hw_tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; + + /* The tail pointer increases in 64 byte increments, + * not in report_size steps... + */ + hw_tail &= ~(report_size - 1); + + now = ktime_get_mono_fast_ns(); + + /* Update the aging tail + * + * We throttle aging tail updates until we have a new tail that + * represents >= one report more data than is already available for + * reading. This ensures there will be enough data for a successful + * read once this new pointer has aged and ensures we will give the new + * pointer time to age. 
+ */ + if (aging_tail == INVALID_TAIL_PTR && + (aged_tail == INVALID_TAIL_PTR || + OA_TAKEN(hw_tail, aged_tail) >= report_size)) { + struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma; + u32 gtt_offset = i915_ggtt_offset(vma); + + /* Be paranoid and do a bounds check on the pointer read back + * from hardware, just in case some spurious hardware condition + * could put the tail out of bounds... + */ + if (hw_tail >= gtt_offset && + hw_tail < (gtt_offset + OA_BUFFER_SIZE)) { + dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = + aging_tail = hw_tail; + dev_priv->perf.oa.oa_buffer.aging_timestamp = now; + } else { + DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n", + hw_tail); + } + } + + /* Update the aged tail + * + * Flip the tail pointer available for read()s once the aging tail is + * old enough to trust that the corresponding data will be visible to + * the CPU... + */ + if (aging_tail != INVALID_TAIL_PTR && + ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) > + OA_TAIL_MARGIN_NSEC)) { + aged_idx ^= 1; + dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx; + + aged_tail = aging_tail; - return OA_TAKEN(tail, head) < - dev_priv->perf.oa.tail_margin + report_size; + /* Mark that we need a new pointer to start aging... */ + dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR; + } + + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + + return aged_tail == INVALID_TAIL_PTR ? + false : OA_TAKEN(aged_tail, head) >= report_size; } /** @@ -442,58 +556,50 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, struct drm_i915_private *dev_priv = stream->dev_priv; int report_size = dev_priv->perf.oa.oa_buffer.format_size; u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr; - int tail_margin = dev_priv->perf.oa.tail_margin; u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); u32 mask = (OA_BUFFER_SIZE - 1); size_t start_offset = *offset; - u32 head, oastatus1, tail; + unsigned long flags; + unsigned int aged_tail_idx; + u32 head, tail; u32 taken; int ret = 0; if (WARN_ON(!stream->enabled)) return -EIO; - head = dev_priv->perf.oa.oa_buffer.head - gtt_offset; + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); - /* An out of bounds or misaligned head pointer implies a driver bug - * since we are in full control of head pointer which should only - * be incremented by multiples of the report size (notably also - * all a power of two). - */ - if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size, - "Inconsistent OA buffer head pointer = %u\n", head)) - return -EIO; + head = dev_priv->perf.oa.oa_buffer.head; + aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx; + tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset; - oastatus1 = I915_READ(GEN7_OASTATUS1); - tail = (oastatus1 & GEN7_OASTATUS1_TAIL_MASK) - gtt_offset; + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); - /* The OA unit is expected to wrap the tail pointer according to the OA - * buffer size + /* An invalid tail pointer here means we're still waiting for the poll + * hrtimer callback to give us a pointer */ - if (tail > OA_BUFFER_SIZE) { - DRM_ERROR("Inconsistent OA buffer tail pointer = %u: force restart\n", - tail); - dev_priv->perf.oa.ops.oa_disable(dev_priv); - dev_priv->perf.oa.ops.oa_enable(dev_priv); - return -EIO; - } - + if (tail == INVALID_TAIL_PTR) + return -EAGAIN; - /* The tail pointer increases in 64 byte increments, not in report_size - * steps... 
+ /* NB: oa_buffer.head/tail include the gtt_offset which we don't want + * while indexing relative to oa_buf_base. */ - tail &= ~(report_size - 1); + head -= gtt_offset; + tail -= gtt_offset; - /* Move the tail pointer back by the current tail_margin to account for - * the possibility that the latest reports may not have really landed - * in memory yet... + /* An out of bounds or misaligned head or tail pointer implies a driver + * bug since we validate + align the tail pointers we read from the + * hardware and we are in full control of the head pointer which should + * only be incremented by multiples of the report size (notably also + * all a power of two). */ + if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size || + tail > OA_BUFFER_SIZE || tail % report_size, + "Inconsistent OA buffer pointers: head = %u, tail = %u\n", + head, tail)) + return -EIO; - if (OA_TAKEN(tail, head) < report_size + tail_margin) - return -EAGAIN; - - tail -= tail_margin; - tail &= mask; for (/* none */; (taken = OA_TAKEN(tail, head)); @@ -539,6 +645,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, } if (start_offset != *offset) { + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + /* We removed the gtt_offset for the copy loop above, indexing * relative to oa_buf_base so put back here... */ @@ -548,6 +656,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, ((head & GEN7_OASTATUS2_HEAD_MASK) | OA_MEM_SELECT_GGTT)); dev_priv->perf.oa.oa_buffer.head = head; + + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); } return ret; @@ -658,14 +768,8 @@ static int i915_oa_wait_unlocked(struct i915_perf_stream *stream) if (!dev_priv->perf.oa.periodic) return -EIO; - /* Note: the oa_buffer_is_empty() condition is ok to run unlocked as it - * just performs mmio reads of the OA buffer head + tail pointers and - * it's assumed we're handling some operation that implies the stream - * can't be destroyed until completion (such as a read()) that ensures - * the device + OA buffer can't disappear - */ return wait_event_interruptible(dev_priv->perf.oa.poll_wq, - !dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)); + dev_priv->perf.oa.ops.oa_buffer_check(dev_priv)); } /** @@ -807,6 +911,9 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) { u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma); + unsigned long flags; + + spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); /* Pre-DevBDW: OABUFFER must be set with counters off, * before OASTATUS1, but after OASTATUS2 @@ -818,6 +925,12 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ + /* Mark that we need updated tail pointers to read from... */ + dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR; + dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR; + + spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); + /* On Haswell we have to track which OASTATUS1 flags we've * already seen since they can't be cleared while periodic * sampling is enabled. 
@@ -1075,12 +1188,6 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream) hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer); } -static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) -{ - return div_u64(1000000000ULL * (2ULL << exponent), - dev_priv->perf.oa.timestamp_frequency); -} - static const struct i915_perf_stream_ops i915_oa_stream_ops = { .destroy = i915_oa_stream_destroy, .enable = i915_oa_stream_enable, @@ -1171,20 +1278,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, dev_priv->perf.oa.metrics_set = props->metrics_set; dev_priv->perf.oa.periodic = props->oa_periodic; - if (dev_priv->perf.oa.periodic) { - u32 tail; - + if (dev_priv->perf.oa.periodic) dev_priv->perf.oa.period_exponent = props->oa_period_exponent; - /* See comment for OA_TAIL_MARGIN_NSEC for details - * about this tail_margin... - */ - tail = div64_u64(OA_TAIL_MARGIN_NSEC, - oa_exponent_to_ns(dev_priv, - props->oa_period_exponent)); - dev_priv->perf.oa.tail_margin = (tail + 1) * format_size; - } - if (stream->ctx) { ret = oa_get_render_ctx_id(stream); if (ret) @@ -1357,7 +1453,7 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) container_of(hrtimer, typeof(*dev_priv), perf.oa.poll_check_timer); - if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) { + if (dev_priv->perf.oa.ops.oa_buffer_check(dev_priv)) { dev_priv->perf.oa.pollin = true; wake_up(&dev_priv->perf.oa.poll_wq); } @@ -2052,6 +2148,7 @@ void i915_perf_init(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->perf.streams); mutex_init(&dev_priv->perf.lock); spin_lock_init(&dev_priv->perf.hook_lock); + spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer; dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set; @@ -2059,10 +2156,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable; dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable; dev_priv->perf.oa.ops.read = gen7_oa_read; - dev_priv->perf.oa.ops.oa_buffer_is_empty = - gen7_oa_buffer_is_empty_fop_unlocked; - - dev_priv->perf.oa.timestamp_frequency = 12500000; + dev_priv->perf.oa.ops.oa_buffer_check = + gen7_oa_buffer_check_unlocked; dev_priv->perf.oa.oa_formats = hsw_oa_formats; -- cgit v1.1 From 52c57c263f14f90ebf43705d1c9cc4d53229fd7c Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Thu, 11 May 2017 16:43:29 +0100 Subject: drm/i915/perf: improve invalid OA format debug message A minor improvement to debugging output Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170511154345.962-7-lionel.g.landwerlin@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_perf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index cc6a17d..957b959 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1902,11 +1902,13 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, break; case DRM_I915_PERF_PROP_OA_FORMAT: if (value == 0 || value >= I915_OA_FORMAT_MAX) { - DRM_DEBUG("Invalid OA report format\n"); + DRM_DEBUG("Out-of-range OA report format %llu\n", + value); return -EINVAL; } if (!dev_priv->perf.oa.oa_formats[value].size) { - DRM_DEBUG("Invalid OA report format\n"); + DRM_DEBUG("Unsupported OA report format %llu\n", + value); return -EINVAL; } props->oa_format = value; -- 
cgit v1.1 From 4117ebc74cb3ca25966488b8da6157a299f02e92 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Thu, 11 May 2017 16:43:30 +0100 Subject: drm/i915/perf: better pipeline aged/aging tail updates This updates the tail pointer race workaround handling to updating the 'aged' pointer before looking to start aging a new one. There's the possibility that there is already new data available and so we can immediately start aging a new pointer without having to first wait for a later hrtimer callback (and then another to age). Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170511154345.962-8-lionel.g.landwerlin@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_perf.c | 41 ++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 957b959..4d31b70 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -391,6 +391,29 @@ static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) now = ktime_get_mono_fast_ns(); + /* Update the aged tail + * + * Flip the tail pointer available for read()s once the aging tail is + * old enough to trust that the corresponding data will be visible to + * the CPU... + * + * Do this before updating the aging pointer in case we may be able to + * immediately start aging a new pointer too (if new data has become + * available) without needing to wait for a later hrtimer callback. + */ + if (aging_tail != INVALID_TAIL_PTR && + ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) > + OA_TAIL_MARGIN_NSEC)) { + aged_idx ^= 1; + dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx; + + aged_tail = aging_tail; + + /* Mark that we need a new pointer to start aging... */ + dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR; + aging_tail = INVALID_TAIL_PTR; + } + /* Update the aging tail * * We throttle aging tail updates until we have a new tail that @@ -420,24 +443,6 @@ static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv) } } - /* Update the aged tail - * - * Flip the tail pointer available for read()s once the aging tail is - * old enough to trust that the corresponding data will be visible to - * the CPU... - */ - if (aging_tail != INVALID_TAIL_PTR && - ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) > - OA_TAIL_MARGIN_NSEC)) { - aged_idx ^= 1; - dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx; - - aged_tail = aging_tail; - - /* Mark that we need a new pointer to start aging... */ - dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR; - } - spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); return aged_tail == INVALID_TAIL_PTR ? -- cgit v1.1 From 712122eaa105cfa865b08020a3e47fca628e9c44 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Thu, 11 May 2017 16:43:31 +0100 Subject: drm/i915/perf: rate limit spurious oa report notice This change is pre-emptively aiming to avoid a potential cause of kernel logging noise in case some condition were to result in us seeing invalid OA reports. The workaround for the OA unit's tail pointer race condition is what avoids the primary known cause of invalid reports being seen and with that in place we aren't expecting to see this notice but it can't be entirely ruled out. 
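In rough outline the throttling added here looks like the sketch below
(based on the hunks in this patch; the ratelimit state is per-stream and
reuses the same limiting factors as printk_ratelimit()):

    /* at stream init time */
    ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs,
                         5 * HZ, 10);
    ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs,
                        RATELIMIT_MSG_ON_RELEASE);

    /* then, while appending reports */
    if (report32[0] == 0) {
            if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
                    DRM_NOTE("Skipping spurious, invalid OA report\n");
            continue;
    }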
Just in case some condition does lead to the notice then it's likely that it will be triggered repeatedly while attempting to append a sequence of reports and depending on the configured OA sampling frequency that might be a large number of repeat notices. v2: (Chris) avoid inconsistent warning on throttle with printk_ratelimit() v3: (Matt) init and summarise with stream init/close not driver init/fini Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170511154345.962-9-lionel.g.landwerlin@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ drivers/gpu/drm/i915/i915_perf.c | 28 +++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 22b2ea3..66dee15 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2354,6 +2354,12 @@ struct drm_i915_private { wait_queue_head_t poll_wq; bool pollin; + /** + * For rate limiting any notifications of spurious + * invalid OA reports + */ + struct ratelimit_state spurious_report_rs; + bool periodic; int period_exponent; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 4d31b70..85269bc 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -632,7 +632,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, * copying it to userspace... */ if (report32[0] == 0) { - DRM_NOTE("Skipping spurious, invalid OA report\n"); + if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs)) + DRM_NOTE("Skipping spurious, invalid OA report\n"); continue; } @@ -911,6 +912,11 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) oa_put_render_ctx_id(stream); dev_priv->perf.oa.exclusive_stream = NULL; + + if (dev_priv->perf.oa.spurious_report_rs.missed) { + DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", + dev_priv->perf.oa.spurious_report_rs.missed); + } } static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) @@ -1266,6 +1272,26 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } + /* We set up some ratelimit state to potentially throttle any _NOTES + * about spurious, invalid OA reports which we don't forward to + * userspace. + * + * The initialization is associated with opening the stream (not driver + * init) considering we print a _NOTE about any throttling when closing + * the stream instead of waiting until driver _fini which no one would + * ever see. + * + * Using the same limiting factors as printk_ratelimit() + */ + ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs, + 5 * HZ, 10); + /* Since we use a DRM_NOTE for spurious reports it would be + * inconsistent to let __ratelimit() automatically print a warning for + * throttling. 
+ */ + ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs, + RATELIMIT_MSG_ON_RELEASE); + stream->sample_size = sizeof(struct drm_i915_perf_record_header); format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size; -- cgit v1.1 From 2388cd9c5056b10c8e461ca2ab075f49d6f1b25d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 15 May 2017 09:11:48 +0200 Subject: drm/i915: Update DRIVER_DATE to 20170515 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 66dee15..a6f2047 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -80,8 +80,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170502" -#define DRIVER_TIMESTAMP 1493710187 +#define DRIVER_DATE "20170515" +#define DRIVER_TIMESTAMP 1494832308 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions -- cgit v1.1 From 9081d0805638aa7d4f9cbc05c881217d3eaf30e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 13 May 2017 10:41:54 +0100 Subject: drm/i915: Fixup 64bit divides in timelines selftest Some 64b divides snuck in when doing the prng timing compensation. Fixes: 4797948071f6 ("drm/i915: Squash repeated awaits on the same fence") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170513094154.3581-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/selftests/i915_gem_timeline.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c index 6df00cc..7a44dab 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c @@ -126,7 +126,6 @@ static unsigned int random_engine(struct rnd_state *rnd) static int bench_sync(void *arg) { -#define M (1 << 20) struct rnd_state prng; struct intel_timeline *tl; unsigned long end_time, count; @@ -158,7 +157,7 @@ static int bench_sync(void *arg) kt = ktime_sub(ktime_get(), kt); pr_debug("%s: %lu random evaluations, %lluns/prng\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - prng32_1M = ktime_to_ns(kt) * M / count; + prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count); /* Benchmark (only) setting random context ids */ prandom_seed_state(&prng, i915_selftest.random_seed); @@ -172,7 +171,7 @@ static int bench_sync(void *arg) count++; } while (!time_after(jiffies, end_time)); kt = ktime_sub(ktime_get(), kt); - kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); pr_info("%s: %lu random insertions, %lluns/insert\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); @@ -190,7 +189,7 @@ static int bench_sync(void *arg) } } kt = ktime_sub(ktime_get(), kt); - kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); pr_info("%s: %lu random lookups, %lluns/lookup\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); @@ -248,7 +247,7 @@ static int bench_sync(void *arg) count++; } while (!time_after(jiffies, end_time)); kt = ktime_sub(ktime_get(), kt); - kt = ktime_sub_ns(kt, count * prng32_1M * 2 / M); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); 
pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); mock_timeline_destroy(tl); @@ -287,7 +286,6 @@ static int bench_sync(void *arg) } return 0; -#undef M } int i915_gem_timeline_mock_selftests(void) -- cgit v1.1 From 1fdd783e9ce3a60c99f4da0d844d17b7c3ef8aef Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Tue, 9 May 2017 18:59:24 +0530 Subject: drm/i915/glk: Calculate high/low switch count for GLK As per BSPEC, high/low switch count to be programmed in terms of byteclock using exit_zero_count and prep_count. For Geminilake exit/prep counts are already calculated in terms of byteclock. This patch calculates high/low switch count using counts value in byteclock, old calculation leads to screen flicker/shift issue while resuming from S3/S4. Signed-off-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1494336565-19185-1-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi_vbt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 0dce779..7158c7c 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -694,8 +694,8 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) clk_zero_cnt << 8 | prepare_cnt; /* - * LP to HS switch count = 4TLPX + PREP_COUNT * 2 + EXIT_ZERO_COUNT * 2 - * + 10UI + Extra Byte Count + * LP to HS switch count = 4TLPX + PREP_COUNT * mul + EXIT_ZERO_COUNT * + * mul + 10UI + Extra Byte Count * * HS to LP switch count = THS-TRAIL + 2TLPX + Extra Byte Count * Extra Byte Count is calculated according to number of lanes. @@ -708,8 +708,8 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) /* B044 */ /* FIXME: * The comment above does not match with the code */ - lp_to_hs_switch = DIV_ROUND_UP(4 * tlpx_ui + prepare_cnt * 2 + - exit_zero_cnt * 2 + 10, 8); + lp_to_hs_switch = DIV_ROUND_UP(4 * tlpx_ui + prepare_cnt * mul + + exit_zero_cnt * mul + 10, 8); hs_to_lp_switch = DIV_ROUND_UP(mipi_config->ths_trail + 2 * tlpx_ui, 8); -- cgit v1.1 From 9a09485d41dbba0f475774587f030a95573032e5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 May 2017 10:22:35 +0100 Subject: drm/i915/guc:fix spelling mistake: "adddress" -> "address" Trivial fix to spelling mistake in seq_printf message. Fixes: a8b9370fc79c1 ("drm/i915/guc: Dump the GuC stage descriptor pool in debugfs") Signed-off-by: Colin Ian King Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170516092235.28640-1-colin.king@canonical.com --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bd9abef..76abff1 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2582,7 +2582,7 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data) desc->db_trigger_uk); seq_printf(m, "\tProcess descriptor: 0x%x\n", desc->process_desc); - seq_printf(m, "\tWorkqueue adddress: 0x%x, size: 0x%x\n", + seq_printf(m, "\tWorkqueue address: 0x%x, size: 0x%x\n", desc->wq_addr, desc->wq_size); seq_putc(m, '\n'); -- cgit v1.1 From cbaa331504d16489c8b207da960351e7b0afb3e2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Mon, 15 May 2017 16:56:05 -0500 Subject: gpu: drm: i915: remove dead code Local variable has_reduced_clock is assigned to a constant value and it is never updated again. Remove this variable and the dead code it guards. Addresses-Coverity-ID: 1362230 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170515215605.GA14963@embeddedgus --- drivers/gpu/drm/i915/intel_display.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 849e854..c0b4477 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8192,8 +8192,6 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct dpll reduced_clock; - bool has_reduced_clock = false; struct intel_shared_dpll *pll; const struct intel_limit *limit; int refclk = 120000; @@ -8236,8 +8234,7 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, return -EINVAL; } - ironlake_compute_dpll(crtc, crtc_state, - has_reduced_clock ? &reduced_clock : NULL); + ironlake_compute_dpll(crtc, crtc_state, NULL); pll = intel_get_shared_dpll(crtc, crtc_state, NULL); if (pll == NULL) { @@ -8246,10 +8243,6 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, return -EINVAL; } - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_LVDS) && - has_reduced_clock) - crtc->lowfreq_avail = true; - return 0; } -- cgit v1.1 From efd38b68c7472728e18861b2cfd02432a60fc39f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 15 May 2017 17:00:28 -0500 Subject: gpu: drm: i915: compress logic into one line Simplify logic to avoid unnecessary variable declaration and assignment. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170515220028.GA15149@embeddedgus --- drivers/gpu/drm/i915/intel_display.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c0b4477..55c2c14 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8192,7 +8192,6 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_shared_dpll *pll; const struct intel_limit *limit; int refclk = 120000; @@ -8236,8 +8235,7 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, ironlake_compute_dpll(crtc, crtc_state, NULL); - pll = intel_get_shared_dpll(crtc, crtc_state, NULL); - if (pll == NULL) { + if (!intel_get_shared_dpll(crtc, crtc_state, NULL)) { DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n", pipe_name(crtc->pipe)); return -EINVAL; -- cgit v1.1 From d567232cbd9ec2a289ddffea4013b7265bbcc3d5 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 16 May 2017 09:55:14 +0100 Subject: drm/i915: use vma->size for appgtt allocate_va_range For the aliasing ppgtt we clear the va range up to vma->size, but seem to allocate up to vma->node.size, which is a little inconsistent given that vma->node.size >= vma->size. Not that is really matters all that much since we preallocate anyway, but for consistency just use vma->size. 
Fixes: ff685975d97f ("drm/i915: Move allocate_va_range to GTT") Signed-off-by: Matthew Auld Cc: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170516085514.5853-1-matthew.auld@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index fa51903..b18ed51 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2311,7 +2311,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, appgtt->base.allocate_va_range) { ret = appgtt->base.allocate_va_range(&appgtt->base, vma->node.start, - vma->node.size); + vma->size); if (ret) goto err_pages; } -- cgit v1.1 From 0b71cea29fc29bbd8e9dd9c641fee6bd75f68274 Mon Sep 17 00:00:00 2001 From: Arkadiusz Hiler Date: Fri, 12 May 2017 13:20:15 +0200 Subject: drm/i915/gen9: Reintroduce WaEnableYV12BugFixInHalfSliceChicken7 This basically reverts commit 465418c6064c ("drm/i915/gen9: Remove WaEnableYV12BugFixInHalfSliceChicken7") with small addition - marking it as affecting GLK as well. It was incorrectly considered fixed in production steppings. References: HSD#2126385, HSD#2131381, HSDES#1504433555, BSID#0764 Cc: Mika Kuoppala Cc: Jeff McGee Signed-off-by: Arkadiusz Hiler Reviewed-by: Mika Kuoppala [Mika: s/KBL/GLK on commit message] Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170512112015.19082-1-arkadiusz.hiler@intel.com --- drivers/gpu/drm/i915/intel_engine_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 483ed76..49c2315 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -851,8 +851,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) */ } + /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk */ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_YV12_BUGFIX | GEN9_ENABLE_GPGPU_PREEMPTION); /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */ -- cgit v1.1 From 9310cb7f8dbee342995c1144714218a185e7dd07 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 May 2017 13:09:56 +0100 Subject: drm/i915: Remove kref from i915_sw_fence My original intention was for i915_sw_fence to be the base class and provide the reference count for the container. This was from starting with a design to handle async_work. In practice, for i915 we embed fences into structs which have their own independent reference counting, making the i915_sw_fence.kref duplicitous. If we remove the kref, we remove the i915_sw_fence's ability to free itself and its independence, it can only exist within a container and must be supplied with a callback to handle its release. 
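The expected usage pattern is then along the lines of the sketch below.
The container and callback names here are made up for illustration, but
the shape matches the selftests added later in this series:

    struct my_container {
            struct i915_sw_fence fence;
            /* ... whatever state the fence guards ... */
    };

    static int __i915_sw_fence_call
    my_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
    {
            struct my_container *c = container_of(fence, typeof(*c), fence);

            switch (state) {
            case FENCE_COMPLETE:
                    /* all waits satisfied, the fence has signaled */
                    break;

            case FENCE_FREE:
                    /* the fence no longer frees itself, so release the
                     * container here (e.g. drop its reference)
                     */
                    kfree(c);
                    break;
            }

            return NOTIFY_DONE;
    }

    static struct my_container *my_container_create(void)
    {
            struct my_container *c = kmalloc(sizeof(*c), GFP_KERNEL);

            if (!c)
                    return NULL;

            i915_sw_fence_init(&c->fence, my_notify);
            return c;
    }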
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_sw_fence.c | 55 ++++++++---------------------------- drivers/gpu/drm/i915/i915_sw_fence.h | 1 - 2 files changed, 11 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index a277f8e..a0a690d 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -120,34 +120,6 @@ void i915_sw_fence_fini(struct i915_sw_fence *fence) } #endif -static void i915_sw_fence_release(struct kref *kref) -{ - struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref); - - WARN_ON(atomic_read(&fence->pending) > 0); - debug_fence_destroy(fence); - - if (fence->flags & I915_SW_FENCE_MASK) { - __i915_sw_fence_notify(fence, FENCE_FREE); - } else { - i915_sw_fence_fini(fence); - kfree(fence); - } -} - -static void i915_sw_fence_put(struct i915_sw_fence *fence) -{ - debug_fence_assert(fence); - kref_put(&fence->kref, i915_sw_fence_release); -} - -static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence) -{ - debug_fence_assert(fence); - kref_get(&fence->kref); - return fence; -} - static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence, struct list_head *continuation) { @@ -202,13 +174,15 @@ static void __i915_sw_fence_complete(struct i915_sw_fence *fence, debug_fence_set_state(fence, DEBUG_FENCE_IDLE, DEBUG_FENCE_NOTIFY); - if (fence->flags & I915_SW_FENCE_MASK && - __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE) + if (__i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE) return; debug_fence_set_state(fence, DEBUG_FENCE_NOTIFY, DEBUG_FENCE_IDLE); __i915_sw_fence_wake_up_all(fence, continuation); + + debug_fence_destroy(fence); + __i915_sw_fence_notify(fence, FENCE_FREE); } static void i915_sw_fence_complete(struct i915_sw_fence *fence) @@ -232,33 +206,26 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence, const char *name, struct lock_class_key *key) { - BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK); + BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK); debug_fence_init(fence); __init_waitqueue_head(&fence->wait, name, key); - kref_init(&fence->kref); atomic_set(&fence->pending, 1); fence->flags = (unsigned long)fn; } -static void __i915_sw_fence_commit(struct i915_sw_fence *fence) -{ - i915_sw_fence_complete(fence); - i915_sw_fence_put(fence); -} - void i915_sw_fence_commit(struct i915_sw_fence *fence) { debug_fence_activate(fence); - __i915_sw_fence_commit(fence); + i915_sw_fence_complete(fence); } static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key) { list_del(&wq->task_list); __i915_sw_fence_complete(wq->private, key); - i915_sw_fence_put(wq->private); + if (wq->flags & I915_SW_FENCE_FLAG_ALLOC) kfree(wq); return 0; @@ -353,7 +320,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, INIT_LIST_HEAD(&wq->task_list); wq->flags = pending; wq->func = i915_sw_fence_wake; - wq->private = i915_sw_fence_get(fence); + wq->private = fence; i915_sw_fence_await(fence); @@ -402,7 +369,7 @@ static void timer_i915_sw_fence_wake(unsigned long data) dma_fence_put(cb->dma); cb->dma = NULL; - __i915_sw_fence_commit(cb->fence); + i915_sw_fence_complete(cb->fence); cb->timer.function = NULL; } @@ -413,7 +380,7 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma, del_timer_sync(&cb->timer); if (cb->timer.function) - 
__i915_sw_fence_commit(cb->fence); + i915_sw_fence_complete(cb->fence); dma_fence_put(cb->dma); kfree(cb); @@ -440,7 +407,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, return dma_fence_wait(dma, false); } - cb->fence = i915_sw_fence_get(fence); + cb->fence = fence; i915_sw_fence_await(fence); cb->dma = NULL; diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h index d31cefbb..1d3b605 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.h +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -23,7 +23,6 @@ struct reservation_object; struct i915_sw_fence { wait_queue_head_t wait; unsigned long flags; - struct kref kref; atomic_t pending; }; -- cgit v1.1 From 47624cc3301b6033d51b84a1381c2d69fda9b1a0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 May 2017 13:09:57 +0100 Subject: drm/i915: Import the kfence selftests for i915_sw_fence A long time ago, I wrote some selftests for the struct kfence idea. Now that we have infrastructure in i915/igt for running kselftests, include some for i915_sw_fence. v2: INIT_WORK_ONSTACK/destroy_work_on_stack (Mika) Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.debug | 12 + drivers/gpu/drm/i915/i915_sw_fence.c | 7 +- .../gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + drivers/gpu/drm/i915/selftests/i915_sw_fence.c | 577 +++++++++++++++++++++ 4 files changed, 596 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/selftests/i915_sw_fence.c diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index e091809..d4860c3 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -61,6 +61,18 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS If in doubt, say "N". +config DRM_I915_SW_FENCE_CHECK_DAG + bool "Enable additional driver debugging for detecting dependency cycles" + depends on DRM_I915 + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. + + Recommended for driver developers only. + + If in doubt, say "N". 
+ config DRM_I915_SELFTEST bool "Enable selftests upon driver load" depends on DRM_I915 diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index a0a690d..474d23c 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -12,6 +12,7 @@ #include #include "i915_sw_fence.h" +#include "i915_selftest.h" #define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */ @@ -274,7 +275,7 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence, unsigned long flags; bool err; - if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG)) + if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG)) return false; spin_lock_irqsave(&i915_sw_fence_lock, flags); @@ -490,3 +491,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, return ret; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_sw_fence.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 76c1f14..fc74687 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -9,6 +9,7 @@ * Tests are executed in order by igt/drv_selftest */ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ +selftest(fence, i915_sw_fence_mock_selftests) selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c new file mode 100644 index 0000000..98baf10 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c @@ -0,0 +1,577 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#include +#include + +#include "../i915_selftest.h" + +static int __i915_sw_fence_call +fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + switch (state) { + case FENCE_COMPLETE: + break; + + case FENCE_FREE: + /* Leave the fence for the caller to free it after testing */ + break; + } + + return NOTIFY_DONE; +} + +static struct i915_sw_fence *alloc_fence(void) +{ + struct i915_sw_fence *fence; + + fence = kmalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return NULL; + + i915_sw_fence_init(fence, fence_notify); + return fence; +} + +static void free_fence(struct i915_sw_fence *fence) +{ + i915_sw_fence_fini(fence); + kfree(fence); +} + +static int __test_self(struct i915_sw_fence *fence) +{ + if (i915_sw_fence_done(fence)) + return -EINVAL; + + i915_sw_fence_commit(fence); + if (!i915_sw_fence_done(fence)) + return -EINVAL; + + i915_sw_fence_wait(fence); + if (!i915_sw_fence_done(fence)) + return -EINVAL; + + return 0; +} + +static int test_self(void *arg) +{ + struct i915_sw_fence *fence; + int ret; + + /* Test i915_sw_fence signaling and completion testing */ + fence = alloc_fence(); + if (!fence) + return -ENOMEM; + + ret = __test_self(fence); + + free_fence(fence); + return ret; +} + +static int test_dag(void *arg) +{ + struct i915_sw_fence *A, *B, *C; + int ret = -EINVAL; + + /* Test detection of cycles within the i915_sw_fence graphs */ + if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG)) + return 0; + + A = alloc_fence(); + if (!A) + return -ENOMEM; + + if (i915_sw_fence_await_sw_fence_gfp(A, A, GFP_KERNEL) != -EINVAL) { + pr_err("recursive cycle not detected (AA)\n"); + goto err_A; + } + + B = alloc_fence(); + if (!B) { + ret = -ENOMEM; + goto err_A; + } + + i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL); + if (i915_sw_fence_await_sw_fence_gfp(B, A, GFP_KERNEL) != -EINVAL) { + pr_err("single depth cycle not detected (BAB)\n"); + goto err_B; + } + + C = alloc_fence(); + if (i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL) == -EINVAL) { + pr_err("invalid cycle detected\n"); + goto err_C; + } + if (i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL) != -EINVAL) { + pr_err("single depth cycle not detected (CBC)\n"); + goto err_C; + } + if (i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL) != -EINVAL) { + pr_err("cycle not detected (BA, CB, AC)\n"); + goto err_C; + } + if (i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL) == -EINVAL) { + pr_err("invalid cycle detected\n"); + goto err_C; + } + + i915_sw_fence_commit(A); + i915_sw_fence_commit(B); + i915_sw_fence_commit(C); + + ret = 0; + if (!i915_sw_fence_done(C)) { + pr_err("fence C not done\n"); + ret = -EINVAL; + } + if (!i915_sw_fence_done(B)) { + pr_err("fence B not done\n"); + ret = -EINVAL; + } + if (!i915_sw_fence_done(A)) { + pr_err("fence A not done\n"); + ret = -EINVAL; + } +err_C: + free_fence(C); +err_B: + free_fence(B); +err_A: + free_fence(A); + return ret; +} + +static int test_AB(void *arg) +{ + struct i915_sw_fence *A, *B; + int ret; + + /* Test i915_sw_fence (A) waiting on an event source (B) */ + A = alloc_fence(); + if (!A) + return -ENOMEM; + B = alloc_fence(); + if (!B) { + ret = -ENOMEM; + goto err_A; + } + + ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL); + if (ret < 0) + goto err_B; + if (ret == 0) { + pr_err("Incorrectly reported fence A was complete before await\n"); + ret = -EINVAL; + goto err_B; + } + + ret = -EINVAL; + i915_sw_fence_commit(A); + if (i915_sw_fence_done(A)) + goto err_B; + + i915_sw_fence_commit(B); + if (!i915_sw_fence_done(B)) 
{ + pr_err("Fence B is not done\n"); + goto err_B; + } + + if (!i915_sw_fence_done(A)) { + pr_err("Fence A is not done\n"); + goto err_B; + } + + ret = 0; +err_B: + free_fence(B); +err_A: + free_fence(A); + return ret; +} + +static int test_ABC(void *arg) +{ + struct i915_sw_fence *A, *B, *C; + int ret; + + /* Test a chain of fences, A waits on B who waits on C */ + A = alloc_fence(); + if (!A) + return -ENOMEM; + + B = alloc_fence(); + if (!B) { + ret = -ENOMEM; + goto err_A; + } + + C = alloc_fence(); + if (!C) { + ret = -ENOMEM; + goto err_B; + } + + ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL); + if (ret < 0) + goto err_C; + if (ret == 0) { + pr_err("Incorrectly reported fence B was complete before await\n"); + goto err_C; + } + + ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL); + if (ret < 0) + goto err_C; + if (ret == 0) { + pr_err("Incorrectly reported fence C was complete before await\n"); + goto err_C; + } + + ret = -EINVAL; + i915_sw_fence_commit(A); + if (i915_sw_fence_done(A)) { + pr_err("Fence A completed early\n"); + goto err_C; + } + + i915_sw_fence_commit(B); + if (i915_sw_fence_done(B)) { + pr_err("Fence B completed early\n"); + goto err_C; + } + + if (i915_sw_fence_done(A)) { + pr_err("Fence A completed early (after signaling B)\n"); + goto err_C; + } + + i915_sw_fence_commit(C); + + ret = 0; + if (!i915_sw_fence_done(C)) { + pr_err("Fence C not done\n"); + ret = -EINVAL; + } + if (!i915_sw_fence_done(B)) { + pr_err("Fence B not done\n"); + ret = -EINVAL; + } + if (!i915_sw_fence_done(A)) { + pr_err("Fence A not done\n"); + ret = -EINVAL; + } +err_C: + free_fence(C); +err_B: + free_fence(B); +err_A: + free_fence(A); + return ret; +} + +static int test_AB_C(void *arg) +{ + struct i915_sw_fence *A, *B, *C; + int ret = -EINVAL; + + /* Test multiple fences (AB) waiting on a single event (C) */ + A = alloc_fence(); + if (!A) + return -ENOMEM; + + B = alloc_fence(); + if (!B) { + ret = -ENOMEM; + goto err_A; + } + + C = alloc_fence(); + if (!B) { + ret = -ENOMEM; + goto err_B; + } + + ret = i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL); + if (ret < 0) + goto err_C; + if (ret == 0) { + ret = -EINVAL; + goto err_C; + } + + ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL); + if (ret < 0) + goto err_C; + if (ret == 0) { + ret = -EINVAL; + goto err_C; + } + + i915_sw_fence_commit(A); + i915_sw_fence_commit(B); + + ret = 0; + if (i915_sw_fence_done(A)) { + pr_err("Fence A completed early\n"); + ret = -EINVAL; + } + + if (i915_sw_fence_done(B)) { + pr_err("Fence B completed early\n"); + ret = -EINVAL; + } + + i915_sw_fence_commit(C); + if (!i915_sw_fence_done(C)) { + pr_err("Fence C not done\n"); + ret = -EINVAL; + } + + if (!i915_sw_fence_done(B)) { + pr_err("Fence B not done\n"); + ret = -EINVAL; + } + + if (!i915_sw_fence_done(A)) { + pr_err("Fence A not done\n"); + ret = -EINVAL; + } + +err_C: + free_fence(C); +err_B: + free_fence(B); +err_A: + free_fence(A); + return ret; +} + +static int test_C_AB(void *arg) +{ + struct i915_sw_fence *A, *B, *C; + int ret; + + /* Test multiple event sources (A,B) for a single fence (C) */ + A = alloc_fence(); + if (!A) + return -ENOMEM; + + B = alloc_fence(); + if (!B) { + ret = -ENOMEM; + goto err_A; + } + + C = alloc_fence(); + if (!B) { + ret = -ENOMEM; + goto err_B; + } + + ret = i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL); + if (ret < 0) + goto err_C; + if (ret == 0) { + ret = -EINVAL; + goto err_C; + } + + ret = i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL); + if (ret < 0) + goto err_C; + if 
(ret == 0) { + ret = -EINVAL; + goto err_C; + } + + ret = 0; + i915_sw_fence_commit(C); + if (i915_sw_fence_done(C)) + ret = -EINVAL; + + i915_sw_fence_commit(A); + i915_sw_fence_commit(B); + + if (!i915_sw_fence_done(A)) { + pr_err("Fence A not done\n"); + ret = -EINVAL; + } + + if (!i915_sw_fence_done(B)) { + pr_err("Fence B not done\n"); + ret = -EINVAL; + } + + if (!i915_sw_fence_done(C)) { + pr_err("Fence C not done\n"); + ret = -EINVAL; + } + +err_C: + free_fence(C); +err_B: + free_fence(B); +err_A: + free_fence(A); + return ret; +} + +static int test_chain(void *arg) +{ + int nfences = 4096; + struct i915_sw_fence **fences; + int ret, i; + + /* Test a long chain of fences */ + fences = kmalloc_array(nfences, sizeof(*fences), GFP_KERNEL); + if (!fences) + return -ENOMEM; + + for (i = 0; i < nfences; i++) { + fences[i] = alloc_fence(); + if (!fences[i]) { + nfences = i; + ret = -ENOMEM; + goto err; + } + + if (i > 0) { + ret = i915_sw_fence_await_sw_fence_gfp(fences[i], + fences[i - 1], + GFP_KERNEL); + if (ret < 0) { + nfences = i + 1; + goto err; + } + + i915_sw_fence_commit(fences[i]); + } + } + + ret = 0; + for (i = nfences; --i; ) { + if (i915_sw_fence_done(fences[i])) { + if (ret == 0) + pr_err("Fence[%d] completed early\n", i); + ret = -EINVAL; + } + } + i915_sw_fence_commit(fences[0]); + for (i = 0; ret == 0 && i < nfences; i++) { + if (!i915_sw_fence_done(fences[i])) { + pr_err("Fence[%d] is not done\n", i); + ret = -EINVAL; + } + } + +err: + for (i = 0; i < nfences; i++) + free_fence(fences[i]); + kfree(fences); + return ret; +} + +struct task_ipc { + struct work_struct work; + struct completion started; + struct i915_sw_fence *in, *out; + int value; +}; + +static void task_ipc(struct work_struct *work) +{ + struct task_ipc *ipc = container_of(work, typeof(*ipc), work); + + complete(&ipc->started); + + i915_sw_fence_wait(ipc->in); + smp_store_mb(ipc->value, 1); + i915_sw_fence_commit(ipc->out); +} + +static int test_ipc(void *arg) +{ + struct task_ipc ipc; + int ret = 0; + + /* Test use of i915_sw_fence as an interprocess signaling mechanism */ + ipc.in = alloc_fence(); + if (!ipc.in) + return -ENOMEM; + ipc.out = alloc_fence(); + if (!ipc.out) { + ret = -ENOMEM; + goto err_in; + } + + /* use a completion to avoid chicken-and-egg testing */ + init_completion(&ipc.started); + + ipc.value = 0; + INIT_WORK_ONSTACK(&ipc.work, task_ipc); + schedule_work(&ipc.work); + + wait_for_completion(&ipc.started); + + usleep_range(1000, 2000); + if (READ_ONCE(ipc.value)) { + pr_err("worker updated value before i915_sw_fence was signaled\n"); + ret = -EINVAL; + } + + i915_sw_fence_commit(ipc.in); + i915_sw_fence_wait(ipc.out); + + if (!READ_ONCE(ipc.value)) { + pr_err("worker signaled i915_sw_fence before value was posted\n"); + ret = -EINVAL; + } + + flush_work(&ipc.work); + destroy_work_on_stack(&ipc.work); + free_fence(ipc.out); +err_in: + free_fence(ipc.in); + return ret; +} + +int i915_sw_fence_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(test_self), + SUBTEST(test_dag), + SUBTEST(test_AB), + SUBTEST(test_ABC), + SUBTEST(test_AB_C), + SUBTEST(test_C_AB), + SUBTEST(test_chain), + SUBTEST(test_ipc), + }; + + return i915_subtests(tests, NULL); +} -- cgit v1.1 From 991bfc64db096cb924d5d216a9cb469590f00428 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 May 2017 13:09:58 +0100 Subject: drm/i915: Make ptr_unpack_bits() more function-like ptr_unpack_bits() is a function-like macro, as such it is meant to be replaceable by a function. 
In this case, we should be passing in the out-param as a pointer. Bizarrely this does affect code generation: function old new delta i915_gem_object_pin_map 409 389 -20 An improvement(?) in this case, but one can't help wonder what strict-aliasing optimisations we are preventing. The generated code looks identical in using ptr_unpack_bits (no extra motions to stack, the pointer and bits appear to be kept in registers), the difference appears to be code ordering and with a reorder it is able to use smaller forward jumps. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_utils.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f9c6b9b..5d68bec 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2612,7 +2612,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, } GEM_BUG_ON(!obj->mm.pages); - ptr = ptr_unpack_bits(obj->mm.mapping, has_type); + ptr = ptr_unpack_bits(obj->mm.mapping, &has_type); if (ptr && has_type != type) { if (pinned) { ret = -EBUSY; diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index f9d6607..18630d8 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -77,7 +77,7 @@ #define ptr_unpack_bits(ptr, bits) ({ \ unsigned long __v = (unsigned long)(ptr); \ - (bits) = __v & ~PAGE_MASK; \ + *(bits) = __v & ~PAGE_MASK; \ (typeof(ptr))(__v & PAGE_MASK); \ }) -- cgit v1.1 From 0ce81788080570e28efb43a4036976854e0edca8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 May 2017 13:09:59 +0100 Subject: drm/i915: Redefine ptr_pack_bits() and friends Rebrand the current (pointer | bits) pack/unpack utility macros as explicit bit twiddling for PAGE_SIZE so that we can use the more flexible underlying macros for different bits. 
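A standalone sketch of the bit-twiddling these generalised macros rely on may help here (plain userspace C, re-deriving minimal equivalents of the helpers rather than quoting the kernel headers; the aligned_alloc() call and the packed value 3 are arbitrary): any pointer aligned to at least BIT(n) bytes has its n low bits clear, so those bits can carry a small tag or counter and be stripped off again before the pointer is dereferenced.

    #include <assert.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define BIT(n) (1ULL << (n))

    /* Minimal stand-ins for ptr_pack_bits()/ptr_mask_bits()/ptr_unmask_bits() */
    #define ptr_pack_bits(ptr, bits, n) \
        ((void *)((uintptr_t)(ptr) | ((bits) & (BIT(n) - 1))))
    #define ptr_mask_bits(ptr, n) \
        ((void *)((uintptr_t)(ptr) & ~(uintptr_t)(BIT(n) - 1)))
    #define ptr_unmask_bits(ptr, n) ((uintptr_t)(ptr) & (BIT(n) - 1))

    int main(void)
    {
        void *obj, *packed;

        /* aligned_alloc(16, ...) guarantees the low four bits are zero */
        obj = aligned_alloc(16, 64);
        if (!obj)
            return 1;

        packed = ptr_pack_bits(obj, 3, 4);          /* stash the value 3 in the low bits */
        assert(ptr_mask_bits(packed, 4) == obj);    /* original pointer recovered */
        assert(ptr_unmask_bits(packed, 4) == 3);    /* packed bits recovered */

        free(obj);
        return 0;
    }

This is also what lets the later execlists patch hide a two-bit submission count in the low bits of a request pointer: the request is slab-allocated and therefore at least word-aligned, leaving the bottom two bits free.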
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_cmd_parser.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 6 +++--- drivers/gpu/drm/i915/i915_utils.h | 19 +++++++++++++------ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 2a1a334..f0cb22c 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1284,7 +1284,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, if (*cmd == MI_BATCH_BUFFER_END) { if (needs_clflush_after) { - void *ptr = ptr_mask_bits(shadow_batch_obj->mm.mapping); + void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5d68bec..b63c3d1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2280,7 +2280,7 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, if (obj->mm.mapping) { void *ptr; - ptr = ptr_mask_bits(obj->mm.mapping); + ptr = page_mask_bits(obj->mm.mapping); if (is_vmalloc_addr(ptr)) vunmap(ptr); else @@ -2612,7 +2612,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, } GEM_BUG_ON(!obj->mm.pages); - ptr = ptr_unpack_bits(obj->mm.mapping, &has_type); + ptr = page_unpack_bits(obj->mm.mapping, &has_type); if (ptr && has_type != type) { if (pinned) { ret = -EBUSY; @@ -2634,7 +2634,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, goto err_unpin; } - obj->mm.mapping = ptr_pack_bits(ptr, type); + obj->mm.mapping = page_pack_bits(ptr, type); } out_unlock: diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 18630d8..d9df237 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -70,20 +70,27 @@ #define overflows_type(x, T) \ (sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE)) -#define ptr_mask_bits(ptr) ({ \ +#define ptr_mask_bits(ptr, n) ({ \ unsigned long __v = (unsigned long)(ptr); \ - (typeof(ptr))(__v & PAGE_MASK); \ + (typeof(ptr))(__v & -BIT(n)); \ }) -#define ptr_unpack_bits(ptr, bits) ({ \ +#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1)) + +#define ptr_unpack_bits(ptr, bits, n) ({ \ unsigned long __v = (unsigned long)(ptr); \ - *(bits) = __v & ~PAGE_MASK; \ - (typeof(ptr))(__v & PAGE_MASK); \ + *(bits) = __v & (BIT(n) - 1); \ + (typeof(ptr))(__v & -BIT(n)); \ }) -#define ptr_pack_bits(ptr, bits) \ +#define ptr_pack_bits(ptr, bits, n) \ ((typeof(ptr))((unsigned long)(ptr) | (bits))) +#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT) +#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT) +#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT) +#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT) + #define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member) #define fetch_and_zero(ptr) ({ \ -- cgit v1.1 From 77f0d0e925e8a0f17a927a1f4e266d1f0e95cb72 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 May 2017 13:10:00 +0100 Subject: drm/i915/execlists: Pack the count into the low bits of the port.request add/remove: 1/1 grow/shrink: 5/4 up/down: 391/-578 (-187) function old new delta execlists_submit_ports 262 471 +209 port_assign.isra - 136 +136 capture 6344 6359 +15 reset_common_ring 438 
452 +14 execlists_submit_request 228 238 +10 gen8_init_common_ring 334 341 +7 intel_engine_is_idle 106 105 -1 i915_engine_info 2314 2290 -24 __i915_gem_set_wedged_BKL 485 411 -74 intel_lrc_irq_handler 1789 1604 -185 execlists_update_context 294 - -294 The most important change there is the improve to the intel_lrc_irq_handler and excclist_submit_ports (net improvement since execlists_update_context is now inlined). v2: Use the port_api() for guc as well (even though currently we do not pack any counters in there, yet) and hide all port->request_count inside the helpers. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 32 ++++---- drivers/gpu/drm/i915/i915_gem.c | 6 +- drivers/gpu/drm/i915/i915_gpu_error.c | 13 +++- drivers/gpu/drm/i915/i915_guc_submission.c | 34 +++++--- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 121 ++++++++++++++++------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++- 7 files changed, 126 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 76abff1..e08ac70 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3353,6 +3353,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) if (i915.enable_execlists) { u32 ptr, read, write; struct rb_node *rb; + unsigned int idx; seq_printf(m, "\tExeclist status: 0x%08x %08x\n", I915_READ(RING_EXECLIST_STATUS_LO(engine)), @@ -3370,8 +3371,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) if (read > write) write += GEN8_CSB_ENTRIES; while (read < write) { - unsigned int idx = ++read % GEN8_CSB_ENTRIES; - + idx = ++read % GEN8_CSB_ENTRIES; seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n", idx, I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), @@ -3379,21 +3379,19 @@ static int i915_engine_info(struct seq_file *m, void *unused) } rcu_read_lock(); - rq = READ_ONCE(engine->execlist_port[0].request); - if (rq) { - seq_printf(m, "\t\tELSP[0] count=%d, ", - engine->execlist_port[0].count); - print_request(m, rq, "rq: "); - } else { - seq_printf(m, "\t\tELSP[0] idle\n"); - } - rq = READ_ONCE(engine->execlist_port[1].request); - if (rq) { - seq_printf(m, "\t\tELSP[1] count=%d, ", - engine->execlist_port[1].count); - print_request(m, rq, "rq: "); - } else { - seq_printf(m, "\t\tELSP[1] idle\n"); + for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) { + unsigned int count; + + rq = port_unpack(&engine->execlist_port[idx], + &count); + if (rq) { + seq_printf(m, "\t\tELSP[%d] count=%d, ", + idx, count); + print_request(m, rq, "rq: "); + } else { + seq_printf(m, "\t\tELSP[%d] idle\n", + idx); + } } rcu_read_unlock(); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b63c3d1..75d7575 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3019,12 +3019,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine) */ if (i915.enable_execlists) { + struct execlist_port *port = engine->execlist_port; unsigned long flags; + unsigned int n; spin_lock_irqsave(&engine->timeline->lock, flags); - i915_gem_request_put(engine->execlist_port[0].request); - i915_gem_request_put(engine->execlist_port[1].request); + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) + 
i915_gem_request_put(port_request(&port[n])); memset(engine->execlist_port, 0, sizeof(engine->execlist_port)); engine->execlist_queue = RB_ROOT; engine->execlist_first = NULL; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index ec526d9..e18f350 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1324,12 +1324,17 @@ static void engine_record_requests(struct intel_engine_cs *engine, static void error_record_engine_execlists(struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { + const struct execlist_port *port = engine->execlist_port; unsigned int n; - for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) - if (engine->execlist_port[n].request) - record_request(engine->execlist_port[n].request, - &ee->execlist[n]); + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { + struct drm_i915_gem_request *rq = port_request(&port[n]); + + if (!rq) + break; + + record_request(rq, &ee->execlist[n]); + } } static void record_context(struct drm_i915_error_context *e, diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 7e85b5a..014cbd1 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -653,10 +653,22 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq) spin_unlock(&rq->lock); } +static void port_assign(struct execlist_port *port, + struct drm_i915_gem_request *rq) +{ + GEM_BUG_ON(rq == port_request(port)); + + if (port_isset(port)) + i915_gem_request_put(port_request(port)); + + port_set(port, i915_gem_request_get(rq)); + nested_enable_signaling(rq); +} + static bool i915_guc_dequeue(struct intel_engine_cs *engine) { struct execlist_port *port = engine->execlist_port; - struct drm_i915_gem_request *last = port[0].request; + struct drm_i915_gem_request *last = port_request(port); struct rb_node *rb; bool submit = false; @@ -670,8 +682,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) if (port != engine->execlist_port) break; - i915_gem_request_assign(&port->request, last); - nested_enable_signaling(last); + port_assign(port, last); port++; } @@ -681,13 +692,12 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) rq->priotree.priority = INT_MAX; i915_guc_submit(rq); - trace_i915_gem_request_in(rq, port - engine->execlist_port); + trace_i915_gem_request_in(rq, port_index(port, engine)); last = rq; submit = true; } if (submit) { - i915_gem_request_assign(&port->request, last); - nested_enable_signaling(last); + port_assign(port, last); engine->execlist_first = rb; } spin_unlock_irq(&engine->timeline->lock); @@ -703,17 +713,19 @@ static void i915_guc_irq_handler(unsigned long data) bool submit; do { - rq = port[0].request; + rq = port_request(&port[0]); while (rq && i915_gem_request_completed(rq)) { trace_i915_gem_request_out(rq); i915_gem_request_put(rq); - port[0].request = port[1].request; - port[1].request = NULL; - rq = port[0].request; + + port[0] = port[1]; + memset(&port[1], 0, sizeof(port[1])); + + rq = port_request(&port[0]); } submit = false; - if (!port[1].request) + if (!port_count(&port[1])) submit = i915_guc_dequeue(engine); } while (submit); } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 49c2315..e312dec 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1233,7 +1233,7 @@ bool intel_engine_is_idle(struct intel_engine_cs 
*engine) return false; /* Both ports drained, no more ELSP submission? */ - if (engine->execlist_port[0].request) + if (port_request(&engine->execlist_port[0])) return false; /* Ring stopped? */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9a1192d..53ec0d5 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -337,39 +337,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) static void execlists_submit_ports(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; struct execlist_port *port = engine->execlist_port; u32 __iomem *elsp = - dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); - u64 desc[2]; - - GEM_BUG_ON(port[0].count > 1); - if (!port[0].count) - execlists_context_status_change(port[0].request, - INTEL_CONTEXT_SCHEDULE_IN); - desc[0] = execlists_update_context(port[0].request); - GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0])); - port[0].count++; - - if (port[1].request) { - GEM_BUG_ON(port[1].count); - execlists_context_status_change(port[1].request, - INTEL_CONTEXT_SCHEDULE_IN); - desc[1] = execlists_update_context(port[1].request); - GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1])); - port[1].count = 1; - } else { - desc[1] = 0; - } - GEM_BUG_ON(desc[0] == desc[1]); + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + unsigned int n; - /* You must always write both descriptors in the order below. */ - writel(upper_32_bits(desc[1]), elsp); - writel(lower_32_bits(desc[1]), elsp); + for (n = ARRAY_SIZE(engine->execlist_port); n--; ) { + struct drm_i915_gem_request *rq; + unsigned int count; + u64 desc; + + rq = port_unpack(&port[n], &count); + if (rq) { + GEM_BUG_ON(count > !n); + if (!count++) + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + port_set(&port[n], port_pack(rq, count)); + desc = execlists_update_context(rq); + GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); + } else { + GEM_BUG_ON(!n); + desc = 0; + } - writel(upper_32_bits(desc[0]), elsp); - /* The context is automatically loaded after the following */ - writel(lower_32_bits(desc[0]), elsp); + writel(upper_32_bits(desc), elsp); + writel(lower_32_bits(desc), elsp); + } } static bool ctx_single_port_submission(const struct i915_gem_context *ctx) @@ -390,6 +383,17 @@ static bool can_merge_ctx(const struct i915_gem_context *prev, return true; } +static void port_assign(struct execlist_port *port, + struct drm_i915_gem_request *rq) +{ + GEM_BUG_ON(rq == port_request(port)); + + if (port_isset(port)) + i915_gem_request_put(port_request(port)); + + port_set(port, port_pack(i915_gem_request_get(rq), port_count(port))); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct drm_i915_gem_request *last; @@ -397,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct rb_node *rb; bool submit = false; - last = port->request; + last = port_request(port); if (last) /* WaIdleLiteRestore:bdw,skl * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL @@ -407,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ last->tail = last->wa_tail; - GEM_BUG_ON(port[1].request); + GEM_BUG_ON(port_isset(&port[1])); /* Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. 
RING_START is @@ -464,7 +468,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(last->ctx == cursor->ctx); - i915_gem_request_assign(&port->request, last); + if (submit) + port_assign(port, last); port++; } @@ -474,12 +479,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine) cursor->priotree.priority = INT_MAX; __i915_gem_request_submit(cursor); - trace_i915_gem_request_in(cursor, port - engine->execlist_port); + trace_i915_gem_request_in(cursor, port_index(port, engine)); last = cursor; submit = true; } if (submit) { - i915_gem_request_assign(&port->request, last); + port_assign(port, last); engine->execlist_first = rb; } spin_unlock_irq(&engine->timeline->lock); @@ -488,16 +493,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) execlists_submit_ports(engine); } -static bool execlists_elsp_idle(struct intel_engine_cs *engine) -{ - return !engine->execlist_port[0].request; -} - static bool execlists_elsp_ready(const struct intel_engine_cs *engine) { const struct execlist_port *port = engine->execlist_port; - return port[0].count + port[1].count < 2; + return port_count(&port[0]) + port_count(&port[1]) < 2; } /* @@ -547,7 +547,9 @@ static void intel_lrc_irq_handler(unsigned long data) tail = GEN8_CSB_WRITE_PTR(head); head = GEN8_CSB_READ_PTR(head); while (head != tail) { + struct drm_i915_gem_request *rq; unsigned int status; + unsigned int count; if (++head == GEN8_CSB_ENTRIES) head = 0; @@ -575,22 +577,26 @@ static void intel_lrc_irq_handler(unsigned long data) /* Check the context/desc id for this event matches */ GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) != - port[0].context_id); + port->context_id); - GEM_BUG_ON(port[0].count == 0); - if (--port[0].count == 0) { + rq = port_unpack(port, &count); + GEM_BUG_ON(count == 0); + if (--count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); - GEM_BUG_ON(!i915_gem_request_completed(port[0].request)); - execlists_context_status_change(port[0].request, - INTEL_CONTEXT_SCHEDULE_OUT); + GEM_BUG_ON(!i915_gem_request_completed(rq)); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + + trace_i915_gem_request_out(rq); + i915_gem_request_put(rq); - trace_i915_gem_request_out(port[0].request); - i915_gem_request_put(port[0].request); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); + } else { + port_set(port, port_pack(rq, count)); } - GEM_BUG_ON(port[0].count == 0 && + /* After the final element, the hw should be idle */ + GEM_BUG_ON(port_count(port) == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); } @@ -1147,6 +1153,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; struct execlist_port *port = engine->execlist_port; unsigned int n; + bool submit; int ret; ret = intel_mocs_init_engine(engine); @@ -1168,19 +1175,21 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) /* After a GPU reset, we may have requests to replay */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + submit = false; for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { - if (!port[n].request) + if (!port_isset(&port[n])) break; DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n", engine->name, n, - port[n].request->global_seqno); + port_request(&port[n])->global_seqno); /* Discard the current inflight count */ - port[n].count = 0; + port_set(&port[n], port_request(&port[n])); + submit = true; } - if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) + if (submit && 
!i915.enable_guc_submission) execlists_submit_ports(engine); return 0; @@ -1258,13 +1267,13 @@ static void reset_common_ring(struct intel_engine_cs *engine, intel_ring_update_space(request->ring); /* Catch up with any missed context-switch interrupts */ - if (request->ctx != port[0].request->ctx) { - i915_gem_request_put(port[0].request); + if (request->ctx != port_request(port)->ctx) { + i915_gem_request_put(port_request(port)); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); } - GEM_BUG_ON(request->ctx != port[0].request->ctx); + GEM_BUG_ON(request->ctx != port_request(port)->ctx); /* Reset WaIdleLiteRestore:bdw,skl as well */ request->tail = diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index ec16fb6..162f0a9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -368,8 +368,15 @@ struct intel_engine_cs { /* Execlists */ struct tasklet_struct irq_tasklet; struct execlist_port { - struct drm_i915_gem_request *request; - unsigned int count; + struct drm_i915_gem_request *request_count; +#define EXECLIST_COUNT_BITS 2 +#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) +#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) +#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) +#define port_set(p, packed) ((p)->request_count = (packed)) +#define port_isset(p) ((p)->request_count) +#define port_index(p, e) ((p) - (e)->execlist_port) GEM_DEBUG_DECL(u32 context_id); } execlist_port[2]; struct rb_root execlist_queue; -- cgit v1.1 From a4b2b01523a8f147112f44cd0867d246ed07b43c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 May 2017 13:10:01 +0100 Subject: drm/i915: Don't mark an execlists context-switch when idle If we *know* that the engine is idle, i.e. we have not more contexts in flight, we can skip any spurious CSB idle interrupts. These spurious interrupts seem to arrive long after we assert that the engines are completely idle, triggering later assertions: [ 178.896646] intel_engine_is_idle(bcs): interrupt not handled, irq_posted=2 [ 178.896655] ------------[ cut here ]------------ [ 178.896658] kernel BUG at drivers/gpu/drm/i915/intel_engine_cs.c:226! 
[ 178.896661] invalid opcode: 0000 [#1] SMP [ 178.896663] Modules linked in: i915(E) x86_pkg_temp_thermal(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) ghash_clmulni_intel(E) nls_ascii(E) nls_cp437(E) vfat(E) fat(E) intel_gtt(E) i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) fb_sys_fops(E) aesni_intel(E) prime_numbers(E) evdev(E) aes_x86_64(E) drm(E) crypto_simd(E) cryptd(E) glue_helper(E) mei_me(E) mei(E) lpc_ich(E) efivars(E) mfd_core(E) battery(E) video(E) acpi_pad(E) button(E) tpm_tis(E) tpm_tis_core(E) tpm(E) autofs4(E) i2c_i801(E) fan(E) thermal(E) i2c_designware_platform(E) i2c_designware_core(E) [ 178.896694] CPU: 1 PID: 522 Comm: gem_exec_whispe Tainted: G E 4.11.0-rc5+ #14 [ 178.896702] task: ffff88040aba8d40 task.stack: ffffc900003f0000 [ 178.896722] RIP: 0010:intel_engine_init_global_seqno+0x1db/0x1f0 [i915] [ 178.896725] RSP: 0018:ffffc900003f3ab0 EFLAGS: 00010246 [ 178.896728] RAX: 0000000000000000 RBX: ffff88040af54000 RCX: 0000000000000000 [ 178.896731] RDX: ffff88041ec933e0 RSI: ffff88041ec8cc48 RDI: ffff88041ec8cc48 [ 178.896734] RBP: ffffc900003f3ac8 R08: 0000000000000000 R09: 000000000000047d [ 178.896736] R10: 0000000000000040 R11: ffff88040b344f80 R12: 0000000000000000 [ 178.896739] R13: ffff88040bce0000 R14: ffff88040bce52d8 R15: ffff88040bce0000 [ 178.896742] FS: 00007f2cccc2d8c0(0000) GS:ffff88041ec80000(0000) knlGS:0000000000000000 [ 178.896746] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 178.896749] CR2: 00007f41ddd8f000 CR3: 000000040bb03000 CR4: 00000000001406e0 [ 178.896752] Call Trace: [ 178.896768] reset_all_global_seqno.part.33+0x4e/0xd0 [i915] [ 178.896782] i915_gem_request_alloc+0x304/0x330 [i915] [ 178.896795] i915_gem_do_execbuffer+0x8a1/0x17d0 [i915] [ 178.896799] ? remove_wait_queue+0x48/0x50 [ 178.896812] ? i915_wait_request+0x300/0x590 [i915] [ 178.896816] ? wake_up_q+0x70/0x70 [ 178.896819] ? refcount_dec_and_test+0x11/0x20 [ 178.896823] ? reservation_object_add_excl_fence+0xa5/0x100 [ 178.896835] i915_gem_execbuffer2+0xab/0x1f0 [i915] [ 178.896844] drm_ioctl+0x1e6/0x460 [drm] [ 178.896858] ? i915_gem_execbuffer+0x260/0x260 [i915] [ 178.896862] ? dput+0xcf/0x250 [ 178.896866] ? full_proxy_release+0x66/0x80 [ 178.896869] ? mntput+0x1f/0x30 [ 178.896872] do_vfs_ioctl+0x8f/0x5b0 [ 178.896875] ? ____fput+0x9/0x10 [ 178.896878] ? task_work_run+0x80/0xa0 [ 178.896881] SyS_ioctl+0x3c/0x70 [ 178.896885] entry_SYSCALL_64_fastpath+0x17/0x98 [ 178.896888] RIP: 0033:0x7f2ccb455ca7 [ 178.896890] RSP: 002b:00007ffcabec72d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 178.896894] RAX: ffffffffffffffda RBX: 000055f897a44b90 RCX: 00007f2ccb455ca7 [ 178.896897] RDX: 00007ffcabec74a0 RSI: 0000000040406469 RDI: 0000000000000003 [ 178.896900] RBP: 00007f2ccb70a440 R08: 00007f2ccb70d0a4 R09: 0000000000000000 [ 178.896903] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [ 178.896905] R13: 000055f89782d71a R14: 00007ffcabecf838 R15: 0000000000000003 [ 178.896908] Code: 00 31 d2 4c 89 ef 8d 70 48 41 ff 95 f8 06 00 00 e9 68 fe ff ff be 0f 00 00 00 48 c7 c7 48 dc 37 a0 e8 fa 33 d6 e0 e9 0b ff ff ff <0f> 0b 0f 0b 0f 0b 0f 0b 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 On the other hand, by ignoring the interrupt do we risk running out of space in CSB ring? Testing for a few hours suggests not, i.e. that we only seem to get the odd delayed CSB idle notification. 
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c99f51c..636b231 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1359,8 +1359,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) bool tasklet = false; if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { - set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet = true; + if (port_count(&engine->execlist_port[0])) { + set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet = true; + } } if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { -- cgit v1.1 From e4f815f6bf4694b275defee97d2cd36a4764d6a1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 May 2017 13:10:02 +0100 Subject: drm/i915: Use a define for the default priority [0] Explicitly assign the default priority, and give it a name. After much discussion, we have chosen to call it I915_PRIORITY_NORMAL! Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 1 + drivers/gpu/drm/i915/i915_gem_request.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 31a73c3..c5d1666 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -199,6 +199,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, kref_init(&ctx->ref); list_add_tail(&ctx->link, &dev_priv->context_list); ctx->i915 = dev_priv; + ctx->priority = I915_PRIORITY_NORMAL; /* Default context will never have a file_priv */ ret = DEFAULT_CONTEXT_HANDLE; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 4ccab5a..0ecfc5e 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -70,6 +70,7 @@ struct i915_priotree { struct rb_node node; int priority; #define I915_PRIORITY_MAX 1024 +#define I915_PRIORITY_NORMAL 0 #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX) }; -- cgit v1.1 From 6c067579e69b42bff476959fd7bb561ffa3f11e0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 May 2017 13:10:03 +0100 Subject: drm/i915: Split execlist priority queue into rbtree + linked list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the requests at the same priority are executed in FIFO order. They do not need to be stored in the rbtree themselves, as they are a simple list within a level. If we move the requests at one priority into a list, we can then reduce the rbtree to the set of priorities. This should keep the height of the rbtree small, as the number of active priorities can not exceed the number of active requests and should be typically only a few. Currently, we have ~2k possible different priority levels, that may increase to allow even more fine grained selection. Allocating those in advance seems a waste (and may be impossible), so we opt for allocating upon first use, and freeing after its requests are depleted. 
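A rough userspace sketch of the shape being described (the patch keys a kernel rbtree by priority; a sorted singly-linked list of buckets stands in for the rbtree here purely to keep the example short, and all names are invented): each active priority owns one bucket, equal-priority requests are appended to that bucket's FIFO list, and dequeue walks buckets from the highest priority down, draining each list in submission order.

    #include <stdio.h>
    #include <stdlib.h>

    struct request {
        int seqno;
        struct request *next;           /* FIFO link inside one priority level */
    };

    struct bucket {
        int priority;
        struct request *head, **tail;   /* FIFO of equal-priority requests */
        struct bucket *next;            /* levels sorted, highest priority first */
    };

    static struct bucket *queue;        /* stands in for the per-engine rbtree */

    static void submit(int priority, int seqno)
    {
        struct bucket **p = &queue, *b;
        struct request *rq = malloc(sizeof(*rq));   /* error handling omitted */

        rq->seqno = seqno;
        rq->next = NULL;

        while (*p && (*p)->priority > priority)
            p = &(*p)->next;
        if (!*p || (*p)->priority != priority) {
            b = malloc(sizeof(*b));                 /* one node per active level */
            b->priority = priority;
            b->head = NULL;
            b->tail = &b->head;
            b->next = *p;
            *p = b;
        }
        b = *p;
        *b->tail = rq;          /* equal priorities stay strictly FIFO */
        b->tail = &rq->next;
    }

    int main(void)
    {
        submit(0, 1);
        submit(10, 2);
        submit(0, 3);

        /* Dequeue order: highest level first, FIFO within a level. */
        for (struct bucket *b = queue; b; b = b->next)
            for (struct request *rq = b->head; rq; rq = rq->next)
                printf("prio %d: seqno %d\n", b->priority, rq->seqno);
        return 0;
    }

The height of the real rbtree therefore tracks the number of distinct priorities in flight rather than the number of queued requests.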
To avoid the possibility of an allocation failure causing us to lose a request, we preallocate the default priority (0) and bump any request to that priority if we fail to allocate it the appropriate plist. Having a request (that is ready to run, so not leading to corruption) execute out-of-order is better than leaking the request (and its dependency tree) entirely. There should be a benefit to reducing execlists_dequeue() to principally using a simple list (and reducing the frequency of both rbtree iteration and balancing on erase) but for typical workloads, request coalescing should be small enough that we don't notice any change. The main gain is from improving PI calls to schedule, and the explicit list within a level should make request unwinding simpler (we just need to insert at the head of the list rather than the tail and not have to make the rbtree search more complicated). v2: Avoid use-after-free when deleting a depleted priolist v3: Michał found the solution to handling the allocation failure gracefully. If we disable all priority scheduling following the allocation failure, those requests will be executed in fifo and we will ensure that this request and its dependencies are in strict fifo (even when it doesn't realise it is only a single list). Normal scheduling is restored once we know the device is idle, until the next failure! Suggested-by: Michał Wajdeczko Signed-off-by: Chris Wilson Cc: Michał Winiarski Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Michał Winiarski Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 11 +- drivers/gpu/drm/i915/i915_gem.c | 7 +- drivers/gpu/drm/i915/i915_gem_request.c | 4 +- drivers/gpu/drm/i915/i915_gem_request.h | 2 +- drivers/gpu/drm/i915/i915_guc_submission.c | 50 ++++---- drivers/gpu/drm/i915/i915_utils.h | 9 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 12 ++ drivers/gpu/drm/i915/intel_lrc.c | 183 +++++++++++++++++++---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 9 ++ 9 files changed, 192 insertions(+), 95 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e08ac70..8abb939 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3352,7 +3352,6 @@ static int i915_engine_info(struct seq_file *m, void *unused) if (i915.enable_execlists) { u32 ptr, read, write; - struct rb_node *rb; unsigned int idx; seq_printf(m, "\tExeclist status: 0x%08x %08x\n", @@ -3396,9 +3395,13 @@ static int i915_engine_info(struct seq_file *m, void *unused) rcu_read_unlock(); spin_lock_irq(&engine->timeline->lock); - for (rb = engine->execlist_first; rb; rb = rb_next(rb)) { - rq = rb_entry(rb, typeof(*rq), priotree.node); - print_request(m, rq, "\t\tQ "); + for (rb = engine->execlist_first; rb; rb = rb_next(rb)){ + struct i915_priolist *p = + rb_entry(rb, typeof(*p), node); + + list_for_each_entry(rq, &p->requests, + priotree.link) + print_request(m, rq, "\t\tQ "); } spin_unlock_irq(&engine->timeline->lock); } else if (INTEL_GEN(dev_priv) > 6) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 75d7575..3d9161c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3155,8 +3155,6 @@ i915_gem_idle_work_handler(struct work_struct *work) struct drm_i915_private *dev_priv = container_of(work, typeof(*dev_priv), gt.idle_work.work); struct drm_device *dev = &dev_priv->drm; - struct 
intel_engine_cs *engine; - enum intel_engine_id id; bool rearm_hangcheck; if (!READ_ONCE(dev_priv->gt.awake)) @@ -3194,10 +3192,7 @@ i915_gem_idle_work_handler(struct work_struct *work) if (wait_for(intel_engines_are_idle(dev_priv), 10)) DRM_ERROR("Timeout waiting for engines to idle\n"); - for_each_engine(engine, dev_priv, id) { - intel_engine_disarm_breadcrumbs(engine); - i915_gem_batch_pool_fini(&engine->batch_pool); - } + intel_engines_mark_idle(dev_priv); i915_gem_timelines_mark_idle(dev_priv); GEM_BUG_ON(!dev_priv->gt.awake); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 10361c7..1ccf252 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -159,7 +159,7 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) { struct i915_dependency *dep, *next; - GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node)); + GEM_BUG_ON(!list_empty(&pt->link)); /* Everyone we depended upon (the fences we wait to be signaled) * should retire before us and remove themselves from our list. @@ -185,7 +185,7 @@ i915_priotree_init(struct i915_priotree *pt) { INIT_LIST_HEAD(&pt->signalers_list); INIT_LIST_HEAD(&pt->waiters_list); - RB_CLEAR_NODE(&pt->node); + INIT_LIST_HEAD(&pt->link); pt->priority = INT_MIN; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 0ecfc5e..8c508bd 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -67,7 +67,7 @@ struct i915_dependency { struct i915_priotree { struct list_head signalers_list; /* those before us, we depend upon */ struct list_head waiters_list; /* those after us, they depend upon us */ - struct rb_node node; + struct list_head link; int priority; #define I915_PRIORITY_MAX 1024 #define I915_PRIORITY_NORMAL 0 diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 014cbd1..3b9cdb0 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -674,32 +674,42 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) spin_lock_irq(&engine->timeline->lock); rb = engine->execlist_first; + GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb); while (rb) { - struct drm_i915_gem_request *rq = - rb_entry(rb, typeof(*rq), priotree.node); - - if (last && rq->ctx != last->ctx) { - if (port != engine->execlist_port) - break; - - port_assign(port, last); - port++; + struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + struct drm_i915_gem_request *rq, *rn; + + list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + if (last && rq->ctx != last->ctx) { + if (port != engine->execlist_port) { + __list_del_many(&p->requests, + &rq->priotree.link); + goto done; + } + + port_assign(port, last); + port++; + } + + INIT_LIST_HEAD(&rq->priotree.link); + rq->priotree.priority = INT_MAX; + + i915_guc_submit(rq); + trace_i915_gem_request_in(rq, port_index(port, engine)); + last = rq; + submit = true; } rb = rb_next(rb); - rb_erase(&rq->priotree.node, &engine->execlist_queue); - RB_CLEAR_NODE(&rq->priotree.node); - rq->priotree.priority = INT_MAX; - - i915_guc_submit(rq); - trace_i915_gem_request_in(rq, port_index(port, engine)); - last = rq; - submit = true; + rb_erase(&p->node, &engine->execlist_queue); + INIT_LIST_HEAD(&p->requests); + if (p->priority != I915_PRIORITY_NORMAL) + kfree(p); } - if (submit) { +done: + engine->execlist_first = rb; + if (submit) 
port_assign(port, last); - engine->execlist_first = rb; - } spin_unlock_irq(&engine->timeline->lock); return submit; diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index d9df237..16ecd1a 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -105,4 +105,13 @@ __idx; \ }) +#include + +static inline void __list_del_many(struct list_head *head, + struct list_head *first) +{ + first->prev = head; + WRITE_ONCE(head->next, first); +} + #endif /* !__I915_UTILS_H */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e312dec..413bfd8 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1274,6 +1274,18 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915) engine->set_default_submission(engine); } +void intel_engines_mark_idle(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + intel_engine_disarm_breadcrumbs(engine); + i915_gem_batch_pool_fini(&engine->batch_pool); + engine->no_priolist = false; + } +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 53ec0d5..626db61 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -436,57 +436,75 @@ static void execlists_dequeue(struct intel_engine_cs *engine) spin_lock_irq(&engine->timeline->lock); rb = engine->execlist_first; + GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb); while (rb) { - struct drm_i915_gem_request *cursor = - rb_entry(rb, typeof(*cursor), priotree.node); - - /* Can we combine this request with the current port? It has to - * be the same context/ringbuffer and not have any exceptions - * (e.g. GVT saying never to combine contexts). - * - * If we can combine the requests, we can execute both by - * updating the RING_TAIL to point to the end of the second - * request, and so we never need to tell the hardware about - * the first. - */ - if (last && !can_merge_ctx(cursor->ctx, last->ctx)) { - /* If we are on the second port and cannot combine - * this request with the last, then we are done. - */ - if (port != engine->execlist_port) - break; - - /* If GVT overrides us we only ever submit port[0], - * leaving port[1] empty. Note that we also have - * to be careful that we don't queue the same - * context (even though a different request) to - * the second port. + struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + struct drm_i915_gem_request *rq, *rn; + + list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + /* + * Can we combine this request with the current port? + * It has to be the same context/ringbuffer and not + * have any exceptions (e.g. GVT saying never to + * combine contexts). + * + * If we can combine the requests, we can execute both + * by updating the RING_TAIL to point to the end of the + * second request, and so we never need to tell the + * hardware about the first. */ - if (ctx_single_port_submission(last->ctx) || - ctx_single_port_submission(cursor->ctx)) - break; + if (last && !can_merge_ctx(rq->ctx, last->ctx)) { + /* + * If we are on the second port and cannot + * combine this request with the last, then we + * are done. 
+ */ + if (port != engine->execlist_port) { + __list_del_many(&p->requests, + &rq->priotree.link); + goto done; + } + + /* + * If GVT overrides us we only ever submit + * port[0], leaving port[1] empty. Note that we + * also have to be careful that we don't queue + * the same context (even though a different + * request) to the second port. + */ + if (ctx_single_port_submission(last->ctx) || + ctx_single_port_submission(rq->ctx)) { + __list_del_many(&p->requests, + &rq->priotree.link); + goto done; + } + + GEM_BUG_ON(last->ctx == rq->ctx); + + if (submit) + port_assign(port, last); + port++; + } - GEM_BUG_ON(last->ctx == cursor->ctx); + INIT_LIST_HEAD(&rq->priotree.link); + rq->priotree.priority = INT_MAX; - if (submit) - port_assign(port, last); - port++; + __i915_gem_request_submit(rq); + trace_i915_gem_request_in(rq, port_index(port, engine)); + last = rq; + submit = true; } rb = rb_next(rb); - rb_erase(&cursor->priotree.node, &engine->execlist_queue); - RB_CLEAR_NODE(&cursor->priotree.node); - cursor->priotree.priority = INT_MAX; - - __i915_gem_request_submit(cursor); - trace_i915_gem_request_in(cursor, port_index(port, engine)); - last = cursor; - submit = true; + rb_erase(&p->node, &engine->execlist_queue); + INIT_LIST_HEAD(&p->requests); + if (p->priority != I915_PRIORITY_NORMAL) + kfree(p); } - if (submit) { +done: + engine->execlist_first = rb; + if (submit) port_assign(port, last); - engine->execlist_first = rb; - } spin_unlock_irq(&engine->timeline->lock); if (submit) @@ -610,28 +628,66 @@ static void intel_lrc_irq_handler(unsigned long data) intel_uncore_forcewake_put(dev_priv, engine->fw_domains); } -static bool insert_request(struct i915_priotree *pt, struct rb_root *root) +static bool +insert_request(struct intel_engine_cs *engine, + struct i915_priotree *pt, + int prio) { - struct rb_node **p, *rb; + struct i915_priolist *p; + struct rb_node **parent, *rb; bool first = true; + if (unlikely(engine->no_priolist)) + prio = I915_PRIORITY_NORMAL; + +find_priolist: /* most positive priority is scheduled first, equal priorities fifo */ rb = NULL; - p = &root->rb_node; - while (*p) { - struct i915_priotree *pos; - - rb = *p; - pos = rb_entry(rb, typeof(*pos), node); - if (pt->priority > pos->priority) { - p = &rb->rb_left; - } else { - p = &rb->rb_right; + parent = &engine->execlist_queue.rb_node; + while (*parent) { + rb = *parent; + p = rb_entry(rb, typeof(*p), node); + if (prio > p->priority) { + parent = &rb->rb_left; + } else if (prio < p->priority) { + parent = &rb->rb_right; first = false; + } else { + list_add_tail(&pt->link, &p->requests); + return false; + } + } + + if (prio == I915_PRIORITY_NORMAL) { + p = &engine->default_priolist; + } else { + p = kmalloc(sizeof(*p), GFP_ATOMIC); + /* Convert an allocation failure to a priority bump */ + if (unlikely(!p)) { + prio = I915_PRIORITY_NORMAL; /* recurses just once */ + + /* To maintain ordering with all rendering, after an + * allocation failure we have to disable all scheduling. + * Requests will then be executed in fifo, and schedule + * will ensure that dependencies are emitted in fifo. + * There will be still some reordering with existing + * requests, so if userspace lied about their + * dependencies that reordering may be visible. 
+ */ + engine->no_priolist = true; + goto find_priolist; } } - rb_link_node(&pt->node, rb, p); - rb_insert_color(&pt->node, root); + + p->priority = prio; + rb_link_node(&p->node, rb, parent); + rb_insert_color(&p->node, &engine->execlist_queue); + + INIT_LIST_HEAD(&p->requests); + list_add_tail(&pt->link, &p->requests); + + if (first) + engine->execlist_first = &p->node; return first; } @@ -644,12 +700,16 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->timeline->lock, flags); - if (insert_request(&request->priotree, &engine->execlist_queue)) { - engine->execlist_first = &request->priotree.node; + if (insert_request(engine, + &request->priotree, + request->priotree.priority)) { if (execlists_elsp_ready(engine)) tasklet_hi_schedule(&engine->irq_tasklet); } + GEM_BUG_ON(!engine->execlist_first); + GEM_BUG_ON(list_empty(&request->priotree.link)); + spin_unlock_irqrestore(&engine->timeline->lock, flags); } @@ -734,10 +794,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) continue; pt->priority = prio; - if (!RB_EMPTY_NODE(&pt->node)) { - rb_erase(&pt->node, &engine->execlist_queue); - if (insert_request(pt, &engine->execlist_queue)) - engine->execlist_first = &pt->node; + if (!list_empty(&pt->link)) { + __list_del_entry(&pt->link); + insert_request(engine, pt, prio); } } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 162f0a9..6aa20ac 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -177,6 +177,12 @@ enum intel_engine_id { VECS }; +struct i915_priolist { + struct rb_node node; + struct list_head requests; + int priority; +}; + #define INTEL_ENGINE_CS_MAX_NAME 8 struct intel_engine_cs { @@ -367,6 +373,8 @@ struct intel_engine_cs { /* Execlists */ struct tasklet_struct irq_tasklet; + struct i915_priolist default_priolist; + bool no_priolist; struct execlist_port { struct drm_i915_gem_request *request_count; #define EXECLIST_COUNT_BITS 2 @@ -723,6 +731,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); +void intel_engines_mark_idle(struct drm_i915_private *i915); void intel_engines_reset_default_submission(struct drm_i915_private *i915); #endif /* _INTEL_RINGBUFFER_H_ */ -- cgit v1.1 From c5cf9a9147ff6fc9f87251a8f8a5b6ac8b8bdcdc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 May 2017 13:10:04 +0100 Subject: drm/i915: Create a kmem_cache to allocate struct i915_priolist from The i915_priolist are allocated within an atomic context on a path where we wish to minimise latency. If we use a dedicated kmem_cache, we have the advantage of a local freelist from which to service new requests that should keep the latency impact of an allocation small. Though currently we expect the majority of requests to be at default priority (and so hit the preallocate priolist), once userspace starts using priorities they are likely to use many fine grained policies improving the utilisation of a private slab. 
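For readers less familiar with the slab allocator, the lifecycle the patch wires up is the usual dedicated-cache pattern; a condensed kernel-side sketch (not buildable on its own; the slab calls are the real API, while the my_item type and function names are made up for illustration):

    #include <linux/errno.h>
    #include <linux/slab.h>

    struct my_item {
        int priority;
    };

    static struct kmem_cache *my_cache;

    static int my_cache_init(void)
    {
        /* One cache per object type gives a warm, type-sized freelist. */
        my_cache = KMEM_CACHE(my_item, SLAB_HWCACHE_ALIGN);
        return my_cache ? 0 : -ENOMEM;
    }

    static struct my_item *my_item_alloc_atomic(void)
    {
        /* GFP_ATOMIC: safe under a spinlock, will not sleep. */
        return kmem_cache_alloc(my_cache, GFP_ATOMIC);
    }

    static void my_item_free(struct my_item *item)
    {
        kmem_cache_free(my_cache, item);
    }

    static void my_cache_fini(void)
    {
        kmem_cache_destroy(my_cache);
    }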
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 9 ++++++++- drivers/gpu/drm/i915/i915_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 4 ++-- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 9 ++++++++- 5 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a6f2047..08ee5c8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2027,6 +2027,7 @@ struct drm_i915_private { struct kmem_cache *vmas; struct kmem_cache *requests; struct kmem_cache *dependencies; + struct kmem_cache *priorities; const struct intel_device_info info; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3d9161c..0680bd2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4866,12 +4866,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) if (!dev_priv->dependencies) goto err_requests; + dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); + if (!dev_priv->priorities) + goto err_dependencies; + mutex_lock(&dev_priv->drm.struct_mutex); INIT_LIST_HEAD(&dev_priv->gt.timelines); err = i915_gem_timeline_init__global(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); if (err) - goto err_dependencies; + goto err_priorities; INIT_LIST_HEAD(&dev_priv->context_list); INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); @@ -4895,6 +4899,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) return 0; +err_priorities: + kmem_cache_destroy(dev_priv->priorities); err_dependencies: kmem_cache_destroy(dev_priv->dependencies); err_requests: @@ -4918,6 +4924,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) WARN_ON(!list_empty(&dev_priv->gt.timelines)); mutex_unlock(&dev_priv->drm.struct_mutex); + kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); kmem_cache_destroy(dev_priv->requests); kmem_cache_destroy(dev_priv->vmas); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 3b9cdb0..b3da056 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -704,7 +704,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) rb_erase(&p->node, &engine->execlist_queue); INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) - kfree(p); + kmem_cache_free(engine->i915->priorities, p); } done: engine->execlist_first = rb; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 626db61..8529746 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -499,7 +499,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) rb_erase(&p->node, &engine->execlist_queue); INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) - kfree(p); + kmem_cache_free(engine->i915->priorities, p); } done: engine->execlist_first = rb; @@ -661,7 +661,7 @@ find_priolist: if (prio == I915_PRIORITY_NORMAL) { p = &engine->default_priolist; } else { - p = kmalloc(sizeof(*p), GFP_ATOMIC); + p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC); /* Convert an allocation failure to a priority bump */ if (unlikely(!p)) { prio = I915_PRIORITY_NORMAL; /* recurses just once */ diff --git 
a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index f321bdf..3d0e313 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -74,6 +74,7 @@ static void mock_device_release(struct drm_device *dev) destroy_workqueue(i915->wq); + kmem_cache_destroy(i915->priorities); kmem_cache_destroy(i915->dependencies); kmem_cache_destroy(i915->requests); kmem_cache_destroy(i915->vmas); @@ -186,12 +187,16 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->dependencies) goto err_requests; + i915->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN); + if (!i915->priorities) + goto err_dependencies; + mutex_lock(&i915->drm.struct_mutex); INIT_LIST_HEAD(&i915->gt.timelines); err = i915_gem_timeline_init__global(i915); if (err) { mutex_unlock(&i915->drm.struct_mutex); - goto err_dependencies; + goto err_priorities; } mock_init_ggtt(i915); @@ -211,6 +216,8 @@ struct drm_i915_private *mock_gem_device(void) err_engine: for_each_engine(engine, i915, id) mock_engine_free(engine); +err_priorities: + kmem_cache_destroy(i915->priorities); err_dependencies: kmem_cache_destroy(i915->dependencies); err_requests: -- cgit v1.1 From 349bdb68bd48f4e1861058ac2dbaa0aafd4ca38d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 May 2017 13:10:05 +0100 Subject: drm/i915/execlists: Reduce lock contention between schedule/submit_request If we do not require to perform priority bumping, and we haven't yet submitted the request, we can update its priority in situ and skip acquiring the engine locks -- thus avoiding any contention between us and submit/execute. v2: Remove the stack element from the list if we can do the early assignment. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-10-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8529746..014b30a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -779,6 +779,19 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) list_safe_reset_next(dep, p, dfs_link); } + /* If we didn't need to bump any existing priorities, and we haven't + * yet submitted this request (i.e. there is no potential race with + * execlists_submit_request()), we can set our own priority and skip + * acquiring the engine locks. + */ + if (request->priotree.priority == INT_MIN) { + GEM_BUG_ON(!list_empty(&request->priotree.link)); + request->priotree.priority = prio; + if (stack.dfs_link.next == stack.dfs_link.prev) + return; + __list_del_entry(&stack.dfs_link); + } + engine = request->engine; spin_lock_irq(&engine->timeline->lock); -- cgit v1.1 From 5d3d69d5c1195c47baf12035a29aaa990cc89a40 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 May 2017 13:10:06 +0100 Subject: drm/i915: Stop inlining the execlists IRQ handler As the handler is now quite complex, involving a few atomics, the cost of the function preamble is negligible in comparison and so we should leave the function out-of-line for better I$. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-11-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 636b231..8f1ca6a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1353,7 +1353,7 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, ivybridge_parity_error_irq_handler(dev_priv, gt_iir); } -static __always_inline void +static void gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) { bool tasklet = false; -- cgit v1.1 From 955a4b8979fb558e8897f5b358ac77d2d423821d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 17 May 2017 13:10:07 +0100 Subject: drm/i915: Don't force serialisation on marking up execlists irq posted Since we coordinate with the execlists tasklet using a locked schedule operation that ensures that after we set the engine->irq_posted we always have an invocation of the tasklet, we do not need to use a locked operation to set the engine->irq_posted itself. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-12-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8f1ca6a..d63a2ba 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1360,7 +1360,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { if (port_count(&engine->execlist_port[0])) { - set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); tasklet = true; } } -- cgit v1.1 From afbc95cd0c4792a72f384bbcb98251617db40107 Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 17 May 2017 17:28:20 +0530 Subject: drm/i915: fix naming of fixed_16_16 wrapper. fixed_16_16_div_round_up(_u64), wrapper for fixed_16_16 division operation don't really round_up the result. Wrapper round_up only the fraction part of the result to make it 16-bit. This patch eliminates round_up keyword from the wrapper. Later patch will introduce the new wrapper to do rounding-off the result and give unt32_t output to cleanup mix use of fixed_16_16_t & uint32_t variables. 
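A standalone illustration of why the old name was misleading (plain C re-implementing the two-line helper; the inputs 7/2 and 1/3 are arbitrary): the DIV_ROUND_UP inside the helper operates on the already-scaled 16.16 value, so it can only nudge the result up by one unit in the last fractional place; it never rounds the quotient up to the next integer.

    #include <stdint.h>
    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    typedef struct { uint32_t val; } uint_fixed_16_16_t;

    /* Mirrors the helper this patch renames away from *_div_round_up(). */
    static uint_fixed_16_16_t fixed_16_16_div(uint32_t val, uint32_t d)
    {
        uint_fixed_16_16_t res;

        res.val = (uint32_t)DIV_ROUND_UP((uint64_t)val << 16, d);
        return res;
    }

    int main(void)
    {
        uint_fixed_16_16_t a = fixed_16_16_div(7, 2);  /* 0x00038000: exactly 3.5 */
        uint_fixed_16_16_t b = fixed_16_16_div(1, 3);  /* 0x00005556: 1/3 rounded up to the next 1/65536 */

        printf("7/2 -> 0x%08x (%u + %u/65536)\n",
               (unsigned)a.val, (unsigned)(a.val >> 16), (unsigned)(a.val & 0xffff));
        printf("1/3 -> 0x%08x (%u + %u/65536)\n",
               (unsigned)b.val, (unsigned)(b.val >> 16), (unsigned)(b.val & 0xffff));
        return 0;
    }

Neither result is rounded up to the next whole number, which is what a name ending in _round_up suggests.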
Signed-off-by: Mahesh Kumar Reviewed-by: Matt Roper Reviewed-by: Maarten Lankhorst Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170517115831.13830-2-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 6 ++---- drivers/gpu/drm/i915/intel_pm.c | 6 +++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 08ee5c8..9bf038f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -153,8 +153,7 @@ static inline uint_fixed_16_16_t max_fixed_16_16(uint_fixed_16_16_t max1, return max; } -static inline uint_fixed_16_16_t fixed_16_16_div_round_up(uint32_t val, - uint32_t d) +static inline uint_fixed_16_16_t fixed_16_16_div(uint32_t val, uint32_t d) { uint_fixed_16_16_t fp, res; @@ -163,8 +162,7 @@ static inline uint_fixed_16_16_t fixed_16_16_div_round_up(uint32_t val, return res; } -static inline uint_fixed_16_16_t fixed_16_16_div_round_up_u64(uint32_t val, - uint32_t d) +static inline uint_fixed_16_16_t fixed_16_16_div_u64(uint32_t val, uint32_t d) { uint_fixed_16_16_t res; uint64_t interm_val; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ef0e9f8..d12bbe6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4165,7 +4165,7 @@ static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, return FP_16_16_MAX; wm_intermediate_val = latency * pixel_rate * cpp; - ret = fixed_16_16_div_round_up_u64(wm_intermediate_val, 1000 * 512); + ret = fixed_16_16_div_u64(wm_intermediate_val, 1000 * 512); return ret; } @@ -4301,8 +4301,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (y_tiled) { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line * y_min_scanlines, 512); - plane_blocks_per_line = - fixed_16_16_div_round_up(interm_pbpl, y_min_scanlines); + plane_blocks_per_line = fixed_16_16_div(interm_pbpl, + y_min_scanlines); } else if (x_tiled) { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512); plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl); -- cgit v1.1 From a9d055de65a714f67a3b47dcceee1dc450743c83 Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 17 May 2017 17:28:21 +0530 Subject: drm/i915: Add more wrapper for fixed_point_16_16 operations This patch adds few wrapper to perform fixed_point_16_16 operations mul_round_up_u32_fixed16 : Multiplies u32 and fixed_16_16_t variables & returns u32 result with rounding-up. mul_fixed16 : Multiplies two fixed_16_16_t variable & returns fixed_16_16 div_round_up_fixed16 : Perform division operation on fixed_16_16_t variables & return u32 result with round-off div_round_up_u32_fixed16 : devide uint32_t variable by fixed_16_16 variable and round_up the result to uint32_t. These wrappers will be used by later patches in the series. 
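A short usage sketch of the new helpers (illustrative only; the variable names are placeholders, the real plane/watermark call sites arrive in later patches of the series):

	uint_fixed_16_16_t scale = fixed_16_16_div(src_w, dst_w);          /* 16.16 ratio */
	uint32_t rate  = mul_round_up_u32_fixed16(pixel_rate, scale);      /* u32, rounded up */
	uint_fixed_16_16_t area = mul_fixed16(scale_w, scale_h);           /* 16.16 * 16.16 */
	uint32_t lines = div_round_up_fixed16(blocks, blocks_per_line);    /* u32, rounded up */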
Changes from V1: - Rename wrapper as per Matt's comment Changes from V2: - Fix indentation Signed-off-by: Mahesh Kumar Reviewed-by: Matt Roper Reviewed-by: Maarten Lankhorst Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170517115831.13830-3-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 43 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9bf038f..f6c241f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -153,6 +153,38 @@ static inline uint_fixed_16_16_t max_fixed_16_16(uint_fixed_16_16_t max1, return max; } +static inline uint32_t div_round_up_fixed16(uint_fixed_16_16_t val, + uint_fixed_16_16_t d) +{ + return DIV_ROUND_UP(val.val, d.val); +} + +static inline uint32_t mul_round_up_u32_fixed16(uint32_t val, + uint_fixed_16_16_t mul) +{ + uint64_t intermediate_val; + uint32_t result; + + intermediate_val = (uint64_t) val * mul.val; + intermediate_val = DIV_ROUND_UP_ULL(intermediate_val, 1 << 16); + WARN_ON(intermediate_val >> 32); + result = clamp_t(uint32_t, intermediate_val, 0, ~0); + return result; +} + +static inline uint_fixed_16_16_t mul_fixed16(uint_fixed_16_16_t val, + uint_fixed_16_16_t mul) +{ + uint64_t intermediate_val; + uint_fixed_16_16_t fp; + + intermediate_val = (uint64_t) val.val * mul.val; + intermediate_val = intermediate_val >> 16; + WARN_ON(intermediate_val >> 32); + fp.val = clamp_t(uint32_t, intermediate_val, 0, ~0); + return fp; +} + static inline uint_fixed_16_16_t fixed_16_16_div(uint32_t val, uint32_t d) { uint_fixed_16_16_t fp, res; @@ -175,6 +207,17 @@ static inline uint_fixed_16_16_t fixed_16_16_div_u64(uint32_t val, uint32_t d) return res; } +static inline uint32_t div_round_up_u32_fixed16(uint32_t val, + uint_fixed_16_16_t d) +{ + uint64_t interm_val; + + interm_val = (uint64_t)val << 16; + interm_val = DIV_ROUND_UP_ULL(interm_val, d.val); + WARN_ON(interm_val >> 32); + return clamp_t(uint32_t, interm_val, 0, ~0); +} + static inline uint_fixed_16_16_t mul_u32_fixed_16_16(uint32_t val, uint_fixed_16_16_t mul) { -- cgit v1.1 From d273ecce3dde63168ed7840e2b8e5659da0df0f5 Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 17 May 2017 17:28:22 +0530 Subject: drm/i915: Use fixed_16_16 wrapper for division operation Don't use fixed_16_16 structure members directly, instead use wrapper to perform fixed_16_16 division operation. 
Signed-off-by: Mahesh Kumar Reviewed-by: Matt Roper Reviewed-by: Maarten Lankhorst Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170517115831.13830-4-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d12bbe6..8ff8cc5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4334,8 +4334,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } res_blocks = fixed_16_16_to_u32_round_up(selected_result) + 1; - res_lines = DIV_ROUND_UP(selected_result.val, - plane_blocks_per_line.val); + res_lines = div_round_up_fixed16(selected_result, + plane_blocks_per_line); if (level >= 1 && level <= 7) { if (y_tiled) { -- cgit v1.1 From 7084b50bdd8f8b8fc232fb8642d83de913575baa Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 17 May 2017 17:28:23 +0530 Subject: drm/i915/skl+: calculate pixel_rate & relative_data_rate in fixed point This patch make changes to calculate adjusted plane pixel rate & plane downscale amount using fixed_point functions available. This patch will give uniformity in code, & will help to avoid mixing of 32bit uint32_t variable for fixed-16.16 with fixed_16_16_t variables in later patch in the series. Changes from V1: - Rebase based on wrapper name change - Remove unnecessary comment Signed-off-by: Mahesh Kumar Reviewed-by: Matt Roper Reviewed-by: Maarten Lankhorst Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170517115831.13830-5-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8ff8cc5..ab05695 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3827,26 +3827,27 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, * Return value is provided in 16.16 fixed point form to retain fractional part. * Caller should take care of dividing & rounding off the value. 
*/ -static uint32_t +static uint_fixed_16_16_t skl_plane_downscale_amount(const struct intel_crtc_state *cstate, const struct intel_plane_state *pstate) { struct intel_plane *plane = to_intel_plane(pstate->base.plane); - uint32_t downscale_h, downscale_w; uint32_t src_w, src_h, dst_w, dst_h; + uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; + uint_fixed_16_16_t downscale_h, downscale_w; if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) - return DRM_PLANE_HELPER_NO_SCALING; + return u32_to_fixed_16_16(0); /* n.b., src is 16.16 fixed point, dst is whole integer */ if (plane->id == PLANE_CURSOR) { - src_w = pstate->base.src_w; - src_h = pstate->base.src_h; + src_w = pstate->base.src_w >> 16; + src_h = pstate->base.src_h >> 16; dst_w = pstate->base.crtc_w; dst_h = pstate->base.crtc_h; } else { - src_w = drm_rect_width(&pstate->base.src); - src_h = drm_rect_height(&pstate->base.src); + src_w = drm_rect_width(&pstate->base.src) >> 16; + src_h = drm_rect_height(&pstate->base.src) >> 16; dst_w = drm_rect_width(&pstate->base.dst); dst_h = drm_rect_height(&pstate->base.dst); } @@ -3854,11 +3855,12 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate, if (drm_rotation_90_or_270(pstate->base.rotation)) swap(dst_w, dst_h); - downscale_h = max(src_h / dst_h, (uint32_t)DRM_PLANE_HELPER_NO_SCALING); - downscale_w = max(src_w / dst_w, (uint32_t)DRM_PLANE_HELPER_NO_SCALING); + fp_w_ratio = fixed_16_16_div(src_w, dst_w); + fp_h_ratio = fixed_16_16_div(src_h, dst_h); + downscale_w = max_fixed_16_16(fp_w_ratio, u32_to_fixed_16_16(1)); + downscale_h = max_fixed_16_16(fp_h_ratio, u32_to_fixed_16_16(1)); - /* Provide result in 16.16 fixed point */ - return (uint64_t)downscale_w * downscale_h >> 16; + return mul_fixed16(downscale_w, downscale_h); } static unsigned int @@ -3868,10 +3870,11 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, { struct intel_plane *plane = to_intel_plane(pstate->plane); struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); - uint32_t down_scale_amount, data_rate; + uint32_t data_rate; uint32_t width = 0, height = 0; struct drm_framebuffer *fb; u32 format; + uint_fixed_16_16_t down_scale_amount; if (!intel_pstate->base.visible) return 0; @@ -3905,7 +3908,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate); - return (uint64_t)data_rate * down_scale_amount >> 16; + return mul_round_up_u32_fixed16(data_rate, down_scale_amount); } /* @@ -4191,8 +4194,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst struct intel_plane_state *pstate) { uint64_t adjusted_pixel_rate; - uint64_t downscale_amount; - uint64_t pixel_rate; + uint_fixed_16_16_t downscale_amount; /* Shouldn't reach here on disabled planes... 
*/ if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) @@ -4205,10 +4207,8 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst adjusted_pixel_rate = cstate->pixel_rate; downscale_amount = skl_plane_downscale_amount(cstate, pstate); - pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; - WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0)); - - return pixel_rate; + return mul_round_up_u32_fixed16(adjusted_pixel_rate, + downscale_amount); } static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, -- cgit v1.1 From 7b75119c8bb9bde34f0cc36690094f8928a4c491 Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 17 May 2017 17:28:24 +0530 Subject: drm/i915/skl: Fail the flip if no FB for WM calculation Fail the flip if no FB is present but plane_state is set as visible. Above is not a valid combination so instead of continue fail the flip. Signed-off-by: Mahesh Kumar Reviewed-by: Matt Roper Reviewed-by: Maarten Lankhorst Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170517115831.13830-6-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ab05695..f494af3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4408,7 +4408,8 @@ skl_compute_wm_level(const struct drm_i915_private *dev_priv, if (!intel_pstate) intel_pstate = to_intel_plane_state(plane->state); - WARN_ON(!intel_pstate->base.fb); + if (WARN_ON(!intel_pstate->base.fb)) + return -EINVAL; ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]); -- cgit v1.1 From 336031ea1cdb056eb2f7bd21fce46dda52fc0412 Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 17 May 2017 17:28:25 +0530 Subject: drm/i915/skl+: no need to memset again We are already doing memset of ddb structure at the begining of skl_allocate_pipe_ddb function, No need to again do a memset. Signed-off-by: Mahesh Kumar Reviewed-by: Matt Roper Reviewed-by: Maarten Lankhorst Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170517115831.13830-7-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f494af3..37dd3e7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4072,10 +4072,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active); alloc_size = skl_ddb_entry_size(alloc); - if (alloc_size == 0) { - memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); + if (alloc_size == 0) return 0; - } skl_ddb_calc_min(cstate, num_active, minimum, y_minimum); -- cgit v1.1 From 5ba6faafbe18451352747ae730392ddb316e4ed5 Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 17 May 2017 17:28:26 +0530 Subject: drm/i915/skl+: Fail the flip if ddb min requirement exceeds pipe allocation DDB minimum requirement of crtc configuration (cumulative of all the enabled planes in crtc) may exceed the allocated DDB for crtc/pipe. This patch make changes to fail the flip/ioctl if minimum requirement for pipe exceeds the total ddb allocated to the pipe. Previously it succeeded but making alloc_size a negative value. Which will make subsequent calculations for plane ddb allocation bogus & may lead to screen corruption or system hang. 
Changes from V1: - Improve commit message as per Ander's comment - Remove extra parentheses (Ander) Signed-off-by: Mahesh Kumar Reviewed-by: Matt Roper Reviewed-by: Ander Conselvan de Oliveira Reviewed-by: Maarten Lankhorst Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170517115831.13830-8-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 37dd3e7..bbc7206 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4057,6 +4057,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, int num_active; unsigned plane_data_rate[I915_MAX_PLANES] = {}; unsigned plane_y_data_rate[I915_MAX_PLANES] = {}; + uint16_t total_min_blocks = 0; /* Clear the partitioning for disabled planes. */ memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); @@ -4084,10 +4085,18 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, */ for_each_plane_id_on_crtc(intel_crtc, plane_id) { - alloc_size -= minimum[plane_id]; - alloc_size -= y_minimum[plane_id]; + total_min_blocks += minimum[plane_id]; + total_min_blocks += y_minimum[plane_id]; } + if (total_min_blocks > alloc_size) { + DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations"); + DRM_DEBUG_KMS("minimum required %d/%d\n", total_min_blocks, + alloc_size); + return -EINVAL; + } + + alloc_size -= total_min_blocks; ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR]; ddb->plane[pipe][PLANE_CURSOR].end = alloc->end; -- cgit v1.1 From eb2fdcdf821c5658813899de18eb6d9475f822df Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 17 May 2017 17:28:27 +0530 Subject: drm/i915/skl+: Watermark calculation cleanup This patch cleanup/reorganises the watermark calculation functions. This patch make use of already available macro "drm_atomic_crtc_state_for_each_plane_state" to walk through plane_state list instead of calculating plane_state in function itself. This restructuring will help later patch for new DDB allocation algorithm to do only algo related changes. 
Changes from V1: - split the patch in two parts as per Matt's comment Signed-off-by: Mahesh Kumar Reviewed-by: Matt Roper Reviewed-by: Maarten Lankhorst Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170517115831.13830-9-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 53 +++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index bbc7206..c24a4e1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4197,8 +4197,9 @@ static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate, return ret; } -static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, - struct intel_plane_state *pstate) +static uint32_t +skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, + const struct intel_plane_state *pstate) { uint64_t adjusted_pixel_rate; uint_fixed_16_16_t downscale_amount; @@ -4220,7 +4221,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, struct intel_crtc_state *cstate, - struct intel_plane_state *intel_pstate, + const struct intel_plane_state *intel_pstate, uint16_t ddb_allocation, int level, uint16_t *out_blocks, /* out */ @@ -4228,8 +4229,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, bool *enabled /* out */) { struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); - struct drm_plane_state *pstate = &intel_pstate->base; - struct drm_framebuffer *fb = pstate->fb; + const struct drm_plane_state *pstate = &intel_pstate->base; + const struct drm_framebuffer *fb = pstate->fb; uint32_t latency = dev_priv->wm.skl_latency[level]; uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t plane_blocks_per_line; @@ -4384,37 +4385,17 @@ static int skl_compute_wm_level(const struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb, struct intel_crtc_state *cstate, - struct intel_plane *intel_plane, + const struct intel_plane_state *intel_pstate, int level, struct skl_wm_level *result) { - struct drm_atomic_state *state = cstate->base.state; struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); - struct drm_plane *plane = &intel_plane->base; - struct intel_plane_state *intel_pstate = NULL; + struct drm_plane *plane = intel_pstate->base.plane; + struct intel_plane *intel_plane = to_intel_plane(plane); uint16_t ddb_blocks; enum pipe pipe = intel_crtc->pipe; int ret; - if (state) - intel_pstate = - intel_atomic_get_existing_plane_state(state, - intel_plane); - - /* - * Note: If we start supporting multiple pending atomic commits against - * the same planes/CRTC's in the future, plane->state will no longer be - * the correct pre-state to use for the calculations here and we'll - * need to change where we get the 'unchanged' plane data from. - * - * For now this is fine because we only allow one queued commit against - * a CRTC. Even if the plane isn't modified by this transaction and we - * don't have a plane lock, we still have the CRTC's lock, so we know - * that no other transactions are racing with us to update it. 
- */ - if (!intel_pstate) - intel_pstate = to_intel_plane_state(plane->state); - if (WARN_ON(!intel_pstate->base.fb)) return -EINVAL; @@ -4475,8 +4456,10 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate, struct skl_pipe_wm *pipe_wm) { struct drm_device *dev = cstate->base.crtc->dev; + struct drm_crtc_state *crtc_state = &cstate->base; const struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *intel_plane; + struct drm_plane *plane; + const struct drm_plane_state *pstate; struct skl_plane_wm *wm; int level, max_level = ilk_wm_max_level(dev_priv); int ret; @@ -4487,14 +4470,16 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate, */ memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes)); - for_each_intel_plane_mask(&dev_priv->drm, - intel_plane, - cstate->base.plane_mask) { - wm = &pipe_wm->planes[intel_plane->id]; + drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { + const struct intel_plane_state *intel_pstate = + to_intel_plane_state(pstate); + enum plane_id plane_id = to_intel_plane(plane)->id; + + wm = &pipe_wm->planes[plane_id]; for (level = 0; level <= max_level; level++) { ret = skl_compute_wm_level(dev_priv, ddb, cstate, - intel_plane, level, + intel_pstate, level, &wm->wm[level]); if (ret) return ret; -- cgit v1.1 From d2f5e36df61f4f06b9a52605fba23b3c3608efca Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 17 May 2017 17:28:28 +0530 Subject: drm/i915/skl+: Perform wm level calculations in separate function Instead of iterating over planes & wm levels in a single function use skl_compute_wm_level function to interate over WM levels. Change name of function to skl_compute_wm_levels (Matt). These changes are to clean-up WM code & will help in making only new ddb algorithm related changes in later patch in series. 
Signed-off-by: Mahesh Kumar Reviewed-by: Matt Roper Reviewed-by: Maarten Lankhorst Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170517115831.13830-10-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 48 ++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c24a4e1..0f1d9f6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4382,18 +4382,18 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } static int -skl_compute_wm_level(const struct drm_i915_private *dev_priv, - struct skl_ddb_allocation *ddb, - struct intel_crtc_state *cstate, - const struct intel_plane_state *intel_pstate, - int level, - struct skl_wm_level *result) +skl_compute_wm_levels(const struct drm_i915_private *dev_priv, + struct skl_ddb_allocation *ddb, + struct intel_crtc_state *cstate, + const struct intel_plane_state *intel_pstate, + struct skl_plane_wm *wm) { struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); struct drm_plane *plane = intel_pstate->base.plane; struct intel_plane *intel_plane = to_intel_plane(plane); uint16_t ddb_blocks; enum pipe pipe = intel_crtc->pipe; + int level, max_level = ilk_wm_max_level(dev_priv); int ret; if (WARN_ON(!intel_pstate->base.fb)) @@ -4401,16 +4401,20 @@ skl_compute_wm_level(const struct drm_i915_private *dev_priv, ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]); - ret = skl_compute_plane_wm(dev_priv, - cstate, - intel_pstate, - ddb_blocks, - level, - &result->plane_res_b, - &result->plane_res_l, - &result->plane_en); - if (ret) - return ret; + for (level = 0; level <= max_level; level++) { + struct skl_wm_level *result = &wm->wm[level]; + + ret = skl_compute_plane_wm(dev_priv, + cstate, + intel_pstate, + ddb_blocks, + level, + &result->plane_res_b, + &result->plane_res_l, + &result->plane_en); + if (ret) + return ret; + } return 0; } @@ -4461,7 +4465,6 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate, struct drm_plane *plane; const struct drm_plane_state *pstate; struct skl_plane_wm *wm; - int level, max_level = ilk_wm_max_level(dev_priv); int ret; /* @@ -4477,13 +4480,10 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate, wm = &pipe_wm->planes[plane_id]; - for (level = 0; level <= max_level; level++) { - ret = skl_compute_wm_level(dev_priv, ddb, cstate, - intel_pstate, level, - &wm->wm[level]); - if (ret) - return ret; - } + ret = skl_compute_wm_levels(dev_priv, ddb, cstate, + intel_pstate, wm); + if (ret) + return ret; skl_compute_transition_wm(cstate, &wm->trans_wm); } pipe_wm->linetime = skl_compute_linetime_wm(cstate); -- cgit v1.1 From d555cb5827d603244969e08444340e3db78c8a37 Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 17 May 2017 17:28:29 +0530 Subject: drm/i915/skl+: use linetime latency if ddb size is not available This patch make changes to use linetime latency if allocated DDB size during plane watermark calculation is not available. linetime is the time, display engine takes to fetch one line worth of pixels with given pixel clock rate. This is required to implement new DDB allocation algorithm. In New Algorithm DDB is allocated based on WM values, because of which number of DDB blocks will not be available during WM calculation, So this "linetime latency" is suggested by SV/HW team to be used during switch-case for WM blocks selection. 
linetime latency us = pipe horizontal total pixels/adjusted pixel rate MHz Changes since v1: - Rebase on top of Paulo's patch series Changes since v2: - Fix if-else condition (pointed by Maarten) Changes since v3: - Use common function for timetime_us calculation (Paulo) - rebase on drm-tip Changes since v4: - Use consistent name for fixed_point operation Changes since v5: - Improve commit message - rename skl_get_linetime_us to intel_get_linetime_us - fix watermark result selection (Matt) Signed-off-by: "Mahesh Kumar" Reviewed-by: Maarten Lankhorst Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170517115831.13830-11-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 7 +++++++ drivers/gpu/drm/i915/intel_pm.c | 42 ++++++++++++++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f6c241f..17883a8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -115,6 +115,13 @@ typedef struct { fp; \ }) +static inline bool is_fixed16_zero(uint_fixed_16_16_t val) +{ + if (val.val == 0) + return true; + return false; +} + static inline uint_fixed_16_16_t u32_to_fixed_16_16(uint32_t val) { uint_fixed_16_16_t fp; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0f1d9f6..936eef1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4197,6 +4197,27 @@ static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate, return ret; } +static uint_fixed_16_16_t +intel_get_linetime_us(struct intel_crtc_state *cstate) +{ + uint32_t pixel_rate; + uint32_t crtc_htotal; + uint_fixed_16_16_t linetime_us; + + if (!cstate->base.active) + return u32_to_fixed_16_16(0); + + pixel_rate = cstate->pixel_rate; + + if (WARN_ON(pixel_rate == 0)) + return u32_to_fixed_16_16(0); + + crtc_htotal = cstate->base.adjusted_mode.crtc_htotal; + linetime_us = fixed_16_16_div_u64(crtc_htotal * 1000, pixel_rate); + + return linetime_us; +} + static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, const struct intel_plane_state *pstate) @@ -4331,12 +4352,18 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (y_tiled) { selected_result = max_fixed_16_16(method2, y_tile_minimum); } else { + uint32_t linetime_us; + + linetime_us = fixed_16_16_to_u32_round_up( + intel_get_linetime_us(cstate)); if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && (plane_bytes_per_line / 512 < 1)) selected_result = method2; - else if ((ddb_allocation / + else if ((ddb_allocation && ddb_allocation / fixed_16_16_to_u32_round_up(plane_blocks_per_line)) >= 1) selected_result = min_fixed_16_16(method1, method2); + else if (latency >= linetime_us) + selected_result = min_fixed_16_16(method1, method2); else selected_result = method1; } @@ -4424,19 +4451,16 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate) { struct drm_atomic_state *state = cstate->base.state; struct drm_i915_private *dev_priv = to_i915(state->dev); - uint32_t pixel_rate; + uint_fixed_16_16_t linetime_us; uint32_t linetime_wm; - if (!cstate->base.active) - return 0; + linetime_us = intel_get_linetime_us(cstate); - pixel_rate = cstate->pixel_rate; - - if (WARN_ON(pixel_rate == 0)) + if (is_fixed16_zero(linetime_us)) return 0; - linetime_wm = DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal * - 1000, pixel_rate); + linetime_wm = 
fixed_16_16_to_u32_round_up(mul_u32_fixed_16_16(8, + linetime_us)); /* Display WA #1135: bxt. */ if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled) -- cgit v1.1 From 0ad4dc887d4168448e8c801aa4edd8fe1e0bd534 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 18 May 2017 13:06:44 +0200 Subject: drm/i915: Fix new -Wint-in-bool-context gcc compiler warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit fixes the following compiler warning: drivers/gpu/drm/i915/intel_dsi.c: In function ‘intel_dsi_prepare’: drivers/gpu/drm/i915/intel_dsi.c:1487:23: warning: ?: using integer constants in boolean context [-Wint-in-bool-context] PORT_A ? PORT_C : PORT_A), Fixes: f4c3a88e5f04 ("drm/i915: Tighten mmio arrays for MIPI_PORT") Signed-off-by: Hans de Goede Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170518110644.9902-1-hdegoede@redhat.com --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ee144ec..89888ad 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8278,7 +8278,7 @@ enum { /* MIPI DSI registers */ -#define _MIPI_PORT(port, a, c) ((port) ? c : a) /* ports A and C only */ +#define _MIPI_PORT(port, a, c) (((port) == PORT_A) ? a : c) /* ports A and C only */ #define _MMIO_MIPI(port, a, c) _MMIO(_MIPI_PORT(port, a, c)) #define MIPIO_TXESC_CLK_DIV1 _MMIO(0x160004) -- cgit v1.1 From ac0a73fb526100adc521ec2069623e47ca3997a8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 18 May 2017 14:39:42 +0100 Subject: drm/i915: Check C for null pointer rather than B There are two occasions where pointer B is being check for a NULL when it should be pointer C instead. Fix these. Detected by CoverityScan, CID#1436348,1436349 ("Logically Dead Code") Fixes: 47624cc3301b60 ("drm/i915: Import the kfence selftests for i915_sw_fence") Signed-off-by: Colin Ian King Link: http://patchwork.freedesktop.org/patch/msgid/20170518133942.5660-1-colin.king@canonical.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/selftests/i915_sw_fence.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c index 98baf10..c31d439 100644 --- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c @@ -311,7 +311,7 @@ static int test_AB_C(void *arg) } C = alloc_fence(); - if (!B) { + if (!C) { ret = -ENOMEM; goto err_B; } @@ -388,7 +388,7 @@ static int test_C_AB(void *arg) } C = alloc_fence(); - if (!B) { + if (!C) { ret = -ENOMEM; goto err_B; } -- cgit v1.1 From 29f31623d7a83048b1c90a4db89b6f325bfde942 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 May 2017 10:46:15 +0100 Subject: drm/i915/selftests: Pretend to be a gfx pci device Set the class on our mock pci device to GFX. This should be useful for utilities like intel-iommu that special case gfx devices. 
References: https://bugs.freedesktop.org/show_bug.cgi?id=101080 Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170518094638.5469-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 3d0e313..4af0000 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -121,6 +121,7 @@ struct drm_i915_private *mock_gem_device(void) goto err; device_initialize(&pdev->dev); + pdev->class = PCI_BASE_CLASS_DISPLAY << 16; pdev->dev.release = release_dev; dev_set_name(&pdev->dev, "mock"); dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); -- cgit v1.1 From 9593a6576fd50dbfdedaf04296481998c219a3b8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 May 2017 21:48:10 +0100 Subject: drm/i915: Try harder to reset the GPU Repeat the reset a couple of times if at first we do not succeed. v2: differentiate which path/engine failed with a debug message Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170513083726.502-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170518204811.7408-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_uncore.c | 67 +++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 08d7d08..b27cc00 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1427,9 +1427,10 @@ out: return ret; } -static int i915_reset_complete(struct pci_dev *pdev) +static bool i915_reset_complete(struct pci_dev *pdev) { u8 gdrst; + pci_read_config_byte(pdev, I915_GDRST, &gdrst); return (gdrst & GRDOM_RESET_STATUS) == 0; } @@ -1440,15 +1441,16 @@ static int i915_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask /* assert reset for at least 20 usec */ pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - udelay(20); + usleep_range(50, 200); pci_write_config_byte(pdev, I915_GDRST, 0); return wait_for(i915_reset_complete(pdev), 500); } -static int g4x_reset_complete(struct pci_dev *pdev) +static bool g4x_reset_complete(struct pci_dev *pdev) { u8 gdrst; + pci_read_config_byte(pdev, I915_GDRST, &gdrst); return (gdrst & GRDOM_RESET_ENABLE) == 0; } @@ -1456,6 +1458,7 @@ static int g4x_reset_complete(struct pci_dev *pdev) static int g33_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) { struct pci_dev *pdev = dev_priv->drm.pdev; + pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); return wait_for(g4x_reset_complete(pdev), 500); } @@ -1468,8 +1471,10 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) pci_write_config_byte(pdev, I915_GDRST, GRDOM_RENDER | GRDOM_RESET_ENABLE); ret = wait_for(g4x_reset_complete(pdev), 500); - if (ret) - return ret; + if (ret) { + DRM_DEBUG_DRIVER("Wait for render reset failed\n"); + goto out; + } /* WaVcpClkGateDisableForMediaReset:ctg,elk */ I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); @@ -1478,16 +1483,17 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) pci_write_config_byte(pdev, I915_GDRST, GRDOM_MEDIA | GRDOM_RESET_ENABLE); ret = wait_for(g4x_reset_complete(pdev), 
500); - if (ret) - return ret; + if (ret) { + DRM_DEBUG_DRIVER("Wait for media reset failed\n"); + } /* WaVcpClkGateDisableForMediaReset:ctg,elk */ I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); POSTING_READ(VDECCLK_GATE_D); +out: pci_write_config_byte(pdev, I915_GDRST, 0); - - return 0; + return ret; } static int ironlake_do_reset(struct drm_i915_private *dev_priv, @@ -1495,31 +1501,36 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv, { int ret; - I915_WRITE(ILK_GDSR, - ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); + I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); ret = intel_wait_for_register(dev_priv, ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, 500); - if (ret) - return ret; + if (ret) { + DRM_DEBUG_DRIVER("Wait for render reset failed\n"); + goto out; + } - I915_WRITE(ILK_GDSR, - ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); + I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); ret = intel_wait_for_register(dev_priv, ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, 500); - if (ret) - return ret; + if (ret) { + DRM_DEBUG_DRIVER("Wait for media reset failed\n"); + goto out; + } +out: I915_WRITE(ILK_GDSR, 0); - - return 0; + POSTING_READ(ILK_GDSR); + return ret; } /* Reset the hardware domains (GENX_GRDOM_*) specified by mask */ static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv, u32 hw_domain_mask) { + int err; + /* GEN6_GDRST is not in the gt power well, no need to check * for fifo space for the write or forcewake the chip for * the read @@ -1527,9 +1538,14 @@ static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv, __raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask); /* Wait for the device to ack the reset requests */ - return intel_wait_for_register_fw(dev_priv, + err = intel_wait_for_register_fw(dev_priv, GEN6_GDRST, hw_domain_mask, 0, 500); + if (err) + DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n", + hw_domain_mask); + + return err; } /** @@ -1749,8 +1765,11 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv) int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) { reset_func reset; + int retry; int ret; + might_sleep(); + reset = intel_get_gpu_reset(dev_priv); if (reset == NULL) return -ENODEV; @@ -1759,7 +1778,13 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) * request may be dropped and never completes (causing -EIO). */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - ret = reset(dev_priv, engine_mask); + for (retry = 0; retry < 3; retry++) { + ret = reset(dev_priv, engine_mask); + if (ret != -ETIMEDOUT) + break; + + cond_resched(); + } intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; -- cgit v1.1 From 44e1e7baa3b90b725efe1635e48083c04acba782 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 18 May 2017 21:48:11 +0100 Subject: drm/i915: Reorder media/render reset on g4x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ville found a reference to WaMediaResetBeforeFullReset which we presume means that we should simply do the media reset first. 
References: https://bugs.freedesktop.org/show_bug.cgi?id=100942 Suggested-by: Ville Syrjälä Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170518204811.7408-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_uncore.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index b27cc00..7922264 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1468,16 +1468,9 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) struct pci_dev *pdev = dev_priv->drm.pdev; int ret; - pci_write_config_byte(pdev, I915_GDRST, - GRDOM_RENDER | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); - if (ret) { - DRM_DEBUG_DRIVER("Wait for render reset failed\n"); - goto out; - } - /* WaVcpClkGateDisableForMediaReset:ctg,elk */ - I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); + I915_WRITE(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); POSTING_READ(VDECCLK_GATE_D); pci_write_config_byte(pdev, I915_GDRST, @@ -1485,14 +1478,24 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) ret = wait_for(g4x_reset_complete(pdev), 500); if (ret) { DRM_DEBUG_DRIVER("Wait for media reset failed\n"); + goto out; } - /* WaVcpClkGateDisableForMediaReset:ctg,elk */ - I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); + pci_write_config_byte(pdev, I915_GDRST, + GRDOM_RENDER | GRDOM_RESET_ENABLE); + ret = wait_for(g4x_reset_complete(pdev), 500); + if (ret) { + DRM_DEBUG_DRIVER("Wait for render reset failed\n"); + goto out; + } out: pci_write_config_byte(pdev, I915_GDRST, 0); + + I915_WRITE(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ(VDECCLK_GATE_D); + return ret; } -- cgit v1.1 From 4afc67be8e203ee8f5e95e829c0777eae7a14702 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 18 May 2017 11:31:02 +0000 Subject: drm/i915/guc: Remove stale comment for q_fail This member was dropped long time ago. Fixes: 774439e1 ("drm/i915/guc: re-optimise i915_guc_client layout") Signed-off-by: Michal Wajdeczko Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170518113104.54400-1-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uc.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 7618b71..8d52a37 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -59,8 +59,6 @@ struct drm_i915_gem_request; * available in the work queue (note, the queue is shared, * not per-engine). It is OK for this to be nonzero, but * it should not be huge! - * q_fail: failed to enqueue a work item. This should never happen, - * because we check for space beforehand. * b_fail: failed to ring the doorbell. This should never happen, unless * somehow the hardware misbehaves, or maybe if the GuC firmware * crashes? We probably need to reset the GPU to recover. 
-- cgit v1.1 From c1adab97034814d3ccc961cbe027fae505d710ba Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 18 May 2017 11:31:03 +0000 Subject: drm/i915/guc: Remove failed doorbell stat from debugfs This stat is almost always zero unless fatal error occurs, which should be reported by other means anyway. Suggested-by: Chris Wilson Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170518113104.54400-2-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 1 - drivers/gpu/drm/i915/i915_guc_submission.c | 2 -- drivers/gpu/drm/i915/intel_uc.h | 4 ---- 3 files changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8abb939..207175e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2482,7 +2482,6 @@ static void i915_guc_client_info(struct seq_file *m, client->wq_size, client->wq_offset, client->wq_tail); seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space); - seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail); seq_printf(m, "\tLast submission result: %d\n", client->retcode); for_each_engine(engine, dev_priv, id) { diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index b3da056..dc84311 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -615,8 +615,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) client->submissions[engine_id] += 1; client->retcode = b_ret; - if (b_ret) - client->b_fail += 1; guc->submissions[engine_id] += 1; guc->last_seqno[engine_id] = rq->global_seqno; diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 8d52a37..398a491 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -59,9 +59,6 @@ struct drm_i915_gem_request; * available in the work queue (note, the queue is shared, * not per-engine). It is OK for this to be nonzero, but * it should not be huge! - * b_fail: failed to ring the doorbell. This should never happen, unless - * somehow the hardware misbehaves, or maybe if the GuC firmware - * crashes? We probably need to reset the GPU to recover. * retcode: errno from last guc_submit() */ struct i915_guc_client { @@ -85,7 +82,6 @@ struct i915_guc_client { uint32_t wq_tail; uint32_t wq_rsvd; uint32_t no_wq_space; - uint32_t b_fail; int retcode; /* Per-engine counts of GuC submissions */ -- cgit v1.1 From f1448a62a1030f3b82af00dc1565c60bbbde0df1 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 18 May 2017 11:31:04 +0000 Subject: drm/i915/guc: Remove last submission result from debugfs Debugfs does not seems to be a right place to display transient data. If we want to capture errors, we should log them. 
Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170518113104.54400-3-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 1 - drivers/gpu/drm/i915/i915_guc_submission.c | 1 - drivers/gpu/drm/i915/intel_uc.h | 2 -- 3 files changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 207175e..6bdc903 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2482,7 +2482,6 @@ static void i915_guc_client_info(struct seq_file *m, client->wq_size, client->wq_offset, client->wq_tail); seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space); - seq_printf(m, "\tLast submission result: %d\n", client->retcode); for_each_engine(engine, dev_priv, id) { u64 submissions = client->submissions[id]; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index dc84311..ee32fa9 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -614,7 +614,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) b_ret = guc_ring_doorbell(client); client->submissions[engine_id] += 1; - client->retcode = b_ret; guc->submissions[engine_id] += 1; guc->last_seqno[engine_id] = rq->global_seqno; diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 398a491..33c8f77 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -59,7 +59,6 @@ struct drm_i915_gem_request; * available in the work queue (note, the queue is shared, * not per-engine). It is OK for this to be nonzero, but * it should not be huge! - * retcode: errno from last guc_submit() */ struct i915_guc_client { struct i915_vma *vma; @@ -82,7 +81,6 @@ struct i915_guc_client { uint32_t wq_tail; uint32_t wq_rsvd; uint32_t no_wq_space; - int retcode; /* Per-engine counts of GuC submissions */ uint64_t submissions[I915_NUM_ENGINES]; -- cgit v1.1 From 2c80353f3cd0cd4b28b17d55226e5914d2c0d5e1 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 19 May 2017 12:13:40 +0300 Subject: drm/i915/g4x: Improve gpu reset reliability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ELK seems to very picky about the preconditions to reset. Evidence on Eaglelake (8086:2e12 (rev 03)) shows that it does not like if reset occurs when there is active ring. Ville found out that there is workaround with name 'WaMediaResetMainRingCleanup' which suggests that we need to cleanup rings before resetting. It is unclear what cleanup exactly means but evidence shows that stopping the ring does have an effect on reset reliability. This patch makes reset successful on hangs induced by chained batches (the igt ones). Note that if the hang is inside a shader, it is possible that our attempts to stop the ring achieves anything. v2: zero ctl,head,tail also. bug ref. 
use driver debugs (Chris) v3: specify platform on testcases, comment tidyup (Chris) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100942 Testcase: igt/gem_busy/*-hang #elk Testcase: igt/gem_ringfill/hang-* #elk Suggested-by: Ville Syrjälä Cc: Ville Syrjälä Cc: Chris Wilson Cc: Tomi Sarvela Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170519091340.21439-1-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 7922264..df425bf 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1427,6 +1427,35 @@ out: return ret; } +static void gen3_stop_rings(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) { + const u32 base = engine->mmio_base; + const i915_reg_t mode = RING_MI_MODE(base); + + I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING)); + if (intel_wait_for_register_fw(dev_priv, + mode, + MODE_IDLE, + MODE_IDLE, + 500)) + DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", + engine->name); + + I915_WRITE_FW(RING_CTL(base), 0); + I915_WRITE_FW(RING_HEAD(base), 0); + I915_WRITE_FW(RING_TAIL(base), 0); + + /* Check acts as a post */ + if (I915_READ_FW(RING_HEAD(base)) != 0) + DRM_DEBUG_DRIVER("%s: ring head not parked\n", + engine->name); + } +} + static bool i915_reset_complete(struct pci_dev *pdev) { u8 gdrst; @@ -1473,6 +1502,12 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); POSTING_READ(VDECCLK_GATE_D); + /* We stop engines, otherwise we might get failed reset and a + * dead gpu (on elk). + * WaMediaResetMainRingCleanup:ctg,elk (presumably) + */ + gen3_stop_rings(dev_priv); + pci_write_config_byte(pdev, I915_GDRST, GRDOM_MEDIA | GRDOM_RESET_ENABLE); ret = wait_for(g4x_reset_complete(pdev), 500); -- cgit v1.1 From 00bd16f257a4cd94305bb6e1c73f7c93c196ffc7 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Mon, 15 May 2017 17:06:09 +0000 Subject: drm/i915/guc: Remove action status and statistics from debugfs Usefulness of these stats was over-advertised. 
v2: remove duplicated engine stats (Chris) Suggested-by: Chris Wilson Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Link: http://patchwork.freedesktop.org/patch/msgid/20170515170610.35528-1-michal.wajdeczko@intel.com Acked-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 19 ------------------- drivers/gpu/drm/i915/i915_guc_submission.c | 3 --- drivers/gpu/drm/i915/intel_uc.c | 7 ------- drivers/gpu/drm/i915/intel_uc.h | 10 ---------- 4 files changed, 39 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6bdc903..c51c56a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2512,9 +2512,6 @@ static int i915_guc_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_guc *guc = &dev_priv->guc; - struct intel_engine_cs *engine; - enum intel_engine_id id; - u64 total; if (!check_guc_submission(m)) return 0; @@ -2523,22 +2520,6 @@ static int i915_guc_info(struct seq_file *m, void *data) seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap); seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline); - seq_printf(m, "GuC total action count: %llu\n", guc->action_count); - seq_printf(m, "GuC action failure count: %u\n", guc->action_fail); - seq_printf(m, "GuC last action command: 0x%x\n", guc->action_cmd); - seq_printf(m, "GuC last action status: 0x%x\n", guc->action_status); - seq_printf(m, "GuC last action error code: %d\n", guc->action_err); - - total = 0; - seq_printf(m, "\nGuC submissions:\n"); - for_each_engine(engine, dev_priv, id) { - u64 submissions = guc->submissions[id]; - total += submissions; - seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n", - engine->name, submissions, guc->last_seqno[id]); - } - seq_printf(m, "\t%s: %llu\n", "Total", total); - seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client); i915_guc_client_info(m, dev_priv, guc->execbuf_client); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index ee32fa9..777f48e 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -615,9 +615,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) client->submissions[engine_id] += 1; - guc->submissions[engine_id] += 1; - guc->last_seqno[engine_id] = rq->global_seqno; - spin_unlock_irqrestore(&client->wq_lock, flags); } diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 07c5658..d27b527 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -440,9 +440,6 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) mutex_lock(&guc->send_mutex); intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains); - dev_priv->guc.action_count += 1; - dev_priv->guc.action_cmd = action[0]; - for (i = 0; i < len; i++) I915_WRITE(guc_send_reg(guc, i), action[i]); @@ -471,11 +468,7 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" " ret=%d status=0x%08X response=0x%08X\n", action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); - - dev_priv->guc.action_fail += 1; - dev_priv->guc.action_err = ret; } - dev_priv->guc.action_status = status; intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); mutex_unlock(&guc->send_mutex); diff 
--git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 33c8f77..930f2e1 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -187,16 +187,6 @@ struct intel_guc { DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS); uint32_t db_cacheline; /* Cyclic counter mod pagesize */ - /* Action status & statistics */ - uint64_t action_count; /* Total commands issued */ - uint32_t action_cmd; /* Last command word */ - uint32_t action_status; /* Last return status */ - uint32_t action_fail; /* Total number of failures */ - int32_t action_err; /* Last error code */ - - uint64_t submissions[I915_NUM_ENGINES]; - uint32_t last_seqno[I915_NUM_ENGINES]; - /* GuC's FW specific registers used in MMIO send */ struct { u32 base; -- cgit v1.1 From 0109808145180d0ed3da05361f80839e6c80caeb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 19 May 2017 18:56:17 +0100 Subject: drm/i915: Check for allocation failure The memory allocation for C is not being null checked and hence we could end up with a null pointer dereference. Fix this with a null pointer check. (I really should have noticed this when I was fixing an earlier issue.) Detected by CoverityScan, CID#1436406 ("Dereference null return") Fixes: 47624cc3301b60 ("drm/i915: Import the kfence selftests for i915_sw_fence") Signed-off-by: Colin Ian King Link: http://patchwork.freedesktop.org/patch/msgid/20170519175617.7036-1-colin.king@canonical.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/selftests/i915_sw_fence.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c index c31d439..19d145d6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c @@ -123,6 +123,11 @@ static int test_dag(void *arg) } C = alloc_fence(); + if (!C) { + ret = -ENOMEM; + goto err_B; + } + if (i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL) == -EINVAL) { pr_err("invalid cycle detected\n"); goto err_C; -- cgit v1.1 From dbc26ebd97b20e8732309c89bef5d8bfee5c0a3b Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 18 May 2017 10:47:11 -0700 Subject: drm/i915/huc: Update GLK HuC version Update version of HuC from 01.07.1748 to the version 02.00.1748 Cc: Ander Conselvan Cc: John Spotswood Signed-off-by: Anusha Srivatsa Reviewed-by: Ander Conselvan de Oliveira Signed-off-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1495129631-2930-1-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_huc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 88b4cf3..f5eb18d0e 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -52,8 +52,8 @@ #define KBL_HUC_FW_MINOR 00 #define KBL_BLD_NUM 1810 -#define GLK_HUC_FW_MAJOR 01 -#define GLK_HUC_FW_MINOR 07 +#define GLK_HUC_FW_MAJOR 02 +#define GLK_HUC_FW_MINOR 00 #define GLK_BLD_NUM 1748 #define HUC_FW_PATH(platform, major, minor, bld_num) \ -- cgit v1.1 From 6bdba81979b2c3c8fed0be62ca31c32c3129d85f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 17 May 2017 15:15:57 +0200 Subject: Revert "drm/i915: Restore lost "Initialized i915" welcome message" This reverts commit bc5ca47c0af4f949ba889e666b7da65569e36093. 
Gabriel put this back into generic code with commit 75f6dfe3e652e1adef8cc1b073c89f3e22103a8f Author: Gabriel Krisman Bertazi Date: Wed Dec 28 12:32:11 2016 -0200 drm: Deduplicate driver initialization message but somehow he missed Chris' patch to add the message meanwhile. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101025 Fixes: 75f6dfe3e652 ("drm: Deduplicate driver initialization message") Cc: Gabriel Krisman Bertazi Cc: Daniel Vetter Cc: Jani Nikula Cc: Chris Wilson Cc: # v4.11+ Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170517131557.7836-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/i915_drv.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 452c265..7b8c727 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1277,10 +1277,6 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) dev_priv->ipc_enabled = false; - /* Everything is in place, we can now relax! */ - DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", - driver.name, driver.major, driver.minor, driver.patchlevel, - driver.date, pci_name(pdev), dev_priv->drm.primary->index); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG)) DRM_INFO("DRM_I915_DEBUG enabled\n"); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) -- cgit v1.1 From 48f1fc3a301d50c27c07a0fe73cd3a75d4f59552 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Mon, 22 May 2017 12:02:44 +0300 Subject: drm/i915/g33: Improve reset reliability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We improved the reset reliablity on gen4 with stopping all engines before commencing reset, in commit 2c80353f3cd0 ("drm/i915/g4x: Improve gpu reset reliability") Evidence indicates that this same trick works with g33. v2: proper gen naming, comment readability (Chris) Testcase: igt/gem_busy/*-hang #blb-e6850 Cc: Ville Syrjälä Cc: Chris Wilson Cc: Tomi Sarvela Signed-off-by: Mika Kuoppala Acked-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170522090244.2557-1-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index df425bf..9269cae 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1488,6 +1488,9 @@ static int g33_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) { struct pci_dev *pdev = dev_priv->drm.pdev; + /* Stop engines before we reset; see g4x_do_reset() below for why. */ + gen3_stop_rings(dev_priv); + pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); return wait_for(g4x_reset_complete(pdev), 500); } -- cgit v1.1 From 1d5fd00313c49f7972e3356ac58e0ad7934e1948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Tue, 23 May 2017 12:23:58 +0200 Subject: drm/i915: Remove misleading comment in request_alloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Passing NULL ctx to request_alloc would lead to null-ptr-deref. 
v2: Let's not replace the comment with a BUG_ON Signed-off-by: Michał Winiarski Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170523102400.9614-1-michal.winiarski@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_request.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 1ccf252..0d1e0d8 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -538,9 +538,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) * * @engine: engine that we wish to issue the request on. * @ctx: context that the request will be associated with. - * This can be NULL if the request is not directly related to - * any specific user context, in which case this function will - * choose an appropriate context to use. * * Returns a pointer to the allocated request if successful, * or an error code if not. -- cgit v1.1 From f63078abff0d2927c2f58f22b1082520def70865 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Tue, 23 May 2017 12:23:59 +0200 Subject: drm/i915/guc: Skip port assign on first iteration of GuC dequeue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If port[0] is occupied and we're trying to dequeue request from different context, we will inevitably hit BUG_ON in port_assign. Let's skip it - similar to what we're doing in execlists counterpart. Fixes: 77f0d0e925e8a0 ("drm/i915/execlists: Pack the count into the low bits of the port.request") Cc: Chris Wilson Cc: Michał Wajdeczko Cc: Mika Kuoppala Cc: Tvrtko Ursulin Signed-off-by: Michał Winiarski Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170523102400.9614-2-michal.winiarski@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_guc_submission.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 777f48e..e6e0c6e 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -681,7 +681,8 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine) goto done; } - port_assign(port, last); + if (submit) + port_assign(port, last); port++; } -- cgit v1.1 From ca83d5840cb641b2efb04db0b70fa56955dd1453 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 23 May 2017 20:44:12 +0100 Subject: drm/i915/selftests: Silence compiler warning in igt_ctx_exec The compiler doesn't always spot the guard that object is allocated on the first pass, leading to: drivers/gpu/drm/i915/selftests/i915_gem_context.c: warning: 'obj' may be used uninitialized in this function [-Wuninitialized]: => 370:8 v2: Make it more obvious by setting obj to NULL on the first pass and any later pass where we need to reallocate. 
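As a general note on the v2 approach, letting the pointer itself be the allocation guard (NULL means "allocate on this pass", reset to NULL on wrap) gives the compiler a condition it can actually track, unlike a separate counter. A small self-contained sketch of the pattern, with hypothetical alloc_item()/use_item()/max_dwords() helpers rather than the selftest code:

#include <stdlib.h>

struct item { unsigned int filled; };

static struct item *alloc_item(void) { return calloc(1, sizeof(struct item)); }
static unsigned int max_dwords(const struct item *obj) { (void)obj; return 4; }
static void use_item(struct item *obj, unsigned int dw) { obj->filled = dw; }

static int fill_items(unsigned int count)
{
    struct item *obj = NULL;    /* initialized, so the !obj guard is provably safe */
    unsigned int dw = 0, i;

    for (i = 0; i < count; i++) {
        if (!obj) {             /* guard on the pointer, not on a side counter */
            obj = alloc_item();
            if (!obj)
                return -1;
        }

        use_item(obj, dw);

        if (++dw == max_dwords(obj)) {  /* wrapped: force a fresh allocation */
            free(obj);
            obj = NULL;
            dw = 0;
        }
    }

    free(obj);                  /* free(NULL) is a no-op if the last pass wrapped */
    return 0;
}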
Reported-by: Geert Uytterhoeven Fixes: 791ff39ae32a ("drm/i915: Live testing for context execution") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Matthew Auld c: # v4.12-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170523194412.1195-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 1afb8b06..12b85b3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -320,7 +320,7 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj) static int igt_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj = NULL; struct drm_file *file; IGT_TIMEOUT(end_time); LIST_HEAD(objects); @@ -359,7 +359,7 @@ static int igt_ctx_exec(void *arg) } for_each_engine(engine, i915, id) { - if (dw == 0) { + if (!obj) { obj = create_test_object(ctx, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@ -376,8 +376,10 @@ static int igt_ctx_exec(void *arg) goto out_unlock; } - if (++dw == max_dwords(obj)) + if (++dw == max_dwords(obj)) { + obj = NULL; dw = 0; + } ndwords++; } ncontexts++; -- cgit v1.1 From 22284f400a4dcf48824d96142c0acb75c0c94fba Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 23 May 2017 11:31:16 +0100 Subject: drm/i915: Convert i915_gem_object_ops->flags values to use BIT() Having just watched someone add a new value, 0x3, without realising that the flags were bit values, I have come to appreciate the value in using BIT. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170523103116.32239-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_object.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 174cf92..35e1a27 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -37,8 +37,8 @@ struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 -#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2 +#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) /* Interface between the GEM object and its backing storage. * get_pages() is called once prior to the use of the associated set -- cgit v1.1 From 0ef34ad6222abfa513117515fec720c33a58f105 Mon Sep 17 00:00:00 2001 From: Jon Bloomfield Date: Wed, 24 May 2017 08:54:11 -0700 Subject: drm/i915: Serialize GTT/Aperture accesses on BXT BXT has a H/W issue with IOMMU which can lead to system hangs when Aperture accesses are queued within the GAM behind GTT Accesses. This patch avoids the condition by wrapping all GTT updates in stop_machine and using a flushing read prior to restarting the machine. The stop_machine guarantees no new Aperture accesses can begin while the PTE writes are being emmitted. The flushing read ensures that any following Aperture accesses cannot begin until the PTE writes have been cleared out of the GAM's fifo. Only FOLLOWING Aperture accesses need to be separated from in flight PTE updates. 
PTE Writes may follow tightly behind already in flight Aperture accesses, so no flushing read is required at the start of a PTE update sequence. This issue was reproduced by running igt/gem_readwrite and igt/gem_render_copy simultaneously from different processes, each in a tight loop, with INTEL_IOMMU enabled. This patch was originally published as: drm/i915: Serialize GTT Updates on BXT v2: Move bxt/iommu detection into static function Remove #ifdef CONFIG_INTEL_IOMMU protection Make function names more reflective of purpose Move flushing read into static function v3: Tidy up for checkpatch.pl Testcase: igt/gem_concurrent_blit Signed-off-by: Jon Bloomfield Cc: John Harrison Cc: Chris Wilson Cc: Daniel Vetter Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1495641251-30022-1-git-send-email-jon.bloomfield@intel.com Reviewed-by: Tvrtko Ursulin Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 10 ++++ drivers/gpu/drm/i915/i915_gem_gtt.c | 103 ++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 17883a8..a5a01b6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3006,6 +3006,16 @@ static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) return false; } +static inline bool +intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv) +{ +#ifdef CONFIG_INTEL_IOMMU + if (IS_BROXTON(dev_priv) && intel_iommu_gfx_mapped) + return true; +#endif + return false; +} + int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, int enable_ppgtt); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b18ed51..93f11c7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2189,6 +2189,101 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, gen8_set_pte(>t_base[i], scratch_pte); } +static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) +{ + struct drm_i915_private *dev_priv = vm->i915; + + /* + * Make sure the internal GAM fifo has been cleared of all GTT + * writes before exiting stop_machine(). This guarantees that + * any aperture accesses waiting to start in another process + * cannot back up behind the GTT writes causing a hang. + * The register can be any arbitrary GAM register. 
+ */ + POSTING_READ(GFX_FLSH_CNTL_GEN6); +} + +struct insert_page { + struct i915_address_space *vm; + dma_addr_t addr; + u64 offset; + enum i915_cache_level level; +}; + +static int bxt_vtd_ggtt_insert_page__cb(void *_arg) +{ + struct insert_page *arg = _arg; + + gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 unused) +{ + struct insert_page arg = { vm, addr, offset, level }; + + stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL); +} + +struct insert_entries { + struct i915_address_space *vm; + struct sg_table *st; + u64 start; + enum i915_cache_level level; +}; + +static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) +{ + struct insert_entries *arg = _arg; + + gen8_ggtt_insert_entries(arg->vm, arg->st, arg->start, arg->level, 0); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, + struct sg_table *st, + u64 start, + enum i915_cache_level level, + u32 unused) +{ + struct insert_entries arg = { vm, st, start, level }; + + stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); +} + +struct clear_range { + struct i915_address_space *vm; + u64 start; + u64 length; +}; + +static int bxt_vtd_ggtt_clear_range__cb(void *_arg) +{ + struct clear_range *arg = _arg; + + gen8_ggtt_clear_range(arg->vm, arg->start, arg->length); + bxt_vtd_ggtt_wa(arg->vm); + + return 0; +} + +static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm, + u64 start, + u64 length) +{ + struct clear_range arg = { vm, start, length }; + + stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL); +} + static void gen6_ggtt_clear_range(struct i915_address_space *vm, u64 start, u64 length) { @@ -2787,6 +2882,14 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.insert_entries = gen8_ggtt_insert_entries; + /* Serialize GTT updates with aperture access on BXT if VT-d is on. */ + if (intel_ggtt_update_needs_vtd_wa(dev_priv)) { + ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; + ggtt->base.insert_page = bxt_vtd_ggtt_insert_page__BKL; + if (ggtt->base.clear_range != nop_clear_range) + ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL; + } + ggtt->invalidate = gen6_ggtt_invalidate; return ggtt_probe_common(ggtt, size); -- cgit v1.1 From b2241f182aa0942749e7c95eca92b840ab93263b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 25 May 2017 08:25:28 +0100 Subject: drm/i915: Only GGTT vma may be pinned and prevent shrinking As only GGTT vma may be permanently pinned and are always at the head of the object's vma list, as soon as we seen a ppGTT vma we can stop searching for any_vma_pinned(). 
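The ordering argument is worth spelling out: since only GGTT vmas can be permanently pinned and they always precede ppGTT vmas in the object's vma list, the walk may stop at the first ppGTT entry. A hedged, generic sketch of that early-break idea (simplified singly linked list with made-up vma_like/any_pinned() names, not the i915 structures):

#include <stdbool.h>

struct vma_like {
    bool is_ggtt;           /* only these may be permanently pinned */
    bool is_pinned;
    struct vma_like *next;  /* invariant: GGTT entries come before ppGTT entries */
};

static bool any_pinned(const struct vma_like *head)
{
    const struct vma_like *v;

    for (v = head; v; v = v->next) {
        if (!v->is_ggtt)    /* past the GGTT prefix: nothing ahead can be pinned */
            break;
        if (v->is_pinned)
            return true;
    }
    return false;
}

The early break is only correct because of the ordering invariant; if a pinned GGTT vma could ever appear after a ppGTT vma, the scan would miss it.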
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170525072528.11185-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index b409e67..0fd2b58 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -65,9 +65,17 @@ static bool any_vma_pinned(struct drm_i915_gem_object *obj) { struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma_list, obj_link) { + /* Only GGTT vma may be permanently pinned, and are always + * at the start of the list. We can stop hunting as soon + * as we see a ppGTT vma. + */ + if (!i915_vma_is_ggtt(vma)) + break; + if (i915_vma_is_pinned(vma)) return true; + } return false; } -- cgit v1.1 From 80debff8d9afbf649a15c7ecb0c2cdeb89b36231 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 25 May 2017 13:16:12 +0100 Subject: drm/i915: Consolidate #ifdef CONFIG_INTEL_IOMMU We depend on intel_iommu_gfx_mapped for various workarounds, but that is only available under an #ifdef CONFIG_INTEL_IOMMU. Refactor all the cut-and-paste ifdefs to a common routine. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170525121612.2190-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 15 ++++++++------- drivers/gpu/drm/i915/i915_gem.c | 4 +--- drivers/gpu/drm/i915/i915_gem_gtt.c | 14 +++----------- drivers/gpu/drm/i915/i915_gem_stolen.c | 4 +--- drivers/gpu/drm/i915/intel_fbc.c | 4 +--- 5 files changed, 14 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a5a01b6..9ba2242 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2997,23 +2997,24 @@ intel_info(const struct drm_i915_private *dev_priv) #include "i915_trace.h" -static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) +static inline bool intel_vtd_active(void) { #ifdef CONFIG_INTEL_IOMMU - if (INTEL_GEN(dev_priv) >= 6 && intel_iommu_gfx_mapped) + if (intel_iommu_gfx_mapped) return true; #endif return false; } +static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) +{ + return INTEL_GEN(dev_priv) >= 6 && intel_vtd_active(); +} + static inline bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv) { -#ifdef CONFIG_INTEL_IOMMU - if (IS_BROXTON(dev_priv) && intel_iommu_gfx_mapped) - return true; -#endif - return false; + return IS_BROXTON(dev_priv) && intel_vtd_active(); } int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0680bd2..7572dcd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4728,11 +4728,9 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) if (value >= 0) return value; -#ifdef CONFIG_INTEL_IOMMU /* Enable semaphores on SNB when IO remapping is off */ - if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped) + if (IS_GEN6(dev_priv) && intel_vtd_active()) return false; -#endif return true; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 93f11c7..f52068a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ 
b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -166,13 +166,11 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, if (enable_ppgtt == 3 && has_full_48bit_ppgtt) return 3; -#ifdef CONFIG_INTEL_IOMMU /* Disable ppgtt on SNB if VT-d is on. */ - if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) { + if (IS_GEN6(dev_priv) && intel_vtd_active()) { DRM_INFO("Disabling PPGTT because VT-d is on\n"); return 0; } -#endif /* Early VLV doesn't have this */ if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) { @@ -1990,14 +1988,10 @@ void i915_ppgtt_release(struct kref *kref) */ static bool needs_idle_maps(struct drm_i915_private *dev_priv) { -#ifdef CONFIG_INTEL_IOMMU /* Query intel_iommu to see if we need the workaround. Presumably that * was loaded first. */ - if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) - return true; -#endif - return false; + return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active(); } void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) @@ -3037,10 +3031,8 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) ggtt->base.total >> 20); DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20); -#ifdef CONFIG_INTEL_IOMMU - if (intel_iommu_gfx_mapped) + if (intel_vtd_active()) DRM_INFO("VT-d active for gfx access\n"); -#endif return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index f3abdc2..681db60 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -414,12 +414,10 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) return 0; } -#ifdef CONFIG_INTEL_IOMMU - if (intel_iommu_gfx_mapped && INTEL_GEN(dev_priv) < 8) { + if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) { DRM_INFO("DMAR active, disabling use of stolen memory\n"); return 0; } -#endif if (ggtt->stolen_size == 0) return 0; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index ded2add..64e44d9 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -1312,14 +1312,12 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv) { -#ifdef CONFIG_INTEL_IOMMU /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */ - if (intel_iommu_gfx_mapped && + if (intel_vtd_active() && (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) { DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n"); return true; } -#endif return false; } -- cgit v1.1 From 49d191dd5d41ca3a6b447075d50ce5f287171f71 Mon Sep 17 00:00:00 2001 From: Puthikorn Voravootivat Date: Tue, 23 May 2017 15:38:01 -0700 Subject: drm/i915: Drop AUX backlight enable check for backlight control There are some panel that (1) does not support display backlight enable via AUX (2) support display backlight adjustment via AUX (3) support display backlight enable via eDP BL_ENABLE pin The current driver required that (1) must be support to enable (2). This patch drops that requirement. 
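Stated as a predicate: after this change only TCON backlight adjustment, brightness setting over AUX, and the absence of a PWM-pin brightness path are required, while the AUX enable capability merely decides whether set_aux_backlight_enable() does anything (see the early return in the hunk below). A hedged restatement with a made-up aux_brightness_control_usable() helper, using the DPCD capability bits from that hunk:

#include <linux/types.h>
#include <drm/drm_dp_helper.h>

/* Sketch only; mirrors the new condition below rather than adding a new API. */
static bool aux_brightness_control_usable(const u8 *edp_dpcd)
{
    return (edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP) &&
           (edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP) &&
           !(edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP);
}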
Signed-off-by: Puthikorn Voravootivat Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170523223805.46372-2-puthik@chromium.org --- drivers/gpu/drm/i915/intel_dp_aux_backlight.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c index b87c5a3..a0995c00 100644 --- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c @@ -28,6 +28,10 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable) { uint8_t reg_val = 0; + /* Early return when display use other mechanism to enable backlight. */ + if (!(intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP)) + return; + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_DISPLAY_CONTROL_REGISTER, ®_val) < 0) { DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n", @@ -165,10 +169,8 @@ intel_dp_aux_display_control_capable(struct intel_connector *connector) * the panel can support backlight control over the aux channel */ if (intel_dp->edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP && - (intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) && (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP) && - !((intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_PIN_ENABLE_CAP) || - (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP))) { + !(intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP)) { DRM_DEBUG_KMS("AUX Backlight Control Supported!\n"); return true; } -- cgit v1.1 From 77a494a7250ce082def81a5fa67c24789ecabe0b Mon Sep 17 00:00:00 2001 From: Puthikorn Voravootivat Date: Tue, 23 May 2017 15:38:04 -0700 Subject: drm: Add definition for eDP backlight frequency This patch adds the following definition - Bit mask for EDP_PWMGEN_BIT_COUNT and min/max cap register which only use bit 0:4 - Base frequency (27 MHz) for backlight PWM frequency generator. Signed-off-by: Puthikorn Voravootivat Reviewed-by: Dhinakaran Pandiyan Acked-by: Dave Airlie Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170523223805.46372-5-puthik@chromium.org --- include/drm/drm_dp_helper.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index c0bd0d7..23837f5 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -572,10 +572,12 @@ #define DP_EDP_PWMGEN_BIT_COUNT 0x724 #define DP_EDP_PWMGEN_BIT_COUNT_CAP_MIN 0x725 #define DP_EDP_PWMGEN_BIT_COUNT_CAP_MAX 0x726 +# define DP_EDP_PWMGEN_BIT_COUNT_MASK (0x1f << 0) #define DP_EDP_BACKLIGHT_CONTROL_STATUS 0x727 #define DP_EDP_BACKLIGHT_FREQ_SET 0x728 +# define DP_EDP_BACKLIGHT_FREQ_BASE_KHZ 27000 #define DP_EDP_BACKLIGHT_FREQ_CAP_MIN_MSB 0x72a #define DP_EDP_BACKLIGHT_FREQ_CAP_MIN_MID 0x72b -- cgit v1.1 From 2f64085a75f623f5f5f32237131ea8de1c59be0e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 26 May 2017 11:13:24 +0000 Subject: drm/i915/guc: Disable send function on fini In earlier patch 789a625 we were enabling send function only after successful init. For completeness, we should make sure that we disable it on fini. 
v2: don't group steps by submission flag (Chris) Signed-off-by: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Daniele Ceraolo Spurio Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170526111326.87280-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_uc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index d27b527..31dc8c3 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -410,11 +410,16 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) if (!i915.enable_guc_loading) return; - if (i915.enable_guc_submission) { + if (i915.enable_guc_submission) i915_guc_submission_disable(dev_priv); + + guc_disable_communication(&dev_priv->guc); + + if (i915.enable_guc_submission) { gen9_disable_guc_interrupts(dev_priv); i915_guc_submission_fini(dev_priv); } + i915_ggtt_disable_guc(dev_priv); } -- cgit v1.1 From f8a58d639dd95b0188862b4c1c1cc81c737db612 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 26 May 2017 11:13:25 +0000 Subject: drm/i915/guc: Introduce buffer based cmd transport Buffer based command transport can replace MMIO based mechanism. It may be used to perform host-2-guc and guc-to-host communication. Portions of this patch are based on work by: Michel Thierry Robert Beckett Daniele Ceraolo Spurio v2: use gem_object_pin_map (Chris) don't use DEBUG_RATELIMITED (Chris) don't track action stats (Chris) simplify next fence (Chris) use READ_ONCE (Chris) move blob allocation to new function (Chris) v3: use static owner id (Daniele) v4: but keep channel initialization generic (Daniele) and introduce owner_sub_id (Daniele) Signed-off-by: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Cc: Oscar Mateo Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170526111326.87280-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/intel_guc_ct.c | 461 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc_ct.h | 86 +++++++ drivers/gpu/drm/i915/intel_guc_fwif.h | 43 ++++ drivers/gpu/drm/i915/intel_uc.c | 19 +- drivers/gpu/drm/i915/intel_uc.h | 3 +- 7 files changed, 613 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_guc_ct.c create mode 100644 drivers/gpu/drm/i915/intel_guc_ct.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 7b05fb8..16dccf5 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -58,6 +58,7 @@ i915-y += i915_cmd_parser.o \ # general-purpose microcontroller (GuC) support i915-y += intel_uc.o \ + intel_guc_ct.o \ intel_guc_log.o \ intel_guc_loader.o \ intel_huc.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9ba2242..d2a5749 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -760,6 +760,7 @@ struct intel_csr { func(has_gmbus_irq); \ func(has_gmch_display); \ func(has_guc); \ + func(has_guc_ct); \ func(has_hotplug); \ func(has_l3_dpf); \ func(has_llc); \ @@ -2947,6 +2948,7 @@ intel_info(const struct drm_i915_private *dev_priv) * properties, so we have separate macros to test them. 
*/ #define HAS_GUC(dev_priv) ((dev_priv)->info.has_guc) +#define HAS_GUC_CT(dev_priv) ((dev_priv)->info.has_guc_ct) #define HAS_GUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) #define HAS_GUC_SCHED(dev_priv) (HAS_GUC(dev_priv)) #define HAS_HUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c new file mode 100644 index 0000000..c4cbec1 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_ct.c @@ -0,0 +1,461 @@ +/* + * Copyright © 2016-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "i915_drv.h" +#include "intel_guc_ct.h" + +enum { CTB_SEND = 0, CTB_RECV = 1 }; + +enum { CTB_OWNER_HOST = 0 }; + +void intel_guc_ct_init_early(struct intel_guc_ct *ct) +{ + /* we're using static channel owners */ + ct->host_channel.owner = CTB_OWNER_HOST; +} + +static inline const char *guc_ct_buffer_type_to_str(u32 type) +{ + switch (type) { + case INTEL_GUC_CT_BUFFER_TYPE_SEND: + return "SEND"; + case INTEL_GUC_CT_BUFFER_TYPE_RECV: + return "RECV"; + default: + return ""; + } +} + +static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, + u32 cmds_addr, u32 size, u32 owner) +{ + DRM_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n", + desc, cmds_addr, size, owner); + memset(desc, 0, sizeof(*desc)); + desc->addr = cmds_addr; + desc->size = size; + desc->owner = owner; +} + +static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc) +{ + DRM_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n", + desc, desc->head, desc->tail); + desc->head = 0; + desc->tail = 0; + desc->is_in_error = 0; +} + +static int guc_action_register_ct_buffer(struct intel_guc *guc, + u32 desc_addr, + u32 type) +{ + u32 action[] = { + INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER, + desc_addr, + sizeof(struct guc_ct_buffer_desc), + type + }; + int err; + + /* Can't use generic send(), CT registration must go over MMIO */ + err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action)); + if (err) + DRM_ERROR("CT: register %s buffer failed; err=%d\n", + guc_ct_buffer_type_to_str(type), err); + return err; +} + +static int guc_action_deregister_ct_buffer(struct intel_guc *guc, + u32 owner, + u32 type) +{ + u32 action[] = { + INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER, + owner, + type + }; + int err; + + /* Can't use generic send(), CT deregistration must go over MMIO */ + err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action)); + if 
(err) + DRM_ERROR("CT: deregister %s buffer failed; owner=%d err=%d\n", + guc_ct_buffer_type_to_str(type), owner, err); + return err; +} + +static bool ctch_is_open(struct intel_guc_ct_channel *ctch) +{ + return ctch->vma != NULL; +} + +static int ctch_init(struct intel_guc *guc, + struct intel_guc_ct_channel *ctch) +{ + struct i915_vma *vma; + void *blob; + int err; + int i; + + GEM_BUG_ON(ctch->vma); + + /* We allocate 1 page to hold both descriptors and both buffers. + * ___________..................... + * |desc (SEND)| : + * |___________| PAGE/4 + * :___________....................: + * |desc (RECV)| : + * |___________| PAGE/4 + * :_______________________________: + * |cmds (SEND) | + * | PAGE/4 + * |_______________________________| + * |cmds (RECV) | + * | PAGE/4 + * |_______________________________| + * + * Each message can use a maximum of 32 dwords and we don't expect to + * have more than 1 in flight at any time, so we have enough space. + * Some logic further ahead will rely on the fact that there is only 1 + * page and that it is always mapped, so if the size is changed the + * other code will need updating as well. + */ + + /* allocate vma */ + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_out; + } + ctch->vma = vma; + + /* map first page */ + blob = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(blob)) { + err = PTR_ERR(blob); + goto err_vma; + } + DRM_DEBUG_DRIVER("CT: vma base=%#x\n", guc_ggtt_offset(ctch->vma)); + + /* store pointers to desc and cmds */ + for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { + GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); + ctch->ctbs[i].desc = blob + PAGE_SIZE/4 * i; + ctch->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2; + } + + return 0; + +err_vma: + i915_vma_unpin_and_release(&ctch->vma); +err_out: + DRM_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n", + ctch->owner, err); + return err; +} + +static void ctch_fini(struct intel_guc *guc, + struct intel_guc_ct_channel *ctch) +{ + GEM_BUG_ON(!ctch->vma); + + i915_gem_object_unpin_map(ctch->vma->obj); + i915_vma_unpin_and_release(&ctch->vma); +} + +static int ctch_open(struct intel_guc *guc, + struct intel_guc_ct_channel *ctch) +{ + u32 base; + int err; + int i; + + DRM_DEBUG_DRIVER("CT: channel %d reopen=%s\n", + ctch->owner, yesno(ctch_is_open(ctch))); + + if (!ctch->vma) { + err = ctch_init(guc, ctch); + if (unlikely(err)) + goto err_out; + } + + /* vma should be already allocated and map'ed */ + base = guc_ggtt_offset(ctch->vma); + + /* (re)initialize descriptors + * cmds buffers are in the second half of the blob page + */ + for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { + GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV)); + guc_ct_buffer_desc_init(ctch->ctbs[i].desc, + base + PAGE_SIZE/4 * i + PAGE_SIZE/2, + PAGE_SIZE/4, + ctch->owner); + } + + /* register buffers, starting wirh RECV buffer + * descriptors are in first half of the blob + */ + err = guc_action_register_ct_buffer(guc, + base + PAGE_SIZE/4 * CTB_RECV, + INTEL_GUC_CT_BUFFER_TYPE_RECV); + if (unlikely(err)) + goto err_fini; + + err = guc_action_register_ct_buffer(guc, + base + PAGE_SIZE/4 * CTB_SEND, + INTEL_GUC_CT_BUFFER_TYPE_SEND); + if (unlikely(err)) + goto err_deregister; + + return 0; + +err_deregister: + guc_action_deregister_ct_buffer(guc, + ctch->owner, + INTEL_GUC_CT_BUFFER_TYPE_RECV); +err_fini: + ctch_fini(guc, ctch); +err_out: + DRM_ERROR("CT: can't open channel %d; err=%d\n", ctch->owner, err); + return err; +} + +static void 
ctch_close(struct intel_guc *guc, + struct intel_guc_ct_channel *ctch) +{ + GEM_BUG_ON(!ctch_is_open(ctch)); + + guc_action_deregister_ct_buffer(guc, + ctch->owner, + INTEL_GUC_CT_BUFFER_TYPE_SEND); + guc_action_deregister_ct_buffer(guc, + ctch->owner, + INTEL_GUC_CT_BUFFER_TYPE_RECV); + ctch_fini(guc, ctch); +} + +static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch) +{ + /* For now it's trivial */ + return ++ctch->next_fence; +} + +static int ctb_write(struct intel_guc_ct_buffer *ctb, + const u32 *action, + u32 len /* in dwords */, + u32 fence) +{ + struct guc_ct_buffer_desc *desc = ctb->desc; + u32 head = desc->head / 4; /* in dwords */ + u32 tail = desc->tail / 4; /* in dwords */ + u32 size = desc->size / 4; /* in dwords */ + u32 used; /* in dwords */ + u32 header; + u32 *cmds = ctb->cmds; + unsigned int i; + + GEM_BUG_ON(desc->size % 4); + GEM_BUG_ON(desc->head % 4); + GEM_BUG_ON(desc->tail % 4); + GEM_BUG_ON(tail >= size); + + /* + * tail == head condition indicates empty. GuC FW does not support + * using up the entire buffer to get tail == head meaning full. + */ + if (tail < head) + used = (size - head) + tail; + else + used = tail - head; + + /* make sure there is a space including extra dw for the fence */ + if (unlikely(used + len + 1 >= size)) + return -ENOSPC; + + /* Write the message. The format is the following: + * DW0: header (including action code) + * DW1: fence + * DW2+: action data + */ + header = (len << GUC_CT_MSG_LEN_SHIFT) | + (GUC_CT_MSG_WRITE_FENCE_TO_DESC) | + (action[0] << GUC_CT_MSG_ACTION_SHIFT); + + cmds[tail] = header; + tail = (tail + 1) % size; + + cmds[tail] = fence; + tail = (tail + 1) % size; + + for (i = 1; i < len; i++) { + cmds[tail] = action[i]; + tail = (tail + 1) % size; + } + + /* now update desc tail (back in bytes) */ + desc->tail = tail * 4; + GEM_BUG_ON(desc->tail > desc->size); + + return 0; +} + +/* Wait for the response from the GuC. + * @fence: response fence + * @status: placeholder for status + * return: 0 response received (status is valid) + * -ETIMEDOUT no response within hardcoded timeout + * -EPROTO no response, ct buffer was in error + */ +static int wait_for_response(struct guc_ct_buffer_desc *desc, + u32 fence, + u32 *status) +{ + int err; + + /* + * Fast commands should complete in less than 10us, so sample quickly + * up to that length of time, then switch to a slower sleep-wait loop. + * No GuC command should ever take longer than 10ms. 
+ */ +#define done (READ_ONCE(desc->fence) == fence) + err = wait_for_us(done, 10); + if (err) + err = wait_for(done, 10); +#undef done + + if (unlikely(err)) { + DRM_ERROR("CT: fence %u failed; reported fence=%u\n", + fence, desc->fence); + + if (WARN_ON(desc->is_in_error)) { + /* Something went wrong with the messaging, try to reset + * the buffer and hope for the best + */ + guc_ct_buffer_desc_reset(desc); + err = -EPROTO; + } + } + + *status = desc->status; + return err; +} + +static int ctch_send(struct intel_guc *guc, + struct intel_guc_ct_channel *ctch, + const u32 *action, + u32 len, + u32 *status) +{ + struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND]; + struct guc_ct_buffer_desc *desc = ctb->desc; + u32 fence; + int err; + + GEM_BUG_ON(!ctch_is_open(ctch)); + GEM_BUG_ON(!len); + GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); + + fence = ctch_get_next_fence(ctch); + err = ctb_write(ctb, action, len, fence); + if (unlikely(err)) + return err; + + intel_guc_notify(guc); + + err = wait_for_response(desc, fence, status); + if (unlikely(err)) + return err; + if (*status != INTEL_GUC_STATUS_SUCCESS) + return -EIO; + return 0; +} + +/* + * Command Transport (CT) buffer based GuC send function. + */ +static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len) +{ + struct intel_guc_ct_channel *ctch = &guc->ct.host_channel; + u32 status = ~0; /* undefined */ + int err; + + mutex_lock(&guc->send_mutex); + + err = ctch_send(guc, ctch, action, len, &status); + if (unlikely(err)) { + DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n", + action[0], err, status); + } + + mutex_unlock(&guc->send_mutex); + return err; +} + +/** + * Enable buffer based command transport + * Shall only be called for platforms with HAS_GUC_CT. + * @guc: the guc + * return: 0 on success + * non-zero on failure + */ +int intel_guc_enable_ct(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_guc_ct_channel *ctch = &guc->ct.host_channel; + int err; + + GEM_BUG_ON(!HAS_GUC_CT(dev_priv)); + + err = ctch_open(guc, ctch); + if (unlikely(err)) + return err; + + /* Switch into cmd transport buffer based send() */ + guc->send = intel_guc_send_ct; + DRM_INFO("CT: %s\n", enableddisabled(true)); + return 0; +} + +/** + * Disable buffer based command transport. + * Shall only be called for platforms with HAS_GUC_CT. 
+ * @guc: the guc + */ +void intel_guc_disable_ct(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_guc_ct_channel *ctch = &guc->ct.host_channel; + + GEM_BUG_ON(!HAS_GUC_CT(dev_priv)); + + if (!ctch_is_open(ctch)) + return; + + ctch_close(guc, ctch); + + /* Disable send */ + guc->send = intel_guc_send_nop; + DRM_INFO("CT: %s\n", enableddisabled(false)); +} diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h new file mode 100644 index 0000000..6d97f36 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_ct.h @@ -0,0 +1,86 @@ +/* + * Copyright © 2016-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _INTEL_GUC_CT_H_ +#define _INTEL_GUC_CT_H_ + +struct intel_guc; +struct i915_vma; + +#include "intel_guc_fwif.h" + +/** + * DOC: Command Transport (CT). + * + * Buffer based command transport is a replacement for MMIO based mechanism. + * It can be used to perform both host-2-guc and guc-to-host communication. + */ + +/** Represents single command transport buffer. + * + * A single command transport buffer consists of two parts, the header + * record (command transport buffer descriptor) and the actual buffer which + * holds the commands. + * + * @desc: pointer to the buffer descriptor + * @cmds: pointer to the commands buffer + */ +struct intel_guc_ct_buffer { + struct guc_ct_buffer_desc *desc; + u32 *cmds; +}; + +/** Represents pair of command transport buffers. + * + * Buffers go in pairs to allow bi-directional communication. + * To simplify the code we place both of them in the same vma. + * Buffers from the same pair must share unique owner id. + * + * @vma: pointer to the vma with pair of CT buffers + * @ctbs: buffers for sending(0) and receiving(1) commands + * @owner: unique identifier + * @next_fence: fence to be used with next send command + */ +struct intel_guc_ct_channel { + struct i915_vma *vma; + struct intel_guc_ct_buffer ctbs[2]; + u32 owner; + u32 next_fence; +}; + +/** Holds all command transport channels. + * + * @host_channel: main channel used by the host + */ +struct intel_guc_ct { + struct intel_guc_ct_channel host_channel; + /* other channels are tbd */ +}; + +void intel_guc_ct_init_early(struct intel_guc_ct *ct); + +/* XXX: move to intel_uc.h ? 
don't fit there either */ +int intel_guc_enable_ct(struct intel_guc *guc); +void intel_guc_disable_ct(struct intel_guc *guc); + +#endif /* _INTEL_GUC_CT_H_ */ diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 6156845..5fa2860 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -331,6 +331,47 @@ struct guc_stage_desc { u64 desc_private; } __packed; +/* + * Describes single command transport buffer. + * Used by both guc-master and clients. + */ +struct guc_ct_buffer_desc { + u32 addr; /* gfx address */ + u64 host_private; /* host private data */ + u32 size; /* size in bytes */ + u32 head; /* offset updated by GuC*/ + u32 tail; /* offset updated by owner */ + u32 is_in_error; /* error indicator */ + u32 fence; /* fence updated by GuC */ + u32 status; /* status updated by GuC */ + u32 owner; /* id of the channel owner */ + u32 owner_sub_id; /* owner-defined field for extra tracking */ + u32 reserved[5]; +} __packed; + +/* Type of command transport buffer */ +#define INTEL_GUC_CT_BUFFER_TYPE_SEND 0x0u +#define INTEL_GUC_CT_BUFFER_TYPE_RECV 0x1u + +/* + * Definition of the command transport message header (DW0) + * + * bit[4..0] message len (in dwords) + * bit[7..5] reserved + * bit[8] write fence to desc + * bit[9] write status to H2G buff + * bit[10] send status (via G2H) + * bit[15..11] reserved + * bit[31..16] action code + */ +#define GUC_CT_MSG_LEN_SHIFT 0 +#define GUC_CT_MSG_LEN_MASK 0x1F +#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8) +#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9) +#define GUC_CT_MSG_SEND_STATUS (1 << 10) +#define GUC_CT_MSG_ACTION_SHIFT 16 +#define GUC_CT_MSG_ACTION_MASK 0xFFFF + #define GUC_FORCEWAKE_RENDER (1 << 0) #define GUC_FORCEWAKE_MEDIA (1 << 1) @@ -515,6 +556,8 @@ enum intel_guc_action { INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003, INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, + INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, + INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000, INTEL_GUC_ACTION_LIMIT }; diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 31dc8c3..d17029c 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -105,6 +105,8 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; + intel_guc_ct_init_early(&guc->ct); + mutex_init(&guc->send_mutex); guc->send = intel_guc_send_nop; guc->notify = guc_write_irq_trigger; @@ -288,14 +290,24 @@ static void guc_init_send_regs(struct intel_guc *guc) static int guc_enable_communication(struct intel_guc *guc) { - /* XXX: placeholder for alternate setup */ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + guc_init_send_regs(guc); + + if (HAS_GUC_CT(dev_priv)) + return intel_guc_enable_ct(guc); + guc->send = intel_guc_send_mmio; return 0; } static void guc_disable_communication(struct intel_guc *guc) { + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + if (HAS_GUC_CT(dev_priv)) + intel_guc_disable_ct(guc); + guc->send = intel_guc_send_nop; } @@ -442,6 +454,11 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) GEM_BUG_ON(!len); GEM_BUG_ON(len > guc->send_regs.count); + /* If CT is available, we expect to use MMIO only during init/fini */ + GEM_BUG_ON(HAS_GUC_CT(dev_priv) && + *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && + *action != 
INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); + mutex_lock(&guc->send_mutex); intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains); diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 930f2e1..fb1d640 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -27,7 +27,7 @@ #include "intel_guc_fwif.h" #include "i915_guc_reg.h" #include "intel_ringbuffer.h" - +#include "intel_guc_ct.h" #include "i915_vma.h" struct drm_i915_gem_request; @@ -173,6 +173,7 @@ struct intel_guc_log { struct intel_guc { struct intel_uc_fw fw; struct intel_guc_log log; + struct intel_guc_ct ct; /* intel_guc_recv interrupt related state */ bool interrupts_enabled; -- cgit v1.1 From ac58d2ab0ad9c8b7e41404048a3ba4375db012d3 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 22 May 2017 10:50:28 -0700 Subject: drm/i915/guc: capture GuC logs if FW fails to load We're currently deleting the GuC logs if the FW fails to load, but those are still useful to understand why the loading failed. Keeping the object around allows us to access them after driver load is completed. v2: keep the object around instead of using kernel memory (chris) don't store the logs in the gpu_error struct (Chris) add a check on guc_log_level to avoid snapshotting empty logs v3: use separate debugfs for error log (Chris) v4: rebased v5: clean up obj selection, move err_load inside guc_log, move err_load cleanup, rename functions (Michal) v6: move obj back to intel_guc, move functions to intel_uc.c, don't clear obj on new GuC load, free object only if enable_guc_loading is set (Michal) Cc: Chris Wilson Cc: Oscar Mateo Cc: Michal Wajdeczko Signed-off-by: Daniele Ceraolo Spurio Link: http://patchwork.freedesktop.org/patch/msgid/1495475428-19295-1-git-send-email-daniele.ceraolospurio@intel.com Reviewed-by: Michal Wajdeczko Tested-by: Michel Thierry Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 39 ++++++++++++++++++++++++------------- drivers/gpu/drm/i915/intel_uc.c | 25 ++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_uc.h | 3 +++ 3 files changed, 51 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index c51c56a..0615237 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2586,27 +2586,37 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data) static int i915_guc_log_dump(struct seq_file *m, void *data) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_i915_gem_object *obj; - int i = 0, pg; - - if (!dev_priv->guc.log.vma) - return 0; + struct drm_info_node *node = m->private; + struct drm_i915_private *dev_priv = node_to_i915(node); + bool dump_load_err = !!node->info_ent->data; + struct drm_i915_gem_object *obj = NULL; + u32 *log; + int i = 0; - obj = dev_priv->guc.log.vma->obj; - for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) { - u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg)); + if (dump_load_err) + obj = dev_priv->guc.load_err_log; + else if (dev_priv->guc.log.vma) + obj = dev_priv->guc.log.vma->obj; - for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4) - seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", - *(log + i), *(log + i + 1), - *(log + i + 2), *(log + i + 3)); + if (!obj) + return 0; - kunmap_atomic(log); + log = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(log)) { + DRM_DEBUG("Failed to pin object\n"); + seq_puts(m, "(log data unaccessible)\n"); + return 
PTR_ERR(log); } + for (i = 0; i < obj->base.size / sizeof(u32); i += 4) + seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", + *(log + i), *(log + i + 1), + *(log + i + 2), *(log + i + 3)); + seq_putc(m, '\n'); + i915_gem_object_unpin_map(obj); + return 0; } @@ -4791,6 +4801,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_guc_info", i915_guc_info, 0}, {"i915_guc_load_status", i915_guc_load_status_info, 0}, {"i915_guc_log_dump", i915_guc_log_dump, 0}, + {"i915_guc_load_err_log_dump", i915_guc_log_dump, 0, (void *)1}, {"i915_guc_stage_pool", i915_guc_stage_pool, 0}, {"i915_huc_load_status", i915_huc_load_status_info, 0}, {"i915_frequency_info", i915_frequency_info, 0}, diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index d17029c..7a7b07d 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -288,6 +288,23 @@ static void guc_init_send_regs(struct intel_guc *guc) guc->send_regs.fw_domains = fw_domains; } +static void guc_capture_load_err_log(struct intel_guc *guc) +{ + if (!guc->log.vma || i915.guc_log_level < 0) + return; + + if (!guc->load_err_log) + guc->load_err_log = i915_gem_object_get(guc->log.vma->obj); + + return; +} + +static void guc_free_load_err_log(struct intel_guc *guc) +{ + if (guc->load_err_log) + i915_gem_object_put(guc->load_err_log); +} + static int guc_enable_communication(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -367,11 +384,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) /* Did we succeded or run out of retries? */ if (ret) - goto err_submission; + goto err_log_capture; ret = guc_enable_communication(guc); if (ret) - goto err_submission; + goto err_log_capture; intel_guc_auth_huc(dev_priv); if (i915.enable_guc_submission) { @@ -397,6 +414,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) err_interrupts: guc_disable_communication(guc); gen9_disable_guc_interrupts(dev_priv); +err_log_capture: + guc_capture_load_err_log(guc); err_submission: if (i915.enable_guc_submission) i915_guc_submission_fini(dev_priv); @@ -422,6 +441,8 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) if (!i915.enable_guc_loading) return; + guc_free_load_err_log(&dev_priv->guc); + if (i915.enable_guc_submission) i915_guc_submission_disable(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index fb1d640..69daf4c 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -175,6 +175,9 @@ struct intel_guc { struct intel_guc_log log; struct intel_guc_ct ct; + /* Log snapshot if GuC errors during load */ + struct drm_i915_gem_object *load_err_log; + /* intel_guc_recv interrupt related state */ bool interrupts_enabled; -- cgit v1.1 From c9e0c6da53231f685d7dbd3d420483e1ede212e2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 May 2017 14:22:09 +0100 Subject: drm/i915: Keep the forcewake timer alive for 1ms past the most recent use Currently the timer is armed for 1ms after the first use and is killed immediately, dropping the forcewake as early as possible. However, for very frequent operations the forcewake dance has a large impact on latency and keeping the timer alive until we are idle is preferred. To achieve this, if we call intel_uncore_forcewake_get whilst the timer is alive (repeated use), then set a flag to restart the timer on expiry rather than drop the forcewake usage count. 
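The mechanism can be sketched generically: the expiry path consumes an "active" mark with an atomic exchange, and any reuse of the domain while the timer was pending turns the expiry into a re-arm instead of a release. A standalone, hedged approximation (made-up fw_domain_like/release_timer_expired() names and C11 atomics in place of the kernel's xchg(), hrtimer and locking):

#include <stdatomic.h>
#include <stdbool.h>

struct fw_domain_like {
    atomic_bool active;     /* set whenever the domain is reused while the timer is pending */
    unsigned int wake_count;
};

/* Returns true if the caller should re-arm the timer, false to release now. */
static bool release_timer_expired(struct fw_domain_like *d)
{
    if (atomic_exchange(&d->active, false))
        return true;        /* recently used: keep the forcewake reference alive */

    if (d->wake_count)      /* the real code holds uncore.lock around this */
        d->wake_count--;
    return false;
}

In the driver itself the same consume-the-flag step is the xchg() at the top of the hrtimer callback in the hunk below.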
The timer is racy, the consequence of the race is to expire the timer earlier than is now desired but does not impact on correct behaviour. The offset the race slightly, we set the active flag again on intel_uncore_forcewake_put. The effect should be to reduce the jitter of reacquiring the fw every 1ms on a busy system. However, the cost is to keep the timer alive for an extra 1ms on a nearly idle system. We chose to incur the jitter previously to keep the timer off for as much as possible. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170526132209.14640-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_uncore.c | 15 ++++++++++++--- drivers/gpu/drm/i915/intel_uncore.h | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9269cae..47d7ee1 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -214,6 +214,9 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) assert_rpm_device_not_suspended(dev_priv); + if (xchg(&domain->active, false)) + return HRTIMER_RESTART; + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); if (WARN_ON(domain->wake_count == 0)) domain->wake_count++; @@ -244,6 +247,7 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, active_domains = 0; for_each_fw_domain(domain, dev_priv, tmp) { + smp_store_mb(domain->active, false); if (hrtimer_cancel(&domain->timer) == 0) continue; @@ -451,9 +455,12 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, fw_domains &= dev_priv->uncore.fw_domains; - for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) - if (domain->wake_count++) + for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) { + if (domain->wake_count++) { fw_domains &= ~domain->mask; + domain->active = true; + } + } if (fw_domains) dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); @@ -518,8 +525,10 @@ static void __intel_uncore_forcewake_put(struct drm_i915_private *dev_priv, if (WARN_ON(domain->wake_count == 0)) continue; - if (--domain->wake_count) + if (--domain->wake_count) { + domain->active = true; continue; + } fw_domain_arm_timer(domain); } diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index ff6fe2b..5f90278 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -96,6 +96,7 @@ struct intel_uncore { enum forcewake_domain_id id; enum forcewake_domains mask; unsigned int wake_count; + bool active; struct hrtimer timer; i915_reg_t reg_set; i915_reg_t reg_ack; -- cgit v1.1 From cd9f4688a3297c0df0eecc2adaae5812d3e5b997 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 29 May 2017 09:00:58 +0200 Subject: drm/i915: Update DRIVER_DATE to 20170529 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d2a5749..35e161b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -80,8 +80,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170515" -#define DRIVER_TIMESTAMP 1494832308 +#define DRIVER_DATE "20170529" +#define DRIVER_TIMESTAMP 1496041258 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity 
checks to check for unexpected conditions -- cgit v1.1